- Introduced MEMORY_TRADEOFFS.md to explain memory vs deduplication trade-offs in anagram generation.
- Added PERFORMANCE.md detailing optimizations for handling large volumes of anagram generation efficiently.
- Created USAGE.md for comprehensive usage instructions, including installation, basic commands, and advanced generation modes.
- Enhanced generator with streaming and batch processing capabilities for improved memory management.
- Implemented quick hashing for deduplication to reduce memory footprint.
- Updated main.rs to support new command-line arguments for streaming and batch modes.
- Added tests to ensure letter removal maintains minimum word length and to verify anagram sorting functionality.
178 lines
6.0 KiB
Rust
178 lines
6.0 KiB
Rust
use anagram_generator::{
|
|
analyzer::PronounceabilityAnalyzer, generator::AnagramGenerator, generator::GenerationConfig,
|
|
generator::LetterRemovalStrategy,
|
|
};
|
|
use rand::SeedableRng;
|
|
use rand::rngs::StdRng;
|
|
|
|
/// A default-constructed `GenerationConfig` must carry
/// `LetterRemovalStrategy::None` in its `letter_removal` field.
#[test]
fn test_letter_removal_disabled_by_default() {
    let defaults = GenerationConfig::default();
    let strategy = &defaults.letter_removal;

    assert_eq!(*strategy, LetterRemovalStrategy::None);
}
|
|
|
|
/// Calling `allow_removing_letters(3)` on a default config must set the
/// `letter_removal` field to `Adaptive { max_removals: 3 }`.
#[test]
fn test_letter_removal_can_be_enabled() {
    let expected = LetterRemovalStrategy::Adaptive { max_removals: 3 };

    let config = GenerationConfig::default().allow_removing_letters(3);

    assert_eq!(config.letter_removal, expected);
}
|
|
|
|
/// Generates anagrams of "difficult" with a config that never enables letter
/// removal and checks that every result reports the same `len()` as the source.
///
/// An empty result set passes trivially, because the check runs per result.
#[test]
fn test_generation_without_removal_produces_same_length() {
    let source = "difficult";
    let expected_len = source.len();

    // Seeded RNG so the generator sees the same random stream on every run.
    let mut generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );
    let config = GenerationConfig::new(0, 1000);

    generator
        .generate(source, 5, &config)
        .iter()
        .for_each(|candidate| assert_eq!(candidate.text().len(), expected_len));
}
|
|
|
|
/// Generates anagrams from a hard-to-pronounce source with
/// `allow_removing_letters(4)` and checks that either nothing was produced or
/// at least one result is shorter than the source.
#[test]
fn test_generation_with_removal_may_produce_shorter_words() {
    let source = "xyzqwbcdfg"; // Very hard to pronounce word
    let source_len = source.len();

    let mut generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );
    let config = GenerationConfig::new(60, 5000).allow_removing_letters(4);
    let results = generator.generate(source, 5, &config);

    // With letter removal enabled, some results might be shorter than the source.
    let found_shorter = results
        .iter()
        .any(|entry| entry.text().len() < source_len);

    // An empty result set is tolerated; a non-empty one must contain a shorter entry.
    assert!(results.is_empty() || found_shorter);
}
|
|
|
|
/// Runs two identically seeded generators over a vowel-free source, one with
/// `allow_removing_letters(5)` and one without, and compares the outcomes.
///
/// The comparison is skipped when either run yields nothing. Otherwise the run
/// with removal must have an average score at least as high as the other run,
/// or must have produced more anagrams.
#[test]
fn test_letter_removal_improves_pronounceability() {
    let source = "bcdfghjklm"; // No vowels, very hard to pronounce

    // Both generators get the same seed and a default analyzer, so the configs
    // are the only difference between the two runs.
    let mut plain_generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );
    let mut removal_generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );

    let plain_config = GenerationConfig::new(40, 10000);
    let removal_config = GenerationConfig::new(40, 10000).allow_removing_letters(5);

    let plain = plain_generator.generate(source, 10, &plain_config);
    let with_removal = removal_generator.generate(source, 10, &removal_config);

    // Nothing to compare unless both runs produced at least one anagram.
    if plain.is_empty() || with_removal.is_empty() {
        return;
    }

    let plain_total: f32 = plain.iter().map(|a| a.score().value() as f32).sum();
    let plain_avg = plain_total / plain.len() as f32;

    let removal_total: f32 = with_removal
        .iter()
        .map(|a| a.score().value() as f32)
        .sum();
    let removal_avg = removal_total / with_removal.len() as f32;

    // Letter removal should help achieve better or equal scores, or more results.
    assert!(removal_avg >= plain_avg || with_removal.len() > plain.len());
}
|
|
|
|
/// With `allow_removing_letters(2)` on the source "testing", no generated
/// anagram may be shorter than `source.len() - 2`.
#[test]
fn test_letter_removal_respects_max_removals() {
    let source = "testing";
    let max_removals = 2;

    let mut generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );
    let config = GenerationConfig::new(0, 1000).allow_removing_letters(max_removals);

    // Lower bound on result length: the source length minus the removal budget.
    let min_length = source.len() - max_removals;

    generator
        .generate(source, 20, &config)
        .iter()
        .for_each(|candidate| {
            assert!(
                candidate.text().len() >= min_length,
                "Anagram '{}' is too short (min: {})",
                candidate.text(),
                min_length
            );
        });
}
|
|
|
|
/// Requests a removal budget (10) larger than the source word ("abc") and
/// checks that no generated anagram is empty.
///
/// NOTE(review): the earlier comment on this test stated that at least 2
/// characters (word length - 1) should remain, but the assertion only verifies
/// that the text is non-empty. Confirm the intended minimum against the
/// generator before tightening this check.
#[test]
fn test_letter_removal_maintains_min_word_length() {
    let source = "abc";

    let mut generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );
    // Removal budget deliberately exceeds the word length.
    let config = GenerationConfig::new(0, 1000).allow_removing_letters(10);

    generator
        .generate(source, 10, &config)
        .iter()
        .for_each(|candidate| {
            assert!(
                !candidate.text().is_empty(),
                "Anagram '{}' is too short",
                candidate.text()
            );
        });
}
|
|
|
|
/// Generates anagrams of an already pronounceable word with
/// `allow_removing_letters(2)` and checks that results are produced and that
/// every result has a score value of at least 70.
#[test]
fn test_letter_removal_with_good_word() {
    let source = "example"; // Already pronounceable

    let mut generator = AnagramGenerator::new(
        StdRng::seed_from_u64(42),
        PronounceabilityAnalyzer::with_defaults(),
    );
    let config = GenerationConfig::new(70, 1000).allow_removing_letters(2);
    let results = generator.generate(source, 10, &config);

    // Removal may be unnecessary for a good word, but generation must still succeed.
    assert!(!results.is_empty());
    results
        .iter()
        .for_each(|entry| assert!(entry.score().value() >= 70));
}
|
|
|
|
/// Applies `with_min_score`, `with_max_attempts` and `allow_removing_letters`
/// in sequence and checks that each setting is visible on the resulting config.
#[test]
fn test_config_builder_with_letter_removal() {
    // Apply the builder steps one at a time, shadowing the intermediate value.
    let config = GenerationConfig::default();
    let config = config.with_min_score(60);
    let config = config.with_max_attempts(5000);
    let config = config.allow_removing_letters(3);

    let expected_strategy = LetterRemovalStrategy::Adaptive { max_removals: 3 };

    assert_eq!(config.min_score.value(), 60);
    assert_eq!(config.max_attempts_per_anagram, 5000);
    assert_eq!(config.letter_removal, expected_strategy);
}
|
|
|
|
/// Passing an explicit strategy to `with_letter_removal` must store that exact
/// strategy in the config's `letter_removal` field.
#[test]
fn test_letter_removal_strategy_with_method() {
    let requested = LetterRemovalStrategy::Adaptive { max_removals: 5 };
    let config = GenerationConfig::default().with_letter_removal(requested);

    let expected = LetterRemovalStrategy::Adaptive { max_removals: 5 };
    assert_eq!(config.letter_removal, expected);
}
|