perf: Add MEMORY_TRADEOFFS and PERFORMANCE documentation

- Introduced MEMORY_TRADEOFFS.md to explain memory vs deduplication trade-offs in anagram generation.
- Added PERFORMANCE.md detailing optimizations for handling large volumes of anagram generation efficiently.
- Created USAGE.md for comprehensive usage instructions, including installation, basic commands, and advanced generation modes.
- Enhanced generator with streaming and batch processing capabilities for improved memory management.
- Implemented quick hashing for deduplication to reduce memory footprint.
- Updated main.rs to support new command-line arguments for streaming and batch modes.
- Added tests to ensure letter removal maintains minimum word length and to verify anagram sorting functionality.
This commit is contained in:
2025-11-06 23:38:05 +01:00
parent ebdbe60e04
commit 02cf48088a
12 changed files with 1733 additions and 19 deletions

View File

@@ -0,0 +1,129 @@
use anagram_generator::{AnagramGenerator, GenerationConfig, PronounceabilityAnalyzer};
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
use rand::thread_rng;
/// Benchmarks `AnagramGenerator::generate` on the word "programming" for
/// requested counts of 10, 100 and 1000.
///
/// Every timed iteration builds its own RNG, scorer, generator and default
/// config, so that construction work is part of each measurement.
fn bench_generate_small(c: &mut Criterion) {
    const WORD: &str = "programming";

    let mut small_group = c.benchmark_group("generate_small");
    for &count in &[10, 100, 1000] {
        let id = BenchmarkId::from_parameter(count);
        small_group.bench_with_input(id, &count, |bencher, &requested| {
            bencher.iter(|| {
                let mut generator =
                    AnagramGenerator::new(thread_rng(), PronounceabilityAnalyzer::with_defaults());
                let config = GenerationConfig::default();
                // Bind the result first so it is handed to `black_box` by value.
                let produced = generator.generate(black_box(WORD), requested, &config);
                black_box(produced)
            });
        });
    }
    small_group.finish();
}
/// Compares two ways of obtaining 1000 anagrams of "programming":
///
/// * `"collect"` calls `generate` and returns its result as-is.
/// * `"iterator"` calls `generate_iter` and collects the items into a `Vec`.
///
/// Both variants rebuild the RNG, scorer, generator and config inside the
/// timed closure, so they carry the same setup work.
fn bench_generate_iter_vs_collect(c: &mut Criterion) {
    let requested = 1000;
    let mut comparison = c.benchmark_group("iter_vs_collect");

    comparison.bench_function("collect", |bencher| {
        bencher.iter(|| {
            let mut generator =
                AnagramGenerator::new(thread_rng(), PronounceabilityAnalyzer::with_defaults());
            let config = GenerationConfig::default();
            let produced = generator.generate(black_box("programming"), requested, &config);
            black_box(produced)
        });
    });

    comparison.bench_function("iterator", |bencher| {
        bencher.iter(|| {
            let mut generator =
                AnagramGenerator::new(thread_rng(), PronounceabilityAnalyzer::with_defaults());
            let config = GenerationConfig::default();
            // Keep the binding: the iterator is consumed here, before the
            // generator and config it was created from go out of scope.
            let collected = generator
                .generate_iter(black_box("programming"), requested, &config)
                .collect::<Vec<_>>();
            black_box(collected)
        });
    });

    comparison.finish();
}
/// Benchmarks `AnagramGenerator::generate_batches` for a fixed total of 10000
/// anagrams of "programming", varying the batch size over 100, 1000 and 10000.
///
/// The RNG, scorer, generator and default config are created inside the timed
/// closure, so their construction is included in every sample.
fn bench_generate_batches(c: &mut Criterion) {
    // Total number of anagrams requested, regardless of batch size.
    let total = 10000;

    let mut batch_group = c.benchmark_group("batches");
    for &batch_size in &[100, 1000, 10000] {
        let id = BenchmarkId::from_parameter(batch_size);
        batch_group.bench_with_input(id, &batch_size, |bencher, &size| {
            bencher.iter(|| {
                let mut generator =
                    AnagramGenerator::new(thread_rng(), PronounceabilityAnalyzer::with_defaults());
                let config = GenerationConfig::default();
                let produced =
                    generator.generate_batches(black_box("programming"), total, size, &config);
                black_box(produced)
            });
        });
    }
    batch_group.finish();
}
/// Runs two large (10000-item) generations of "programming" anagrams:
///
/// * `"large_count_10k"` calls `generate` and reports the result's `len()`.
/// * `"large_count_10k_iter"` calls `generate_iter` and only counts the items
///   it yields, without storing them.
///
/// NOTE(review): nothing in this function measures allocations directly; only
/// what the benchmark runner records for each closure is reported.
fn bench_memory_efficiency(c: &mut Criterion) {
    let mut mem_group = c.benchmark_group("memory_efficiency");
    // Large runs are slow, so take fewer samples than the default.
    mem_group.sample_size(10);

    // Materialised result: the whole output exists before its length is read.
    mem_group.bench_function("large_count_10k", |bencher| {
        bencher.iter(|| {
            let mut generator =
                AnagramGenerator::new(thread_rng(), PronounceabilityAnalyzer::with_defaults());
            let config = GenerationConfig::default();
            let produced = generator.generate(black_box("programming"), 10000, &config);
            black_box(produced.len())
        });
    });

    // Streaming result: items are counted as they come out of the iterator.
    mem_group.bench_function("large_count_10k_iter", |bencher| {
        bencher.iter(|| {
            let mut generator =
                AnagramGenerator::new(thread_rng(), PronounceabilityAnalyzer::with_defaults());
            let config = GenerationConfig::default();
            let yielded = generator
                .generate_iter(black_box("programming"), 10000, &config)
                .count();
            black_box(yielded)
        });
    });

    mem_group.finish();
}
// Register every benchmark in this file under the `benches` group, using the
// default Criterion configuration, and generate the benchmark binary's `main`.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets =
        bench_generate_small,
        bench_generate_iter_vs_collect,
        bench_generate_batches,
        bench_memory_efficiency
}
criterion_main!(benches);