Compare commits

..

6 Commits

Author SHA1 Message Date
e6da7fdf8d build: change project name 2025-04-05 11:59:23 +02:00
c943112efc feat: update dockerignore 2025-04-05 11:54:00 +02:00
f9ccdb0ee0 chore: remove data files 2025-04-05 11:53:11 +02:00
ba04745609 feat: rewrite pseudonyme generator in rust 2025-04-05 11:49:54 +02:00
722e1da854 docs: update readme 2024-11-19 22:55:58 +01:00
3e8a40b71d docs: update readme 2024-11-19 22:54:09 +01:00
9 changed files with 626 additions and 54 deletions

View File

@@ -1,2 +1,195 @@
out
# My ignores
data/
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
# RustRover
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

195
.gitignore vendored
View File

@@ -1,2 +1,195 @@
out
# My ignores
data/
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
# RustRover
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

133
Cargo.lock generated Normal file
View File

@@ -0,0 +1,133 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rpg"
version = "1.0.0"
dependencies = [
"rand",
]
[[package]]
name = "syn"
version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

7
Cargo.toml Normal file
View File

@@ -0,0 +1,7 @@
[package]
name = "rpg"
version = "1.0.0"
edition = "2021"
[dependencies]
rand = "0.8"

View File

@@ -3,7 +3,7 @@ FROM python:3-alpine
COPY . /app
WORKDIR /app
VOLUME /app/out
VOLUME /app/data
RUN pip install -r requirements.txt

View File

@@ -2,21 +2,27 @@
Generate funny random pseudonyms composed of an adjective, a noun and a number between 1 and 999.
## Usage
## Download data
You can either use python to directly run the script like this :
To download the data files, you can either use python to directly run the script like this :
```bash
python pyrpg.py $NUMBER_OF_PSEUDONYMS
python download_data.py
```
Or you can use the provided Dockerfile to build a docker image and run it like this :
```bash
docker build -t pyrpg .
docker run --volume $(pwd)/out:/out --env NUMBER_OF_PSEUDONYMS=$NUMBER_OF_PSEUDONYMS pyrpg
docker build -t download_data .
docker run --volume $(pwd)/data:/app/data download_data
```
## Generate pseudonyms
To generate pseudonyms, just run the Rust program with Cargo:
```bash
cargo run $NUMBER_OF_PSEUDONYMS
```
If you don't provide a number of pseudonyms, the script will generate 100 by default.
Pseudonyms are written to a file named `pseudonyms.txt` in an 'out' volume so you can retrieve them easily.

33
download_data.py Normal file
View File

@@ -0,0 +1,33 @@
import nltk
import os
def main():
    """Export WordNet adjectives and nouns as capitalized word lists.

    Downloads the WordNet corpus through NLTK, collects every purely
    alphabetic lemma name whose synset part of speech is ``'a'`` (adjective)
    or ``'n'`` (noun), and writes them one per line to
    ``data/adjectives.txt`` and ``data/nouns.txt``. The ``data`` directory is
    created if it does not exist.
    """
    # Only the WordNet corpus is read below, so it is the only resource
    # that has to be downloaded.
    nltk.download('wordnet')

    # Collect the raw lemma names, grouped by part of speech.
    words_by_pos = {'a': set(), 'n': set()}
    for synset in nltk.corpus.wordnet.all_synsets():
        bucket = words_by_pos.get(synset.pos())
        if bucket is None:
            # Neither an adjective ('a') nor a noun ('n') synset.
            continue
        for lemma in synset.lemmas():
            name = lemma.name()
            # isalpha() rejects multi-word lemmas (joined with '_'),
            # hyphenated words and anything containing digits.
            if name.isalpha():
                bucket.add(name)

    # Create the output directory if it doesn't exist
    os.makedirs('data', exist_ok=True)

    outputs = (
        ('data/adjectives.txt', words_by_pos['a']),
        ('data/nouns.txt', words_by_pos['n']),
    )
    for path, words in outputs:
        # Capitalize BEFORE de-duplicating: lemmas that differ only by case
        # would otherwise end up as repeated lines. Sorting makes the
        # generated file reproducible from one run to the next.
        lines = sorted({word.capitalize() for word in words})
        with open(path, 'w', encoding='utf-8') as file:
            file.writelines(line + '\n' for line in lines)


if __name__ == '__main__':
    main()

View File

@@ -1,42 +0,0 @@
import nltk
import sys
import random
import os
def main():
    """Write random ``Adjective-Noun-Number`` pseudonyms to ``out/pseudonyms.txt``.

    The number of pseudonyms is taken from the single command-line argument
    when exactly one is given, and defaults to 100 otherwise. If that
    argument is not an integer, an error message is printed and nothing is
    generated. The number part of each pseudonym is drawn from 1 to 999
    inclusive.
    """
    try:
        num_pseudonyms = int(sys.argv[1]) if len(sys.argv) == 2 else 100
    except ValueError:
        print("The argument must be an integer.")
        return

    # Only the WordNet corpus is read below, so it is the only resource
    # that has to be downloaded.
    nltk.download('wordnet')

    # Get all the adjectives and nouns
    adjectives = set()
    nouns = set()
    for synset in nltk.corpus.wordnet.all_synsets():
        for lemma in synset.lemmas():
            if lemma.name().isalpha():
                if synset.pos() == 'a':
                    adjectives.add(lemma.name())
                elif synset.pos() == 'n':
                    nouns.add(lemma.name())

    # Build the candidate lists ONCE. Converting the sets to lists inside the
    # generation loop copied every word for every pseudonym.
    adjective_choices = sorted({word.capitalize() for word in adjectives})
    noun_choices = sorted({word.capitalize() for word in nouns})

    # Create the output directory if it doesn't exist
    os.makedirs('out', exist_ok=True)

    # Write the pseudonyms to a file
    with open('out/pseudonyms.txt', 'w', encoding='utf-8') as file:
        for _ in range(num_pseudonyms):
            adjective = random.choice(adjective_choices)
            noun = random.choice(noun_choices)
            number = random.randint(1, 999)
            file.write(f'{adjective}-{noun}-{number}\n')


if __name__ == '__main__':
    main()

49
src/main.rs Normal file
View File

@@ -0,0 +1,49 @@
use rand::seq::SliceRandom;
use rand::Rng;
use std::env;
use std::io::{BufRead, BufReader};
/// Prints random pseudonyms of the form `Adjective-Noun-Number` to stdout.
///
/// The optional first command-line argument is the number of pseudonyms to
/// generate; when it is absent, 100 are generated. The word lists are
/// embedded into the binary at compile time from `data/adjectives.txt` and
/// `data/nouns.txt` (one word per line). The number part is drawn from
/// 1 to 999 inclusive.
///
/// The process exits with status 1 when the argument cannot be parsed as a
/// `usize`, or when a word has to be drawn from an empty list.
fn main() {
    let num_pseudonyms: usize = match env::args().nth(1) {
        Some(arg) => arg.parse::<usize>().unwrap_or_else(|_| {
            eprintln!("Le nombre de pseudonymes doit être un entier positif.");
            std::process::exit(1)
        }),
        None => 100,
    };

    // `include_str!` guarantees valid UTF-8, so reading lines from the
    // embedded bytes cannot fail; `map_while` simply unwraps each line.
    let adjectives: Vec<String> =
        BufReader::new(include_str!("../data/adjectives.txt").as_bytes())
            .lines()
            .map_while(Result::ok)
            .collect();
    let nouns: Vec<String> = BufReader::new(include_str!("../data/nouns.txt").as_bytes())
        .lines()
        .map_while(Result::ok)
        .collect();

    let mut rng = rand::thread_rng();
    for _ in 0..num_pseudonyms {
        // `choose` returns `None` only when the slice is empty.
        let Some(adjective) = adjectives.choose(&mut rng) else {
            eprintln!("Impossible de choisir un adjectif.");
            std::process::exit(1);
        };
        let Some(noun) = nouns.choose(&mut rng) else {
            eprintln!("Impossible de choisir un nom.");
            std::process::exit(1);
        };
        let number: u32 = rng.gen_range(1..=999);

        // Format straight into the output instead of building a temporary
        // `String` with `format!` first.
        println!("{adjective}-{noun}-{number}");
    }
}