Compare commits

..

9 Commits

9 changed files with 476 additions and 54 deletions

View File

@@ -1,2 +1,195 @@
out
# My ignores
data/
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
# RustRover
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

195
.gitignore vendored
View File

@@ -1,2 +1,195 @@
out
# My ignores
data/
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
# RustRover
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

16
Cargo.lock generated Normal file
View File

@@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "rpg"
version = "1.0.0"
dependencies = [
"fastrand",
]

7
Cargo.toml Normal file
View File

@@ -0,0 +1,7 @@
[package]
name = "rpg"
version = "1.0.0"
edition = "2021"
[dependencies]
fastrand = "2.3.0"

View File

@@ -3,7 +3,7 @@ FROM python:3-alpine
COPY . /app
WORKDIR /app
VOLUME /app/out
VOLUME /app/data
RUN pip install -r requirements.txt

View File

@@ -2,21 +2,27 @@
Generate funny random pseudonyms composed of an adjective, a noun and a number between 1 and 999.
## Usage
## Download data
You can either use python to directly run the script like this :
To download the data files, you can either use Python to run the script directly like this:
```bash
python pyrpg.py $NUMBER_OF_PSEUDONYMS
python download_data.py
```
Or you can use the provided Dockerfile to build a Docker image and run it like this:
```bash
docker build -t pyrpg .
docker run --volume $(pwd)/out:/out --env NUMBER_OF_PSEUDONYMS=$NUMBER_OF_PSEUDONYMS pyrpg
docker build -t download_data .
docker run --volume $(pwd)/data:/app/data download_data
```
## Generate pseudonyms
To generate pseudonyms, run the Rust program with Cargo:
```bash
cargo run $NUMBER_OF_PSEUDONYMS
```
If you don't provide a number of pseudonyms, the script will generate 100 by default.
Pseudonyms are written to a file named `pseudonyms.txt` in an `out` volume so you can retrieve them easily.

29
download_data.py Normal file
View File

@@ -0,0 +1,29 @@
import nltk
import os
def _collect_lemma_names(pos):
    """Return the capitalized, purely alphabetic WordNet lemma names for part of speech ``pos``."""
    return {
        lemma.name().capitalize()
        for synset in nltk.corpus.wordnet.all_synsets(pos=pos)
        for lemma in synset.lemmas()
        if lemma.name().isalpha()
    }


def _write_word_list(path, words):
    """Write ``words`` to ``path`` in sorted order, one word per line, encoded as UTF-8."""
    # str.isalpha() also accepts non-ASCII letters, and src/main.rs embeds these
    # files with include_str!, which requires valid UTF-8. Pin the encoding
    # instead of relying on the platform default.
    with open(path, "w", encoding="utf-8") as word_file:
        word_file.write("\n".join(sorted(words)))


def main():
    """Download the WordNet corpus and dump its adjectives and nouns into ``data/``.

    Creates ``data/adjectives.txt`` and ``data/nouns.txt`` (and the ``data``
    directory itself when missing), overwriting any previous content.
    """
    nltk.download("wordnet")

    adjectives = _collect_lemma_names("a")
    nouns = _collect_lemma_names("n")

    os.makedirs("data", exist_ok=True)
    _write_word_list("data/adjectives.txt", adjectives)
    _write_word_list("data/nouns.txt", nouns)


if __name__ == "__main__":
    main()

View File

@@ -1,42 +0,0 @@
import nltk
import sys
import random
import os
def main():
    """Write random ``Adjective-Noun-Number`` pseudonyms to ``out/pseudonyms.txt``.

    The number of pseudonyms is read from the single command-line argument and
    defaults to 100 when exactly one argument is not supplied. If the argument
    is not an integer, an error message is printed and nothing is written.
    """
    try:
        num_pseudonyms = int(sys.argv[1]) if len(sys.argv) == 2 else 100
    except ValueError:
        print("The argument must be an integer.")
        return

    # Load the corpora
    # NOTE(review): only nltk.corpus.wordnet is read below; the two tagger
    # downloads look unnecessary -- confirm before removing.
    nltk.download('wordnet')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('universal_tagset')

    # Get all the adjectives and nouns (purely alphabetic lemma names only)
    adjectives = set()
    nouns = set()
    for synset in nltk.corpus.wordnet.all_synsets():
        pos = synset.pos()
        if pos == 'a':
            bucket = adjectives
        elif pos == 'n':
            bucket = nouns
        else:
            continue
        bucket.update(
            lemma.name() for lemma in synset.lemmas() if lemma.name().isalpha()
        )

    # Materialize the sets once: rebuilding a list from each set on every
    # iteration made the generation loop needlessly O(words * pseudonyms).
    adjective_pool = list(adjectives)
    noun_pool = list(nouns)

    # Create the output directory if it doesn't exist
    os.makedirs('out', exist_ok=True)

    # Write the pseudonyms to a file, one per line
    with open('out/pseudonyms.txt', 'w', encoding='utf-8') as file:
        for _ in range(num_pseudonyms):
            adjective = random.choice(adjective_pool).capitalize()
            noun = random.choice(noun_pool).capitalize()
            number = random.randint(1, 999)
            pseudonym = f'{adjective}-{noun}-{number}'
            file.write(pseudonym + '\n')


if __name__ == '__main__':
    main()

20
src/main.rs Normal file
View File

@@ -0,0 +1,20 @@
use std::env;
/// Prints random pseudonyms of the form `Adjective-Noun-Number` to standard output.
///
/// The first command-line argument is the number of pseudonyms to print; when it is
/// missing or does not parse as an integer, 10 pseudonyms are printed. The word lists
/// are embedded at compile time from `data/adjectives.txt` and `data/nouns.txt`.
///
/// Exits with status 1 if either embedded word list contains no words.
fn main() {
    let num_pseudonyms = env::args()
        .nth(1)
        .and_then(|arg| arg.parse().ok())
        .unwrap_or(10);

    // One word per line; blank lines are skipped so they can never end up in a pseudonym.
    let adjectives = include_str!("../data/adjectives.txt")
        .lines()
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>();
    let nouns = include_str!("../data/nouns.txt")
        .lines()
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>();

    // `fastrand::usize` panics on an empty range, so fail with a clear message instead.
    if adjectives.is_empty() || nouns.is_empty() {
        eprintln!("The word lists in data/ are empty; run download_data.py and rebuild.");
        std::process::exit(1);
    }

    for _ in 0..num_pseudonyms {
        println!(
            "{}-{}-{}",
            adjectives[fastrand::usize(..adjectives.len())],
            nouns[fastrand::usize(..nouns.len())],
            // The README describes the suffix as a number between 1 and 999, so 0 is excluded.
            fastrand::u32(1..1000)
        );
    }
}