feat: optimize and simplify python download script

feat: make it even simpler
feat: over simplify the code
2025-04-05 15:25:40 +02:00 · 2025-04-05 14:33:02 +02:00 · 2025-04-05 14:28:31 +02:00 · 2025-04-05 11:59:23 +02:00 · 2025-04-05 11:54:00 +02:00 · 2025-04-05 11:53:11 +02:00
9 changed files with 476 additions and 54 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,195 @@
-out
+# My ignores
+
+data/
+
+# Generated by Cargo
+# will have compiled files and executables
+debug/
+target/
+
+# These are backup files generated by rustfmt
+**/*.rs.bk
+
+# MSVC Windows builds of rustc generate these, which store debugging information
+*.pdb
+
+# RustRover
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
 .venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,195 @@
-out
+# My ignores
+
+data/
+
+# Generated by Cargo
+# will have compiled files and executables
+debug/
+target/
+
+# These are backup files generated by rustfmt
+**/*.rs.bk
+
+# MSVC Windows builds of rustc generate these, which store debugging information
+*.pdb
+
+# RustRover
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
 .venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -0,0 +1,16 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
+[[package]]
+name = "rpg"
+version = "1.0.0"
+dependencies = [
+ "fastrand",
+]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "rpg"
+version = "1.0.0"
+edition = "2021"
+
+[dependencies]
+fastrand = "2.3.0"
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@ FROM python:3-alpine
 COPY . /app
 WORKDIR /app

-VOLUME /app/out
+VOLUME /app/data

 RUN pip install -r requirements.txt

--- a/README.md
+++ b/README.md
@@ -2,21 +2,27 @@

 Generate funny random pseudonyms composed of an adjective, a noun and a number between 1 and 999.

-## Usage
+## Download data

-You can either use python to directly run the script like this : 
+To download the data files, you can either use python to directly run the script like this : 

 ```bash
-python pyrpg.py $NUMBER_OF_PSEUDONYMS
+python download_data.py
 ```

 Or you can use the provided Dockerfile to build a docker image and run it like this : 

 ```bash
-docker build -t pyrpg .
-docker run --volume $(pwd)/out:/out --env NUMBER_OF_PSEUDONYMS=$NUMBER_OF_PSEUDONYMS pyrpg
+docker build -t download_data .
+docker run --volume $(pwd)/data:/app/data download_data
+```
+
+## Generate pseudonyms
+
+To generate pseudonyms, just run the Rust program with Cargo :
+
+```bash
+cargo run $NUMBER_OF_PSEUDONYMS
 ```

 If you don't provide a number of pseudonyms, the program will generate 10 by default.
-
-Pseudonyms are written to a file named `pseudonyms.txt` in and 'out' volume so you can retrieve them easily.
--- a/download_data.py
+++ b/download_data.py
@@ -0,0 +1,29 @@
+import nltk
+import os
+
+
+def main():
+    nltk.download("wordnet")
+
+    adjectives = {
+        lemma.name().capitalize()
+        for synset in nltk.corpus.wordnet.all_synsets(pos="a")
+        for lemma in synset.lemmas()
+        if lemma.name().isalpha()
+    }
+    nouns = {
+        lemma.name().capitalize()
+        for synset in nltk.corpus.wordnet.all_synsets(pos="n")
+        for lemma in synset.lemmas()
+        if lemma.name().isalpha()
+    }
+
+    os.makedirs("data", exist_ok=True)
+    with open("data/adjectives.txt", "w") as adj_file:
+        adj_file.write("\n".join(sorted(adjectives)))
+    with open("data/nouns.txt", "w") as noun_file:
+        noun_file.write("\n".join(sorted(nouns)))
+
+
+if __name__ == "__main__":
+    main()
--- a/pyrpg.py
+++ b/pyrpg.py
@@ -1,42 +0,0 @@
-import nltk
-import sys
-import random
-import os
-
-def main():
-    try:
-        num_pseudonyms = int(sys.argv[1]) if len(sys.argv) == 2 else 100
-    except ValueError:
-        print("The argument must be an integer.")
-        return
-
-    # Load the corpora
-    nltk.download('wordnet')
-    nltk.download('averaged_perceptron_tagger')
-    nltk.download('universal_tagset')
-
-    # Get all the adjectives and nouns
-    adjectives = set()
-    nouns = set()
-    for synset in nltk.corpus.wordnet.all_synsets():
-        for lemma in synset.lemmas():
-            if lemma.name().isalpha():
-                if synset.pos() == 'a':
-                    adjectives.add(lemma.name())
-                elif synset.pos() == 'n':
-                    nouns.add(lemma.name())
-    
-    # Create the output directory if it doesn't exist
-    os.makedirs('out', exist_ok=True)
-
-    # Write the pseudonyms to a file
-    with open('out/pseudonyms.txt', 'w+') as file:
-        for _ in range(num_pseudonyms):
-            adjective = list(adjectives)[random.randint(0, len(adjectives) - 1)].capitalize()
-            noun = list(nouns)[random.randint(0, len(nouns) - 1)].capitalize()
-            number = random.randint(1, 999)
-            pseudonym = f'{adjective}-{noun}-{number}'
-            file.write(pseudonym + '\n')
-
-if __name__ == '__main__':
-    main()
--- a/src/main.rs
+++ b/src/main.rs
@@ -0,0 +1,20 @@
+use std::env;
+
+fn main() {
+    let num_pseudonyms = env::args()
+        .nth(1)
+        .and_then(|arg| arg.parse().ok())
+        .unwrap_or(10);
+
+    let adjectives = include_str!("../data/adjectives.txt").lines().collect::<Vec<_>>();
+    let nouns = include_str!("../data/nouns.txt").lines().collect::<Vec<_>>();
+
+    (0..num_pseudonyms).for_each(|_| {
+        println!(
+            "{}-{}-{}",
+            adjectives[fastrand::usize(0..adjectives.len())],
+            nouns[fastrand::usize(0..nouns.len())],
+            fastrand::u32(0..1000)
+        );
+    });
+}
Author	SHA1	Message	Date
Rawleenc Dev	5071c28f4f	feat: optimize and simplify python download script	2025-04-05 15:25:40 +02:00
Rawleenc Dev	f5b2737178	feat: make it even simpler	2025-04-05 14:33:02 +02:00
Rawleenc Dev	6d9a5bbb84	feat: over simplify the code	2025-04-05 14:28:31 +02:00
Rawleenc Dev	e6da7fdf8d	build: change project name	2025-04-05 11:59:23 +02:00
Rawleenc Dev	c943112efc	feat: update dockerignore	2025-04-05 11:54:00 +02:00
Rawleenc Dev	f9ccdb0ee0	chore: remove data files	2025-04-05 11:53:11 +02:00
Rawleenc Dev	ba04745609	feat: rewrite pseudonyme generator in rust	2025-04-05 11:49:54 +02:00
Rawleenc	722e1da854	docs: update readme	2024-11-19 22:55:58 +01:00
Rawleenc	3e8a40b71d	docs: update readme	2024-11-19 22:54:09 +01:00