From 5071c28f4f61b2d8608ca32faf495df513839910 Mon Sep 17 00:00:00 2001
From: Rawleenc Dev <lawrence.dnp@gmail.com>
Date: Sat, 5 Apr 2025 15:25:40 +0200
Subject: [PATCH] feat: optimize and simplify python download script

---
 download_data.py | 48 ++++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/download_data.py b/download_data.py
index 31ecdd7..6e0f982 100644
--- a/download_data.py
+++ b/download_data.py
@@ -1,33 +1,29 @@
 import nltk
 import os
 
+
 def main():
-    # Load the corpora
-    nltk.download('wordnet')
-    nltk.download('averaged_perceptron_tagger')
-    nltk.download('universal_tagset')
+    nltk.download("wordnet")
 
-    # Get all the adjectives and nouns
-    adjectives = set()
-    nouns = set()
-    for synset in nltk.corpus.wordnet.all_synsets():
-        for lemma in synset.lemmas():
-            if lemma.name().isalpha():
-                if synset.pos() == 'a':
-                    adjectives.add(lemma.name())
-                elif synset.pos() == 'n':
-                    nouns.add(lemma.name())
-    
-    # Create the output directory if it doesn't exist
-    os.makedirs('data', exist_ok=True)
+    adjectives = {
+        lemma.name().capitalize()
+        for synset in nltk.corpus.wordnet.all_synsets(pos="a")
+        for lemma in synset.lemmas()
+        if lemma.name().isalpha()
+    }
+    nouns = {
+        lemma.name().capitalize()
+        for synset in nltk.corpus.wordnet.all_synsets(pos="n")
+        for lemma in synset.lemmas()
+        if lemma.name().isalpha()
+    }
 
-    with open('data/adjectives.txt', 'w+') as file:
-        for adjective in adjectives:
-            file.write(str(adjective).capitalize() + '\n')
-    
-    with open('data/nouns.txt', 'w+') as file:
-        for noun in nouns:
-            file.write(str(noun).capitalize() + '\n')
+    os.makedirs("data", exist_ok=True)
+    with open("data/adjectives.txt", "w") as adj_file:  # newline-terminated lines
+        adj_file.writelines(f"{word}\n" for word in sorted(adjectives))
+    with open("data/nouns.txt", "w") as noun_file:  # newline-terminated lines
+        noun_file.writelines(f"{word}\n" for word in sorted(nouns))
 
-if __name__ == '__main__':
-    main()
\ No newline at end of file
+
+if __name__ == "__main__":
+    main()