feat: optimize and simplify python download script

This commit is contained in:
2025-04-05 15:25:40 +02:00
parent f5b2737178
commit 5071c28f4f

View File

@@ -1,33 +1,29 @@
import nltk
import os
def _collect_words(pos):
    """Return capitalized WordNet lemma names for one part of speech.

    Args:
        pos: WordNet part-of-speech tag passed to ``all_synsets``
            (``"a"`` for adjectives, ``"n"`` for nouns).

    Returns:
        A set of lemma names, each passed through ``str.capitalize``.
        Names that are not purely alphabetic (``str.isalpha`` is false,
        e.g. multi-word lemmas joined with ``_``) are skipped.
    """
    # NOTE(review): filtering through all_synsets(pos="a") may also yield
    # satellite adjectives, which a per-synset `pos() == 'a'` check would
    # skip -- confirm against the NLTK WordNet reader if that matters.
    return {
        lemma.name().capitalize()
        for synset in nltk.corpus.wordnet.all_synsets(pos=pos)
        for lemma in synset.lemmas()
        if lemma.name().isalpha()
    }


def _write_word_list(path, words):
    """Write *words* to *path* in sorted order, one word per line."""
    # Sorting makes the generated file reproducible between runs; plain set
    # iteration order is not stable. Every line is newline-terminated.
    with open(path, "w", encoding="utf-8") as out_file:
        out_file.writelines(word + "\n" for word in sorted(words))


def main():
    """Download WordNet and dump its adjectives and nouns to text files.

    Writes ``data/adjectives.txt`` and ``data/nouns.txt`` (creating the
    ``data`` directory if needed). Each file holds the capitalized,
    purely alphabetic lemma names for that part of speech, sorted, one
    per line.
    """
    # Only the WordNet corpus is read below, so it is the only download.
    nltk.download("wordnet")

    # Get all the adjectives and nouns
    adjectives = _collect_words("a")
    nouns = _collect_words("n")

    # Create the output directory if it doesn't exist
    os.makedirs("data", exist_ok=True)

    _write_word_list("data/adjectives.txt", adjectives)
    _write_word_list("data/nouns.txt", nouns)
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()