feat: optimize and simplify python download script

This commit is contained in:
2025-04-05 15:25:40 +02:00
parent f5b2737178
commit 5071c28f4f

View File

@@ -1,33 +1,29 @@
 import nltk
 import os

+
 def main():
-    # Load the corpora
-    nltk.download('wordnet')
-    nltk.download('averaged_perceptron_tagger')
-    nltk.download('universal_tagset')
+    nltk.download("wordnet")

-    # Get all the adjectives and nouns
-    adjectives = set()
-    nouns = set()
-    for synset in nltk.corpus.wordnet.all_synsets():
-        for lemma in synset.lemmas():
-            if lemma.name().isalpha():
-                if synset.pos() == 'a':
-                    adjectives.add(lemma.name())
-                elif synset.pos() == 'n':
-                    nouns.add(lemma.name())
+    adjectives = {
+        lemma.name().capitalize()
+        for synset in nltk.corpus.wordnet.all_synsets(pos="a")
+        for lemma in synset.lemmas()
+        if lemma.name().isalpha()
+    }
+    nouns = {
+        lemma.name().capitalize()
+        for synset in nltk.corpus.wordnet.all_synsets(pos="n")
+        for lemma in synset.lemmas()
+        if lemma.name().isalpha()
+    }

-    # Create the output directory if it doesn't exist
-    os.makedirs('data', exist_ok=True)
-
-    with open('data/adjectives.txt', 'w+') as file:
-        for adjective in adjectives:
-            file.write(str(adjective).capitalize() + '\n')
-
-    with open('data/nouns.txt', 'w+') as file:
-        for noun in nouns:
-            file.write(str(noun).capitalize() + '\n')
+    os.makedirs("data", exist_ok=True)
+    with open("data/adjectives.txt", "w") as adj_file:
+        adj_file.write("\n".join(sorted(adjectives)))
+    with open("data/nouns.txt", "w") as noun_file:
+        noun_file.write("\n".join(sorted(nouns)))

-if __name__ == '__main__':
-    main()
+
+if __name__ == "__main__":
+    main()