feat: optimize and simplify python download script
This commit is contained in:
@@ -1,33 +1,29 @@
|
|||||||
import nltk
import os


def main():
    """Download the WordNet corpus and write sorted adjective/noun word
    lists to data/adjectives.txt and data/nouns.txt.

    Each output file contains one capitalized, purely-alphabetic word per
    line (multi-word lemmas with underscores are filtered out by
    ``isalpha()``), sorted alphabetically.
    """
    # Fetch the WordNet corpus; nltk.download is a no-op when it is
    # already present in the local NLTK data directory.
    nltk.download("wordnet")

    # Collect capitalized lemma names, keeping only purely alphabetic ones.
    # NOTE(review): pos="a" selects adjectives; WordNet also has satellite
    # adjectives under pos "s" — confirm whether those should be included.
    adjectives = {
        lemma.name().capitalize()
        for synset in nltk.corpus.wordnet.all_synsets(pos="a")
        for lemma in synset.lemmas()
        if lemma.name().isalpha()
    }
    nouns = {
        lemma.name().capitalize()
        for synset in nltk.corpus.wordnet.all_synsets(pos="n")
        for lemma in synset.lemmas()
        if lemma.name().isalpha()
    }

    # Create the output directory if it doesn't exist.
    os.makedirs("data", exist_ok=True)

    # Write each word list sorted, one word per line.
    with open("data/adjectives.txt", "w") as adj_file:
        adj_file.write("\n".join(sorted(adjectives)))
    with open("data/nouns.txt", "w") as noun_file:
        noun_file.write("\n".join(sorted(nouns)))


if __name__ == "__main__":
    main()
Reference in New Issue
Block a user