feat: optimize and simplify Python download script
This commit is contained in:
@@ -1,33 +1,29 @@
|
||||
import nltk
|
||||
import os
|
||||
|
||||
|
||||
def main():
    """Download the WordNet corpus and write word lists to the data directory.

    Side effects:
        - Fetches the 'wordnet' corpus via ``nltk.download``.
        - Creates ``data/`` if it does not exist.
        - Writes ``data/adjectives.txt`` and ``data/nouns.txt``: one
          capitalized word per line, sorted, deduplicated.
    """
    # Only the WordNet corpus is used below; the tagger/tagset downloads
    # from the earlier revision are unnecessary and have been dropped.
    nltk.download("wordnet")

    # Purely-alphabetic lemma names, capitalized and deduplicated via a
    # set comprehension.  pos="a" selects adjectives, pos="n" nouns.
    adjectives = {
        lemma.name().capitalize()
        for synset in nltk.corpus.wordnet.all_synsets(pos="a")
        for lemma in synset.lemmas()
        if lemma.name().isalpha()
    }
    nouns = {
        lemma.name().capitalize()
        for synset in nltk.corpus.wordnet.all_synsets(pos="n")
        for lemma in synset.lemmas()
        if lemma.name().isalpha()
    }

    os.makedirs("data", exist_ok=True)
    # Sort for a stable, diff-friendly file; sets have no defined order.
    with open("data/adjectives.txt", "w") as adj_file:
        adj_file.write("\n".join(sorted(adjectives)))
    with open("data/nouns.txt", "w") as noun_file:
        noun_file.write("\n".join(sorted(nouns)))
# Script entry point: run the downloader only when executed directly,
# not when imported as a module.  (The diff rendering duplicated this
# guard — exactly one is kept.)
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user