33 lines
977 B
Python
33 lines
977 B
Python
import nltk
|
|
import os
|
|
|
|
def _write_word_list(path, words):
    """Write *words* to *path*, one word per line, replacing any existing file."""
    # UTF-8 is pinned explicitly: str.isalpha() accepts non-ASCII letters, so
    # relying on the platform default encoding could fail or vary by machine.
    with open(path, 'w', encoding='utf-8') as file:
        file.writelines(word + '\n' for word in words)


def main():
    """Export WordNet adjectives and nouns to text files under ``data/``.

    Downloads the required NLTK resources, walks every WordNet synset and
    collects the lemma names of adjective and noun synsets. Only names made
    up purely of letters are kept (anything containing underscores, hyphens,
    digits, etc. is skipped). Each name is capitalised, de-duplicated and
    written in sorted order, one per line, to ``data/adjectives.txt`` and
    ``data/nouns.txt``. Existing files are overwritten.
    """
    # Load the corpora.
    nltk.download('wordnet')
    # NOTE(review): the two resources below are not used by this function;
    # they are still downloaded to preserve the original side effects.
    nltk.download('averaged_perceptron_tagger')
    nltk.download('universal_tagset')

    # Get all the adjectives and nouns.
    adjectives = set()
    nouns = set()
    for synset in nltk.corpus.wordnet.all_synsets():
        pos = synset.pos()
        # WordNet tags "satellite" adjectives as 's' rather than 'a'; both
        # are adjectives, so accept either to avoid dropping them.
        if pos in ('a', 's'):
            bucket = adjectives
        elif pos == 'n':
            bucket = nouns
        else:
            continue

        for lemma in synset.lemmas():
            name = lemma.name()
            if name.isalpha():
                # Capitalise before inserting so that names differing only
                # in case collapse into a single output line.
                bucket.add(name.capitalize())

    # Create the output directory if it doesn't exist.
    os.makedirs('data', exist_ok=True)

    # Sort so the output is reproducible; set iteration order for strings
    # changes between interpreter runs.
    _write_word_list('data/adjectives.txt', sorted(adjectives))
    _write_word_list('data/nouns.txt', sorted(nouns))
|
|
|
|
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()