1
+ import spacy
1
2
import requests
2
3
import random
3
4
import logging
5
+ from concurrent .futures import ThreadPoolExecutor , as_completed
6
+ import nltk
7
+
8
# Load the spaCy English model once at import time; it is used by
# nlp_based_sentence for POS tagging of the sentence template.
nlp = spacy.load("en_core_web_lg")
# Download the NLTK "punkt" tokenizer data at import time.
# NOTE(review): punkt is not referenced anywhere in the visible code —
# confirm it is actually needed before keeping this download.
nltk.download("punkt")
4
11
5
12
# Configure logging
6
13
logging .basicConfig (
9
16
datefmt = "%Y-%m-%d %H:%M:%S"
10
17
)
11
18
12
- # API URLs for Datamuse
13
19
DATAMUSE_API = "https://api.datamuse.com/words"
14
20
STOP_WORDS = {"the" , "a" , "an" , "and" , "of" , "in" , "on" , "at" , "to" , "is" , "for" }
15
21
16
- def get_words_by_topic (topic_word , part_of_speech = None ):
17
- """Fetch words related to a given topic word using Datamuse API, optionally filtered by part of speech."""
18
- logging .info (f"Fetching related { part_of_speech or 'words' } for topic: { topic_word } " )
19
-
20
- params = {"ml" : topic_word }
21
- if part_of_speech :
22
- params ["sp" ] = f"*{ part_of_speech } "
23
-
22
def fetch_api_data(params, timeout=10):
    """Fetch JSON data from the Datamuse API.

    Args:
        params: Dict of query parameters passed to the Datamuse endpoint.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout, a stalled connection would hang the caller
            (and any thread-pool worker) indefinitely.

    Returns:
        The decoded JSON list on success, or an empty list on any failure
        (non-200 status, network error, or timeout) so callers can always
        iterate the result safely.
    """
    try:
        response = requests.get(DATAMUSE_API, params=params, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failures (DNS, refused connection, timeout) used to
        # propagate and crash thread-pool consumers; report and degrade instead.
        logging.error(f"API request failed: {e}")
        return []

    if response.status_code == 200:
        return response.json()
    logging.error(f"API request failed with status code {response.status_code}.")
    return []
34
30
35
31
def clean_topic_input (topic_sentence ):
@@ -40,13 +36,18 @@ def clean_topic_input(topic_sentence):
40
36
logging .debug (f"Filtered words: { filtered_words } " )
41
37
return filtered_words
42
38
43
- def determine_topics (words ):
39
+ def determine_topics (words , use_threads = False ):
44
40
"""Analyze the topics of filtered words using the Datamuse API."""
45
41
topics = set ()
46
42
47
- for word in words :
48
- # Make the API call to get related topics from Datamuse
49
- try :
43
+ if use_threads :
44
+ with ThreadPoolExecutor () as executor :
45
+ futures = [executor .submit (fetch_api_data , {"rel_trg" : word }) for word in words ]
46
+ for future in as_completed (futures ):
47
+ data = future .result ()
48
+ topics .update ([item ['word' ] for item in data ])
49
+ else :
50
+ for word in words :
50
51
logging .info (f"Fetching topics for word: { word } " )
51
52
response = requests .get (f"{ DATAMUSE_API } ?rel_trg={ word } " )
52
53
if response .status_code == 200 :
@@ -55,53 +56,101 @@ def determine_topics(words):
55
56
logging .debug (f"Related topics found for { word } : { [item ['word' ] for item in data ]} " )
56
57
else :
57
58
logging .warning (f"Datamuse API error for word: { word } . Status code: { response .status_code } " )
58
- except Exception as e :
59
- logging .error (f"Error fetching topics for { word } : { e } " )
60
59
61
- return list (topics ) # Return a list of unique topics
60
+ return list (topics )
62
61
63
- def create_sentence (topic ):
64
- """Form a sentence from related words with a specific structure: 2 nouns, 1 verb, 2 adjectives (optional), 1 adverb."""
65
-
66
- # Fetch at least 2 nouns, 1 verb, 2 adjectives (optional), and 1 adverb related to the topic
67
- nouns = get_words_by_topic (topic , 'n' ) or ['thing' ]
68
- verbs = get_words_by_topic (topic , 'v' ) or ['does' ]
69
- adjectives = get_words_by_topic (topic , 'adj' ) or []
70
- adverbs = get_words_by_topic (topic , 'adv' ) or ['' ]
71
-
72
- if len (nouns ) < 2 :
73
- logging .warning (f"Not enough nouns for { topic } , filling with defaults." )
74
- nouns .extend (['object' , 'entity' ])
62
def is_word_related_to_topic(word, topic, max_results=10):
    """Check whether *word* appears among the top Datamuse matches for *topic*.

    Args:
        word: Candidate word to test.
        topic: Topic word used for the "means-like" (ml) Datamuse query.
        max_results: How many top matches to consider (default 10, as before).

    Returns:
        True if *word* is among the top matches, False otherwise (including
        on any API or network failure).
    """
    logging.info(f"Checking if word '{word}' is related to topic '{topic}'")
    # Pass the query via params= so requests URL-encodes it; interpolating
    # the topic into the URL breaks for topics with spaces or '&', '#', etc.
    try:
        response = requests.get(
            DATAMUSE_API,
            params={"ml": topic, "max": max_results},
            timeout=10,
        )
    except requests.RequestException as e:
        logging.error(f"Relatedness check failed for '{word}': {e}")
        return False

    if response.status_code == 200:
        # Guard on 'word' key: treat malformed items as absent rather than raising.
        related_words = [item['word'] for item in response.json() if 'word' in item]
        return word in related_words
    return False
75
70
76
- if not verbs :
77
- logging . warning ( f"No verbs found for { topic } , filling with default." )
78
- verbs . append ( 'acts' )
71
def get_words_by_topic(topic_word, part_of_speech=None, max_words=10, use_threads=False):
    """Fetch up to *max_words* words related to *topic_word* from the Datamuse API.

    Args:
        topic_word: Seed word for the "means-like" (ml) query.
        part_of_speech: Optional suffix filter ('n', 'v', 'adj', 'adv')
            applied through Datamuse's "sp" wildcard pattern.
        max_words: Maximum number of results requested from the API.
        use_threads: Kept for backward compatibility with existing callers.
            Submitting a single job to a ThreadPoolExecutor and immediately
            waiting on its result is equivalent to a direct call, so both
            modes now share the same code path.

    Returns:
        A list of related word strings; empty on any API failure
        (fetch_api_data logs the failure).
    """
    logging.info(f"Fetching related {part_of_speech or 'words'} for topic: {topic_word}")

    params = {"ml": topic_word, "max": max_words}
    if part_of_speech:
        params["sp"] = f"*{part_of_speech}"

    # Delegate the HTTP call to the shared helper instead of duplicating the
    # request/status-check logic here.
    data = fetch_api_data(params)
    return [item['word'] for item in data if 'word' in item]
92
91
93
- def generate_paragraphs (topics , num_paragraphs = 5 ):
92
def nlp_based_sentence(template_sentence, topic):
    """Build a sentence about *topic* by replacing POS slots in a template.

    spaCy tags each token of *template_sentence*; tokens whose POS is one of
    NOUN/VERB/ADJ/ADV are replaced sequentially with topic-related words
    fetched from the Datamuse API. When the API yields no (more) words for a
    POS, generic fallback words are used; when those run out too, the original
    token text is kept.

    Args:
        template_sentence: Sentence whose tagged tokens act as slots.
        topic: Topic word used to fetch the replacement vocabulary.

    Returns:
        The generated sentence, capitalized.
    """
    doc = nlp(template_sentence)
    logging.debug(f"Original Sentence Structure: {[token.text for token in doc]}")

    # POS tag -> candidate replacement words fetched from Datamuse.
    pos_map = {
        "NOUN": get_words_by_topic(topic, 'n', max_words=10),
        "VERB": get_words_by_topic(topic, 'v', max_words=5),
        "ADJ": get_words_by_topic(topic, 'adj', max_words=5),
        "ADV": get_words_by_topic(topic, 'adv', max_words=5)
    }

    # Generic fallback words, consumed once the API-provided list for a POS
    # is empty or exhausted. (Previously defined but never used — a bug:
    # empty API results left raw template tokens in the output.)
    pos_fallback = {
        "NOUN": ["thing", "object", "item"],
        "VERB": ["does", "is"],
        "ADJ": ["nice", "good"],
        "ADV": ["quickly"]
    }

    # Construct the sentence by replacing each recognized part of speech.
    generated_sentence = []
    for token in doc:
        if token.pos_ in pos_map:
            # Prefer topic words; fall back to the generic list when empty.
            candidates = pos_map[token.pos_] or pos_fallback[token.pos_]
            if candidates:
                generated_sentence.append(candidates.pop(0))  # sequential selection
                continue
        generated_sentence.append(token.text)  # keep original if no word is available

    final_sentence = " ".join(generated_sentence)
    logging.debug(f"Generated NLP Sentence: {final_sentence}")
    return final_sentence.capitalize()
128
+
129
def create_sentence(topic, use_threads=False):
    """Produce one NLP-polished sentence about *topic*.

    A fixed POS template (adjective, two nouns, a verb, and an adverb) is
    handed to nlp_based_sentence, which fills the slots with words related
    to the topic.

    Args:
        topic: Topic word driving the vocabulary selection.
        use_threads: Accepted for interface compatibility with callers;
            not currently forwarded anywhere.

    Returns:
        The generated sentence string.
    """
    # Placeholder sentence whose tokens get rewritten per part of speech.
    template_sentence = "The [ADJ] [NOUN] [VERB] the [NOUN] [ADV]."
    return nlp_based_sentence(template_sentence, topic)
139
+
140
def generate_paragraphs(topics, num_paragraphs=5, use_threads=False,
                        sentences_per_paragraph=2):
    """Generate paragraphs of topic-related sentences.

    Args:
        topics: Non-empty list of topic words to draw from.
        num_paragraphs: Number of paragraphs to generate.
        use_threads: Forwarded to create_sentence for threaded API calls.
        sentences_per_paragraph: Sentences per paragraph (default 2, matching
            the previous hard-coded value).

    Returns:
        A list of paragraph strings, one per generated paragraph.
    """
    paragraphs = []
    logging.info(f"Generating {num_paragraphs} paragraphs.")

    for _ in range(num_paragraphs):
        # random.choice is the idiomatic equivalent of indexing with
        # random.randint(0, len(topics) - 1).
        selected_topic = random.choice(topics)
        logging.info(f"Generating sentences for topic: {selected_topic}")

        sentences = [
            create_sentence(selected_topic, use_threads=use_threads)
            for _ in range(sentences_per_paragraph)
        ]
        paragraphs.append(" ".join(sentences))

    return paragraphs
106
155
107
156
def main ():
@@ -122,14 +171,16 @@ def main():
122
171
\____ )MMMMMM| .'
123
172
`-' `--' hjm
124
173
""" )
174
+
125
175
# Get user input for the topic sentence
126
176
user_input = input ("Enter a topic: " )
127
177
128
178
# Step 1: Clean the input by removing stop words (articles, conjunctions, etc.)
129
179
filtered_words = clean_topic_input (user_input )
130
180
131
181
# Step 2: Determine topics from the filtered words
132
- topics = determine_topics (filtered_words )
182
+ use_threads = input ("Do you want to use threads to speed up (yes/no)? " ).lower () == "yes"
183
+ topics = determine_topics (filtered_words , use_threads = use_threads )
133
184
134
185
if not topics :
135
186
logging .warning ("No valid topics found. Please enter a valid input." )
@@ -138,7 +189,7 @@ def main():
138
189
logging .info (f"Identified topics: { topics } " )
139
190
140
191
# Step 3: Generate paragraphs based on the topics
141
- paragraphs = generate_paragraphs (topics )
192
+ paragraphs = generate_paragraphs (topics , use_threads = use_threads )
142
193
143
194
# Step 4: Output the generated paragraphs
144
195
logging .info ("Generated paragraphs successfully." )
0 commit comments