Skip to content

Commit 72bb099

Browse files
authored
Merge pull request #4 from sudo-arash/sudo-arash-patch-1
Update program to fix unrelated sentences
2 parents ecb70e2 + 083b088 commit 72bb099

File tree

3 files changed

+226
-139
lines changed

3 files changed

+226
-139
lines changed

.github/workflows/test.yaml

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
steps:
2+
- uses: actions/checkout@v4
3+
- name: Set up Python
4+
uses: actions/setup-python@v5
5+
with:
6+
python-version: '3.x'
7+
- name: Install dependencies
8+
run: |
9+
python -m pip install --upgrade pip
10+
pip install spacy requests nltk
11+
python -m spacy download en_core_web_lg
12+
- name: Test with pytest
13+
run: |
14+
pip install pytest pytest-cov
15+
pytest test.py --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html

main.py

Lines changed: 105 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
1+
import spacy
12
import requests
23
import random
34
import logging
5+
from concurrent.futures import ThreadPoolExecutor, as_completed
6+
import nltk
7+
8+
# Load the NLP model
9+
nlp = spacy.load("en_core_web_lg")
10+
nltk.download("punkt")
411

512
# Configure logging
613
logging.basicConfig(
@@ -9,27 +16,16 @@
916
datefmt="%Y-%m-%d %H:%M:%S"
1017
)
1118

12-
# API URLs for Datamuse
1319
DATAMUSE_API = "https://api.datamuse.com/words"
1420
STOP_WORDS = {"the", "a", "an", "and", "of", "in", "on", "at", "to", "is", "for"}
1521

16-
def get_words_by_topic(topic_word, part_of_speech=None):
17-
"""Fetch words related to a given topic word using Datamuse API, optionally filtered by part of speech."""
18-
logging.info(f"Fetching related {part_of_speech or 'words'} for topic: {topic_word}")
19-
20-
params = {"ml": topic_word}
21-
if part_of_speech:
22-
params["sp"] = f"*{part_of_speech}"
23-
22+
def fetch_api_data(params):
23+
"""Helper function to fetch data from the Datamuse API."""
2424
response = requests.get(DATAMUSE_API, params=params)
25-
2625
if response.status_code == 200:
27-
data = response.json()
28-
words = [item['word'] for item in data if 'word' in item]
29-
logging.debug(f"Received {len(words)} {part_of_speech or 'words'} related to {topic_word}")
30-
return words
26+
return response.json()
3127
else:
32-
logging.error(f"Failed to fetch related words for {topic_word}. Status code: {response.status_code}")
28+
logging.error(f"API request failed with status code {response.status_code}.")
3329
return []
3430

3531
def clean_topic_input(topic_sentence):
@@ -40,13 +36,18 @@ def clean_topic_input(topic_sentence):
4036
logging.debug(f"Filtered words: {filtered_words}")
4137
return filtered_words
4238

43-
def determine_topics(words):
39+
def determine_topics(words, use_threads=False):
4440
"""Analyze the topics of filtered words using the Datamuse API."""
4541
topics = set()
4642

47-
for word in words:
48-
# Make the API call to get related topics from Datamuse
49-
try:
43+
if use_threads:
44+
with ThreadPoolExecutor() as executor:
45+
futures = [executor.submit(fetch_api_data, {"rel_trg": word}) for word in words]
46+
for future in as_completed(futures):
47+
data = future.result()
48+
topics.update([item['word'] for item in data])
49+
else:
50+
for word in words:
5051
logging.info(f"Fetching topics for word: {word}")
5152
response = requests.get(f"{DATAMUSE_API}?rel_trg={word}")
5253
if response.status_code == 200:
@@ -55,53 +56,101 @@ def determine_topics(words):
5556
logging.debug(f"Related topics found for {word}: {[item['word'] for item in data]}")
5657
else:
5758
logging.warning(f"Datamuse API error for word: {word}. Status code: {response.status_code}")
58-
except Exception as e:
59-
logging.error(f"Error fetching topics for {word}: {e}")
6059

61-
return list(topics) # Return a list of unique topics
60+
return list(topics)
6261

63-
def create_sentence(topic):
64-
"""Form a sentence from related words with a specific structure: 2 nouns, 1 verb, 2 adjectives (optional), 1 adverb."""
65-
66-
# Fetch at least 2 nouns, 1 verb, 2 adjectives (optional), and 1 adverb related to the topic
67-
nouns = get_words_by_topic(topic, 'n') or ['thing']
68-
verbs = get_words_by_topic(topic, 'v') or ['does']
69-
adjectives = get_words_by_topic(topic, 'adj') or []
70-
adverbs = get_words_by_topic(topic, 'adv') or ['']
71-
72-
if len(nouns) < 2:
73-
logging.warning(f"Not enough nouns for {topic}, filling with defaults.")
74-
nouns.extend(['object', 'entity'])
62+
def is_word_related_to_topic(word, topic):
63+
"""Check if the word is closely related to the given topic."""
64+
logging.info(f"Checking if word '{word}' is related to topic '{topic}'")
65+
response = requests.get(f"{DATAMUSE_API}?ml={topic}&max=10")
66+
if response.status_code == 200:
67+
related_words = [item['word'] for item in response.json()]
68+
return word in related_words
69+
return False
7570

76-
if not verbs:
77-
logging.warning(f"No verbs found for {topic}, filling with default.")
78-
verbs.append('acts')
71+
def get_words_by_topic(topic_word, part_of_speech=None, max_words=10, use_threads=False):
72+
"""Fetch a limited number of words related to a given topic word using Datamuse API, optionally filtered by part of speech."""
73+
logging.info(f"Fetching related {part_of_speech or 'words'} for topic: {topic_word}")
7974

80-
# Select 2 nouns, 1 verb, up to 2 adjectives, and 1 adverb to form a structured sentence
81-
chosen_nouns = random.choices(nouns, k=2)
82-
chosen_verb = random.choice(verbs)
83-
chosen_adjectives = random.choices(adjectives, k=min(2, len(adjectives)))
84-
chosen_adverb = random.choice(adverbs)
75+
params = {"ml": topic_word, "max": max_words}
76+
if part_of_speech:
77+
params["sp"] = f"*{part_of_speech}"
8578

86-
# Construct the sentence
87-
sentence = f"{chosen_adjectives[0] if chosen_adjectives else ''} {chosen_nouns[0]} {chosen_adjectives[1] if len(chosen_adjectives) > 1 else ''} {chosen_verb} {chosen_nouns[1]} {chosen_adverb}."
88-
sentence = " ".join(sentence.split()) # Clean up any extra spaces
89-
logging.debug(f"Generated sentence: {sentence}")
79+
if use_threads:
80+
with ThreadPoolExecutor() as executor:
81+
future = executor.submit(fetch_api_data, params)
82+
return [item['word'] for item in future.result() if 'word' in item]
9083

91-
return sentence.capitalize()
84+
# Sequential fetch (default)
85+
response = requests.get(DATAMUSE_API, params=params)
86+
if response.status_code == 200:
87+
return [item['word'] for item in response.json() if 'word' in item]
88+
else:
89+
logging.error(f"Failed to fetch related words for {topic_word}. Status code: {response.status_code}")
90+
return []
9291

93-
def generate_paragraphs(topics, num_paragraphs=5):
92+
def nlp_based_sentence(template_sentence, topic):
93+
"""
94+
Improve sentence structure using NLP.
95+
- Use Spacy for parsing and replacing with more natural words.
96+
- Fill sentence templates more intelligently.
97+
"""
98+
doc = nlp(template_sentence)
99+
logging.debug(f"Original Sentence Structure: {[token.text for token in doc]}")
100+
101+
# Create a dictionary of POS -> words from Datamuse API
102+
pos_map = {
103+
"NOUN": get_words_by_topic(topic, 'n', max_words=10),
104+
"VERB": get_words_by_topic(topic, 'v', max_words=5),
105+
"ADJ": get_words_by_topic(topic, 'adj', max_words=5),
106+
"ADV": get_words_by_topic(topic, 'adv', max_words=5)
107+
}
108+
109+
# Default fallback words
110+
pos_fallback = {
111+
"NOUN": ["thing", "object", "item"],
112+
"VERB": ["does", "is"],
113+
"ADJ": ["nice", "good"],
114+
"ADV": ["quickly"]
115+
}
116+
117+
# Construct the sentence by replacing each part of speech
118+
generated_sentence = []
119+
for token in doc:
120+
if token.pos_ in pos_map and pos_map[token.pos_] and pos_map[token.pos_]:
121+
generated_sentence.append(pos_map[token.pos_].pop(0)) # Sequential selection
122+
else:
123+
generated_sentence.append(token.text) # Keep the original if no word is available
124+
125+
final_sentence = " ".join(generated_sentence)
126+
logging.debug(f"Generated NLP Sentence: {final_sentence}")
127+
return final_sentence.capitalize()
128+
129+
def create_sentence(topic, use_threads=False):
130+
"""
131+
Form a sentence using NLP techniques to make it more natural.
132+
Structure: 2 nouns, 1 verb, 2 adjectives, 1 adverb.
133+
"""
134+
# Template sentence to be modified by NLP
135+
template_sentence = "The [ADJ] [NOUN] [VERB] the [NOUN] [ADV]."
136+
137+
# Use NLP to parse and replace placeholders with more natural words
138+
return nlp_based_sentence(template_sentence, topic)
139+
140+
def generate_paragraphs(topics, num_paragraphs=5, use_threads=False):
94141
"""Generate paragraphs by fetching related words for each topic and forming sentences."""
95142
paragraphs = []
96-
97143
logging.info(f"Generating {num_paragraphs} paragraphs.")
144+
98145
for _ in range(num_paragraphs):
99146
paragraph = []
100147
selected_topic = topics[random.randint(0, len(topics)-1)]
101148
logging.info(f"Generating sentences for topic: {selected_topic}")
102-
for _ in range(random.randint(2, 3)): # 2 to 3 sentences per paragraph
103-
paragraph.append(create_sentence(selected_topic))
104-
paragraphs.append(" ".join(paragraph)) # Join sentences into a paragraph
149+
150+
for _ in range(2): # Fixed at 2 sentences per paragraph
151+
paragraph.append(create_sentence(selected_topic, use_threads=use_threads))
152+
153+
paragraphs.append(" ".join(paragraph))
105154
return paragraphs
106155

107156
def main():
@@ -122,14 +171,16 @@ def main():
122171
\____ )MMMMMM| .'
123172
`-' `--' hjm
124173
""")
174+
125175
# Get user input for the topic sentence
126176
user_input = input("Enter a topic: ")
127177

128178
# Step 1: Clean the input by removing stop words (articles, conjunctions, etc.)
129179
filtered_words = clean_topic_input(user_input)
130180

131181
# Step 2: Determine topics from the filtered words
132-
topics = determine_topics(filtered_words)
182+
use_threads = input("Do you want to use threads to speed up (yes/no)? ").lower() == "yes"
183+
topics = determine_topics(filtered_words, use_threads=use_threads)
133184

134185
if not topics:
135186
logging.warning("No valid topics found. Please enter a valid input.")
@@ -138,7 +189,7 @@ def main():
138189
logging.info(f"Identified topics: {topics}")
139190

140191
# Step 3: Generate paragraphs based on the topics
141-
paragraphs = generate_paragraphs(topics)
192+
paragraphs = generate_paragraphs(topics, use_threads=use_threads)
142193

143194
# Step 4: Output the generated paragraphs
144195
logging.info("Generated paragraphs successfully.")

0 commit comments

Comments (0)