Improve post-format filtering

Shynee1 · Shynee1 · commit d4670c575645 · 2023-12-22T16:31:30.000-05:00
diff --git a/src/main.py b/src/main.py
@@ -10,7 +10,7 @@
 background_audio_path = "assets/background_audios/"
 output_video_path = "outputs/videos/"
 output_audio_path = "outputs/audios/"
-iterations = 2
+iterations = 1
 
 # Check if output directories are missing
 if not os.path.exists(output_video_path) or not os.path.exists(output_audio_path):
@@ -24,22 +24,25 @@
 post_grabber = PostGrabber(subreddit, category)
 tts = TikTokTTS("en_us_006")
 
-for i in range(1, iterations):
+for i in range(iterations):
     # Scrape subreddit and obtain post text
     post = post_grabber.next_post()
     text = post_grabber.get_post_text(post)
-    
-    audio_destination = output_audio_path + "audio" + str(i) + ".mp3"
-    video_desination = output_video_path + "final" + str(i) + ".mp4"
+    print(text)
+
+    file_index = len(os.listdir(output_video_path))
+    audio_destination = output_audio_path + "audio" + str(file_index) + ".mp3"
+    video_desination = output_video_path + "final" + str(file_index) + ".mp4"
 
     # Generate TTS using TikTok's text-to-speech
     tts.create_tts(text, audio_destination)
 
     # Generate and save video 
     creator = VideoCreator(text, "Impact", 65, 2)
     comp = creator.create_composition(background_video_path, background_audio_path, audio_destination)
+    print(f"Writing file to {video_desination}...")
     comp.write_videofile(video_desination, threads = 4, logger = None, fps = 60)
-    print(f"File written to {video_desination}")
+    print(f"File successfully written")
 
     creator.free_memory()
 
diff --git a/src/post_grabber.py b/src/post_grabber.py
@@ -1,6 +1,7 @@
 from praw import Reddit
 import random
 import string
+import re
 
 class PostGrabber:
     def __init__(self, subreddit_name: str, post_category: str):
@@ -49,17 +50,29 @@ def change_post_category(self, new_post_category: str):
 
     # Format post text and merge with title
     def get_post_text(self, post: str) -> str:
-        full = ((post.title + " " + post.selftext)
-                .replace("\n", " ")
-                .replace("\t", " ")
-                .replace("AITA", "Am I the Asshole")
-                .replace("SIL", "sister-in-law")
-                .replace("GF", "girlfriend")
-                .replace("”", "")
-                .replace("“", "")
-                .replace("\"", "")
-                .replace("\'", "")
-        )
+        full = post.title + " " + post.selftext
+        formatting = {
+            "[\n\t]": " ",
+            "[“”\"]": "",
+            "[-]": ".",
+            "&#x200B;": " ",
+            "aita": "am I the asshole",
+            "sil": "sister-in-law",
+            "gf": "girlfriend",
+            "cuz": "because",
+            "ffs": "for fuck's sake",
+            "imo": "in my opinion",
+            "pos": "piece-of-shit",
+            "bf": "boyfriend",
+            "bil": "brother-in-law"
+        }
+
+        # Filter any post-specific formatting or abbreviations
+        for key in formatting:
+            if key[0] == '[':
+                full = re.sub(r'{0}'.format(key), formatting[key], full, flags=re.IGNORECASE)
+            else:
+                full = re.sub(r'\b{0}\b'.format(key), formatting[key], full, flags=re.IGNORECASE)
 
         # Removes last punctuation mark from the text
         if (full[-1] in string.punctuation ):
diff --git a/src/video_creator.py b/src/video_creator.py
@@ -35,7 +35,7 @@ def get_background_audio(self, path: str, duration: float) -> AudioFileClip:
     # Create subtitles using OpenAI Whisper
     def create_subtitles(self, audio_path: str) -> list[TextClip]:
         result = whisper_timestamped.transcribe(self.whisper, audio_path, compression_ratio_threshold=1.8)
-        print("Successfully transcribed video")
+        print("Transcriptions created")
         clips = []
         segments = result["segments"]
         for i in range(len(segments)):
@@ -47,7 +47,7 @@ def create_subtitles(self, audio_path: str) -> list[TextClip]:
                 clips.append(self.create_clip(word, start, end))
                 start = end
 
-        print("Successfully created subtitles")
+        print("Subtitles created")
         return clips
     
     # Create a textclip with the given duration
@@ -82,7 +82,7 @@ def create_composition(self, video_background_folder: str, audio_background_fold
         comp = comp.set_duration(tts.duration) 
         self.memory_pool.append(comp) 
 
-        print("Successfully created composition")
+        print("Composition created")
         return comp
     
     def free_memory(self):