Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Community

Eason Lu committed on Mar 22, 2023

Commit

0091da5

•

2 Parent(s): 5a7c441 09cabee

solve conflict

Browse files

Former-commit-id: c9d4906675aac0476496e62adf2e6482e75c0bca

Files changed (5) hide show

.gitignore +1 -1
README.md +5 -4
SRT.py +30 -0
__pycache__/{srt2ass.cpython-310.pyc → srt2ass.cpython-38.pyc} +0 -0
pipeline.py +8 -8

.gitignore CHANGED Viewed

@@ -3,4 +3,4 @@
 .DS_Store
 test.py
 test.srt
-test.txt

 .DS_Store
 test.py
 test.srt
+test.txt

README.md CHANGED Viewed

@@ -19,10 +19,11 @@ example offline:
 python3 pipeline.py --audio_file test_translation.m4a --result ./results --video_name test_translation
 ```
-example srt input:
-```
-python pipeline.py --text_file "/home/jiaenliu/project-t/results/huanghe_translation_en.txt" --result "/home/jiaenliu/project-t/results" --video_name "huanghe_test"
-```
 ## Usage
 ```

 python3 pipeline.py --audio_file test_translation.m4a --result ./results --video_name test_translation
 ```
+python3 pipeline.py --link https://www.youtube.com/watch?v=VrigMmXt9A0 --video_name Ukraine_and_its_Global_Impact
+python3 pipeline.py --video_file '/home/jiaenliu/project-t/downloads/audio/Ukraine_and_its_Global_Impact.mp4' -v --video_name Ukraine_and_its_Global_Impact
+example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
 ## Usage
 ```

SRT.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from datetime import timedelta
+import os
+import whisper
+class SRT_segment(object):
+    def __init__(self, segment) -> None:
+        self.start_time_str = str(0)+str(timedelta(seconds=int(segment['start'])))+',000'
+        self.end_time_str = str(0)+str(timedelta(seconds=int(segment['end'])))+',000'
+        self.segment_id = segment['id']+1
+        self.source_text = segment['text']
+        self.duration = f"{self.start_time_str} --> {self.end_time_str}"
+        self.translation = ""
+class SRT_script():
+    def __init__(self, segments) -> None:
+        self.segments = []
+        for seg in segments:
+            srt_seg = SRT_segment(seg)
+            self.segments.append(srt_seg)
+    def get_source_only():
+        # return a string
+        pass
+    def write_srt_file(path:str):
+        # write srt file to path
+        pass

__pycache__/{srt2ass.cpython-310.pyc → srt2ass.cpython-38.pyc} RENAMED Viewed

File without changes

pipeline.py CHANGED Viewed

@@ -2,9 +2,7 @@ import openai
 from pytube import YouTube
 import argparse
 import os
-import io
 import whisper
-import ffmpeg
 from tqdm import tqdm
 parser = argparse.ArgumentParser()
@@ -66,14 +64,12 @@ if args.link is not None and args.video_file is None:
         exit()
     video_path = f'{DOWNLOAD_PATH}/video/{video.default_filename}'
-    # video_file = open(video_path, "rb")
     audio_path = '{}/audio/{}'.format(DOWNLOAD_PATH, audio.default_filename)
     audio_file = open(audio_path, "rb")
     if VIDEO_NAME == 'placeholder':
         VIDEO_NAME = audio.default_filename.split('.')[0]
 elif args.video_file is not None:
     # Read from local
-    # video_file = open(args.video_file, "rb")
     video_path = args.video_file
     if args.audio_file is not None:
         audio_file= open(args.audio_file, "rb")
@@ -162,17 +158,17 @@ script_input_withForceTerm = re.sub('\n ', '\n', "".join(ready_words))
 # Split the video script by sentences and create chunks within the token limit
-n_threshold = 1500  # Token limit for the GPT-3 model
-script_split = script_input_withForceTerm.split('.')
 script_arr = []
 script = ""
 for sentence in script_split:
     if len(script) + len(sentence) + 1 <= n_threshold:
-        script += sentence + '.'
     else:
         script_arr.append(script.strip())
-        script = sentence + '.'
 if script.strip():
     script_arr.append(script.strip())
@@ -185,12 +181,15 @@ for s in tqdm(script_arr):
             model=model_name,
             messages = [
                 {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
                 {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
             ],
             temperature=0.15
         )
         with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['message']['content'].strip())
     if model_name == "text-davinci-003":
         prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
@@ -207,6 +206,7 @@ for s in tqdm(script_arr):
         with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['text'].strip())
 if not args.only_srt:
     assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")

 from pytube import YouTube
 import argparse
 import os
 import whisper
 from tqdm import tqdm
 parser = argparse.ArgumentParser()
         exit()
     video_path = f'{DOWNLOAD_PATH}/video/{video.default_filename}'
     audio_path = '{}/audio/{}'.format(DOWNLOAD_PATH, audio.default_filename)
     audio_file = open(audio_path, "rb")
     if VIDEO_NAME == 'placeholder':
         VIDEO_NAME = audio.default_filename.split('.')[0]
 elif args.video_file is not None:
     # Read from local
     video_path = args.video_file
     if args.audio_file is not None:
         audio_file= open(args.audio_file, "rb")
 # Split the video script by sentences and create chunks within the token limit
+n_threshold = 1000  # Token limit for the GPT-3 model
+script_split = script_input.split('\n')
 script_arr = []
 script = ""
 for sentence in script_split:
     if len(script) + len(sentence) + 1 <= n_threshold:
+        script += sentence + '\n'
     else:
         script_arr.append(script.strip())
+        script = sentence + '\n'
 if script.strip():
     script_arr.append(script.strip())
             model=model_name,
             messages = [
                 {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
+                {"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
+                {"role": "system", "content": "There is no need for you to add any comments or notes."},
                 {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
             ],
             temperature=0.15
         )
         with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['message']['content'].strip())
+            f.write("\n")
     if model_name == "text-davinci-003":
         prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
         with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['text'].strip())
+            f.write("\n")
 if not args.only_srt:
     assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")