Spaces:
Sleeping
Sleeping
Yuhan-Lu
committed on
Commit
·
3e39830
1
Parent(s):
f76455f
merge sentence issue, rerun 5 times
Browse files
Former-commit-id: b807248fae583ddbecee84cd9a5b8ffdc2888cc8
- pipeline.py +42 -1
pipeline.py
CHANGED
@@ -24,7 +24,7 @@ def parse_args():
|
|
24 |
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
|
25 |
parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
|
26 |
parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
|
27 |
-
parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-
|
28 |
parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
|
29 |
parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
|
30 |
parser.add_argument("-v", help="auto encode script with video", action='store_true')
|
@@ -186,8 +186,33 @@ def script_split(script_in, chunk_size = 1000):
|
|
186 |
# print('temp_contents')
|
187 |
# print(srt.get_source_only())
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
|
190 |
def get_response(model_name, sentence):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
|
192 |
response = openai.ChatCompletion.create(
|
193 |
model=model_name,
|
@@ -223,12 +248,28 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
|
|
223 |
flag = False
|
224 |
try:
|
225 |
translate = get_response(model_name, sentence)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
except Exception as e:
|
227 |
logging.debug("An error has occurred during translation:",e)
|
228 |
print("An error has occurred during translation:",e)
|
229 |
print("Retrying... the script will continue after 30 seconds.")
|
230 |
time.sleep(30)
|
231 |
flag = True
|
|
|
232 |
srt.set_translation(translate, range, model_name, video_name, video_link)
|
233 |
|
234 |
|
|
|
24 |
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
|
25 |
parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
|
26 |
parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
|
27 |
+
parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo") # default change to gpt-4
|
28 |
parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
|
29 |
parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
|
30 |
parser.add_argument("-v", help="auto encode script with video", action='store_true')
|
|
|
186 |
# print('temp_contents')
|
187 |
# print(srt.get_source_only())
|
188 |
|
189 |
+
def check_translation(sentence: str, translation: str) -> bool:
    """Return whether a translation kept the source's segment count.

    Segments are separated by a blank line (two consecutive newline
    characters). A mismatch in the number of segments indicates the
    "merged sentence" issue, where the translated text no longer lines up
    one-to-one with the source segments.

    Args:
        sentence: Source text, with segments separated by blank lines.
        translation: Translated text to validate against ``sentence``.

    Returns:
        True if both texts contain the same number of blank-line-separated
        segments, False otherwise.
    """
    # N separators delimit N + 1 segments; an empty string counts as one.
    sentence_count = sentence.count('\n\n') + 1
    translation_count = translation.count('\n\n') + 1

    # Diagnostic output: total character length and segment count of each text.
    print("sentence length: ", len(sentence), sentence_count)
    print("translation length: ", len(translation), translation_count)

    return sentence_count == translation_count
|
202 |
+
|
203 |
|
204 |
def get_response(model_name, sentence):
|
205 |
+
"""
|
206 |
+
Generates a translated response for a given sentence using a specified OpenAI model.
|
207 |
+
|
208 |
+
Args:
|
209 |
+
model_name (str): The name of the OpenAI model to be used for translation, either "gpt-3.5-turbo" or "gpt-4".
|
210 |
+
sentence (str): The English sentence related to StarCraft 2 videos that needs to be translated into Chinese.
|
211 |
+
|
212 |
+
Returns:
|
213 |
+
str: The translated Chinese sentence, maintaining the original format, meaning, and number of lines.
|
214 |
+
"""
|
215 |
+
|
216 |
if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
|
217 |
response = openai.ChatCompletion.create(
|
218 |
model=model_name,
|
|
|
248 |
flag = False
|
249 |
try:
|
250 |
translate = get_response(model_name, sentence)
|
251 |
+
# detect merge sentence issue and try to solve for five times:
|
252 |
+
attempt_left = 5
|
253 |
+
while not check_translation(sentence, translate) and attempt_left > 0:
|
254 |
+
translate = get_response(model_name, sentence)
|
255 |
+
attempt_left -= 1
|
256 |
+
|
257 |
+
# if failure still happen, split into smaller tokens
|
258 |
+
if attempt_left == 0:
|
259 |
+
single_sentences = sentence.split("\n\n")
|
260 |
+
print("merge sentence issue found: ", len(single_sentences), len(translate), single_sentences, translate)
|
261 |
+
translate = ""
|
262 |
+
for single_sentence in single_sentences:
|
263 |
+
translate += get_response(model_name, single_sentence) + "\n\n"
|
264 |
+
print("after correction: ", "chinese length: ", len(translate), translate)
|
265 |
+
# print(type(translate))
|
266 |
except Exception as e:
|
267 |
logging.debug("An error has occurred during translation:",e)
|
268 |
print("An error has occurred during translation:",e)
|
269 |
print("Retrying... the script will continue after 30 seconds.")
|
270 |
time.sleep(30)
|
271 |
flag = True
|
272 |
+
|
273 |
srt.set_translation(translate, range, model_name, video_name, video_link)
|
274 |
|
275 |
|