worldqwq committed on
Commit
ce7a58b
1 Parent(s): 6273515

Added auto retry

Browse files

Former-commit-id: ea5db6f694a99db696b50e7b9546d59c9e97d7fb

Files changed (2) hide show
  1. SRT.py +26 -11
  2. pipeline.py +19 -7
SRT.py CHANGED
@@ -106,11 +106,26 @@ class SRT_script():
106
  segments.append(self.merge_segs(idx_list))
107
 
108
  self.segments = segments # need memory release?
 
 
109
 
110
  def set_translation(self, translate:str, id_range:tuple):
111
  start_seg_id = id_range[0]
112
  end_seg_id = id_range[1]
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  lines = translate.split('\n\n')
115
  if len(lines) < (end_seg_id - start_seg_id + 1):
116
  count = 0
@@ -126,17 +141,17 @@ class SRT_script():
126
  #Adds sentence index let GPT keep track of sentence breaks
127
  input_str += translate
128
  #append translate to prompt
129
- response = openai.ChatCompletion.create(
130
- model="gpt-3.5-turbo",
131
- messages = [
132
- {"role": "system", "content": "You are a helpful assistant that help calibrates English to Chinese subtitle translations in starcraft2."},
133
- {"role": "system", "content": "You are provided with a translated Chinese transcript; you must modify or split the Chinese sentence to match the meaning and the number of the English transcript exactly one by one. You must not merge ANY Chinese lines, you can only split them but the total Chinese lines MUST equals to number of English lines."},
134
- {"role": "system", "content": "There is no need for you to add any comments or notes, and do not modify the English transcript."},
135
- {"role": "user", "content": 'You are given the English transcript and line number, your task is to merge or split the Chinese to match the exact number of lines in English transcript, no more no less. For example, if there are more Chinese lines than English lines, merge some the Chinese lines to match the number of English lines. If Chinese lines is less than English lines, split some Chinese lines to match the english lines: "{}"'.format(input_str)}
136
- ],
137
- temperature=0.7
138
- )
139
- translate = response['choices'][0]['message']['content'].strip()
140
  lines = translate.split('\n\n')
141
  if len(lines) < (end_seg_id - start_seg_id + 1):
142
  print("Failed Solving unmatched lines, Manually parse needed")
 
106
  segments.append(self.merge_segs(idx_list))
107
 
108
  self.segments = segments # need memory release?
109
+
110
+
111
 
112
  def set_translation(self, translate:str, id_range:tuple):
113
  start_seg_id = id_range[0]
114
  end_seg_id = id_range[1]
115
 
116
+ def inner_func(input_str):
117
+ response = openai.ChatCompletion.create(
118
+ model="gpt-3.5-turbo",
119
+ messages = [
120
+ {"role": "system", "content": "You are a helpful assistant that help calibrates English to Chinese subtitle translations in starcraft2."},
121
+ {"role": "system", "content": "You are provided with a translated Chinese transcript; you must modify or split the Chinese sentence to match the meaning and the number of the English transcript exactly one by one. You must not merge ANY Chinese lines, you can only split them but the total Chinese lines MUST equals to number of English lines."},
122
+ {"role": "system", "content": "There is no need for you to add any comments or notes, and do not modify the English transcript."},
123
+ {"role": "user", "content": 'You are given the English transcript and line number, your task is to merge or split the Chinese to match the exact number of lines in English transcript, no more no less. For example, if there are more Chinese lines than English lines, merge some the Chinese lines to match the number of English lines. If Chinese lines is less than English lines, split some Chinese lines to match the english lines: "{}"'.format(input_str)}
124
+ ],
125
+ temperature=0.7
126
+ )
127
+ return response['choices'][0]['message']['content'].strip()
128
+
129
  lines = translate.split('\n\n')
130
  if len(lines) < (end_seg_id - start_seg_id + 1):
131
  count = 0
 
141
  #Adds sentence index let GPT keep track of sentence breaks
142
  input_str += translate
143
  #append translate to prompt
144
+
145
+ flag = True
146
+ while flag:
147
+ flag = False
148
+ try:
149
+ translate = inner_func(input_str)
150
+ except Exception as e:
151
+ print("An error has occurred during solving unmatched lines:",e)
152
+ print("Retrying...")
153
+ flag = True
154
+
155
  lines = translate.split('\n\n')
156
  if len(lines) < (end_seg_id - start_seg_id + 1):
157
  print("Failed Solving unmatched lines, Manually parse needed")
pipeline.py CHANGED
@@ -163,10 +163,7 @@ def script_split(script_in, chunk_size = 1000):
163
 
164
  script_arr, range_arr = script_split(script_input)
165
 
166
- # Translate and save
167
- for s, range in tqdm(zip(script_arr, range_arr)):
168
- # using chatgpt model
169
- print(f"now translating sentences {range}")
170
  if model_name == "gpt-3.5-turbo":
171
  # print(s + "\n")
172
  response = openai.ChatCompletion.create(
@@ -180,7 +177,7 @@ for s, range in tqdm(zip(script_arr, range_arr)):
180
  temperature=0.15
181
  )
182
 
183
- translate = response['choices'][0]['message']['content'].strip()
184
 
185
  if model_name == "text-davinci-003":
186
  prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
@@ -194,8 +191,23 @@ for s, range in tqdm(zip(script_arr, range_arr)):
194
  frequency_penalty=0.0,
195
  presence_penalty=0.0
196
  )
197
- translate = response['choices'][0]['text'].strip()
198
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  srt.set_translation(translate, range)
200
 
201
  srt.check_len_and_split()
 
163
 
164
  script_arr, range_arr = script_split(script_input)
165
 
166
+ def get_response(model_name):
 
 
 
167
  if model_name == "gpt-3.5-turbo":
168
  # print(s + "\n")
169
  response = openai.ChatCompletion.create(
 
177
  temperature=0.15
178
  )
179
 
180
+ return response['choices'][0]['message']['content'].strip()
181
 
182
  if model_name == "text-davinci-003":
183
  prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
 
191
  frequency_penalty=0.0,
192
  presence_penalty=0.0
193
  )
194
+ return response['choices'][0]['text'].strip()
195
+ pass
196
+
197
+
198
+ # Translate and save
199
+ for s, range in tqdm(zip(script_arr, range_arr)):
200
+ # using chatgpt model
201
+ print(f"now translating sentences {range}")
202
+ flag = True
203
+ while flag:
204
+ flag = False
205
+ try:
206
+ translate = get_response(model_name)
207
+ except Exception as e:
208
+ print("An error has occurred during translation:",e)
209
+ print("Retrying...")
210
+ flag = True
211
  srt.set_translation(translate, range)
212
 
213
  srt.check_len_and_split()