worldqwq committed on
Commit
a161330
1 Parent(s): 2642df2

Updated prompt solution, improved solving efficiency

Browse files

Former-commit-id: 1f607712ccfcb856435dc4fe669c51ed704a29a4

Files changed (1) hide show
  1. SRT.py +31 -20
SRT.py CHANGED
@@ -115,51 +115,62 @@ class SRT_script():
115
  start_seg_id = id_range[0]
116
  end_seg_id = id_range[1]
117
 
118
- def inner_func(input_str):
119
  response = openai.ChatCompletion.create(
120
- model=model,
 
121
  messages = [
122
- {"role": "system", "content": "You are a helpful assistant that help calibrates English to Chinese subtitle translations in starcraft2."},
123
- {"role": "system", "content": "You are provided with a translated Chinese transcript; you must modify or split the Chinese sentence to match the meaning and the number of the English transcript exactly one by one. You must not merge ANY Chinese lines, you can only split them but the total Chinese lines MUST equals to number of English lines."},
124
- {"role": "system", "content": "There is no need for you to add any comments or notes, and do not modify the English transcript."},
125
- {"role": "user", "content": 'You are given the English transcript and line number, your task is to merge or split the Chinese to match the exact number of lines in English transcript, no more no less. For example, if there are more Chinese lines than English lines, merge some the Chinese lines to match the number of English lines. If Chinese lines is less than English lines, split some Chinese lines to match the english lines: "{}"'.format(input_str)}
 
 
 
126
  ],
127
- temperature=0.7
 
128
  )
129
  return response['choices'][0]['message']['content'].strip()
130
 
 
131
  lines = translate.split('\n\n')
132
  if len(lines) < (end_seg_id - start_seg_id + 1):
133
  count = 0
134
  solved = False
135
  while count<5 and len(lines) != (end_seg_id - start_seg_id + 1):
136
-
137
  count += 1
138
  print("Solving Unmatched Lines|iteration {}".format(count))
139
- input_str = "\n"
140
  #initialize GPT input
141
- for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
142
- input_str += 'Sentence %d: ' %(i+1)+ seg.source_text + '\n'
143
- #Append to prompt string
144
- #Adds sentence index let GPT keep track of sentence breaks
145
- input_str += translate
146
  #append translate to prompt
147
-
148
  flag = True
149
  while flag:
150
  flag = False
 
 
151
  try:
152
- translate = inner_func(input_str)
 
 
153
  except Exception as e:
154
  print("An error has occurred during solving unmatched lines:",e)
155
  print("Retrying...")
156
  flag = True
157
-
158
- lines = translate.split('\n\n')
 
 
159
  if len(lines) < (end_seg_id - start_seg_id + 1):
160
  print("Failed Solving unmatched lines, Manually parse needed")
161
 
162
-
 
163
  if video_link:
164
  log_file = "./logs/log_link.csv"
165
  log_exist = os.path.exists(log_file)
@@ -193,7 +204,7 @@ class SRT_script():
193
  if i == len(lines) - 1:
194
  break
195
  try:
196
- seg.translation = lines[i].split(":" or ":")[1]
197
  except:
198
  seg.translation = lines[i]
199
  #print(lines[i])
 
115
  start_seg_id = id_range[0]
116
  end_seg_id = id_range[1]
117
 
118
+ def inner_func(target,input_str):
119
  response = openai.ChatCompletion.create(
120
+ #model=model,
121
+ model = "gpt-3.5-turbo",
122
  messages = [
123
+ #{"role": "system", "content": "You are a helpful assistant that help calibrates English to Chinese subtitle translations in starcraft2."},
124
+ #{"role": "system", "content": "You are provided with a translated Chinese transcript; you must modify or split the Chinese sentence to match the meaning and the number of the English transcript exactly one by one. You must not merge ANY Chinese lines, you can only split them but the total Chinese lines MUST equals to number of English lines."},
125
+ #{"role": "system", "content": "There is no need for you to add any comments or notes, and do not modify the English transcript."},
126
+ #{"role": "user", "content": 'You are given the English transcript and line number, your task is to merge or split the Chinese to match the exact number of lines in English transcript, no more no less. For example, if there are more Chinese lines than English lines, merge some the Chinese lines to match the number of English lines. If Chinese lines is less than English lines, split some Chinese lines to match the english lines: "{}"'.format(input_str)}
127
+ {"role": "system", "content": "你的任务是按照要求合并或拆分句子到指定行数,你需要尽可能保证句意,但必要时可以将一句话分为两行输出"},
128
+ {"role": "system", "content": "注意:你只需要输出处理过的中文句子,如果你要输出序号,请使用冒号隔开"},
129
+ {"role": "user", "content": '请将下面的句子拆分或组合为{}句:\n{}'.format(target,input_str)}
130
  ],
131
+ #temperature=0.7
132
+ temperature = 0.15
133
  )
134
  return response['choices'][0]['message']['content'].strip()
135
 
136
+
137
  lines = translate.split('\n\n')
138
  if len(lines) < (end_seg_id - start_seg_id + 1):
139
  count = 0
140
  solved = False
141
  while count<5 and len(lines) != (end_seg_id - start_seg_id + 1):
 
142
  count += 1
143
  print("Solving Unmatched Lines|iteration {}".format(count))
144
+ #input_str = "\n"
145
  #initialize GPT input
146
+ #for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
147
+ # input_str += 'Sentence %d: ' %(i+1)+ seg.source_text + '\n'
148
+ # #Append to prompt string
149
+ # #Adds sentence index let GPT keep track of sentence breaks
150
+ #input_str += translate
151
  #append translate to prompt
 
152
  flag = True
153
  while flag:
154
  flag = False
155
+ #print("translate:")
156
+ #print(translate)
157
  try:
158
+ #print("target")
159
+ #print(end_seg_id - start_seg_id + 1)
160
+ translate = inner_func(end_seg_id - start_seg_id + 1,translate)
161
  except Exception as e:
162
  print("An error has occurred during solving unmatched lines:",e)
163
  print("Retrying...")
164
  flag = True
165
+ lines = translate.split('\n')
166
+ #print("result")
167
+ #print(len(lines))
168
+
169
  if len(lines) < (end_seg_id - start_seg_id + 1):
170
  print("Failed Solving unmatched lines, Manually parse needed")
171
 
172
+ if not os.path.exists("./logs"):
173
+ os.mkdir("./logs")
174
  if video_link:
175
  log_file = "./logs/log_link.csv"
176
  log_exist = os.path.exists(log_file)
 
204
  if i == len(lines) - 1:
205
  break
206
  try:
207
+ seg.translation = lines[i].split(":" or ":" or ".")[1]
208
  except:
209
  seg.translation = lines[i]
210
  #print(lines[i])