worldqwq committed on
Commit
b39d769
·
1 Parent(s): 5f10ef2

Use GPT prompt to solve sentence merging issue

Browse files

Former-commit-id: 6469c29004d96a82159e26f3f957f761005ca4ef

Files changed (1) hide show
  1. SRT.py +32 -5
SRT.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import whisper
4
  from csv import reader
5
  import re
 
6
 
7
  class SRT_segment(object):
8
  def __init__(self, *args) -> None:
@@ -95,13 +96,36 @@ class SRT_script():
95
  def set_translation(self, translate:str, id_range:tuple):
96
  start_seg_id = id_range[0]
97
  end_seg_id = id_range[1]
98
-
99
  lines = translate.split('\n\n')
 
100
  if len(lines) != (end_seg_id - start_seg_id + 1):
101
- print(id_range)
 
102
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
103
- print(seg.source_text)
104
- print(translate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
107
  # naive way to deal with the merged-translation problem
@@ -112,7 +136,10 @@ class SRT_script():
112
  lines.remove(lines[i])
113
  if i == len(lines) - 1:
114
  break
115
- seg.translation = lines[i].split(":")[1]
 
 
 
116
  pass
117
 
118
  def split_seg(self, seg_id):
 
3
  import whisper
4
  from csv import reader
5
  import re
6
+ import openai
7
 
8
  class SRT_segment(object):
9
  def __init__(self, *args) -> None:
 
96
  def set_translation(self, translate:str, id_range:tuple):
97
  start_seg_id = id_range[0]
98
  end_seg_id = id_range[1]
99
+
100
  lines = translate.split('\n\n')
101
+
102
  if len(lines) != (end_seg_id - start_seg_id + 1):
103
+ input_str = "\n";
104
+ #initialize GPT input
105
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
106
+ input_str += 'Sentence %d: ' %(i+1)+ seg.source_text + '\n'
107
+ #Append to prompt string
108
+ # Add a sentence index so GPT can keep track of sentence breaks
109
+ input_str += translate
110
+ #append translate to prompt
111
+ response = openai.ChatCompletion.create(
112
+ model="gpt-3.5-turbo",
113
+ messages = [
114
+ {"role": "system", "content": "You are a helpful assistant that help calibrates English to Chinese subtitle translations in starcraft2."},
115
+ {"role": "system", "content": "You are provided with a translated Chinese transcript, you need to reformat the Chinese sentence to match the meaning and sentence number as the English transcript"},
116
+ {"role": "system", "content": "There is no need for you to add any comments or notes, and do not modify the English transcript."},
117
+ {"role": "user", "content": 'Reformat the Chinese with the English transcript given: "{}"'.format(input_str)}
118
+ ],
119
+ temperature=0.15
120
+ )
121
+
122
+ translate = response['choices'][0]['text'].strip()
123
+
124
+
125
+ #print(id_range)
126
+ #for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
127
+ # print(seg.source_text)
128
+ #print(translate)
129
 
130
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
131
  # naive way to deal with the merged-translation problem
 
136
  lines.remove(lines[i])
137
  if i == len(lines) - 1:
138
  break
139
+ try:
140
+ seg.translation = lines[i].split(":" or ": ")[1]
141
+ except:
142
+ seg.translation = lines[i]
143
  pass
144
 
145
  def split_seg(self, seg_id):