Spaces:
Sleeping
Sleeping
worldqwq
committed on
Commit
·
b39d769
1
Parent(s):
5f10ef2
Use GPT prompt to solve sentence merging issue
Browse files
Former-commit-id: 6469c29004d96a82159e26f3f957f761005ca4ef
SRT.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
import whisper
|
4 |
from csv import reader
|
5 |
import re
|
|
|
6 |
|
7 |
class SRT_segment(object):
|
8 |
def __init__(self, *args) -> None:
|
@@ -95,13 +96,36 @@ class SRT_script():
|
|
95 |
def set_translation(self, translate:str, id_range:tuple):
|
96 |
start_seg_id = id_range[0]
|
97 |
end_seg_id = id_range[1]
|
98 |
-
|
99 |
lines = translate.split('\n\n')
|
|
|
100 |
if len(lines) != (end_seg_id - start_seg_id + 1):
|
101 |
-
|
|
|
102 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
107 |
# naive way to due with merge translation problem
|
@@ -112,7 +136,10 @@ class SRT_script():
|
|
112 |
lines.remove(lines[i])
|
113 |
if i == len(lines) - 1:
|
114 |
break
|
115 |
-
|
|
|
|
|
|
|
116 |
pass
|
117 |
|
118 |
def split_seg(self, seg_id):
|
|
|
3 |
import whisper
|
4 |
from csv import reader
|
5 |
import re
|
6 |
+
import openai
|
7 |
|
8 |
class SRT_segment(object):
|
9 |
def __init__(self, *args) -> None:
|
|
|
96 |
def set_translation(self, translate:str, id_range:tuple):
|
97 |
start_seg_id = id_range[0]
|
98 |
end_seg_id = id_range[1]
|
99 |
+
|
100 |
lines = translate.split('\n\n')
|
101 |
+
|
102 |
if len(lines) != (end_seg_id - start_seg_id + 1):
|
103 |
+
input_str = "\n";
|
104 |
+
#initialize GPT input
|
105 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
106 |
+
input_str += 'Sentence %d: ' %(i+1)+ seg.source_text + '\n'
|
107 |
+
#Append to prompt string
|
108 |
+
#Adds sentence index let GPT keep track of sentence breaks
|
109 |
+
input_str += translate
|
110 |
+
#append translate to prompt
|
111 |
+
response = openai.ChatCompletion.create(
|
112 |
+
model="gpt-3.5-turbo",
|
113 |
+
messages = [
|
114 |
+
{"role": "system", "content": "You are a helpful assistant that help calibrates English to Chinese subtitle translations in starcraft2."},
|
115 |
+
{"role": "system", "content": "You are provided with a translated Chinese transcript, you need to reformat the Chinese sentence to match the meaning and sentence number as the English transcript"},
|
116 |
+
{"role": "system", "content": "There is no need for you to add any comments or notes, and do not modify the English transcript."},
|
117 |
+
{"role": "user", "content": 'Reformat the Chinese with the English transcript given: "{}"'.format(input_str)}
|
118 |
+
],
|
119 |
+
temperature=0.15
|
120 |
+
)
|
121 |
+
|
122 |
+
translate = response['choices'][0]['text'].strip()
|
123 |
+
|
124 |
+
|
125 |
+
#print(id_range)
|
126 |
+
#for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
127 |
+
# print(seg.source_text)
|
128 |
+
#print(translate)
|
129 |
|
130 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
131 |
# naive way to due with merge translation problem
|
|
|
136 |
lines.remove(lines[i])
|
137 |
if i == len(lines) - 1:
|
138 |
break
|
139 |
+
try:
|
140 |
+
seg.translation = lines[i].split(":" or ": ")[1]
|
141 |
+
except:
|
142 |
+
seg.translation = lines[i]
|
143 |
pass
|
144 |
|
145 |
def split_seg(self, seg_id):
|