JiaenLiu committed on
Commit
ae0ed1b
1 Parent(s): 87a205e

add gpt3.5 and text input

Browse files

Former-commit-id: 0a72cc1d3f69a74fc35bb0981462370dbe566518

Files changed (2) hide show
  1. README.md +2 -0
  2. pipeline.py +61 -36
README.md CHANGED
@@ -16,6 +16,8 @@ example online: python3 pipeline.py --link https://www.youtube.com/watch?v=XbgFI
16
 
17
  example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
18
 
 
 
19
  options:
20
  -h, --help show this help message and exit
21
  --link LINK youtube video link here
 
16
 
17
  example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
18
 
19
+ example text input: python3 pipeline.py --text_file "/home/jiaenliu/project-t/results/huanghe_translation_en.txt" --result "/home/jiaenliu/project-t/results" --video_name "huanghe_test"
20
+
21
  options:
22
  -h, --help show this help message and exit
23
  --link LINK youtube video link here
pipeline.py CHANGED
@@ -4,32 +4,33 @@ import argparse
4
  import os
5
  import io
6
 
 
 
7
  parser = argparse.ArgumentParser()
8
  parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
9
  parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
 
10
  parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
11
  parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
12
  parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
 
13
  args = parser.parse_args()
14
 
15
- if args.link is None and args.local_path is None:
16
- print("need video source")
17
  exit()
18
 
19
- # openai.api_key = "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"
20
- openai.api_key = "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"
21
- # openai.api_key = os.getenv("OPENAI_API_KEY")
22
-
23
  DOWNLOAD_PATH = args.download
24
  RESULT_PATH = args.result
25
  VIDEO_NAME = args.video_name
26
- n_threshold = 5000
27
  # model_name = "text-davinci-003" # replace this with our own fine-tuned model
28
- model_name = "gpt-3.5-turbo"
29
-
30
 
31
  # get source audio
32
- if args.link is not None:
33
  # Download audio from YouTube
34
  video_link = args.link
35
  try:
@@ -42,25 +43,33 @@ if args.link is not None:
42
  print(e)
43
  audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
44
  VIDEO_NAME = audio.default_filename.split('.')[0]
45
- else:
46
  # Read from local
47
  audio_file= open(args.local_path, "rb")
48
 
49
- # perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
50
- if not os.path.exists("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)):
51
- transcript = openai.Audio.transcribe("whisper-1", audio_file)
52
- with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'w') as f:
53
- f.write(transcript['text'])
54
 
55
- # split the video script(open ai prompt limit: about 5000)
56
- with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
57
- script_en = f.read()
58
- N = len(script_en)
59
- script_split = script_en.split('.')
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  # Split the video script by sentences and create chunks within the token limit
62
- n_threshold = 2048 # Token limit for the GPT-3 model
63
- script_split = script_en.split('.')
64
 
65
  script_arr = []
66
  script = ""
@@ -75,17 +84,33 @@ if script.strip():
75
 
76
  # Translate and save
77
  for s in script_arr:
78
- prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
79
- response = openai.Completion.create(
80
- model=model_name,
81
- prompt=prompt,
82
- temperature=0.1,
83
- max_tokens=2048,
84
- top_p=1.0,
85
- frequency_penalty=0.0,
86
- presence_penalty=0.0
87
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
90
- f.write(response['choices'][0]['text'].strip())
91
- f.write('\n')
 
4
  import os
5
  import io
6
 
7
+
8
+
9
  parser = argparse.ArgumentParser()
10
  parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
11
  parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
12
+ parser.add_argument("--text_file", help="text file path here", default=None, type=str, required=False) # New argument
13
  parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
14
  parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
15
  parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
16
+ parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", default="gpt-3.5-turbo", type=str, required=False)
17
  args = parser.parse_args()
18
 
19
+ if args.link is None and args.local_path is None and args.text_file is None:
20
+ print("need video source or text file")
21
  exit()
22
 
23
+ openai.api_key = os.getenv("OPENAI_API_KEY")
 
 
 
24
  DOWNLOAD_PATH = args.download
25
  RESULT_PATH = args.result
26
  VIDEO_NAME = args.video_name
27
+ n_threshold = 1000 # Token limit for the GPT-3.5 model
28
  # model_name = "text-davinci-003" # replace this with our own fine-tuned model
29
+ model_name = args.model_name
30
+ # model_name = "davinci"
31
 
32
  # get source audio
33
+ if args.link is not None and args.local_path is None:
34
  # Download audio from YouTube
35
  video_link = args.link
36
  try:
 
43
  print(e)
44
  audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
45
  VIDEO_NAME = audio.default_filename.split('.')[0]
46
+ elif args.local_path is not None:
47
  # Read from local
48
  audio_file= open(args.local_path, "rb")
49
 
 
 
 
 
 
50
 
51
+
52
+ # Instead of using the script_en variable directly, we'll use script_input
53
+ if args.text_file is not None:
54
+ with open(args.text_file, 'r') as f:
55
+ script_input = f.read()
56
+ else:
57
+ # perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
58
+ if not os.path.exists("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)):
59
+ transcript = openai.Audio.transcribe("whisper-1", audio_file)
60
+ with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'w') as f:
61
+ f.write(transcript['text'])
62
+
63
+ # split the video script(open ai prompt limit: about 5000)
64
+ with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
65
+ script_en = f.read()
66
+ # N = len(script_en)
67
+ # script_split = script_en.split('.')
68
+ script_input = script_en
69
 
70
  # Split the video script by sentences and create chunks within the token limit
71
+ n_threshold = 4096 # Token limit for the GPT-3 model
72
+ script_split = script_input.split('.')
73
 
74
  script_arr = []
75
  script = ""
 
84
 
85
  # Translate and save
86
  for s in script_arr:
87
+ # using chatgpt model
88
+ if model_name == "gpt-3.5-turbo":
89
+ print(s + "\n")
90
+ response = openai.ChatCompletion.create(
91
+ model=model_name,
92
+ messages = [
93
+ {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
94
+ {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
95
+ ],
96
+ temperature=0.1
97
+ )
98
+ with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
99
+ f.write(response['choices'][0]['message']['content'].strip())
100
+ f.write('\n')
101
+ if model_name == "text-davinci-003":
102
+ prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
103
+ print(prompt)
104
+ response = openai.Completion.create(
105
+ model=model_name,
106
+ prompt=prompt,
107
+ temperature=0.1,
108
+ max_tokens=2000,
109
+ top_p=1.0,
110
+ frequency_penalty=0.0,
111
+ presence_penalty=0.0
112
+ )
113
 
114
+ with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
115
+ f.write(response['choices'][0]['text'].strip())
116
+ f.write('\n')