LaoCzi committed on
Commit
a8d6167
1 Parent(s): e702c5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -13
app.py CHANGED
@@ -6,7 +6,6 @@ from urllib.parse import urlparse, parse_qs
6
  from requests.structures import CaseInsensitiveDict
7
 
8
  openai.api_key = os.getenv("OPENAI_KEY")
9
-
10
  def Prompt_T(context, lang):
11
 
12
  prompt = """I want you to act as a content writer who is working with youtube video transcript. Summarise the following text in 40 words:
@@ -30,6 +29,43 @@ def Prompt_T(context, lang):
30
  return prompt
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def split_string(string, chunk_size):
34
  return [string[i:i+chunk_size] for i in range(0, len(string), chunk_size)]
35
 
@@ -54,7 +90,7 @@ def generate_video_html(video_url, request: gr.Request):
54
  # Try to extract the video_id; English only for now
55
  video_id = video_url[-11:]
56
 
57
- html_embed='<iframe width="450" height="250" src="https://www.youtube.com/embed/'+ video_id +'" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
58
  return html_embed
59
 
60
 
@@ -93,17 +129,19 @@ def generate(video_url, request: gr.Request):
93
  except Exception as e:
94
  return "No access for transcript"
95
 
96
-
97
  finalString = ""
98
  for item in t:
99
  text = item['text']
100
 
101
  finalString += text + " "
102
 
103
-
104
- print("Transcript:",finalString)
105
  print("Transcript lenght:",len(finalString))
106
  print ("===============================================")
 
 
107
  input_string = finalString
108
 
109
  chunk_size = 4000
@@ -111,19 +149,17 @@ def generate(video_url, request: gr.Request):
111
  if (lang_video=="uk"): chunk_size = 2000
112
 
113
  result_list = split_string(input_string, chunk_size)
 
114
  final_answer_gpt=""
115
  count= 0
116
 
117
- print("++++++++++++++++++++++++++++++++++++++")
118
- for item in result_list:
119
- print(item)
120
-
121
-
122
  for item in result_list:
123
- count = count +1
124
- context = item
 
125
  input_gpt = Prompt_T(context,lang_video)
126
- final_answer_gpt = final_answer_gpt +"<p>" + gpt_api (input_gpt)+"</p>"
 
127
  html_content="<h6>"+"<br>"+final_answer_gpt+"</h6>"
128
  yield html_content
129
 
 
6
  from requests.structures import CaseInsensitiveDict
7
 
8
  openai.api_key = os.getenv("OPENAI_KEY")
 
9
  def Prompt_T(context, lang):
10
 
11
  prompt = """I want you to act as a content writer who is working with youtube video transcript. Summarise the following text in 40 words:
 
29
  return prompt
30
 
31
 
32
def convert_seconds(seconds):
    """Format a duration given in seconds as ``M:SS`` or ``H:MM:SS``.

    Args:
        seconds: Duration in seconds (int or float). It is rounded to the
            nearest whole second before formatting.

    Returns:
        A string such as ``"1:05"`` or, when the duration is an hour or
        longer, ``"1:02:05"``. The leading field is not padded; the
        following fields are zero-padded to two digits so that 65 seconds
        reads as ``"1:05"`` rather than ``"1:5"``.
    """
    total_seconds = round(seconds)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return f"{hours}:{minutes:02d}:{secs:02d}"
    return f"{minutes}:{secs:02d}"
41
+
42
def _chunk_transcript(items, chunk_size):
    """Group transcript items into consecutive text chunks.

    Args:
        items: Iterable of dicts, each providing a ``'text'`` string and a
            ``'start'`` timestamp.
        chunk_size: Maximum chunk length in characters. A single item whose
            text is longer than this still becomes a chunk of its own.

    Returns:
        A list of ``{'text': ..., 'start': ...}`` dicts. Each chunk's text is
        the item texts joined with (and prefixed by) a single space. The
        first chunk starts at ``0.0``; every later chunk starts at the
        ``'start'`` of its first item. For an empty input a single chunk
        with empty text is returned.
    """
    chunks = []
    chunk_text = ""
    chunk_start = 0.0
    for item in items:
        candidate = chunk_text + " " + item['text']
        if chunk_text and len(candidate) > chunk_size:
            # Flush what was collected so far and let the overflowing item
            # open the next chunk, so that its text is not lost.
            chunks.append({'text': chunk_text, 'start': chunk_start})
            chunk_text = " " + item['text']
            chunk_start = item['start']
        else:
            chunk_text = candidate

    # The trailing (possibly only) chunk is always emitted.
    chunks.append({'text': chunk_text, 'start': chunk_start})
    return chunks


def get_transcript(video_id, lang_video, chunk_size):
    """Fetch a video's transcript and split it into timestamped chunks.

    Args:
        video_id: Identifier of the video passed to
            ``YouTubeTranscriptApi.get_transcript``.
        lang_video: Language code requested for the transcript.
        chunk_size: Maximum chunk length in characters.

    Returns:
        A list of ``{'text': ..., 'start': ...}`` dicts as produced by
        ``_chunk_transcript``.

    Any exception raised by ``YouTubeTranscriptApi.get_transcript``
    propagates to the caller.
    """
    # The separate list_transcripts() call was dropped: its result was
    # never used.
    transcript = YouTubeTranscriptApi.get_transcript(
        video_id, languages=[lang_video]
    )
    return _chunk_transcript(transcript, chunk_size)
68
+
69
  def split_string(string, chunk_size):
70
  return [string[i:i+chunk_size] for i in range(0, len(string), chunk_size)]
71
 
 
90
  # Try to extract the video_id; English only for now
91
  video_id = video_url[-11:]
92
 
93
+ html_embed='<iframe width="450" height="250" src="https://www.youtube.com/embed/'+ video_id +'" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen name="video_yt" ></iframe>'
94
  return html_embed
95
 
96
 
 
129
  except Exception as e:
130
  return "No access for transcript"
131
 
132
+
133
  finalString = ""
134
  for item in t:
135
  text = item['text']
136
 
137
  finalString += text + " "
138
 
139
+ print ("===============================================")
140
+ #print("Transcript:",finalString)
141
  print("Transcript lenght:",len(finalString))
142
  print ("===============================================")
143
+
144
+
145
  input_string = finalString
146
 
147
  chunk_size = 4000
 
149
  if (lang_video=="uk"): chunk_size = 2000
150
 
151
  result_list = split_string(input_string, chunk_size)
152
+ result_list= text_video = get_transcript(video_id, lang_video, chunk_size)
153
  final_answer_gpt=""
154
  count= 0
155
 
 
 
 
 
 
156
  for item in result_list:
157
+ context = item['text']
158
+ time_text = str(convert_seconds(item['start']))
159
+ time_seconds = str(round(item['start']))
160
  input_gpt = Prompt_T(context,lang_video)
161
+ time_url='<a href="//www.youtube.com/embed/'+ str(video_id) + '?rel=0&amp;autoplay=1&amp;start='+time_seconds +'" target="video_yt">'+ time_text+'</a>'
162
+ final_answer_gpt = final_answer_gpt +"<p>" + time_url +" " + gpt_api (input_gpt)+"</p>"
163
  html_content="<h6>"+"<br>"+final_answer_gpt+"</h6>"
164
  yield html_content
165