LaoCzi committed on
Commit
5a8ba9d
1 Parent(s): 2274cd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -20
app.py CHANGED
@@ -1,20 +1,46 @@
1
  import os
2
  import openai
3
- openai.api_key = os.getenv("OPENAI_API_KEY")
4
-
5
  import gradio as gr
6
  from youtube_transcript_api import YouTubeTranscriptApi
7
  from urllib.parse import urlparse, parse_qs
8
  from requests.structures import CaseInsensitiveDict
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def Prompt_T(context):
12
- result = """I want you to act as a content writer who is working with youtube video transcript. Summarise the following text:
13
  =========
14
  """+ context +"""
15
  =========
16
  Answer:"""
17
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def split_string(string, chunk_size):
@@ -45,7 +71,6 @@ def generate(video_url, request: gr.Request):
45
  my_v = my_dict['v'][0]
46
  video_url ="https://youtube.com/watch?v="+my_v
47
  except KeyError:
48
- print("Ключ 'v' отсутствует в словаре.")
49
  my_v = ""
50
 
51
  #Если две переменные пустые, то показываем базовую страницу с рекламой
@@ -60,51 +85,74 @@ def generate(video_url, request: gr.Request):
60
 
61
  #Пробуем извлечь video_id пока на английском
62
  video_id = video_url[-11:]
 
 
 
 
 
 
 
 
 
 
 
 
63
  try:
64
- t = YouTubeTranscriptApi.get_transcript(video_id,languages=["en"])
65
  # do something with the transcript
66
  except Exception as e:
67
- return "Несмогли нати трнскрипт", "Ошибка"
68
 
69
  finalString = ""
70
  for item in t:
71
  text = item['text']
72
  finalString += text + " "
73
 
74
-
75
  print("Transcript:",finalString)
76
  print("Transcript lenght:",len(finalString))
77
  print ("===============================================")
78
  input_string = finalString
79
- chunk_size = 10000
 
 
 
 
 
80
  result_list = split_string(input_string, chunk_size)
81
- eng_answer=""
82
  count= 0
 
 
 
 
 
 
83
  for item in result_list:
84
  count = count +1
85
  context = item
86
- eng_prompt = Prompt_T(context)
87
- eng_answer = eng_answer +" \n" + gpt_api (eng_prompt)
88
- print("Context:", context)
89
- print(count, " - part eng_answer:", eng_answer)
90
- print("==========================")
91
 
92
  html_embed='<iframe width="450" height="158" src="https://www.youtube.com/embed/'+ video_id +'" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
93
- html_content="<h6>"+"<br>"+eng_answer+"</h6>"
94
  return html_content, html_embed
95
- title = "YouTube Summorize (only english video)"
 
96
  css="""
97
  footer {visibility: hidden}
98
  .gradio-container {padding-top: 100px}
99
  """
100
  with gr.Blocks(css=css, title=title) as demo:
 
101
  with gr.Row():
102
  with gr.Column():
103
- input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX")
104
  greet_btn = gr.Button("Summarise")
105
  dt_2 = gr.outputs.HTML()
106
  dt_1 = gr.outputs.HTML()
107
- dt =[dt_1, dt_2 ]
108
  greet_btn.click(generate, inputs=input_d, outputs=dt)
109
  demo.load(generate, inputs=input_d, outputs=dt)
110
 
 
1
  import os
2
  import openai
 
 
3
  import gradio as gr
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
  from urllib.parse import urlparse, parse_qs
6
  from requests.structures import CaseInsensitiveDict
7
 
8
+ openai.api_key = os.getenv("OPENAI_API_KEY")
9
+
10
+ google_analtycs="""
11
+ <!-- Google tag (gtag.js) -->
12
+ <script async src="https://www.googletagmanager.com/gtag/js?id=G-S9JEXRFQJF"></script>
13
+ <script>
14
+ window.dataLayer = window.dataLayer || [];
15
+ function gtag(){dataLayer.push(arguments);}
16
+ gtag('js', new Date());
17
+
18
+ gtag('config', 'G-S9JEXRFQJF');
19
+ </script>
20
+ """
21
+
22
+
23
def Prompt_T(context: str, lang: str) -> str:
    """Build the summarisation prompt for one chunk of a video transcript.

    The prompt consists of a language-specific instruction, the transcript
    chunk wrapped between two ``=========`` delimiter lines, and a trailing
    answer cue in the same language as the instruction.

    Args:
        context: Transcript text to embed between the delimiters.
        lang: Language code of the transcript. ``"ru"`` selects the Russian
            template and ``"uk"`` the Ukrainian one; any other value falls
            back to the English template.

    Returns:
        The complete prompt string.
    """
    if lang == "ru":
        instruction = (
            "Я хочу, чтобы вы выступили в роли автора контента, который взят "
            "с транскрипт youtube видео, его нужно преобразовать в читаемый "
            "вид. Резюмируйте следующий текст в 50 слов:"
        )
        answer_cue = "Ответ:"
    elif lang == "uk":
        instruction = (
            "Я хочу, щоб ви виступили в ролі автора контенту, який узятий з "
            "транскрипту youtube відео, його треба перетворити у читабельний "
            "вигляд. Резюмуйте наступний текст у 50 слів:"
        )
        # The Ukrainian template previously ended with the Russian cue
        # "Ответ:"; keep the cue in the same language as the instruction.
        answer_cue = "Відповідь:"
    else:
        instruction = (
            "I want you to act as a content writer who is working with "
            "youtube video transcript. Summarise the following text in "
            "70 words:"
        )
        answer_cue = "Answer:"

    delimiter = "\n=========\n"
    return instruction + delimiter + context + delimiter + answer_cue
44
 
45
 
46
  def split_string(string, chunk_size):
 
71
  my_v = my_dict['v'][0]
72
  video_url ="https://youtube.com/watch?v="+my_v
73
  except KeyError:
 
74
  my_v = ""
75
 
76
  #Если две переменные пустые, то показываем базовую страницу с рекламой
 
85
 
86
  #Пробуем извлечь video_id пока на английском
87
  video_id = video_url[-11:]
88
+
89
+ try:
90
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
91
+ except Exception as e:
92
+ return "No access for transcript ", "Error transcript_list"
93
+
94
+ # iterate over all available transcripts
95
+ lang_video="en"
96
+ for transcript in transcript_list:
97
+ if (transcript.is_generated == True): lang_video = transcript.language_code
98
+ print ("transcript.language_code=", transcript.language_code)
99
+
100
  try:
101
+ t = YouTubeTranscriptApi.get_transcript(video_id,languages=[lang_video])
102
  # do something with the transcript
103
  except Exception as e:
104
+ return "No access for transcript", "Error transcript"
105
 
106
  finalString = ""
107
  for item in t:
108
  text = item['text']
109
  finalString += text + " "
110
 
111
+
112
  print("Transcript:",finalString)
113
  print("Transcript lenght:",len(finalString))
114
  print ("===============================================")
115
  input_string = finalString
116
+
117
+
118
+ chunk_size = 12000
119
+ if (lang_video=="ru"): chunk_size = 5000
120
+ if (lang_video=="uk"): chunk_size = 5000
121
+
122
  result_list = split_string(input_string, chunk_size)
123
+ final_answer_gpt=""
124
  count= 0
125
+
126
+ print("++++++++++++++++++++++++++++++++++++++")
127
+ for item in result_list:
128
+ print(item)
129
+
130
+
131
  for item in result_list:
132
  count = count +1
133
  context = item
134
+ input_gpt = Prompt_T(context,lang_video)
135
+ final_answer_gpt = final_answer_gpt +"<p>" + gpt_api (input_gpt)+"</p>"
136
+
 
 
137
 
138
  html_embed='<iframe width="450" height="158" src="https://www.youtube.com/embed/'+ video_id +'" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
139
+ html_content="<h6>"+"<br>"+final_answer_gpt+"</h6>"
140
  return html_content, html_embed
141
+
142
+ title = "YouTube Summorize (en,ua,ru)"
143
  css="""
144
  footer {visibility: hidden}
145
  .gradio-container {padding-top: 100px}
146
  """
147
  with gr.Blocks(css=css, title=title) as demo:
148
+ gr.HTML("<h1>A simple way to summarise and translate the YouTube video in 22 languages</h1>"+google_analtycs)
149
  with gr.Row():
150
  with gr.Column():
151
+ input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX", value="")
152
  greet_btn = gr.Button("Summarise")
153
  dt_2 = gr.outputs.HTML()
154
  dt_1 = gr.outputs.HTML()
155
+ dt =[dt_1, dt_2]
156
  greet_btn.click(generate, inputs=input_d, outputs=dt)
157
  demo.load(generate, inputs=input_d, outputs=dt)
158