# YouTube transcript summariser: Gradio UI backed by the OpenAI chat API.
import os
import openai 
import gradio as gr 
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
from requests.structures import CaseInsensitiveDict

# The OpenAI key is taken from the OPENAI_KEY environment variable (None when unset).
openai.api_key = os.environ.get("OPENAI_KEY")
def Prompt_T(context, lang):
    """Build the summarisation prompt for one piece of transcript text.

    Args:
        context: Transcript text to summarise. It is embedded verbatim
            between two ``=========`` delimiter lines.
        lang: Transcript language code. ``"ru"`` selects the Russian prompt,
            ``"uk"`` the Ukrainian one; any other value selects English.

    Returns:
        The complete prompt string: instruction, delimited context and an
        answer label in the same language as the instruction.
    """
    # (instruction, answer label) per language. The Ukrainian label used to be
    # the Russian "Ответ:" -- a copy-paste slip that mixed two languages in
    # a single prompt.
    localized = {
        "ru": (
            "Я хочу, чтобы вы выступили в роли автора контента, который взят с  транскрипт youtube видео, его нужно преобразовать  в читаемый вид. Резюмируйте следующий текст в 40 слов:",
            "Ответ:",
        ),
        "uk": (
            "Я хочу, щоб ви виступили в ролі автора контенту, який узятий з транскрипту youtube відео,  його треба перетворити у читабельний вигляд. Резюмуйте наступний текст у 50 слів: ",
            "Відповідь:",
        ),
    }
    english = (
        "I want you to act as a content writer who is working with youtube video transcript. Summarise the following text in 50 words:",
        "Answer:",
    )
    instruction, answer_label = localized.get(lang, english)

    # The two-space indent after each newline is part of the prompt layout
    # the original triple-quoted literals produced; it is kept unchanged.
    return (
        instruction
        + "\n  =========\n  "
        + context
        + "\n  =========\n  "
        + answer_label
    )


def convert_seconds(seconds):
    """Format a duration in seconds as a clock-style timestamp.

    Args:
        seconds: Duration in seconds (int or float); rounded to the nearest
            whole second before formatting.

    Returns:
        ``"H:MM:SS"`` when the duration is at least one hour, otherwise
        ``"M:SS"``. Minutes (in the hour form) and seconds are zero-padded
        to two digits, so 65 seconds is ``"1:05"`` rather than ``"1:5"``.
    """
    total = round(seconds)
    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return f"{hours}:{minutes:02d}:{secs:02d}"
    return f"{minutes}:{secs:02d}"

def _chunk_transcript_items(items, chunk_size):
    """Group transcript snippets into chunks of about ``chunk_size`` characters."""
    chunks = []
    parts = []          # snippet texts collected for the current chunk
    length = 0          # size of the current chunk, counting one separator per snippet
    chunk_start = 0.0   # the first chunk is always reported as starting at 0.0

    for item in items:
        text = item['text']
        added = len(text) + 1
        # Close the current chunk *before* adding a snippet that would push it
        # over the limit. The overflowing snippet then opens the next chunk,
        # so no text is lost and the new chunk starts at that snippet's time.
        # A single oversized snippet still forms a chunk of its own.
        if parts and length + added > chunk_size:
            chunks.append({'text': " ".join(parts), 'start': chunk_start})
            parts = []
            length = 0
            chunk_start = item['start']
        parts.append(text)
        length += added

    if parts:
        chunks.append({'text': " ".join(parts), 'start': chunk_start})
    return chunks


def get_transcript(video_id, lang_video, chunk_size):
    """Fetch a video's transcript and split it into timed text chunks.

    Args:
        video_id: YouTube video id passed to ``YouTubeTranscriptApi``.
        lang_video: Language code of the transcript to request.
        chunk_size: Approximate maximum number of characters per chunk.

    Returns:
        A list of ``{'text': str, 'start': number}`` dicts in transcript
        order. ``text`` is the space-joined snippet text of the chunk and
        ``start`` is the start time of the chunk's first snippet (0.0 for
        the first chunk). The list is empty when the transcript has no
        snippets.

    Raises:
        Whatever ``YouTubeTranscriptApi.get_transcript`` raises when the
        transcript cannot be fetched.
    """
    items = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_video])
    return _chunk_transcript_items(items, chunk_size)

def split_string(string, chunk_size):
    """Cut ``string`` into consecutive pieces of at most ``chunk_size`` characters.

    The last piece may be shorter; an empty string gives an empty list.
    """
    pieces = []
    for offset in range(0, len(string), chunk_size):
        pieces.append(string[offset:offset + chunk_size])
    return pieces


def gpt_api(input_text):
    """Send ``input_text`` to the ``gpt-3.5-turbo`` chat model and return its reply.

    The text is sent as a single message with the ``system`` role; the
    content of the first returned choice is handed back to the caller.
    """
    chat_messages = [{"role": "system", "content": input_text}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def generate_video_html(video_url, request: gr.Request):
    """Return the HTML ``<iframe>`` that embeds the video behind ``video_url``.

    Args:
        video_url: A ``youtube.com/watch?v=<id>`` URL. An empty string falls
            back to a built-in default video.
        request: Gradio request object; not used by this function.

    Returns:
        The iframe markup (named ``video_yt``), or the string
        ``"Неверный URL"`` when the URL is not a watch URL or does not carry
        a well-formed 11-character video id.
    """
    if video_url == "":
        video_url = "https://youtube.com/watch?v=PQBYZDyDBrY"

    # Does video_url look like a normal watch link?
    if "youtube.com/watch?v=" not in video_url:
        return "Неверный URL"

    # Read the id from the ``v`` query parameter instead of slicing the last
    # 11 characters, which breaks as soon as the URL carries extra parameters
    # (``&t=10s``) or trailing whitespace.
    video_id = parse_qs(urlparse(video_url.strip()).query).get("v", [""])[0]

    # The id is interpolated into HTML below, so only accept the characters
    # that the expected 11-character id format allows.
    id_is_valid = len(video_id) == 11 and all(
        ch.isascii() and (ch.isalnum() or ch in "-_") for ch in video_id
    )
    if not id_is_valid:
        return "Неверный URL"

    html_embed = (
        '<iframe width="450" height="250" src="https://www.youtube.com/embed/'
        + video_id
        + '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen name="video_yt" ></iframe>'
    )
    return html_embed



def generate(video_url, request: gr.Request):
    """Stream an HTML summary of a YouTube video's transcript.

    This is a generator: every message, including error messages, has to be
    *yielded* to reach the UI. A ``return "<message>"`` inside a generator is
    discarded, which is why the error paths below yield and then return.

    Args:
        video_url: A ``youtube.com/watch?v=<id>`` URL. An empty string
            produces an empty output.
        request: Gradio request object; not used by this function.

    Yields:
        HTML strings. On success: first a waiting image, then after each
        summarised chunk the summaries accumulated so far followed by the
        waiting image, and finally the complete summary without the image.
        Each summary is prefixed with a timestamp link that targets the
        ``video_yt`` iframe. On failure: a single plain-text error message.
    """
    from html import escape  # local import: only this function needs it

    # Nothing to summarise: emit an empty output.
    if video_url == "":
        yield ""
        return

    # Does video_url look like a normal watch link?
    if "youtube.com/watch?v=" not in video_url:
        yield "Неверный URL"
        return

    # Read the id from the ``v`` query parameter instead of slicing the last
    # 11 characters, which breaks when the URL carries extra parameters.
    video_id = parse_qs(urlparse(video_url.strip()).query).get("v", [""])[0]
    id_is_valid = len(video_id) == 11 and all(
        ch.isascii() and (ch.isalnum() or ch in "-_") for ch in video_id
    )
    if not id_is_valid:
        yield "Неверный URL"
        return
    print("video_id=", video_id)

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception as e:
        print("list_transcripts failed:", e)
        yield "No access for transcript "
        return

    # Use the language of the last auto-generated transcript listed;
    # fall back to English when none is auto-generated.
    lang_video = "en"
    for transcript in transcript_list:
        if transcript.is_generated:
            lang_video = transcript.language_code
        print("transcript.language_code=", transcript.language_code)

    # Russian and Ukrainian transcripts are cut into smaller chunks.
    chunk_size = 2000 if lang_video in ("ru", "uk") else 4000

    # Fetch and chunk in one call; the transcript is no longer downloaded twice.
    try:
        result_list = get_transcript(video_id, lang_video, chunk_size)
    except Exception as e:
        print("get_transcript failed:", e)
        yield "No access for transcript"
        return

    print("===============================================")
    print("Transcript lenght:", sum(len(item['text']) for item in result_list))
    print("===============================================")

    img_wait = '<img src="https://huggingface.co/spaces/LaoCzi/YouTube_Summarize2/resolve/main/22.gif">'
    yield "<br>" + img_wait

    final_answer_gpt = ""
    for item in result_list:
        time_text = convert_seconds(item['start'])
        time_seconds = str(round(item['start']))
        time_url = (
            '<a href="//www.youtube.com/embed/' + video_id
            + '?rel=0&amp;autoplay=1&amp;start=' + time_seconds
            + '" target="video_yt">' + time_text + '</a>'
        )
        # The model's reply is plain text; escape it so stray "<" or "&"
        # cannot break (or inject into) the surrounding markup.
        summary = escape(gpt_api(Prompt_T(item['text'], lang_video)))
        final_answer_gpt += "<p>" + time_url + " " + summary + "</p>"
        # Close the heading before the spinner so each frame is well-formed.
        yield "<h6><br>" + final_answer_gpt + "</h6>" + img_wait

    yield "<h6><br>" + final_answer_gpt + "</h6>"

# --- UI definition and launch ------------------------------------------------
title = "YouTube Summorize (en,ua,ru)"
css = """
footer {visibility: hidden}
.gradio-container {padding-top: 100px}
"""
with gr.Blocks(css=css, title=title) as demo:
    gr.HTML("<h3>A simple way to summarize   YouTube video</h3>")
    with gr.Row():
        with gr.Column():
            input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX", value="")
            greet_btn = gr.Button("Summarise")
        # Second item in the row: the embedded video player.
        # gr.HTML replaces the deprecated gr.outputs.HTML wrapper and matches
        # the gr.HTML call already used for the heading above.
        dt_2 = gr.HTML()
    # Below the row: the streamed summary.
    dt_1 = gr.HTML()

    # Both handlers run on button click and on page load.
    greet_btn.click(generate_video_html, inputs=input_d, outputs=dt_2)
    greet_btn.click(generate, inputs=input_d, outputs=dt_1)
    demo.load(generate_video_html, inputs=input_d, outputs=dt_2)
    demo.load(generate, inputs=input_d, outputs=dt_1)

# queue() is enabled before launch; `generate` is a generator handler.
demo.queue()
demo.launch(debug=True, share=False)