LaoCzi's picture
Update app.py
2274cd3
raw
history blame
4.02 kB
import html
import os
import re
from urllib.parse import urlparse, parse_qs

import gradio as gr
import openai
from requests.structures import CaseInsensitiveDict
from youtube_transcript_api import YouTubeTranscriptApi

openai.api_key = os.getenv("OPENAI_API_KEY")
def Prompt_T(context):
    """Wrap *context* in the summarisation prompt sent to the chat model.

    The transcript text is placed between two ``=========`` separator lines,
    preceded by the instruction sentence and followed by ``Answer:``.
    """
    instruction = "I want you to act as a content writer who is working with youtube video transcript. Summarise the following text:"
    separator = "========="
    return "\n".join([instruction, separator, context, separator, "Answer:"])
def split_string(string, chunk_size):
    """Cut *string* into consecutive pieces of at most *chunk_size* characters.

    Pieces are returned in order; the last one may be shorter than the rest.
    """
    pieces = []
    for start in range(0, len(string), chunk_size):
        stop = start + chunk_size
        pieces.append(string[start:stop])
    return pieces
def gpt_api (input_text):
    """Send *input_text* to ``gpt-3.5-turbo`` and return the reply text.

    The text is submitted as a single message with the ``system`` role; the
    content of the first returned choice is handed back to the caller.
    """
    chat_messages = [{"role": "system", "content": input_text}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Video IDs are treated as exactly 11 characters from the URL-safe alphabet.
# The look-ahead rejects longer runs so a malformed ID is never silently
# truncated, and the restricted alphabet keeps the ID safe to place in HTML.
_WATCH_URL_RE = re.compile(
    r"youtube\.com/watch\?v=([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])"
)


def _video_id_from_referer(request):
    """Return the ``v`` query parameter of the request's Referer header, or ""."""
    headers = getattr(request, "headers", None)
    referer = headers.get('referer') if headers is not None else None
    query_params = parse_qs(urlparse(referer or "").query)
    values = query_params.get('v')
    if not values:
        print("Ключ 'v' отсутствует в словаре.")
        return ""
    return values[0]


def generate(video_url, request: gr.Request):
    """Summarise the English transcript of a YouTube video.

    Args:
        video_url: Watch URL entered by the user; may be empty.
        request: Incoming Gradio request. When its Referer header carries a
            ``v`` query parameter, a watch URL built from that value replaces
            ``video_url``.

    Returns:
        A ``(summary_html, embed_html)`` pair of strings. For an empty input
        the pair is ``("", <landing text>)``; for an unrecognised URL or a
        missing transcript it holds a short error message and an error label.
    """
    # If the page was opened with a ?v=<id> query parameter, build the URL from it.
    my_v = _video_id_from_referer(request)
    if my_v:
        video_url = "https://youtube.com/watch?v=" + my_v

    # Neither source supplied a URL: show the landing text instead of a summary.
    if not video_url:
        html_embed = '<div><br> An easy way to get video descriptions If you are on YouTube itself, simply add "zxc" in front of YouTube to the videos address.</div>'
        summarize = ""
        return summarize, html_embed

    # Validate the URL and pull the ID from right after "watch?v=", so trailing
    # parameters such as "&t=42s" or "&list=..." no longer corrupt the ID.
    match = _WATCH_URL_RE.search(video_url)
    if match is None:
        return "Неверный URL", "Ошибка"
    video_id = match.group(1)

    # Only English transcripts are requested for now.
    try:
        t = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    except Exception as e:
        # Best-effort: report the failure to the user, but keep the cause in the log.
        print("Transcript lookup failed:", e)
        return "Несмогли нати трнскрипт", "Ошибка"

    finalString = "".join(item['text'] + " " for item in t)
    print("Transcript:", finalString)
    print("Transcript lenght:", len(finalString))
    print("===============================================")

    # Summarise the transcript piecewise so each prompt stays bounded in size.
    chunk_size = 10000
    result_list = split_string(finalString, chunk_size)
    eng_answer = ""
    for count, context in enumerate(result_list, start=1):
        eng_prompt = Prompt_T(context)
        eng_answer = eng_answer + " \n" + gpt_api(eng_prompt)
        print("Context:", context)
        print(count, " - part eng_answer:", eng_answer)
        print("==========================")

    html_embed = '<iframe width="450" height="158" src="https://www.youtube.com/embed/' + video_id + '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
    # The model output is plain text; escape it so stray "<" or "&" cannot
    # break or inject markup into the rendered page.
    html_content = "<h6>" + "<br>" + html.escape(eng_answer) + "</h6>"
    return html_content, html_embed
title = "YouTube Summorize (only english video)"
# Hide the Gradio footer and add space above the app container.
css="""
footer {visibility: hidden}
.gradio-container {padding-top: 100px}
"""
# NOTE(review): the nesting of the components below is reconstructed; confirm
# that both HTML outputs belong inside the column with the input and button.
with gr.Blocks(css=css, title=title) as demo:
    with gr.Row():
        with gr.Column():
            input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX")
            greet_btn = gr.Button("Summarise")
            # gr.HTML is the Blocks component; the legacy gr.outputs namespace
            # is deprecated in Gradio 3 and not available in later releases.
            dt_2 = gr.HTML()
            dt_1 = gr.HTML()
    # generate() returns two values; dt_1 receives the first, dt_2 the second.
    dt = [dt_1, dt_2]
    greet_btn.click(generate, inputs=input_d, outputs=dt)
    # Also run once on page load so a ?v=<id> in the page address is handled
    # without the user pressing the button.
    demo.load(generate, inputs=input_d, outputs=dt)
demo.launch(share=False, debug=True)