# Hugging Face Space (status: Running)
import html
import os
from urllib.parse import urlparse, parse_qs

import gradio as gr
import openai
from requests.structures import CaseInsensitiveDict
from youtube_transcript_api import YouTubeTranscriptApi

openai.api_key = os.getenv("OPENAI_API_KEY")
def Prompt_T(context):
    """Build the summarisation prompt for one piece of transcript text.

    Args:
        context: Transcript text to be summarised.

    Returns:
        The instruction line, the text fenced between two ``=========``
        separator lines, and a trailing ``Answer:`` cue.
    """
    return (
        "I want you to act as a content writer who is working with youtube "
        "video transcript. Summarise the following text:\n"
        "=========\n"
        f"{context}\n"
        "=========\n"
        "Answer:"
    )
def split_string(string, chunk_size):
    """Split ``string`` into consecutive pieces of at most ``chunk_size`` characters.

    Args:
        string: Text to split.
        chunk_size: Maximum length of each piece; must be positive.

    Returns:
        A list of substrings in original order. Every piece except possibly
        the last has exactly ``chunk_size`` characters. An empty input gives
        an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. A negative step
            would otherwise yield an empty list and silently drop the text.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [
        string[start:start + chunk_size]
        for start in range(0, len(string), chunk_size)
    ]
def gpt_api(input_text):
    """Send ``input_text`` to the chat-completion endpoint and return the reply text.

    The text is submitted as a single message with the ``system`` role to
    the ``gpt-3.5-turbo`` model. The content of the first returned choice
    is handed back to the caller.
    """
    chat_messages = [{"role": "system", "content": input_text}]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    return reply.choices[0].message.content
# Maximum number of transcript characters sent to the model in one request.
_CHUNK_SIZE = 10000

# Landing text shown when neither the textbox nor the referer names a video.
_PROMO_HTML = '<div><br> An easy way to get video descriptions If you are on YouTube itself, simply add "zxc" in front of YouTube to the videos address.</div>'


def _query_video_id(url):
    """Return the first ``v`` query-string value of ``url``, or "" if there is none."""
    try:
        values = parse_qs(urlparse(url).query).get("v")
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
        return ""
    return values[0] if values else ""


def _is_safe_video_id(candidate):
    """Return True if ``candidate`` is non-empty and only ASCII letters, digits, "-" or "_"."""
    return bool(candidate) and all(
        ch.isascii() and (ch.isalnum() or ch in "-_") for ch in candidate
    )


def generate(video_url: str, request: gr.Request):
    """Summarise the English transcript of a YouTube video.

    The video is taken from ``video_url`` when the user supplied one. If the
    textbox is empty, the ``v`` query parameter of the request's ``referer``
    header is used instead. With neither present, the landing text is
    returned.

    Args:
        video_url: Text of the URL textbox; expected to contain
            ``youtube.com/watch?v=``.
        request: Request object injected by Gradio; only its ``headers``
            are read. A missing request is treated as "no referer".

    Returns:
        A ``(summary_html, embed_html)`` pair. On failure the pair holds an
        error message and the word "Ошибка".
    """
    typed_url = (video_url or "").strip()
    if typed_url:
        # An explicitly typed URL wins over the referer, so the button keeps
        # working on a page that was opened with ``?v=...``.
        if "youtube.com/watch?v=" not in typed_url:
            return "Неверный URL", "Ошибка"
        video_id = _query_video_id(typed_url)
    else:
        headers = getattr(request, "headers", None)
        referer = headers.get("referer") if headers is not None else None
        video_id = _query_video_id(referer or "")
        if not video_id:
            print("Ключ 'v' отсутствует в словаре.")
            # Both sources are empty: show the base page with the promo text.
            return "", _PROMO_HTML

    # The id is interpolated into the iframe markup below, so refuse anything
    # that is not a plain identifier.
    if not _is_safe_video_id(video_id):
        return "Неверный URL", "Ошибка"

    # Only English transcripts are requested for now.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    except Exception as err:  # handler boundary: report instead of crashing the UI
        print("Transcript error:", err)
        return "Несмогли нати трнскрипт", "Ошибка"

    transcript_text = "".join(item["text"] + " " for item in transcript)
    print("Transcript:", transcript_text)
    print("Transcript lenght:", len(transcript_text))
    print("===============================================")

    eng_answer = ""
    for count, context in enumerate(split_string(transcript_text, _CHUNK_SIZE), start=1):
        eng_answer = eng_answer + " \n" + gpt_api(Prompt_T(context))
        print("Context:", context)
        print(count, " - part eng_answer:", eng_answer)
        print("==========================")

    html_embed = '<iframe width="450" height="158" src="https://www.youtube.com/embed/' + video_id + '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
    # Model output is untrusted text; escape it before embedding it in HTML.
    html_content = "<h6>" + "<br>" + html.escape(eng_answer) + "</h6>"
    return html_content, html_embed
# Browser tab title of the app.
title = "YouTube Summorize (only english video)"

# Hide the Gradio footer and push the content down from the top edge.
css = """
footer {visibility: hidden}
.gradio-container {padding-top: 100px}
"""

with gr.Blocks(css=css, title=title) as demo:
    # NOTE(review): the original indentation was lost, so the nesting of the
    # two HTML outputs inside the row/column is assumed — confirm the layout.
    with gr.Row():
        with gr.Column():
            input_d = gr.Textbox(
                label="YouTube video URL",
                placeholder="https://www.youtube.com/watch?v=XXXXXXXX",
            )
            greet_btn = gr.Button("Summarise")
            # gr.HTML replaces the deprecated gr.outputs.HTML namespace.
            dt_2 = gr.HTML()  # receives the embedded player (second return value)
            dt_1 = gr.HTML()  # receives the summary (first return value)
    dt = [dt_1, dt_2]
    # Summarise on button click, and also on page load so that a video id
    # carried by the page URL is processed without any interaction.
    greet_btn.click(generate, inputs=input_d, outputs=dt)
    demo.load(generate, inputs=input_d, outputs=dt)

demo.launch(share=False, debug=True)