Spaces:
Running
Running
File size: 5,499 Bytes
2274cd3 5ac9a87 5a8ba9d 2274cd3 6d1c02f 2274cd3 5a8ba9d 6d1c02f 5a8ba9d 2274cd3 6d1c02f 2274cd3 c8fcb49 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 6d1c02f 5a8ba9d 6d1c02f 5a8ba9d 6d1c02f 2274cd3 5a8ba9d 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 5a8ba9d 2274cd3 5a8ba9d 6d1c02f 5a8ba9d 2274cd3 5a8ba9d 2274cd3 5a8ba9d 2274cd3 5a8ba9d 6d1c02f 5a8ba9d 6d1c02f 5a8ba9d 6d1c02f 5a8ba9d 2274cd3 6d1c02f 7693546 2274cd3 5a8ba9d 6d1c02f 2274cd3 6d1c02f e702c5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import os
import openai
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
from requests.structures import CaseInsensitiveDict
# Configure the OpenAI client from the OPENAI_KEY environment variable
# (the value is None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_KEY")
def Prompt_T(context, lang):
    """Build the summarisation prompt for one piece of transcript text.

    Args:
        context: Transcript text to embed between the ``=========`` rulers.
        lang: Language code. ``"ru"`` and ``"uk"`` select the Russian and
            Ukrainian instructions; any other value selects English.

    Returns:
        The instruction, the delimited ``context`` and an answer label,
        joined into a single prompt string.
    """
    # NOTE(review): the Ukrainian template asks for 50 words and ends with the
    # Russian answer label; both are reproduced exactly as in the original.
    if lang == "uk":
        instruction = "Я хочу, щоб ви виступили в ролі автора контенту, який узятий з транскрипту youtube відео, його треба перетворити у читабельний вигляд. Резюмуйте наступний текст у 50 слів:"
        answer_label = "Ответ:"
    elif lang == "ru":
        instruction = "Я хочу, чтобы вы выступили в роли автора контента, который взят с транскрипт youtube видео, его нужно преобразовать в читаемый вид. Резюмируйте следующий текст в 40 слов:"
        answer_label = "Ответ:"
    else:
        instruction = "I want you to act as a content writer who is working with youtube video transcript. Summarise the following text in 40 words:"
        answer_label = "Answer:"

    ruler = "\n=========\n"
    return instruction + ruler + context + ruler + answer_label
def split_string(string, chunk_size):
    """Split ``string`` into consecutive pieces of at most ``chunk_size`` characters.

    Args:
        string: The text to split.
        chunk_size: Maximum length of each piece; must be at least 1.

    Returns:
        A list of substrings in original order. Every piece except possibly
        the last has exactly ``chunk_size`` characters. An empty ``string``
        gives an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step would make range() empty and silently drop the whole
    # input; a zero step fails with an unrelated-looking range() error.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    return [
        string[start:start + chunk_size]
        for start in range(0, len(string), chunk_size)
    ]
def gpt_api (input_text):
    """Send ``input_text`` to the chat model and return the text of its reply.

    The text is submitted as a single message with the ``system`` role to
    the ``gpt-3.5-turbo`` model; the content of the first returned choice
    is handed back to the caller.
    """
    chat_messages = [{"role": "system", "content": input_text}]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content
def generate_video_html(video_url, request: gr.Request):
    """Return an HTML ``<iframe>`` snippet that embeds the given YouTube video.

    Args:
        video_url: A ``youtube.com/watch?v=...`` URL. When empty, a built-in
            demo video is used instead.
        request: The Gradio request object; not used by this function.

    Returns:
        The embed HTML, or the string ``"Неверный URL"`` when the URL does
        not look like a YouTube watch link or carries no usable video id.
    """
    if not video_url:
        # Nothing entered (e.g. on initial page load): show the demo video.
        video_url = "https://youtube.com/watch?v=PQBYZDyDBrY"

    # Check that video_url looks like a normal watch link.
    if "youtube.com/watch?v=" not in video_url:
        return "Неверный URL"

    # Read the id from the `v` query parameter instead of slicing the last
    # 11 characters, which breaks as soon as the URL carries extra
    # parameters such as `&t=30s`.
    query = parse_qs(urlparse(video_url.strip()).query)
    video_id = query.get("v", [""])[0]

    # The id is concatenated into HTML below, so accept only the 11-character
    # shape the original slice assumed, limited to URL-safe id characters.
    id_chars_ok = video_id.isascii() and all(
        ch.isalnum() or ch in "-_" for ch in video_id
    )
    if len(video_id) != 11 or not id_chars_ok:
        return "Неверный URL"

    html_embed = '<iframe width="450" height="250" src="https://www.youtube.com/embed/' + video_id + '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
    return html_embed
def generate(video_url, request: gr.Request):
    """Stream an HTML summary of a YouTube video's transcript.

    This is a generator: after each transcript chunk is summarised it yields
    the HTML accumulated so far, so the output can grow progressively.

    Args:
        video_url: A ``youtube.com/watch?v=...`` URL. An empty value yields
            an empty string.
        request: The Gradio request object; not used by this function.

    Yields:
        Either a short status message (``"Неверный URL"`` or a
        ``"No access for transcript"`` message) or the HTML summary built
        from the chunks processed so far.
    """
    # A `return value` inside a generator is not an emitted item, so every
    # message meant for the output component is yielded before returning.
    if not video_url:
        yield ""
        return

    # Check that video_url looks like a normal watch link.
    if "youtube.com/watch?v=" not in video_url:
        yield "Неверный URL"
        return

    # Read the id from the `v` query parameter instead of slicing the last
    # 11 characters, which breaks when the URL has extra parameters.
    id_values = parse_qs(urlparse(video_url.strip()).query).get("v", [])
    video_id = id_values[0] if id_values else ""
    if not video_id:
        yield "Неверный URL"
        return
    print("video_id=", video_id)

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception:
        yield "No access for transcript "
        return

    # Default to English; switch to the language of an auto-generated
    # transcript when one is listed (the last one listed wins).
    lang_video = "en"
    for transcript in transcript_list:
        if transcript.is_generated:
            lang_video = transcript.language_code
        print("transcript.language_code=", transcript.language_code)

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_video])
    except Exception:
        yield "No access for transcript"
        return

    # Each segment's text is followed by a single space, as before.
    transcript_text = "".join(segment["text"] + " " for segment in segments)
    print("Transcript:", transcript_text)
    print("Transcript lenght:", len(transcript_text))
    print("===============================================")

    # Russian and Ukrainian transcripts are cut into smaller chunks.
    chunk_size = 2000 if lang_video in ("ru", "uk") else 4000

    final_answer_gpt = ""
    for chunk in split_string(transcript_text, chunk_size):
        summary = gpt_api(Prompt_T(chunk, lang_video))
        final_answer_gpt = final_answer_gpt + "<p>" + summary + "</p>"
        yield "<h6>" + "<br>" + final_answer_gpt + "</h6>"

    if not final_answer_gpt:
        # Empty transcript: still emit the (empty) wrapper so the output is
        # refreshed instead of keeping whatever was shown previously.
        yield "<h6>" + "<br>" + final_answer_gpt + "</h6>"
# Page title and CSS overrides (hide the footer, add top padding).
title = "YouTube Summorize (en,ua,ru)"
css = """
footer {visibility: hidden}
.gradio-container {padding-top: 100px}
"""

# NOTE(review): the original indentation of this section was not available;
# the Row/Column nesting below is a reconstruction - confirm the intended layout.
with gr.Blocks(css=css, title=title) as demo:
    gr.HTML("<h3>A simple way to summarize YouTube video</h3>")
    with gr.Row():
        with gr.Column():
            input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX", value="")
            greet_btn = gr.Button("Summarise")
            # gr.HTML replaces the deprecated gr.outputs.HTML and matches the
            # heading component created above.
            dt_2 = gr.HTML()  # embedded video player
        dt_1 = gr.HTML()  # streamed summary
    dt = [dt_1, dt_2]

    # The button refreshes both the player and the summary.
    greet_btn.click(generate_video_html, inputs=input_d, outputs=dt_2)
    greet_btn.click(generate, inputs=input_d, outputs=dt_1)

    # The same two handlers also run once when the page loads.
    demo.load(generate_video_html, inputs=input_d, outputs=dt_2)
    demo.load(generate, inputs=input_d, outputs=dt_1)

# generate() is a generator handler, so the queue is enabled before launch.
demo.queue()
demo.launch(debug=True, share=False)
|