# NOTE: residue of the web page this listing was copied from (not part of the program):
# Spaces: Running | File size: 3,814 Bytes
import pickle
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
from langchain.prompts import PromptTemplate
from pathlib import Path
import os
import openai
import gradio as gr
# Pull variables from a local .env file into the process environment before
# anything below reads them.
load_dotenv()
OPENAI_KEY = os.environ.get('OPENAI_KEY')

# Prompt that condenses the chat history plus a follow-up question into a
# single standalone question.
_template = (
    " Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question:"
)
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt that answers the question from the retrieved transcript chunks
# ({context}) while speaking as the youtuber called {name}.
template = (
    "You are an AI version of the youtuber {name} .\n"
    "You are given the following extracted parts of a long document and a question. Provide a conversational answer.\n"
    "Question: {question}\n"
    "=========\n"
    "{context}\n"
    "=========\n"
    "Answer:"
)
QA_PROMPT = PromptTemplate(
    input_variables=["question", "context", "name"],
    template=template,
)

# Sample video id; not referenced by the code visible in this file.
video1 = "ReeLQR7KCcM"
# Value substituted for {name} in QA_PROMPT; currently left empty.
youtuberName = ""
def gpt_api(input_text):
    """Complete *input_text* with ``text-davinci-003`` and return the text.

    Args:
        input_text: Prompt sent verbatim to the completion endpoint.

    Returns:
        The text of the first returned choice with surrounding whitespace
        stripped.
    """
    completion = openai.Completion.create(
        # The key is loaded from OPENAI_KEY, a variable the openai package
        # does not read on its own (its default is OPENAI_API_KEY), so it is
        # passed explicitly for this request.
        api_key=OPENAI_KEY,
        engine="text-davinci-003",
        prompt=input_text,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        max_tokens=300,
        n=1,
        stop="",
        temperature=0.6,
    )
    return completion.choices[0].text.strip()
def _extract_video_id(video_url):
    """Return the ``v=`` value of a YouTube watch URL, or "" when there is none."""
    _, marker, tail = video_url.partition("youtube.com/watch?v=")
    if not marker:
        return ""
    # Cut at the first '&' or '#' so trailing parameters such as "&t=30s"
    # are not mistaken for part of the id.
    return tail.split("&", 1)[0].split("#", 1)[0]


def generate(video_url, question):
    """Answer *question* about the English transcript of a YouTube video.

    The transcript is downloaded, capped at 15000 characters, split into
    chunks, embedded into a FAISS index (also pickled to ``vectorstore.pkl``)
    and queried through a ``ChatVectorDBChain`` with an empty chat history.

    Args:
        video_url: URL containing ``youtube.com/watch?v=<id>``.
        question: Question to ask about the video.

    Returns:
        The chain's answer; "" when *video_url* is empty; "Неверный URL"
        ("invalid URL") when no video id can be found in it; or a message
        starting with "An error occurred:" when the transcript cannot be
        fetched.
    """
    if not video_url:
        return ""
    video_id = _extract_video_id(video_url.strip())
    if not video_id:
        return "Неверный URL"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    except Exception as e:
        # Boundary with the UI: show the failure as text instead of crashing.
        # The exception has to be formatted; str + Exception raises TypeError.
        return f"An error occurred: {e}"

    # Every snippet is followed by a single space, including the last one.
    transcript_text = "".join(f"{item['text']} " for item in transcript)
    print("Transcript:", transcript_text)
    print("Transcript lenght:", len(transcript_text))
    # Cap the amount of text that gets embedded.
    transcript_text = transcript_text[:15000]

    chunks = CharacterTextSplitter().split_text(transcript_text)

    print("regenerating search index vector store..")
    # Embeddings (feature vectors) are created through the OpenAI API; see
    # https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture
    vector_store = FAISS.from_texts(chunks, OpenAIEmbeddings(openai_api_key=OPENAI_KEY))
    # The index is rebuilt on every call; the pickle is written but never
    # read back by the code in this function.
    with open("vectorstore.pkl", "wb") as f:
        pickle.dump(vector_store, f)

    qa = ChatVectorDBChain.from_llm(
        OpenAI(temperature=0, openai_api_key=OPENAI_KEY),
        vectorstore=vector_store,
        qa_prompt=QA_PROMPT,
    )
    response = qa(
        {"name": youtuberName, "question": question, "chat_history": []},
        return_only_outputs=True,
    )
    print("Result:", response["answer"])
    return response["answer"]
#======================================
# Gradio front end: two single-line inputs (video URL and question) feed
# generate(); its return value is shown in a four-line output box whose label
# "Ответ:" means "Answer:".
title = "YouTube Summorize (only english video < 15 min)"
_url_box = gr.Textbox(lines=1, label="Video URL")
_question_box = gr.Textbox(lines=1, label="Question", value="What is this video about?")
_answer_box = gr.Textbox(lines=4, label="Ответ:")
demo = gr.Interface(
    fn=generate,
    inputs=[_url_box, _question_box],
    outputs=[_answer_box],
    title=title,
    css=".gradio-container {background-color: lightblue}",
)
# Runs as soon as the module is executed.
demo.launch(share=False, debug=True)
|