LaoCzi committed on
Commit
bffa842
1 Parent(s): 80c6882

Create ap.py

Browse files
Files changed (1) hide show
  1. ap.py +113 -0
ap.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ from dotenv import load_dotenv
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.vectorstores.faiss import FAISS
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.llms import OpenAI
9
+ from langchain.chains import ChatVectorDBChain
10
+ from langchain.prompts import PromptTemplate
11
+ from pathlib import Path
12
+ import os
13
+ import openai
14
+ import gradio as gr
15
+
16
# Load variables from a local .env file into the process environment, then
# read the OpenAI key that is handed to the embedding and LLM clients.
# os.environ.get returns None when OPENAI_KEY is not set.
load_dotenv()
OPENAI_KEY = os.environ.get("OPENAI_KEY")


# Prompt that rewrites a follow-up question into a standalone one, using the
# chat history for context.
_template = """ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt used to answer a question from retrieved transcript excerpts while
# speaking as the youtuber named by {name}.
template = """You are an AI version of the youtuber {name} .
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
Question: {question}
=========
{context}
=========
Answer:"""
QA_PROMPT = PromptTemplate(
    input_variables=["question", "context", "name"],
    template=template,
)

# Sample video id; not referenced by the rest of the visible code.
video1 = "ReeLQR7KCcM"
# Value substituted for {name} in QA_PROMPT; left blank here.
youtuberName = ""
40
+
41
def gpt_api(input_text):
    """Request a completion for ``input_text`` and return the reply text.

    Args:
        input_text: Prompt passed unchanged as the ``prompt`` argument of
            ``openai.Completion.create``.

    Returns:
        The ``text`` of the first returned choice, with leading and trailing
        whitespace stripped.
    """
    # Request settings gathered in one place; exactly one choice is requested
    # (n=1), so only choices[0] is read below.
    request_options = dict(
        engine="text-davinci-003",
        prompt=input_text,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        max_tokens=300,
        n=1,
        stop="",
        temperature=0.6,
    )
    first_choice = openai.Completion.create(**request_options).choices[0]
    return first_choice.text.strip()
55
+
56
# Upper bound on the number of transcript characters that get embedded.
_MAX_TRANSCRIPT_CHARS = 15000


def _extract_video_id(video_url):
    """Return the ``v`` query parameter of a watch URL, or "" when it is absent."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    # parse_qs drops blank values, so "...watch?v=" yields no candidates.
    candidates = parse_qs(urlparse(video_url.strip()).query).get("v", [])
    return candidates[0].strip() if candidates else ""


def generate(video_url, question):
    """Answer ``question`` about a YouTube video using its English transcript.

    The transcript is fetched, truncated to ``_MAX_TRANSCRIPT_CHARS``
    characters, split into chunks, embedded into a FAISS index and queried
    through a ``ChatVectorDBChain`` with an empty chat history.

    Args:
        video_url: URL containing ``youtube.com/watch?v=<id>``. Extra query
            parameters after the id (``&t=...``, ``&list=...``) are accepted.
        question: The question to ask about the video.

    Returns:
        The chain's answer. Returns ``""`` when ``video_url`` is empty,
        ``"Неверный URL"`` when it is not a watch URL or carries no video id,
        and ``"An error occurred:<details>"`` when the transcript cannot be
        fetched.

    Side effects:
        Prints the transcript, its length and the answer to stdout, and
        overwrites ``vectorstore.pkl`` in the working directory on every call.
        Nothing in this function reads that file back.
    """
    if not video_url:
        return ""
    if "youtube.com/watch?v=" not in video_url:
        return "Неверный URL"

    # Read the id from the query string rather than taking the last 11
    # characters, which breaks as soon as anything follows the id.
    video_id = _extract_video_id(video_url)
    if not video_id:
        return "Неверный URL"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    except Exception as e:
        # Broad catch on purpose: this is the UI boundary, so any failure is
        # shown to the user. The exception is formatted into the message
        # because concatenating it to a str raises TypeError.
        return f"An error occurred:{e}"

    # Each segment is followed by a single space, as before.
    transcript_text = "".join(f"{item['text']} " for item in transcript)
    print("Transcript:", transcript_text)
    print("Transcript lenght:", len(transcript_text))
    transcript_text = transcript_text[:_MAX_TRANSCRIPT_CHARS]

    chunks = CharacterTextSplitter().split_text(transcript_text)

    print("regenerating search index vector store..")
    # Embeddings are created through the OpenAI API, one vector per chunk.
    vector_store = FAISS.from_texts(chunks, OpenAIEmbeddings(openai_api_key=OPENAI_KEY))
    with open("vectorstore.pkl", "wb") as f:
        pickle.dump(vector_store, f)

    qa = ChatVectorDBChain.from_llm(
        OpenAI(temperature=0, openai_api_key=OPENAI_KEY),
        vectorstore=vector_store,
        qa_prompt=QA_PROMPT,
    )

    # Every call starts a new conversation, so the history is always empty.
    response = qa(
        {"name": youtuberName, "question": question, "chat_history": []},
        return_only_outputs=True,
    )
    print("Result:", response["answer"])
    return response["answer"]
102
+ #======================================
103
+
104
+
105
# Gradio front end: two single-line text inputs (video URL and question) are
# passed to generate(), and its return value is shown in a four-line text box.
title = "YouTube Summorize (only english video < 15 min)"
demo = gr.Interface(
    fn=generate,
    title=title,
    inputs=[
        gr.Textbox(lines=1, label="Video URL"),
        gr.Textbox(lines=1, label="Question", value="What is this video about?"),
    ],
    outputs=[gr.Textbox(lines=4, label="Ответ:")],
    css=".gradio-container {background-color: lightblue}",
)
# The app is launched as soon as this module runs.
demo.launch(debug=True, share=False)