YouTube_Summarize_Hades

Running

App Files Files Community

podsni

LaoCzi committed on Mar 1, 2023

Commit

cd4446f

0 Parent(s):

Duplicate from LaoCzi/YouTube_Summarize2

Browse files

Co-authored-by: Alex <LaoCzi@users.noreply.huggingface.co>

Files changed (5) hide show

.gitattributes +34 -0
README.md +14 -0
ap.py +113 -0
app.py +113 -0
requirements.txt +6 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: YouTube Summarize
+emoji: 👀
+colorFrom: blue
+colorTo: yellow
+sdk: gradio
+sdk_version: 3.19.1
+app_file: app.py
+pinned: false
+license: cc
+duplicated_from: LaoCzi/YouTube_Summarize2
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

ap.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import pickle
+from youtube_transcript_api import YouTubeTranscriptApi
+from dotenv import load_dotenv
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores.faiss import FAISS
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.llms import OpenAI
+from langchain.chains import ChatVectorDBChain
+from langchain.prompts import PromptTemplate
+from pathlib import Path
+import os
+import openai
+import gradio as gr
+load_dotenv()
+OPENAI_KEY = os.getenv('OPENAI_KEY')
+_template = """ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+template = """You are an AI version of the youtuber {name} .
+You are given the following extracted parts of a long document and a question. Provide a conversational answer.
+Question: {question}
+=========
+{context}
+=========
+Answer:"""
+QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context", "name"])
+video1 = "ReeLQR7KCcM"
+youtuberName = ""
+def gpt_api (input_text):
+    completion = openai.Completion.create(
+    engine="text-davinci-003",
+    prompt=input_text,
+    top_p=1,
+    frequency_penalty=0,
+    presence_penalty=0,
+    max_tokens=300,
+    n=1,
+    stop="",
+    temperature=0.6,
+    )
+    response = completion.choices[0].text.strip()
+    return response
+def generate(video_url, question):
+  if (video_url ==""): return ""
+  if "youtube.com/watch?v=" in video_url: x=111
+  else: return "Неверный URL"
+  video_id = video_url[-11:]
+  try:
+    t = YouTubeTranscriptApi.get_transcript(video_id,languages=["en"])
+    # do something with the transcript
+  except Exception as e:
+    return "An error occurred:"+e
+  finalString = ""
+  for item in t:
+      text = item['text']
+      finalString += text + " "
+  print("Transcript:",finalString)
+  print("Transcript lenght:",len(finalString))
+  if (len(finalString)>15000): finalString = finalString[:15000]
+  # load data sources to text (yt->text)
+  text_splitter = CharacterTextSplitter()
+  chunks = text_splitter.split_text(finalString)
+  vectorStorePkl = Path("vectorstore.pkl")
+  vectorStore = None
+  # if vectorStorePkl.is_file():
+  #     print("vector index found.. ")
+  #     with open('vectorstore.pkl', 'rb') as f:
+  #         vectorStore = pickle.load(f)
+  # else:
+  print("regenerating search index vector store..")
+  # It uses OpenAI API to create embeddings (i.e. a feature vector)
+  # https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture
+  vectorStore = FAISS.from_texts(chunks, OpenAIEmbeddings(openai_api_key=OPENAI_KEY))
+  with open("vectorstore.pkl", "wb") as f:
+      pickle.dump(vectorStore, f)
+  qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0, openai_api_key=OPENAI_KEY),
+                                  vectorstore=vectorStore, qa_prompt=QA_PROMPT)
+  chat_history = []
+  userInput = question
+  response = qa({"name": youtuberName, "question": userInput, "chat_history": chat_history}, return_only_outputs=True)
+  print("Result:",response["answer"])
+  return response["answer"]
+#======================================
+title = "YouTube Summorize (only english video < 15 min)"
+demo = gr.Interface(fn=generate, css=".gradio-container {background-color: lightblue}",
+                     inputs=[
+                              gr.Textbox(lines=1, label="Video URL"),
+                              gr.Textbox(lines=1, label="Question", value="What is this video about?"),
+                              ],
+                      outputs=[gr.Textbox(lines=4, label="Ответ:")],
+                      title = title)
+demo.launch(share=False, debug=True)

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import pickle
+from youtube_transcript_api import YouTubeTranscriptApi
+from dotenv import load_dotenv
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores.faiss import FAISS
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.llms import OpenAI
+from langchain.chains import ChatVectorDBChain
+from langchain.prompts import PromptTemplate
+from pathlib import Path
+import os
+import openai
+import gradio as gr
+load_dotenv()
+OPENAI_KEY = os.getenv('OPENAI_KEY')
+_template = """ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+template = """You are an AI version of the youtuber {name} .
+You are given the following extracted parts of a long document and a question. Provide a conversational answer.
+Question: {question}
+=========
+{context}
+=========
+Answer:"""
+QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context", "name"])
+video1 = "ReeLQR7KCcM"
+youtuberName = ""
+def gpt_api (input_text):
+    completion = openai.Completion.create(
+    engine="text-davinci-003",
+    prompt=input_text,
+    top_p=1,
+    frequency_penalty=0,
+    presence_penalty=0,
+    max_tokens=300,
+    n=1,
+    stop="",
+    temperature=0.6,
+    )
+    response = completion.choices[0].text.strip()
+    return response
+def generate(video_url, question):
+  if (video_url ==""): return ""
+  if "youtube.com/watch?v=" in video_url: x=111
+  else: return "Неверный URL"
+  video_id = video_url[-11:]
+  try:
+    t = YouTubeTranscriptApi.get_transcript(video_id,languages=["en"])
+    # do something with the transcript
+  except Exception as e:
+    return "An error occurred:"+e
+  finalString = ""
+  for item in t:
+      text = item['text']
+      finalString += text + " "
+  print("Transcript:",finalString)
+  print("Transcript lenght:",len(finalString))
+  if (len(finalString)>15000): finalString = finalString[:15000]
+  # load data sources to text (yt->text)
+  text_splitter = CharacterTextSplitter()
+  chunks = text_splitter.split_text(finalString)
+  vectorStorePkl = Path("vectorstore.pkl")
+  vectorStore = None
+  # if vectorStorePkl.is_file():
+  #     print("vector index found.. ")
+  #     with open('vectorstore.pkl', 'rb') as f:
+  #         vectorStore = pickle.load(f)
+  # else:
+  print("regenerating search index vector store..")
+  # It uses OpenAI API to create embeddings (i.e. a feature vector)
+  # https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture
+  vectorStore = FAISS.from_texts(chunks, OpenAIEmbeddings(openai_api_key=OPENAI_KEY))
+  with open("vectorstore.pkl", "wb") as f:
+      pickle.dump(vectorStore, f)
+  qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0, openai_api_key=OPENAI_KEY),
+                                  vectorstore=vectorStore, qa_prompt=QA_PROMPT)
+  chat_history = []
+  userInput = question
+  response = qa({"name": youtuberName, "question": userInput, "chat_history": chat_history}, return_only_outputs=True)
+  print("Result:",response["answer"])
+  return response["answer"]
+#======================================
+title = "YouTube Summorize (only english video < 15 min)"
+demo = gr.Interface(fn=generate, css=".gradio-container {background-color: lightblue}",
+                     inputs=[
+                              gr.Textbox(lines=1, label="Video URL"),
+                              gr.Textbox(lines=1, label="Question", value="What is this video about?"),
+                              ],
+                      outputs=[gr.Textbox(lines=4, label="Ответ:")],
+                      title = title)
+demo.launch(share=False, debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+langchain==0.0.82
+openai==0.26.4
+python-dotenv==0.21.1
+streamlit==1.12.0
+youtube_transcript_api==0.5.0
+faiss-cpu==1.7.3