# AskTRS / app.py
# (HuggingFace Space page residue, kept as a comment so the file stays valid
# Python: author "ucalyptus", commit message "Create app.py", commit bd4fb4f,
# raw / history / blame links, file size 2.68 kB)
import whisper
import pytube
import gradio as gr
import openai
import faiss
from datetime import datetime
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain import OpenAI
from langchain.vectorstores.base import VectorStoreRetriever
import os
# In-memory cache mapping a video URL to its built FAISS vector store, so a
# repeated question about the same video skips the download/transcribe/embed work.
video_data_cache = {}

def get_answer(api_key, video_link, question):
    """Answer *question* about the YouTube video at *video_link*.

    Downloads the audio, transcribes it with Whisper, embeds the transcript
    into a FAISS vector store (cached per video link), then runs a
    RetrievalQAWithSourcesChain over it.

    Returns a ``(answer, sources)`` tuple of strings.
    """
    # NOTE(review): setting the key via the environment mutates global process
    # state; concurrent requests with different keys would race — confirm this
    # Space only serves one user at a time.
    os.environ["OPENAI_API_KEY"] = api_key

    if video_link not in video_data_cache:
        # Fetch only the audio track of the video.
        video = pytube.YouTube(video_link)
        audio = video.streams.get_audio_only()
        # pytube treats output_path as a directory, not a filename; the
        # returned value is the actual path of the downloaded file.
        fn = audio.download(output_path="tmp.mp3")

        model = whisper.load_model("base")
        transcription = model.transcribe(fn)
        res = transcription['text']

        def store_segments(text):
            """Split *text* into 1000-char segments with pseudo-timestamps.

            The "start time" is derived from the character offset of each
            segment (NOT real audio time) and formatted as HH:MM:SS so it can
            be surfaced as a source label.
            """
            segment_size = 1000
            texts = []
            start_times = []
            for offset in range(0, len(text), segment_size):
                texts.append(text[offset:offset + segment_size])
                # Format the offset as HH:MM:SS directly.  The original used
                # datetime.fromtimestamp(offset), whose output depends on the
                # machine's local timezone — a bug for a label meant to look
                # like an elapsed time.
                start_times.append(
                    f"{offset // 3600:02d}:{offset % 3600 // 60:02d}:{offset % 60:02d}"
                )
            return texts, start_times

        texts, start_times = store_segments(res)

        # Re-split each segment and tag every chunk with its segment's
        # pseudo-timestamp so the QA chain can cite "sources".
        text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
        docs = []
        metadatas = []
        for i, d in enumerate(texts):
            splits = text_splitter.split_text(d)
            docs.extend(splits)
            metadatas.extend([{"source": start_times[i]}] * len(splits))

        embeddings = OpenAIEmbeddings()
        store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
        # BUG FIX: the original wrote only the raw faiss index to disk
        # (faiss.write_index) and later passed faiss.read_index()'s bare
        # faiss.Index to VectorStoreRetriever(vectorstore=...).  That loses the
        # docstore/metadata and hands the retriever the wrong type entirely;
        # it also used one shared "docs.index" file for every video, so the
        # cache collided.  Cache the complete LangChain FAISS store instead.
        video_data_cache[video_link] = store

    store = video_data_cache[video_link]
    retriever = VectorStoreRetriever(vectorstore=store)
    chain = RetrievalQAWithSourcesChain.from_llm(llm=OpenAI(temperature=0), retriever=retriever)
    result = chain({"question": question})
    return result['answer'], result['sources']
# Gradio UI: three text inputs (OpenAI API key, YouTube link, question) mapped
# straight onto get_answer's parameters; two text outputs (answer, sources).
iface = gr.Interface(
    fn=get_answer,
    inputs=["text", "text", "text"],
    outputs=["text", "text"],
    examples=[
        # SECURITY FIX: the original embedded a real-looking OpenAI secret
        # ("sk-kVc5...") in the public example row — anyone loading the Space
        # could read and use it.  Ship a placeholder; users paste their own key.
        [
            "sk-...your-openai-api-key...",
            "https://www.youtube.com/watch?v=xNAm9O_duSA",
            "Who could be the next Prime Minister ?",
        ],
    ],
)
# queue() serializes requests so long transcriptions don't time out the UI.
iface.queue().launch()