import os
import time

import faiss
import gradio as gr
import pytube
import whisper
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS


def get_answer(api_key, video_link, question):
    os.environ["OPENAI_API_KEY"] = api_key

    # Download only the audio track of the YouTube video.
    # Stream.download takes a `filename`, not an `output_path` file name.
    video = pytube.YouTube(video_link)
    audio = video.streams.get_audio_only()
    fn = audio.download(filename="tmp.mp3")

    # Transcribe with Whisper; the result contains timed `segments`,
    # each with its text and a start offset in seconds.
    model = whisper.load_model("base")
    transcription = model.transcribe(fn)

    def store_segments(segments):
        """Collect each Whisper segment's text and its HH:MM:SS start time."""
        texts = []
        start_times = []
        for segment in segments:
            texts.append(segment['text'])
            # segment['start'] is seconds from the start of the video,
            # so format it as an elapsed-time timestamp.
            start_times.append(time.strftime('%H:%M:%S', time.gmtime(segment['start'])))
        return texts, start_times

    texts, start_times = store_segments(transcription['segments'])

    # Re-chunk the segment texts for embedding, attaching each chunk's
    # start time as its "source" so answers can cite a point in the video.
    text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
    docs = []
    metadatas = []
    for i, d in enumerate(texts):
        splits = text_splitter.split_text(d)
        docs.extend(splits)
        metadatas.extend([{"source": start_times[i]}] * len(splits))

    # Embed the chunks into a FAISS vector store and persist the raw index.
    embeddings = OpenAIEmbeddings()
    store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
    faiss.write_index(store.index, "docs.index")

    # Answer the question with a retrieval QA chain that reports its
    # sources (the timestamps attached above).
    retriever = store.as_retriever()
    chain = RetrievalQAWithSourcesChain.from_llm(llm=OpenAI(temperature=0), retriever=retriever)
    result = chain({"question": question})
    return result['answer'], result['sources']


iface = gr.Interface(
    fn=get_answer,
    inputs=["text", "text", "text"],
    outputs=["text", "text"],
)
iface.queue().launch()
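
# The faiss.write_index call above saves only the raw FAISS index; the
# LangChain wrapper holding the chunk texts and timestamp metadata is not
# included. A minimal sketch of persisting and reloading the whole store
# across sessions, assuming the wrapper is pickled separately
# ("faiss_store.pkl" is a hypothetical filename, not used by this app):
#
#     import pickle
#     store.index = None                       # raw index lives in docs.index
#     with open("faiss_store.pkl", "wb") as f:
#         pickle.dump(store, f)
#
#     # ...later, in another process:
#     index = faiss.read_index("docs.index")
#     with open("faiss_store.pkl", "rb") as f:
#         store = pickle.load(f)
#     store.index = index                      # reattach the raw index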