Spaces:

ucalyptus
/

AskTRS

Sleeping

App Files Community

ucalyptus committed on May 10, 2023

Commit

53ffbd2

•

1 Parent(s): 5912886

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -40

app.py CHANGED Viewed

@@ -12,56 +12,48 @@ from langchain import OpenAI
 from langchain.vectorstores.base import VectorStoreRetriever
 import os
-video_data_cache = {}
 def get_answer(api_key, video_link, question):
     os.environ["OPENAI_API_KEY"] = api_key
-    if video_link not in video_data_cache:
-        video = pytube.YouTube(video_link)
-        audio = video.streams.get_audio_only()
-        fn = audio.download(output_path="tmp.mp3")
-        model = whisper.load_model("base")
-        transcription = model.transcribe(fn)
-        res = transcription['text']
-        def store_segments(text):
-            segment_size = 1000
-            segments = [{'text': text[i:i+segment_size], 'start': i} for i in range(0, len(text), segment_size)]
-            texts = []
-            start_times = []
-            for segment in segments:
-                text = segment['text']
-                start = segment['start']
-                start_datetime = datetime.fromtimestamp(start)
-                formatted_start_time = start_datetime.strftime('%H:%M:%S')
-                texts.append(text)
-                start_times.append(formatted_start_time)
-            return texts, start_times
-        texts, start_times = store_segments(res)
-        text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
-        docs = []
-        metadatas = []
-        for i, d in enumerate(texts):
-            splits = text_splitter.split_text(d)
-            docs.extend(splits)
-            metadatas.extend([{"source": start_times[i]}] * len(splits))
-        embeddings = OpenAIEmbeddings()
-        store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
-        faiss.write_index(store.index, f"docs.index")
-        video_data_cache[video_link] = f"docs.index"
-    index_file = video_data_cache[video_link]
-    store = faiss.read_index(index_file)
     retri = VectorStoreRetriever(vectorstore=store)
@@ -71,13 +63,12 @@ def get_answer(api_key, video_link, question):
     return result['answer'], result['sources']
 iface = gr.Interface(
     fn=get_answer,
     inputs=["text", "text", "text"],
     outputs=["text", "text"],
     examples=[
-        ["sk-kVc5h5YtNXyD6WxUd4aSxIyWuGc", "https://www.youtube.com/watch?v=xNAm9O_duSA", "Who could be the next Prime Minister ?"]
     ],
 )

 from langchain.vectorstores.base import VectorStoreRetriever
 import os
 def get_answer(api_key, video_link, question):
     os.environ["OPENAI_API_KEY"] = api_key
+    video = pytube.YouTube(video_link)
+    audio = video.streams.get_audio_only()
+    fn = audio.download(output_path="tmp.mp3")
+    model = whisper.load_model("base")
+    transcription = model.transcribe(fn)
+    res = transcription['text']
+    def store_segments(text):
+        segment_size = 1000
+        segments = [{'text': text[i:i+segment_size], 'start': i} for i in range(0, len(text), segment_size)]
+        texts = []
+        start_times = []
+        for segment in segments:
+            text = segment['text']
+            start = segment['start']
+            start_datetime = datetime.fromtimestamp(start)
+            formatted_start_time = start_datetime.strftime('%H:%M:%S')
+            texts.append(text)
+            start_times.append(formatted_start_time)
+        return texts, start_times
+    texts, start_times = store_segments(res)
+    text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
+    docs = []
+    metadatas = []
+    for i, d in enumerate(texts):
+        splits = text_splitter.split_text(d)
+        docs.extend(splits)
+        metadatas.extend([{"source": start_times[i]}] * len(splits))
+    embeddings = OpenAIEmbeddings()
+    store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
+    faiss.write_index(store.index, "docs.index")
     retri = VectorStoreRetriever(vectorstore=store)
     return result['answer'], result['sources']
 iface = gr.Interface(
     fn=get_answer,
     inputs=["text", "text", "text"],
     outputs=["text", "text"],
     examples=[
+        [os.environ["OPENAI_API_KEY"], "https://www.youtube.com/watch?v=xNAm9O_duSA", "Who could be the next Prime Minister ?"]
     ],
 )