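"""Ask questions about a YouTube video.

Gradio app that downloads a video's audio with pytube, transcribes it with
OpenAI Whisper, indexes the transcript in a FAISS vector store via LangChain,
and answers questions with a RetrievalQAWithSourcesChain so each answer cites
the transcript timestamps it drew on.
"""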
import os
from datetime import timedelta

import whisper
import pytube
import gradio as gr
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import OpenAI

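# Cache the built vector store per video link so repeat questions skip the
# download / transcription / embedding pipeline.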
video_data_cache = {}

def get_answer(api_key, video_link, question):
    os.environ["OPENAI_API_KEY"] = api_key

    if video_link not in video_data_cache:
        # Grab just the audio track; pytube's `filename` argument names the
        # output file (`output_path` would be treated as a directory).
        video = pytube.YouTube(video_link)
        audio = video.streams.get_audio_only()
        fn = audio.download(filename="tmp.mp4")

        # Whisper returns the full text plus timestamped segments.
        model = whisper.load_model("base")
        transcription = model.transcribe(fn)

        # Use Whisper's own segments rather than slicing the flat text into
        # fixed-size character chunks: each segment carries a real start
        # offset in seconds, which becomes the cited "source" timestamp.
        texts = []
        start_times = []
        for segment in transcription['segments']:
            texts.append(segment['text'])
            start_times.append(str(timedelta(seconds=int(segment['start']))))

        # Split any long segments, carrying the start time along as the
        # "source" metadata that RetrievalQAWithSourcesChain reports back.
        text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
        docs = []
        metadatas = []
        for i, d in enumerate(texts):
            splits = text_splitter.split_text(d)
            docs.extend(splits)
            metadatas.extend([{"source": start_times[i]}] * len(splits))

        # Cache the whole FAISS store object. Persisting only the raw index
        # with faiss.write_index would drop the docstore and metadata, leaving
        # nothing that can act as a LangChain retriever on reload.
        embeddings = OpenAIEmbeddings()
        store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
        video_data_cache[video_link] = store
    
    store = video_data_cache[video_link]

    # Query the cached store; the chain returns an answer plus the
    # transcript timestamps ("sources") it drew on.
    chain = RetrievalQAWithSourcesChain.from_llm(
        llm=OpenAI(temperature=0), retriever=store.as_retriever()
    )

    result = chain({"question": question})

    return result['answer'], result['sources']


iface = gr.Interface(
    fn=get_answer,
    inputs=[
        gr.Textbox(label="OpenAI API key", type="password"),
        gr.Textbox(label="YouTube link"),
        gr.Textbox(label="Question"),
    ],
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Sources (transcript timestamps)"),
    ],
    examples=[
        ["sk-...", "https://www.youtube.com/watch?v=xNAm9O_duSA", "Who could be the next Prime Minister?"]
    ],
)

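# queue() lets long-running transcription jobs finish without hitting
# request timeouts; launch() starts the web app.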
iface.queue().launch()