import os
import pdb
import pickle
from langchain.llms import OpenAI
from langchain.vectorstores.faiss import FAISS
from langchain.chains import ChatVectorDBChain
from langchain.prompts.prompt import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings
# Prompt used by the chain to condense the chat history plus a follow-up
# question into a single standalone question before retrieval.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
You can assume the discussion is about the video content.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
# Prompt used to answer the (condensed) question from the retrieved video
# transcript chunks. Fixed garbled wording that was sent verbatim to the
# model: "connet these information" -> "connect this information".
qa_template = """You are an AI assistant designed for answering questions about a video.
You are given a document and a question, the document records what people see and hear from this video.
Try to connect this information and provide a conversational answer.
Question: {question}
=========
{context}
=========
"""
QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["question", "context"])
class LlmReasoner():
    """Conversational question answering over a video's transcript log.

    For each ``video_id`` a ``{video_id}.log`` document (what people see and
    hear in the video) is split, embedded, and indexed into a FAISS
    vectorstore, which is cached as ``{video_id}.pkl``. Questions are then
    answered by a ChatVectorDBChain that retrieves the top-k chunks and keeps
    the running chat history for follow-up questions.
    """

    def __init__(self, args):
        # Conversation turns as (question, answer) tuples, passed back to the
        # chain so follow-ups can be condensed into standalone questions.
        self.history = []
        self.gpt_version = args.gpt_version
        self.data_dir = args.data_dir   # location of the {video_id}.log transcripts
        self.tmp_dir = args.tmp_dir     # location of the pickled {video_id}.pkl vectorstores
        self.qa_chain = None
        self.vectorstore = None
        self.top_k = 3                  # number of chunks retrieved per question
        # temperature=0 keeps answers deterministic for a given context.
        self.llm = OpenAI(temperature=0, model_name=self.gpt_version)

    def _build_qa_chain(self):
        """(Re)build ``self.qa_chain`` from the current ``self.vectorstore``.

        Shared by exist_vectorstore/create_vectorstore, which previously
        duplicated this construction verbatim.
        """
        self.qa_chain = ChatVectorDBChain.from_llm(
            self.llm,
            self.vectorstore,
            qa_prompt=QA_PROMPT,
            condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        )
        self.qa_chain.top_k_docs_for_context = self.top_k

    def exist_vectorstore(self, video_id):
        """Restore a cached vectorstore for ``video_id`` if one exists.

        Returns True (with ``self.qa_chain`` ready) when both the pickle and
        the source log file are present; otherwise returns False.
        """
        pkl_path = os.path.join(self.tmp_dir, f"{video_id}.pkl")
        log_path = os.path.join(self.data_dir, f"{video_id}.log")
        if os.path.exists(pkl_path) and os.path.exists(log_path):
            # NOTE(review): unpickling is only acceptable because these files
            # are written locally by create_vectorstore — never point tmp_dir
            # at untrusted data (pickle.load can execute arbitrary code).
            with open(pkl_path, 'rb') as file:
                self.vectorstore = pickle.load(file)
            self._build_qa_chain()
            return True
        return False

    def create_vectorstore(self, video_id):
        """Build (or reuse) the vectorstore for ``video_id`` and ready the QA chain.

        If the pickle cache exists it is loaded; otherwise the log file is
        split, embedded, indexed with FAISS, kept in memory, and cached to
        disk (the original re-read the pickle it had just written).
        """
        pkl_path = os.path.join(self.tmp_dir, f"{video_id}.pkl")
        if os.path.exists(pkl_path):
            # Cache hit: skip the (slow, paid) embedding step entirely.
            with open(pkl_path, 'rb') as file:
                self.vectorstore = pickle.load(file)
        else:
            loader = UnstructuredFileLoader(os.path.join(self.data_dir, f"{video_id}.log"))
            raw_documents = loader.load()
            # Split the transcript into chunks sized for embedding.
            text_splitter = RecursiveCharacterTextSplitter()
            documents = text_splitter.split_documents(raw_documents)
            # Embed the chunks and index them in FAISS.
            embeddings = OpenAIEmbeddings()
            self.vectorstore = FAISS.from_documents(documents, embeddings)
            # Cache to disk so later sessions can skip embedding.
            with open(pkl_path, "wb") as f:
                pickle.dump(self.vectorstore, f)
        self._build_qa_chain()

    def __call__(self, question):
        """Answer ``question`` with history-aware retrieval and record the turn."""
        print(f"Question: {question}")
        response = self.qa_chain({"question": question, "chat_history": self.history})["answer"]
        self.history.append((question, response))
        print(f"Assistant: {response}")
        print("\n")
        return response

    def clean_history(self):
        """Forget all recorded (question, answer) turns."""
        self.history = []