|
|
|
|
|
"""Doc_chat_vegleges_like.ipynb |
|
|
|
|
|
Automatically generated by Colaboratory. |
|
|
|
|
|
Original file is located at |
|
|
https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV |
|
|
|
|
|
# Setting up environment |
|
|
""" |
|
|
|
|
|
from PyPDF2 import PdfReader |
|
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from langchain_community.vectorstores import ElasticVectorSearch, Pinecone, Weaviate |
|
|
from langchain_community.vectorstores import FAISS |
|
|
|
|
|
|
|
|
|
|
|
import os

# Verify that the required credentials are present WITHOUT printing their
# values: the original code echoed the OpenAI API key and the HF dataset
# token to stdout, leaking secrets into logs.
for _required_var in ("OPENAI_API_KEY", "DATASET_ACCES"):
    if not os.environ.get(_required_var):
        print(f"WARNING: environment variable {_required_var} is not set")
|
|
"""# Preprocessing document""" |
|
|
|
|
|
|
|
|
# Load the reference document; the path is relative to the working directory.
reader = PdfReader('samu-en-567.pdf')

# Concatenate the extractable text of every page. ''.join over a generator
# avoids the quadratic cost of repeated string +=; pages with no extractable
# text (extract_text() returning None or "") are skipped, as before.
raw_text = ''.join(
    page_text
    for page in reader.pages
    if (page_text := page.extract_text())
)
|
|
|
|
|
|
|
|
|
|
|
# Split the raw text into overlapping chunks on newline boundaries so each
# piece fits comfortably in the embedding model's context; the 150-char
# overlap preserves continuity between adjacent chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=150,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# A bare `len(texts)` only displays in a notebook cell; print it so the
# chunk count is also visible when this runs as a script.
print(f"Number of chunks: {len(texts)}")
|
|
|
|
|
"""## Setting up doc search""" |
|
|
|
|
|
# Build the in-memory FAISS vector index over the chunks using OpenAI
# embeddings; `doc_search` backs the similarity lookup performed in chat().
doc_search = FAISS.from_texts(texts, OpenAIEmbeddings())
|
|
|
|
|
"""# Setting up chatbot""" |
|
|
|
|
|
from langchain.chains.question_answering import load_qa_chain |
|
|
from langchain.memory import ConversationBufferWindowMemory |
|
|
from langchain.prompts import PromptTemplate |
|
|
from langchain_openai import OpenAI |
|
|
|
|
|
# Prompt that constrains the model to answer strictly from the retrieved
# document chunks. Typos in the original prompt ("certanly dont", plural
# agreement) are fixed so the instruction reads unambiguously.
template = """You are a chatbot having a conversation with a human.

Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
Any question outside of the document is irrelevant and you certainly don't know! If you cannot find the answer say "The document does not contain that information."

{context}

{chat_history}
Human: {human_input}
Chatbot:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input", "context"], template=template
)

# Keep only the last k=3 exchanges so the rendered prompt stays small and
# within the model's context window.
memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input", k=3)

# "stuff" chain: all retrieved chunks are stuffed into {context} at once.
chain = load_qa_chain(OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
|
|
|
|
|
"""# Demo |
|
|
|
|
|
## Setting up methods |
|
|
""" |
|
|
|
|
|
def chat(query, history):
    """Answer `query` from the indexed document.

    `history` is supplied by the gradio ChatInterface but is unused here:
    conversational context comes from the chain's own window memory.
    """
    relevant_docs = doc_search.similarity_search(query)
    result = chain(
        {"input_documents": relevant_docs, "human_input": query},
        return_only_outputs=True,
    )
    return result['output_text']
|
|
|
|
|
"""## Setting up UI with gradio""" |
|
|
|
|
|
import gradio as gr |
|
|
from huggingface_hub import HfFileSystem |
|
|
|
|
|
# Authenticated Hugging Face filesystem client used to persist like/dislike
# feedback to the dataset repo. NOTE(review): the env var name
# "DATASET_ACCES" (single S) appears to be intentional — it matches the
# lookup earlier in the file; keep the spelling consistent everywhere.
fs = HfFileSystem(token=os.environ.get('DATASET_ACCES'))
|
|
|
|
|
def write_to_file(file_name, content):
    """Append `content` as a new line to `file_name` in the HF dataset repo.

    HfFileSystem is opened in read-then-write mode (presumably because
    append mode is not reliable on the HF backend — TODO confirm), so the
    file is read in full and rewritten with the new line attached. Unlike
    the original, a missing file no longer raises: it is created with
    `content` alone. The debug print of the whole file (which leaked
    collected feedback to stdout) is removed.
    """
    # Plain concatenation: the original used an f-string with no placeholders.
    file_path = "datasets/mgreg555/samu_reference_book/" + file_name

    try:
        with fs.open(file_path, "r") as file_old:
            content_old = file_old.read()
    except FileNotFoundError:
        content_old = None

    with fs.open(file_path, "w") as file:
        if content_old is None:
            file.write(content)
        else:
            file.write(f"{content_old}\n" + content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def vote(tmp, index_state, data: gr.LikeData):
    """Record like/dislike feedback on a bot answer.

    Liked answers are appended to good.txt, disliked ones to bad.txt, in
    the form "answer;question".
    """
    answer = data.value
    file_name = 'good.txt' if data.liked else 'bad.txt'

    # find_previous_question returns None when the answer has rotated out
    # of the chain's k=3 memory window; the original code then crashed on
    # str + None. Store a placeholder instead.
    question = find_previous_question(answer)
    write_to_file(file_name, answer + ';' + (question if question is not None else 'unknown'))
|
|
|
|
|
def find_previous_question(answer_string):
    """Return the Human question that preceded `answer_string` in memory.

    Scans the chain's buffered conversation ("Human: ..." / "AI: ..."
    lines) and returns the question whose AI reply matches
    `answer_string` exactly, or None when no match is found (e.g. the
    exchange has already rotated out of the k=3 memory window).
    """
    # The original kept an unused `last_question` variable; removed.
    current_question = None
    for line in chain.memory.buffer.split('\n'):
        if line.startswith('Human:'):
            # len-based slicing replaces the magic offsets 7 and 3 of the
            # original; strip() absorbs the separating space either way.
            current_question = line[len('Human:'):].strip()
        elif line.startswith('AI:') and line[len('AI:'):].strip() == answer_string:
            return current_question
    return None
|
|
|
|
|
# Likeable chat widget; it is shared with the ChatInterface below so that
# thumbs-up/down events can be wired to the vote() feedback handler.
chatbot = gr.Chatbot(height=600, likeable=True)
|
|
|
|
|
|
|
|
# Assemble the gradio UI: a ChatInterface backed by chat(), plus a like
# handler that records feedback to the HF dataset.
with gr.Blocks() as demo:
    # Hidden state / textbox carried through the like-event wiring below.
    index_state = gr.State(value=[])
    tmp = gr.Textbox(visible=False, value="")
    gr.ChatInterface(
        chat,
        chatbot=chatbot,
        title="Doc-chat",
        description="Ask about SAMU!",
        theme="soft",
        # NOTE(review): cache_examples=True runs chat() on every example at
        # startup, i.e. makes OpenAI API calls at launch — confirm intended.
        examples=["What is SAMU?","What is the capital of France?"],
        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    # NOTE(review): vote() returns None while two outputs are declared
    # here — verify your gradio version tolerates this on like events.
    chatbot.like(vote, [tmp, index_state], [tmp, index_state])

demo.launch()