Spaces:
Running
Running
File size: 6,069 Bytes
3c54435 bfd2b47 3c54435 bfd2b47 7a4de72 bfd2b47 3c54435 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import pandas as pd
import streamlit as st
import google.generativeai as palm
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
# from langchain.llms import CTransformers
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA,ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.cache import InMemoryCache
from langchain.llms import VLLM
from langchain.memory.buffer import ConversationBufferMemory
from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
import gradio as gr
import requests
import os
from langchain.embeddings import HuggingFaceBgeEmbeddings
# models = [m for m in palm.list_models() if "generateText" in m.supported_generation_methods]
# model = models[0].name
# print('Imports Done')
# Folder holding the prebuilt FAISS index that is loaded at start-up.
db_path = './vectordb/db_faiss'

# The index is NOT rebuilt on start-up. The one-off build that produced it is
# disabled; for the record it did the following:
#   1. loaded every "*.pdf" in the project folder with
#      DirectoryLoader(..., glob="*.pdf", loader_cls=PyPDFLoader)
#   2. split the pages with
#      RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
#   3. ran FAISS.from_documents(chunks, embeddings) and save_local(db_path)
# Presumably the index on disk was embedded with the same model configured
# below -- confirm before swapping the embedding model.

model_name = "BAAI/bge-base-en"
# set True to compute cosine similarity
encode_kwargs = dict(normalize_embeddings=True)
model_norm = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_name=model_name,
)
embeddings = model_norm

# NOTE(review): FAISS.load_local is understood to unpickle the stored
# docstore, so only point db_path at an index folder built by this project --
# confirm against the installed LangChain version.
db = FAISS.load_local(db_path, embeddings)
# Progress marker written to stdout while the script starts up.
print('Prompt Chain')

# System instructions plus the two slots filled in for every query:
# {context} and {question}.
# NOTE(review): the wording below contains several typos. It is runtime text
# sent to the model, so it is deliberately left untouched here -- proofread it
# in a separate change and re-check the bot's answers afterwards.
custom_prompt_template = """You are a helpful bot designd for MLSC TIET that is Microsoft Student Learn Chapter,TIET which a technical society for thir website your task is to answer all queries about MLSC every answer you provide should be i context of MLSC if any question is not in that context then yyou should ecline that question by saying 'It is out of context',if you don't know the answer don't try to make it up just politely decline that question,you can extrapolayte the things a little just to be more informative but dont sound boasty and exaggerating say something else out of the context of the document,don't answer any questions that pertain to any specific persons and if questions about roles demnad names of position holders of MLSC give a general description of role instead of person
You can accept some basic greetings to interact with the user but be sure to remisn in context of MLSC only
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""

# Template object later handed to the QA chain through chain_type_kwargs.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=custom_prompt_template,
)
# Conversation buffer. The RetrievalQA chain built below does not receive it;
# only a disabled ConversationalRetrievalChain variant (retriever k=2,
# return_source_documents=False, memory=memory) used it.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

print('Creating LLM')
# GooglePalm names its output-length cap `max_output_tokens`. The keyword
# previously passed here, `max_new_tokens`, is not a GooglePalm field, so the
# intended 1024-token limit was not being applied.
llm2 = GooglePalm(
    max_output_tokens=1024,
    top_k=10,
    top_p=0.5,
    temperature=0.5,
)

# Start-up smoke test: one real LLM call whose answer is only printed.
# NOTE(review): this runs every time the script executes -- consider removing
# it or guarding it behind a debug flag.
print(llm2("What is the capital of France ?"))

# "stuff" chain: the 5 retrieved chunks are placed into the custom prompt's
# {context} slot and sent to the LLM together with the question.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm2,
    chain_type='stuff',
    retriever=db.as_retriever(search_kwargs={'k': 5}),
    return_source_documents=False,
    chain_type_kwargs={'prompt': prompt},
)
# Running log of every question/answer pair handled by qa_bot.
history_df = pd.DataFrame(columns=['Question', 'Answer'])


def qa_bot(query):
    """Answer *query* with the retrieval QA chain and log the exchange.

    The chain's response dict is printed, appended to the module-level
    ``history_df`` (its ``query``/``result`` keys become the ``Question``/
    ``Answer`` columns), and the whole history is rewritten to
    ``./responses.csv``.

    Args:
        query: The user's question text.

    Returns:
        The value stored under ``'result'`` in the chain's response.
    """
    # NOTE(review): when run under Streamlit the script presumably re-executes
    # on each interaction, which would reset history_df and leave only the
    # latest exchange in the CSV -- confirm whether persistence is wanted.
    global history_df

    response = qa_chain({'query': query})
    print(response)

    latest_row = pd.DataFrame.from_dict([response]).rename(
        columns={'query': 'Question', 'result': 'Answer'}
    )
    # ignore_index renumbers the rows 0..n-1 in the same step as the append.
    history_df = pd.concat([history_df, latest_row], ignore_index=True)

    history_df.to_csv('./responses.csv')
    print(history_df)
    return response['result']
# ---- Streamlit chat page ----
st.title("MLSCBot")
st.image('./banner.png', use_column_width=True)

# The chat transcript lives in session state as {"role", "content"} dicts.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Redraw every stored turn before handling new input.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# The input is bound to `user_query`, not `prompt`: `prompt` is already the
# module-level PromptTemplate given to the QA chain, and rebinding it here
# replaced that object with the user's text (or None when nothing was typed).
if user_query := st.chat_input("Hello!How can I help you?"):
    st.session_state.messages.append({"role": "user", "content": user_query})
    with st.chat_message("user"):
        st.markdown(user_query)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = qa_bot(user_query)
        # The cursor glyph is shown first, then replaced by the final text.
        message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)

    st.session_state.messages.append({"role": "assistant", "content": full_response})
# with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
# title = gr.HTML("<h1>MLSCBot</h1>")
# with gr.Row():
# img = gr.Image('C:/Users/HP/Downlo0ads/banner.png',label = 'MLSC Logo',show_label = False,elem_id = 'image',height = 200)
# input = gr.Textbox(label="How can I assist you?") # Textbox for user input
# output = gr.Textbox(label="Here you go:") # Textbox for chatbot response
# btn = gr.Button(value="Answer",elem_classes="button-chatbot",variant = "primary") # Button to trigger the agent call
# btn.click(fn=qa_bot, inputs=input,outputs=output)
# demo.launch(share=True, debug=True,show_api = False,show_error = False)
|