adnaniqbal001 committed on
Commit
3ba2207
·
verified ·
1 Parent(s): 6a1908f

Create qabot.py

Browse files
Files changed (1) hide show
  1. qabot.py +106 -0
qabot.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ibm_watsonx_ai.foundation_models import ModelInference
2
+ from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
3
+ from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
4
+ from ibm_watsonx_ai import Credentials
5
+ from langchain_ibm import WatsonxLLM, WatsonxEmbeddings
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores import Chroma
8
+ from langchain_community.document_loaders import PyPDFLoader
9
+ from langchain.chains import RetrievalQA
10
+
11
+ import gradio as gr
12
+
13
+ # You can use this section to suppress warnings generated by your code:
14
import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing; stands in for ``warnings.warn``."""
    return None


# Silence warnings that go through the filter machinery, and replace the
# stdlib entry point with the no-op above so direct calls are dropped too.
warnings.filterwarnings('ignore')
warnings.warn = warn
19
+
20
+ ## LLM
21
def get_llm():
    """Build the watsonx.ai text-generation client used to answer questions.

    Returns:
        A ``WatsonxLLM`` for the Mixtral 8x7B instruct model, configured with
        a maximum of 256 new tokens and a temperature of 0.5.
    """
    generation_params = {
        GenParams.MAX_NEW_TOKENS: 256,
        GenParams.TEMPERATURE: 0.5,
    }
    return WatsonxLLM(
        model_id='mistralai/mixtral-8x7b-instruct-v01',
        url="https://us-south.ml.cloud.ibm.com",
        project_id="skills-network",
        params=generation_params,
    )
35
+
36
+ ## Document loader
37
def document_loader(file):
    """Load a PDF and return its contents as loaded documents.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload wrapper that exposes the path through a ``name`` attribute.

    Returns:
        The documents produced by ``PyPDFLoader(...).load()``.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was uploaded).
    """
    import os  # local import: only needed for path normalisation here

    if file is None:
        raise ValueError("No PDF file was provided.")

    # Plain paths (including str subclasses) are used directly. Checking this
    # first matters because ``pathlib.Path.name`` is only the basename, not
    # the full path. Anything else is treated as an upload wrapper.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    loader = PyPDFLoader(pdf_path)
    return loader.load()
41
+
42
+ ## Text splitter
43
def text_splitter(data):
    """Split loaded documents into overlapping chunks.

    Args:
        data: Documents to split; passed to ``split_documents``.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter`` with a
        chunk size of 1000 and an overlap of 50, both measured with ``len``.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 50,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(data)
51
+
52
+ ## Vector db
53
def vector_database(chunks):
    """Embed the chunks and index them in a Chroma vector store.

    Args:
        chunks: Document chunks handed to ``Chroma.from_documents``.

    Returns:
        The Chroma store built from ``chunks`` using the embedding model
        returned by ``watsonx_embedding()``.
    """
    embedder = watsonx_embedding()
    return Chroma.from_documents(chunks, embedder)
57
+
58
+ ## Embedding model
59
def watsonx_embedding():
    """Create the watsonx.ai embedding client used to index document chunks.

    Returns:
        A ``WatsonxEmbeddings`` instance for ``ibm/slate-125m-english-rtrvr``.
    """
    embed_params = {
        # Truncate only at the model's input limit. A very small value (such
        # as 3) would embed just the first few tokens of every chunk, making
        # all chunks look alike to the retriever.
        # NOTE(review): 512 is the documented input limit for slate-125m;
        # confirm it if the model_id below changes.
        EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: 512,
        EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
    }
    return WatsonxEmbeddings(
        model_id="ibm/slate-125m-english-rtrvr",
        url="https://us-south.ml.cloud.ibm.com",
        project_id="skills-network",
        params=embed_params,
    )
71
+
72
+ ## Retriever
73
def retriever(file):
    """Turn an uploaded PDF into a retriever over its embedded chunks.

    Args:
        file: The uploaded file, forwarded to ``document_loader``.

    Returns:
        The result of ``as_retriever()`` on the vector store built from the
        file's chunks.
    """
    documents = document_loader(file)
    store = vector_database(text_splitter(documents))
    return store.as_retriever()
79
+
80
+ ## QA Chain
81
def retriever_qa(file, query):
    """Answer a question about an uploaded PDF with a retrieval QA chain.

    Args:
        file: The uploaded PDF, forwarded to ``retriever``.
        query: The question passed to the chain's ``invoke``.

    Returns:
        The ``'result'`` entry of the chain's response.
    """
    # Dict order keeps the original call order: LLM first, then the retriever.
    chain_options = {
        "llm": get_llm(),
        "chain_type": "stuff",
        "retriever": retriever(file),
        "return_source_documents": False,
    }
    qa_chain = RetrievalQA.from_chain_type(**chain_options)
    answer = qa_chain.invoke(query)
    return answer['result']
90
+
91
+
92
+ # Create Gradio interface
93
# Input widgets: a single-PDF upload (drag and drop) and a question box.
_rag_inputs = [
    gr.File(
        label="Upload PDF File",
        file_count="single",
        file_types=['.pdf'],
        type="filepath",
    ),
    gr.Textbox(
        label="Input Query",
        lines=2,
        placeholder="Type your question here...",
    ),
]

# Wire the QA function to the widgets above; flagging is turned off.
rag_application = gr.Interface(
    fn=retriever_qa,
    inputs=_rag_inputs,
    outputs=gr.Textbox(label="Output"),
    allow_flagging="never",
    title="RAG Chatbot",
    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document.",
)

# Launch the app on all interfaces, port 7860.
rag_application.launch(server_name="0.0.0.0", server_port=7860)