nerella committed on
Commit
7f1d8d0
1 Parent(s): 2c6ac4c

creating app.py file

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Application setup: environment, model configuration, storage paths ---

import os

from dotenv import load_dotenv
import gradio as gr
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from sentence_transformers import SentenceTransformer

# Pull HF_TOKEN (and any other secrets) out of a local .env file.
load_dotenv()

# Global llama-index configuration: a remote Llama-3 inference endpoint for
# generation and a small BGE model for embeddings.
Settings.llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),  # NOTE(review): None if .env is missing — verify the endpoint accepts that
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# On-disk locations: where the vector store is persisted, and where the
# source documents (PDFs) live.
PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'

# Create both directories up front so later reads/writes cannot fail on a
# missing path.
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Module-level (query, response) conversation log shared across requests.
current_chat_history = []
def data_ingestion_from_directory():
    """Build a vector index from every file in PDF_DIRECTORY and persist it.

    Loads all documents under ``PDF_DIRECTORY`` with SimpleDirectoryReader,
    embeds them into a ``VectorStoreIndex`` via the globally configured
    embedding model, and writes the index to ``PERSIST_DIR`` so that
    ``handle_query`` can reload it later.
    """
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    # Fix: the original also called StorageContext.from_defaults() and bound
    # it to an unused local; VectorStoreIndex.from_documents creates its own
    # storage context, so that line was dead code.
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
def handle_query(query):
    """Answer a user query against the persisted vector index.

    Reloads the index from ``PERSIST_DIR``, queries it with a QA prompt
    template plus a transcript of the prior conversation, sanitizes the raw
    model output (removing storage-layer artifacts), records the exchange in
    the module-level ``current_chat_history``, and returns the cleaned
    response string.
    """
    import re  # local import: used only for the sanitization step below

    chat_text_qa_msgs = [
        (
            "user",
            """
            You are the JackNJill Solutions chatbot. Your goal is to provide accurate, professional, and helpful answers to user queries based on the company's data. Always ensure your responses are clear and concise.

            Context:
            {context_str}

            Question:
            {query_str}
            """
        )
    ]

    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    # Reload the index previously persisted by data_ingestion_from_directory().
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    # Fold prior turns (most recent first) into a context string for the model.
    context_str = ""
    for past_query, response in reversed(current_chat_history):
        if past_query.strip():
            context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"

    query_engine = index.as_query_engine(text_qa_template=text_qa_template, context_str=context_str)
    answer = query_engine.query(query)

    # llama-index may return a Response object or a plain dict.
    if hasattr(answer, 'response'):
        response = answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        response = answer['response']
    else:
        response = "Sorry, as per my current knowledge I am unable to answer this question. Is there anything else I can help you with?"

    # Scrub storage-layer details from the reply. Fix: the directory names are
    # removed only as whole words — the original plain str.replace() on
    # PERSIST_DIR ("db") and PDF_DIRECTORY ("data") mangled ordinary words
    # such as "database".
    for dir_name in (PERSIST_DIR, PDF_DIRECTORY):
        response = re.sub(rf"\b{re.escape(dir_name)}\b", "", response)
    for keyword in ["/", "\\", ".pdf", ".doc", ".txt"]:
        response = response.replace(keyword, "")

    # Truncate everything after any leaked document-metadata marker.
    unwanted_sections = ["Page Label", "Page Label:", "page_label", "page_label:", "file_path:", "file_path"]
    for section in unwanted_sections:
        if section in response:
            response = response.split(section)[0]

    # Collapse whitespace artifacts left behind by the removals above.
    response = ' '.join(response.split())

    # Remember this turn so later queries can use it as conversation context.
    # NOTE(review): this list grows without bound across the process lifetime.
    current_chat_history.append((query, response))

    return response
# Build (or rebuild) the vector index from the PDF folder at startup.
print("Processing PDF ingestion from directory:", PDF_DIRECTORY)
data_ingestion_from_directory()

# Gradio widgets: a prompt box for the user and a plain textbox for answers.
input_component = gr.Textbox(
    show_label=False,
    placeholder="Ask me anything about JackNJill Solutions..."
)
output_component = gr.Textbox()
def chatbot_handler(query):
    """Gradio callback: delegate the user's question straight to handle_query."""
    return handle_query(query)
# Wire the handler and widgets into a single-turn Gradio app.
interface = gr.Interface(
    fn=chatbot_handler,
    inputs=input_component,
    outputs=output_component,
    title="Welcome to JackNJill Solutions",
    description="I am here to assist you with any questions you have about JackNJill Solutions. How can I help you today?"
)

# Start the web UI (blocks until the server is stopped).
interface.launch()