Vira21 commited on
Commit
8c0f8b8
โ€ข
1 Parent(s): da114b1

Updated Agent RAG

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. app.py +84 -44
  3. chainlit.md +16 -2
  4. venv/Scripts/activate +1 -1
  5. venv/Scripts/activate.bat +1 -1
  6. venv/pyvenv.cfg +1 -1
.gitignore CHANGED
@@ -1 +1,2 @@
1
  __pycache__/
 
 
1
  __pycache__/
2
+ venv/
app.py CHANGED
@@ -11,9 +11,16 @@ from aimakerspace.openai_utils.embedding import EmbeddingModel
11
  from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
 
 
 
14
 
15
  system_template = """\
16
- Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
 
 
 
 
17
  system_role_prompt = SystemRolePrompt(system_template)
18
 
19
  user_prompt_template = """\
@@ -49,74 +56,107 @@ class RetrievalAugmentedQAPipeline:
49
 
50
  text_splitter = CharacterTextSplitter()
51
 
52
-
53
  def process_text_file(file: AskFileResponse):
54
- import tempfile
55
-
56
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
57
  temp_file_path = temp_file.name
58
-
59
- with open(temp_file_path, "wb") as f:
60
- f.write(file.content)
61
 
62
  text_loader = TextFileLoader(temp_file_path)
63
  documents = text_loader.load_documents()
64
  texts = text_splitter.split_texts(documents)
65
  return texts
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  @cl.on_chat_start
69
  async def on_chat_start():
70
- files = None
71
-
72
- # Wait for the user to upload a file
73
- while files == None:
74
- files = await cl.AskFileMessage(
75
- content="Please upload a Text File file to begin!",
76
- accept=["text/plain"],
77
- max_size_mb=2,
78
- timeout=180,
79
- ).send()
80
-
81
- file = files[0]
82
-
83
- msg = cl.Message(
84
- content=f"Processing `{file.name}`...", disable_human_feedback=True
85
- )
86
- await msg.send()
87
 
88
- # load the file
89
- texts = process_text_file(file)
90
 
91
- print(f"Processing {len(texts)} text chunks")
 
92
 
93
- # Create a dict vector store
94
- vector_db = VectorDatabase()
95
- vector_db = await vector_db.abuild_from_list(texts)
96
-
97
- chat_openai = ChatOpenAI()
 
 
 
 
 
98
 
99
- # Create a chain
100
- retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
101
- vector_db_retriever=vector_db,
102
- llm=chat_openai
103
- )
104
-
105
- # Let the user know that the system is ready
106
- msg.content = f"Processing `{file.name}` done. You can now ask questions!"
107
- await msg.update()
108
 
109
- cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
110
 
 
111
 
112
  @cl.on_message
113
  async def main(message):
114
  chain = cl.user_session.get("chain")
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  msg = cl.Message(content="")
117
  result = await chain.arun_pipeline(message.content)
118
 
119
  async for stream_resp in result["response"]:
120
  await msg.stream_token(stream_resp)
121
 
122
- await msg.send()
 
11
  from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
+ import tempfile
15
+ import pandas as pd
16
+ import pdfplumber
17
 
18
  system_template = """\
19
+ Use the following context to answer the user's question. If you cannot find the answer in the context,
20
+ say you don't know the answer. Additionally, if the user requests a summary or context overview,
21
+ generate an engaging and concise summary that captures the main ideas with an interesting and appealing tone.
22
+
23
+ """
24
  system_role_prompt = SystemRolePrompt(system_template)
25
 
26
  user_prompt_template = """\
 
56
 
57
  text_splitter = CharacterTextSplitter()
58
 
 
59
def process_text_file(file: AskFileResponse):
    """Persist an uploaded plain-text file to disk and split it into chunks.

    The upload's raw bytes are written to a named temporary file so that
    TextFileLoader (which works from a filesystem path) can read them back.

    Args:
        file: The Chainlit upload; its ``content`` attribute holds raw bytes.

    Returns:
        A list of text chunks produced by the module-level ``text_splitter``.
    """
    # delete=False keeps the file on disk after the context exits so the
    # loader can open it by path (required on Windows, where a still-open
    # NamedTemporaryFile cannot be reopened).
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".txt") as tmp:
        tmp.write(file.content)
        tmp_path = tmp.name

    loader = TextFileLoader(tmp_path)
    return text_splitter.split_texts(loader.load_documents())
68
 
69
def process_pdf_file(file: AskFileResponse):
    """Persist an uploaded PDF to disk, extract its text, and split it into chunks.

    Args:
        file: The Chainlit upload; its ``content`` attribute holds raw PDF bytes.

    Returns:
        A list of text chunks produced by the module-level ``text_splitter``.
    """
    # delete=False keeps the file on disk after the context exits so
    # pdfplumber can open it by path.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".pdf") as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    page_texts = []
    with pdfplumber.open(temp_file_path) as pdf:
        for page in pdf.pages:
            # Fix: pdfplumber's extract_text() returns None for pages with no
            # extractable text (e.g. scanned images); the previous
            # `extracted_text += page.extract_text()` raised TypeError there.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)

    # Same concatenation as before (no separator between pages).
    extracted_text = "".join(page_texts)

    texts = text_splitter.split_texts([extracted_text])
    return texts
81
+
82
def process_csv_file(file: AskFileResponse):
    """Persist an uploaded CSV to disk and split its rows into text chunks.

    Each CSV row is flattened into a single space-joined string so that one
    record stays together when handed to the splitter.

    Args:
        file: The Chainlit upload; its ``content`` attribute holds raw CSV bytes.

    Returns:
        A list of text chunks produced by the module-level ``text_splitter``.
    """
    # delete=False keeps the file on disk after the context exits so
    # pandas can read it back by path.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".csv") as tmp:
        tmp_path = tmp.name
        tmp.write(file.content)

    frame = pd.read_csv(tmp_path)
    # One string per row: every cell stringified, then space-joined.
    row_strings = frame.apply(lambda record: ' '.join(record.astype(str)), axis=1).tolist()
    return text_splitter.split_texts(row_strings)
90
 
91
@cl.on_chat_start
async def on_chat_start():
    """Collect uploads (txt / pdf / csv), extract their text, and stash the
    combined chunks in the user session under "all_texts".

    The RAG chain itself is NOT built here — it is constructed lazily on the
    first message (see the on_message handler), from the stored chunk list.
    """
    cl.user_session.set("all_texts", [])

    files = await cl.AskFileMessage(
        content="Please upload one or more Text, PDF, or CSV files to begin!",
        accept=["text/plain", "application/pdf", "text/csv"],
        max_size_mb=20,
        timeout=180,
    ).send()

    if not files:
        await cl.Message(content="No files were uploaded. Please upload at least one file to proceed.").send()
        return

    all_texts = cl.user_session.get("all_texts", [])

    for file in files:
        # Dispatch on the filename extension — the MIME accept-list above is
        # advisory only, so an unexpected extension can still arrive.
        file_type = file.name.split(".")[-1].lower()

        msg = cl.Message(content=f"Processing `{file.name}`...", disable_human_feedback=True)
        await msg.send()

        # Process each file based on its type
        if file_type == "txt":
            texts = process_text_file(file)
        elif file_type == "pdf":
            texts = process_pdf_file(file)
        elif file_type == "csv":
            texts = process_csv_file(file)
        else:
            await cl.Message(content=f"Unsupported file type: `{file.name}`. Please upload text, PDF, or CSV files.").send()
            continue

        all_texts.extend(texts)  # Combine texts from all uploaded files

    cl.user_session.set("all_texts", all_texts)

    # Fix: previously this success message was sent unconditionally, even when
    # every upload was skipped as unsupported and the corpus was empty.
    if all_texts:
        await cl.Message(content="Files processed! You can now start asking questions.").send()
    else:
        await cl.Message(content="No text could be extracted from the uploaded files. Please upload a valid text, PDF, or CSV file.").send()
130
 
131
  @cl.on_message
132
  async def main(message):
133
  chain = cl.user_session.get("chain")
134
 
135
+ if not chain:
136
+ all_texts = cl.user_session.get("all_texts")
137
+ if not all_texts:
138
+ await cl.Message(content="Please upload at least one file before asking questions.").send()
139
+ return
140
+
141
+ # Create a dict vector store
142
+ vector_db = VectorDatabase()
143
+ vector_db = await vector_db.abuild_from_list(all_texts)
144
+
145
+ chat_openai = ChatOpenAI()
146
+
147
+ # Create a chain
148
+ retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
149
+ vector_db_retriever=vector_db,
150
+ llm=chat_openai
151
+ )
152
+
153
+ cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
154
+ chain = retrieval_augmented_qa_pipeline
155
+
156
  msg = cl.Message(content="")
157
  result = await chain.arun_pipeline(message.content)
158
 
159
  async for stream_resp in result["response"]:
160
  await msg.stream_token(stream_resp)
161
 
162
+ await msg.send()
chainlit.md CHANGED
@@ -1,3 +1,17 @@
1
- # Welcome to Chat with Your Text File
2
 
3
- With this application, you can chat with an uploaded text file that is smaller than 2MB!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # โœจ๐Ÿ”ฎ Dive into Data Magic with Chat-to-Chart! ๐Ÿ“Š๐Ÿ’ฌ
2
 
3
+ Welcome to the ultimate AI-powered assistant, designed especially for **Business Owners**, **Stakeholders**, **CEOs**, and other **non-technical professionals**! ๐Ÿ’ผ๐Ÿš€
4
+
5
+ Seamlessly interact with your data by uploading:
6
+ - ๐Ÿ“„ **Text Files** (under 2MB)
7
+ - ๐Ÿ“š **PDFs**
8
+ - ๐Ÿ“Š **CSV Files**
9
+
10
+ ๐Ÿ”— **Connect directly to your database** and let the insights flow! Whether you need:
11
+ - ๐Ÿง  **Instant answers** and insights from your files or database
12
+ - ๐Ÿ” **Effortless SQL querying** in plain language
13
+ - ๐Ÿ“ˆ **Dynamic charts** (bar, line, pie) to visualize your data
14
+
15
+ This tool is all about transforming complex data into clear, actionable insights โ€” as easy as having a conversation! ๐ŸŒ๐Ÿค–
16
+
17
+ Itโ€™s not just data analysis; itโ€™s **Chat-to-Chart**! ๐Ÿ“Š๐Ÿ’ฌ Upload your files, connect your database, and watch the magic happen. โœจ
venv/Scripts/activate CHANGED
@@ -35,7 +35,7 @@ deactivate () {
35
  # unset irrelevant variables
36
  deactivate nondestructive
37
 
38
- VIRTUAL_ENV="D:\DataTicon\AIE4 Pythonic RAG\AIE4-DeployPythonicRAG\venv"
39
  export VIRTUAL_ENV
40
 
41
  _OLD_VIRTUAL_PATH="$PATH"
 
35
  # unset irrelevant variables
36
  deactivate nondestructive
37
 
38
+ VIRTUAL_ENV="D:\DataTicon\AIE4 Pythonic RAG - Copy\AIE4-DeployPythonicRAG\venv"
39
  export VIRTUAL_ENV
40
 
41
  _OLD_VIRTUAL_PATH="$PATH"
venv/Scripts/activate.bat CHANGED
@@ -8,7 +8,7 @@ if defined _OLD_CODEPAGE (
8
  "%SystemRoot%\System32\chcp.com" 65001 > nul
9
  )
10
 
11
- set VIRTUAL_ENV=D:\DataTicon\AIE4 Pythonic RAG\AIE4-DeployPythonicRAG\venv
12
 
13
  if not defined PROMPT set PROMPT=$P$G
14
 
 
8
  "%SystemRoot%\System32\chcp.com" 65001 > nul
9
  )
10
 
11
+ set VIRTUAL_ENV=D:\DataTicon\AIE4 Pythonic RAG - Copy\AIE4-DeployPythonicRAG\venv
12
 
13
  if not defined PROMPT set PROMPT=$P$G
14
 
venv/pyvenv.cfg CHANGED
@@ -2,4 +2,4 @@ home = C:\Users\USER\anaconda3
2
  include-system-site-packages = false
3
  version = 3.11.7
4
  executable = C:\Users\USER\anaconda3\python.exe
5
- command = C:\Users\USER\anaconda3\python.exe -m venv D:\DataTicon\AIE4 Pythonic RAG\AIE4-DeployPythonicRAG\venv
 
2
  include-system-site-packages = false
3
  version = 3.11.7
4
  executable = C:\Users\USER\anaconda3\python.exe
5
+ command = C:\Users\USER\anaconda3\python.exe -m venv D:\DataTicon\AIE4 Pythonic RAG - Copy\AIE4-DeployPythonicRAG\venv