elinstallation committed on
Commit
a84e6da
·
verified ·
1 Parent(s): 11b0fb5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +235 -0
app.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import datetime
import random

import gradio as gr
import pandas as pd
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
9
+
10
+ # LOAD FILES
11
+
12
def load_files(path):
    """Read a UTF-8 text file and return its entire contents as one string."""
    with open(path, "r", encoding="utf-8") as file_handle:
        contents = file_handle.read()
    return contents
15
+
16
+
17
# Load the raw knowledge-base text files once at import time.
charities_text = load_files("charities.txt")
financial_advice_text = load_files("financial_advice.txt")
19
+
20
+
21
+ #
22
+
23
+ ###
24
def preprocess_text(text):
    """Split raw text into a list of non-empty, whitespace-trimmed chunks.

    The input is split on newline characters; each piece is stripped of
    surrounding whitespace and blank pieces are discarded.
    """
    # Strip leading/trailing whitespace from the document as a whole first.
    trimmed = text.strip()

    # One chunk per newline-delimited line, keeping only non-empty pieces.
    cleaned_chunks = [
        piece.strip()
        for piece in trimmed.split("\n")
        if piece.strip()
    ]

    return cleaned_chunks
47
+
48
# Pre-clean both corpora into lists of non-empty lines for retrieval.
cleaned_charities = preprocess_text(charities_text)
cleaned_finance = preprocess_text(financial_advice_text)

# Load the pre-trained embedding model that converts text to vectors.
model = SentenceTransformer('all-MiniLM-L6-v2')
53
+
54
+ ### STEP 4
55
def create_embeddings(text_chunks):
    """Encode a list of text chunks into a single tensor of embeddings.

    Args:
        text_chunks: list of strings to embed.

    Returns:
        A 2-D torch tensor with one embedding row per input chunk.
    """
    # convert_to_tensor=True returns one stacked tensor, which
    # get_top_chunks() relies on for its matrix multiplication.
    # The debug prints of the full tensor and its shape that were left in
    # from development have been removed.
    return model.encode(text_chunks, convert_to_tensor=True)
67
+
68
# Embed each corpus once at startup so each query only embeds the query text.
charity_embeddings = create_embeddings(cleaned_charities)
finance_embeddings = create_embeddings(cleaned_finance)
70
+
71
+ ###STEP 5
72
+ # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
73
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the text chunks most similar to *query* by cosine similarity.

    Args:
        query: the user's query string.
        chunk_embeddings: 2-D tensor of embeddings produced by
            create_embeddings() for text_chunks (one row per chunk).
        text_chunks: list of strings aligned with chunk_embeddings rows.
        top_k: maximum number of chunks to return (default 3, matching the
            original hard-coded value).

    Returns:
        List of up to top_k chunks, most similar first. Empty list when
        text_chunks is empty.
    """
    # Guard: torch.topk would raise if asked for more items than exist.
    if not text_chunks:
        return []

    # Convert the query text into a vector embedding.
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Normalize both sides to unit length so the dot product below is the
    # cosine similarity.
    query_normalized = query_embedding / query_embedding.norm()
    chunks_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # Cosine similarity between the query and every chunk at once.
    similarities = torch.matmul(chunks_normalized, query_normalized)

    # Clamp k so a corpus with fewer than top_k chunks no longer crashes.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices

    # Map the winning indices back to their text chunks.
    return [text_chunks[i] for i in top_indices]
101
+
102
# Transaction data: the CSV is read headerless with explicit column names.
# NOTE(review): if the file actually contains a header row, it will be
# ingested as a data row — verify against the CSV itself.
columns = ["TransactionID", "UserID", "Date", "Description", "Amount", "Type", "Extra1", "Extra2"]
spendings = pd.read_csv("september_transactions_detailed.csv", names = columns)
# Coerce Amount to numeric; unparseable values become 0 rather than NaN.
spendings['Amount'] = pd.to_numeric(spendings['Amount'], errors='coerce').fillna(0)
106
+
107
def get_advice(user_id):
    """Summarize one user's spending and flag categories taking over 30% of it.

    Looks up rows for *user_id* in the module-level ``spendings`` DataFrame,
    totals the expense rows, and builds a one-line summary string with
    per-category advice.
    """
    user_data = spendings[spendings['UserID'] == user_id]

    # No rows at all for this user: nothing to summarize.
    if user_data.empty:
        return "No spending data found for this user."

    # Only consider expense-type rows for the totals.
    expenses = user_data[user_data['Type'].str.lower() == "expense"]
    total_spent = expenses['Amount'].sum()
    category_spent = expenses.groupby('Description')['Amount'].sum().to_dict()

    # Flag every category whose spend exceeds 30% of the total.
    threshold = total_spent * 0.3
    advice = [
        f"You spend a lot on {cat}. Consider budgeting here."
        for cat, amt in category_spent.items()
        if amt > threshold
    ]

    if advice:
        advice_text = " | ".join(advice)
    else:
        advice_text = "Your spending looks balanced across categories."

    summary_text = f"Total spent: ${total_spent:.2f}. Category breakdown: {category_spent}. Advice: {advice_text}"
    return summary_text
128
+
129
# Inference API client for the hosted chat model used by respond().
client= InferenceClient("openai/gpt-oss-20b")
# NOTE(review): `information` is never read anywhere in this file — confirm
# whether it can be removed.

information=""
134
+
135
def respond(message, history, chatbot_topic_values, chatbot_mode_values, user_id=1):
    """Generate a chatbot reply grounded in the selected topic and mode.

    Args:
        message: the latest user message.
        history: prior chat turns in Gradio "messages" format (may be None).
        chatbot_topic_values: checkbox selections choosing the RAG corpus.
        chatbot_mode_values: checkbox selections choosing the advice style.
        user_id: which user's transaction data personalizes the advice.

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    # Retrieve supporting chunks from whichever corpus the user ticked.
    topic_chunks = []
    if chatbot_topic_values and "Helping Charities" in chatbot_topic_values:
        topic_chunks = get_top_chunks(message, charity_embeddings, cleaned_charities)
    elif chatbot_topic_values and "Financial Aid" in chatbot_topic_values:
        topic_chunks = get_top_chunks(message, finance_embeddings, cleaned_finance)

    csv_advice = get_advice(user_id)

    if chatbot_mode_values and "General Advice" in chatbot_mode_values:
        role_message = (
            "You are a helpful and insightful chatbot who acts like a financial "
            "advisor of a university student. Respond in under five bullet points, "
            f"under 500 characters, using this context: {topic_chunks}"
        )
    elif chatbot_mode_values and "Personal Advice" in chatbot_mode_values:
        # Fix: the adjacent string literals previously ran together
        # ("...acts like a financial DO NOT ask...") — the advisor sentence
        # and separators between instructions are restored here.
        role_message = (
            "You are a helpful and insightful chatbot who acts like a financial "
            "advisor of a university student. "
            "DO NOT ask the user for additional numbers or input. "
            f"Use the following spending data from the CSV file to provide advice: {csv_advice}"
        )
    else:
        role_message = f"You are a helpful chatbot. Use this context: {topic_chunks}"

    # Fix: the instruction prompt belongs in the "system" role, not
    # "assistant", per the chat-completion message convention.
    messages = [{"role": "system", "content": role_message}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, temperature=0.2)
    return response['choices'][0]['message']['content'].strip()
165
+
166
+
167
### STEP 6
# Sanity-check calls retained from the tutorial scaffolding.
# NOTE(review): neither result below is used anywhere else in this file —
# confirm whether these lines can be removed.
cleaned_chunks = preprocess_text(financial_advice_text)
top_results = get_top_chunks("What financial advice you give me?", finance_embeddings, cleaned_finance)
171
+
172
+
173
# Standalone chat UI bound to respond(). ChatInterface requires at least one
# parameter (a function).
# NOTE(review): this interface is never launched — the gr.Blocks app defined
# further down is the one served. Confirm whether this line is still needed.
chatbot = gr.ChatInterface(respond,type="messages", title="Finance Management Hub", theme="Taithrah/Minimal")
176
+
177
def save_chat_history(history, username):
    """Write a chat transcript to a timestamped text file and return its name.

    Args:
        history: list of chat turns, either (user, bot) tuples or
            {"role": ..., "content": ...} dicts.
        username: name embedded in the filename; falls back to "anonymous".

    Returns:
        The name of the file that was written.

    Fix: `datetime` was referenced here without ever being imported, so every
    call raised NameError; the module-level imports now provide it.
    """
    if not username:
        username = "anonymous"

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"chat_history_{username}_{timestamp}.txt"

    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"Chat History for {username} - {timestamp}\n\n")
        for exchange in history:
            if isinstance(exchange, tuple) and len(exchange) == 2:
                # Tuple-style history: (user message, bot message).
                user_msg, bot_msg = exchange
                f.write(f"User: {user_msg}\n")
                f.write(f"Bot: {bot_msg}\n\n")
            elif isinstance(exchange, dict):
                # Messages-style history: {"role": ..., "content": ...}.
                role = exchange.get("role", "unknown")
                content = exchange.get("content", "")
                f.write(f"{role.capitalize()}: {content}\n\n")

    return filename
198
+
199
# Main Gradio app: two checkbox rows select the retrieval topic and advice
# mode, which are forwarded into respond() as additional chat inputs.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="fuchsia",
        neutral_hue="gray",
        text_size="lg",

    ).set(
        background_fill_primary='*neutral_200',
        # NOTE(review): 'neutral_100' and 'secondary_500' below lack the '*'
        # palette-reference prefix that every sibling value uses — confirm
        # whether literal strings are intended here.
        background_fill_secondary='neutral_100',
        background_fill_secondary_dark='secondary_500',
        border_color_accent='*secondary_400',
        border_color_accent_dark='*secondary_800',
        color_accent='*secondary_600',
        color_accent_soft='*secondary_200',
        color_accent_soft_dark='*secondary_800',
        button_primary_background_fill='*secondary_400',
        button_primary_background_fill_dark='*secondary_600',
        button_primary_text_color='white',
        button_primary_border_color='*secondary_700',
        button_primary_border_color_dark='*secondary_900'
    )
) as demo:
    with gr.Row(scale=1):
        chatbot_topic=gr.CheckboxGroup(["Helping Charities", "Financial Aid"], label="What would you like advice about?")
    with gr.Row(scale=1):
        chatbot_mode=gr.CheckboxGroup(["General Advice", "Personal Advice"], label="How would you like the chatbot to respond?")

    # The lambda forwards the two checkbox values through to respond().
    gr.ChatInterface(
        fn=lambda msg, hist, topic_vals, mode_vals: respond(msg, hist, topic_vals, mode_vals),
        title="Finance Management Hub",
        description="Ask about your personal finance",
        type="messages",
        additional_inputs=[chatbot_topic, chatbot_mode]
    )
#launching chatbot
demo.launch()