Update app.py
app.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import time  # Added for timing logs
 import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -45,6 +46,14 @@ response_cache = {
         "The best places to open a brokerage account include Vanguard, Fidelity, Charles Schwab, and Robinhood. "
         "They offer low fees, no minimums, and user-friendly platforms for beginners."
     ),
+    "what is dollar-cost averaging?": (
+        "Dollar-cost averaging is investing a fixed amount regularly (e.g., $100 monthly) in ETFs, "
+        "reducing risk by spreading purchases over time."
+    ),
+    "how much should i invest?": (
+        "Invest what you can afford after expenses and an emergency fund. Start with $100-$500 monthly "
+        "in ETFs like VOO using dollar-cost averaging. Consult a financial planner."
+    ),
 }

 # Load persistent cache
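The new cached answer claims dollar-cost averaging reduces risk by spreading purchases over time. As an illustrative sketch of the arithmetic behind that claim (hypothetical prices, not part of the commit), a fixed monthly dollar amount buys more shares when prices dip, so the average cost per share comes out at or below the average price:

    import statistics

    budget = 100.0                    # fixed monthly contribution (hypothetical)
    prices = [400.0, 380.0, 420.0]    # hypothetical monthly ETF share prices
    shares = [budget / p for p in prices]          # dips buy more shares
    avg_cost = budget * len(prices) / sum(shares)  # total spent / total shares
    print(f"avg cost/share: {avg_cost:.2f} vs avg price: {statistics.mean(prices):.2f}")
    # -> avg cost/share: 399.33 vs avg price: 400.00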
@@ -73,13 +82,13 @@ except Exception as e:
     logger.error(f"Error loading model/tokenizer: {e}")
     raise RuntimeError(f"Failed to load model: {str(e)}")

-# Updated prompt prefix with better instructions examples
+# Updated prompt prefix with better instructions and examples
 prompt_prefix = (
     "You are FinChat, a financial advisor. Always provide clear, step-by-step answers to the user's exact question. "
     "Avoid vague or unrelated topics. Use a numbered list format where appropriate and explain each step.\n\n"
     "Example 1:\n"
     "Q: How can I start investing with $100 a month?\n"
-    "A: Here’s a step-by-step guide:\n"
+    "A: Here’s a step-by-step guide:\n"
     "1. Open a brokerage account with a platform like Fidelity or Robinhood. They offer low fees and no minimums.\n"
     "2. Deposit your $100 monthly. You can set up automatic transfers.\n"
     "3. Choose a low-cost ETF like VOO, which tracks the S&P 500.\n"
@@ -97,10 +106,12 @@ def get_closest_cache_key(message, cache_keys, threshold=0.7):
     matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=threshold)
     return matches[0] if matches else None

-# Define chat function with
+# Define chat function with optimized generation parameters
 def chat_with_model(user_input, history=None):
     try:
+        start_time = time.time()  # Start timing
         logger.info(f"Processing user input: {user_input}")
+
         cache_key = user_input.lower().strip()
         cache_keys = list(response_cache.keys())
         closest_key = cache_key if cache_key in response_cache else get_closest_cache_key(cache_key, cache_keys)
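For readers skimming the cache path above: difflib is part of the Python standard library, and get_close_matches ranks candidates by SequenceMatcher similarity ratio. A self-contained sketch of the lookup (the cache keys here are hypothetical stand-ins):

    import difflib

    cache_keys = ["what is dollar-cost averaging?", "how much should i invest?"]

    def get_closest_cache_key(message, cache_keys, threshold=0.7):
        # Returns the single best match at or above the similarity cutoff, else None.
        matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=threshold)
        return matches[0] if matches else None

    print(get_closest_cache_key("what is dollar cost averaging", cache_keys))
    # -> "what is dollar-cost averaging?" despite the missing hyphen and "?"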
@@ -111,6 +122,8 @@ def chat_with_model(user_input, history=None):
             history = history or []
             history.append({"role": "user", "content": user_input})
             history.append({"role": "assistant", "content": response})
+            end_time = time.time()
+            logger.info(f"Response time: {end_time - start_time:.2f} seconds")
             return response, history

         if len(user_input.strip()) <= 5:
@@ -120,22 +133,26 @@ def chat_with_model(user_input, history=None):
             history = history or []
             history.append({"role": "user", "content": user_input})
             history.append({"role": "assistant", "content": response})
+            end_time = time.time()
+            logger.info(f"Response time: {end_time - start_time:.2f} seconds")
             return response, history

         full_prompt = prompt_prefix + user_input + "\nA:"
         inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512).to(device)

         with torch.inference_mode():
+            gen_start_time = time.time()  # Start generation timing
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=
+                max_new_tokens=75,  # Reduced for faster generation
                 min_length=20,
-                do_sample=
-                temperature=0.5,  # Lowered for more focused responses
-                top_p=0.9,
+                do_sample=False,  # Use greedy decoding for speed
                 repetition_penalty=1.2,
                 pad_token_id=tokenizer.eos_token_id
             )
+            gen_end_time = time.time()
+            logger.info(f"Generation time: {gen_end_time - gen_start_time:.2f} seconds")
+
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         response = response[len(full_prompt):].strip() if response.startswith(full_prompt) else response
         logger.info(f"Chatbot response: {response}")
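On the generation change above: with do_sample=False, transformers' generate performs greedy decoding, so the sampling knobs temperature and top_p no longer apply, which is why the commit drops them. A minimal sketch of the resulting call pattern, assuming the app's distilgpt2 checkpoint:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
    model = AutoModelForCausalLM.from_pretrained("distilgpt2")

    inputs = tokenizer("Q: How can I start investing?\nA:", return_tensors="pt")
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=75,                    # bounded output length keeps latency low
            do_sample=False,                      # greedy: always take the top token
            repetition_penalty=1.2,               # damps loops common in small models
            pad_token_id=tokenizer.eos_token_id,  # distilgpt2 has no pad token by default
        )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))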
@@ -147,6 +164,8 @@ def chat_with_model(user_input, history=None):
         history.append({"role": "user", "content": user_input})
         history.append({"role": "assistant", "content": response})
         torch.cuda.empty_cache()
+        end_time = time.time()
+        logger.info(f"Total response time: {end_time - start_time:.2f} seconds")
         return response, history
     except Exception as e:
         logger.error(f"Error generating response: {e}")
@@ -157,34 +176,7 @@ def chat_with_model(user_input, history=None):
         history.append({"role": "assistant", "content": response})
         return response, history

-#
-def log_feedback_up(history):
-    if history:
-        last_user = history[-2]['content']
-        last_assistant = history[-1]['content']
-        feedback = {"question": last_user, "response": last_assistant, "feedback": "up"}
-        try:
-            with open("feedback.json", "a") as f:
-                json.dump(feedback, f)
-                f.write("\n")
-            logger.info("Logged positive feedback")
-        except Exception as e:
-            logger.warning(f"Failed to log feedback: {e}")
-
-def log_feedback_down(history):
-    if history:
-        last_user = history[-2]['content']
-        last_assistant = history[-1]['content']
-        feedback = {"question": last_user, "response": last_assistant, "feedback": "down"}
-        try:
-            with open("feedback.json", "a") as f:
-                json.dump(feedback, f)
-                f.write("\n")
-            logger.info("Logged negative feedback")
-        except Exception as e:
-            logger.warning(f"Failed to log feedback: {e}")
-
-# Create Gradio interface with feedback buttons
+# Create Gradio interface
 with gr.Blocks(
     title="FinChat: An LLM based on distilgpt2 model",
     css=".feedback {display: flex; gap: 10px; justify-content: center; margin-top: 10px;}"
@@ -201,12 +193,6 @@ with gr.Blocks(
     submit = gr.Button("Send")
     clear = gr.Button("Clear")

-    # Feedback section
-    gr.Markdown("**Was this helpful?**")
-    with gr.Row(elem_classes="feedback"):
-        thumbs_up = gr.Button("👍")
-        thumbs_down = gr.Button("👎")
-
     def submit_message(user_input, history):
         response, updated_history = chat_with_model(user_input, history)
         return "", updated_history  # Clear input, update chatbot
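The submit_message handler above follows the standard gr.Blocks contract: an event callback returns one value per declared output. A stripped-down sketch of that wiring, assuming a recent Gradio 4.x (the echo reply is a stand-in for chat_with_model):

    import gradio as gr

    def submit_message(user_input, history):
        history = (history or []) + [
            {"role": "user", "content": user_input},
            {"role": "assistant", "content": f"Echo: {user_input}"},  # stand-in reply
        ]
        return "", history  # first output clears the textbox, second refreshes the chat

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")  # expects role/content dicts, as in app.py
        msg = gr.Textbox(label="Your question")
        msg.submit(submit_message, inputs=[msg, chatbot], outputs=[msg, chatbot])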
@@ -220,15 +206,12 @@ with gr.Blocks(
         fn=lambda: ("", []),  # Clear input and chatbot
         outputs=[msg, chatbot]
     )
-
-    thumbs_up.click(fn=log_feedback_up, inputs=[chatbot], outputs=None)
-    thumbs_down.click(fn=log_feedback_down, inputs=[chatbot], outputs=None)

 # Launch interface (conditional for Spaces)
 if __name__ == "__main__" and not os.getenv("HF_SPACE"):
     logger.info("Launching Gradio interface locally")
     try:
-
+        interface.launch(share=False, debug=True)
     except Exception as e:
         logger.error(f"Error launching interface: {e}")
         raise
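One note on the launch guard at the end of the diff: Hugging Face Spaces imports and serves app.py itself, so the script only calls launch() when run directly outside Spaces. Whether the hosted environment actually sets an HF_SPACE variable is an assumption this code makes, not something the platform documents. A minimal sketch of the pattern:

    import os
    import gradio as gr

    demo = gr.Interface(fn=str.upper, inputs="text", outputs="text")

    # Assumption: HF_SPACE is set in the hosted environment and unset locally.
    if __name__ == "__main__" and not os.getenv("HF_SPACE"):
        demo.launch(share=False, debug=True)  # local run only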