brandonmusic committed on
Commit
b6805a5
·
verified ·
1 Parent(s): 8137280

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -25
app.py CHANGED
@@ -1,13 +1,15 @@
1
  # app.py
2
  # This is the updated main script. Copy-paste this over your existing app.py.
3
  # Changes:
4
- # - Removed unused 'bm25s' import (replaced with rank_bm25 in retrieval.py).
5
- # - Integrated retrieve_context from retrieval.py, which now uses hybrid_cap_search with lazy loading.
6
- # - Added handling for missing CAP components, logging warnings and skipping RAG if caches are absent.
7
- # - Retained Flask for serving the custom HTML+CSS+JS frontend and API endpoint /api/chat.
8
- # - Kept file handling, IRAC mode, web search toggle, and task classification logic.
9
- # - Updated route_model to use retrieve_context only if CAP components are available.
10
- # - Note: Precompute CAP components with precompute_cap_embeddings.py before deployment.
 
 
11
 
12
  from flask import Flask, request, jsonify, send_from_directory
13
  from werkzeug.utils import secure_filename
@@ -18,11 +20,12 @@ import pdfplumber
18
  from docx import Document
19
  from googleapiclient.discovery import build
20
  import re
21
- from retrieval import retrieve_context, municipal_search # Updated import
22
  from task_processing import process_task_response
23
  from gpt_helpers import ask_gpt41_mini
24
  from prompt_builder import build_saul_prompt, build_editor_prompt
25
  from post_processing import ground_statutes
 
26
 
27
  app_flask = Flask(__name__)
28
  os.environ["HF_HOME"] = "/data/.huggingface"
@@ -76,22 +79,18 @@ STATES = {
76
  def route_model(prompt, task_type, files=None, search_web=False, jurisdiction="KY"):
77
  logger.info(f"Routing prompt: {prompt}, Task: {task_type}, Web Search: {search_web}, Jurisdiction: {jurisdiction}")
78
  rag_context = ""
79
- if task_type in ["case_law", "irac", "statute"] and not os.getenv("SKIP_CAP_INIT", "false").lower() == "true":
80
- # Check if CAP components are available
81
- if all(os.path.exists(f"/data/cap_{ext}") for ext in ["tfidf.pkl", "tfidf_matrix.npz", "gte.npy", "openai.npy"]):
82
- combined_results = retrieve_context(prompt, task_type)
83
- # Filter by jurisdiction if specified
84
- if jurisdiction and jurisdiction != "All States":
85
- state_name = STATES.get(jurisdiction, "")
86
- state_code = jurisdiction
87
- combined_results = [r for r in combined_results if any(s in (r.get('citation', '') + r.get('name', '')) for s in [state_code, state_name])]
88
- if combined_results:
89
- rag_context = "Retrieved legal authorities (case law and statutes):\n" + "\n".join(
90
- [f"{i+1}. [{auth.get('source', 'Unknown')}] {auth['name']}, {auth['citation']}: \"{auth['snippet']}\"" for i, auth in enumerate(combined_results)])
91
- prompt = f"User prompt: {prompt}\n\n{rag_context}"
92
- else:
93
- logger.warning("CAP hybrid components missing. Precompute them with precompute_cap_embeddings.py. Skipping RAG.")
94
-
95
  if task_type == "document_creation":
96
  # Route directly to fine-tuned GPT for document creation
97
  saul_response = ask_gpt41_mini(prompt, jurisdiction)
@@ -176,7 +175,7 @@ def extract_text_from_file(file_path):
176
  def classify_prompt(prompt):
177
  prompt_lower = prompt.lower()
178
  if "summarize" in prompt_lower:
179
- return "document_analysis"
180
  if any(k in prompt_lower for k in ["irac", "issue", "rule", "analysis", "conclusion", "brief", "memorandum", "memo"]):
181
  return "irac"
182
  elif any(k in prompt_lower for k in ["case", "precedent", "law"]):
 
1
  # app.py
2
  # This is the updated main script. Copy-paste this over your existing app.py.
3
  # Changes:
4
+ # - Fixed import for OpenAI (added line break and ensured it's not commented out).
5
+ # - Retained Flask for serving the custom HTML+CSS+JS frontend.
6
+ # - Added API endpoint /api/chat for handling user inputs (prompt, jurisdiction, IRAC mode, web search toggle, file).
7
+ # - Serves index.html as the root page (you'll need to add index.html to your repo with the provided HTML code).
8
+ # - Forces task_type to "irac" if IRAC mode is enabled; otherwise uses classify_prompt.
9
+ # - Forced web_search toggle handling.
10
+ # - Task type "document_creation" is routed directly to the fine-tuned GPT model.
11
+ # - Updated route_model to use retrieve_context(prompt, task_type) instead of separate semantic_search/municipal_search.
12
+ # - For document_creation/summaries, skip RAG (no retrieve_context call) to avoid slowdown.
13
 
14
  from flask import Flask, request, jsonify, send_from_directory
15
  from werkzeug.utils import secure_filename
 
20
  from docx import Document
21
  from googleapiclient.discovery import build
22
  import re
23
+ from retrieval import retrieve_context # Import from retrieval.py
24
  from task_processing import process_task_response
25
  from gpt_helpers import ask_gpt41_mini
26
  from prompt_builder import build_saul_prompt, build_editor_prompt
27
  from post_processing import ground_statutes
28
+ from openai import OpenAI # Fixed import for OpenAI client
29
 
30
  app_flask = Flask(__name__)
31
  os.environ["HF_HOME"] = "/data/.huggingface"
 
79
  def route_model(prompt, task_type, files=None, search_web=False, jurisdiction="KY"):
80
  logger.info(f"Routing prompt: {prompt}, Task: {task_type}, Web Search: {search_web}, Jurisdiction: {jurisdiction}")
81
  rag_context = ""
82
+ if task_type in ["case_law", "irac", "statute"]:
83
+ combined_results = retrieve_context(prompt, task_type)
84
+ # Filter by jurisdiction if specified
85
+ if jurisdiction and jurisdiction != "All States":
86
+ state_name = STATES.get(jurisdiction, "")
87
+ state_code = jurisdiction
88
+ combined_results = [r for r in combined_results if any(s in (r.get('citation', '') + r.get('name', '')) for s in [state_code, state_name])]
89
+ if combined_results:
90
+ rag_context = "Retrieved legal authorities (case law and statutes):\n" + "\n".join(
91
+ [f"{i+1}. [{auth.get('source', 'Unknown')}] {auth['name']}, {auth['citation']}: \"{auth['snippet']}\"" for i, auth in enumerate(combined_results)])
92
+ prompt = f"User prompt: {prompt}\n\n{rag_context}"
93
+
 
 
 
 
94
  if task_type == "document_creation":
95
  # Route directly to fine-tuned GPT for document creation
96
  saul_response = ask_gpt41_mini(prompt, jurisdiction)
 
175
  def classify_prompt(prompt):
176
  prompt_lower = prompt.lower()
177
  if "summarize" in prompt_lower:
178
+ return "document_analysis" # Treat summarize as analysis for routing
179
  if any(k in prompt_lower for k in ["irac", "issue", "rule", "analysis", "conclusion", "brief", "memorandum", "memo"]):
180
  return "irac"
181
  elif any(k in prompt_lower for k in ["case", "precedent", "law"]):