brandonmusic committed on
Commit 8137280 · verified · 1 Parent(s): 9a07481

Update app.py

Files changed (1):
  app.py +62 -187
app.py CHANGED
@@ -1,71 +1,41 @@
 # app.py
 # This is the updated main script. Copy-paste this over your existing app.py.
 # Changes:
-# - Switched from Gradio to Flask for serving the custom HTML+CSS+JS frontend.
-# - Added API endpoint /api/chat for handling user inputs (prompt, jurisdiction, IRAC mode, web search toggle, file).
-# - Serves index.html as the root page (you'll need to add index.html to your repo with the provided HTML code).
-# - Integrated file handling in API (extracts text and appends to prompt if needed).
-# - Forced task_type to "irac" if IRAC mode is enabled; otherwise, uses classify_prompt.
-# - Added web_search toggle handling.
-# - Updated ask_gpt41_mini to use the fine-tuned model ft:gpt-4.1-mini-2025-04-14:w-jeffrey-scott-psc:verdictaitrain:BysFkyX4.
-# - If the task is document_creation, routes directly to the fine-tuned GPT model.
-# - Retained all other logic, including RAG (semantic_search for CAP + municipal_search for municipal; now hybrid with BM25 for municipal).
-# - Note: Add 'bm25s' to your requirements.txt for hybrid search (pip install bm25s).
-# - Note: The SaulLM endpoint is kept as-is (likely 7B; if you want 141B, update SAUL_ENDPOINT to a new HF cloud endpoint for SaulLM-141B).
-# - Note: For full chat history, the frontend JS handles appending messages client-side (stateless backend).
-# - Updated route_model to use retrieve_context(prompt, task_type) instead of separate semantic_search/municipal_search.
-# - For document_creation/summaries, skip RAG (no retrieve_context call) to avoid slowdown.
+# - Removed unused 'bm25s' import (replaced with rank_bm25 in retrieval.py).
+# - Integrated retrieve_context from retrieval.py, which now uses hybrid_cap_search with lazy loading.
+# - Added handling for missing CAP components, logging warnings and skipping RAG if caches are absent.
+# - Retained Flask for serving the custom HTML+CSS+JS frontend and API endpoint /api/chat.
+# - Kept file handling, IRAC mode, web search toggle, and task classification logic.
+# - Updated route_model to use retrieve_context only if CAP components are available.
+# - Note: Precompute CAP components with precompute_cap_embeddings.py before deployment.
 
-import gradio as gr # Retained if needed, but not used for UI anymore
+from flask import Flask, request, jsonify, send_from_directory
+from werkzeug.utils import secure_filename
 from openai import OpenAI
 import requests
 import os
 import logging
 from datetime import datetime
 import pdfplumber
-from docx import Document # Added for .docx support
+from docx import Document
 from googleapiclient.discovery import build
 import re
-from datasets import load_dataset, Dataset, load_from_disk
-from sentence_transformers import SentenceTransformer
-import torch
-import numpy as np
-import shutil
-import pyarrow.parquet as pq
-from huggingface_hub import hf_hub_download
-import pickle
-import faiss
-import threading
-import subprocess
+from retrieval import retrieve_context, municipal_search # Updated import
 from task_processing import process_task_response
 from gpt_helpers import ask_gpt41_mini
+from prompt_builder import build_saul_prompt, build_editor_prompt
+from post_processing import ground_statutes
 
-# New imports for split modules
-from retrieval import *
-from prompt_builder import *
-from post_processing import *
-
-# Flask imports
-from flask import Flask, request, jsonify, send_from_directory
-from werkzeug.utils import secure_filename
-
-# BM25 for hybrid search (add 'bm25s' to requirements.txt)
-from bm25s import BM25
-
-app_flask = Flask(__name__) # Renamed to avoid conflict with 'app' variable
-
+app_flask = Flask(__name__)
 os.environ["HF_HOME"] = "/data/.huggingface"
-# Add or update this section in script.py
-# Ensure this is placed after imports but before any dataset loading or function definitions
 
-from huggingface_hub import login
+# Logging setup
+logger = logging.getLogger("app")
+logging.basicConfig(level=logging.INFO)
 
-# Load HF token for SaulLM endpoint and gated repos
+# Hugging Face authentication
+from huggingface_hub import login
 hf_token = os.environ.get("HF_TOKEN", "")
-if not hf_token:
-    logger.warning("HF_TOKEN not set; SaulLM endpoint may require authentication and gated repos may not be accessible.")
-
-# Authenticate for gated Hugging Face repos (e.g., for centroids download)
 if hf_token:
     login(hf_token)
     logger.info("Authenticated with Hugging Face token for gated repos.")
@@ -75,158 +45,75 @@ else:
 # Check environment variables
 try:
     OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "Missing")
-    GOOGLE_SEARCH_API = os.environ.get("GOOGLE_SEARCH_API", "Missing") # This is now treated as CSE ID (cx)
-    GOOGLE_CUSTOM_SEARCH_API_KEY = os.environ.get("GOOGLE_CUSTOM_SEARCH_API_KEY", "Missing") # New: API key (developerKey)
+    GOOGLE_SEARCH_API = os.environ.get("GOOGLE_SEARCH_API", "Missing")
+    GOOGLE_CUSTOM_SEARCH_API_KEY = os.environ.get("GOOGLE_CUSTOM_SEARCH_API_KEY", "Missing")
     if OPENAI_API_KEY == "Missing" or GOOGLE_CUSTOM_SEARCH_API_KEY == "Missing" or GOOGLE_SEARCH_API == "Missing":
        raise KeyError("API keys not set")
     logger.info(f"OpenAI API Key starts with: {OPENAI_API_KEY[:10]}...")
-    logger.info("API keys loaded successfully")
 except KeyError as e:
     logger.error(f"Missing environment variable: {str(e)}")
     raise EnvironmentError(f"Required secrets OPENAI_API_KEY, GOOGLE_CUSTOM_SEARCH_API_KEY, and GOOGLE_SEARCH_API must be set in Hugging Face Space Secrets")
 
-# Load HF token for SaulLM endpoint
-hf_token = os.environ.get("HF_TOKEN", "")
-if not hf_token:
-    logger.warning("HF_TOKEN not set; SaulLM endpoint may require authentication")
-
-import requests
-
-
 # Initialize OpenAI client
 openai_client = OpenAI(api_key=OPENAI_API_KEY)
 
 # SaulLM endpoint
 SAUL_ENDPOINT = "https://l4tuv4j9bu616t5x.us-east-1.aws.endpoints.huggingface.cloud"
 
-# Persistent storage path for dataset
-LOCAL_PATH = "/data/cap_dataset"
-dataset_info_path = os.path.join(LOCAL_PATH, 'dataset_info.json')
-if os.path.exists(dataset_info_path):
-    cap_dataset = load_from_disk(LOCAL_PATH)
-else:
-    try:
-        cap_dataset = load_dataset("TeraflopAI/Caselaw-Access-Project", split="train")
-        cap_dataset.save_to_disk(LOCAL_PATH)
-    except Exception as e:
-        logger.error(f"Dataset download/save failed: {str(e)}")
-        if os.path.exists(LOCAL_PATH):
-            shutil.rmtree(LOCAL_PATH) # Clean up partial save
-        raise
-
-# Precompute CID to index mapping for CAP dataset
-cap_id_to_index = {doc['cid']: i for i, doc in enumerate(cap_dataset) if 'cid' in doc}
-
-# Preload some clusters in background (e.g., clusters 0-9)
-def preload_clusters():
-    for cluster_id in range(10): # Adjust range as needed
-        try:
-            load_cluster_vectors(cluster_id, model="gte-Qwen2-1.5B-instruct")
-            logger.info(f"Preloaded cluster {cluster_id}")
-        except Exception as e:
-            logger.error(f"Preload failed for cluster {cluster_id}: {e}")
-
-threading.Thread(target=preload_clusters).start()
-
 # State dictionary for jurisdiction
 STATES = {
-    "AL": "Alabama",
-    "AK": "Alaska",
-    "AZ": "Arizona",
-    "AR": "Arkansas",
-    "CA": "California",
-    "CO": "Colorado",
-    "CT": "Connecticut",
-    "DE": "Delaware",
-    "FL": "Florida",
-    "GA": "Georgia",
-    "HI": "Hawaii",
-    "ID": "Idaho",
-    "IL": "Illinois",
-    "IN": "Indiana",
-    "IA": "Iowa",
-    "KS": "Kansas",
-    "KY": "Kentucky",
-    "LA": "Louisiana",
-    "ME": "Maine",
-    "MD": "Maryland",
-    "MA": "Massachusetts",
-    "MI": "Michigan",
-    "MN": "Minnesota",
-    "MS": "Mississippi",
-    "MO": "Missouri",
-    "MT": "Montana",
-    "NE": "Nebraska",
-    "NV": "Nevada",
-    "NH": "New Hampshire",
-    "NJ": "New Jersey",
-    "NM": "New Mexico",
-    "NY": "New York",
-    "NC": "North Carolina",
-    "ND": "North Dakota",
-    "OH": "Ohio",
-    "OK": "Oklahoma",
-    "OR": "Oregon",
-    "PA": "Pennsylvania",
-    "RI": "Rhode Island",
-    "SC": "South Carolina",
-    "SD": "South Dakota",
-    "TN": "Tennessee",
-    "TX": "Texas",
-    "UT": "Utah",
-    "VT": "Vermont",
-    "VA": "Virginia",
-    "WA": "Washington",
-    "WV": "West Virginia",
-    "WI": "Wisconsin",
-    "WY": "Wyoming",
-    "Federal": "Federal",
-    "All States": "All States",
-    "Other": "Other States"
+    "AL": "Alabama", "AK": "Alaska", "AZ": "Arizona", "AR": "Arkansas", "CA": "California",
+    "CO": "Colorado", "CT": "Connecticut", "DE": "Delaware", "FL": "Florida", "GA": "Georgia",
+    "HI": "Hawaii", "ID": "Idaho", "IL": "Illinois", "IN": "Indiana", "IA": "Iowa",
+    "KS": "Kansas", "KY": "Kentucky", "LA": "Louisiana", "ME": "Maine", "MD": "Maryland",
+    "MA": "Massachusetts", "MI": "Michigan", "MN": "Minnesota", "MS": "Mississippi", "MO": "Missouri",
+    "MT": "Montana", "NE": "Nebraska", "NV": "Nevada", "NH": "New Hampshire", "NJ": "New Jersey",
+    "NM": "New Mexico", "NY": "New York", "NC": "North Carolina", "ND": "North Dakota", "OH": "Ohio",
+    "OK": "Oklahoma", "OR": "Oregon", "PA": "Pennsylvania", "RI": "Rhode Island", "SC": "South Carolina",
+    "SD": "South Dakota", "TN": "Tennessee", "TX": "Texas", "UT": "Utah", "VT": "Vermont",
+    "VA": "Virginia", "WA": "Washington", "WV": "West Virginia", "WI": "Wisconsin", "WY": "Wyoming",
+    "Federal": "Federal", "All States": "All States", "Other": "Other States"
 }
 
 def route_model(prompt, task_type, files=None, search_web=False, jurisdiction="KY"):
     logger.info(f"Routing prompt: {prompt}, Task: {task_type}, Web Search: {search_web}, Jurisdiction: {jurisdiction}")
-
     rag_context = ""
-    if task_type in ["case_law", "irac", "statute"]: # Skip RAG for pure document_creation/summaries
-        combined_results = retrieve_context(prompt, task_type)
-
-        # Filter by jurisdiction if specified (e.g., "KY" for Kentucky)
-        if jurisdiction and jurisdiction != "All States":
-            state_name = STATES.get(jurisdiction, "")
-            state_code = jurisdiction # e.g., "KY"
-            combined_results = [r for r in combined_results if state_code in r['citation'] or state_name in r['citation'] or state_code in r['name'] or state_name in r['name']]
-
-        if combined_results:
-            rag_context = "Retrieved legal authorities (case law and statutes):\n" + "\n".join([f"{i+1}. [{auth.get('source', 'Unknown')}] {auth['name']}, {auth['citation']}: \"{auth['snippet']}\"" for i, auth in enumerate(combined_results)])
-
-        prompt = f"User prompt: {prompt}\n\n{rag_context}"
+    if task_type in ["case_law", "irac", "statute"] and not os.getenv("SKIP_CAP_INIT", "false").lower() == "true":
+        # Check if CAP components are available
+        if all(os.path.exists(f"/data/cap_{ext}") for ext in ["tfidf.pkl", "tfidf_matrix.npz", "gte.npy", "openai.npy"]):
+            combined_results = retrieve_context(prompt, task_type)
+            # Filter by jurisdiction if specified
+            if jurisdiction and jurisdiction != "All States":
+                state_name = STATES.get(jurisdiction, "")
+                state_code = jurisdiction
+                combined_results = [r for r in combined_results if any(s in (r.get('citation', '') + r.get('name', '')) for s in [state_code, state_name])]
+            if combined_results:
+                rag_context = "Retrieved legal authorities (case law and statutes):\n" + "\n".join(
+                    [f"{i+1}. [{auth.get('source', 'Unknown')}] {auth['name']}, {auth['citation']}: \"{auth['snippet']}\"" for i, auth in enumerate(combined_results)])
+                prompt = f"User prompt: {prompt}\n\n{rag_context}"
+        else:
+            logger.warning("CAP hybrid components missing. Precompute them with precompute_cap_embeddings.py. Skipping RAG.")
 
     if task_type == "document_creation":
         # Route directly to fine-tuned GPT for document creation
         saul_response = ask_gpt41_mini(prompt, jurisdiction)
     else:
         try:
-            messages = build_saul_prompt(prompt, task_type, jurisdiction, rag_context) # From prompt_builder
+            messages = build_saul_prompt(prompt, task_type, jurisdiction, rag_context)
            saul_response = ask_saul(messages, task_type, jurisdiction)
         except Exception as e:
             logger.error(f"SaulLM failed: {e}. Falling back to GPT-4o.")
-            saul_response = ask_gpt4o(prompt) # Fallback
-
-    # Task-specific processing (existing code)
+            saul_response = ask_gpt4o(prompt)
+
+    # Task-specific processing
     saul_response = process_task_response(task_type, saul_response, prompt, jurisdiction)
-
     if search_web:
         web_data = google_search(prompt, GOOGLE_CUSTOM_SEARCH_API_KEY, GOOGLE_SEARCH_API)
         saul_response = f"Google Search results: {web_data}\n{saul_response}"
 
     editor_prompt = build_editor_prompt(prompt, task_type, jurisdiction, saul_response, rag_context)
-
     final_response = ask_gpt4o(editor_prompt)
-
     final_response = ground_statutes(final_response, jurisdiction, GOOGLE_CUSTOM_SEARCH_API_KEY, GOOGLE_SEARCH_API, ask_gpt4o)
-
     return final_response
 
 def ask_saul(messages, task_type, jurisdiction):
@@ -234,10 +121,7 @@ def ask_saul(messages, task_type, jurisdiction):
     headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
     payload = {
         "messages": messages,
-        "parameters": {
-            "max_length": 32768,
-            "temperature": 0.3
-        }
+        "parameters": {"max_length": 32768, "temperature": 0.3}
     }
     logger.info(f"SaulLM payload: messages length={len(messages)}, max_length={payload['parameters']['max_length']}")
     response = requests.post(SAUL_ENDPOINT, headers=headers, json=payload)
@@ -249,10 +133,9 @@ def ask_saul(messages, task_type, jurisdiction):
             return result[0].get("generated_text", "[No response from SaulLM]")
         else:
             return result.get("generated_text", "[No response from SaulLM]")
-
     except Exception as e:
         logger.error(f"SaulLM error: {str(e)}")
-        raise # Raise to catch in route_model for fallback
+        raise
 
 def ask_gpt4o(prompt):
     try:
@@ -260,11 +143,9 @@ def ask_gpt4o(prompt):
         response = openai_client.chat.completions.create(
             model="gpt-4o",
             messages=[
-                {"role": "system", "content": (
-                    f"You are the final editor for a legal research assistant. {irac_system} "
-                    "Ensure high quote density from retrieved authorities and include relevant facts from the cited cases. "
-                    "Maintain accurate citations. Do not paraphrase legal holdings when direct quotes are available."
-                )},
+                {"role": "system", "content": f"You are the final editor for a legal research assistant. {irac_system} "
+                    "Ensure high quote density from retrieved authorities and include relevant facts from the cited cases. "
+                    "Maintain accurate citations. Do not paraphrase legal holdings when direct quotes are available."},
                 {"role": "user", "content": prompt}
             ],
             temperature=0.3,
@@ -297,7 +178,7 @@ def extract_text_from_file(file_path):
 def classify_prompt(prompt):
     prompt_lower = prompt.lower()
     if "summarize" in prompt_lower:
-        return "document_analysis" # Treat summarize as analysis for routing
+        return "document_analysis"
     if any(k in prompt_lower for k in ["irac", "issue", "rule", "analysis", "conclusion", "brief", "memorandum", "memo"]):
         return "irac"
     elif any(k in prompt_lower for k in ["case", "precedent", "law"]):
@@ -335,10 +216,9 @@ def summarize_document(files):
         file = files[0]
         text = extract_text_from_file(file)
         if text:
-            summary = ask_gpt4o(f"Summarize the following document: {text[:10000]}") # Limit to avoid token limits
+            summary = ask_gpt4o(f"Summarize the following document: {text[:10000]}")
             return f"Summary: {summary}"
-        return "No text extracted from file."
-    return "Please upload a file to summarize."
+    return "No text extracted from file." if files else "Please upload a file to summarize."
 
 def analyze_document(files):
     if files:
@@ -346,8 +226,7 @@ def analyze_document(files):
         if text:
             analysis = ask_gpt4o(f"Analyze the following document for legal issues, risks, or key clauses: {text[:10000]}")
             return f"Analysis: {analysis}"
-        return "No text extracted from file."
-    return "No file uploaded for analysis."
+    return "No text extracted from file." if files else "No file uploaded for analysis."
 
 def check_issues(files):
     if files:
@@ -355,8 +234,7 @@ def check_issues(files):
         if text:
             issues = ask_gpt4o(f"Check for red flags, unusual clauses, or potential issues in this legal document and highlight them: {text[:10000]}")
             return f"Highlighted Issues: {issues}"
-        return "No text extracted from file."
-    return "No file uploaded to check."
+    return "No text extracted from file." if files else "No file uploaded to check."
 
 # Flask routes
 @app_flask.route('/')
@@ -370,7 +248,6 @@ def api_chat():
     irac_mode = request.form.get('irac_mode', 'false') == 'true'
     search_web = request.form.get('web_search', 'false') == 'true'
     file = request.files.get('file')
-
     file_text = ""
     files = None
    if file:
@@ -378,7 +255,7 @@ def api_chat():
         temp_path = os.path.join('/tmp', filename)
         file.save(temp_path)
         file_text = extract_text_from_file(temp_path)
-        files = [temp_path] # Pass as list for route_model
+        files = [temp_path]
         os.remove(temp_path)
 
     task_type = classify_prompt(prompt)
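
A note on the precompute step: route_model now gates RAG on four cache files (/data/cap_tfidf.pkl, /data/cap_tfidf_matrix.npz, /data/cap_gte.npy, /data/cap_openai.npy), and the header says to build them with precompute_cap_embeddings.py, which is not part of this commit. Below is a minimal sketch of what such a script would have to write, assuming a scikit-learn TF-IDF vectorizer plus two dense embedding matrices; the GTE and OpenAI model choices are inferred only from the file names.

    # Sketch only: the real precompute_cap_embeddings.py is not in this commit.
    import pickle

    import numpy as np
    import scipy.sparse as sp
    from sklearn.feature_extraction.text import TfidfVectorizer

    def precompute_cap_components(texts, gte_embed, openai_embed):
        # TF-IDF vectorizer and document-term matrix, matching the
        # /data/cap_tfidf.pkl and /data/cap_tfidf_matrix.npz checks in route_model.
        vectorizer = TfidfVectorizer(max_features=50000)
        matrix = vectorizer.fit_transform(texts)
        with open("/data/cap_tfidf.pkl", "wb") as f:
            pickle.dump(vectorizer, f)
        sp.save_npz("/data/cap_tfidf_matrix.npz", matrix)
        # Dense embeddings; gte_embed and openai_embed are assumed callables
        # returning one vector per text (models inferred from the file names).
        np.save("/data/cap_gte.npy", np.asarray(gte_embed(texts)))
        np.save("/data/cap_openai.npy", np.asarray(openai_embed(texts)))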
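
The header also says the unused bm25s import was replaced by rank_bm25 inside retrieval.py, and that retrieve_context now routes through hybrid_cap_search. Neither body appears in this diff, so the following is only a sketch of the usual shape of such a hybrid scorer: BM25 lexical scores blended with cosine similarity over precomputed embeddings. The whitespace tokenization, min-max normalization, and alpha weight are assumptions, not the committed implementation.

    # Sketch only: retrieval.py's hybrid_cap_search is not shown in this diff.
    import numpy as np
    from rank_bm25 import BM25Okapi

    def hybrid_search(query, docs, doc_embeddings, embed_fn, k=5, alpha=0.5):
        # Lexical scores: BM25 over whitespace-tokenized documents.
        bm25 = BM25Okapi([d.lower().split() for d in docs])
        lexical = bm25.get_scores(query.lower().split())
        # Dense scores: cosine similarity against precomputed embeddings.
        q = np.asarray(embed_fn(query))
        dense = doc_embeddings @ q / (
            np.linalg.norm(doc_embeddings, axis=1) * np.linalg.norm(q) + 1e-9)
        # Min-max normalize each signal so the blend weight is meaningful.
        def norm(x):
            x = np.asarray(x, dtype=float)
            return (x - x.min()) / (x.max() - x.min() + 1e-9)
        scores = alpha * norm(lexical) + (1 - alpha) * norm(dense)
        return np.argsort(scores)[::-1][:k]  # indices of the top-k documents

Blending normalized scores rather than raw ones matters here because BM25 and cosine similarity live on different scales; without normalization one signal silently dominates.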
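
For reference, the /api/chat endpoint consumes multipart form data: irac_mode and web_search arrive as strings compared against 'true', and the optional upload is read from the 'file' field. A sketch of a client call follows; the prompt and jurisdiction field names follow the commit's own description of the endpoint's inputs, while the base URL and the response shape are assumptions.

    # Sketch only: base URL and exact response JSON are not shown in this diff.
    import requests

    BASE_URL = "http://localhost:7860"  # hypothetical address for the Space

    with open("contract.pdf", "rb") as fh:  # optional upload; omit 'files' without one
        resp = requests.post(
            f"{BASE_URL}/api/chat",
            data={
                "prompt": "Identify the indemnification obligations in this contract.",
                "jurisdiction": "KY",  # field name assumed from the endpoint's described inputs
                "irac_mode": "true",   # the backend compares against the string 'true'
                "web_search": "false",
            },
            files={"file": fh},
        )
    print(resp.status_code, resp.text)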