Upload 2 files
- utils/interface_utils.py +136 -0
- utils/llm_utils.py +315 -0
utils/interface_utils.py
ADDED
@@ -0,0 +1,136 @@
import difflib
import html
import re
from typing import List, Tuple


# --- Helper Function for Markdown Highlighting ---
def generate_highlighted_markdown(text, spans_with_info):
    """Applies highlighting spans with hover info to text for Markdown output."""
    # Ensure spans are sorted by start index and valid.
    # Expects spans_with_info to be a list of (start, end, hover_text_string).
    valid_spans = sorted(
        [
            (s, e, info)
            for s, e, info in spans_with_info  # Unpack the tuple
            if isinstance(s, int) and isinstance(e, int) and 0 <= s <= e <= len(text)
        ],
        key=lambda x: x[0],
    )

    highlighted_parts = []
    current_pos = 0
    # Iterate through sorted spans with info
    for start, end, hover_text in valid_spans:
        # Add text before the current span (NO HTML escaping)
        if start > current_pos:
            highlighted_parts.append(text[current_pos:start])
        # Add the highlighted span with a title attribute
        if start < end:
            # Escape hover text for the title attribute
            escaped_hover_text = html.escape(hover_text, quote=True)
            # Escape span content for display
            escaped_content = html.escape(text[start:end])
            highlighted_parts.append(
                f"<span style='background-color: lightgreen;' title='{escaped_hover_text}'>{escaped_content}</span>"
            )
        # Update current position, ensuring it doesn't go backward in case of overlap
        current_pos = max(current_pos, end)

    # Add any remaining text after the last span (NO HTML escaping)
    if current_pos < len(text):
        highlighted_parts.append(text[current_pos:])

    return "".join(highlighted_parts)


# --- Citation Span Matching Function ---
def find_citation_spans(document: str, citation: str) -> List[Tuple[int, int]]:
    """
    Finds character spans in the document that likely form the citation,
    allowing for fragments and minor differences. Uses SequenceMatcher
    on alphanumeric words and maps back to character indices. Matches are
    taken greedily, longest first, so that reordered fragments are still
    recovered.

    Args:
        document: The source document string.
        citation: The citation string, potentially with fragments/typos.

    Returns:
        A list of (start, end) character tuples from the document,
        representing the most likely origins of the citation fragments.
    """
    # 1. Tokenize document and citation into ALPHANUMERIC words with char spans
    doc_tokens = [
        (m.group(0), m.start(), m.end()) for m in re.finditer(r"[a-zA-Z0-9]+", document)
    ]
    cite_tokens = [
        (m.group(0), m.start(), m.end()) for m in re.finditer(r"[a-zA-Z0-9]+", citation)
    ]
    if not doc_tokens or not cite_tokens:
        return []

    doc_words = [t[0].lower() for t in doc_tokens]
    cite_words = [t[0].lower() for t in cite_tokens]

    # 2. Find longest common blocks of words using SequenceMatcher
    matcher = difflib.SequenceMatcher(None, doc_words, cite_words, autojunk=False)
    matching_blocks = []
    matched_tokens = 0

    unmatched_doc_words = [(0, len(doc_words))]
    unmatched_cite_words = [(0, len(cite_words))]

    while matched_tokens < len(cite_words):
        next_match_candidates = []
        for da, db in unmatched_doc_words:
            for ca, cb in unmatched_cite_words:
                match = matcher.find_longest_match(da, db, ca, cb)
                if match.size > 0:
                    next_match_candidates.append(match)
        if len(next_match_candidates) == 0:
            break
        next_match = max(next_match_candidates, key=lambda x: x.size)
        matching_blocks.append(next_match)
        matched_tokens += next_match.size

        # Update unmatched regions: drop segments fully covered by the match
        # and split segments that overlap it.
        new_unmatched_docs = []
        for da, db in unmatched_doc_words:
            # Check if this doc segment overlaps with the match
            if next_match.a < db and next_match.a + next_match.size > da:
                # Add segment before the match
                if next_match.a > da:
                    new_unmatched_docs.append((da, next_match.a))
                # Add segment after the match
                if next_match.a + next_match.size < db:
                    new_unmatched_docs.append((next_match.a + next_match.size, db))
            else:
                new_unmatched_docs.append((da, db))  # Keep non-overlapping segment
        unmatched_doc_words = new_unmatched_docs

        new_unmatched_cites = []
        for ca, cb in unmatched_cite_words:
            if next_match.b < cb and next_match.b + next_match.size > ca:
                if next_match.b > ca:
                    new_unmatched_cites.append((ca, next_match.b))
                if next_match.b + next_match.size < cb:
                    new_unmatched_cites.append((next_match.b + next_match.size, cb))
            else:
                new_unmatched_cites.append((ca, cb))
        unmatched_cite_words = new_unmatched_cites

    # 3. Convert matching word blocks back to character spans,
    #    merging adjacent or overlapping spans.
    char_spans = []
    for i, _, n in sorted(matching_blocks, key=lambda x: x.a):
        if n == 0:
            continue
        start_char = doc_tokens[i][1]
        end_char = doc_tokens[i + n - 1][2]
        if char_spans and char_spans[-1][1] >= start_char - 1:
            char_spans[-1] = (char_spans[-1][0], max(char_spans[-1][1], end_char))
        else:
            char_spans.append((start_char, end_char))

    return char_spans
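
For illustration, a minimal sketch of how these two helpers compose (an editor's example, not part of the commit; the sample strings are made up):

```python
from utils.interface_utils import find_citation_spans, generate_highlighted_markdown

# Hypothetical document and a fragmented citation of it
doc = "The quick brown fox jumps over the lazy dog."
quote = "quick brown fox ... lazy dog"

spans = find_citation_spans(doc, quote)  # -> [(4, 19), (35, 43)]
spans_with_info = [(s, e, "Supporting quote") for s, e in spans]
print(generate_highlighted_markdown(doc, spans_with_info))
```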
utils/llm_utils.py
ADDED
@@ -0,0 +1,315 @@
import json
import logging
import os

from huggingface_hub import HfApi, InferenceClient

import utils.interface_utils as interface_utils

# Default/fallback endpoint URL; can be overridden via HF_LLM_ENDPOINT_URL.
DEFAULT_LLM_ENDPOINT_URL = (
    "https://r5lahjemc2zuajga.us-east-1.aws.endpoints.huggingface.cloud"
)

# Endpoint name, taken from the environment with a default.
LLM_ENDPOINT_NAME = os.getenv("HF_LLM_ENDPOINT_NAME", "phi-4-max")

RETRIEVAL_SYSTEM_PROMPT = """**Instructions:**
You are a helpful assistant presented with a document excerpt and a question.
Your job is to retrieve the most relevant passages from the provided document excerpt that help answer the question.

For each passage retrieved from the documents, provide:
- a brief summary of the context leading up to the passage (2 sentences max)
- the supported passage quoted exactly
- a brief summary of how the points in the passage are relevant to the question (2 sentences max)

The supporting passages should be a JSON-formatted list of dictionaries with the keys 'context', 'quote', and 'relevance'.
Provide up to 4 different supporting passages covering as many different aspects of the topic in question as possible.
Only include passages that are relevant to the question. If there are fewer or no relevant passages in the document, just return a shorter or empty list.
"""

QA_RETRIEVAL_PROMPT = """Find passages from the following documents that help answer the question.

**Document Content:**
```markdown
{document}
```

**Question:**
{question}

JSON Output:"""

ANSWER_SYSTEM_PROMPT = """**Instructions:**
You are a helpful assistant presented with a list of snippets extracted from documents and a question.
The snippets are presented in a JSON-formatted list that includes a unique id (`id`), context, relevance, and the exact quote.
Your job is to answer the question based *only* on the most relevant provided snippet quotes, citing the snippets used for each sentence.

**Output Format:**
Your response *must* be a JSON-formatted list of dictionaries. Each dictionary represents a sentence in your answer and must have the following keys:
- `sentence`: A string containing the sentence.
- `citations`: A list of integers, where each integer is the `id` of a snippet that supports the sentence.

**Example Output:**
```json
[
  {
    "sentence": "This is the first sentence of the answer.",
    "citations": [1, 3]
  },
  {
    "sentence": "This is the second sentence, supported by another snippet.",
    "citations": [5]
  }
]
```

**Constraints:**
- Base your answer *only* on the information within the provided snippets.
- Do *not* use external knowledge.
- The sentences should flow together coherently.
- A single sentence can cite multiple snippets.
- The final answer should be no more than 5-6 sentences long.
- Ensure the output is valid JSON.
"""

ANSWER_PROMPT = """
Given the following snippets, answer the question.
```json
{snippets}
```

**Question:**
{question}

JSON Output:"""

# Initialize client using the token from environment variables
client = InferenceClient(token=os.getenv("HF_TOKEN"))
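
# Editor's note (illustrative, not part of the commit): the retrieval prompt
# asks the model for output shaped like this hypothetical example:
#   [
#     {
#       "context": "The section introduces the model's training setup.",
#       "quote": "The model was trained on 2 trillion tokens.",
#       "relevance": "Directly states the training corpus size asked about."
#     }
#   ]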


# --- Endpoint Status Check Function ---
def check_endpoint_status(token: str | None, endpoint_name: str = LLM_ENDPOINT_NAME):
    """Checks the Inference Endpoint status and returns a status dict."""
    # (Function body moved from app.py; logging is assumed to be configured.)
    logging.info(f"Checking endpoint status for '{endpoint_name}'...")
    if not token:
        logging.warning("HF Token not available, cannot check endpoint status.")
        return {
            "status": "ready",
            "warning": "HF Token not available for status check.",
        }
    try:
        api = HfApi(token=token)
        endpoint = api.get_inference_endpoint(name=endpoint_name, token=token)
        status = endpoint.status
        logging.info(f"Endpoint '{endpoint_name}' status: {status}")
        if status == "running":
            return {"status": "ready"}
        elif status == "scaledToZero":
            logging.info(
                f"Endpoint '{endpoint_name}' is scaled to zero. Attempting to resume..."
            )
            try:
                endpoint.resume()
                user_message = f"The required LLM endpoint ('{endpoint_name}') was scaled to zero and is **now restarting**. Please wait a few minutes and try submitting your query again."
                logging.info(f"Resume command sent for '{endpoint_name}'.")
                return {"status": "error", "ui_message": user_message}
            except Exception as resume_error:
                logging.error(
                    f"Failed to resume endpoint '{endpoint_name}': {resume_error}"
                )
                user_message = f"The required LLM endpoint ('{endpoint_name}') is scaled to zero. An attempt to automatically resume it failed: {resume_error}. Please check the endpoint status on Hugging Face."
                return {"status": "error", "ui_message": user_message}
        else:
            user_message = f"The required LLM endpoint ('{endpoint_name}') is currently **{status}**. Analysis cannot proceed until it is running. Please check the endpoint status on Hugging Face."
            logging.warning(
                f"Endpoint '{endpoint_name}' is not ready (Status: {status})."
            )
            return {"status": "error", "ui_message": user_message}
    except Exception as e:
        error_msg = f"Error checking endpoint status for {endpoint_name}: {e}"
        logging.error(error_msg)
        return {
            "status": "error",
            "ui_message": f"Failed to check endpoint status. Please verify the endpoint name ('{endpoint_name}') and your token. Error: {e}",
        }

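# Editor's note (illustrative): a caller might gate the pipeline on this check,
# e.g. (show_error is a hypothetical UI helper):
#   status = check_endpoint_status(os.getenv("HF_TOKEN"))
#   if status["status"] != "ready":
#       show_error(status.get("ui_message", "Endpoint unavailable"))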

def retrieve_passages(
    query, doc_embeds, passages, processed_docs, embed_model, max_docs=3
):
    """Retrieves relevant passages based on embedding similarity, limited by max_docs."""
    queries = [query]
    query_embeddings = embed_model.encode(queries, prompt_name="query")
    scores = embed_model.similarity(query_embeddings, doc_embeds)
    sorted_scores = scores.sort(descending=True)
    sorted_vals = sorted_scores.values[0].tolist()
    sorted_idx = sorted_scores.indices[0].tolist()
    results = [
        {
            "passage_id": i,
            "document_id": passages[i][0],
            "chunk_id": passages[i][1],
            "document_url": processed_docs[passages[i][0]]["url"],
            "passage_text": passages[i][2],
            "relevance": v,
        }
        for i, v in zip(sorted_idx, sorted_vals)
    ]
    # Keep only the top max_docs results
    return results[:max_docs]

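# Editor's note (illustrative, assumes a SentenceTransformer-style embed_model
# and precomputed corpus data; "my-embedding-model" is a placeholder):
#   from sentence_transformers import SentenceTransformer
#   embed_model = SentenceTransformer("my-embedding-model")
#   doc_embeds = embed_model.encode([p[2] for p in passages])
#   top = retrieve_passages(query, doc_embeds, passages, processed_docs, embed_model)
#   # -> up to 3 dicts with passage_id, document_id, chunk_id, document_url,
#   #    passage_text, and relevance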

# --- Excerpt Processing Function ---
def process_single_excerpt(
    excerpt_index: int, excerpt: dict, query: str, hf_client: InferenceClient
):
    """Processes a single retrieved excerpt using an LLM to find citations and spans."""

    passage_text = excerpt.get("passage_text", "")
    if not passage_text:
        return {
            "citations": [],
            "all_spans": [],
            "parse_successful": False,
            "raw_error_response": "Empty passage text",
        }

    citations = []
    all_spans = []
    is_parse_successful = False
    raw_error_response = None

    try:
        retrieval_prompt = QA_RETRIEVAL_PROMPT.format(
            document=passage_text, question=query
        )
        response = hf_client.chat_completion(
            messages=[
                {"role": "system", "content": RETRIEVAL_SYSTEM_PROMPT},
                {"role": "user", "content": retrieval_prompt},
            ],
            model=os.getenv("HF_LLM_ENDPOINT_URL", DEFAULT_LLM_ENDPOINT_URL),
            max_tokens=2048,
            temperature=0.01,
        )

        # Attempt to parse JSON
        response_content = response.choices[0].message.content.strip()
        try:
            # Find the ```json block in the response
            json_match = response_content.split("```json", 1)
            if len(json_match) > 1:
                json_str = json_match[1].split("```", 1)[0]
                parsed_json = json.loads(json_str)
                citations = parsed_json
                is_parse_successful = True
                # Find character spans for each citation quote
                for cit in citations:
                    quote = cit.get("quote", "")
                    if quote:
                        # Call find_citation_spans from interface_utils
                        spans = interface_utils.find_citation_spans(
                            document=passage_text, citation=quote
                        )
                        cit["char_spans"] = spans  # Store spans in the citation dict
                        all_spans.extend(spans)
            else:
                raise ValueError("No ```json block found in response")
        except (json.JSONDecodeError, ValueError, IndexError) as json_e:
            print(f"Error parsing JSON for excerpt {excerpt_index}: {json_e}")
            is_parse_successful = False
            raw_error_response = f"LLM Response (failed to parse): {response_content}"

    except Exception as llm_e:
        print(f"Error during LLM call for excerpt {excerpt_index}: {llm_e}")
        is_parse_successful = False
        raw_error_response = f"LLM API Error: {llm_e}"

    return {
        "citations": citations,
        "all_spans": all_spans,
        "parse_successful": is_parse_successful,
        "raw_error_response": raw_error_response,
    }


def generate_summary_answer(snippets: list, query: str, hf_client: InferenceClient):
    """Generates a summarized answer based on provided snippets using an LLM."""
    # The endpoint URL is read from the environment, with a fallback default.
    endpoint_url = os.getenv("HF_LLM_ENDPOINT_URL", DEFAULT_LLM_ENDPOINT_URL)
    if not snippets:
        return {
            "answer_sentences": [],
            "parse_successful": False,
            "raw_error_response": "No snippets provided for summarization.",
        }

    try:
        # Format the snippets as a JSON string for the prompt
        snippets_json_string = json.dumps(snippets, indent=2)

        answer_prompt_formatted = ANSWER_PROMPT.format(
            snippets=snippets_json_string, question=query
        )

        response = hf_client.chat_completion(
            messages=[
                {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
                {"role": "user", "content": answer_prompt_formatted},
            ],
            model=endpoint_url,
            max_tokens=512,
            temperature=0.01,
        )

        # Attempt to parse the JSON response
        response_content = response.choices[0].message.content.strip()
        try:
            # Find the JSON block (it may be wrapped in ```json ... ```)
            json_match = response_content.split("```json", 1)
            if len(json_match) > 1:
                json_str = json_match[1].split("```", 1)[0]
            else:  # Assume the response *is* the JSON if no backticks found
                json_str = response_content

            parsed_json = json.loads(json_str)

            # Basic validation: check that it's a list of dicts with expected keys
            if isinstance(parsed_json, list) and all(
                isinstance(item, dict) and "sentence" in item and "citations" in item
                for item in parsed_json
            ):
                return {
                    "answer_sentences": parsed_json,
                    "parse_successful": True,
                    "raw_error_response": None,
                }
            else:
                raise ValueError(
                    "Parsed JSON does not match expected format (list of {'sentence': ..., 'citations': ...})"
                )

        except (json.JSONDecodeError, ValueError, IndexError) as json_e:
            print(f"Error parsing summary JSON: {json_e}")
            return {
                "answer_sentences": [],
                "parse_successful": False,
                "raw_error_response": f"LLM Response (failed to parse summary): {response_content}",
            }

    except Exception as llm_e:
        print(f"Error during LLM summary call: {llm_e}")
        return {
            "answer_sentences": [],
            "parse_successful": False,
            "raw_error_response": f"LLM API Error during summary generation: {llm_e}",
        }


# NOTE: make_supporting_snippets was removed; excerpts are now processed one at
# a time in app.py via process_single_excerpt.
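
Taken together, the two modules support a retrieve-then-cite pipeline. A minimal end-to-end sketch follows (an editor's illustration, not part of the commit; `embed_model`, `doc_embeds`, `passages`, and `processed_docs` are assumed to be prepared elsewhere, e.g. in app.py, and the snippet `id` assignment is one plausible way to satisfy the answer prompt's expectations):

```python
import os

from huggingface_hub import InferenceClient

import utils.interface_utils as interface_utils
import utils.llm_utils as llm_utils

query = "What is the model trained on?"
hf_client = InferenceClient(token=os.getenv("HF_TOKEN"))

# 1. Make sure the endpoint is up (resumes it if scaled to zero).
status = llm_utils.check_endpoint_status(os.getenv("HF_TOKEN"))
assert status["status"] == "ready", status.get("ui_message")

# 2. Embed the query and fetch the top passages (inputs prepared elsewhere).
excerpts = llm_utils.retrieve_passages(
    query, doc_embeds, passages, processed_docs, embed_model, max_docs=3
)

# 3. Ask the LLM to quote supporting snippets from each excerpt, assign each
#    snippet an id, and highlight the located spans in the passage text.
snippets = []
for idx, excerpt in enumerate(excerpts):
    result = llm_utils.process_single_excerpt(idx, excerpt, query, hf_client)
    if result["parse_successful"]:
        for cit in result["citations"]:
            cit["id"] = len(snippets)
            snippets.append(cit)
        print(
            interface_utils.generate_highlighted_markdown(
                excerpt["passage_text"],
                [(s, e, "Cited") for s, e in result["all_spans"]],
            )
        )

# 4. Compose a cited answer from the collected snippets.
summary = llm_utils.generate_summary_answer(snippets, query, hf_client)
for item in summary.get("answer_sentences", []):
    print(item["sentence"], item["citations"])
```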