Spaces:
Running
Running
Update document_generator.py
Browse files- document_generator.py +216 -23
document_generator.py
CHANGED
@@ -81,6 +81,156 @@ FORMAT YOUR OUTPUT AS A TEMPLATE ENCLOSED IN <response></response> tags
|
|
81 |
DOCUMENT_TEMPLATE_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
|
82 |
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
# File: app.py
|
86 |
import os
|
@@ -138,30 +288,17 @@ class DatabaseManager:
|
|
138 |
"""
|
139 |
cur.execute(insert_query, (user_id, user_query, response))
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
)
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
self,
|
151 |
-
messages: List[Dict[str, str]],
|
152 |
-
model: str = "openai/gpt-4o-mini",
|
153 |
-
max_tokens: int = 32000
|
154 |
-
) -> Optional[str]:
|
155 |
-
if not messages:
|
156 |
-
return None
|
157 |
-
response = self.client.chat.completions.create(
|
158 |
-
model=model,
|
159 |
-
messages=messages,
|
160 |
-
max_tokens=max_tokens,
|
161 |
-
stream=False
|
162 |
-
)
|
163 |
-
return response.choices[0].message.content
|
164 |
|
|
|
165 |
class DocumentGenerator:
|
166 |
def __init__(self, ai_client: AIClient):
|
167 |
self.ai_client = ai_client
|
@@ -395,6 +532,62 @@ async def generate_markdown_document_stream_endpoint(request: MarkdownDocumentRe
|
|
395 |
|
396 |
return StreamingResponse(stream_generator(), media_type="application/octet-stream")
|
397 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
###########################################
|
399 |
class MarkdownDocumentResponse(BaseModel):
|
400 |
markdown_document: str
|
|
|
81 |
DOCUMENT_TEMPLATE_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
|
82 |
|
83 |
|
84 |
+
# File: llm_observability.py
|
85 |
+
|
86 |
+
import sqlite3
|
87 |
+
import json
|
88 |
+
from datetime import datetime
|
89 |
+
from typing import Dict, Any, List, Optional
|
90 |
+
|
91 |
+
class LLMObservabilityManager:
    """Persist and query records of LLM calls in a local SQLite database.

    Each record ("observation") describes one chat-completion call: the
    serialized request, the first choice's message, the model name, token
    usage, latency and the calling user.
    """

    def __init__(self, db_path: str = "llm_observability.db"):
        """Store ``db_path`` and make sure the observations table exists."""
        self.db_path = db_path
        self.create_table()

    def create_table(self):
        """Create the ``llm_observations`` table if it is not already there."""
        conn = sqlite3.connect(self.db_path)
        try:
            # ``with conn`` only commits/rolls back the transaction; the
            # connection itself is closed explicitly in ``finally``.
            with conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS llm_observations (
                        id TEXT PRIMARY KEY,
                        conversation_id TEXT,
                        created_at DATETIME,
                        status TEXT,
                        request TEXT,
                        response TEXT,
                        model TEXT,
                        total_tokens INTEGER,
                        prompt_tokens INTEGER,
                        completion_tokens INTEGER,
                        latency FLOAT,
                        user TEXT
                    )
                ''')
        finally:
            conn.close()

    def insert_observation(self, response: Dict[str, Any], conversation_id: str, status: str, request: str, latency: float, user: str):
        """Insert one observation row built from a completion ``response`` dict.

        Args:
            response: Completion payload; the keys ``id``, ``created``,
                ``choices`` and ``model`` are read, plus ``usage`` when present.
            conversation_id: Identifier used to group related calls.
            status: Free-form status label stored as-is.
            request: Already-serialized request text stored as-is.
            latency: Call duration in seconds as measured by the caller.
            user: Identifier of the caller.

        Raises:
            KeyError: If a required key is missing from ``response``.
            sqlite3.IntegrityError: If ``response['id']`` was already stored.
        """
        # Stored as "YYYY-MM-DD HH:MM:SS[.ffffff]" text in local time. This is
        # the same text the default sqlite3 datetime adapter produced, without
        # relying on that adapter (deprecated since Python 3.12).
        created_at = datetime.fromtimestamp(response['created']).isoformat(" ")

        # Token counts are stored as NULL when the payload carries no usage.
        usage = response.get('usage') or {}

        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute('''
                    INSERT INTO llm_observations
                    (id, conversation_id, created_at, status, request, response, model, total_tokens, prompt_tokens, completion_tokens, latency, user)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    response['id'],
                    conversation_id,
                    created_at,
                    status,
                    request,
                    json.dumps(response['choices'][0]['message']),
                    response['model'],
                    usage.get('total_tokens'),
                    usage.get('prompt_tokens'),
                    usage.get('completion_tokens'),
                    latency,
                    user
                ))
        finally:
            conn.close()

    def get_observations(self, conversation_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return observations as dicts keyed by column name, oldest first.

        Args:
            conversation_id: When truthy, only rows of that conversation are
                returned; otherwise every row is returned.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            if conversation_id:
                cursor = conn.execute('SELECT * FROM llm_observations WHERE conversation_id = ? ORDER BY created_at', (conversation_id,))
            else:
                cursor = conn.execute('SELECT * FROM llm_observations ORDER BY created_at')
            rows = cursor.fetchall()
            column_names = [description[0] for description in cursor.description]
        finally:
            conn.close()

        return [dict(zip(column_names, row)) for row in rows]

    def get_all_observations(self) -> List[Dict[str, Any]]:
        """Return every stored observation (same as ``get_observations()``)."""
        return self.get_observations()
|
154 |
+
|
155 |
+
|
156 |
+
# File: aiclient.py
|
157 |
+
|
158 |
+
class AIClient:
    """Chat-completion client that records every successful call.

    Wraps an ``OpenAI`` client pointed at the OpenRouter base URL and writes
    one observation per completed request through ``LLMObservabilityManager``.
    """

    def __init__(self):
        """Build the API client and the observability manager.

        Raises:
            KeyError: If ``OPENROUTER_API_KEY`` is not set in the environment.
        """
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            # NOTE(review): the env value is prefixed with "sk-or-v1-", so the
            # variable presumably holds only the key's suffix - confirm.
            api_key="sk-or-v1-" + os.environ['OPENROUTER_API_KEY']
        )
        self.observability_manager = LLMObservabilityManager()

    def _complete_and_log(self, messages, model: str, max_tokens: int,
                          conversation_id: Optional[str], user: str) -> Optional[str]:
        """Run one non-streaming completion, log it, and return its text.

        Shared implementation of ``generate_response`` and
        ``generate_vision_response``, which differ only in their defaults and
        in the shape of ``messages``.
        """
        start_time = time.time()
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            stream=False
        )
        latency = time.time() - start_time

        # Log the observation; calls without a conversation id are grouped
        # under the "default" conversation.
        self.observability_manager.insert_observation(
            response=response.dict(),
            conversation_id=conversation_id or "default",
            status="success",
            request=json.dumps(messages),
            latency=latency,
            user=user
        )

        return response.choices[0].message.content

    @log_execution
    def generate_response(
        self,
        messages: List[Dict[str, str]],
        model: str = "openai/gpt-4o-mini",
        max_tokens: int = 32000,
        conversation_id: Optional[str] = None,
        user: str = "anonymous"
    ) -> Optional[str]:
        """Return the model's reply to ``messages``, or ``None`` if empty.

        Args:
            messages: Chat messages passed to the completion API as-is.
            model: Model identifier sent to the API.
            max_tokens: Completion token limit sent to the API.
            conversation_id: Grouping key for the logged observation.
            user: Caller identifier stored with the observation.
        """
        if not messages:
            return None
        return self._complete_and_log(messages, model, max_tokens, conversation_id, user)

    @log_execution
    def generate_vision_response(
        self,
        messages: List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]],
        model: str = "google/gemini-flash-1.5-8b",
        max_tokens: int = 32000,
        conversation_id: Optional[str] = None,
        user: str = "anonymous"
    ) -> Optional[str]:
        """Return the model's reply to multi-part ``messages``, or ``None``.

        Same flow as ``generate_response``; only the default model and the
        accepted message shape (content may be a list of parts) differ.
        """
        if not messages:
            return None
        return self._complete_and_log(messages, model, max_tokens, conversation_id, user)
|
233 |
+
|
234 |
|
235 |
# File: app.py
|
236 |
import os
|
|
|
288 |
"""
|
289 |
cur.execute(insert_query, (user_id, user_query, response))
|
290 |
|
291 |
+
def log_execution(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped function's return value is passed through unchanged. Nothing
    is printed when ``func`` raises; the exception propagates to the caller.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{func.__name__} executed in {elapsed:.2f} seconds")
        return result
    return wrapper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
|
301 |
+
|
302 |
class DocumentGenerator:
|
303 |
def __init__(self, ai_client: AIClient):
|
304 |
self.ai_client = ai_client
|
|
|
532 |
|
533 |
return StreamingResponse(stream_generator(), media_type="application/octet-stream")
|
534 |
|
535 |
+
|
536 |
+
## OBSERVABILITY
|
537 |
+
from uuid import uuid4
|
538 |
+
import csv
|
539 |
+
from io import StringIO
|
540 |
+
|
541 |
+
def create_csv_response(observations: List[Dict]) -> StreamingResponse:
    """Wrap ``observations`` in a downloadable CSV ``StreamingResponse``.

    The columns are taken from the keys of the first observation; an empty
    list yields a CSV with an empty header line. The response is served as
    an attachment named ``observations.csv``.
    """
    def iter_csv(data):
        # The whole document is rendered in memory and emitted as one chunk.
        buffer = StringIO()
        columns = data[0].keys() if data else []
        writer = csv.DictWriter(buffer, fieldnames=columns)
        writer.writeheader()
        writer.writerows(data)
        yield buffer.getvalue()

    return StreamingResponse(
        iter_csv(observations),
        media_type="text/csv",
        headers={'Content-Disposition': 'attachment; filename="observations.csv"'},
    )
|
555 |
+
|
556 |
+
|
557 |
+
@router.get("/last-observations/{limit}")
async def get_last_observations(limit: int = 10, format: str = "json"):
    """Return up to ``limit`` observations of the most recent conversation.

    Observations are ordered newest first. The "most recent conversation" is
    the conversation of the newest stored observation.

    Args:
        limit: Maximum number of observations to return; negative values are
            treated as 0.
        format: ``"csv"`` (case-insensitive) for a CSV download; any other
            value returns an ``ObservationResponse``.

    Raises:
        HTTPException: With status 500 if the observations cannot be read.
    """
    observability_manager = LLMObservabilityManager()

    try:
        # Get all observations, sorted by created_at in descending order
        all_observations = observability_manager.get_observations()
        all_observations.sort(key=lambda x: x['created_at'], reverse=True)

        last_conversation_observations = []
        if all_observations:
            last_conversation_id = all_observations[0]['conversation_id']

            # Clamp the limit: a negative slice bound would silently drop
            # rows from the end instead of limiting the result.
            last_conversation_observations = [
                obs for obs in all_observations
                if obs['conversation_id'] == last_conversation_id
            ][:max(limit, 0)]

        if format.lower() == "csv":
            return create_csv_response(last_conversation_observations)
        return ObservationResponse(observations=last_conversation_observations)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to retrieve observations: {str(e)}") from e
|
587 |
+
|
588 |
+
|
589 |
+
|
590 |
+
|
591 |
###########################################
|
592 |
class MarkdownDocumentResponse(BaseModel):
|
593 |
markdown_document: str
|