Spaces: Running on Zero
adjusted return type for pdf
app.py CHANGED
@@ -128,7 +128,7 @@ def extract_pdf_text(pdf_path: str) -> str:
 
     except Exception as e:
         logger.error(f"Error extracting text from PDF {pdf_path}: {e}")
-
+        raise ValueError(f"Failed to extract text from PDF: {str(e)}")
 
 
 def process_user_input(message: dict, max_images: int) -> list[dict]:
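
The hunk above replaces a silent failure path with an explicit ValueError, which is what the later hunks react to. As context, here is a minimal sketch of what extract_pdf_text could look like after this commit; only the except branch is taken from the diff, while the try body and the pypdf backend are assumptions (the Space's actual PDF library is not shown here):

import logging
from pypdf import PdfReader  # assumed backend, not confirmed by this diff

logger = logging.getLogger(__name__)

def extract_pdf_text(pdf_path: str) -> str:
    try:
        # Hypothetical extraction body; only the except branch below comes from the diff.
        reader = PdfReader(pdf_path)
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        logger.error(f"Error extracting text from PDF {pdf_path}: {e}")
        # New in this commit: raise instead of returning, so callers can react.
        raise ValueError(f"Failed to extract text from PDF: {str(e)}")
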
@@ -153,8 +153,13 @@ def process_user_input(message: dict, max_images: int) -> list[dict]:
                 result_content.append({"type": "text", "text": f"Error processing video: {str(e)}"})
         elif file_path.lower().endswith(".pdf"):
             try:
+                logger.info(f"Processing PDF file: {file_path}")
                 pdf_text = extract_pdf_text(file_path)
+                logger.debug(f"PDF text extracted successfully, length: {len(pdf_text)} characters")
                 result_content.append({"type": "text", "text": f"PDF Content:\n{pdf_text}"})
+            except ValueError as ve:
+                logger.error(f"PDF validation failed: {ve}")
+                result_content.append({"type": "text", "text": f"Error processing PDF: {str(ve)}"})
             except Exception as e:
                 logger.error(f"PDF processing failed: {e}")
                 result_content.append({"type": "text", "text": f"Error processing PDF: {str(e)}"})
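
One detail worth noting in the hunk above: the new except ValueError clause sits before the broad except Exception, which is what lets the specific handler run at all. The pattern in isolation, with handle as a hypothetical stand-in for appending to result_content:

try:
    pdf_text = extract_pdf_text(file_path)  # may now raise ValueError
except ValueError as ve:                    # specific: extraction/validation failures
    handle(f"Error processing PDF: {ve}")
except Exception as e:                      # broad: anything else
    handle(f"Error processing PDF: {e}")
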
@@ -255,9 +260,15 @@ def run(
         {"role": "system", "content": [{"type": "text", "text": system_prompt}]}
     )
     messages.extend(process_history(history))
+    user_content = process_user_input(message, max_images)
     messages.append(
-        {"role": "user", "content": 
+        {"role": "user", "content": user_content}
     )
+
+    # Validate messages structure before processing
+    logger.debug(f"Final messages structure: {len(messages)} messages")
+    for i, msg in enumerate(messages):
+        logger.debug(f"Message {i}: role={msg.get('role', 'MISSING')}, content_type={type(msg.get('content', 'MISSING'))}")
 
     inputs = input_processor.apply_chat_template(
         messages,
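
The debug loop added above only reports each message's shape. Based on the structure this diff shows (a role plus a list of {"type": ...} content parts), a stricter helper could fail fast instead of just logging; validate_messages below is illustrative and not part of the commit:

def validate_messages(messages: list[dict]) -> None:
    # Hypothetical helper: enforce what the debug logging above only reports.
    for i, msg in enumerate(messages):
        if "role" not in msg:
            raise ValueError(f"Message {i} is missing 'role'")
        if not isinstance(msg.get("content"), list):
            raise ValueError(f"Message {i} 'content' should be a list of content parts")
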
@@ -281,7 +292,19 @@ def run(
         do_sample=True,
     )
 
-
+    # Wrapper function to catch thread exceptions
+    def safe_generate():
+        try:
+            selected_model.generate(**generate_kwargs)
+        except Exception as thread_e:
+            logger.error(f"Exception in generation thread: {thread_e}")
+            logger.error(f"Thread exception type: {type(thread_e)}")
+            # Store the exception so we can handle it in the main thread
+            import traceback
+            logger.error(f"Thread traceback: {traceback.format_exc()}")
+            raise
+
+    t = Thread(target=safe_generate)
     t.start()
 
     output = ""
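
A caveat on the wrapper above: re-raising inside a Thread target ends that worker thread and is handed to threading.excepthook, but it does not propagate into the main thread that is consuming the stream. If the caller should see the failure, a hypothetical variant can record the exception and re-raise it after join; the thread_exc holder below is illustrative, not part of the commit:

from threading import Thread

thread_exc: list[BaseException] = []  # hypothetical holder checked by the main thread

def safe_generate():
    try:
        selected_model.generate(**generate_kwargs)
    except Exception as e:
        logger.error(f"Exception in generation thread: {e}")
        thread_exc.append(e)  # keep it so the caller can surface it
        raise                 # still terminates this worker thread

t = Thread(target=safe_generate)
t.start()
# ... stream partial output to the UI ...
t.join()
if thread_exc:
    raise thread_exc[0]
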
@@ -335,7 +358,18 @@ def run(
         do_sample=True,
     )
 
-
+    # Wrapper function to catch thread exceptions in fallback
+    def safe_fallback_generate():
+        try:
+            selected_model.generate(**generate_kwargs)
+        except Exception as thread_e:
+            logger.error(f"Exception in fallback generation thread: {thread_e}")
+            logger.error(f"Fallback thread exception type: {type(thread_e)}")
+            import traceback
+            logger.error(f"Fallback thread traceback: {traceback.format_exc()}")
+            raise
+
+    t = Thread(target=safe_fallback_generate)
     t.start()
 
     output = f"⚠️ Switched to {fallback_name} due to {current_model_name} failure.\n\n"
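
safe_generate and safe_fallback_generate differ only in their log prefixes. A possible follow-up could factor them into one small factory; make_safe_generate is illustrative and not in the commit:

import traceback
from threading import Thread

def make_safe_generate(tag: str, model, kwargs: dict):
    # Hypothetical refactor: one wrapper factory for both the primary and fallback paths.
    def _target():
        try:
            model.generate(**kwargs)
        except Exception as e:
            logger.error(f"Exception in {tag} generation thread: {e}")
            logger.error(f"{tag} thread traceback: {traceback.format_exc()}")
            raise
    return _target

t = Thread(target=make_safe_generate("fallback", selected_model, generate_kwargs))
t.start()
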