| | |
| | from flask import Flask, request, jsonify, render_template |
| | import requests |
| | import json |
| | import asyncio |
| | import os |
| | import uuid |
| | import base64 |
| | import io |
| |
|
| | |
# PyPDF2 is optional: degrade gracefully at import time instead of crashing;
# the /generate route returns a 500 for PDF uploads when it is missing.
try:
    from PyPDF2 import PdfReader
except ImportError:
    print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
    PdfReader = None  # sentinel checked before any PDF parsing

app = Flask(__name__)

# In-memory map of session_id -> chat history (list of {"role", "parts"} dicts).
# NOTE(review): not persistent and not shared across workers/processes —
# histories vanish on restart; confirm this is acceptable for deployment.
conversation_histories = {}
| |
|
async def generate_solution_python(chat_history):
    """
    Generate a model response for the conversation via the Gemini API.

    Args:
        chat_history (list): Conversation messages, each of the form
            {"role": "user" | "model", "parts": [...]}, where a part is either
            {"text": "..."} or
            {"inlineData": {"mimeType": "image/png", "data": "<base64>"}}.

    Returns:
        str: The generated text, or a human-readable error message when the
        history is empty, the API key is missing, or the request fails.
    """
    if not chat_history:
        return "Error: Chat history is empty."

    print(f"Processing chat history length: {len(chat_history)}")
    response_text = ""

    try:
        # NOTE: the previous revision built an unused "dummy context" string
        # from the latest user part; that scan was dead code (its result was
        # never read) and contained a precedence bug that could raise
        # KeyError on text-less parts, so it has been removed.
        print("Calling Gemini API with full chat history...")
        llm_payload = {"contents": chat_history}

        # The key comes from the environment (e.g. a Space secret); fail fast
        # with a clear message when it is not configured.
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")

        gemini_api_url = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            f"gemini-2.0-flash:generateContent?key={gemini_api_key}"
        )

        # requests is synchronous; run it in a worker thread so this coroutine
        # does not block the event loop. The timeout keeps one hung upstream
        # call from stalling the worker forever.
        gemini_response = await asyncio.to_thread(
            requests.post,
            gemini_api_url,
            headers={'Content-Type': 'application/json'},
            data=json.dumps(llm_payload),
            timeout=60,
        )

        gemini_response.raise_for_status()
        llm_result = gemini_response.json()
        print("Gemini API response received.")

        # EAFP walk of candidates[0].content.parts[0].text; any missing or
        # malformed level falls through to the user-facing fallback message.
        try:
            response_text = llm_result['candidates'][0]['content']['parts'][0]['text']
        except (KeyError, IndexError, TypeError):
            response_text = "No solution could be generated. Please try a different query."

    except requests.exceptions.RequestException as e:
        error_message = f"Network or API error during LLM call: {e}"
        print(f"Error: {error_message}")
        response_text = f"An API error occurred: {error_message}. Please check the logs."
    except ValueError as e:
        error_message = f"Configuration error (e.g., missing API key): {e}"
        print(f"Error: {error_message}")
        response_text = f"A configuration error occurred: {error_message}. Please check your Space secrets."
    except Exception as e:
        error_message = f"An unexpected error occurred in generate_solution_python: {e}"
        print(f"Error: {error_message}")
        response_text = f"An unexpected error occurred: {error_message}. Please check the logs."

    return response_text
| |
|
| | |
| |
|
@app.route('/')
def index():
    """Render the application's single-page UI."""
    template_name = 'index.html'
    return render_template(template_name)
| |
|
def _extract_pdf_text(b64_pdf):
    """Decode a base64-encoded PDF and return the concatenated text of all pages.

    Returns an empty string when no page yields extractable text (e.g. the PDF
    is image-based). Raises on invalid base64 or unparsable PDF data.
    """
    pdf_bytes = base64.b64decode(b64_pdf)
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # extract_text() may return None for image-only pages; coalesce to "".
    return "".join(page.extract_text() or "" for page in reader.pages)


@app.route('/generate', methods=['POST'])
async def generate():
    """Handle an AI generation request with conversation history and multi-modal input.

    Expects a JSON body with any of: 'query' (str), 'image_data'
    (inlineData dict), 'document_text' (str), 'pdf_data' ({'data': base64}),
    plus an optional 'session_id' keying the per-conversation history.

    Returns:
        JSON {"solution": str, "session_id": str} on success, or
        {"error": str} with HTTP 400/500 on failure.
    """
    session_id = None
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400

        user_query = data.get('query')
        image_data = data.get('image_data')
        document_text = data.get('document_text')
        pdf_data = data.get('pdf_data')

        # A missing session_id gets a fresh UUID; the client must echo it
        # back to continue the same conversation.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")

        if not (user_query or image_data or document_text or pdf_data):
            return jsonify({"error": "Query, image, or document is required in the request body"}), 400

        current_chat_history = conversation_histories.get(session_id, [])

        # Assemble the multi-modal parts of this "user" turn.
        user_message_parts = []
        if user_query:
            user_message_parts.append({"text": user_query})

        if image_data:
            user_message_parts.append({"inlineData": image_data})
            print("Received image data for processing.")

        if document_text:
            user_message_parts.append({"text": f"Document content:\n{document_text}"})
            print("Received text document content for processing.")

        if pdf_data:
            if not PdfReader:
                return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500

            try:
                pdf_extracted_text = _extract_pdf_text(pdf_data['data'])
                if pdf_extracted_text.strip():
                    user_message_parts.append({"text": f"PDF Document Content:\n{pdf_extracted_text}"})
                    print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
                else:
                    user_message_parts.append({"text": "PDF Document: (No extractable text found or PDF is image-based)"})
                    print("No extractable text found in PDF.")
            except Exception as pdf_error:
                # Best-effort: surface the parse failure to the model as text
                # rather than failing the whole request.
                print(f"Error processing PDF: {pdf_error}")
                user_message_parts.append({"text": f"PDF Document: (Error processing PDF: {pdf_error})"})

        # With no text query, prepend a default instruction so the model
        # knows what to do with the attachment.
        if not user_query and (image_data or document_text or pdf_data):
            if image_data:
                user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
            elif document_text or pdf_data:
                user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})

        current_chat_history.append({"role": "user", "parts": user_message_parts})

        solution_text = await generate_solution_python(current_chat_history)

        # Record the model turn (even error strings) so the transcript stays
        # consistent, then persist the updated history for this session.
        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history

        return jsonify({"solution": solution_text, "session_id": session_id})

    except Exception as e:
        # Top-level boundary: log and convert anything unexpected to a JSON 500.
        print(f"Error in /generate endpoint: {e}")
        if session_id:
            return jsonify({"error": f"Internal server error for session {session_id}: {e}"}), 500
        else:
            return jsonify({"error": f"Internal server error: {e}"}), 500
| |
|
if __name__ == '__main__':
    # 0.0.0.0 exposes the dev server to the container network; port 7860 is
    # presumably the Hugging Face Spaces convention — confirm if deploying
    # elsewhere.
    app.run(host='0.0.0.0', port=7860)
| |
|