| | |
| | from flask import Flask, request, jsonify, render_template |
| | import requests |
| | import json |
| | import asyncio |
| | import os |
| | import uuid |
| | import base64 |
| | import io |
| |
|
| | |
# PyPDF2 is optional: degrade gracefully at import time instead of crashing;
# the /generate route returns a 500 for PDF uploads when it is missing.
try:
    from PyPDF2 import PdfReader
except ImportError:
    print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
    PdfReader = None  # sentinel checked before any PDF parsing

app = Flask(__name__)

# In-memory map of session_id -> chat history (list of {"role", "parts"} dicts).
# NOTE(review): not persistent and not shared across workers/processes —
# histories vanish on restart; confirm this is acceptable for deployment.
conversation_histories = {}
| |
|
async def generate_solution_python(chat_history):
    """
    Generate a model response for the conversation via the Gemini API.

    Args:
        chat_history (list): Conversation messages, each of the form
            {"role": "user" | "model", "parts": [...]}, where a part is either
            {"text": "..."} or
            {"inlineData": {"mimeType": "image/png", "data": "<base64>"}}.

    Returns:
        str: The generated text, or a human-readable error message when the
        history is empty, the API key is missing, or the request fails.
    """
    if not chat_history:
        return "Error: Chat history is empty."

    print(f"Processing chat history length: {len(chat_history)}")
    response_text = ""

    try:
        # NOTE: the previous revision built an unused "dummy context" string
        # from the latest user part; that scan was dead code (its result was
        # never read) and contained a precedence bug that could raise
        # KeyError on text-less parts, so it has been removed.
        print("Calling Gemini API with full chat history...")
        llm_payload = {"contents": chat_history}

        # The key comes from the environment (e.g. a Space secret); fail fast
        # with a clear message when it is not configured.
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")

        gemini_api_url = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            f"gemini-2.0-flash:generateContent?key={gemini_api_key}"
        )

        # requests is synchronous; run it in a worker thread so this coroutine
        # does not block the event loop. The timeout keeps one hung upstream
        # call from stalling the worker forever.
        gemini_response = await asyncio.to_thread(
            requests.post,
            gemini_api_url,
            headers={'Content-Type': 'application/json'},
            data=json.dumps(llm_payload),
            timeout=60,
        )

        gemini_response.raise_for_status()
        llm_result = gemini_response.json()
        print("Gemini API response received.")

        # EAFP walk of candidates[0].content.parts[0].text; any missing or
        # malformed level falls through to the user-facing fallback message.
        try:
            response_text = llm_result['candidates'][0]['content']['parts'][0]['text']
        except (KeyError, IndexError, TypeError):
            response_text = "No solution could be generated. Please try a different query."

    except requests.exceptions.RequestException as e:
        error_message = f"Network or API error during LLM call: {e}"
        print(f"Error: {error_message}")
        response_text = f"An API error occurred: {error_message}. Please check the logs."
    except ValueError as e:
        error_message = f"Configuration error (e.g., missing API key): {e}"
        print(f"Error: {error_message}")
        response_text = f"A configuration error occurred: {error_message}. Please check your Space secrets."
    except Exception as e:
        error_message = f"An unexpected error occurred in generate_solution_python: {e}"
        print(f"Error: {error_message}")
        response_text = f"An unexpected error occurred: {error_message}. Please check the logs."

    return response_text
| |
|
| | |
| |
|
@app.route('/')
def index():
    """Render the application's single-page UI."""
    template_name = 'index.html'
    return render_template(template_name)
| |
|
def _extract_pdf_text(b64_pdf):
    """Decode a base64-encoded PDF and return the concatenated text of all pages.

    Returns an empty string when no page yields extractable text (e.g. the PDF
    is image-based). Raises on invalid base64 or unparsable PDF data.
    """
    pdf_bytes = base64.b64decode(b64_pdf)
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # extract_text() may return None for image-only pages; coalesce to "".
    return "".join(page.extract_text() or "" for page in reader.pages)


@app.route('/generate', methods=['POST'])
async def generate():
    """Handle an AI generation request with conversation history and multi-modal input.

    Expects a JSON body with any of: 'query' (str), 'image_data'
    (inlineData dict), 'document_text' (str), 'pdf_data' ({'data': base64}),
    plus an optional 'session_id' keying the per-conversation history.

    Returns:
        JSON {"solution": str, "session_id": str} on success, or
        {"error": str} with HTTP 400/500 on failure.
    """
    session_id = None
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400

        user_query = data.get('query')
        image_data = data.get('image_data')
        document_text = data.get('document_text')
        pdf_data = data.get('pdf_data')

        # A missing session_id gets a fresh UUID; the client must echo it
        # back to continue the same conversation.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")

        if not (user_query or image_data or document_text or pdf_data):
            return jsonify({"error": "Query, image, or document is required in the request body"}), 400

        current_chat_history = conversation_histories.get(session_id, [])

        # Assemble the multi-modal parts of this "user" turn.
        user_message_parts = []
        if user_query:
            user_message_parts.append({"text": user_query})

        if image_data:
            user_message_parts.append({"inlineData": image_data})
            print("Received image data for processing.")

        if document_text:
            user_message_parts.append({"text": f"Document content:\n{document_text}"})
            print("Received text document content for processing.")

        if pdf_data:
            if not PdfReader:
                return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500

            try:
                pdf_extracted_text = _extract_pdf_text(pdf_data['data'])
                if pdf_extracted_text.strip():
                    user_message_parts.append({"text": f"PDF Document Content:\n{pdf_extracted_text}"})
                    print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
                else:
                    user_message_parts.append({"text": "PDF Document: (No extractable text found or PDF is image-based)"})
                    print("No extractable text found in PDF.")
            except Exception as pdf_error:
                # Best-effort: surface the parse failure to the model as text
                # rather than failing the whole request.
                print(f"Error processing PDF: {pdf_error}")
                user_message_parts.append({"text": f"PDF Document: (Error processing PDF: {pdf_error})"})

        # With no text query, prepend a default instruction so the model
        # knows what to do with the attachment.
        if not user_query and (image_data or document_text or pdf_data):
            if image_data:
                user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
            elif document_text or pdf_data:
                user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})

        current_chat_history.append({"role": "user", "parts": user_message_parts})

        solution_text = await generate_solution_python(current_chat_history)

        # Record the model turn (even error strings) so the transcript stays
        # consistent, then persist the updated history for this session.
        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history

        return jsonify({"solution": solution_text, "session_id": session_id})

    except Exception as e:
        # Top-level boundary: log and convert anything unexpected to a JSON 500.
        print(f"Error in /generate endpoint: {e}")
        if session_id:
            return jsonify({"error": f"Internal server error for session {session_id}: {e}"}), 500
        else:
            return jsonify({"error": f"Internal server error: {e}"}), 500
| |
|
if __name__ == '__main__':
    # 0.0.0.0 exposes the dev server to the container network; port 7860 is
    # presumably the Hugging Face Spaces convention — confirm if deploying
    # elsewhere.
    app.run(host='0.0.0.0', port=7860)
| |
|