Spaces:
Runtime error
Runtime error
| from pathlib import Path | |
| from typing import Dict, List, Optional, Any, Union | |
| import os | |
| import json | |
| import tempfile | |
| import base64 | |
| from PIL import Image | |
| import io | |
| from src.parsers.parser_interface import DocumentParser | |
| from src.parsers.parser_registry import ParserRegistry | |
# The google-genai client is an optional dependency: remember whether the
# import succeeded so the rest of the module can react instead of failing
# at import time.
try:
    from google import genai
except ImportError:
    GEMINI_AVAILABLE = False
else:
    GEMINI_AVAILABLE = True

# The Gemini API key is read once, when the module is imported.
api_key = os.environ.get("GOOGLE_API_KEY")

# A missing (or empty) key is only reported here; parse() is what refuses
# to run without it.
if not api_key:
    print("Warning: GOOGLE_API_KEY environment variable not found. Gemini Flash parser may not work.")
class GeminiFlashParser(DocumentParser):
    """Parser that uses Google's Gemini Flash 2.0 to convert documents to markdown.

    The file is read as raw bytes and sent, together with a fixed
    instruction prompt, to the ``gemini-2.0-flash`` model. The model's text
    reply is returned as the markdown result.
    """

    # Fallback generation settings. The previous code read these from a
    # ``config`` object that is never imported in this module, so every call
    # failed with NameError. Callers can override both through parse() kwargs.
    DEFAULT_TEMPERATURE = 0.2
    DEFAULT_MAX_OUTPUT_TOKENS = 8192

    # Instruction sent ahead of the document bytes.
    _PROMPT = (
        "\n"
        "Convert this document to markdown format.\n"
        "Preserve the structure, headings, lists, tables, and formatting as much as possible.\n"
        "For images, include a brief description in markdown image syntax.\n"
        "Return only the markdown content, no other text.\n"
    )

    # Lower-cased file extension (with leading dot) -> MIME type sent to the API.
    _MIME_TYPES = {
        ".pdf": "application/pdf",
        ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ".doc": "application/msword",
        ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ".ppt": "application/vnd.ms-powerpoint",
        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ".xls": "application/vnd.ms-excel",
        ".txt": "text/plain",
        ".md": "text/markdown",
        ".html": "text/html",
        ".htm": "text/html",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".bmp": "image/bmp",
        ".tiff": "image/tiff",
        ".tif": "image/tiff",
    }

    # NOTE(review): these three accessors take ``cls`` and read no instance
    # state, so they are declared as classmethods; this presumably matches
    # the DocumentParser interface -- confirm against parser_interface.
    @classmethod
    def get_name(cls) -> str:
        """Return the display name of this parser."""
        return "Gemini Flash"

    @classmethod
    def get_supported_ocr_methods(cls) -> List[Dict[str, Any]]:
        """Return the OCR method descriptors; only a single "none" entry is offered."""
        return [
            {
                "id": "none",
                "name": "None",
                "default_params": {},
            }
        ]

    @classmethod
    def get_description(cls) -> str:
        """Return a one-line description of this parser."""
        return "Gemini Flash 2.0 parser for converting documents and images to markdown"

    def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
        """Parse a document using Gemini Flash 2.0.

        Args:
            file_path: Path of the document or image to convert.
            ocr_method: Accepted for interface compatibility; not used here.
            **kwargs: Optional overrides. ``temperature`` and
                ``max_output_tokens`` replace the class defaults; any other
                keys are ignored.

        Returns:
            The markdown text produced by the model. If reading the file or
            calling the API fails, a small markdown error document is
            returned instead of raising.

        Raises:
            ImportError: If the google-genai package is not installed.
            ValueError: If GOOGLE_API_KEY is not set.
        """
        if not GEMINI_AVAILABLE:
            raise ImportError(
                "The Google Gemini API client is not installed. "
                "Please install it with 'pip install google-genai'."
            )

        # Use the globally loaded API key
        if not api_key:
            raise ValueError(
                "GOOGLE_API_KEY environment variable is not set. "
                "Please set it to your Gemini API key."
            )

        temperature = kwargs.get("temperature", self.DEFAULT_TEMPERATURE)
        max_output_tokens = kwargs.get("max_output_tokens", self.DEFAULT_MAX_OUTPUT_TOKENS)

        try:
            file_path = Path(file_path)
            file_content = file_path.read_bytes()
            # The MIME type is derived purely from the file extension.
            mime_type = self._get_mime_type(file_path.suffix.lower())

            client = genai.Client(api_key=api_key)
            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[
                    self._PROMPT,
                    genai.types.Part.from_bytes(
                        data=file_content,
                        mime_type=mime_type,
                    ),
                ],
                config={
                    "temperature": temperature,
                    "top_p": 0.95,
                    "top_k": 40,
                    "max_output_tokens": max_output_tokens,
                },
            )
            return response.text
        except Exception as e:
            # Deliberate best-effort boundary: any failure is reported as a
            # markdown document rather than propagated to the caller.
            error_message = f"Error parsing document with Gemini Flash: {e}"
            print(error_message)
            return f"# Error\n\n{error_message}\n\nPlease check your API key and try again."

    def _get_mime_type(self, file_extension: str) -> str:
        """Get the MIME type for a file extension.

        Args:
            file_extension: Extension including the leading dot; the lookup
                is exact, so pass it lower-cased.

        Returns:
            The mapped MIME type, or "application/octet-stream" when the
            extension is not in the table.
        """
        return self._MIME_TYPES.get(file_extension, "application/octet-stream")
# Make the parser available through the registry only when its optional
# dependency imported; otherwise just report why it was skipped.
if not GEMINI_AVAILABLE:
    print("Gemini Flash parser not registered: google-genai package not installed")
else:
    ParserRegistry.register(GeminiFlashParser)