Spaces:

Rahatara
/

build_with_gemini

Sleeping

App Files Files Community

Rahatara commited on Dec 15, 2024

Commit

f156c0f

verified ·

1 Parent(s): b54195c

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -51

app.py CHANGED Viewed

@@ -1,58 +1,81 @@
-import os
-import gradio as gr
-import google.generativeai as genai
-# Ensure the API key is set
-api_key = os.getenv("GOOGLE_API_KEY")
-if not api_key:
-    raise ValueError("GOOGLE_API_KEY not found. Please set it in your environment variables.")
-genai.configure(api_key=api_key)
-# Initialize the Gemini model
-model = genai.GenerativeModel(model_name="gemini-2.0-flash-exp")
-def process_input(text_input, files):
-    """Process input text and files, send them to the Gemini API, and get a response."""
-    contents = []
-    if text_input:
-        contents.append({"text": text_input})  # Add text content for Gemini
-    # Handle files: read and prepare for Gemini API
-    for file_info in files:
-        file_content = file_info['content']  # Access the file content from the Gradio file dictionary
-        mime_type = file_info['metadata']['mime_type']  # Access MIME type from the metadata
-        contents.append({
-            "file": file_content,
-            "mime_type": mime_type  # MIME type is used if specific handling is required by Gemini
-        })
-    # Call Gemini API to process the collected contents
     try:
-        response = model.generate_content(contents)
-        response.resolve()
-        return response.text
     except Exception as e:
-        return f"Error communicating with Gemini API: {e}"
-# Create the Gradio interface
-def create_interface():
-    with gr.Blocks() as app:
-        with gr.Row():
-            text_input = gr.Textbox(label="Enter your text:", placeholder="Type your query here...")
-            file_input = gr.File(label="Upload files", type="file", file_types=["pdf", "png", "jpg", "mp3", "mp4"], file_count="multiple")
-            submit_button = gr.Button("Process")
-        output = gr.Textbox(placeholder="Response will appear here...")
-        submit_button.click(
-            fn=process_input,
-            inputs=[text_input, file_input],
-            outputs=output
-        )
-    return app
-if __name__ == "__main__":
-    app = create_interface()
-    app.launch(debug=True)

+from gradio import Interface, File
+import requests
+import io
+import base64
+import json
+# Replace with your Google AI Studio API key
+API_KEY = "YOUR_GEMINI_API_KEY"
+project_id= "genai-test-396922"
+location= "us-central1"
+model_id= "gemini-2.0-flash-exp"
+# Endpoint for Gemini 2.0 Text Generation (adapt if needed for specific multimodal endpoints)
+ENDPOINT = "https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/models/{model_id}:textGenerate"
+def encode_file(file_obj, file_type):
+    """Encodes a file (image, video, or PDF) to base64."""
     try:
+      if file_type == "pdf":
+        content = file_obj.read()
+      else:
+        content = file_obj.getvalue() # For images and videos
+      encoded_content = base64.b64encode(content).decode("utf-8")
+      return encoded_content
     except Exception as e:
+        print(f"Error encoding file: {e}")
+        return None
+def generate_response(text_prompt, files):
+    """Handles multiple file inputs (images, videos, PDFs)."""
+    url = ENDPOINT.format(project_id="your-project-id", location="your-location", model_id="your-model-id")
+    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
+    inputs = [{"text": text_prompt}]
+    for file_data in files:
+        if file_data is None:
+            continue
+        file_path = file_data.name
+        file_type = file_path.split(".")[-1].lower()
+        try:
+            with open(file_path, "rb") as file_obj:
+                encoded_content = encode_file(file_obj, file_type)
+                if encoded_content:
+                    inputs.append({file_type: encoded_content}) # Use file type as key
+        except Exception as e:
+            return f"Error processing file {file_path}: {e}"
+    request_body = {"inputs": inputs}
+    try:
+        response = requests.post(url, headers=headers, json=request_body)
+        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+        return response.json().get("generated_text", "No generated text found in response.") # Handle missing key
+    except requests.exceptions.RequestException as e:
+        if response is not None:
+            try:
+                error_message = response.json().get("error", {}).get("message", "Unknown error")
+            except json.JSONDecodeError:
+                error_message = response.text
+            return f"API Error: {response.status_code} - {error_message}"
+        else:
+            return f"Request Exception: {e}"
+    except Exception as e:
+        return f"An unexpected error occurred: {e}"
+# Define Gradio interface with multiple file input
+interface = Interface(
+    fn=generate_response,
+    inputs=[
+        "text",
+        [File(label="Upload Files (Images, Videos, PDFs)", file_types=[".png", ".jpg", ".jpeg", ".mp4", ".mov", ".pdf"], type="file")], # Multiple files
+    ],
+    outputs="text",
+    title="Multimodal Prompt with Gemini 2.0 (Experimental)",
+    description="Enter a text prompt and upload one or more files (images, videos, PDFs).",
+)
+interface.launch(share=True)