Rahatara committed on
Commit
f156c0f
·
verified ·
1 Parent(s): b54195c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -51
app.py CHANGED
@@ -1,58 +1,81 @@
1
- import os
2
- import gradio as gr
3
- import google.generativeai as genai
4
-
5
# Fail fast at import time when the Gemini credential is missing or empty.
api_key = os.environ.get("GOOGLE_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("GOOGLE_API_KEY not found. Please set it in your environment variables.")
genai.configure(api_key=api_key)

# Shared Gemini model handle used by the request handler.
model = genai.GenerativeModel(model_name="gemini-2.0-flash-exp")
13
-
14
def process_input(text_input, files):
    """Send the prompt text and any uploaded files to Gemini and return its reply.

    Args:
        text_input: Prompt text from the UI; omitted from the request when empty.
        files: Uploaded files from the Gradio file input. May be None when
            nothing was uploaded, in which case only the text is sent.

    Returns:
        The model's response text, or an error string if the API call fails.
    """
    contents = []
    if text_input:
        contents.append({"text": text_input})  # Text part of the request

    # `files or []` keeps a text-only submission from crashing on None.
    for file_info in files or []:
        # NOTE(review): assumes each upload is a dict carrying 'content' and
        # 'metadata' -> 'mime_type'; confirm against the Gradio version in use.
        contents.append({
            "file": file_info['content'],
            "mime_type": file_info['metadata']['mime_type'],
        })

    # Call the Gemini API with everything collected above.
    try:
        response = model.generate_content(contents)
        response.resolve()
        return response.text
    except Exception as e:
        return f"Error communicating with Gemini API: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Create the Gradio interface
39
- def create_interface():
40
- with gr.Blocks() as app:
41
- with gr.Row():
42
- text_input = gr.Textbox(label="Enter your text:", placeholder="Type your query here...")
43
- file_input = gr.File(label="Upload files", type="file", file_types=["pdf", "png", "jpg", "mp3", "mp4"], file_count="multiple")
44
- submit_button = gr.Button("Process")
45
 
46
- output = gr.Textbox(placeholder="Response will appear here...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- submit_button.click(
49
- fn=process_input,
50
- inputs=[text_input, file_input],
51
- outputs=output
52
- )
53
 
54
- return app
 
 
 
 
 
 
 
 
 
 
55
 
56
if __name__ == "__main__":
    # Build the UI only when run as a script, then serve it in debug mode.
    app = create_interface()
    app.launch(debug=True)
 
1
+ from gradio import Interface, File
2
+ import requests
3
+ import io
4
+ import base64
5
+ import json
6
+
7
import os

# Google AI Studio API key. Taken from the GEMINI_API_KEY environment variable
# so the secret need not be committed; falls back to the placeholder otherwise.
API_KEY = os.environ.get("GEMINI_API_KEY") or "YOUR_GEMINI_API_KEY"
project_id = "genai-test-396922"
location = "us-central1"
model_id = "gemini-2.0-flash-exp"
# Endpoint template for Gemini 2.0 text generation; the {project_id},
# {location} and {model_id} fields are filled in with str.format().
# NOTE(review): an AI Studio key sent as a Bearer token to an aiplatform
# ":textGenerate" route looks mismatched - confirm the endpoint and auth scheme.
ENDPOINT = "https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/models/{model_id}:textGenerate"
14
+
15
def encode_file(file_obj, file_type):
    """Encode a file (image, video, or PDF) to a base64 string.

    Args:
        file_obj: Binary file-like object. In-memory buffers exposing
            ``getvalue()`` and ordinary file handles exposing ``read()``
            are both accepted.
        file_type: Lower-case file extension without the dot (e.g. "pdf").

    Returns:
        The base64 text of the content, or None if reading/encoding failed
        (the error is printed).
    """
    try:
        read_whole_buffer = getattr(file_obj, "getvalue", None)
        if file_type != "pdf" and callable(read_whole_buffer):
            # In-memory buffers: take the whole buffer regardless of position.
            content = read_whole_buffer()
        else:
            # PDFs and real file handles (which have no getvalue()).
            content = file_obj.read()
        return base64.b64encode(content).decode("utf-8")
    except Exception as e:
        # Best-effort: report and let the caller skip this file.
        print(f"Error encoding file: {e}")
        return None
27
+
28
def generate_response(text_prompt, files):
    """Send a text prompt plus optional files to the model endpoint.

    Args:
        text_prompt: Prompt text entered by the user.
        files: Uploaded file(s): None, a single item, or a list. Each item is
            either an object with a ``name`` attribute holding the path, or
            the path string itself. None entries are skipped.

    Returns:
        The ``generated_text`` field of the JSON response, a fallback message
        when that key is missing, or a human-readable error string.
    """
    # Use the configured module-level identifiers, not literal placeholders.
    url = ENDPOINT.format(project_id=project_id, location=location, model_id=model_id)
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}

    # Normalise the upload argument so text-only and single-file calls work.
    if files is None:
        files = []
    elif not isinstance(files, (list, tuple)):
        files = [files]

    inputs = [{"text": text_prompt}]
    for file_data in files:
        if file_data is None:
            continue

        file_path = getattr(file_data, "name", file_data)
        file_type = file_path.split(".")[-1].lower()

        try:
            with open(file_path, "rb") as file_obj:
                encoded_content = encode_file(file_obj, file_type)
        except Exception as e:
            return f"Error processing file {file_path}: {e}"
        if encoded_content:
            inputs.append({file_type: encoded_content})  # Use file type as key

    request_body = {"inputs": inputs}

    try:
        response = requests.post(url, headers=headers, json=request_body)
        response.raise_for_status()  # Raise for 4xx/5xx status codes
        return response.json().get("generated_text", "No generated text found in response.")
    except requests.exceptions.RequestException as e:
        # The exception carries the response (if any); the local `response`
        # is unbound when the request itself failed, e.g. a connection error.
        failed = e.response
        if failed is None:
            return f"Request Exception: {e}"
        try:
            error_message = failed.json().get("error", {}).get("message", "Unknown error")
        except (ValueError, AttributeError):
            # Body is not JSON, or not shaped like {"error": {"message": ...}}.
            error_message = failed.text
        return f"API Error: {failed.status_code} - {error_message}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"
67
 
 
 
 
 
 
68
 
69
# Define Gradio interface with multiple file input
interface = Interface(
    fn=generate_response,
    inputs=[
        "text",
        # One File component accepting several uploads; a nested list of
        # components is not a valid Interface input.
        File(
            label="Upload Files (Images, Videos, PDFs)",
            file_types=[".png", ".jpg", ".jpeg", ".mp4", ".mov", ".pdf"],
            type="file",
            file_count="multiple",
        ),
    ],
    outputs="text",
    title="Multimodal Prompt with Gemini 2.0 (Experimental)",
    description="Enter a text prompt and upload one or more files (images, videos, PDFs).",
)

interface.launch(share=True)