Spaces:

disham993
/

gemini-pro-vision-streamlit-application

Running

App Files Files Community

disham993 committed on Jan 4

Commit

ec44ead

•

1 Parent(s): c04fddb

Gemini Streamlit Application.

Browse files

Files changed (12) hide show

.gitignore +2 -0
app.py +54 -0
requirements.txt +5 -0
ui_files/__init__.py +5 -0
ui_files/authentication.py +33 -0
ui_files/initial_headers.py +15 -0
ui_files/media_handling.py +32 -0
ui_files/settings.py +41 -0
ui_files/user_input_handler.py +39 -0
utils/__init__.py +2 -0
utils/constants.py +38 -0
utils/helper.py +68 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ */__pycache__
2	+ */.env

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# Q&A Chatbot
+from utils import *
+from ui_files import *
+# Headers of the app
+initial_headers()
+# Handle media upload
+media_content, media_type = handle_media_upload()
+# Handle JSON file upload for authentication
+api_key = handle_credentials(media_type=media_type)
+# Handle input fields
+prompts = manage_input_fields()
+submit = st.button(f"Tell me about the {media_type}")
+# Configure generation and safety settings
+generation_config, safety_settings = configure_generation_and_safety(
+    SAFETY_SETTINGS, THRESHOLD_OPTIONS
+)
+## If ask button is clicked
+if submit:
+    print(f"Response being generated...")
+    st.subheader("The Response as follows...")
+    start_time = time.time()
+    if media_type == "video":
+        final_safety_settings = {}
+        for setting in safety_settings:
+            final_safety_settings[
+                SAFETY_SETTINGS_VIDEO_LABELS[setting["category"]]
+            ] = THRESHOLD_OPTIONS_VIDEO_LABELS[setting["threshold"]]
+    else:
+        final_safety_settings = safety_settings
+    response = get_gemini_response(
+        prompts,
+        media_content=media_content,
+        generation_config=generation_config,
+        media_type=media_type,
+        safety_settings=final_safety_settings,
+        api_key=api_key,
+    )
+    for chunk in response:
+        print(chunk.text)  # For Debugging
+        st.write(chunk.text)
+    if os.path.exists("tmp/json_data.json"):
+        os.remove("tmp/json_data.json")
+    st.write(f"Time taken to generate results: {time.time() - start_time:.2f} seconds.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit==1.29.0
+google-generativeai==0.3.0
+google-ai-generativelanguage==0.4.0
+python-dotenv==1.0.0
+google-cloud-aiplatform==1.38.1

ui_files/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from ui_files.media_handling import *
+from ui_files.authentication import *
+from ui_files.settings import *
+from ui_files.initial_headers import *
+from ui_files.user_input_handler import *

ui_files/authentication.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+from utils import *
+def handle_credentials(media_type: str = "image"):
+    if media_type == "image":
+        api_key = st.text_input(
+            "🔐 GOOGLE AI STUDIO API KEY - Required For Image.", key="api_key"
+        )
+        return api_key
+    elif media_type == "video":
+        uploaded_json = st.file_uploader(
+            "🔐 Upload a JSON file which includes Google Service Account Credentials - Required for Video.",
+            type=["json"],
+        )
+        if uploaded_json is not None:
+            json_data = json.load(uploaded_json)
+            os.makedirs("tmp", exist_ok=True)
+            json_path = os.path.join("tmp", "json_data.json")
+            with open(json_path, "w") as file:
+                json.dump(json_data, file)
+            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = json_path
+            service_account.Credentials.from_service_account_info(json_data)
+            st.success(
+                "Environment variable GOOGLE_APPLICATION_CREDENTIALS set from JSON file."
+            )

ui_files/initial_headers.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+from utils import *
+def initial_headers():
+    st.set_page_config(page_title="Gemini Image & Video Demo")
+    st.header("Gemini Application - Image & Video Demo")
+    st.write(
+        "This app is to be used to ask questions on image and video that will be uploaded."
+    )

ui_files/media_handling.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+from utils import *
+def handle_media_upload():
+    uploaded_file = st.file_uploader(
+        "**Drag and drop or upload an Image 🖼️ or a Video 📺**",
+        type=["jpg", "jpeg", "png", "mp4"],
+    )
+    media_content = ""
+    media_type = "image"
+    if uploaded_file is not None:
+        if uploaded_file.type.startswith("image/"):
+            media_content = Image.open(uploaded_file)
+            media_content = media_content.resize((500, 500))
+            st.image(media_content, caption="Uploaded Image.", use_column_width=True)
+        if uploaded_file.type.startswith("video/"):
+            file_bytes = uploaded_file.read()
+            data = base64.b64encode(file_bytes)
+            media_content = Part.from_data(
+                data=base64.b64decode(data), mime_type="video/mp4"
+            )
+            st.video(uploaded_file)
+            media_type = "video"
+    return media_content, media_type

ui_files/settings.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+from utils import *
+def configure_generation_and_safety(safety_settings, threshold_options):
+    # Add sliders for temperature, top_p, top_k, and max_output_tokens
+    st.sidebar.header("Generation Configuration")
+    temperature = st.sidebar.slider(
+        "Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01
+    )
+    top_p = st.sidebar.slider(
+        "Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01
+    )
+    top_k = st.sidebar.slider("Top K", min_value=0, max_value=100, value=40, step=1)
+    max_output_tokens = st.sidebar.slider(
+        "Max Output Tokens", min_value=1, max_value=4096, value=1024, step=1
+    )
+    generation_config = {
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "max_output_tokens": max_output_tokens,
+    }
+    # Sidebar for safety settings
+    st.sidebar.header("Safety Settings")
+    # Create a dropdown for each category
+    for setting in safety_settings:
+        setting["threshold"] = st.sidebar.selectbox(
+            f"{setting['category']}",
+            threshold_options,
+            index=threshold_options.index(setting["threshold"]),
+        )
+    return generation_config, safety_settings

ui_files/user_input_handler.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+from utils import *
+def manage_input_fields():
+    st.write("Enter a single or multiple prompts.")
+    # Initialize session state variables if they don't exist
+    if "input_list" not in st.session_state:
+        st.session_state.input_list = [""]
+    # Function to add a new input field
+    def add_input():
+        st.session_state.input_list.append("")
+    # Function to remove an input field
+    def remove_input(index):
+        st.session_state.input_list.pop(index)
+    # Display the input fields
+    for index, value in enumerate(st.session_state.input_list):
+        col1, col2 = st.columns([4, 1])
+        with col1:
+            st.session_state.input_list[index] = st.text_input(
+                f"Input Prompt: {index+1}", value=value
+            )
+        with col2:
+            st.button(
+                "Remove", key=f"remove_{index}", on_click=remove_input, args=(index,)
+            )
+    # Button to add new input field
+    st.button("Add new input", on_click=add_input)
+    return st.session_state.input_list

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from utils.helper import *
2	+ from utils.constants import *

utils/constants.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+from utils.helper import *
+SAFETY_SETTINGS = [
+    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
+    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_ONLY_HIGH"},
+    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_ONLY_HIGH"},
+    {
+        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+        "threshold": "BLOCK_ONLY_HIGH",
+    },
+]
+# Threshold options
+THRESHOLD_OPTIONS = [
+    "BLOCK_NONE",
+    "BLOCK_ONLY_HIGH",
+    "BLOCK_MEDIUM_AND_ABOVE",
+    "BLOCK_LOW_AND_ABOVE",
+]
+SAFETY_SETTINGS_VIDEO_LABELS = {
+    "HARM_CATEGORY_HARASSMENT": HarmCategory.HARM_CATEGORY_HARASSMENT,
+    "HARM_CATEGORY_HATE_SPEECH": HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+    "HARM_CATEGORY_SEXUALLY_EXPLICIT": HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+    "HARM_CATEGORY_DANGEROUS_CONTENT": HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+}
+THRESHOLD_OPTIONS_VIDEO_LABELS = {
+    "BLOCK_NONE": HarmBlockThreshold.BLOCK_NONE,
+    "BLOCK_ONLY_HIGH": HarmBlockThreshold.BLOCK_ONLY_HIGH,
+    "BLOCK_MEDIUM_AND_ABOVE": HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+    "BLOCK_LOW_AND_ABOVE": HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
+}

utils/helper.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os, sys
+from os.path import dirname as up
+sys.path.append(os.path.abspath(os.path.join(up(__file__), os.pardir)))
+import streamlit as st
+import os
+import google.generativeai as genai
+import pathlib
+import textwrap
+from PIL import Image
+import json
+from vertexai.preview.generative_models import (
+    GenerativeModel,
+    Part,
+    HarmCategory,
+    HarmBlockThreshold,
+)
+from google.oauth2 import service_account  # importing auth using service_account
+import json
+import os
+import base64
+import time
+from enum import Enum
+from typing import Union, List, Any, Dict
+## Function to load OpenAI model and get respones
+def get_gemini_response(
+    input: Union[str, List[str]],
+    media_content: Any,
+    generation_config: Dict,
+    safety_settings: Union[List[Dict], Dict],
+    media_type: str = "image",
+    api_key: str = None,
+):
+    print(f"Safety Settings: {safety_settings}")
+    print(f"Generation Config: {generation_config}")  # -> For Debugging
+    if media_type == "video":
+        print(f"Media type is video.")
+        model = GenerativeModel(
+            model_name="gemini-pro-vision",
+            generation_config=generation_config,
+            safety_settings=safety_settings,
+        )
+    else:
+        print(f"Media type is image.")
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel(
+            "gemini-pro-vision",
+            generation_config=generation_config,
+            safety_settings=safety_settings,
+        )
+    if input != "":
+        # For debugging
+        # with open("tmp/input.txt", "w") as f:
+        #     f.write(str(media_content))
+        response = model.generate_content(input + [media_content], stream=True)
+    else:
+        response = model.generate_content(media_content, stream=True)
+    return response