Spaces:

Nymbo
/

Claude_3_Gpt4_Compare

Running

App Files Community

AppleBotzz committed on Apr 5

Commit

dac012f

•

1 Parent(s): 9ef58f1

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -39

app.py CHANGED Viewed

@@ -1,11 +1,18 @@
 import gradio as gr
 import base64
 import anthropic
 # Assuming anthropic is a package that provides an Anthropic client for interacting with Claude
 # and it's installed or defined somewhere in your project
 from anthropic import Anthropic
 def image_to_base64(image_path):
     """Convert the image to base64."""
     with open(image_path, "rb") as image_file:
@@ -21,59 +28,98 @@ def get_media_type(image_name):
     else:
         return None  # Extend this function based on the image formats you expect to handle
-def describe_image(image_path, api_key, model, prompt):
-    """Send the image to Claude for description."""
-    try:
-        image_base64 = image_to_base64(image_path)
-        media_type = get_media_type(image_path)
-        client = Anthropic(api_key=api_key)
-        message = client.messages.create(
-            model=model,
-            max_tokens=1024,
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": media_type,
-                                "data": image_base64,
                             },
-                        },
-                        {
-                            "type": "text",
-                            "text": prompt
-                        }
-                    ],
-                }
-            ],
-        )
-        return message.content[0].text
     except Exception as e:
         return f"Error: {str(e)}"
-def main(image_path, api_key, model_a, model_b, prompt):
-    if api_key:
-        description_a = describe_image(image_path, api_key, model_a, prompt)
-        description_b = describe_image(image_path, api_key, model_b, prompt)
         return description_a, description_b
     else:
         return "Please enter a valid API key.", "Please enter a valid API key."
-model_options = ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"]
 with gr.Blocks() as iface:
-    gr.Markdown("# Image Description with Claude Models")
-    gr.Markdown("Drag and drop an image to get descriptions from different Claude models.")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="filepath", label="Upload Image")
-            api_key_input = gr.Textbox(type="password", label="Enter your Claude API Key")
         with gr.Column():
             model_a_dropdown = gr.Dropdown(choices=model_options, label="Model A")
@@ -87,8 +133,9 @@ with gr.Blocks() as iface:
     run_button = gr.Button("Run")
     run_button.click(
-        fn=main,
-        inputs=[image_input, api_key_input, model_a_dropdown, model_b_dropdown, prompt_input],
         outputs=[output_a, output_b]
     )

 import gradio as gr
 import base64
 import anthropic
+from openai import OpenAI
 # Assuming anthropic is a package that provides an Anthropic client for interacting with Claude
 # and it's installed or defined somewhere in your project
 from anthropic import Anthropic
def create_image_content(image, MT, detail="low"):
    """Build an ``image_url`` content part embedding the image as a data URL.

    Args:
        image: Base64-encoded image data, interpolated into the URL as-is.
        MT: Media type string placed after ``data:`` in the URL.
        detail: Value stored under the ``"detail"`` key (defaults to "low").

    Returns:
        A dict with ``"type": "image_url"`` and a nested ``"image_url"`` dict
        holding the data URL and the detail setting.
    """
    data_url = f"data:{MT};base64,{image}"
    image_url = {"url": data_url, "detail": detail}
    return {"type": "image_url", "image_url": image_url}
 def image_to_base64(image_path):
     """Convert the image to base64."""
     with open(image_path, "rb") as image_file:
     else:
         return None  # Extend this function based on the image formats you expect to handle
def set_system_message(sysmsg):
    """Wrap *sysmsg* in a one-element list holding a system-role message.

    Returning a list lets callers concatenate it with further messages.
    """
    system_entry = {"role": "system", "content": sysmsg}
    return [system_entry]
def describe_image(image_path, claude_api_key, openai_api_key, model, prompt):
    """Send the image to the selected model and return its text reply.

    Args:
        image_path: Filesystem path of the image to describe.
        claude_api_key: Anthropic API key; required when ``model`` starts
            with ``"claude"``.
        openai_api_key: OpenAI API key; required for the GPT-4 Vision options.
        model: A Claude model id, or ``"gpt-4-vision Low"`` /
            ``"gpt-4-vision High"`` (the suffix selects the image detail).
        prompt: Text instruction sent alongside the image.

    Returns:
        The model's text reply, or a human-readable message when the request
        cannot be made (no model selected, unknown model, missing API key,
        missing or unsupported image) or when the API call raises.
    """
    # Validate cheap preconditions up front so the caller gets a clear
    # message instead of a raw exception string or an implicit ``None``.
    if not model or not isinstance(model, str):
        return "Please select a model."

    is_claude = model.startswith("claude")
    is_gpt4_vision = model in ("gpt-4-vision Low", "gpt-4-vision High")
    if not (is_claude or is_gpt4_vision):
        return f"Unsupported model: {model}"

    if is_claude and not claude_api_key:
        return "Claude API key is required for Claude models."
    if is_gpt4_vision and not openai_api_key:
        return "OpenAI API key is required for GPT-4 Vision."

    if not image_path:
        return "Please upload an image."

    try:
        # get_media_type signals an unrecognised format by returning None;
        # forwarding that to either API would only fail remotely.
        media_type = get_media_type(image_path)
        if media_type is None:
            return "Unsupported image format."
        image_base64 = image_to_base64(image_path)

        if is_claude:
            client = Anthropic(api_key=claude_api_key)
            message = client.messages.create(
                model=model,
                max_tokens=1024,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": media_type,
                                    "data": image_base64,
                                },
                            },
                            {
                                "type": "text",
                                "text": prompt,
                            },
                        ],
                    }
                ],
            )
            return message.content[0].text

        # GPT-4 Vision: the dropdown label encodes the requested detail level.
        detail = "low" if model == "gpt-4-vision Low" else "high"
        client = OpenAI(api_key=openai_api_key)
        # NOTE(review): "gpt-4-vision-preview" is a preview model id; confirm
        # it is still served before relying on this branch.
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=set_system_message("You are GPT-4.") + [
                {
                    "role": "user",
                    "content": [
                        create_image_content(image_base64, media_type, detail)
                    ],
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
            max_tokens=1024,
        )
        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Error: {e}"
def main(image_path, claude_api_key, openai_api_key, model_a, model_b, prompt):
    """Describe the image with both selected models.

    Returns a pair ``(description_a, description_b)``. When neither API key
    is supplied, both entries are the same "enter a valid API key" message
    and no model is called.
    """
    # Guard clause: at least one provider key must be present.
    if not (claude_api_key or openai_api_key):
        missing_key = "Please enter a valid API key."
        return missing_key, missing_key

    # Query model A first, then model B, with identical inputs.
    first, second = (
        describe_image(image_path, claude_api_key, openai_api_key, chosen, prompt)
        for chosen in (model_a, model_b)
    )
    return first, second
# Choices offered in both model dropdowns. The two "gpt-4-vision" entries are
# UI labels (low / high image detail), not literal OpenAI model ids.
model_options = [
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
    "gpt-4-vision Low",
    "gpt-4-vision High",
]
 with gr.Blocks() as iface:
+    gr.Markdown("# Image Description with Claude Models and GPT-4 Vision")
+    gr.Markdown("Drag and drop an image to get descriptions from different models.")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="filepath", label="Upload Image")
+            claude_api_key_input = gr.Textbox(type="password", label="Enter your Claude API Key")
+            openai_api_key_input = gr.Textbox(type="password", label="Enter your OpenAI API Key")
         with gr.Column():
             model_a_dropdown = gr.Dropdown(choices=model_options, label="Model A")
     run_button = gr.Button("Run")
     run_button.click(
+        fn=lambda image_path, claude_api_key, openai_api_key, model_a, model_b, prompt:
+            main(image_path, claude_api_key, openai_api_key, model_a, model_b, prompt),
+        inputs=[image_input, claude_api_key_input, openai_api_key_input, model_a_dropdown, model_b_dropdown, prompt_input],
         outputs=[output_a, output_b]
     )