AppleBotzz committed on
Commit
dac012f
1 Parent(s): 9ef58f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -39
app.py CHANGED
@@ -1,11 +1,18 @@
1
  import gradio as gr
2
  import base64
3
  import anthropic
 
4
 
5
  # Assuming anthropic is a package that provides an Anthropic client for interacting with Claude
6
  # and it's installed or defined somewhere in your project
7
  from anthropic import Anthropic
8
 
 
 
 
 
 
 
9
  def image_to_base64(image_path):
10
  """Convert the image to base64."""
11
  with open(image_path, "rb") as image_file:
@@ -21,59 +28,98 @@ def get_media_type(image_name):
21
  else:
22
  return None # Extend this function based on the image formats you expect to handle
23
 
24
- def describe_image(image_path, api_key, model, prompt):
25
- """Send the image to Claude for description."""
26
- try:
27
- image_base64 = image_to_base64(image_path)
28
- media_type = get_media_type(image_path)
29
 
30
- client = Anthropic(api_key=api_key)
31
- message = client.messages.create(
32
- model=model,
33
- max_tokens=1024,
34
- messages=[
35
- {
36
- "role": "user",
37
- "content": [
38
- {
39
- "type": "image",
40
- "source": {
41
- "type": "base64",
42
- "media_type": media_type,
43
- "data": image_base64,
 
 
 
 
 
 
 
 
44
  },
45
- },
46
- {
47
- "type": "text",
48
- "text": prompt
49
- }
50
- ],
51
- }
52
- ],
53
- )
54
- return message.content[0].text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
  return f"Error: {str(e)}"
57
 
58
- def main(image_path, api_key, model_a, model_b, prompt):
59
- if api_key:
60
- description_a = describe_image(image_path, api_key, model_a, prompt)
61
- description_b = describe_image(image_path, api_key, model_b, prompt)
62
 
63
  return description_a, description_b
64
  else:
65
  return "Please enter a valid API key.", "Please enter a valid API key."
66
 
67
- model_options = ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"]
68
 
69
  with gr.Blocks() as iface:
70
- gr.Markdown("# Image Description with Claude Models")
71
- gr.Markdown("Drag and drop an image to get descriptions from different Claude models.")
72
 
73
  with gr.Row():
74
  with gr.Column():
75
  image_input = gr.Image(type="filepath", label="Upload Image")
76
- api_key_input = gr.Textbox(type="password", label="Enter your Claude API Key")
 
77
 
78
  with gr.Column():
79
  model_a_dropdown = gr.Dropdown(choices=model_options, label="Model A")
@@ -87,8 +133,9 @@ with gr.Blocks() as iface:
87
  run_button = gr.Button("Run")
88
 
89
  run_button.click(
90
- fn=main,
91
- inputs=[image_input, api_key_input, model_a_dropdown, model_b_dropdown, prompt_input],
 
92
  outputs=[output_a, output_b]
93
  )
94
 
 
1
  import gradio as gr
2
  import base64
3
  import anthropic
4
+ from openai import OpenAI
5
 
6
  # Assuming anthropic is a package that provides an Anthropic client for interacting with Claude
7
  # and it's installed or defined somewhere in your project
8
  from anthropic import Anthropic
9
 
10
+ def create_image_content(image, MT, detail = "low"):
11
+ return {
12
+ "type": "image_url",
13
+ "image_url": {"url": f"data:{MT};base64,{image}", "detail": detail}
14
+ }
15
+
16
  def image_to_base64(image_path):
17
  """Convert the image to base64."""
18
  with open(image_path, "rb") as image_file:
 
28
  else:
29
  return None # Extend this function based on the image formats you expect to handle
30
 
31
def set_system_message(sysmsg):
    """Return a one-element message list holding *sysmsg* under the "system" role."""
    system_entry = dict(role="system", content=sysmsg)
    return [system_entry]
36
 
37
def describe_image(image_path, claude_api_key, openai_api_key, model, prompt):
    """Send the image to the selected model and return its text description.

    Models whose name starts with ``"claude"`` are sent to the Anthropic
    Messages API; the labels ``"gpt-4-vision Low"`` / ``"gpt-4-vision High"``
    are sent to OpenAI's ``gpt-4-vision-preview`` model with the matching
    image ``detail`` setting.

    Args:
        image_path: Path of the image file to describe.
        claude_api_key: Anthropic API key; required for Claude models.
        openai_api_key: OpenAI API key; required for the GPT-4 Vision options.
        model: Model name or label chosen by the user.
        prompt: Text prompt sent alongside the image.

    Returns:
        The model's reply text, or a human-readable message when the request
        cannot be made (no model, unsupported model, missing key, missing or
        unsupported image) or when the API call raises.
    """
    # Maps the GPT-4 Vision labels to the API's image "detail" value.
    openai_detail_levels = {
        "gpt-4-vision Low": "low",
        "gpt-4-vision High": "high",
    }

    # Validate the selection up front so the user sees a clear message
    # instead of an implicit None or a raw AttributeError text.
    if not model:
        return "Please select a model."
    is_claude = model.startswith("claude")
    if not is_claude and model not in openai_detail_levels:
        return f"Unsupported model: {model}"
    if is_claude and not claude_api_key:
        return "Claude API key is required for Claude models."
    if not is_claude and not openai_api_key:
        return "OpenAI API key is required for GPT-4 Vision."
    if not image_path:
        return "Please upload an image."

    # Broad catch is deliberate: the result is displayed directly to the
    # user, so any failure is reported as text rather than raised.
    try:
        media_type = get_media_type(image_path)
        if media_type is None:
            # get_media_type returns None for formats it does not recognise.
            return "Unsupported image format."
        image_base64 = image_to_base64(image_path)

        if is_claude:
            # Using Anthropic Claude models
            client = Anthropic(api_key=claude_api_key)
            message = client.messages.create(
                model=model,
                max_tokens=1024,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": media_type,
                                    "data": image_base64,
                                },
                            },
                            {
                                "type": "text",
                                "text": prompt,
                            },
                        ],
                    }
                ],
            )
            return message.content[0].text

        # Using OpenAI GPT-4 Vision
        client = OpenAI(api_key=openai_api_key)
        image_content = create_image_content(
            image_base64, media_type, openai_detail_levels[model]
        )
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=set_system_message("You are GPT-4.") + [
                {
                    "role": "user",
                    "content": [image_content],
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
            max_tokens=1024,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"
102
 
103
def main(image_path, claude_api_key, openai_api_key, model_a, model_b, prompt):
    """Describe the image with model A and model B and return both results.

    When neither API key is provided, both outputs carry the same
    "enter a valid API key" notice and no description is requested.
    """
    # Guard clause: at least one key is needed before any model is queried.
    if not (claude_api_key or openai_api_key):
        return "Please enter a valid API key.", "Please enter a valid API key."

    # Tuple elements are evaluated left to right, so model A is queried first.
    return (
        describe_image(image_path, claude_api_key, openai_api_key, model_a, prompt),
        describe_image(image_path, claude_api_key, openai_api_key, model_b, prompt),
    )
111
 
112
# Choices offered in the Model A / Model B dropdowns: three Claude 3 models
# followed by the two GPT-4 Vision detail options.
model_options = [
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
    "gpt-4-vision Low",
    "gpt-4-vision High",
]
113
 
114
  with gr.Blocks() as iface:
115
+ gr.Markdown("# Image Description with Claude Models and GPT-4 Vision")
116
+ gr.Markdown("Drag and drop an image to get descriptions from different models.")
117
 
118
  with gr.Row():
119
  with gr.Column():
120
  image_input = gr.Image(type="filepath", label="Upload Image")
121
+ claude_api_key_input = gr.Textbox(type="password", label="Enter your Claude API Key")
122
+ openai_api_key_input = gr.Textbox(type="password", label="Enter your OpenAI API Key")
123
 
124
  with gr.Column():
125
  model_a_dropdown = gr.Dropdown(choices=model_options, label="Model A")
 
133
  run_button = gr.Button("Run")
134
 
135
  run_button.click(
136
+ fn=lambda image_path, claude_api_key, openai_api_key, model_a, model_b, prompt:
137
+ main(image_path, claude_api_key, openai_api_key, model_a, model_b, prompt),
138
+ inputs=[image_input, claude_api_key_input, openai_api_key_input, model_a_dropdown, model_b_dropdown, prompt_input],
139
  outputs=[output_a, output_b]
140
  )
141