Rooni committed on
Commit
1b96f25
·
1 Parent(s): e685399

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -66
app.py CHANGED
@@ -1,77 +1,116 @@
 
1
  import gradio as gr
2
- import requests
3
- import os
4
  import base64
5
- from PIL import Image
6
- import numpy as np
7
  import io
 
8
 
9
- # Функция для обработки изображения и текста и отправки запроса к OpenAI
10
- def generate_text(image, prompt):
11
- # Конвертируем NumPy массив в объект Image
12
- image_pil = Image.fromarray(image.astype('uint8'), 'RGB')
13
-
14
- # Конвертируем изображение для отправки через API
15
- image_bytes = io.BytesIO()
16
- image_pil.save(image_bytes, format='PNG')
17
- image_base64 = base64.b64encode(image_bytes.getvalue()).decode('utf-8')
18
-
19
- # API ключ для OpenAI
20
- api_key = os.getenv("API_KEY")
21
-
22
- # Заголовки для запроса
23
- headers = {
24
- 'Authorization': f'Bearer {api_key}',
25
- 'Content-Type': 'application/json',
26
- }
27
 
28
- # Данные для запроса
29
- data = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  "model": "gpt-4-vision-preview",
31
- "prompt": prompt,
32
- "n": 1,
33
- "temperature": 0.5,
34
- "top_p": 1,
35
- "frequency_penalty": 0,
36
- "presence_penalty": 0,
37
- "stop": ["\n"],
38
- "image": {
39
- "data": image_base64,
40
- "mime_type": "image/png"
41
- }
 
 
 
 
 
 
 
 
42
  }
43
 
44
- # URL для запроса к модели gpt-4-vision-preview
45
- url = 'https://api.openai.com/v1/completions'
 
 
 
 
46
 
47
- # Отправляем запрос к OpenAI
48
- response = requests.post(url, headers=headers, json=data)
49
-
50
- # Проверяем ответ и возвращаем результат
51
  if response.status_code == 200:
52
- response_data = response.json()
53
- return response_data['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
54
  else:
55
- return f"Error: {response.status_code} - {response.text}"
56
-
57
- # Создаем интерфейс с помощью Gradio
58
- with gr.Blocks() as demo:
59
- with gr.Row():
60
- with gr.Column():
61
- image_input = gr.Image(label="Загрузите изображение", type="numpy")
62
- text_input = gr.Textbox(label="Введите текст")
63
- submit_button = gr.Button("Решить")
64
- with gr.Column():
65
- output_text = gr.Textbox(label="Ответ", interactive=True, lines=10)
66
- output_markdown = gr.Textbox(label="Ответ в Markdown", interactive=True, lines=10, visible=False)
67
-
68
- # Функция для обновления Markdown поля при получении ответа
69
- def update_markdown(answer):
70
- output_markdown.update(f"```\n{answer}\n```")
71
- output_markdown.change_visibility(True)
72
-
73
- # Привязываем функции к кнопке
74
- submit_button.click(fn=generate_text, inputs=[image_input, text_input], outputs=[output_text])
75
- output_text.change(fn=update_markdown, inputs=[output_text], outputs=[output_markdown])
76
-
77
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the necessary libraries
2
  import gradio as gr
3
+ import openai
 
4
  import base64
 
 
5
  import io
6
+ import requests
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ # Function to encode the image to base64
10
def encode_image_to_base64(image):
    """Encode an image as a base64 JPEG string.

    Args:
        image: A PIL-style image object exposing ``mode``, ``convert()`` and
            ``save()``.

    Returns:
        The JPEG-encoded bytes of ``image`` as base64 text (UTF-8 ``str``).
    """
    # JPEG cannot store alpha or palette data, so saving e.g. an RGBA PNG
    # upload would raise. Normalise those modes to RGB before encoding.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
15
+
16
+
17
+ # Function to send the image to the OpenAI API and get a response
18
def ask_openai_with_image(instruction, json_prompt, low_quality_mode, image):
    """Send an image plus a text prompt to the OpenAI chat completions API.

    Args:
        instruction: Prompt text for the model. When blank, the default
            prompt shown in the "Instructions" placeholder is used.
        json_prompt: Optional attribute list. When non-blank, a request to
            answer in JSON with those attributes is appended to the prompt.
        low_quality_mode: Truthy selects ``"low"`` image detail, otherwise
            ``"high"``.
        image: The image to send, or ``None`` when nothing was uploaded.

    Returns:
        The model's reply text on success, otherwise a human-readable error
        string. Errors are returned rather than raised so they show up in
        the output component.
    """
    # Imported locally because the file's import block does not import ``os``.
    import os

    default_instruction = (
        "I've uploaded an image and I'd like to know what it depicts "
        "and any interesting details you can provide."
    )
    request_timeout_seconds = 120

    if image is None:
        return "Error: no image was provided."

    api_key = os.getenv("API_KEY")
    if not api_key:
        return "Error: the API_KEY environment variable is not set."
    # Keep setting the module attribute, as the original code did.
    openai.api_key = api_key

    # Encode the uploaded image to base64; report encoder failures as text.
    try:
        base64_image = encode_image_to_base64(image)
    except (OSError, ValueError) as exc:
        return f"Error: could not encode the image: {exc}"

    # Fall back to the advertised default when the prompt is blank.
    prompt = (instruction or "").strip() or default_instruction

    attributes = (json_prompt or "").strip()
    if attributes:
        prompt = f"{prompt}\n\nReturn in JSON format and include the following attributes:\n\n{attributes}"

    # Chat-completions payload: one user message with a text part and an
    # inline (data URL) image part.
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }

    # A timeout keeps a stalled connection from hanging the request forever.
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json=payload,
            timeout=request_timeout_seconds,
        )
    except requests.RequestException as exc:
        return f"Error: {exc}"

    if response.status_code != 200:
        # Return the API's own error body.
        return f"Error: {response.text}"

    try:
        response_json = response.json()
    except ValueError as exc:
        print("Error decoding JSON response:", exc)
        return "Error processing the image response."

    print("Response JSON:", response_json)  # Print the raw response JSON
    try:
        return response_json["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        # The reply did not have the expected choices/message/content shape.
        print("Error in JSON structure:", exc)
        print("Full JSON response:", response_json)
        return "Error processing the image response."
76
+
77
+
78
# Placeholder texts for the two text boxes, written with explicit newlines.
_JSON_SCHEMA_PLACEHOLDER = (
    "Example:\n"
    "- name: Name of the object\n"
    "- color: Color of the object\n"
)
_INSTRUCTIONS_PLACEHOLDER = (
    "Default:\n"
    "I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide."
)

# Optional attribute list that switches the prompt to JSON output.
json_schema = gr.Textbox(
    label="JSON Attributes",
    info="Define a list of attributes to force the model to respond in valid json format. Leave blank to disable json formatting.",
    lines=3,
    placeholder=_JSON_SCHEMA_PLACEHOLDER,
)

# Free-form prompt for the vision model.
instructions = gr.Textbox(
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use default.",
    lines=2,
    placeholder=_INSTRUCTIONS_PLACEHOLDER,
)

# Checkbox passed to the handler as its low_quality_mode argument.
low_quality_mode = gr.Checkbox(
    label="Low Quality Mode",
    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
)

# Image upload, delivered to the handler as a PIL image.
_image_input = gr.Image(type="pil", label="Image")

# Wire the inputs to the handler; the reply is rendered as Markdown.
# Input order must match the handler's parameter order.
vision_playground = gr.Interface(
    fn=ask_openai_with_image,
    inputs=[instructions, json_schema, low_quality_mode, _image_input],
    outputs=[gr.Markdown()],
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
)

# Start serving the app.
vision_playground.launch()