StudentHelpGpt

Runtime error

App Files Files Community

Rooni committed on Dec 11, 2023

Commit

1b96f25

1 Parent(s): e685399

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -66

app.py CHANGED Viewed

@@ -1,77 +1,116 @@
 import gradio as gr
-import requests
-import os
 import base64
-from PIL import Image
-import numpy as np
 import io
-# Function that processes the image and text and sends a request to OpenAI
-def generate_text(image, prompt):
-    """Send an image and a text prompt to OpenAI and return the reply text.
-
-    The image is re-encoded as PNG, base64-encoded, and posted together with
-    the prompt. On HTTP 200 the stripped text of the first choice is returned;
-    otherwise a string with the status code and response body is returned.
-    """
-    # Convert the NumPy array to an Image object
-    image_pil = Image.fromarray(image.astype('uint8'), 'RGB')
-    # Convert the image so it can be sent through the API
-    image_bytes = io.BytesIO()
-    image_pil.save(image_bytes, format='PNG')
-    image_base64 = base64.b64encode(image_bytes.getvalue()).decode('utf-8')
-    # API key for OpenAI
-    api_key = os.getenv("API_KEY")
-    # Request headers
-    headers = {
-        'Authorization': f'Bearer {api_key}',
-        'Content-Type': 'application/json',
-    }
-    # Request data
-    data = {
         "model": "gpt-4-vision-preview",
-        "prompt": prompt,
-        "n": 1,
-        "temperature": 0.5,
-        "top_p": 1,
-        "frequency_penalty": 0,
-        "presence_penalty": 0,
-        "stop": ["\n"],
-        "image": {
-            "data": image_base64,
-            "mime_type": "image/png"
-        }
     }
-    # URL for requests to the gpt-4-vision-preview model
-    url = 'https://api.openai.com/v1/completions'
-    # Send the request to OpenAI
-    response = requests.post(url, headers=headers, json=data)
-    # Check the response and return the result
     if response.status_code == 200:
-        response_data = response.json()
-        return response_data['choices'][0]['text'].strip()
     else:
-        return f"Error: {response.status_code} - {response.text}"
-# Build the interface with Gradio
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(label="Загрузите изображение", type="numpy")
-            text_input = gr.Textbox(label="Введите текст")
-            submit_button = gr.Button("Решить")
-        with gr.Column():
-            output_text = gr.Textbox(label="Ответ", interactive=True, lines=10)
-            output_markdown = gr.Textbox(label="Ответ в Markdown", interactive=True, lines=10, visible=False)
-    # Function that updates the Markdown field when an answer arrives
-    def update_markdown(answer):
-        output_markdown.update(f"```\n{answer}\n```")
-        output_markdown.change_visibility(True)
-    # Bind the functions to the button
-    submit_button.click(fn=generate_text, inputs=[image_input, text_input], outputs=[output_text])
-    output_text.change(fn=update_markdown, inputs=[output_text], outputs=[output_markdown])
-demo.launch()

+# Import the necessary libraries
 import gradio as gr
+import openai
 import base64
 import io
+import requests
+# Function to encode the image to base64
+def encode_image_to_base64(image):
+    """Return *image* serialized as JPEG and encoded as a base64 string.
+
+    Args:
+        image: An object exposing the PIL ``Image`` interface (``mode``,
+            ``convert`` and ``save``).
+
+    Returns:
+        The JPEG bytes of the image, base64-encoded and decoded as UTF-8 text.
+    """
+    # The JPEG writer refuses modes that carry alpha or a palette (RGBA, LA,
+    # P, ...), so anything outside the modes it accepts is normalised to RGB
+    # first. Modes that already worked are left untouched.
+    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
+        image = image.convert("RGB")
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+# Function to send the image to the OpenAI API and get a response
+def ask_openai_with_image(instruction, json_prompt, low_quality_mode, image):
+    """Ask the OpenAI vision model about an image and return its answer.
+
+    Args:
+        instruction: Free-text instruction for the model. When blank, the
+            default instruction advertised in the UI is used instead.
+        json_prompt: Optional list of attributes; when non-blank the model is
+            asked to answer in JSON with those attributes.
+        low_quality_mode: If truthy, request ``"low"`` image detail, otherwise
+            ``"high"``.
+        image: The uploaded image (PIL ``Image``), or ``None`` if nothing was
+            uploaded.
+
+    Returns:
+        The text content of the first choice on success, otherwise a
+        human-readable error string. Errors are returned, not raised, so the
+        UI can display them.
+    """
+    # Local import: this revision dropped the top-level ``import os`` while
+    # still calling ``os.getenv`` below, which raised NameError.
+    import os
+
+    default_instruction = (
+        "I've uploaded an image and I'd like to know what it depicts "
+        "and any interesting details you can provide."
+    )
+    if image is None:
+        return "Error: no image was provided."
+    # Set the OpenAI API key
+    openai.api_key = os.getenv("API_KEY")
+    if not openai.api_key:
+        return "Error: the API_KEY environment variable is not set."
+    # Encode the uploaded image to base64
+    base64_image = encode_image_to_base64(image)
+    # The UI promises a default when the field is left blank.
+    instruction = (instruction or "").strip() or default_instruction
+    json_prompt = (json_prompt or "").strip()
+    if json_prompt != "":
+        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt}"
+    # Create the payload with the base64 encoded image
+    payload = {
         "model": "gpt-4-vision-preview",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": instruction,
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}",
+                            "detail": "low" if low_quality_mode else "high",
+                        },
+                    },
+                ],
+            }
+        ],
+        "max_tokens": 4095,
     }
+    # Send the request to the OpenAI API. A timeout keeps a stalled
+    # connection from blocking the worker forever.
+    try:
+        response = requests.post(
+            "https://api.openai.com/v1/chat/completions",
+            headers={"Authorization": f"Bearer {openai.api_key}"},
+            json=payload,
+            timeout=120,
+        )
+    except requests.exceptions.RequestException as e:
+        return f"Error: request to the OpenAI API failed: {e}"
+    # Check if the request was successful
     if response.status_code == 200:
+        response_json = response.json()
+        print("Response JSON:", response_json)  # Print the raw response JSON
+        try:
+            # Attempt to extract the content text
+            return response_json["choices"][0]["message"]["content"]
+        except (KeyError, IndexError, TypeError) as e:
+            # If there is an error in the JSON structure, print it
+            print("Error in JSON structure:", e)
+            print("Full JSON response:", response_json)
+            return "Error processing the image response."
     else:
+        # If an error occurred, return the error message
+        return f"Error: {response.text}"
+# Optional free-text list of attributes; passed to the handler as json_prompt.
+# The placeholder is only hint text shown in the empty field.
+json_schema = gr.Textbox(
+    label="JSON Attributes",
+    info="Define a list of attributes to force the model to respond in valid json format. Leave blank to disable json formatting.",
+    lines=3,
+    placeholder="""Example:
+- name: Name of the object
+- color: Color of the object
+""",
+)
+# Free-text instruction for the model; passed to the handler as instruction.
+instructions = gr.Textbox(
+    label="Instructions",
+    info="Instructions for the vision model to follow. Leave blank to use default.",
+    lines=2,
+    placeholder="""Default:
+I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide.""",
+)
+# Checkbox passed to the handler as low_quality_mode.
+low_quality_mode = gr.Checkbox(
+    label="Low Quality Mode",
+    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
+)
+# Create a Gradio interface
+# The inputs list is positional: its order must match the parameter order of
+# ask_openai_with_image (instruction, json_prompt, low_quality_mode, image).
+vision_playground = gr.Interface(
+    fn=ask_openai_with_image,
+    inputs=[
+        instructions,
+        json_schema,
+        low_quality_mode,
+        gr.Image(type="pil", label="Image"),
+    ],
+    outputs=[gr.Markdown()],
+    title="GPT-4-Vision Playground",
+    description="Upload an image and get a description from GPT-4 with Vision.",
+)
+# Launch the app
+vision_playground.launch()