MultiMedTulu

Runtime error

App Files Files Community

not-lain committed on Nov 7, 2023

Commit

adc6d8b

1 Parent(s): 143c351

fix for image function

Browse files

Files changed (5) hide show

.gitattributes +0 -1
.gitignore +2 -0
app.py +14 -12
requirements.txt +1 -0
test.py +59 -38

.gitattributes CHANGED Viewed

@@ -1,4 +1,3 @@
-.env
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+ lain halloween.png

app.py CHANGED Viewed

@@ -19,6 +19,7 @@ import json
 import dotenv
 from transformers import AutoProcessor, SeamlessM4TModel
 import torchaudio
 dotenv.load_dotenv()
 client = Client("https://facebook-seamless-m4t.hf.space/--replicas/frq8b/")
@@ -76,19 +77,14 @@ def process_speech_using_model(sound):
     return text_out
-def convert_image_to_required_format(image):
-    """
-    convert image from numpy to base64
-    """
-    base64_image = base64.b64encode(image).decode('utf-8')
-    return base64_image
-def process_image_with_openai(image):
-    base64_image = convert_image_to_required_format(image)
     openai_api_key = os.getenv('OPENAI_API_KEY')
     # oai_org = os.getenv('OAI_ORG')
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {openai_api_key}"
@@ -118,7 +114,13 @@ def process_image_with_openai(image):
     response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
-    return str(response.json())
 def query_vectara(text):

 import dotenv
 from transformers import AutoProcessor, SeamlessM4TModel
 import torchaudio
+import PIL
 dotenv.load_dotenv()
 client = Client("https://facebook-seamless-m4t.hf.space/--replicas/frq8b/")
     return text_out
+def process_image(image) :
+    img_name = f"{np.random.randint(0, 100)}.jpg"
+    PIL.Image.fromarray(image.astype('uint8'), 'RGB').save(img_name)
+    image = open(img_name, "rb").read()
+    base64_image = base64_image = base64.b64encode(image).decode('utf-8')
     openai_api_key = os.getenv('OPENAI_API_KEY')
     # oai_org = os.getenv('OAI_ORG')
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {openai_api_key}"
     response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+    try :
+        out = response.json()
+        out = out["choices"][0]["message"]["content"]
+        return f"{out}"
+    except Exception as e :
+        return f"{e}"
 def query_vectara(text):

requirements.txt CHANGED Viewed

@@ -4,3 +4,4 @@ git+https://github.com/huggingface/transformers
 torchaudio==2.0.2
 sentencepiece
 python-dotenv

 torchaudio==2.0.2
 sentencepiece
 python-dotenv
+Pillow

test.py CHANGED Viewed

@@ -1,40 +1,61 @@
-# import gradio library
 import gradio as gr
 import numpy as np
-import base64
-import time
-import gradio_client as grc
-# client = grc.InterfaceDescriptionClient("https://gradio.app")
-# define a function that takes input and returns output
-client = grc.Client("facebook/seamless_m4t")
-print(client.view_api(all_endpoints=True))
-job = client.submit(
-        "S2TT",
-        "file",
-        None,
-        "sample_input.mp3",
-        "",
-        "French",
-        "English",
-        api_name="/run",
-    )
-while job.done() == False :
-    time.sleep(1)
-gr.Markdown(job.result())
-#     return out
-# # print(client.view_api(all_endpoints=True))
-# def convert_image_to_required_format(audio):
-#     sr, data = audio
-#     return str(type(sr))+str(type(data))
-# # create a gradio interface
-# iface = gr.Interface(
-#     fn=sound_to_text,
-#     inputs=[gr.Audio()],
-#     outputs=[gr.Markdown(label="Output Text")],
-#     )
-# iface.launch()

+import dotenv
+import base64
+import os
+import requests
 import gradio as gr
+import PIL
 import numpy as np
+dotenv.load_dotenv()
+def process_image(image) :
+    # img_name = f"{np.random.randint(0, 100)}.jpg"
+    img_name = f"{1}.jpg"
+    PIL.Image.fromarray(image.astype('uint8'), 'RGB').save(img_name)
+    image = open(img_name, "rb").read()
+    base64_image = base64_image = base64.b64encode(image).decode('utf-8')
+    openai_api_key = os.getenv('OPENAI_API_KEY')
+    # oai_org = os.getenv('OAI_ORG')
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {openai_api_key}"
+    }
+    payload = {
+        "model": "gpt-4-vision-preview",
+        "messages": [
+        {
+            "role": "user",
+            "content": [
+            {
+                "type": "text",
+                "text": "What's in this image?"
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_image}"
+                }
+            }
+            ]
+        }
+        ],
+        "max_tokens": 300
+    }
+    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+    try :
+        out = response.json()
+        out = out["choices"][0]["message"]["content"]
+        print("out : ", out)
+        print("type(out) : ", type(out))
+        return f"{out}"
+    except Exception as e :
+        return f"{e}"
+iface = gr.Interface(fn=process_image, inputs="image", outputs="text")
+iface.launch()