import base64
import io
import os

import gradio as gr
import requests
from PIL import Image

# openai is not preinstalled in this Space, so install it at startup
# before importing it.
os.system("pip install openai")
import openai

# DeepInfra exposes an OpenAI-compatible API; point the openai client at it.
openai.api_key = ""
openai.api_base = "https://api.deepinfra.com/v1/openai"
def todataimage(file, ext):
    """Encode a PIL image as a base64 data URL."""
    buffered = io.BytesIO()
    file.save(buffered, format=ext)
    # Build the MIME type from the actual format instead of hardcoding PNG.
    return "data:image/" + ext.lower() + ";base64," + base64.b64encode(buffered.getvalue()).decode("utf-8")
def caption(file, ext):
    datimg = todataimage(file, ext)
    # Ask a Hugging Face Space that runs several captioning models to
    # describe the image; it returns one caption per model.
    response = requests.post(
        "https://russellc-comparing-captioning-models.hf.space/run/predict",
        json={"data": [datimg]},
    ).json()
    print(response)
    data = response["data"]
    # Merge the per-model captions into one caption with an LLM hosted on
    # DeepInfra, weighting the higher-quality descriptions more heavily.
    chat_completion = openai.ChatCompletion.create(
        model="jondurbin/airoboros-l2-70b-gpt4-1.4.1",
        messages=[
            {
                "role": "system",
                "content": (
                    "You will be given descriptions of one image from a variety of "
                    "image captioning models of varying quality. Combine them into "
                    "one image caption and make that your entire output, with no "
                    "extra words like \"here is your output\". Don't take too much "
                    "information from the low-quality descriptions, or too little "
                    "from the high-quality ones. Do NOT include ANY text other than "
                    "the description. Remember: MERGE them and add details, do NOT "
                    "copy from any of the given descriptions, and use prior "
                    "knowledge to infer details."
                ),
            },
            {
                "role": "user",
                "content": "High Quality:\n" + data[1] + "\n" + data[3]
                + "\nMedium Quality:\n" + data[2]
                + "\nLow Quality:\n" + data[0],
            },
        ],
    )

    return chat_completion.choices[0].message.content
def image_predict(image):
    return caption(image, "png")
examples = [["penguin.png"], ["stop.png"], ["stick.png"]]
title = "Generating Captions For Images Using ImCap v1"
description = """ImCap v1 uses a varity of image caption generators, then plugs it into LLaMA 2 to get a final caption.<br/>
<a href="https://huggingface.co/spaces/SFP/ImCap_v1?duplicate=true"
                style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
            <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
                src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
if there is a queue (free CPU works fine as it uses apis).
<br/>To use the api, go [here](https://sfp-imcapapi.hf.space/--replicas/cnk2m/) and click Use Via API to learn how to use the API"""
iface = gr.Interface(image_predict, inputs=gr.Image(type="pil"), outputs="label", flagging_options=[], title=title, examples=examples, description=description)
iface.launch(show_api=False)
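
# A minimal sketch of calling the companion API Space from another script,
# assuming it exposes a single image -> caption endpoint (the Space URL is
# taken from the description above; the "/predict" api_name is an assumption,
# not confirmed here):
#
#     from gradio_client import Client
#
#     client = Client("https://sfp-imcapapi.hf.space/")
#     result = client.predict("penguin.png", api_name="/predict")
#     print(result)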