Spaces:
Running
Running
import codecs | |
import io | |
import os | |
import pickle | |
from pathlib import Path | |
from PIL import Image | |
import requests | |
import os | |
import sys | |
sys.path.append(os.getcwd()) | |
from cllm.services.utils import get_bytes_value | |
from cllm.services.nlp.api import openai_chat_model | |
__ALL__ = [ | |
"object_detection", | |
"image_classification", | |
"ocr", | |
"image_to_text", | |
"segment_objects", | |
] | |
HOST = os.environ.get("CLLM_SERVICES_HOST", "localhost") | |
PORT = os.environ.get("CLLM_SERVICES_PORT", 10056) | |
def setup(host="localhost", port=10049): | |
global HOST, PORT | |
HOST = host | |
PORT = port | |
def object_detection(image, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/object_detection" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
return response.json() | |
def image_classification(image, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/image_classification" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
return response.json() | |
def image_to_text(image, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/image_to_text" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
return response.json() | |
def ocr(image, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/ocr" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
return response.json() | |
def segment_objects(image, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/segment_objects" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
pickled = response.json()["data"] | |
output = pickle.loads(codecs.decode(pickled.encode(), "base64")) | |
for o in output: | |
stream = io.BytesIO() | |
o["mask"].save(stream, format="png") | |
stream.seek(0) | |
o["mask"] = stream.getvalue() | |
return output | |
def visual_grounding(image, query, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = rf"http://{host}:{port}/visual_grounding" | |
human_msg = f"""Your task is to extract the prompt from input. Here is examples: | |
Input: | |
find the regin of interest in the da9619_image.png: \"An elephant in right corner\" | |
Answer: | |
An elephant in right corner | |
Input: | |
locate \"A maintenance vehicle on a railway\" in the image | |
Answer: | |
A maintenance vehicle on a railway | |
Input: | |
use visual grounding method to detect the regin of interest in the 1ba6e2_image.png: The motorcycle with the rainbow flag" | |
Answer: | |
The motorcycle with the rainbow flag | |
Input: | |
for given image, find A little baby girl with brunette hair, a pink and white dress, and is being fed frosting from her mom." | |
Answer: | |
A little baby girl with brunette hair, a pink and white dress, and is being fed frosting from her mom | |
Input: | |
find the policeman on the motorcycle in the 851522_image.png" | |
Answer: | |
the policeman on the motorcycle | |
Input: | |
The legs of a zebra shown under the neck of another zebra. | |
Answer: | |
The legs of a zebra shown under the neck of another zebra. | |
Input: | |
{query} | |
Answer: | |
""" | |
extracted_prompt = openai_chat_model(human_msg) | |
files = {"image": get_bytes_value(image)} | |
data = {"query": extracted_prompt} | |
# image = Image.open(io.BytesIO(image)).convert("RGB") | |
response = requests.post(url, data=data, files=files) | |
return response.json() | |
def image_captioning(image, endpoint="llava", **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/{endpoint}" | |
data = None | |
if endpoint == "llava": | |
data = {"text": "Please describe the image in details."} | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files, data=data) | |
return response.content.decode("utf-8") | |
def segment_all(image: str | Path, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/segment_all" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
return response.content | |
def set_image(image: str | Path, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/set_image" | |
files = {"image": (image, get_bytes_value(image))} | |
response = requests.post(url, files=files) | |
return response.content.decode() | |
def segment_by_mask(mask: str | Path, image_id: str, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/segment_by_mask" | |
data = {"image_id": image_id} | |
files = {"mask": (mask, get_bytes_value(mask))} | |
response = requests.post(url, files=files, data=data) | |
return response.content | |
def segment_by_points(points: list | tuple | str, image_id: str, **kwargs): | |
host = kwargs.get("host", HOST) | |
port = kwargs.get("port", PORT) | |
url = f"http://{host}:{port}/segment_by_points" | |
data = {"points": points, "image_id": image_id} | |
response = requests.post(url, data=data) | |
return response.content | |
def seg_by_mask(image, prompt_mask, **kwargs): | |
image_id = set_image(image) | |
mask = segment_by_mask(mask=prompt_mask, image_id=image_id) | |
return mask | |
def seg_by_points(image, prompt_points, **kwargs): | |
image_id = set_image(image) | |
mask = segment_by_points(points=prompt_points, image_id=image_id) | |
return mask | |