# SelectByText / app.py
# (Hugging Face Space page residue preserved as comments:
#  author "5m4ck3r", commit message "Update app.py", revision c50f5c0)
import os
# HACK: force-pin the Gradio version at process start by shelling out to pip.
# This runs on every Space boot and must complete before `import gradio` below
# picks up the freshly installed 3.47.1. NOTE(review): the commands are fixed
# strings (no untrusted input), but a runtime reinstall is fragile — consider
# pinning gradio==3.47.1 in requirements.txt instead.
os.system("pip uninstall -y gradio")
os.system("pip install gradio==3.47.1")
from transformers import pipeline
import gradio
import base64
from PIL import Image, ImageDraw
from io import BytesIO
from sentence_transformers import SentenceTransformer, util
# Model zoo, loaded once at import time (each download/load can take a while):
# scene-level semantic segmentation (background, objects).
backgroundPipe = pipeline("image-segmentation", model="facebook/maskformer-swin-large-coco")
# clothes/person-part segmentation on the full image.
PersonPipe = pipeline("image-segmentation", model="mattmdjaga/segformer_b2_clothes")
# sentence embeddings used to match the user's text to a mask label.
sentenceModal = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# fine-grained human parsing, applied to per-person crops.
personDetailsPipe = pipeline("image-segmentation", model="yolo12138/segformer-b2-human-parse-24")
# face-part segmentation, applied to a detected face crop.
faceModal = pipeline("image-segmentation", model="jonathandinu/face-parsing")
# face bounding-box detector.
faceDetectionModal = pipeline("object-detection", model="aditmohan96/detr-finetuned-face")
# generic object detector; used here only for its "person" boxes.
PersonDetectionpipe = pipeline("object-detection", model="hustvl/yolos-tiny")
def getPersonDetail(image):
    """Segment body parts for every person detected in *image*.

    Detects "person" boxes with the object detector, runs the human-parsing
    pipeline on each cropped person, and returns a dict mapping lowercase
    part labels (prefixed with "person N " when more than one person is
    found) to full-size mask images positioned via cbiwm.
    """
    detections = PersonDetectionpipe(image)
    person_boxes = [d["box"] for d in detections if d["label"].lower() == "person"]
    multiple = len(person_boxes) > 1
    masks = {}
    for index, box in enumerate(person_boxes, start=1):
        crop = image.crop((box["xmin"], box["ymin"], box["xmax"], box["ymax"]))
        for part in personDetailsPipe(crop):
            if multiple:
                key = (f'Person {index} {part["label"]}').lower()
            else:
                key = part["label"].lower()
            masks[key] = cbiwm(image, part["mask"], box)
    return masks
def cbiwm(image, mask, coordinates):
    """Place a crop-sized *mask* onto an opaque black canvas of *image*'s size.

    The mask is pasted at the crop's top-left corner (coordinates['xmin'],
    coordinates['ymin']) using itself as the paste alpha, so the returned
    RGBA image is black everywhere except where the mask is drawn.
    (Name presumably "composite black image with mask" — TODO confirm.)
    """
    canvas = Image.new("RGBA", image.size, (0, 0, 0, 255))
    offset = (coordinates["xmin"], coordinates["ymin"])
    canvas.paste(mask, offset, mask)
    return canvas
def processFaceDetails(image):
    """Add face-part masks on top of the per-person body-part masks.

    Starts from getPersonDetail's results, detects a face box, runs the
    face-parsing model on that crop, and merges its masks (labels with any
    ".png" suffix stripped, lowercased) into the result dict. If no face is
    detected, the person masks are returned unchanged.
    """
    results = getPersonDetail(image)
    faces = faceDetectionModal(image)
    if not faces:
        return results
    # Mirror the original behavior: when more than one face is detected,
    # the SECOND detection is used — presumably to skip a false positive;
    # verify against the detector's output ordering.
    chosen = faces[1] if len(faces) > 1 else faces[0]
    box = chosen["box"]
    face_crop = image.crop((box["xmin"], box["ymin"], box["xmax"], box["ymax"]))
    for part in faceModal(face_crop):
        label = part["label"].replace(".png", "").lower()
        results[label] = cbiwm(image, part["mask"], box)
    return results
def getImageDetails(image) -> dict:
    """Collect every available mask for *image* into one label→mask dict.

    Layers, in order (later entries overwrite earlier ones on label clash):
    person/face part masks, scene segmentation masks, clothes segmentation
    masks. Keys are lowercase labels; values are PIL mask images.
    """
    labels = processFaceDetails(image)
    clothes_segments = PersonPipe(image)
    scene_segments = backgroundPipe(image)
    for segment in scene_segments:
        labels[segment["label"].lower()] = segment["mask"]  # Apply base64 image converter here if needed
    for segment in clothes_segments:
        labels[segment["label"].lower()] = segment["mask"]  # Apply base64 image converter here if needed
    return labels
def processSentence(sentence: str, semilist: list):
    """Return the entry of *semilist* most semantically similar to *sentence*.

    Encodes the query and all candidate labels with the sentence-transformer
    model and picks the candidate with the highest dot-product score.

    Args:
        sentence: free-text query from the user.
        semilist: non-empty list of candidate label strings.

    Returns:
        The best-matching element of *semilist*.
    """
    query_embedding = sentenceModal.encode(sentence)
    passage_embedding = sentenceModal.encode(semilist)
    scores = util.dot_score(query_embedding, passage_embedding)[0]
    # argmax replaces the original copy-to-list / max() / list.index() scan;
    # both resolve ties to the first maximum, so behavior is unchanged.
    return semilist[int(scores.argmax())]
def process_image(image):
    """Convert a white-on-black mask into a black silhouette on transparency.

    Pure-black pixels (the mask background) become fully transparent;
    pure-white pixels (the mask foreground) become black with their original
    alpha; every other pixel is left untouched.

    The original did this in two full passes with two intermediate images
    (swap black<->white, then clear white to transparent); those passes
    compose to exactly the single mapping above, so this one-pass version
    is equivalent and avoids the intermediate allocations.

    Args:
        image: a PIL image (any mode; converted to RGBA internally).

    Returns:
        A new RGBA PIL image of the same size.
    """
    rgba = image.convert("RGBA")
    out_data = []
    for pixel in rgba.getdata():
        rgb = pixel[:3]
        if rgb == (0, 0, 0):
            # black -> (swapped to white) -> cleared to fully transparent
            out_data.append((0, 0, 0, 0))
        elif rgb == (255, 255, 255):
            # white -> black, original alpha preserved
            out_data.append((0, 0, 0, pixel[3]))
        else:
            out_data.append(pixel)
    processed = Image.new("RGBA", rgba.size)
    processed.putdata(out_data)
    return processed
def processAndGetMask(image, text: str):
    """Gradio entry point: return the mask of *image* best matching *text*.

    Gathers every labeled mask, uses sentence similarity to select the label
    closest to the user's text, and converts the chosen mask into a black
    silhouette on a transparent background.

    Args:
        image: the input picture. NOTE: this is a PIL.Image — the Gradio
            input component uses type="pil" — so the original ": str"
            annotation was incorrect and has been removed.
        text: free-text description of the region to segment.

    Returns:
        An RGBA PIL image (see process_image).
    """
    masks = getImageDetails(image)
    labels = list(masks.keys())
    selected = processSentence(text, labels)
    imageout = masks[selected]
    print(f"Selected : {selected} Among : {labels}")
    return process_image(imageout)
# Wire the segmenter into a Gradio UI: one image + one text prompt in,
# one RGBA mask image out.
gr = gradio.Interface(
processAndGetMask,
[gradio.Image(label="Input Image", type="pil"), gradio.Text(label="Input text to segment")],
gradio.Image(label="Output Image", type="pil")
)
# share=True additionally requests a public tunnel link alongside the local server.
gr.launch(share=True)