# air_draw / app.py
# brogelio's picture
# Edited Interface
# 4b1fa52
# raw
# history blame
# 8.68 kB
import cv2
import numpy as np
from PIL import Image
from PIL import ImageColor
import mediapipe as mp
import time
import gradio as gr
import glob
# Size (px) of the live camera preview pasted into the paper's top-right corner.
width_, height_ = 144, 96
# True while the index fingertip and thumb tip are pinched ("pen down").
drawing_flag = False
# Timestamp of the last recognized gesture; used as a cooldown between gestures.
sleepy_time = time.time()
# Rendered paper frames accumulated over a run (assembled into the output video).
output_frames = []
def is_hex(hexq):
    """Return True if *hexq* is a '#RRGGBB' hex color string.

    Accepts exactly seven characters: a leading '#' followed by six
    hexadecimal digits. Empty or malformed strings return False.
    """
    # Length check first so hexq[0] cannot raise IndexError on "".
    if len(hexq) != 7 or hexq[0] != '#':
        return False
    # Original used isalnum(), which wrongly accepted e.g. '#zzzzzz'
    # (and that later crashes ImageColor.getcolor). Require true hex digits.
    return all(c in '0123456789abcdefABCDEF' for c in hexq[1:])
def hex2rgb(hex):
    """Convert a '#RRGGBB' string to an (R, G, B) tuple.

    Falls back to black (0, 0, 0) when the string is not a valid
    hex color according to is_hex().
    """
    return ImageColor.getcolor(hex, "RGB") if is_hex(hex) else (0, 0, 0)
def find_hands(brain, img):
    """Detect hands in a BGR frame using a mediapipe Hands instance.

    Args:
        brain: an initialized mp.solutions.hands.Hands detector.
        img: BGR frame as a numpy array, or None.

    Returns:
        A list of dicts, one per detected hand, each containing:
          "lm_list": 21 [px, py, pz] landmark points scaled to pixel
                     coordinates (z is scaled by image width, matching
                     mediapipe's normalization),
          "type":    the "Left"/"Right" handedness label.
        An empty list when img is None or no hands are found.
        (The original returned 0 / implicit None in those cases; [] is
        equally falsy, so truthiness-based callers are unaffected.)
    """
    if img is None:
        return []
    # mediapipe models are trained on RGB; OpenCV frames arrive as BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = brain.process(img_rgb)  # classification + 21 landmarks per hand
    if not results.multi_hand_landmarks:
        return []
    h, w, _ = img.shape  # frame size, for scaling normalized landmarks
    all_hands = []
    for hand_type, hand_lms in zip(results.multi_handedness,
                                   results.multi_hand_landmarks):
        lm_list = [[int(lm.x * w), int(lm.y * h), int(lm.z * w)]
                   for lm in hand_lms.landmark]
        all_hands.append({
            "lm_list": lm_list,
            "type": hand_type.classification[0].label,
        })
    return all_hands
def is_drawing(index, thumb, threshold=20.0):
    """Return True when the index fingertip and thumb tip are "pinched".

    Compares the 2-D (x, y) Euclidean distance between the two landmark
    points against *threshold* pixels. The default of 20 matches the
    original hard-coded value; it is now a parameter so callers can tune
    the pinch sensitivity.
    """
    index_xy = np.array(index[:2], dtype=float)
    thumb_xy = np.array(thumb[:2], dtype=float)
    return float(np.linalg.norm(index_xy - thumb_xy)) < threshold
def save(landmarks):
    """Return True when index, middle, ring and pinky are all extended.

    A finger counts as extended when its tip landmark is above (smaller
    y than) its PIP joint. The original's nested ifs returned None when
    an outer check failed; this always returns an explicit bool (both
    are falsy, so truthiness-based callers are unaffected).
    """
    # (tip, pip) landmark index pairs for the four non-thumb fingers.
    fingers = [(8, 6), (12, 10), (16, 14), (20, 18)]
    return all(landmarks[tip][1] < landmarks[pip][1] for tip, pip in fingers)
def clear(landmarks):
    """Detect a "thumbs up": the thumb landmarks strictly stacked, tip
    highest, all above the index fingertip (smaller y = higher)."""
    thumb_tip_y = landmarks[4][1]
    thumb_ip_y = landmarks[3][1]
    thumb_mcp_y = landmarks[2][1]
    index_tip_y = landmarks[8][1]
    if thumb_tip_y < thumb_ip_y and thumb_ip_y < thumb_mcp_y and thumb_mcp_y < index_tip_y:
        return True
    return False
def show(video, dominant_hand, hex_color):
    """Main gradio handler: track the user's index finger through *video*
    and render the air-drawing onto a white "paper", returning the path
    of the assembled webm.

    Args:
        video: path of the recorded webcam clip.
        dominant_hand: 'Right' or 'Left' — which detected hand to follow.
        hex_color: '#RRGGBB' pen color (invalid values fall back to black).

    Returns:
        'any.webm' — the path of the rendered output video.

    Gestures (3 s cooldown between triggers, via the module-level
    sleepy_time): pinch index+thumb to draw, open the hand to lift the
    pen, thumbs-up to clear the page.
    """
    global sleepy_time  # shared gesture-cooldown timestamp

    cam = cv2.VideoCapture(video)
    # cv2 .get() returns floats; int() them once so numpy shapes and the
    # VideoWriter size are valid (the original passed floats to np.zeros
    # on page-clear, which raised and was swallowed by a bare except).
    width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
    detector = mp.solutions.hands.Hands(min_detection_confidence=0.8)

    paper = np.full((height, width, 3), 255, dtype=np.uint8)  # blank white page
    color = hex2rgb(hex_color)
    past_holder = ()       # previous index-fingertip position (empty = none yet)
    drawing_flag = False   # pen currently down? (function-local state)
    page_num = 0           # per-frame png counter

    while cam.isOpened():
        ok, frame = cam.read()
        if not ok:
            # End of stream. The original only processed valid frames but
            # its break lived in an unreachable `if True: ... else:` arm,
            # so the failed read crashed cv2.flip instead of exiting.
            break
        frame_f = cv2.flip(frame, 1)  # mirror so motion matches the user
        hands = find_hands(detector, frame_f)
        try:
            if hands:
                # Prefer the dominant hand; fall back to the second one
                # (IndexError when absent is handled below).
                hand1 = hands[0] if hands[0]["type"] == dominant_hand else hands[1]
                lm_list1 = hand1["lm_list"]  # 21 landmark points
                if hand1["type"] == dominant_hand:
                    idx_coords = (lm_list1[8][0], lm_list1[8][1])  # index fingertip
                    cv2.circle(frame_f, idx_coords, 5, color, cv2.FILLED)
                    if len(past_holder) and drawing_flag:  # pen is down: draw
                        cv2.line(paper, past_holder, idx_coords, color, 5)
                        cv2.line(frame_f, past_holder, idx_coords, color, 5)
                        cv2.circle(frame_f, idx_coords, 5, color, cv2.FILLED)
                    # Thumbs-up clears the page (3 s cooldown). The palette
                    # picker and save-to-file gestures were discontinued due
                    # to gradio limitations.
                    if clear(lm_list1) and time.time() - sleepy_time > 3:
                        paper = np.full((height, width, 3), 255, dtype=np.uint8)
                        print("page cleared")
                        sleepy_time = time.time()
                    past_holder = idx_coords
                    # Pinching index fingertip and thumb tip puts the pen down.
                    drawing_flag = is_drawing(idx_coords, lm_list1[4])
        except (IndexError, KeyError):
            # e.g. dominant hand not in frame this iteration; skip drawing.
            pass
        finally:
            # Picture-in-picture camera preview in the paper's top-right corner.
            presenter = cv2.resize(frame_f, (width_, height_))
            h, w, _ = frame_f.shape
            paper[0:height_, w - width_: w] = presenter

        # Convert a copy for output; the original reassigned `paper`, which
        # cumulatively channel-swapped the canvas every frame.
        frame_bgr = cv2.cvtColor(paper, cv2.COLOR_RGB2BGR)
        output_frames.append(frame_bgr)
        Image.fromarray(frame_bgr).save("paper%s.png" % page_num)
        page_num += 1

    cam.release()

    # Assemble this run's frames in numeric order (glob's ordering is
    # unspecified and lexicographic sorting would put paper10 before paper2),
    # sized to the actual frame dimensions rather than a hard-coded 640x480.
    video_output = cv2.VideoWriter('any.webm', cv2.VideoWriter_fourcc(*'VP80'),
                                   30, (width, height))
    for i in range(page_num):
        video_output.write(cv2.imread("paper%s.png" % i))
    video_output.release()
    return 'any.webm'
title = 'Air Draw'
desc = 'A mediapipe hands wrapper for drawing in the air. Position your fingers in a "drawing" form to draw, and open up your hand to lift the "pen" from the "paper". Use a "thumbs up" gesture to clear the drawing paper.'
# Gradio UI wiring: webcam recording + hand choice + pen color in,
# rendered drawing video out (show() returns the webm path).
# NOTE(review): gr.inputs.* is the legacy pre-3.x gradio namespace —
# confirm the pinned gradio version before upgrading this app.
iface = gr.Interface(
fn=show,
inputs=[
gr.inputs.Video(source="webcam", label="Record yourself drawing in the air!"),
gr.inputs.Radio(['Right', 'Left'], label="Dominant Hand"),
gr.inputs.Textbox(placeholder="#355C7D", label="Hex Color")
],
outputs='video',
title=title,
description=desc)
iface.launch(share=True, enable_queue=True)