init
- .gitattributes +1 -0
- .gitignore +1 -0
- README.md +1 -1
- app.py +130 -0
- demo_footer.html +3 -0
- demo_header.html +13 -0
- demo_tools.html +10 -0
- examples/00004200.jpg +0 -0
- face_landmarker.task +3 -0
- face_landmarker.task.txt +8 -0
- mp_box.py +133 -0
- requirements.txt +4 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.task filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+__pycache__
README.md
CHANGED
@@ -8,7 +8,7 @@ sdk_version: 5.5.0
 app_file: app.py
 pinned: false
 license: mit
-short_description: face detection with mediapipe
+short_description: face detection with mediapipe-landmaker and output multi-size
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,130 @@
+import spaces
+import gradio as gr
+import subprocess
+from PIL import Image
+import json
+
+import mp_box
+'''
+Face-landmark-detection-based face detection.
+https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
+From the model card:
+https://storage.googleapis.com/mediapipe-assets/MediaPipe%20BlazeFace%20Model%20Card%20(Short%20Range).pdf
+Licensed under the Apache License, Version 2.0.
+Trained on Google's dataset (see the model card for more detail).
+
+Not based on the Face Detector:
+https://ai.google.dev/edge/mediapipe/solutions/vision/face_detector
+
+This is part of a landmark-extraction program and needs control over the face edge,
+so I don't know which one is better; I have never compared them.
+'''
+#@spaces.GPU(duration=120)
+def process_images(image, no_mesh_draw=False, square_shape=False, progress=gr.Progress(track_tqdm=True)):
+    progress(0, desc="Start Mediapipe")
+
+    boxes, mp_image, face_landmarker_result = mp_box.mediapipe_to_box(image)
+    if no_mesh_draw:
+        annotated_image = image
+    else:
+        annotated_image = mp_box.draw_landmarks_on_image(face_landmarker_result, image)
+    annotation_boxes = []
+
+    jsons = {}
+    index = 1
+
+    # mediapipe_to_box returns three XYWH boxes followed by their squared variants
+    if square_shape:
+        xy_boxes = boxes[3:]
+    else:
+        xy_boxes = boxes[:3]
+
+    for box in xy_boxes:
+        label = f"type-{index}"
+        annotation_boxes.append([mp_box.xywh_to_xyxy(box), label])
+        jsons[label] = box
+        index += 1
+
+    formatted_json = json.dumps(jsons, indent=1)
+    return [annotated_image, annotation_boxes], formatted_json
+
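For reference, a sketch of what the JSON-Output field holds for a single face; the pixel values below are illustrative only, and each box is in XYWH order. With the square-shape option checked, the squared variants appear under the same labels:

{
 "type-1": [412, 260, 200, 233],
 "type-2": [398, 231, 228, 262],
 "type-3": [384, 203, 256, 291]
}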
+
+def read_file(file_path: str) -> str:
+    """Read the text of the target file."""
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    return content
+
+css = """
+#col-left {
+    margin: 0 auto;
+    max-width: 640px;
+}
+#col-right {
+    margin: 0 auto;
+    max-width: 640px;
+}
+.grid-container {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 10px;
+}
+
+.image {
+    width: 128px;
+    height: 128px;
+    object-fit: cover;
+}
+
+.text {
+    font-size: 16px;
+}
+"""
+
+with gr.Blocks(css=css, elem_id="demo-container") as demo:
+    with gr.Column():
+        gr.HTML(read_file("demo_header.html"))
+        gr.HTML(read_file("demo_tools.html"))
+        with gr.Row():
+            with gr.Column():
+                image = gr.Image(height=800, sources=['upload','clipboard'], image_mode='RGB', elem_id="image_upload", type="pil", label="Upload")
+                with gr.Row(elem_id="prompt-container", equal_height=False):
+                    with gr.Row():
+                        btn = gr.Button("Face Detect", elem_id="run_button")
+
+                with gr.Accordion(label="Advanced Settings", open=False):
+                    with gr.Row(equal_height=True):
+                        no_mesh_draw = gr.Checkbox(label="No Mesh Drawing")
+                        square_shape = gr.Checkbox(label="Square Shape")
+
+            with gr.Column():
+                image_out = gr.AnnotatedImage(label="Output", elem_id="output-img")
+                text_out = gr.TextArea(label="JSON-Output")
+
+        # all three UI inputs are forwarded to process_images
+        btn.click(fn=process_images, inputs=[image, no_mesh_draw, square_shape], outputs=[image_out, text_out], api_name='infer')
+        gr.Examples(
+            examples=["examples/00004200.jpg"],
+            inputs=[image]
+        )
+        gr.HTML(read_file("demo_footer.html"))
+
+if __name__ == "__main__":
+    demo.launch()
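Because the click handler is registered with api_name='infer', the Space can also be driven programmatically. A minimal sketch using gradio_client, assuming the Space is published as Akjava/mediapipe-face-detect (the handle linked in demo_tools.html):

from gradio_client import Client, handle_file

client = Client("Akjava/mediapipe-face-detect")
annotated, box_json = client.predict(
    handle_file("examples/00004200.jpg"),  # image
    False,                                 # no_mesh_draw
    False,                                 # square_shape
    api_name="/infer",
)
print(box_json)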
demo_footer.html
ADDED
@@ -0,0 +1,3 @@
+<div>
+<p> Images are generated with <a href="https://huggingface.co/black-forest-labs/FLUX.1-schnell">FLUX.1-schnell</a> and licensed under <a href="http://www.apache.org/licenses/LICENSE-2.0">the Apache 2.0 License</a></p>
+</div>
demo_header.html
ADDED
@@ -0,0 +1,13 @@
+<div style="text-align: center;">
+<h1>
+Mediapipe Face Detect
+</h1>
+<div class="grid-container">
+<img src="https://akjava.github.io/AIDiagramChatWithVoice-FaceCharacter/webp/128/00191245_09_00002200.webp" alt="Flux.1-schnell-WebP3Frame-TalkingAnimation" class="image">
+This Space uses the <a href="https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker">Mediapipe face landmark detection model</a>, which is licensed under <a href="http://www.apache.org/licenses/LICENSE-2.0">the Apache 2.0 License</a>
+<p class="text">
+
+</p>
+</div>
+
+</div>
demo_tools.html
ADDED
@@ -0,0 +1,10 @@
+<div style="text-align: center;">
+<p><a href="https://huggingface.co/spaces/Akjava/mediapipe-face-detect">Mediapipe Face detector</a></p>
+<p><a href="https://huggingface.co/spaces/Akjava/WebPTalkHead">[WebP-3F-TH]</a>
+<a href="https://huggingface.co/spaces/Akjava/flux1-schnell-img2img">[Flux1-Img2Img(GPU)]</a>
+<a href="https://huggingface.co/spaces/Akjava/flux1-schnell-mask-inpaint">[Flux1-Inpaint(GPU)]</a>
+<a href="https://huggingface.co/spaces/Akjava/OpenCVInpaintCPU">[OpenCV-Inpaint]</a>
+<a href="https://huggingface.co/spaces/Akjava/Simple-Whitebalance-Image">[Whitebalance]</a>
+<a href="https://huggingface.co/spaces/Akjava/Simple-Mask-Paste-Image">[Paste Image]</a>
+<a href="https://huggingface.co/spaces/Akjava/WebP-Resize-Convert">[WebP Resize Convert]</a></p>
+</div>
examples/00004200.jpg
ADDED
face_landmarker.task
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
+size 3758596
face_landmarker.task.txt
ADDED
@@ -0,0 +1,8 @@
+Face landmark detection
+https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
+
+The model card page is
+https://storage.googleapis.com/mediapipe-assets/MediaPipe%20BlazeFace%20Model%20Card%20(Short%20Range).pdf
+
+The license is Apache 2.0:
+https://www.apache.org/licenses/LICENSE-2.0.html
mp_box.py
ADDED
@@ -0,0 +1,133 @@
+import mediapipe as mp
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+from mediapipe.framework.formats import landmark_pb2
+from mediapipe import solutions
+import numpy as np
+
+# Convert X,Y,W,H to x1,y1,x2,y2 (left-top / right-bottom style)
+def xywh_to_xyxy(box):
+    return [box[0], box[1], box[0]+box[2], box[1]+box[3]]
+
+def convert_to_box(face_landmarks_list, indices, w=1024, h=1024):
+    # Track the min/max pixel coordinates of the chosen landmarks of the first face
+    x1 = w
+    y1 = h
+    x2 = 0
+    y2 = 0
+    for index in indices:
+        x = min(w, max(0, (face_landmarks_list[0][index].x*w)))
+        y = min(h, max(0, (face_landmarks_list[0][index].y*h)))
+        if x < x1:
+            x1 = x
+        if y < y1:
+            y1 = y
+        if x > x2:
+            x2 = x
+        if y > y2:
+            y2 = y
+
+    return [int(x1), int(y1), int(x2-x1), int(y2-y1)]
+
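To see the normalized-to-pixel conversion in isolation, here is a small sketch with stand-in landmark objects; real landmarks come from the FaceLandmarker result, but anything with .x and .y attributes works:

from types import SimpleNamespace

# Two stand-in landmarks at normalized (0.25, 0.25) and (0.75, 0.5)
fake_landmarks = [[SimpleNamespace(x=0.25, y=0.25), SimpleNamespace(x=0.75, y=0.5)]]
print(convert_to_box(fake_landmarks, [0, 1], w=400, h=400))  # [100, 100, 200, 100] as XYWH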
+
+def box_to_square(bbox):
+    # Expand the shorter side so the box becomes square, keeping it centered
+    box = list(bbox)
+    if box[2] > box[3]:
+        diff = box[2]-box[3]
+        box[3] += diff
+        box[1] -= diff/2
+    elif box[3] > box[2]:
+        diff = box[3]-box[2]
+        box[2] += diff
+        box[0] -= diff/2
+    return box
+
+
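A quick worked example of the two box helpers, with made-up numbers: a 200x100 XYWH box gets its height padded to 200 (shifting y up by half the difference), and xywh_to_xyxy converts the original box to corner form:

box = [50, 80, 200, 100]       # XYWH, wider than tall
print(box_to_square(box))      # [50, 30.0, 200, 200]
print(xywh_to_xyxy(box))       # [50, 80, 250, 180]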
+def face_landmark_result_to_box(face_landmarker_result, width=1024, height=1024):
+    face_landmarks_list = face_landmarker_result.face_landmarks
+
+    full_indices = list(range(456))
+
+    MIDDLE_FOREHEAD = 151
+    BOTTOM_CHIN_EX = 152
+    BOTTOM_CHIN = 175
+    CHIN_TO_MIDDLE_FOREHEAD = [200, 14, 1, 6, 18, 9]
+    MOUTH_BOTTOM = [202, 200, 422]
+    EYEBROW_CHEEK_LEFT_RIGHT = [46, 226, 50, 1, 280, 446, 276]
+
+    LEFT_HEAD_OUTER_EX = 251  # on a side face, almost the same as full
+    LEFT_HEAD_OUTER = 301
+    LEFT_EYE_OUTER_EX = 356
+    LEFT_EYE_OUTER = 264
+    LEFT_MOUTH_OUTER_EX = 288
+    LEFT_MOUTH_OUTER = 288
+    LEFT_CHIN_OUTER = 435
+    RIGHT_HEAD_OUTER_EX = 21
+    RIGHT_HEAD_OUTER = 71
+    RIGHT_EYE_OUTER_EX = 127
+    RIGHT_EYE_OUTER = 34
+    RIGHT_MOUTH_OUTER_EX = 58
+    RIGHT_MOUTH_OUTER = 215
+    RIGHT_CHIN_OUTER = 150
+
+    # TODO: name these landmark lines
+    min_indices = CHIN_TO_MIDDLE_FOREHEAD + EYEBROW_CHEEK_LEFT_RIGHT + MOUTH_BOTTOM
+
+    chin_to_brow_indices = [LEFT_CHIN_OUTER, LEFT_MOUTH_OUTER, LEFT_EYE_OUTER, LEFT_HEAD_OUTER, MIDDLE_FOREHEAD, RIGHT_HEAD_OUTER, RIGHT_EYE_OUTER, RIGHT_MOUTH_OUTER, RIGHT_CHIN_OUTER, BOTTOM_CHIN] + min_indices
+
+    box1 = convert_to_box(face_landmarks_list, min_indices, width, height)
+    box2 = convert_to_box(face_landmarks_list, chin_to_brow_indices, width, height)
+    box3 = convert_to_box(face_landmarks_list, full_indices, width, height)
+
+    # Three XYWH boxes (inner face, chin-to-brow, full landmarks) plus their square versions
+    return [box1, box2, box3, box_to_square(box1), box_to_square(box2), box_to_square(box3)]
+
+
+def draw_landmarks_on_image(detection_result, rgb_image):
+    face_landmarks_list = detection_result.face_landmarks
+    annotated_image = np.copy(rgb_image)
+
+    # Loop through the detected faces to visualize.
+    for idx in range(len(face_landmarks_list)):
+        face_landmarks = face_landmarks_list[idx]
+
+        # Draw the face landmarks.
+        face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
+        face_landmarks_proto.landmark.extend([
+            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
+        ])
+
+        solutions.drawing_utils.draw_landmarks(
+            image=annotated_image,
+            landmark_list=face_landmarks_proto,
+            connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
+            landmark_drawing_spec=None,
+            connection_drawing_spec=mp.solutions.drawing_styles
+            .get_default_face_mesh_tesselation_style())
+
+    return annotated_image
+
+def mediapipe_to_box(image_data, model_path="face_landmarker.task"):
+    BaseOptions = mp.tasks.BaseOptions
+    FaceLandmarker = mp.tasks.vision.FaceLandmarker
+    FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
+    VisionRunningMode = mp.tasks.vision.RunningMode
+
+    options = FaceLandmarkerOptions(
+        base_options=BaseOptions(model_asset_path=model_path),
+        running_mode=VisionRunningMode.IMAGE,
+        # thresholds set to 0 so even low-confidence faces are processed
+        min_face_detection_confidence=0, min_face_presence_confidence=0
+    )
+
+    with FaceLandmarker.create_from_options(options) as landmarker:
+        # Accept either a file path or a PIL image / array
+        if isinstance(image_data, str):
+            mp_image = mp.Image.create_from_file(image_data)
+        else:
+            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(image_data))
+        face_landmarker_result = landmarker.detect(mp_image)
+        boxes = face_landmark_result_to_box(face_landmarker_result, mp_image.width, mp_image.height)
+    return boxes, mp_image, face_landmarker_result
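A minimal standalone usage sketch for mp_box, assuming face_landmarker.task sits next to the script and the bundled example image is used:

import mp_box

boxes, mp_image, result = mp_box.mediapipe_to_box("examples/00004200.jpg")
inner, chin_to_brow, full = boxes[:3]      # XYWH boxes, tightest to widest
squares = boxes[3:]                        # the same three boxes, squared
print(inner, mp_box.xywh_to_xyxy(inner))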
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+numpy
+torch
+spaces
+mediapipe