nnibras committed on
Commit
6aab31e
·
verified ·
1 Parent(s): 352c81d

Upload 17 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assests/left.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assests/left1.jpg filter=lfs diff=lfs merge=lfs -text
38
+ assests/right.jpg filter=lfs diff=lfs merge=lfs -text
39
+ assests/right1.jpg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.gradio_interface import generate_anaglyph
2
+ import gradio as gr
3
+
4
# Gradio UI: three image uploads (passed to the callback as file paths) plus a
# depth selector, wired to generate_anaglyph.
iface = gr.Interface(
    fn=generate_anaglyph,
    inputs=[
        gr.Image(type="filepath", label="Upload Person Image"),
        gr.Image(type="filepath", label="Upload Left Stereoscopic Image"),
        gr.Image(type="filepath", label="Upload Right Stereoscopic Image"),
        # Offer only the depth levels the insertion step has settings for and
        # preselect one, so a submission never carries an empty depth value
        # (an empty string or None would fail the depth lookup downstream).
        gr.Dropdown(["close", "medium", "far"], value="medium", label="Depth Level"),
    ],
    outputs=gr.Image(label="Anaglyph Image"),
)

if __name__ == "__main__":
    iface.launch()
assests/left.jpg ADDED

Git LFS Details

  • SHA256: bf959cc46c651b823431a66501b1c557150a21796d3c00eba0b5f884d1bebaf0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.93 MB
assests/left1.jpg ADDED

Git LFS Details

  • SHA256: aafa16b69538539592a2191d208c59679ddd5848badda828df10adb788c0f624
  • Pointer size: 132 Bytes
  • Size of remote file: 1.35 MB
assests/person.avif ADDED
assests/right.jpg ADDED

Git LFS Details

  • SHA256: 40f786f0f7b0cca6090a157f98a93465d90d292943d9b040458c86092c3674ea
  • Pointer size: 132 Bytes
  • Size of remote file: 2.32 MB
assests/right1.jpg ADDED

Git LFS Details

  • SHA256: 7c3f1a66eecddff916f1da155fa55091bd42151c3611442c7368aca27aad4e3c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.51 MB
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ opencv-python
4
+ Pillow
5
+ gradio
src/__pycache__/anaglyph_converter.cpython-312.pyc ADDED
Binary file (824 Bytes). View file
 
src/__pycache__/gradio_interface.cpython-312.pyc ADDED
Binary file (1.65 kB). View file
 
src/__pycache__/segmentation.cpython-312.pyc ADDED
Binary file (1.72 kB). View file
 
src/__pycache__/stereoscopic_insert.cpython-312.pyc ADDED
Binary file (2.36 kB). View file
 
src/anaglyph_converter.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+
4
+
5
def create_anaglyph(left_image, right_image):
    """
    Combine a stereo pair into a single red-cyan anaglyph image.

    Channel index 2 (red, in the BGR layout this module works with) is taken
    from the left image; channel indices 0 and 1 (blue and green) are taken
    from the right image.

    Parameters:
    - left_image: array of shape (height, width, channels) with >= 3 channels.
    - right_image: array with >= 3 channels; resized to the left image's
      width and height when the two differ.

    Returns:
    - A new array with the same shape and dtype as left_image.

    Raises:
    - ValueError: if either image is None or is not a 3-D array with at least
      three channels.
    """
    for label, image in (("left", left_image), ("right", right_image)):
        if image is None:
            raise ValueError(f"{label} image is None")
        if image.ndim != 3 or image.shape[2] < 3:
            raise ValueError(
                f"{label} image must have shape (height, width, >=3 channels), "
                f"got {image.shape}"
            )

    height, width = left_image.shape[:2]

    # Only resample when the sizes actually differ; resizing to the same size
    # is a wasted full-image pass.
    if right_image.shape[:2] != (height, width):
        right_image = cv2.resize(right_image, (width, height))

    # Start from zeros so any channel beyond the first three stays zero.
    anaglyph = np.zeros_like(left_image)
    anaglyph[:, :, 2] = left_image[:, :, 2]  # Red channel from left image
    anaglyph[:, :, :2] = right_image[:, :, :2]  # Blue and green from right image

    return anaglyph
src/gradio_interface.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.segmentation import segment_person
2
+ from src.stereoscopic_insert import insert_person
3
+ from src.anaglyph_converter import create_anaglyph
4
+ import cv2
5
+ from PIL import Image
6
+
7
+
8
def generate_anaglyph(
    person_image_path, left_image_path, right_image_path, depth="medium"
):
    """
    Generate an anaglyph 3D image by segmenting a person from an uploaded image,
    inserting the segmented person into a stereoscopic pair, and converting the
    result to an anaglyph format.

    Parameters:
    - person_image_path: file path to the uploaded person image.
    - left_image_path: file path to the uploaded left stereoscopic image.
    - right_image_path: file path to the uploaded right stereoscopic image.
    - depth: depth level for the person in the 3D scene ("close", "medium", or
      "far"). An empty or None value falls back to "medium".

    Returns:
    - Anaglyph PIL image ready for display.
    """
    # Imported locally so this function does not rely on module-level imports
    # beyond the ones it already had.
    import os
    import tempfile

    # The UI can submit an empty selection; treat it as the default level
    # instead of letting the depth lookup fail.
    depth = depth or "medium"

    # Segment the person from the uploaded image
    person_image = segment_person(person_image_path)

    # insert_person takes a file path, so the cut-out has to go through disk.
    # A per-call temporary directory keeps overlapping requests from
    # overwriting each other's file and removes the file afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        person_path = os.path.join(tmp_dir, "person.png")
        person_image.save(person_path)

        # Insert the segmented person into the stereoscopic images
        left_image, right_image = insert_person(
            left_image_path, right_image_path, person_path, depth
        )

    # Create the final anaglyph image and convert BGR -> RGB for PIL
    anaglyph_image = create_anaglyph(left_image, right_image)
    anaglyph_pil = Image.fromarray(cv2.cvtColor(anaglyph_image, cv2.COLOR_BGR2RGB))

    return anaglyph_pil
src/segmentation.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torchvision import models, transforms
3
+ from PIL import Image
4
+ import numpy as np
5
+
6
+
7
# Class index that torchvision's pretrained DeepLabV3 assigns to "person"
# (Pascal VOC label set: 0 is background, 15 is person).
_PERSON_CLASS_INDEX = 15

# Model instance shared by all calls; created on first use.
_SEGMENTATION_MODEL = None


def _get_segmentation_model():
    """Return the shared DeepLabV3 model in eval mode, building it on first use."""
    global _SEGMENTATION_MODEL
    if _SEGMENTATION_MODEL is None:
        _SEGMENTATION_MODEL = models.segmentation.deeplabv3_resnet101(
            pretrained=True
        ).eval()
    return _SEGMENTATION_MODEL


def segment_person(image_path):
    """
    Cut the person out of an image using a pretrained DeepLabV3 model.

    Parameters:
    - image_path: file path of the image to segment.

    Returns:
    - RGBA PIL image with the original colours; alpha is 255 on pixels
      classified as "person" and 0 everywhere else.
    """
    # Load the input image; the context manager closes the file handle once
    # the RGB copy has been made.
    with Image.open(image_path) as source:
        input_image = source.convert("RGB")

    preprocess = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    input_tensor = preprocess(input_image).unsqueeze(0)

    # Reuse one model instance instead of rebuilding the network on every call.
    model = _get_segmentation_model()
    with torch.no_grad():
        output = model(input_tensor)["out"][0]

    # argmax gives a per-pixel class index, not a binary mask. Keep only the
    # person class; multiplying the raw indices by 255 would wrap around in
    # uint8 and also make every other detected class visible.
    person_mask = (output.argmax(0) == _PERSON_CLASS_INDEX).numpy()
    alpha = person_mask.astype(np.uint8) * 255

    # Convert mask to an image with transparency
    segmented_image = np.dstack([np.array(input_image), alpha])  # Add alpha channel
    return Image.fromarray(segmented_image)
src/stereoscopic_insert.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+
4
+
5
def insert_person(left_image_path, right_image_path, person_image_path, depth="medium"):
    """
    Overlay a person cut-out onto both images of a stereoscopic pair.

    The cut-out is scaled according to the depth level, placed near the bottom
    left of each image, and shifted horizontally between the two views by the
    depth level's disparity.

    Parameters:
    - left_image_path: file path to the left stereoscopic image.
    - right_image_path: file path to the right stereoscopic image.
    - person_image_path: file path to the person image; its fourth channel, if
      present, is used as the alpha mask (otherwise it is treated as opaque).
    - depth: "close", "medium", or "far". Empty/None falls back to "medium".

    Returns:
    - (left_image, right_image): the two loaded images with the person blended in.

    Raises:
    - ValueError: if depth is not a known level, an image cannot be read, or
      the person image has fewer than three channels.
    """
    # Scale and disparity per depth level: closer means larger and more disparity.
    depth_settings = {
        "close": {"scale": 1.2, "disparity": 15},
        "medium": {"scale": 1.0, "disparity": 10},
        "far": {"scale": 0.7, "disparity": 5},
    }

    depth = depth or "medium"
    if depth not in depth_settings:
        raise ValueError(
            f"Unknown depth level {depth!r}; expected one of {sorted(depth_settings)}"
        )
    scale_factor = depth_settings[depth]["scale"]
    disparity = depth_settings[depth]["disparity"]

    # Load the stereoscopic pair and the person cut-out (keeping its alpha channel)
    left_image = cv2.imread(left_image_path)
    right_image = cv2.imread(right_image_path)
    person = cv2.imread(person_image_path, cv2.IMREAD_UNCHANGED)

    # cv2.imread signals failure by returning None rather than raising.
    for label, image, path in (
        ("left", left_image, left_image_path),
        ("right", right_image, right_image_path),
        ("person", person, person_image_path),
    ):
        if image is None:
            raise ValueError(f"Could not read {label} image: {path!r}")
    if person.ndim != 3 or person.shape[2] < 3:
        raise ValueError("Person image must have at least three colour channels")

    # Resize the person image based on the scale factor (never below 1x1,
    # which cv2.resize would reject)
    person_h, person_w = person.shape[:2]
    new_size = (
        max(1, int(person_w * scale_factor)),
        max(1, int(person_h * scale_factor)),
    )
    person_resized = cv2.resize(person, new_size, interpolation=cv2.INTER_AREA)

    person_bgr = person_resized[:, :, :3]
    if person_resized.shape[2] >= 4:
        person_alpha = person_resized[:, :, 3] / 255.0
    else:
        # No alpha channel: treat the whole cut-out as opaque.
        person_alpha = np.ones(person_resized.shape[:2])

    # Place near the bottom of the left image for realism
    left_x = 50
    left_y = left_image.shape[0] - person_resized.shape[0] - 50

    # An object in front of the background sits further LEFT in the right-eye
    # view than in the left-eye view, and the shift grows as it gets closer.
    # Shifting the right-eye copy to the right instead would push the person
    # behind the scene, most strongly for "close".
    right_x = left_x - disparity

    # Overlay the person onto both left and right images
    for img, x in zip((left_image, right_image), (left_x, right_x)):
        # Clamp the placement to the image bounds
        y, x = max(0, left_y), max(0, x)
        y_end = min(y + person_resized.shape[0], img.shape[0])
        x_end = min(x + person_resized.shape[1], img.shape[1])
        if y_end <= y or x_end <= x:
            # Placement falls entirely outside this image; nothing to blend.
            continue

        height, width = y_end - y, x_end - x
        alpha = person_alpha[:height, :width, np.newaxis]
        roi = img[y:y_end, x:x_end]

        # Alpha-blend all three colour channels at once
        blended = (1 - alpha) * roi + alpha * person_bgr[:height, :width]
        img[y:y_end, x:x_end] = blended.astype(img.dtype)

    # Return the modified left and right images
    return left_image, right_image
utils/config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Disparity value for each named depth level.
# NOTE(review): insert_person defines its own per-depth table with different
# disparities (15 / 10 / 5) — confirm which set is the intended one.
DISPARITY_LEVELS = {
    "close": 10,
    "medium": 5,
    "far": 2,
}

# Default position for overlaying segmented images.
# NOTE(review): presumably (x, y) in pixels — confirm against the caller.
DEFAULT_POSITION = (50, 100)
utils/image_processing.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+
3
+
4
def resize_image(image, width=None, height=None):
    """
    Resize an image to a target width or height, preserving its aspect ratio.

    Parameters:
    - image: array whose first two dimensions are (height, width).
    - width: target width; when given, it takes precedence and any height
      argument is ignored (the height is derived from the aspect ratio).
    - height: target height, used only when width is None.

    Returns:
    - The input image itself when neither dimension is given, otherwise the
      result of cv2.resize at the computed size.
    """
    if width is None and height is None:
        return image

    src_h, src_w = image.shape[:2]

    if width is not None:
        # Width drives the scale; derive the matching height.
        target_size = (width, int(src_h * (width / float(src_w))))
    else:
        # Only height was given; derive the matching width.
        target_size = (int(src_w * (height / float(src_h))), height)

    return cv2.resize(image, target_size)