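# SpatialSense: a Streamlit app that streams webcam video over WebRTC, runs
# monocular depth estimation (Depth Anything) on each frame, and can answer
# questions about the scene through a locally hosted Ollama LLaVA model.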
import base64
import os
import time
from io import BytesIO

import cv2
import numpy as np
import psutil
import streamlit as st
import torch
import torch.nn.functional as F
from ollama import Client
from PIL import Image
from streamlit_webrtc import WebRtcMode, webrtc_streamer
from transformers import pipeline

# Skip macOS's AVFoundation camera-permission prompt. This must be set as an
# environment variable; a bare Python assignment has no effect on OpenCV.
os.environ["OPENCV_AVFOUNDATION_SKIP_AUTH"] = "1"

def main():
    st.title('SpatialSense')
    st.write('GitHub: https://github.com/kabir12345/SpatialSense')

    # Initialize the depth-estimation pipeline. In a long-running app this is
    # worth caching (e.g. with @st.cache_resource) so the model loads only once.
    pipe = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")

    # Streamlit-WebRTC component for live webcam capture
    webrtc_ctx = webrtc_streamer(key="example", mode=WebRtcMode.SENDRECV)

    # Reuse one placeholder so each frame replaces the previous image
    # instead of appending a new one to the page.
    image_placeholder = st.empty()

    if webrtc_ctx.video_receiver:
        while True:
            frame = webrtc_ctx.video_receiver.get_frame(timeout=None)
            if frame is None:
                continue

            # Frames arrive as BGR; convert to RGB before building the PIL image
            image = frame.to_ndarray(format="bgr24")
            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # Perform depth estimation
            depth_mask = apply_depth_estimation(pipe, pil_img)

            # Convert PIL Image to NumPy array for display in Streamlit
            depth_mask_np = np.array(depth_mask)

            # Display the processed frame (already RGB)
            image_placeholder.image(depth_mask_np, caption="Processed Depth Image")


def apply_depth_estimation(pipe, pil_img):
    # Predict depth, resize it back to the input resolution, normalize it to
    # 0-255, and render it with an inferno colormap.
    original_width, original_height = pil_img.size
    depth = pipe(pil_img)["depth"]
    depth_tensor = torch.from_numpy(np.array(depth)).unsqueeze(0).unsqueeze(0).float()
    depth_resized = F.interpolate(depth_tensor, size=(original_height, original_width), mode='bilinear', align_corners=False)[0, 0]

    depth_normalized = (depth_resized - depth_resized.min()) / (depth_resized.max() - depth_resized.min()) * 255.0
    depth_normalized_np = depth_normalized.byte().cpu().numpy()
    colored_depth = cv2.applyColorMap(depth_normalized_np, cv2.COLORMAP_INFERNO)
    colored_depth_rgb = cv2.cvtColor(colored_depth, cv2.COLOR_BGR2RGB)
    colored_depth_image = Image.fromarray(colored_depth_rgb)
    
    return colored_depth_image

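# Encode a PIL image as base64, e.g. for APIs that accept inline image data.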
def encode_image_to_base64(pil_img):
    buffered = BytesIO()
    pil_img.save(buffered, format="JPEG")  # You can change to "PNG" if you prefer
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

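# Forward a user question and the current frame to a local Ollama server
# running a LLaVA vision-language model, and render the reply in Streamlit.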
def handle_user_query(query, image_path, text_placeholder):
    if query:
        client = Client(host='http://localhost:11434')
        response = client.chat(model='llava:7b-v1.5-q2_K', messages=[
            {
                'role': 'user',
                'content': query,
                'images': [image_path]  # Pass the path to the temporary file
            },
        ])
        # client.chat returns a dict; the reply text lives under message.content
        response_content = str(response['message']['content'])
        text_placeholder.text(response_content)

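# Poll CPU usage in a loop. Intended for a background thread; note that writing
# to st.session_state from a non-Streamlit thread is not officially supported,
# so the value may not reach the UI without extra plumbing.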
def update_cpu_usage():
    while True:
        cpu_usage = psutil.cpu_percent(interval=1)  # blocks for the 1 s sample window
        st.session_state.cpu_usage = f"CPU Usage: {cpu_usage}%"
        time.sleep(5)  # pause between samples; adjust as needed


if __name__ == "__main__":
    main()