import gradio as gr
import cv2
import numpy as np
import pytesseract
import groq
import os
import tempfile
from gtts import gTTS  # Google Text-to-Speech
from PIL import Image
from dotenv import load_dotenv
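# Dependencies (pip names): gradio, opencv-python, numpy, pytesseract, groq,
# gTTS, Pillow, python-dotenv, plus the system tesseract-ocr binary for OCR.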
# Initialize the Groq client
load_dotenv()  # Load environment variables from a local .env file
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
groq_client = groq.Groq(api_key=GROQ_API_KEY)
# Install Tesseract OCR if it is missing (requires root in the container)
if not os.path.exists("/usr/bin/tesseract"):
    os.system("apt-get update && apt-get install -y tesseract-ocr")

# Point pytesseract at the Tesseract binary
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
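# Note: on Hugging Face Spaces, the conventional way to install system packages
# is to list them (e.g. tesseract-ocr) in a packages.txt file at the repo root,
# which avoids the runtime apt-get call above.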
# Global storage for the chat history & the last floorplan analysis
chat_history = []
last_floorplan_context = {"features": "No floorplan uploaded yet.", "text": "No text detected."}
latest_audio_file = None  # Path of the latest AI voice response
### **Floorplan Feature Detection**
def detect_floorplan_features(image):
    """Detect walls and rooms, and extract text, using OpenCV & OCR."""
    # Convert the PIL image to an OpenCV-compatible array
    image_cv = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(image_cv, cv2.COLOR_RGB2GRAY)

    # Apply Canny edge detection
    edges = cv2.Canny(gray, 50, 150)

    # Detect wall-like line segments with the probabilistic Hough transform
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=120, minLineLength=80, maxLineGap=10)
    wall_count = len(lines) if lines is not None else 0  # HoughLinesP returns None when nothing is found

    # Approximate rooms as large external contours
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    room_count = sum(1 for c in contours if cv2.contourArea(c) > 1000)

    # Extract text labels with OCR (--psm 6 assumes a uniform block of text)
    extracted_text = pytesseract.image_to_string(gray, config="--psm 6").strip()

    detected_features = f"Walls: {wall_count}, Rooms: {room_count}"
    return detected_features, extracted_text, edges, wall_count, room_count
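# Note: counting large external contours over raw Canny edges is a rough proxy
# for rooms; one possible refinement (not applied here) is to close small gaps
# first, e.g. cv2.morphologyEx(edges, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8)),
# and run cv2.findContours on the closed image instead.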
### **Analyze Floorplan (AI Processing)**
def analyze_floorplan(image):
    """Analyze the floorplan, reset the chat history, and generate the AI response."""
    global last_floorplan_context, chat_history, latest_audio_file

    # Run feature detection
    detected_features, extracted_text, edges, wall_count, room_count = detect_floorplan_features(image)

    # Convert the edge map to a PIL image for display
    edges_pil = Image.fromarray(cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB))

    # Store context for follow-up chat
    last_floorplan_context = {
        "features": detected_features,
        "text": extracted_text if extracted_text else "No text detected"
    }

    # Architectural analysis prompt
    prompt = f"""
**Architectural Floorplan Analysis**

**Detected Structural Features**
- **Rooms Identified:** {room_count}
- **Walls Detected:** {wall_count} (load-bearing & partition)
- **Extracted Text:** {extracted_text if extracted_text else "No text found"}

**Design & Efficiency Insights**
- **Traffic Flow & Accessibility:** Are room transitions efficient?
- **Lighting & Ventilation:** Where can natural light be optimized?
- **Space Utilization:** Are there underutilized or congested areas?
- **Expansion Feasibility:** Which walls could be removed for better space efficiency?
- **Modernization Potential:** Can smart home features be integrated?

Provide **a structured, expert-level architectural assessment**.
"""

    response = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[{"role": "system", "content": prompt}],
        max_tokens=500
    )
    ai_reply = response.choices[0].message.content

    # Start a fresh chat history with the analysis as the first assistant turn
    chat_history.clear()
    chat_history.append({"role": "assistant", "content": ai_reply})

    # Generate the spoken version of the reply
    latest_audio_file = text_to_speech(ai_reply)

    return edges_pil, chat_history, latest_audio_file
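# Note: the analysis brief above is sent as a single system message; a common
# alternative is to keep a short persona in the system role and send the
# detected-feature details as a user message instead.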
### **Text-to-Speech AI Response**
def text_to_speech(text):
    """Convert the AI response to speech with gTTS and return the audio file path."""
    temp_dir = tempfile.gettempdir()  # System temp folder
    file_path = os.path.join(temp_dir, "speech_output.mp3")  # Fixed filename, overwritten each call

    # Remove the previous file if it exists
    if os.path.exists(file_path):
        os.remove(file_path)

    # Generate the speech audio
    tts = gTTS(text=text, lang="en")
    tts.save(file_path)
    return file_path
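# Note: the fixed speech_output.mp3 filename means concurrent users overwrite
# each other's audio; tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
# is one per-request alternative if that becomes a problem.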
### **AI Chatbot for Floorplan Analysis**
def chat_with_ai(user_input):
    """Handle a user chat turn with the AI."""
    global latest_audio_file

    # Include the floorplan summary only once a floorplan has been analyzed
    if last_floorplan_context["features"] != "No floorplan uploaded yet.":
        floorplan_context = f"**Floorplan Summary**: {last_floorplan_context['features']}"
    else:
        floorplan_context = ""

    updated_input = f"""
User Question: {user_input}
{floorplan_context}
Provide an **expert-level architectural response** considering previous chat history.
"""

    # Send the prior turns plus the new question so the model actually sees the history
    response = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        messages=chat_history + [{"role": "user", "content": updated_input}],
        max_tokens=500
    )
    ai_reply = response.choices[0].message.content

    # Append the new turn to the chat history
    chat_history.append({"role": "user", "content": user_input})
    chat_history.append({"role": "assistant", "content": ai_reply})

    # Generate the spoken version of the latest reply
    latest_audio_file = text_to_speech(ai_reply)

    return chat_history, latest_audio_file
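# Note: chat_history grows without bound; llama3-70b-8192 has an 8,192-token
# context window, so very long sessions may need older turns trimmed or summarized.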
### **Gradio UI**
with gr.Blocks() as demo:
    gr.Markdown("# Virtual Architect - AI Floorplan Chat (Voice Enabled)")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Floorplan")
            analyze_btn = gr.Button("Analyze Floorplan")
        with gr.Column():
            result_image = gr.Image(type="pil", label="Edge Detection Output")

    chatbox = gr.Chatbot(label="AI Chat (Starts with Floorplan Analysis)", type="messages")

    # Single audio component reused for every AI voice response
    ai_voice_response = gr.Audio(label="AI Voice Response")

    # The analysis lands in the chatbox along with its voice response
    analyze_btn.click(analyze_floorplan, inputs=image_input, outputs=[result_image, chatbox, ai_voice_response])

    # Text input directly under the chatbox
    user_input = gr.Textbox(label="Ask a Question Here")
    send_btn = gr.Button("Send")
    send_btn.click(chat_with_ai, inputs=user_input, outputs=[chatbox, ai_voice_response])
# Launch the Gradio app
demo.launch()
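# Note: on Hugging Face Spaces this launch call is picked up automatically;
# for a public link when running locally, use demo.launch(share=True).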