Spaces:

abraarsyed
/

vocalprint-ai

Sleeping

App Files Files Community

vocalprint-ai / web /app.py

abraarsyed

Initial Commit

39ec667 about 1 month ago

raw

history blame contribute delete

2.32 kB

	# web/app.py
	# Gradio-based web UI for VocalPrint AI (Refactored to use shared CLI logic)

	import gradio as gr
	import os
	import tempfile
	import whisper
	import torch
	import json
	import sys

	# Ensure parent directory is in path
	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

	from core.processor import (
	download_video,
	extract_audio,
	transcribe,
	classify_accent,
	compute_fluency
	)

	# Whisper checkpoint name handed to whisper.load_model below.
	_WHISPER_MODEL_NAME = "small"

	# Loaded a single time at import so every process_video call shares the same model object.
	whisper_model = whisper.load_model(_WHISPER_MODEL_NAME)

	def process_video(url):
	    """Analyze the speech in a video URL and return values for the Gradio outputs.

	    Calls download_video, extract_audio, transcribe, classify_accent and
	    compute_fluency in sequence, working inside a temporary directory that
	    is deleted again before the function returns.

	    Args:
	        url: Video URL typed into the UI. Blank or missing values are
	            rejected without starting a download.

	    Returns:
	        A 6-tuple matching the interface outputs:
	        (top accent, confidence as "<n>%", fluency score, detected language,
	        first 500 characters of the transcript, rows of [accent, "<n>%"]).
	        On any failure the tuple is ("Error", "-", "-", "-", message, []).
	    """
	    # Gradio submits an empty string when the textbox is left blank; fail
	    # fast with a readable message instead of handing it to the downloader.
	    if not url or not url.strip():
	        return ("Error", "-", "-", "-", "Please provide a video URL.", [])
	    url = url.strip()

	    try:
	        # TemporaryDirectory removes the downloaded video and extracted audio
	        # when the block exits, on success and on error alike, so repeated
	        # requests no longer pile up files on disk.
	        with tempfile.TemporaryDirectory() as temp_dir:
	            audio_path = os.path.join(temp_dir, "audio.wav")

	            download_video(url, temp_dir)
	            # The downloaded file name is not known in advance, so pick the
	            # first .mp4 that shows up in the working directory.
	            video_file = next(
	                (f for f in os.listdir(temp_dir) if f.endswith(".mp4")), None
	            )
	            if not video_file:
	                raise FileNotFoundError("No .mp4 file found")

	            extract_audio(os.path.join(temp_dir, video_file), audio_path)
	            transcript, segments, language = transcribe(audio_path, whisper_model)
	            top_accent, confidence, top3 = classify_accent(audio_path)
	            fluency = compute_fluency(segments)

	        # Format the top3 for the dataframe display
	        top3_formatted = [
	            [item["accent"], f"{item['confidence']}%"] for item in top3
	        ]

	        return (
	            top_accent,
	            f"{confidence}%",
	            fluency,
	            language,
	            transcript[:500],
	            top3_formatted,
	        )
	    except Exception as e:
	        # UI boundary: surface the failure in the transcript field rather
	        # than letting the exception propagate out of the Gradio callback.
	        return ("Error", "-", "-", "-", str(e), [])

	# Single text input that receives the video link.
	url_input = gr.Textbox(
	    label="Public Video URL (YouTube, Loom, MP4)",
	    placeholder="https://...",
	)

	# Output components, listed in the same order as the tuple process_video returns.
	result_outputs = [
	    gr.Textbox(label="Detected Accent"),
	    gr.Textbox(label="Confidence (%)"),
	    gr.Textbox(label="Fluency Score (0–100)"),
	    gr.Textbox(label="Language Detected by Whisper"),
	    gr.Textbox(label="Transcript Sample (first 500 chars)"),
	    gr.Dataframe(headers=["Accent", "Confidence"], label="Top 3 Accent Predictions"),
	]

	# Wire the callback and components into the Gradio interface.
	iface = gr.Interface(
	    fn=process_video,
	    inputs=url_input,
	    outputs=result_outputs,
	    title="VocalPrint AI",
	    description="Analyze English speech from a public video link to detect accent, fluency, and transcription.",
	    allow_flagging="never",
	    theme="default",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()