# VoiceToWrite / app.py
# Last change: "Update app.py" by Seicas (commit cd2255e, verified)
from fastapi import FastAPI, Depends, HTTPException, status, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
import gradio as gr
import os
import tempfile
import json
from typing import Optional
import spacy
from preprocessing import clean_audio
from asr import transcribe_file, MedicalASR
from diarization import diarize_segments
from privacy import MedicalPrivacyProcessor
from config import settings
# Make sure HF_TOKEN is present in the environment (left empty unless the
# user supplied their own token) and force the environment flag to production.
# NOTE(review): this runs after `from config import settings`; if the settings
# object reads ENVIRONMENT at import time, this assignment is too late - confirm.
os.environ.setdefault("HF_TOKEN", "")
os.environ["ENVIRONMENT"] = "production"

# UI configuration.
THEME = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
LOGO = "assets/pediatric_logo.png"  # add a logo here (optional)

# Load the Turkish spaCy pipeline; when spacy.load raises OSError (presumably
# because the packaged model is not installed), fall back to spacy.blank("tr").
try:
    nlp = spacy.load("tr_core_news_sm")
except OSError:
    nlp = spacy.blank("tr")
# Gradio interface
def create_gradio_app():
    """Build the Gradio interface used for interactive transcription.

    Returns:
        The configured ``gr.Interface``. It is not launched here; the caller
        decides how to serve it.
    """

    def _format_result(result):
        """Render a transcription result mapping as the text shown in the UI."""
        parts = ["📝 Transkripsiyon:\n\n"]
        segments = result.get("diarization")
        if segments:
            for segment in segments:
                parts.append(
                    f"🗣️ {segment['speaker']} "
                    f"({segment['start']:.1f}s - {segment['end']:.1f}s):\n"
                )
                parts.append(f"{segment['text']}\n\n")
        else:
            # No usable speaker segments (key missing, empty, or None):
            # fall back to the plain transcript instead of showing nothing.
            parts.append(result.get("text") or "")
        if result.get("anonymized"):
            parts.append("\n🔒 Kişisel veriler anonimleştirildi.")
        return "".join(parts)

    def transcribe_gr(audio_file, diarize=True, enhance_audio=True, anonymize=True):
        """Transcribe an uploaded audio file and return display text.

        Args:
            audio_file: Path of the uploaded audio (the Audio input uses
                ``type="filepath"``), or ``None`` when nothing was uploaded.
            diarize: Forwarded as ``speaker_diarization``.
            enhance_audio: Forwarded as ``enhance_audio``.
            anonymize: Value of the anonymization checkbox.
                NOTE(review): accepted because the Interface passes four
                inputs, but it is not forwarded to ``transcribe`` - confirm
                whether the ASR model exposes an option for it.

        Returns:
            The formatted transcript, a prompt to upload a file, or an error
            message; this handler never raises.
        """
        if audio_file is None:
            return "Lütfen bir ses dosyası yükleyin."
        try:
            asr_model = get_asr_model()
            result = asr_model.transcribe(
                audio_file,
                speaker_diarization=diarize,
                enhance_audio=enhance_audio,
            )
            return _format_result(result)
        except Exception as e:
            # UI boundary: report the failure as text rather than raising.
            return f"Bir hata oluştu: {e}"

    demo = gr.Interface(
        fn=transcribe_gr,
        inputs=[
            gr.Audio(type="filepath", label="Ses Dosyası"),
            gr.Checkbox(label="Konuşmacı Ayrımı", value=True),
            gr.Checkbox(label="Ses İyileştirme", value=True),
            gr.Checkbox(label="Kişisel Verileri Anonimleştir", value=True),
        ],
        outputs=gr.Textbox(label="Transkripsiyon Sonucu", lines=10),
        title="🏥 Tıbbi Konuşma Transkripsiyon Servisi",
        description="Bu servis, doktor vizitelerindeki konuşmaları yazıya döker ve konuşmacıları ayırt eder.",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
        allow_flagging="never",
    )
    return demo
# FastAPI application
app = FastAPI(title="Pediatrik ASR API")

# CORS settings.
# Every origin is allowed, so credentialed requests stay disabled: combining
# a wildcard origin with allow_credentials=True is not permitted by the CORS
# rules and would let any website make authenticated calls on a user's behalf.
# To support cookies/auth headers, list the trusted origins explicitly first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Dependency that provides the ASR model
def get_asr_model():
    """Return a ``MedicalASR`` configured for Turkish medical speech.

    The model name is read from ``settings.ASR_MODEL``.
    NOTE(review): a new ``MedicalASR`` is constructed on every call; whether
    that reloads model weights each time depends on ``MedicalASR`` - confirm.
    """
    return MedicalASR(
        {
            "language": "tr",
            "model": settings.ASR_MODEL,
            "domain": "medical",
        }
    )
# Transcription API endpoint
@app.post("/api/v1/transcribe")
async def transcribe_audio_api(
    file: UploadFile = File(...),
    diarize: bool = True,
    enhance_audio: bool = True,
    anonymize: Optional[bool] = None,
    asr_model: MedicalASR = Depends(get_asr_model),
):
    """Transcribe an uploaded audio file.

    The upload is written to a temporary ``.wav`` file, passed to
    ``asr_model.transcribe`` and the temporary file is removed afterwards,
    whether or not transcription succeeded.

    Args:
        file: The uploaded audio.
        diarize: Forwarded as ``speaker_diarization``.
        enhance_audio: Forwarded as ``enhance_audio``.
        anonymize: NOTE(review): accepted but not forwarded to the model -
            confirm whether the ASR model exposes an option for it.
        asr_model: Model instance supplied by ``get_asr_model``.

    Returns:
        Whatever ``asr_model.transcribe`` returns.

    Raises:
        HTTPException: With status 500 when reading, writing or transcribing
            fails; the detail carries the original error message.
    """
    # NOTE(review): transcribe() is called synchronously inside an async
    # handler; if it is long-running it will block the event loop - confirm.
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            # Record the path before writing so cleanup also covers a failed write.
            temp_file_path = temp_file.name
            temp_file.write(await file.read())
        return asr_model.transcribe(
            temp_file_path,
            speaker_diarization=diarize,
            enhance_audio=enhance_audio,
        )
    except HTTPException:
        # Already an HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError:
                # Best-effort cleanup: a failure to delete the temp file must
                # not replace the response or the original error.
                pass
# Build the Gradio UI and mount it on the FastAPI app at the root path.
gradio_app = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_app, path="/")

# Start the server when this file is executed directly.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )