sivakorn-su
		
	committed on
		
		
					Commit 
							
							·
						
						78dde53
	
1
								Parent(s):
							
							e6d32bd
								
feat: add voice diarization project
Browse files- Dockerfile +15 -0
- README.md +81 -4
- app.py +345 -0
- requirements.txt +18 -0
    	
        Dockerfile
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

WORKDIR /app

# Install dependencies before copying the source so this layer is reused
# when only application code changes.
COPY ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

COPY . /app

# Secrets (HF_TOKEN, TOGETHER_API_KEY, NGROK_AUTH_TOKEN) are read from the
# process environment by app.py, so they are supplied at run time
# (`docker run --env-file .env.prod ...` or the hosting platform's secret
# store). The former `COPY .env.prod .env.prod` step is dropped: it duplicated
# `COPY . /app`, and it aborted the build whenever the file was absent, which
# is the expected state because the file must not be committed.
# NOTE(review): add `.env*` to .dockerignore so `COPY . /app` cannot pick up a
# local secrets file either.
ENV ENV=production
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8300"]
    	
        README.md
    CHANGED
    
    | @@ -1,10 +1,87 @@ | |
| 1 | 
             
            ---
         | 
| 2 | 
            -
            title:  | 
| 3 | 
            -
            emoji:  | 
| 4 | 
            -
            colorFrom:  | 
| 5 | 
            -
            colorTo:  | 
| 6 | 
             
            sdk: docker
         | 
| 7 | 
             
            pinned: false
         | 
|  | |
| 8 | 
             
            ---
         | 
| 9 |  | 
| 10 | 
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
             
            ---
         | 
| 2 | 
            +
            title: WhisperPyanoteLLM
         | 
| 3 | 
            +
            emoji: 📉
         | 
| 4 | 
            +
            colorFrom: indigo
         | 
| 5 | 
            +
            colorTo: green
         | 
| 6 | 
             
            sdk: docker
         | 
| 7 | 
             
            pinned: false
         | 
| 8 | 
            +
            license: apache-2.0
         | 
| 9 | 
             
            ---
         | 
| 10 |  | 
| 11 | 
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # WhisperPyanoteLLM
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            A FastAPI-based app for speaker diarization and transcription using Whisper and PyAnnote, with LLM-powered summarization.
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            ## Features
         | 
| 18 | 
            +
            - Speaker diarization with pyannote.audio
         | 
| 19 | 
            +
            - Transcription with OpenAI Whisper
         | 
| 20 | 
            +
            - Summarization with Together LLM
         | 
| 21 | 
            +
            - REST API for video/audio upload and processing
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            ## Quick Start (Development)
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            1. **Clone the repository:**
         | 
| 26 | 
            +
               ```sh
         | 
| 27 | 
            +
               git clone <your-repo-url>
         | 
| 28 | 
            +
               cd WhisperPyanoteLLM
         | 
| 29 | 
            +
               ```
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            2. **Create a `.env` file:**
         | 
| 32 | 
            +
               ```env
         | 
| 33 | 
            +
               HF_TOKEN=your_huggingface_token
         | 
| 34 | 
            +
               TOGETHER_API_KEY=your_together_api_key
         | 
| 35 | 
            +
               NGROK_AUTH_TOKEN=your_ngrok_token
         | 
| 36 | 
            +
               ```
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            3. **Install dependencies:**
         | 
| 39 | 
            +
               ```sh
         | 
| 40 | 
            +
               pip install -r requirements.txt
         | 
| 41 | 
            +
               ```
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            4. **Run the app:**
         | 
| 44 | 
            +
               ```sh
         | 
| 45 | 
            +
               uvicorn app:app --reload --port 8300
         | 
| 46 | 
            +
               ```
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            5. **Access the API:**
         | 
| 49 | 
            +
               - Health check: [http://localhost:8300/health](http://localhost:8300/health)
         | 
| 50 | 
            +
               - Upload endpoint: `/upload_video/`
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            ---
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            ## Production (Docker)
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            1. **Create a `.env.prod` file:**
         | 
| 57 | 
            +
               ```env
         | 
| 58 | 
            +
               HF_TOKEN=your_huggingface_token
         | 
| 59 | 
            +
               TOGETHER_API_KEY=your_together_api_key
         | 
| 60 | 
            +
               NGROK_AUTH_TOKEN=your_ngrok_token
         | 
| 61 | 
            +
               ```
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            2. **Build the Docker image:**
         | 
| 64 | 
            +
               ```sh
         | 
| 65 | 
            +
               docker build -t whisperpyanote .
         | 
| 66 | 
            +
               ```
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            3. **Run the Docker container:**
         | 
| 69 | 
            +
               ```sh
         | 
| 70 | 
            +
               docker run --env-file .env.prod -p 8300:8300 whisperpyanote
         | 
| 71 | 
            +
               ```
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            4. **Access the API:**
         | 
| 74 | 
            +
               - Health check: [http://localhost:8300/health](http://localhost:8300/health)
         | 
| 75 | 
            +
               - Upload endpoint: `/upload_video/`
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            ---
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            ## Notes
         | 
| 80 | 
            +
            - Make sure your `.env` and `.env.prod` files are **not** committed to version control.
         | 
| 81 | 
            +
            - For best performance, run on a machine with a CUDA-enabled GPU.
         | 
| 82 | 
            +
            - For more details, see the code and comments in `app.py`.
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            ---
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            ## License
         | 
| 87 | 
            +
            Apache-2.0
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,345 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import shutil
         | 
| 3 | 
            +
            import time
         | 
| 4 | 
            +
            from collections import Counter
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            import torch
         | 
| 7 | 
            +
            import whisper
         | 
| 8 | 
            +
            from pyannote.audio import Pipeline
         | 
| 9 | 
            +
            from torch.serialization import add_safe_globals
         | 
| 10 | 
            +
            from omegaconf import ListConfig
         | 
| 11 | 
            +
            import nest_asyncio
         | 
| 12 | 
            +
            import uvicorn
         | 
| 13 | 
            +
            from fastapi import FastAPI, UploadFile, File
         | 
| 14 | 
            +
            from fastapi.middleware.cors import CORSMiddleware
         | 
| 15 | 
            +
            from fastapi.responses import JSONResponse
         | 
| 16 | 
            +
            from pyngrok import ngrok, conf
         | 
| 17 | 
            +
            from pydub import AudioSegment, effects
         | 
| 18 | 
            +
            import pandas as pd
         | 
| 19 | 
            +
            from moviepy.editor import VideoFileClip
         | 
| 20 | 
            +
            from together import Together
         | 
| 21 | 
            +
             | 
# Hugging Face Spaces injects secrets as environment variables automatically
token = os.environ.get('HF_TOKEN')
together_api_key = os.environ.get('TOGETHER_API_KEY')
ngrok_auth_token = os.environ.get('NGROK_AUTH_TOKEN')

# Register ListConfig with torch's restricted unpickler BEFORE any checkpoint
# is loaded below; registering it afterwards cannot affect those loads.
add_safe_globals([ListConfig])

pipelines, models, others = [], [], []

n = torch.cuda.device_count()

# Pick one device per model. With two or more GPUs the diarization pipeline
# and Whisper each get their own card; otherwise they share the only device.
if n <= 1:
    device = "cpu" if n == 0 else "cuda:0"
    device_pyannote = device_whisper = torch.device(device)
else:
    device_pyannote = torch.device("cuda:0")
    device_whisper = torch.device("cuda:1")
    # Plain string so the /health handler can return it as JSON.
    device = f"{device_pyannote},{device_whisper}"

# `pipeline`, `model` and `device` are bound in every branch because the
# request handlers in this module read them as globals.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=token
)
if pipeline is None:
    # NOTE(review): from_pretrained is understood to return None when the
    # gated model cannot be fetched (missing/invalid HF_TOKEN) - confirm.
    raise RuntimeError(
        "Could not load pyannote/speaker-diarization-3.1; check HF_TOKEN and model access."
    )
# A torch.device is passed rather than a string; pyannote's Pipeline.to is
# documented as taking a torch.device.
pipeline.to(device_pyannote)
model = whisper.load_model("large").to(device_whisper)

if n <= 1:
    # Kept for backward compatibility: these lists were only populated on
    # the CPU / single-GPU paths.
    pipelines.append(pipeline)
    models.append(model)

nest_asyncio.apply()
together = Together(api_key=together_api_key)
conf.get_default().auth_token = ngrok_auth_token

# Working directory for uploads and intermediate audio files.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
| 57 | 
            +
             | 
app = FastAPI()

# Browser origins that are allowed to call this API cross-site.
origins = [
    "http://127.0.0.1:8000",
    "http://localhost:8000",
    "https://project-diarzation-production.up.railway.app"
]

# CORS policy: only the origins above, with credentials, any method and any
# request header.
_cors_options = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
| 73 | 
            +
             | 
@app.on_event("startup")
def on_startup():
    """Startup hook: rebind the module-level `pipeline`, `model` and `device`.

    The three values come from `setup_models()`.
    NOTE(review): `setup_models` is not defined in the part of the file
    visible here - confirm it exists, otherwise startup raises NameError.
    """
    global pipeline, model, device
    loaded = setup_models()
    pipeline, model, device = loaded
    # ... any other startup logic
| 79 | 
            +
             | 
@app.get("/health")
def health_check():
    """Health endpoint: report whether the globals `model` and `pipeline` are set.

    Returns a dict with a fixed "status" of "ok", two booleans, and the
    current value of the module-level `device`.
    """
    whisper_ready = model is not None
    diarizer_ready = pipeline is not None
    return {
        "status": "ok",
        "model_loaded": whisper_ready,
        "diarization_pipeline_loaded": diarizer_ready,
        "device": device
    }
| 88 | 
            +
             | 
@app.get("/")
def check_api():
    """Root endpoint: return a fixed message showing the API process is serving."""
    payload = {"message": "API is up and running"}
    return payload
| 92 | 
            +
             | 
@app.get("/key")
def check_env():
    """Diagnostic endpoint: report the ENV name and whether OPENAI_API_KEY is set.

    Only a boolean is returned for the key, never its value.
    NOTE(review): the visible code reads HF_TOKEN, TOGETHER_API_KEY and
    NGROK_AUTH_TOKEN but never OPENAI_API_KEY - confirm this is the key that
    should be checked here.
    """
    env_name = os.environ.get("ENV", "dev")
    has_openai_key = bool(os.environ.get("OPENAI_API_KEY"))
    return {
        "env": env_name,
        "openai_key_exists": has_openai_key,
    }
| 99 | 
            +
             | 
def save_uploaded_file(file: UploadFile) -> str:
    """Write an uploaded file into UPLOAD_FOLDER and return the path written.

    The client-supplied filename is untrusted: only its final path component
    is used, so names such as "../../etc/passwd" or absolute paths cannot
    place the file outside UPLOAD_FOLDER.

    Args:
        file: The upload; `file.filename` names it and `file.file` is the
            binary stream that is copied to disk.

    Returns:
        The path of the saved file inside UPLOAD_FOLDER.

    Raises:
        ValueError: If the upload has no usable filename.
    """
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    # Treat backslashes as separators too, so Windows-style client paths are
    # reduced to their last component on any host OS.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name)
    if safe_name in ("", ".", ".."):
        raise ValueError("uploaded file has no usable filename")

    filepath = os.path.join(UPLOAD_FOLDER, safe_name)
    with open(filepath, "wb") as f:
        shutil.copyfileobj(file.file, f)
    return filepath
| 106 | 
            +
             | 
def extract_and_normalize_audio(video_path: str) -> str:
    """Extract a video's audio track, normalize it, and return the WAV path.

    The raw track is written to UPLOAD_FOLDER/extracted_audio.wav, then
    normalized with pydub's `effects.normalize` and exported to
    UPLOAD_FOLDER/cleaned.wav.

    Args:
        video_path: Path of the media file to read.

    Returns:
        Path of the normalized WAV file.

    Raises:
        ValueError: If the file has no audio track.
    """
    audio_path = os.path.join(UPLOAD_FOLDER, "extracted_audio.wav")

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"no audio track found in {video_path!r}")
        clip.audio.write_audiofile(audio_path)
    finally:
        # Always release the clip's reader resources, even on failure.
        clip.close()

    audio = AudioSegment.from_wav(audio_path)
    normalized_audio = effects.normalize(audio)
    cleaned_path = os.path.join(UPLOAD_FOLDER, "cleaned.wav")
    normalized_audio.export(cleaned_path, format="wav")
    return cleaned_path
| 117 | 
            +
             | 
def diarize_audio(audio_path: str) -> pd.DataFrame:
    """Run the module-level diarization `pipeline` on an audio file.

    Returns a DataFrame with one row per track yielded by
    `itertracks(yield_label=True)`, holding "start" and "end" rounded to
    three decimals plus the "speaker" label.
    """
    annotation = pipeline(audio_path)

    rows = []
    for segment, _track, label in annotation.itertracks(yield_label=True):
        rows.append({
            "start": round(segment.start, 3),
            "end": round(segment.end, 3),
            "speaker": label,
        })
    return pd.DataFrame(rows)
| 124 | 
            +
             | 
def split_segments(audio_path: str, df: pd.DataFrame) -> str:
    """Cut the audio into one WAV file per row of `df`; return the output folder.

    UPLOAD_FOLDER/segments is deleted and recreated on every call. Each row's
    "start"/"end" (multiplied by 1000 to index the audio) selects a slice,
    saved as segment_<index>_<speaker>.wav.
    """
    out_dir = os.path.join(UPLOAD_FOLDER, "segments")
    # Start from an empty folder so files from an earlier call never leak in.
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir, exist_ok=True)

    full_audio = AudioSegment.from_file(audio_path)
    for idx, entry in df.iterrows():
        begin = int(entry['start'] * 1000)
        finish = int(entry['end'] * 1000)
        target = os.path.join(out_dir, f"segment_{idx:03d}_{entry['speaker']}.wav")
        full_audio[begin:finish].export(target, format="wav")

    return out_dir
| 140 | 
            +
             | 
def transcribe_segments(segment_folder: str) -> pd.DataFrame:
    """Transcribe every file in `segment_folder` with the module-level `model`.

    Files are processed in sorted name order with language="th". Returns a
    DataFrame with one row per file: "filename" and the stripped "text".
    """
    rows = []
    for name in sorted(os.listdir(segment_folder)):
        output = model.transcribe(os.path.join(segment_folder, name), language="th")
        rows.append({"filename": name, "text": output["text"].strip()})
    return pd.DataFrame(rows)
| 152 | 
            +
             | 
def clean_summary(text):
    """Strip boilerplate from an LLM summary and return it as one line of text.

    Steps, in order:
      1. Remove header labels, labelled bullet prefixes, disclaimer lines,
         parenthesised meta comments and stock "AI" phrases.
      2. Collapse all remaining whitespace, including line breaks, to single
         spaces.
      3. Drop markdown emphasis markers, squeeze repeated punctuation, and
         strip a leading bullet or number.
      4. Return a fixed fallback message when the result is too short or
         contains a known "nothing to summarise" phrase.

    The disclaimer and AI-phrase patterns are written to delete "up to the
    end of the line", so they run while line breaks are still present.
    Deleting line breaks first made each of them erase everything up to the
    end of the whole text.

    Args:
        text: Raw summary; any value is accepted and converted with str().

    Returns:
        The cleaned single-line summary, or a fixed Thai fallback message.
    """
    import re

    if not text or len(str(text).strip()) == 0:
        return "ไม่มีข้อมูลสำคัญที่จะสรุป"

    text = str(text)

    # Headers and labels
    header_patterns = [
        r'สรุป:\s*',
        r'สรุปการประชุม:\s*',
        r'บทสรุป:\s*',
        r'ข้อสรุป:\s*',
        r'\*\*Key Messages:\*\*|\*\*หัวข้อหลัก:\*\*',
        r'\*\*Action Items:\*\*|\*\*ประเด็นสำคัญ:\*\*',
        r'\*\*Summary:\*\*|\*\*สรุป:\*\*',
    ]

    # Bullet points and markers (anchored to the start of a line)
    bullet_label_patterns = [
        r'^[-•]\s*Key Messages?:?\s*',
        r'^[-•]\s*Action Items?:?\s*',
        r'^[-•]\s*หัวข้อหลัก:?\s*',
        r'^[-•]\s*ประเด็นสำคัญ:?\s*',
        r'^[-•]\s*ข้อมูลน่าสนใจ:?\s*',
        r'^[-•]\s*บทสรุป:?\s*',
    ]

    # Disclaimers and notes (each removes the rest of its line)
    disclaimer_patterns = [
        r'หมายเหตุ:.*?(?=\n|\r|$)',
        r'เนื่องจาก.*?(?=\n|\r|$)',
        r'ไม่มีข้อความ.*?(?=\n|\r|$)',
        r'ไม่มีประเด็น.*?(?=\n|\r|$)',
        r'ไม่มี Action Items.*?(?=\n|\r|$)',
        r'ไม่มีรายการ.*?(?=\n|\r|$)',
        r'ต้องการข้อมูลเพิ่มเติม.*?(?=\n|\r|$)',
        r'ต้องขอความชัดเจนเพิ่มเติม.*?(?=\n|\r|$)',
    ]

    # Meta comments (a parenthesised remark may span several lines)
    meta_comment_patterns = [
        r'\(ตัดประโยคที่ไม่เกี่ยวข้องหรือซ้ำซ้อนออก.*?\)',
        r'\(.*?เพื่อเน้นความชัดเจน.*?\)',
    ]

    # AI-generated phrases (each removes the rest of its line)
    ai_phrase_patterns = [
        r'ตามที่ได้กล่าวไว้.*?(?=\n|\r|$)',
        r'จากข้อความที่ให้มา.*?(?=\n|\r|$)',
        r'Based on the provided text.*?(?=\n|\r|$)',
        r'According to the text.*?(?=\n|\r|$)',
    ]

    # No DOTALL for line-scoped patterns: '.' must stop at the line break.
    line_flags = re.IGNORECASE | re.MULTILINE

    cleaned_text = text
    for pattern in header_patterns + bullet_label_patterns + disclaimer_patterns:
        cleaned_text = re.sub(pattern, '', cleaned_text, flags=line_flags)
    for pattern in meta_comment_patterns:
        cleaned_text = re.sub(pattern, '', cleaned_text, flags=line_flags | re.DOTALL)
    for pattern in ai_phrase_patterns:
        cleaned_text = re.sub(pattern, '', cleaned_text, flags=line_flags)

    # Flatten to one line only now that the line-scoped removals are done.
    # Line breaks become a single space so adjacent lines do not fuse.
    cleaned_text = re.sub(r'\t+', '', cleaned_text)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)

    # Remove markdown formatting but keep content
    cleaned_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cleaned_text)  # Bold
    cleaned_text = re.sub(r'\*(.*?)\*', r'\1', cleaned_text)      # Italic
    cleaned_text = re.sub(r'_{2,}(.*?)_{2,}', r'\1', cleaned_text) # Underline

    # Remove excessive punctuation
    cleaned_text = re.sub(r'[.]{3,}', '...', cleaned_text)
    cleaned_text = re.sub(r'[!]{2,}', '!', cleaned_text)
    cleaned_text = re.sub(r'[?]{2,}', '?', cleaned_text)

    # Clean up a leading bullet point or number
    cleaned_text = re.sub(r'^[-•*]\s*', '', cleaned_text, flags=re.MULTILINE)
    cleaned_text = re.sub(r'^\d+\.\s*', '', cleaned_text, flags=re.MULTILINE)

    # Phrases that mark a summary as carrying no usable content
    useless_phrases = [
        'ไม่มี',
        'ไม่สามารถสรุปได้',
        'ข้อความต้นฉบับไม่มีความหมาย',
        'ไม่มีข้อมูลเพียงพอ',
        'ไม่มีประเด็นสำคัญ',
        'ไม่มี Action Items',
        'ต้องขอความชัดเจนเพิ่มเติม',
        'ไม่มีข้อมูลที่สำคัญ',
        'ไม่สามารถระบุได้',
        'ข้อมูลไม่ชัดเจน',
        'ไม่มีเนื้อหาที่เกี่ยวข้อง',
        'N/A',
        'n/a',
        'Not applicable',
        'No content',
        'No summary available'
    ]

    cleaned_text = cleaned_text.strip()

    # The substring test also covers an exact match, so the former separate
    # equality test was redundant.
    # NOTE(review): any summary that merely contains 'ไม่มี' is rejected
    # as a whole - confirm this aggressive filter is intended.
    lowered = cleaned_text.lower()
    if len(cleaned_text) < 15 or any(phrase.lower() in lowered for phrase in useless_phrases):
        return "ไม่มีข้อมูลสำคัญที่จะสรุปมากพอ"

    # Normalise spacing around sentence punctuation.
    cleaned_text = re.sub(r'\s+([.!?])', r'\1', cleaned_text)
    cleaned_text = re.sub(r'([.!?])\s*([A-Za-zก-๙])', r'\1 \2', cleaned_text)

    return cleaned_text
| 263 | 
            +
             | 
| 264 | 
            +
            from together import Together
         | 
| 265 | 
            +
            import time
         | 
| 266 | 
            +
             | 
| 267 | 
            +
            def summarize_texts(texts, api_key, model="deepseek-ai/DeepSeek-V3", delay=1):
         | 
| 268 | 
            +
                client = Together(api_key=api_key)
         | 
| 269 | 
            +
                summaries = []
         | 
| 270 | 
            +
             | 
| 271 | 
            +
                for idx, text in enumerate(texts):
         | 
| 272 | 
            +
                    prompt = f"""
         | 
| 273 | 
            +
            สรุปข้อความประชุมนี้เป็นภาษาไทยสั้น ๆ เน้นประเด็นสำคัญ (key messages) และ Action Items โดยตัดรายละเอียดที่ไม่สำคัญออก:
         | 
| 274 | 
            +
             | 
| 275 | 
            +
            ข้อความ:
         | 
| 276 | 
            +
            {text}
         | 
| 277 | 
            +
             | 
| 278 | 
            +
            สรุป:
         | 
| 279 | 
            +
            - Key Messages:
         | 
| 280 | 
            +
            - Action Items:
         | 
| 281 | 
            +
            """
         | 
| 282 | 
            +
                    try:
         | 
| 283 | 
            +
                        response = client.chat.completions.create(
         | 
| 284 | 
            +
                            model=model,
         | 
| 285 | 
            +
                            messages=[
         | 
| 286 | 
            +
                                {"role": "system", "content": "คุณเป็นผู้เชี่ยวชาญในการสรุปเนื้อหา ตอบเป็นภาษาไทยเสมอ เน้นหัวข้อหลักและข้อมูลสำคัญ"},
         | 
| 287 | 
            +
                                {"role": "user", "content": prompt}
         | 
| 288 | 
            +
                            ],
         | 
| 289 | 
            +
                            max_tokens=1024,
         | 
| 290 | 
            +
                            temperature=0.7,
         | 
| 291 | 
            +
                        )
         | 
| 292 | 
            +
             | 
| 293 | 
            +
                        summary = response.choices[0].message.content.strip()
         | 
| 294 | 
            +
                        summary = clean_summary(summary)
         | 
| 295 | 
            +
                        summaries.append(summary)
         | 
| 296 | 
            +
             | 
| 297 | 
            +
                    except Exception as e:
         | 
| 298 | 
            +
                        print(f"Error at index {idx}: {e}")
         | 
| 299 | 
            +
                        summaries.append("ไม่สามารถสรุปได้")
         | 
| 300 | 
            +
             | 
| 301 | 
            +
                    if idx < len(texts) - 1:
         | 
| 302 | 
            +
                        time.sleep(delay)
         | 
| 303 | 
            +
             | 
| 304 | 
            +
                return summaries
         | 
| 305 | 
            +
             | 
| 306 | 
            +
             | 
| 307 | 
            +
            @app.post("/upload_video/")
            async def upload_video(file: UploadFile = File(...)):
                """Handle an uploaded video and return a diarized transcription as JSON.

                The upload is passed through the file's pipeline helpers in order:
                ``save_uploaded_file`` -> ``extract_and_normalize_audio`` ->
                ``diarize_audio`` -> ``split_segments`` -> ``transcribe_segments``.
                The diarization and transcription frames are then joined row by row.

                NOTE(review): every pipeline step is called synchronously inside this
                ``async`` handler, so the event loop is blocked while they run —
                confirm this is acceptable for the expected load.

                Args:
                    file: The uploaded file from the multipart request.

                Returns:
                    JSONResponse with the keys ``video_path``, ``audio_path``,
                    ``audio_length`` (minutes), ``data`` (merged rows),
                    ``speaker_array``, ``count_speaker``, ``num_speakers``,
                    ``summaries`` (currently always an empty string) and
                    ``total_sentence``.
                """
                video_path = save_uploaded_file(file)
                audio_path = extract_and_normalize_audio(video_path)
                df_diarization = diarize_audio(audio_path)
                segment_folder = split_segments(audio_path, df_diarization)
                df_transcriptions = transcribe_segments(segment_folder)

                # The two frames are paired positionally; truncate both to the
                # shorter one so the column-wise concat does not create NaN rows.
                # (Presumably row i of each frame describes the same segment —
                # verify against split_segments / transcribe_segments.)
                min_len = min(len(df_diarization), len(df_transcriptions))
                df_merged = pd.concat([
                    df_diarization.iloc[:min_len].reset_index(drop=True),
                    df_transcriptions.iloc[:min_len].reset_index(drop=True)
                ], axis=1)

                result = df_merged.to_dict(orient="records")
                # Speaker statistics are computed over the full diarization output,
                # not only over the rows that survived the truncation above.
                speaker_array = df_diarization["speaker"].unique().tolist()
                counter = Counter(df_diarization["speaker"])
                result_array = [{"speaker": spk, "count": cnt} for spk, cnt in counter.most_common()]
                # Summarization is currently disabled. When re-enabling it, load the
                # key from the environment (e.g. os.getenv("TOGETHER_API_KEY")) and
                # never hard-code it in source. The key that was previously committed
                # on this line must be treated as compromised and rotated.
                # summaries = summarize_texts(df_merged["text"].tolist(), api_key, delay=2)
                # len() of a pydub AudioSegment is its duration in milliseconds.
                duration_minutes = len(AudioSegment.from_wav(audio_path)) / 1000 / 60

                return JSONResponse(content={
                    "video_path": video_path,
                    "audio_path": audio_path,
                    "audio_length": duration_minutes,
                    "data": result,
                    "speaker_array": speaker_array,
                    "count_speaker": result_array,
                    "num_speakers": len(speaker_array),
                    "summaries": '',
                    # Row count of the merged frame; unlike len(df_merged['text'])
                    # this does not raise KeyError when no 'text' column exists.
                    "total_sentence": len(df_merged),
                })
         | 
| 340 | 
            +
             | 
| 341 | 
            +
            # Open an ngrok tunnel to local port 8300 and print what ngrok returned.
            # NOTE(review): these two statements run at import time, so they also
            # execute when the module is loaded by an external server process
            # (e.g. the Dockerfile's `uvicorn app:app`), not only when this file is
            # run as a script — confirm that is intended for production.
            public_url = ngrok.connect(8300)
            print(f"Public URL: {public_url}")

            # When executed directly, serve the app on all interfaces on the same
            # port the tunnel above points to.
            if __name__ == "__main__":
                uvicorn.run(app, host="0.0.0.0", port=8300)
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,18 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            fastapi
         | 
| 2 | 
            +
            uvicorn[standard]
         | 
| 3 | 
            +
            openai-whisper
         | 
| 4 | 
            +
            pyannote.audio
         | 
| 5 | 
            +
            moviepy
         | 
| 6 | 
            +
            pydub
         | 
| 7 | 
            +
            pyngrok
         | 
| 8 | 
            +
            python-multipart
         | 
| 9 | 
            +
            together
         | 
| 10 | 
            +
            torch
         | 
| 11 | 
            +
            # For CUDA-enabled torch, install via Dockerfile:
         | 
| 12 | 
            +
            # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
         | 
| 13 | 
            +
            torchvision
         | 
| 14 | 
            +
            torchaudio
         | 
| 15 | 
            +
            omegaconf
         | 
| 16 | 
            +
            pandas
         | 
| 17 | 
            +
            nest_asyncio
         | 
| 18 | 
            +
            python-dotenv
         |