File size: 6,937 Bytes
bba0c34
 
 
 
 
 
 
 
 
 
816272b
 
 
bba0c34
 
 
 
816272b
 
bba0c34
 
 
 
 
816272b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bba0c34
 
 
 
 
816272b
bba0c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a29b94
816272b
 
 
 
 
 
9a29b94
 
816272b
 
 
 
 
 
 
 
 
 
bba0c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee170eb
bba0c34
 
 
 
 
 
 
 
 
 
 
 
816272b
 
bba0c34
 
816272b
9a29b94
 
bba0c34
 
 
816272b
bba0c34
 
 
816272b
bba0c34
 
 
 
 
 
 
 
816272b
bba0c34
 
 
 
 
 
 
 
 
 
816272b
 
 
9a29b94
 
0921fe5
9a29b94
bba0c34
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b8afa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
from flask import Flask, request, jsonify, Response
from faster_whisper import WhisperModel
import torch
import io
import time
import datetime
from threading import Semaphore
import os
from werkzeug.utils import secure_filename
import tempfile
from moviepy.editor import VideoFileClip
import firebase_admin
from firebase_admin import credentials, messaging  # Added for FCM

# Flask application object; the route handlers below register on it via @app.route.
app = Flask(__name__)

# Configuration
# Upper bound on transcriptions running at the same time (sizes the semaphore).
MAX_CONCURRENT_REQUESTS = 2
# Longest accepted video, in seconds (30 minutes).
MAX_FILE_DURATION = 30 * 60
# Scratch directory for uploads and extracted audio.
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_AUDIO_EXTENSIONS = {
    'mp3', 'wav', 'ogg', 'm4a', 'flac',
    'aac', 'wma', 'opus', 'aiff',
}
ALLOWED_VIDEO_EXTENSIONS = {
    'mp4', 'avi', 'mov', 'mkv', 'webm',
    'flv', 'wmv', 'mpeg', 'mpg', '3gp',
}
# Every extension the upload endpoint accepts, audio or video.
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS | ALLOWED_VIDEO_EXTENSIONS


# Initialize Firebase Admin SDK using environment variables.
# The service-account document is assembled in memory from FIREBASE_* variables
# instead of being read from a JSON key file.
_firebase_private_key = os.getenv("FIREBASE_PRIVATE_KEY")
if not _firebase_private_key:
    # Fail with an explicit message; without this check the .replace() call
    # below would raise an opaque AttributeError on None.
    raise RuntimeError(
        "FIREBASE_PRIVATE_KEY environment variable is not set; "
        "cannot initialize the Firebase Admin SDK"
    )

firebase_credentials = {
    "type": "service_account",
    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
    # The key is stored with literal backslash-n sequences; turn them back
    # into real newlines before handing the PEM text to the SDK.
    "private_key": _firebase_private_key.replace("\\n", "\n"),
    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}"
}
cred = credentials.Certificate(firebase_credentials)
firebase_admin.initialize_app(cred)

# Device check for faster-whisper: use CUDA with float16 when a GPU is
# available, otherwise fall back to CPU with int8.
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Faster Whisper setup
# The model object is built once at import time and reused by the transcribe route.
beamsize = 2  # forwarded as beam_size to wmodel.transcribe()
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
    download_root="./model_cache"  # presumably where downloaded model files are kept — confirm against faster-whisper docs
)

# Concurrency control
# request_semaphore caps simultaneous transcriptions at MAX_CONCURRENT_REQUESTS;
# the transcribe route acquires it without blocking and answers 503 when it is full.
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
# Number of requests currently inside the transcribe route; reported by the
# /health and /status/busy endpoints.
active_requests = 0

def allowed_file(filename, allowed_extensions=None):
    """Return True when *filename* ends in a supported extension.

    The extension is whatever follows the last ``.`` in the name and is
    compared case-insensitively.

    Args:
        filename: Name supplied by the client. Anything that is not a string
            (for example ``None``) is rejected instead of raising.
        allowed_extensions: Optional collection of lower-case extensions
            (without the dot). Defaults to the module-level
            ``ALLOWED_EXTENSIONS``.

    Returns:
        bool: True if the name has an extension contained in the allowed set.
    """
    if allowed_extensions is None:
        allowed_extensions = ALLOWED_EXTENSIONS
    if not isinstance(filename, str):
        return False
    # rpartition yields an empty separator when the name contains no dot.
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in allowed_extensions

def cleanup_temp_files(*file_paths):
    """Delete each given path if it exists, on a best-effort basis.

    Falsy entries (``None``, empty string) and paths that do not exist are
    skipped. A failure on one path is printed and does not prevent the
    remaining paths from being processed.
    """
    for file_path in file_paths:
        try:
            # Nothing to do for placeholders or files that are already gone.
            if not file_path or not os.path.exists(file_path):
                continue
            os.remove(file_path)
        except Exception as e:
            print(f"Error cleaning up temp file {file_path}: {str(e)}")

def extract_audio_from_video(video_path, output_audio_path):
    """Write the audio track of a video file to *output_audio_path*.

    Args:
        video_path: Path of the video file to read.
        output_audio_path: Destination path for the extracted audio.

    Returns:
        The *output_audio_path* that was written.

    Raises:
        Exception: With the message prefix
            ``"Failed to extract audio from video: "`` when the clip cannot be
            opened, is longer than ``MAX_FILE_DURATION`` seconds, has no audio
            track, or the audio cannot be written. The original error is
            chained as the cause.
    """
    try:
        video = VideoFileClip(video_path)
        try:
            if video.duration > MAX_FILE_DURATION:
                raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
            # A clip without an audio stream exposes audio as None; report
            # that explicitly instead of failing with an AttributeError.
            if video.audio is None:
                raise ValueError("Video has no audio track")
            video.audio.write_audiofile(output_audio_path)
        finally:
            # Always release the clip, including when writing fails.
            video.close()
        return output_audio_path
    except Exception as e:
        raise Exception(f"Failed to extract audio from video: {str(e)}") from e

def send_fcm_data_message(fcm_token, transcription, file_type, created_date, transcription_name):
    """Push the transcription result to one device as an FCM data message.

    The message carries only a ``data`` payload addressed to *fcm_token*.

    Returns:
        bool: True when the send call completed, False when anything raised
        (the error is printed and never propagated).
    """
    payload = {
        'transcription': transcription,
        'file_type': file_type,
        'created_date': created_date,
        'transcription_name': transcription_name
    }
    try:
        outgoing = messaging.Message(data=payload, token=fcm_token)
        response = messaging.send(outgoing)
        print(f"FCM message sent: {response}")
    except Exception as e:
        print(f"Error sending FCM message: {str(e)}")
        return False
    return True

@app.route("/health", methods=["GET"])
def health_check():
    """Report that the API is up, along with device, load and format info."""
    report = {
        'status': 'API is running',
        'timestamp': datetime.datetime.now().isoformat(),
        'device': device,
        'compute_type': compute_type,
        'active_requests': active_requests,
        'max_duration_supported': MAX_FILE_DURATION,
        'supported_formats': list(ALLOWED_EXTENSIONS)
    }
    return jsonify(report)

@app.route("/status/busy", methods=["GET"])
def server_busy():
    """Tell the caller whether every transcription slot is currently taken."""
    payload = {'is_busy': active_requests >= MAX_CONCURRENT_REQUESTS}
    payload['active_requests'] = active_requests
    payload['max_capacity'] = MAX_CONCURRENT_REQUESTS
    return jsonify(payload)

@app.route("/whisper_transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio/video file and push the text via FCM.

    Expects a multipart form with a ``file`` part plus the ``fcm_token``,
    ``created_date`` and ``transcription_name`` fields.

    Returns:
        A ``(response, status)`` pair:
        ``200`` with an empty JSON object once the text was handed to FCM;
        ``400`` for a missing field or an unsupported file extension;
        ``502`` when the FCM send failed;
        ``503`` when all ``MAX_CONCURRENT_REQUESTS`` slots are in use;
        ``500`` with the exception text for any other failure.
    """
    global active_requests

    # Non-blocking acquire: reject right away instead of queueing the request.
    if not request_semaphore.acquire(blocking=False):
        return jsonify({'error': 'Server busy'}), 503

    # NOTE(review): this counter is updated without a lock; it is only used
    # for reporting, the semaphore is what actually enforces the limit.
    active_requests += 1
    start_time = time.time()
    temp_file_path = None
    temp_audio_path = None

    try:
        if 'file' not in request.files or 'fcm_token' not in request.form:
            return jsonify({'error': 'Missing file or FCM token'}), 400

        # Check the remaining fields up front so a missing one is reported as
        # a client error (400) instead of surfacing as a 500 from the lookup.
        missing_fields = [
            name for name in ('created_date', 'transcription_name')
            if name not in request.form
        ]
        if missing_fields:
            return jsonify({'error': f'Missing form field(s): {", ".join(missing_fields)}'}), 400

        file = request.files['file']
        fcm_token = request.form['fcm_token']
        created_date = request.form['created_date']
        transcription_name = request.form['transcription_name']
        if not (file and allowed_file(file.filename)):
            return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400

        file_extension = file.filename.rsplit('.', 1)[1].lower()
        is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS

        # Save the upload under a unique name so concurrent requests that use
        # the same client filename cannot overwrite or delete each other's
        # file. The extension was validated above, so it is safe as a suffix.
        fd, temp_file_path = tempfile.mkstemp(
            prefix="upload_", suffix=f".{file_extension}", dir=TEMPORARY_FOLDER
        )
        os.close(fd)
        file.save(temp_file_path)

        # Handle video/audio
        if is_video:
            # Derived from the unique upload path, so it is unique as well.
            temp_audio_path = f"{temp_file_path}.wav"
            extract_audio_from_video(temp_file_path, temp_audio_path)
            transcription_file = temp_audio_path
        else:
            transcription_file = temp_file_path

        # Transcribe
        segments, _ = wmodel.transcribe(
            transcription_file,
            beam_size=beamsize,
            vad_filter=True,
            without_timestamps=True,
            compression_ratio_threshold=2.4,
            word_timestamps=False
        )

        full_text = " ".join(segment.text for segment in segments)
        file_type = 'video' if is_video else 'audio'

        # Send FCM data message; the push is the only channel that carries the
        # text, so a failed send must not be reported to the caller as success.
        delivered = send_fcm_data_message(
            fcm_token, full_text, file_type, created_date, transcription_name
        )
        if not delivered:
            return jsonify({'error': 'Failed to deliver transcription via FCM'}), 502

        return jsonify({}), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        # Runs on every exit path above, including the early 400 returns.
        cleanup_temp_files(temp_file_path, temp_audio_path)
        active_requests -= 1
        request_semaphore.release()
        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")

if __name__ == "__main__":
    # Make sure the scratch directory exists. exist_ok=True replaces the
    # separate existence check, so there is no window between check and create.
    os.makedirs(TEMPORARY_FOLDER, exist_ok=True)

    # Listen on all interfaces on port 7860, handling requests in threads.
    app.run(host="0.0.0.0", port=7860, threaded=True)