github-actions[bot] committed on
Commit ·
ba2e30f
1
Parent(s): e92ff79
Auto-deploy from GitHub: 18b54f17939a0679569c10197cff43c713b55b9e
Browse files
app.py
CHANGED
|
@@ -33,9 +33,10 @@ def init_db():
|
|
| 33 |
caption TEXT,
|
| 34 |
created_at TEXT NOT NULL,
|
| 35 |
processed_at TEXT,
|
|
|
|
|
|
|
| 36 |
hide_from_ui INTEGER DEFAULT 0)'''
|
| 37 |
)
|
| 38 |
-
|
| 39 |
conn.commit()
|
| 40 |
conn.close()
|
| 41 |
|
|
@@ -140,12 +141,17 @@ def worker_loop():
|
|
| 140 |
print(f"🔄 Running STT on: {os.path.abspath(filepath)}")
|
| 141 |
command = f"""cd {CWD} && {PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {STT_MODEL_NAME}"""
|
| 142 |
|
| 143 |
-
|
|
|
|
|
|
|
| 144 |
command,
|
| 145 |
shell=True,
|
| 146 |
executable="/bin/bash",
|
| 147 |
-
|
|
|
|
| 148 |
cwd=CWD,
|
|
|
|
|
|
|
| 149 |
env={
|
| 150 |
**os.environ,
|
| 151 |
'PYTHONUNBUFFERED': '1',
|
|
@@ -154,6 +160,53 @@ def worker_loop():
|
|
| 154 |
}
|
| 155 |
)
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
# Read transcription result
|
| 158 |
output_path = f'{CWD}/temp_dir/output_transcription.json'
|
| 159 |
with open(output_path, 'r') as file:
|
|
@@ -189,6 +242,15 @@ def worker_loop():
|
|
| 189 |
print(f"⚠️ Worker error: {str(e)}")
|
| 190 |
time.sleep(POLL_INTERVAL)
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
def update_status(file_id, status, caption=None, error=None):
|
| 193 |
"""Update the status of a file in the database"""
|
| 194 |
conn = sqlite3.connect('audio_captions.db')
|
|
@@ -196,12 +258,12 @@ def update_status(file_id, status, caption=None, error=None):
|
|
| 196 |
|
| 197 |
if status == 'completed':
|
| 198 |
c.execute('''UPDATE audio_files
|
| 199 |
-
SET status = ?, caption = ?, processed_at = ?
|
| 200 |
WHERE id = ?''',
|
| 201 |
(status, caption, datetime.now().isoformat(), file_id))
|
| 202 |
elif status == 'failed':
|
| 203 |
c.execute('''UPDATE audio_files
|
| 204 |
-
SET status = ?, caption = ?, processed_at = ?
|
| 205 |
WHERE id = ?''',
|
| 206 |
(status, f"Error: {error}", datetime.now().isoformat(), file_id))
|
| 207 |
else:
|
|
@@ -318,9 +380,11 @@ def get_files():
|
|
| 318 |
'id': row['id'],
|
| 319 |
'filename': row['filename'],
|
| 320 |
'status': row['status'],
|
| 321 |
-
'caption':
|
| 322 |
'created_at': row['created_at'],
|
| 323 |
'processed_at': row['processed_at'],
|
|
|
|
|
|
|
| 324 |
'queue_position': queue_position,
|
| 325 |
'estimated_start_seconds': estimated_start_seconds
|
| 326 |
})
|
|
@@ -371,6 +435,8 @@ def get_file(file_id):
|
|
| 371 |
'caption': row['caption'],
|
| 372 |
'created_at': row['created_at'],
|
| 373 |
'processed_at': row['processed_at'],
|
|
|
|
|
|
|
| 374 |
'queue_position': queue_position,
|
| 375 |
'estimated_start_seconds': estimated_start_seconds
|
| 376 |
})
|
|
|
|
| 33 |
caption TEXT,
|
| 34 |
created_at TEXT NOT NULL,
|
| 35 |
processed_at TEXT,
|
| 36 |
+
progress INTEGER DEFAULT 0,
|
| 37 |
+
progress_text TEXT,
|
| 38 |
hide_from_ui INTEGER DEFAULT 0)'''
|
| 39 |
)
|
|
|
|
| 40 |
conn.commit()
|
| 41 |
conn.close()
|
| 42 |
|
|
|
|
| 141 |
print(f"🔄 Running STT on: {os.path.abspath(filepath)}")
|
| 142 |
command = f"""cd {CWD} && {PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {STT_MODEL_NAME}"""
|
| 143 |
|
| 144 |
+
import re
|
| 145 |
+
|
| 146 |
+
process = subprocess.Popen(
|
| 147 |
command,
|
| 148 |
shell=True,
|
| 149 |
executable="/bin/bash",
|
| 150 |
+
stdout=subprocess.PIPE,
|
| 151 |
+
stderr=subprocess.STDOUT,
|
| 152 |
cwd=CWD,
|
| 153 |
+
text=True,
|
| 154 |
+
bufsize=1,
|
| 155 |
env={
|
| 156 |
**os.environ,
|
| 157 |
'PYTHONUNBUFFERED': '1',
|
|
|
|
| 160 |
}
|
| 161 |
)
|
| 162 |
|
| 163 |
+
|
| 164 |
+
current_chunk = 1
|
| 165 |
+
total_chunks = 1
|
| 166 |
+
|
| 167 |
+
for line in process.stdout:
|
| 168 |
+
print(line, end='')
|
| 169 |
+
|
| 170 |
+
# Track chunk progress
|
| 171 |
+
chunk_match = re.search(r'Processing chunk (\d+)/(\d+)', line)
|
| 172 |
+
if chunk_match:
|
| 173 |
+
try:
|
| 174 |
+
current_chunk = int(chunk_match.group(1))
|
| 175 |
+
total_chunks = int(chunk_match.group(2))
|
| 176 |
+
except: pass
|
| 177 |
+
|
| 178 |
+
# Generic percentage matcher
|
| 179 |
+
percent_match = re.search(r'(\d+)%', line)
|
| 180 |
+
if percent_match:
|
| 181 |
+
try:
|
| 182 |
+
percent = int(percent_match.group(1))
|
| 183 |
+
if 'audio' in line.lower() or 'extract' in line.lower():
|
| 184 |
+
update_progress(file_id, percent // 2, "Extracting audio...")
|
| 185 |
+
elif 'transcrib' in line.lower() or 'model' in line.lower():
|
| 186 |
+
# Calculate overall transcription progress based on chunks
|
| 187 |
+
chunk_base = ((current_chunk - 1) / total_chunks) * 100
|
| 188 |
+
chunk_progress = (percent / total_chunks)
|
| 189 |
+
overall_transcription_progress = chunk_base + chunk_progress
|
| 190 |
+
|
| 191 |
+
# Remap so 50-100% of the overall bar is transcription
|
| 192 |
+
overall_progress = int(50 + (overall_transcription_progress / 2))
|
| 193 |
+
update_progress(file_id, overall_progress, f"Transcribing... (Chunk {current_chunk}/{total_chunks})")
|
| 194 |
+
else:
|
| 195 |
+
update_progress(file_id, percent, "Processing...")
|
| 196 |
+
except: pass
|
| 197 |
+
|
| 198 |
+
# Stage matchers
|
| 199 |
+
if 'extracting audio' in line.lower():
|
| 200 |
+
update_progress(file_id, 10, "Extracting audio...")
|
| 201 |
+
elif 'transcription started' in line.lower() and total_chunks == 1:
|
| 202 |
+
update_progress(file_id, 50, "Transcribing started...")
|
| 203 |
+
elif 'model loaded' in line.lower():
|
| 204 |
+
update_progress(file_id, 20, "Model loaded...")
|
| 205 |
+
|
| 206 |
+
process.wait()
|
| 207 |
+
if process.returncode != 0:
|
| 208 |
+
raise Exception(f"STT process failed with return code {process.returncode}")
|
| 209 |
+
|
| 210 |
# Read transcription result
|
| 211 |
output_path = f'{CWD}/temp_dir/output_transcription.json'
|
| 212 |
with open(output_path, 'r') as file:
|
|
|
|
| 242 |
print(f"⚠️ Worker error: {str(e)}")
|
| 243 |
time.sleep(POLL_INTERVAL)
|
| 244 |
|
| 245 |
+
def update_progress(file_id, progress, progress_text=None):
    """Update the progress of a file in the database.

    Sets the ``progress`` and ``progress_text`` columns of the
    ``audio_files`` row whose ``id`` equals ``file_id`` in
    ``audio_captions.db`` and commits the change. If no row matches,
    the UPDATE affects nothing and no error is raised.

    Args:
        file_id: Value matched against ``audio_files.id``.
        progress: Value stored in the ``progress`` column.
        progress_text: Value stored in the ``progress_text`` column;
            ``None`` (the default) is stored as SQL NULL.

    Raises:
        sqlite3.Error: If the statement or the commit fails (for example
            when the table is missing or the database is locked).
    """
    conn = sqlite3.connect('audio_captions.db')
    try:
        conn.execute(
            'UPDATE audio_files SET progress = ?, progress_text = ? WHERE id = ?',
            (progress, progress_text, file_id),
        )
        conn.commit()
    finally:
        # Close even when execute/commit raises, so a failed update does
        # not leave an open handle on the database file.
        conn.close()
|
| 253 |
+
|
| 254 |
def update_status(file_id, status, caption=None, error=None):
|
| 255 |
"""Update the status of a file in the database"""
|
| 256 |
conn = sqlite3.connect('audio_captions.db')
|
|
|
|
| 258 |
|
| 259 |
if status == 'completed':
|
| 260 |
c.execute('''UPDATE audio_files
|
| 261 |
+
SET status = ?, caption = ?, processed_at = ?, progress = 100, progress_text = 'Completed'
|
| 262 |
WHERE id = ?''',
|
| 263 |
(status, caption, datetime.now().isoformat(), file_id))
|
| 264 |
elif status == 'failed':
|
| 265 |
c.execute('''UPDATE audio_files
|
| 266 |
+
SET status = ?, caption = ?, processed_at = ?, progress_text = 'Failed'
|
| 267 |
WHERE id = ?''',
|
| 268 |
(status, f"Error: {error}", datetime.now().isoformat(), file_id))
|
| 269 |
else:
|
|
|
|
| 380 |
'id': row['id'],
|
| 381 |
'filename': row['filename'],
|
| 382 |
'status': row['status'],
|
| 383 |
+
'caption': "HIDDEN_IN_LIST_VIEW", # Don't send full captions in list view
|
| 384 |
'created_at': row['created_at'],
|
| 385 |
'processed_at': row['processed_at'],
|
| 386 |
+
'progress': row['progress'] or 0,
|
| 387 |
+
'progress_text': row['progress_text'],
|
| 388 |
'queue_position': queue_position,
|
| 389 |
'estimated_start_seconds': estimated_start_seconds
|
| 390 |
})
|
|
|
|
| 435 |
'caption': row['caption'],
|
| 436 |
'created_at': row['created_at'],
|
| 437 |
'processed_at': row['processed_at'],
|
| 438 |
+
'progress': row['progress'] or 0,
|
| 439 |
+
'progress_text': row['progress_text'],
|
| 440 |
'queue_position': queue_position,
|
| 441 |
'estimated_start_seconds': estimated_start_seconds
|
| 442 |
})
|