File size: 28,768 Bytes
2daffd5
52d1010
 
 
 
 
 
 
2daffd5
52d1010
 
2daffd5
52d1010
 
 
 
 
 
2daffd5
df66a57
 
 
 
ef0931a
52d1010
ef0931a
13bbc90
 
 
52d1010
13bbc90
 
 
 
52d1010
13bbc90
 
 
52d1010
13bbc90
52d1010
13bbc90
52d1010
 
 
 
 
 
 
e8631da
df66a57
52d1010
13bbc90
 
 
52d1010
 
 
 
df66a57
 
 
 
 
 
 
 
 
2daffd5
52d1010
 
 
2daffd5
df66a57
 
 
 
 
 
 
 
52d1010
2daffd5
 
 
 
52d1010
 
 
 
2daffd5
 
52d1010
 
2daffd5
52d1010
 
5a4b416
 
2daffd5
 
 
 
 
 
 
52d1010
2daffd5
 
 
 
 
52d1010
 
 
2daffd5
 
 
 
 
 
5a4b416
 
2daffd5
 
52d1010
 
2daffd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52d1010
 
2daffd5
 
 
 
 
 
 
52d1010
 
2daffd5
 
 
 
 
 
 
52d1010
13bbc90
 
 
 
 
 
 
52d1010
13bbc90
 
 
df66a57
52d1010
df66a57
 
 
 
 
52d1010
13bbc90
52d1010
df66a57
 
 
52d1010
df66a57
52d1010
 
 
df66a57
 
52d1010
 
df66a57
 
 
52d1010
 
df66a57
 
 
 
 
 
 
 
 
 
 
 
 
 
13bbc90
df66a57
 
52d1010
 
df66a57
 
 
 
 
 
13bbc90
df66a57
 
 
2daffd5
 
 
 
 
 
 
 
 
 
 
 
 
df66a57
 
13bbc90
52d1010
 
13bbc90
 
 
 
 
 
 
df66a57
 
 
 
 
 
 
 
 
13bbc90
df66a57
 
52d1010
df66a57
52d1010
df66a57
13bbc90
52d1010
13bbc90
 
52d1010
13bbc90
 
5b4d2a9
52d1010
 
13bbc90
 
52d1010
df66a57
13bbc90
52d1010
 
df66a57
52d1010
 
df66a57
13bbc90
52d1010
13bbc90
df66a57
 
52d1010
df66a57
52d1010
 
df66a57
52d1010
 
 
df66a57
52d1010
ef0931a
 
c23fdff
 
52d1010
 
 
 
 
 
 
 
c23fdff
 
52d1010
c23fdff
 
 
 
 
ef0931a
 
c23fdff
 
ef0931a
 
 
 
 
 
 
 
 
52d1010
 
df66a57
13bbc90
52d1010
13bbc90
c23fdff
 
 
ef0931a
 
 
52d1010
 
ef0931a
 
52d1010
13bbc90
 
 
 
52d1010
 
13bbc90
 
52d1010
 
13bbc90
ef0931a
52d1010
 
13bbc90
df66a57
52d1010
df66a57
 
13bbc90
 
52d1010
df66a57
 
c23fdff
 
 
13bbc90
 
 
c23fdff
 
52d1010
 
c23fdff
 
df66a57
13bbc90
52d1010
13bbc90
52d1010
13bbc90
 
 
df66a57
13bbc90
52d1010
5b4d2a9
13bbc90
 
 
52d1010
 
 
df66a57
52d1010
 
df66a57
13bbc90
df66a57
 
52d1010
 
 
df66a57
52d1010
13bbc90
 
e8631da
 
13bbc90
 
 
e8631da
13bbc90
52d1010
e8631da
52d1010
 
2daffd5
 
 
 
 
13bbc90
 
 
52d1010
df66a57
52d1010
df66a57
 
13bbc90
52d1010
 
df66a57
52d1010
df66a57
 
13bbc90
2daffd5
 
 
 
52d1010
 
2daffd5
 
52d1010
 
df66a57
52d1010
df66a57
 
13bbc90
52d1010
df66a57
52d1010
 
df66a57
 
13bbc90
2daffd5
 
 
52d1010
2daffd5
 
 
 
 
52d1010
 
2daffd5
52d1010
2daffd5
df66a57
52d1010
 
df66a57
 
2daffd5
 
 
 
 
 
 
 
 
 
 
 
 
 
52d1010
2daffd5
 
 
52d1010
2daffd5
df66a57
52d1010
 
df66a57
 
13bbc90
df66a57
52d1010
 
df66a57
 
 
52d1010
 
df66a57
52d1010
df66a57
52d1010
df66a57
52d1010
 
df66a57
52d1010
df66a57
 
13bbc90
df66a57
 
52d1010
 
df66a57
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
 
52d1010
 
df66a57
 
 
 
52d1010
 
df66a57
 
52d1010
df66a57
 
 
13bbc90
df66a57
13bbc90
df66a57
13bbc90
df66a57
 
 
 
 
52d1010
df66a57
 
 
 
 
 
 
13bbc90
df66a57
 
 
 
 
 
13bbc90
df66a57
 
 
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
13bbc90
df66a57
13bbc90
df66a57
 
 
 
 
52d1010
df66a57
13bbc90
df66a57
 
 
 
 
13bbc90
df66a57
 
 
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
 
 
 
52d1010
df66a57
52d1010
df66a57
 
 
 
 
 
 
 
 
 
 
 
 
13bbc90
df66a57
 
 
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
 
 
 
13bbc90
df66a57
13bbc90
df66a57
 
 
 
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
 
52d1010
 
df66a57
 
52d1010
df66a57
52d1010
df66a57
 
 
52d1010
df66a57
 
 
 
 
13bbc90
df66a57
 
 
 
 
52d1010
df66a57
 
 
13bbc90
2daffd5
 
 
 
 
 
 
 
 
df66a57
 
 
 
 
 
 
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
 
 
52d1010
df66a57
 
 
13bbc90
df66a57
 
 
52d1010
 
13bbc90
ef0931a
 
 
 
 
52d1010
2daffd5
 
 
ef0931a
2daffd5
 
 
 
 
 
ef0931a
 
52d1010
 
13bbc90
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
import boto3
import json
import logging
import os
import platform
import requests
import signal
import sys
import tempfile
import threading
import time
import uuid
from botocore.exceptions import ClientError
from datetime import datetime
from dotenv import load_dotenv
from extract_signed_segments_from_annotations import ClipExtractor, VideoClip
from flask import Flask, jsonify, redirect, render_template, request, send_file, send_from_directory, session, url_for
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse

# Load environment variables from a .env file (if any) before reading settings
load_dotenv()

# Authentication bypass switch: True only when BYPASS_AUTH is "true" (case-insensitive)
bypass_auth = os.getenv("BYPASS_AUTH", "false").lower() == "true"

# Configure logging first so the module-level logger below is usable by the
# rest of the start-up code
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Hugging Face specific configuration: the app treats the presence of the
# SPACE_ID environment variable as "running inside a Hugging Face Space"
is_hf_space = os.getenv("SPACE_ID") is not None
if is_hf_space:
    logger.info("Running in Hugging Face Spaces environment")
    # Allow insecure transport for development in HF
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    # Ensure port is set correctly (the __main__ block reads PORT when starting the server)
    os.environ["PORT"] = "7860"

# Debug information printed once at import time
print("=" * 50)
print(f"Python version: {sys.version}")
print(f"Platform: {platform.platform()}")
print(f"Current directory: {os.getcwd()}")
print(f"Directory contents: {os.listdir('.')}")
print("=" * 50)

app = Flask(__name__)
# NOTE(review): when SECRET_KEY is unset the session signing key falls back to
# the literal below, which is only suitable for local testing.
app.secret_key = os.getenv("SECRET_KEY", "dev_key_for_testing")

# Configure session cookies when running in a Hugging Face Space
if is_hf_space:
    app.config["SESSION_COOKIE_SECURE"] = False
    app.config["SESSION_COOKIE_HTTPONLY"] = True
    # NOTE(review): None presumably leaves the SameSite attribute unset - confirm against the Flask docs
    app.config["SESSION_COOKIE_SAMESITE"] = None
    app.config["PERMANENT_SESSION_LIFETIME"] = 86400  # 24 hours

# Directory paths (resolved to absolute paths relative to the current working directory)
VIDEO_DIR = os.path.abspath("data/videos")
ANNOTATIONS_DIR = os.path.abspath("data/annotations")
TEMP_DIR = os.path.abspath("data/temp")
WORD_TIMESTAMPS_DIR = os.path.abspath("data/word_timestamps")
ALIGNMENTS_DIR = os.path.abspath("data/alignments")
TRANSCRIPTS_DIR = os.path.abspath("data/transcripts")

# S3 configuration; every value can be overridden through the environment
S3_BUCKET = os.getenv("S3_BUCKET", "sorenson-ai-sb-scratch")
S3_VIDEO_PREFIX = os.getenv("S3_VIDEO_PREFIX", "awilkinson/kylie_dataset_videos_for_alignment_webapp/")
# Videos are read from S3 unless USE_S3_FOR_VIDEOS is set to something other than "true"
USE_S3_FOR_VIDEOS = os.getenv("USE_S3_FOR_VIDEOS", "true").lower() == "true"

# Ensure all required directories exist
for directory in [VIDEO_DIR, ANNOTATIONS_DIR, TEMP_DIR, WORD_TIMESTAMPS_DIR, ALIGNMENTS_DIR, TRANSCRIPTS_DIR]:
    os.makedirs(directory, exist_ok=True)

# Global dictionaries for progress tracking, keyed by video id. They are
# written by the background worker threads and read by the progress endpoints.
clip_extraction_status = {}
transcription_progress_status = {}


# S3 helper functions
def get_s3_client():
    """Build a boto3 S3 client from the AWS settings in the environment.

    The region is read from ``AWS_DEFAULT_REGION`` (``us-west-2`` when unset).
    The key pair is read from ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY``; a missing variable is passed on as ``None``.
    """
    env = os.environ
    client_kwargs = {
        "region_name": env.get("AWS_DEFAULT_REGION", "us-west-2"),
        "aws_access_key_id": env.get("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": env.get("AWS_SECRET_ACCESS_KEY"),
    }
    return boto3.client("s3", **client_kwargs)


def list_s3_videos() -> List[str]:
    """List the ids of all ``.mp4`` videos stored under the configured S3 prefix.

    The listing is paginated, so prefixes holding more objects than fit in a
    single ``list_objects_v2`` response are returned in full.

    Returns:
        Video ids (object file names without the ``.mp4`` extension). An empty
        list is returned when AWS credentials are missing, when nothing is
        stored under the prefix, or when the listing fails for any reason
        (the failure is logged, not raised).
    """
    if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
        logger.warning("AWS credentials not found.  Returning empty video list.")
        return []

    try:
        s3_client = get_s3_client()
        paginator = s3_client.get_paginator("list_objects_v2")

        videos: List[str] = []
        found_any_object = False
        for page in paginator.paginate(Bucket=S3_BUCKET, Prefix=S3_VIDEO_PREFIX):
            # A page without "Contents" simply carries no objects.
            for item in page.get("Contents", []):
                found_any_object = True
                key = item["Key"]
                if key.endswith(".mp4"):
                    # Keep only the file name, without directories or extension
                    filename = os.path.basename(key)
                    videos.append(os.path.splitext(filename)[0])

        if not found_any_object:
            logger.warning(f"No videos found in S3 bucket {S3_BUCKET} with prefix {S3_VIDEO_PREFIX}")

        return videos
    except Exception as e:
        # Best effort: callers treat an empty list as "nothing available".
        logger.error(f"Error listing S3 videos: {str(e)}")
        return []


def download_video_from_s3(video_id: str) -> Optional[str]:
    """Download a video from S3 into the local videos directory.

    Args:
        video_id: Video file name without the ``.mp4`` extension. It must be a
            plain name with no directory components.

    Returns:
        The local path of the video (already present or freshly downloaded),
        or ``None`` when the id is rejected or the download raises
        ``ClientError``.
    """
    # video_id originates from request URLs and is joined into both the S3 key
    # and a local filesystem path, so refuse anything that could point outside
    # VIDEO_DIR (e.g. "../../etc/passwd").
    if not video_id or os.path.basename(video_id) != video_id:
        logger.error(f"Refusing to download video with invalid id: {video_id!r}")
        return None

    video_filename = f"{video_id}.mp4"
    s3_key = f"{S3_VIDEO_PREFIX}{video_filename}"
    local_path = os.path.join(VIDEO_DIR, video_filename)

    # Reuse the local copy when one is already present
    if os.path.exists(local_path):
        logger.info(f"Video {video_id} already exists locally.")
        return local_path

    try:
        logger.info(f"Downloading video {video_id} from S3...")
        s3_client = get_s3_client()
        s3_client.download_file(S3_BUCKET, s3_key, local_path)
        logger.info(f"Video {video_id} downloaded successfully to {local_path}")
        return local_path
    except ClientError as e:
        logger.error(f"Error downloading video from S3: {str(e)}")
        return None


def generate_presigned_url(video_id: str, expiration: int = 3600) -> Optional[str]:
    """Create a time-limited URL that gives direct access to a video in S3.

    Args:
        video_id: Video file name without the ``.mp4`` extension.
        expiration: Value passed to boto3 as ``ExpiresIn``.

    Returns:
        The presigned URL, or ``None`` when boto3 raises ``ClientError``.
    """
    object_key = f"{S3_VIDEO_PREFIX}{video_id}.mp4"
    request_params = {"Bucket": S3_BUCKET, "Key": object_key}

    try:
        return get_s3_client().generate_presigned_url(
            "get_object",
            Params=request_params,
            ExpiresIn=expiration,
        )
    except ClientError as e:
        logger.error(f"Error generating presigned URL: {str(e)}")
        return None


# Graceful shutdown handler
def graceful_shutdown(signum, frame):
    """Signal handler: log the received signal and exit with status 0.

    Args:
        signum: Number of the signal that was delivered.
        frame: Current stack frame supplied by the signal machinery (unused).
    """
    shutdown_notice = f"Received signal {signum}, shutting down gracefully..."
    logger.info(shutdown_notice)
    # Nothing needs releasing yet; put any cleanup here, before the exit.
    raise SystemExit(0)


# Register signal handlers: both SIGTERM and SIGINT are routed to
# graceful_shutdown instead of their default handling.
signal.signal(signal.SIGTERM, graceful_shutdown)
signal.signal(signal.SIGINT, graceful_shutdown)


# Login required decorator
def login_required(f):
    """Decorator that only runs the view when a user is stored in the session.

    Requests without a ``"user"`` entry in the session are redirected to the
    ``login`` endpoint; otherwise the wrapped view is called with the original
    arguments and its result is returned.
    """
    from functools import wraps

    def decorated_function(*args, **kwargs):
        if "user" in session:
            return f(*args, **kwargs)
        logger.info("User not in session, redirecting to login")
        return redirect(url_for("login"))

    # Copy the view's name/docstring onto the wrapper so Flask registers the
    # endpoint under the original function name.
    return wraps(f)(decorated_function)


# Allow specific users (for testing)
def is_allowed_user(username: str) -> bool:
    """Tell whether ``username`` may use the application.

    The allow-list is the comma-separated ``ALLOWED_USERS`` environment
    variable (``"Perilon"`` when unset), with surrounding whitespace stripped
    from each entry. Outside a Hugging Face Space every user is allowed.
    """
    raw_allow_list = os.getenv("ALLOWED_USERS", "Perilon")
    permitted = [entry.strip() for entry in raw_allow_list.split(",")]
    if username in permitted:
        return True
    # Not on the list: still allowed when running locally.
    return not is_hf_space


def update_extraction_progress(video_id: str, current: int, total: int) -> None:
    """Record clip-extraction progress for ``video_id``.

    Stores ``current``, ``total`` and the derived integer percentage in
    ``clip_extraction_status``.

    Args:
        video_id: Video whose progress entry is updated.
        current: Number of clips processed so far.
        total: Number of clips to process.
    """
    if total > 0:
        percent = int((current / total) * 100)
    else:
        # Nothing to extract: report completion instead of dividing by zero.
        percent = 100
    clip_extraction_status[video_id] = {"current": current, "total": total, "percent": percent}


def run_clip_extraction(video_id: str) -> None:
    """Extract all annotated clips of a video and track the progress.

    Meant to run in a background thread. Progress reported by the extractor is
    written to ``clip_extraction_status``; once extraction returns, the entry
    is forced to 100 percent. Any exception is logged and stored as
    ``{"error": <message>}`` for the video.
    """
    def report_progress(current, total):
        update_extraction_progress(video_id, current, total)

    try:
        extractor = ClipExtractor(app.root_path)
        extractor.extract_clips_from_annotations(video_id, progress_callback=report_progress)

        if video_id not in clip_extraction_status:
            # The extractor never reported anything: mark the job as done.
            update_extraction_progress(video_id, 1, 1)
        else:
            latest = clip_extraction_status[video_id]
            if latest.get("percent", 0) < 100:
                update_extraction_progress(video_id, latest["total"], latest["total"])
    except Exception as e:
        logger.error(f"Error during clip extraction for {video_id}: {str(e)}")
        clip_extraction_status[video_id] = {"error": str(e)}


def run_transcription(video_id: str) -> None:
    """Produce the word-timestamp transcription for a video.

    Meant to run in a background thread. Progress and failures are reported
    through ``transcription_progress_status[video_id]``; nothing is raised.

    Steps: reuse an existing non-empty result file, otherwise obtain the video
    (from S3 when ``USE_S3_FOR_VIDEOS`` is set, else from the local data
    directory), verify AWS credentials, run the transcription and store the
    result as JSON in ``WORD_TIMESTAMPS_DIR``.
    """
    try:
        base_dir = app.root_path
        output_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")

        # A non-empty result file counts as a valid cached transcription.
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            logger.info(f"Using cached transcription for video {video_id}.")
            transcription_progress_status[video_id] = {"status": "completed", "percent": 100}
            return

        # Download video from S3 if needed
        if USE_S3_FOR_VIDEOS:
            video_path = download_video_from_s3(video_id)
            if not video_path:
                transcription_progress_status[video_id] = {
                    "status": "error",
                    "percent": 0,
                    "message": f"Failed to download video {video_id} from S3"
                }
                return
        else:
            video_path = os.path.join(base_dir, "data", "videos", f"{video_id}.mp4")

        transcription_progress_status[video_id] = {"status": "started", "percent": 10}

        # Check if AWS credentials are available
        if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
            logger.warning("AWS credentials not found.  Transcription will not work properly.")
            transcription_progress_status[video_id] = {
                "status": "error",
                "percent": 0,
                "message": "AWS credentials missing"
            }
            return

        # Run transcription via the imported function from get_transcription_with_amazon.py
        from get_transcription_with_amazon import get_word_timestamps
        word_timestamps = get_word_timestamps(video_path)

        # Write to a uniquely named temporary file and move it into place, so
        # an interrupted write can never leave a partial, non-empty file that
        # the cache check above would later accept as a finished result.
        tmp_path = f"{output_path}.{uuid.uuid4().hex}.tmp"
        try:
            with open(tmp_path, "w") as f:
                json.dump(word_timestamps, f, indent=4)
            os.replace(tmp_path, output_path)
        finally:
            # After a successful replace the temp file no longer exists.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        transcription_progress_status[video_id] = {"status": "completed", "percent": 100}
    except Exception as e:
        logger.error(f"Error during transcription for {video_id}: {str(e)}")
        transcription_progress_status[video_id] = {"status": "error", "percent": 0, "message": str(e)}


# Authentication routes
@app.route("/login")
def login():
    """Log the visitor in, for both local and Hugging Face deployments.

    Locally a mock ``LocalDeveloper`` user is stored in the session. In a
    Hugging Face Space the ``X-Spaces-Username`` header is used: an allowed
    user is stored in the session, anyone else is sent to ``/auth``.
    """
    logger.info(f"Login route called.  Headers: {dict(request.headers)}")

    if not is_hf_space:
        # For local development
        session["user"] = {"name": "LocalDeveloper", "is_mock": True}
        return redirect(url_for("index"))

    username = request.headers.get("X-Spaces-Username")
    logger.info(f"Username from headers in login: {username}")

    if not (username and is_allowed_user(username)):
        # Hand over to the HF auth endpoint
        return redirect("/auth")

    session["user"] = {"name": username, "is_hf": True}
    return redirect(url_for("index"))


@app.route("/auth/callback")
def auth_callback():
    """Callback invoked after Hugging Face authentication.

    In a Hugging Face Space the user name is taken from the
    ``X-Spaces-Username`` header. The user is stored in the session only when
    the header is present and the name passes ``is_allowed_user``, matching
    the checks in the other auth routes. Outside a Space the request is
    redirected to the login route.
    """
    logger.info(f"Auth callback called.  Headers: {dict(request.headers)}")

    if not is_hf_space:
        return redirect(url_for("login"))

    # In Hugging Face Spaces, the user info is available in the request headers
    username = request.headers.get("X-Spaces-Username")
    if not username:
        return render_template("error.html", message="Authentication failed.  No username provided.")

    # Enforce the allow-list here too; otherwise this route would let any
    # Hugging Face user obtain a session.
    if not is_allowed_user(username):
        logger.warning(f"User {username} is not in the allowed users list")
        return render_template("error.html", message="Authentication failed.  User is not allowed.")

    session["user"] = {"name": username, "is_hf": True}
    return redirect(url_for("index"))


@app.route("/health")
def health_check():
    """Health check endpoint for container verification.

    Returns a JSON document with a fixed ``"healthy"`` status, selected
    environment settings, the keys currently in the session, the deployment
    flags and whether the main data directories exist.

    This route is reachable without authentication, so the secret key is only
    reported as set / not set and none of its characters are exposed.
    """
    env_vars = {
        "FLASK_ENV": os.environ.get("FLASK_ENV", "production"),
        "DEBUG": os.environ.get("DEBUG", "Not set"),
        "SPACE_ID": os.environ.get("SPACE_ID", "Not set"),
        "BYPASS_AUTH": os.environ.get("BYPASS_AUTH", "Not set"),
        # Never echo any part of the secret itself.
        "SECRET_KEY": "Set" if os.environ.get("SECRET_KEY") else "Not set",
        "S3_BUCKET": os.environ.get("S3_BUCKET", "Not set"),
        "S3_VIDEO_PREFIX": os.environ.get("S3_VIDEO_PREFIX", "Not set"),
        "USE_S3_FOR_VIDEOS": os.environ.get("USE_S3_FOR_VIDEOS", "Not set")
    }

    logger.info(f"Health check called.  Environment: {env_vars}")

    # Only the session keys are reported, not their values
    session_keys = list(session.keys()) if session else []

    return jsonify({
        "status": "healthy",
        "environment": env_vars,
        "session_keys": session_keys,
        "is_hf_space": is_hf_space,
        "bypass_auth": bypass_auth,
        "directories": {
            "videos": os.path.exists(VIDEO_DIR),
            "annotations": os.path.exists(ANNOTATIONS_DIR),
            "temp": os.path.exists(TEMP_DIR)
        }
    })


@app.route("/auth")
def auth():
    """Authenticate the visitor (Hugging Face header based, or local mock).

    Behaviour:
      * When the module-level ``bypass_auth`` flag (env ``BYPASS_AUTH=true``)
        is enabled, a default user is stored in the session immediately.
      * In a Hugging Face Space, an allowed ``X-Spaces-Username`` is stored in
        the session.
      * Outside a Space, a mock ``LocalDeveloper`` user is stored.
      * Otherwise an error page asking the visitor to log in is rendered.

    The bypass is no longer forced on inside this function; it has to be
    requested explicitly through the environment.
    """
    logger.info(f"Auth route called.  Headers: {dict(request.headers)}")

    # If bypass is enabled via configuration, authenticate immediately
    if bypass_auth:
        logger.info("Auth bypass enabled, setting default user")
        session["user"] = {"name": "Perilon", "is_hf": True}
        return redirect(url_for("index"))

    # Normal authentication logic
    username = request.headers.get("X-Spaces-Username")
    logger.info(f"Username from headers in auth: {username}")

    if is_hf_space and username and is_allowed_user(username):
        logger.info(f"Setting user in session: {username}")
        session["user"] = {"name": username, "is_hf": True}
        return redirect(url_for("index"))
    elif not is_hf_space:
        # For local development
        session["user"] = {"name": "LocalDeveloper", "is_mock": True}
        return redirect(url_for("index"))
    else:
        # For HF with no valid username yet
        return render_template("error.html", message=
            "Waiting for Hugging Face authentication.  If you continue to see this message, "
            "please make sure you're logged into Hugging Face and your username is allowed.")


@app.before_request
def check_auth():
    """Check authentication before every request.

    Returns ``None`` to let the request proceed, or a redirect response when
    the visitor still has to authenticate.

    The auth routes, ``/debug``, ``/health`` and static files are always let
    through. When the module-level ``bypass_auth`` flag (env
    ``BYPASS_AUTH=true``) is enabled, a default user is placed in the session.
    The bypass is no longer forced on inside this function, so the checks
    below are actually enforced unless it is requested explicitly.
    """
    # Skip authentication for certain routes and static files
    if request.path in ["/login", "/logout", "/auth", "/auth/callback", "/debug", "/health"] or request.path.startswith("/static/"):
        return

    # Log all request paths to help troubleshoot
    logger.debug(f"Request path: {request.path}, User in session: {'user' in session}")

    if bypass_auth:
        # Set default user for bypass mode if not already set
        if "user" not in session:
            session["user"] = {"name": "Perilon", "is_hf": True}
        return

    if is_hf_space:
        # Check for HF username header
        username = request.headers.get("X-Spaces-Username")

        if "user" in session:
            logger.debug(f"User in session: {session['user']}")
            return

        if username and is_allowed_user(username):
            logger.info(f"Setting user from headers: {username}")
            session["user"] = {"name": username, "is_hf": True}
            return

        # No valid user in session or headers
        logger.info("No authenticated user, redirecting to /auth")
        return redirect("/auth")
    elif "user" not in session:
        return redirect(url_for("login"))


@app.route("/logout")
def logout():
    """Drop the whole session, then redirect.

    In a Hugging Face Space the redirect goes to ``/auth/logout``; otherwise
    it goes to the login route.
    """
    session.clear()
    destination = "/auth/logout" if is_hf_space else url_for("login")
    return redirect(destination)


@app.route("/debug")
def debug_info():
    """Return session, request and configuration details as JSON.

    NOTE(review): this route is excluded from the before-request auth check
    and has no login decorator, yet it echoes session contents, headers and
    cookies - confirm this is intended outside of debugging.
    """
    reported_settings = [
        "SESSION_COOKIE_SECURE",
        "SESSION_COOKIE_HTTPONLY",
        "SESSION_COOKIE_SAMESITE",
        "PERMANENT_SESSION_LIFETIME",
    ]
    session_settings = {
        name: str(value)
        for name, value in app.config.items()
        if name in reported_settings
    }
    cookie_values = {name: value for name, value in request.cookies.items()}
    storage_settings = {
        "S3_BUCKET": S3_BUCKET,
        "S3_VIDEO_PREFIX": S3_VIDEO_PREFIX,
        "USE_S3_FOR_VIDEOS": USE_S3_FOR_VIDEOS,
    }

    return jsonify({
        "session": dict(session) if session else None,
        "headers": dict(request.headers),
        "cookies": cookie_values,
        "is_hf_space": is_hf_space,
        "allowed_users": os.getenv("ALLOWED_USERS", "Perilon"),
        "app_config": session_settings,
        "s3_config": storage_settings,
    })


# Main application routes
@app.route("/")
@login_required
def index():
    """Landing route: forward the visitor to the video selection page."""
    selection_page = url_for("select_video")
    return redirect(selection_page)


@app.route("/select_video")
@login_required
def select_video():
    """Render the page on which a video is chosen for annotation.

    Video ids come from S3 when ``USE_S3_FOR_VIDEOS`` is set, otherwise from
    the ``.mp4`` files in the local video directory. An error page is rendered
    when that directory is missing.
    """
    if USE_S3_FOR_VIDEOS:
        video_ids = list_s3_videos()
    elif os.path.exists(VIDEO_DIR):
        video_ids = [
            os.path.splitext(name)[0]
            for name in os.listdir(VIDEO_DIR)
            if name.endswith(".mp4")
        ]
    else:
        return render_template("error.html", message="Video directory not found.")

    current_user = session.get("user")
    return render_template("select_video.html", video_ids=video_ids, user=current_user)


@app.route("/player/<video_id>")
@login_required
def player(video_id):
    """Render the annotation player page for ``video_id``."""
    current_user = session.get("user")
    return render_template("player.html", video_id=video_id, user=current_user)


@app.route("/videos")
@login_required
def get_videos():
    """API endpoint returning the available video file names as JSON.

    With S3 enabled the ids from the bucket are returned with a ``.mp4``
    suffix; otherwise the ``.mp4``/``.avi``/``.mov`` files of the local video
    directory are returned. A 404 with an ``error`` message is sent when
    nothing is available.
    """
    if USE_S3_FOR_VIDEOS:
        remote_ids = list_s3_videos()
        if remote_ids:
            # Append the extension so clients receive file names in both modes
            return jsonify([f"{vid}.mp4" for vid in remote_ids])
        return jsonify({"error": "No videos found in S3"}), 404

    # Local file behaviour
    if not os.path.exists(VIDEO_DIR):
        return jsonify({"error": "Video directory not found"}), 404

    video_extensions = (".mp4", ".avi", ".mov")
    local_files = [name for name in os.listdir(VIDEO_DIR) if name.endswith(video_extensions)]
    if local_files:
        return jsonify(local_files)
    return jsonify({"error": "No videos found"}), 404


@app.route("/video/<path:filename>")
@login_required
def serve_video(filename):
    """Serve a video file, from S3 or from the local video directory.

    With S3 enabled the client is redirected to a presigned URL; if none can
    be generated, the video is downloaded locally and served from disk.
    A 404 JSON error is returned when the video cannot be found.
    """
    if not USE_S3_FOR_VIDEOS:
        # Local file behaviour
        if os.path.exists(os.path.join(VIDEO_DIR, filename)):
            return send_from_directory(VIDEO_DIR, filename)
        return jsonify({"error": "Video not found"}), 404

    video_id = os.path.splitext(filename)[0]  # file name without extension

    # Preferred: let the client fetch the object straight from S3
    direct_url = generate_presigned_url(video_id)
    if direct_url:
        return redirect(direct_url)

    # Fallback: pull the file into local storage and serve it from there
    downloaded = download_video_from_s3(video_id)
    if downloaded and os.path.exists(downloaded):
        return send_from_directory(VIDEO_DIR, filename)

    return jsonify({"error": "Video not found in S3"}), 404


@app.route("/save_annotations", methods=["POST"])
@login_required
def save_annotations():
    """Save the annotation timestamps posted for a video.

    Expects a JSON body with ``video`` (video id, a plain file name) and
    ``timestamps``. The data is written to
    ``<ANNOTATIONS_DIR>/<video>_annotations.json`` together with the
    annotation date and the name of the session user.

    Returns:
        JSON ``{"success": True, ...}`` on success, or a 400 response when
        the body is missing fields or the video name is not a plain name.
    """
    data = request.json
    if not data or "video" not in data or "timestamps" not in data:
        return jsonify({"success": False, "message": "Invalid data"}), 400

    video_name = data["video"]
    # The name is embedded in a filesystem path, so it must not contain
    # directory components that could redirect the write elsewhere.
    if not isinstance(video_name, str) or not video_name or os.path.basename(video_name) != video_name:
        return jsonify({"success": False, "message": "Invalid data"}), 400

    annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_name}_annotations.json")
    annotation_data = {
        "video_name": video_name + ".mp4",
        "timestamps": sorted(data["timestamps"]),
        "annotation_date": datetime.now().isoformat(),
        "annotated_by": session.get("user", {}).get("name", "unknown")
    }
    with open(annotation_file, "w") as f:
        json.dump(annotation_data, f, indent=4)
    return jsonify({"success": True, "message": "Annotations saved successfully"})


@app.route("/get_annotations/<path:video_name>")
@login_required
def get_annotations(video_name):
    """Return the stored annotations of a video as JSON, or a 404 error."""
    annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_name}_annotations.json")

    if os.path.exists(annotation_file):
        with open(annotation_file, "r") as handle:
            stored = json.load(handle)
        return jsonify(stored)

    return jsonify({"error": "No annotations found"}), 404


@app.route("/alignment/<video_id>")
@login_required
def alignment_mode(video_id):
    """Render the page for aligning sign language with transcribed text.

    The number of clips passed to the template is the number of gaps between
    consecutive annotation timestamps, and is never negative. An error page is
    rendered when the video has no annotation file yet.
    """
    annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
    if not os.path.exists(annotation_file):
        return render_template("error.html", message="No annotations found for this video.  Please annotate the video first.")
    with open(annotation_file, "r") as f:
        annotations = json.load(f)

    # N timestamps delimit N - 1 clips; an empty list must yield 0, not -1.
    total_clips = max(0, len(annotations["timestamps"]) - 1)

    return render_template(
        "alignment.html",
        video_id=video_id,
        total_clips=total_clips,
        user=session.get("user")
    )


@app.route("/api/transcript/<video_id>")
@login_required
def get_transcript(video_id):
    """Return the transcript of a video built from its word timestamps.

    The response contains the full text (words joined by single spaces) and a
    list of ``word``/``start``/``end`` entries. A 404 is returned when the
    timestamp file is missing and a 500 when it cannot be processed.
    """
    timestamps_file = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
    logger.info(f"Attempting to load word timestamps from: {timestamps_file}")

    if os.path.exists(timestamps_file):
        try:
            with open(timestamps_file, "r") as handle:
                entries = json.load(handle)

            full_text = " ".join([entry["punctuated_word"] for entry in entries])

            words_with_times = []
            for entry in entries:
                words_with_times.append({
                    "word": entry["punctuated_word"],
                    "start": float(entry["start_time"]),
                    "end": float(entry["end_time"]),
                })

            logger.info(f"Successfully created transcript ({len(full_text)} characters)")
            payload = {"status": "success", "text": full_text, "words": words_with_times}
            return jsonify(payload)
        except Exception as e:
            logger.error(f"Error processing word timestamps: {str(e)}")
            failure = {"status": "error", "message": f"Error processing word timestamps: {str(e)}"}
            return jsonify(failure), 500

    logger.warning(f"Word timestamps file not found: {timestamps_file}")
    missing = {"status": "error", "message": "No word timestamps found for this video"}
    return jsonify(missing), 404


@app.route("/api/word_timestamps/<video_id>")
@login_required
def get_word_timestamps(video_id):
    """Return the raw word-level timestamps stored for a video.

    A 404 is returned when the timestamp file is missing and a 500 when it
    cannot be read or parsed.
    """
    timestamps_file = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
    logger.info(f"Attempting to load word timestamps from: {timestamps_file}")

    if os.path.exists(timestamps_file):
        try:
            with open(timestamps_file, "r") as handle:
                entries = json.load(handle)
            logger.info(f"Successfully loaded {len(entries)} word timestamps")
            payload = {"status": "success", "words": entries}
            return jsonify(payload)
        except Exception as e:
            logger.error(f"Error processing word timestamps: {str(e)}")
            failure = {"status": "error", "message": f"Error processing word timestamps: {str(e)}"}
            return jsonify(failure), 500

    logger.warning(f"Word timestamps file not found: {timestamps_file}")
    missing = {"status": "error", "message": "No word timestamps found for this video"}
    return jsonify(missing), 404


@app.route("/api/clips/<video_id>")
@login_required
def get_video_clips(video_id):
    """Return the clips of a video, derived from its annotation timestamps.

    Each pair of consecutive timestamps becomes one clip with its index,
    start, end and the URL path under which the clip is served.

    Returns:
        JSON with ``status`` and ``clips``; a 404 error when the video has no
        annotation file (matching the other annotation endpoints) and a 500
        error when the file cannot be processed.
    """
    annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
    if not os.path.exists(annotation_file):
        # A missing resource is a client-side condition, not a server failure.
        logger.warning(f"Error getting clips: Annotations not found")
        return jsonify({
            "status": "error",
            "message": "Annotations not found"
        }), 404

    try:
        with open(annotation_file, "r") as f:
            annotations = json.load(f)
        timestamps = annotations["timestamps"]
        # Pair every timestamp with its successor: N timestamps -> N - 1 clips.
        clips = [
            {
                "index": i,
                "start": start,
                "end": end,
                "path": f"/clip/{video_id}/{i}"
            }
            for i, (start, end) in enumerate(zip(timestamps, timestamps[1:]))
        ]
        return jsonify({
            "status": "success",
            "clips": clips
        })
    except Exception as e:
        logger.error(f"Error getting clips: {str(e)}")
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500


@app.route("/clip/<video_id>/<int:clip_index>")
@login_required
def serve_clip(video_id, clip_index):
    """Send one extracted clip of a video, or a 404 JSON error if it is absent.

    Clips are looked up in the temp directory under the name
    ``<video_id>_clip_<index>.mp4`` with the index zero-padded to three digits.
    """
    clip_filename = f"{video_id}_clip_{clip_index:03d}.mp4"
    clip_path = os.path.join(TEMP_DIR, clip_filename)
    logger.info(f"Attempting to serve clip: {clip_path}")

    if os.path.exists(clip_path):
        return send_file(clip_path, mimetype="video/mp4")

    logger.error(f"Clip not found: {clip_path}")
    not_found = {"status": "error", "message": "Clip not found"}
    return jsonify(not_found), 404


@app.route("/api/save_alignments", methods=["POST"])
@login_required
def save_alignments():
    """Save the alignment data posted for a video.

    Expects a JSON body with ``video_id`` (a plain file name) and
    ``alignments``. Every non-empty alignment is tagged with the name of the
    session user before the list is written to
    ``<ALIGNMENTS_DIR>/<video_id>.json``.

    Returns:
        JSON ``{"success": True, ...}`` on success, a 400 response when the
        body is missing fields or the video id is not a plain name, and a 500
        response when saving fails.
    """
    try:
        data = request.json
        if not data or "video_id" not in data or "alignments" not in data:
            return jsonify({"success": False, "message": "Invalid data"}), 400

        video_id = data["video_id"]
        # The id is embedded in a filesystem path, so it must not contain
        # directory components that could redirect the write elsewhere.
        if not isinstance(video_id, str) or not video_id or os.path.basename(video_id) != video_id:
            return jsonify({"success": False, "message": "Invalid data"}), 400

        # Add user information to the alignments
        aligned_by = session.get("user", {}).get("name", "unknown")
        for alignment in data["alignments"]:
            if alignment:
                alignment["aligned_by"] = aligned_by

        output_path = os.path.join(ALIGNMENTS_DIR, f"{video_id}.json")
        with open(output_path, "w") as f:
            json.dump(data["alignments"], f, indent=2)
        return jsonify({
            "success": True,
            "message": "Alignments saved successfully"
        })
    except Exception as e:
        logger.error(f"Error saving alignments: {str(e)}")
        return jsonify({
            "success": False,
            "message": str(e)
        }), 500


@app.route("/api/extract_clips/<video_id>")
@login_required
def extract_clips_for_video(video_id):
    """Start clip extraction and transcription for a video in the background.

    With S3 enabled the video is downloaded first; a 404 JSON error is
    returned when that fails. A worker thread is started for each job that
    has not reached 100 percent and is not already running in this process,
    so repeated calls (for example page reloads) do not launch duplicate
    workers for the same video.

    Returns:
        JSON ``{"status": "started"}`` once the workers are running or known
        to be complete.
    """
    # If using S3, ensure the video is downloaded first
    if USE_S3_FOR_VIDEOS:
        video_path = download_video_from_s3(video_id)
        if not video_path:
            return jsonify({
                "status": "error",
                "message": f"Failed to download video {video_id} from S3"
            }), 404

    def _worker_running(thread_name):
        # Workers are named per job and video, so a live thread with that name
        # means the job is still in progress.
        return any(
            worker.name == thread_name and worker.is_alive()
            for worker in threading.enumerate()
        )

    clip_worker = f"clip-extraction-{video_id}"
    clip_status = clip_extraction_status.get(video_id, {})
    if clip_status.get("percent", 0) < 100 and not _worker_running(clip_worker):
        threading.Thread(target=run_clip_extraction, args=(video_id,), name=clip_worker).start()

    transcription_worker = f"transcription-{video_id}"
    transcription_status = transcription_progress_status.get(video_id, {})
    if transcription_status.get("percent", 0) < 100 and not _worker_running(transcription_worker):
        threading.Thread(target=run_transcription, args=(video_id,), name=transcription_worker).start()

    return jsonify({"status": "started"})


@app.route("/api/clip_progress/<video_id>")
@login_required
def clip_progress(video_id):
    """Return the clip-extraction progress entry of a video as JSON.

    Videos without an entry are reported with all counters at zero.
    """
    not_started = {"current": 0, "total": 0, "percent": 0}
    return jsonify(clip_extraction_status.get(video_id, not_started))


@app.route("/api/transcription_progress/<video_id>")
@login_required
def transcription_progress(video_id):
    """Return the transcription progress entry of a video as JSON.

    Videos without an entry are reported as ``"not started"`` at 0 percent.
    """
    not_started = {"status": "not started", "percent": 0}
    return jsonify(transcription_progress_status.get(video_id, not_started))


if __name__ == "__main__":
    # Script entry point: print the effective configuration, then start the
    # Flask development server on all interfaces.
    try:
        # Print diagnostic information
        print("=" * 50)
        print("Starting app with configuration:")
        print(f"- Running in HF Space: {is_hf_space}")
        print(f"- Auth bypass: {bypass_auth}")
        print(f"- Port: {os.getenv('PORT', 7860)}")
        print(f"- S3 for videos: {USE_S3_FOR_VIDEOS}")
        print(f"- S3 bucket: {S3_BUCKET}")
        print(f"- S3 prefix: {S3_VIDEO_PREFIX}")
        print(f"- Available videos: {os.listdir(VIDEO_DIR) if os.path.exists(VIDEO_DIR) else 'None'}")
        if USE_S3_FOR_VIDEOS:
            try:
                s3_videos = list_s3_videos()
                print(f"- Available S3 videos: {s3_videos if s3_videos else 'None'}")
            except Exception as e:
                print(f"- Error listing S3 videos: {str(e)}")
        print("=" * 50)

        port = int(os.getenv("PORT", 7860))
        # The server listens on 0.0.0.0, and debug mode enables the Werkzeug
        # interactive debugger, which allows arbitrary code execution. Debug
        # is therefore off unless explicitly requested via FLASK_DEBUG or DEBUG.
        debug_setting = os.getenv("FLASK_DEBUG", os.getenv("DEBUG", "false"))
        debug_mode = debug_setting.strip().lower() in ("1", "true")
        app.run(host="0.0.0.0", port=port, debug=debug_mode)
    except Exception as e:
        print(f"Error starting the application: {e}")
        import traceback
        traceback.print_exc()