File size: 2,452 Bytes
a1da02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

import json
from pathlib import Path

def load_transcript_json(transcript_file: str):
    """
    Parse a Whisper JSON file into a Whisper JSON object

    # Parameters:
    transcript_file (str): Path to the Whisper JSON file

    # Returns:
    The decoded JSON value, returned as-is (no validation is performed).
    For a Whisper result this is a dict shaped like:

        {
            "text": " And so my fellow Americans, ask not what ...",
            "segments": [
                {
                    "text": " And so my fellow Americans, ask not what ...",
                    "start": 0.0,
                    "end": 10.36,
                    "words": [
                        {
                            "start": 0.0,
                            "end": 0.56,
                            "word": " And",
                            "probability": 0.61767578125
                        },
                        {
                            "start": 0.56,
                            "end": 0.88,
                            "word": " so",
                            "probability": 0.9033203125
                        },
                        etc.
                    ]
                }
            ]
        }

    # Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # "utf-8-sig" decodes plain UTF-8 identically but also strips a leading
    # byte-order mark; with plain "utf-8" json.load() rejects BOM-prefixed
    # files outright.
    with open(transcript_file, "r", encoding="utf-8-sig") as f:
        return json.load(f)


def load_transcript_srt(subtitle_file: str):
    """
    Parse a SRT file into a Whisper JSON object

    # Parameters:
    subtitle_file (str): Path to the SRT file

    # Returns:
    dict: {"text": <all subtitle contents concatenated in file order, with
    no separator>, "segments": [<one entry per subtitle>]}. Each segment has
    "text" (the subtitle content), "start" and "end" (seconds, as floats)
    and an empty "words" list, since SRT carries no word-level timing.
    """
    # Imported here rather than at module level: only this function needs
    # the third-party `srt` package.
    import srt

    # "utf-8-sig" decodes plain UTF-8 identically but also strips a leading
    # byte-order mark, which would otherwise end up at the start of the text
    # handed to the parser.
    with open(subtitle_file, "r", encoding="utf-8-sig") as f:
        srt_text = f.read()

    # srt.parse() yields Subtitle objects whose start/end are timedeltas, e.g.
    # Subtitle(index=1, start=datetime.timedelta(seconds=33, microseconds=843000), end=datetime.timedelta(seconds=38, microseconds=97000), content='地球上只有3%的水是淡水', proprietary='')
    segments = [
        {
            "text": sub.content,
            "start": sub.start.total_seconds(),
            "end": sub.end.total_seconds(),
            "words": [],
        }
        for sub in srt.parse(srt_text)
    ]

    return {
        "text": "".join(segment["text"] for segment in segments),
        "segments": segments,
    }

def load_transcript(file: str):
    """
    Load a transcript, choosing the parser from the file extension

    # Parameters:
    file (str): Path to a ".json" or ".srt" file (extension is matched
    case-insensitively)

    # Returns:
    Whatever load_transcript_json / load_transcript_srt returns for the file

    # Raises:
    ValueError: If the extension is neither ".json" nor ".srt"
    """
    suffix = Path(file).suffix.lower()

    if suffix == ".srt":
        return load_transcript_srt(file)
    if suffix == ".json":
        return load_transcript_json(file)

    raise ValueError(f"Unsupported file type: {suffix}")