File size: 552 Bytes
476b1c3
 
 
 
70c05b8
476b1c3
 
 
 
 
70c05b8
 
 
 
 
 
476b1c3
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

import librosa 
import torch
from pathlib import Path
import whisper

# Audio sampling rate; presumably in Hz, matching Whisper's 16 kHz input
# (usage is not visible in this part of the file — confirm against callers).
sample_rate: int = 16000
# 2**15, the magnitude of the most negative signed 16-bit value. Presumably
# used to scale between int16 PCM samples and floats in [-1.0, 1.0) — confirm
# at the use site. The previous value, 32678.0, had two digits transposed.
float_factor: float = 32768.0


def preprocess_audio(filepath: str):
    """Load an audio file with Whisper and fit it to Whisper's fixed length.

    Args:
        filepath: Path of the audio file, passed to ``whisper.load_audio``.

    Returns:
        The result of ``whisper.pad_or_trim`` applied to the loaded audio,
        i.e. the waveform padded or trimmed to fit 30 seconds with
        Whisper's default settings.
    """
    # Decode the file, then pad/trim the waveform in one expression.
    return whisper.pad_or_trim(whisper.load_audio(filepath))

def parsing_text(filepath: str) -> str:
    """Return the contents of a ``.txt`` or ``.md`` file as a string.

    Args:
        filepath: Location of the text file. The extension check is
            case-insensitive, so ``NOTES.TXT`` is accepted.

    Returns:
        The complete file contents, decoded as UTF-8.

    Raises:
        ValueError: If the file extension is neither ``.txt`` nor ``.md``.
        OSError: If the file cannot be opened or read (e.g. it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8 (note that this
            is itself a subclass of ``ValueError``).
    """
    path = Path(filepath)
    if path.suffix.lower() not in ('.txt', '.md'):
        raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")

    # Decode explicitly as UTF-8: without an encoding argument read_text()
    # falls back to the locale's preferred encoding, which makes the result
    # platform-dependent (e.g. cp1252 on Windows).
    return path.read_text(encoding="utf-8")