|
|
import base64 |
|
|
import datetime |
|
|
import json |
|
|
import sys |
|
|
import requests |
|
|
|
|
|
|
|
|
import os |
|
|
import glob |
|
|
import re |
|
|
import subprocess |
|
|
|
|
|
|
|
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
# Endpoint to call. Override the placeholder default with the HF_ENDPOINT
# environment variable.
API_URL = os.getenv(
    "HF_ENDPOINT", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.endpoints.huggingface.cloud"
)
# `requests` rejects URLs without a scheme (MissingSchema), so fall back to
# HTTPS when only a bare host name was configured.
if "://" not in API_URL:
    API_URL = f"https://{API_URL}"

# Headers sent with every request: JSON in, JSON out.
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
}
# Only attach credentials when a token is actually configured; formatting a
# missing token would otherwise send the literal string "Bearer None".
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    headers["Authorization"] = f"Bearer {_hf_token}"
|
|
|
|
|
|
|
|
def transcribe(filename, params=None):
    """Transcribe an audio file via the endpoint and save the raw result.

    Sends ``filename`` to the endpoint through ``call_endpoint``, prints the
    returned text, and writes the parameters and the full JSON response to
    ``<basename>_<timestamp>.txt`` in the current working directory.

    Args:
        filename: Path of the audio file to send.
        params: Parameters dict forwarded to the endpoint. When ``None`` the
            defaults defined below are used. An explicit empty dict is
            forwarded as-is, matching how ``call_endpoint`` treats it.

    Returns:
        The name of the results file that was written.

    Raises:
        SystemExit: With exit code 1 when the response carries no ``"text"``
            entry (the response is printed first).
    """
    if params is None:
        parameters = {
            "word_timestamps": False,
            "temperature": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1],
            "repetition_penalty": 1.2,
            "log_prob_threshold": -1.0,
        }
    else:
        parameters = params

    result = call_endpoint(filename, parameters)

    # The response is arbitrary JSON; anything that is not an object (e.g. a
    # list) cannot hold a "text" key and is reported as an error below
    # instead of raising AttributeError on `.get`.
    txt = result.get("text") if isinstance(result, dict) else None
    if txt is None:
        print("Error: No text returned")
        print(result)
        sys.exit(1)

    print(txt)

    # Second-resolution local timestamp; ':' is replaced because it is not a
    # valid file-name character on every platform.
    isodate = datetime.datetime.now().isoformat(timespec="seconds")
    isodate = isodate.replace(":", "_")

    resultfile = f"{os.path.basename(filename)}_{isodate}.txt"

    with open(resultfile, "w", encoding="utf-8") as f:
        f.write("----- PARAMETERS -----\n")
        f.write(json.dumps(parameters, indent=2, ensure_ascii=False))
        f.write("\n----- RESULT -----\n")
        f.write(json.dumps(result, indent=2, ensure_ascii=False))
        f.write("\n")

    # Hand the location of the saved result back to the caller rather than
    # an implicit None.
    return resultfile
|
|
|
|
|
|
|
|
def call_endpoint(filename, params=None, timeout=None):
    """POST an audio file to the endpoint and return the decoded JSON reply.

    The file is read in full, base64-encoded, and embedded in a JSON payload
    together with the language code ``"da"`` and the request parameters.

    Args:
        filename: Path of the audio file to send.
        params: Parameters dict placed under ``"parameters"`` in the payload.
            When ``None`` the defaults defined below are used.
        timeout: Passed straight to ``requests.post``. ``None`` (the default)
            waits indefinitely, which is the library's own default.

    Returns:
        The decoded JSON body of the response. If the body is not valid JSON
        (for example an HTML error page from a gateway), a dict with the keys
        ``"error"``, ``"status_code"`` and ``"body"`` is returned instead so
        the caller can report what happened.
    """
    with open(filename, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")

    if params is None:
        parameters = {
            "initial_prompt": "Her går det godt når vi taler om det",
            "word_timestamps": False,
            "vad_parameters": dict(min_silence_duration_ms=500),
        }
    else:
        parameters = params

    print(f"Parameters: {parameters}")
    payload = {
        # NOTE(review): the standard data-URI form is
        # "data:audio/wav;base64,<data>" (semicolon before "base64"). The
        # comma form is kept unchanged because the endpoint's parser is not
        # visible from here - confirm which one the handler expects.
        "inputs": f"data:audio/wav,base64,{audio_b64}",
        "language": "da",
        "parameters": parameters,
    }
    json_data = json.dumps(payload)

    started = datetime.datetime.now()
    print(f"Sending request to {API_URL}...")
    response = requests.post(
        API_URL, headers=headers, data=json_data, timeout=timeout
    )
    elapsed_time = datetime.datetime.now() - started
    print(f"Elapsed time: {elapsed_time}")

    try:
        return response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        # Surface the HTTP status and a bounded excerpt of the body instead
        # of an opaque decoding traceback.
        return {
            "error": "Response body is not valid JSON",
            "status_code": response.status_code,
            "body": response.text[:2000],
        }
|
|
|
|
|
|
|
|
def convert(filename):
    """Convert an audio file to a 16 kHz mono 16-bit PCM WAV using ffmpeg.

    The output is written next to the input as ``<name>_16k.wav``. If that
    file already exists the conversion is skipped.

    Args:
        filename: Path of the audio file to convert.

    Returns:
        The path of the converted (or already existing) ``_16k.wav`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Derive the output name from the real extension. A plain
    # str.replace(".wav", ...) leaves the name unchanged for inputs such as
    # "clip.mp3" or "clip.WAV" (so output == input and nothing is converted)
    # and rewrites every ".wav" occurrence elsewhere in the path.
    stem, _ext = os.path.splitext(filename)
    output_file = f"{stem}_16k.wav"

    if os.path.exists(output_file):
        print(f"{output_file} already exists, skipping conversion.")
        return output_file

    print(f"Converting {filename} to 16khz mono --> {output_file}...")
    subprocess.run(
        [
            "ffmpeg",
            "-i",
            filename,
            "-ar",
            "16000",
            "-ac",
            "1",
            "-c:a",
            "pcm_s16le",
            output_file,
        ],
        check=True,
    )
    return output_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_COMMANDS = ("convert", "transcribe")


def main(argv=None):
    """Run the command-line interface and return a process exit code.

    Usage:
        python test_endpoint.py convert <filename>
        python test_endpoint.py transcribe <filename>

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 when the command or its filename argument is missing
        or the command is unknown.
    """
    args = sys.argv if argv is None else list(argv)

    # With no command at all, show the transcribe usage.
    command = args[1] if len(args) > 1 else "transcribe"

    if command not in _COMMANDS:
        print(f"Unknown command: {command}", file=sys.stderr)
        for name in _COMMANDS:
            print(
                f"Usage: python test_endpoint.py {name} <filename>",
                file=sys.stderr,
            )
        return 1

    # Both commands need a filename in args[2], so at least three entries
    # are required (program name, command, filename).
    if len(args) < 3:
        print(
            f"Usage: python test_endpoint.py {command} <filename>",
            file=sys.stderr,
        )
        return 1

    filename = args[2]
    if command == "convert":
        convert(filename)
    else:
        output = transcribe(filename)
        # Avoid printing a literal "None" when transcribe returns nothing.
        if output is not None:
            print(output)
    return 0


# Guard the entry point so importing this module does not run the CLI.
if __name__ == "__main__":
    sys.exit(main())
|
|
|