|
<!DOCTYPE html> |
|
<html lang="en"> |
|
|
|
<head> |
|
<meta charset="UTF-8"> |
|
<title>Automatic Speech Recognition - Hugging Face Transformers.js</title> |
|
|
|
<script type="module"> |
|
|
|
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.5.4'; |
|
|
|
|
|
// Publish the imported `pipeline` factory on the global object so that the classic
// (non-module) script later in this page can call it by its bare name.
globalThis.pipeline = pipeline;
|
</script> |
|
|
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.0/dist/css/bootstrap.min.css" rel="stylesheet"> |
|
|
|
<link rel="stylesheet" href="css/styles.css"> |
|
</head> |
|
|
|
<body> |
|
<div class="container-main"> |
|
|
|
<div class="header"> |
|
<div class="header-logo"> |
|
<img src="images/logo.png" alt="logo"> |
|
</div> |
|
<div class="header-main-text"> |
|
<h1>Hugging Face Transformers.js</h1> |
|
</div> |
|
<div class="header-sub-text"> |
|
<h3>Free AI Models for JavaScript Web Development</h3> |
|
</div> |
|
</div> |
|
<hr> |
|
|
|
|
|
<div class="row mt-5"> |
|
<div class="col-md-12 text-center"> |
|
<a href="index.html" class="btn btn-outline-secondary" |
|
style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a> |
|
</div> |
|
</div> |
|
|
|
|
|
<div class="container mt-5"> |
|
|
|
<div class="text-center"> |
|
<h2>Audio</h2> |
|
<h4>Automatic Speech Recognition - English</h4> |
|
</div> |
|
|
|
|
|
<div id="transcribe-english-container" class="container mt-4"> |
|
<h5>Transcribe English:</h5> |
|
<div class="d-flex align-items-center"> |
|
<label for="transcribeEnglishURLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter |
|
audio URL:</label> |
|
<input type="text" class="form-control flex-grow-1" id="transcribeEnglishURLText" |
|
value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav" |
|
placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;"> |
|
<button id="TranscribeEnglishButton" class="btn btn-primary" |
|
onclick="transcribeEnglish()">Transcribe</button> |
|
</div> |
|
<div class="mt-4"> |
|
<h4>Output:</h4> |
|
<pre id="outputArea"></pre> |
|
</div> |
|
</div> |
|
|
|
<hr> |
|
|
|
<div id="transcribe-english-local-container" class="container mt-4"> |
|
<h5>Transcribe a Local English Audio File:</h5> |
|
<div class="d-flex align-items-center"> |
|
<label for="transcribeEnglishLocalFile" class="mb-0 text-nowrap" style="margin-right: 15px;">Select |
|
Local Audio:</label> |
|
<input type="file" id="transcribeEnglishLocalFile" accept="audio/*" /> |
|
<button id="TranscribeEnglishButtonLocal" class="btn btn-primary" |
|
onclick="transcribeEnglishLocal()">Transcribe</button> |
|
</div> |
|
<div class="mt-4"> |
|
<h4>Output:</h4> |
|
<pre id="outputAreaLocal"></pre> |
|
</div> |
|
</div> |
|
|
|
<hr> |
|
|
|
<div id="transcribe-english-timestamps-container" class="container mt-4"> |
|
<h5>Transcribe English with Timestamps:</h5> |
|
<div class="d-flex align-items-center"> |
|
<label for="transcribeEnglishTimestampsURLText" class="mb-0 text-nowrap" |
|
style="margin-right: 15px;">Enter |
|
audio URL:</label> |
|
<input type="text" class="form-control flex-grow-1" id="transcribeEnglishTimestampsURLText" |
|
value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav" |
|
placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;"> |
|
<button id="TranscribeEnglishTimestampsButton" class="btn btn-primary" |
|
onclick="transcribeEnglishTimestamps()">Transcribe</button> |
|
</div> |
|
<div class="mt-4"> |
|
<h4>Output:</h4> |
|
<pre id="outputAreaTimestamps"></pre> |
|
</div> |
|
</div> |
|
|
|
<hr> |
|
|
|
<div id="transcribe-english-word-level-timestamps-container" class="container mt-4"> |
|
<h5>Transcribe English with Word-level Timestamps:</h5> |
|
<div class="d-flex align-items-center"> |
|
<label for="transcribeEnglishWordlevelTimestampsURLText" class="mb-0 text-nowrap" |
|
style="margin-right: 15px;">Enter |
|
audio URL:</label> |
|
<input type="text" class="form-control flex-grow-1" id="transcribeEnglishWordlevelTimestampsURLText" |
|
value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav" |
|
placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;"> |
|
<button id="TranscribeEnglishWord-levelTimestampsButton" class="btn btn-primary" |
|
onclick="transcribeEnglishWordlevelTimestamps()">Transcribe</button> |
|
</div> |
|
<div class="mt-4"> |
|
<h4>Output:</h4> |
|
<pre id="outputAreaWordlevelTimestamps"></pre> |
|
</div> |
|
</div> |
|
|
|
<hr> |
|
|
|
<div id="transcribe-english-30-container" class="container mt-4"> |
|
<h5>Transcribe/Translate Audio Longer Than 30 Seconds:</h5> |
|
<div class="d-flex align-items-center"> |
|
<label for="transcribeEnglish30URLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter |
|
audio URL:</label> |
|
<input type="text" class="form-control flex-grow-1" id="transcribeEnglish30URLText" |
|
value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/ted_60.wav" |
|
placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;"> |
|
<button id="TranscribeEnglish30Button" class="btn btn-primary" |
|
onclick="transcribeEnglish30()">Transcribe</button> |
|
</div> |
|
<div class="mt-4"> |
|
<h4>Output:</h4> |
|
<pre id="outputArea30"></pre> |
|
</div> |
|
</div> |
|
|
|
|
|
<div class="row mt-5"> |
|
<div class="col-md-12 text-center"> |
|
<a href="index.html" class="btn btn-outline-secondary" |
|
style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a> |
|
</div> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
<script> |
|
|
|
// Shared speech-recognition pipeline used by every transcribe* handler on this page.
// It stays undefined until initializeModel() has resolved.
let transcriber;

/**
 * Builds the 'automatic-speech-recognition' pipeline for the
 * 'Xenova/whisper-tiny.en' model and stores it in the shared `transcriber` variable.
 *
 * Relies on a global `pipeline` function being available when it is called.
 *
 * @returns {Promise<void>} Resolves once `transcriber` has been assigned; rejects
 *     if `pipeline` rejects.
 */
async function initializeModel() {
    const task = 'automatic-speech-recognition';
    const modelId = 'Xenova/whisper-tiny.en';
    const loadedPipeline = await pipeline(task, modelId);
    transcriber = loadedPipeline;
}
|
|
|
/**
 * Transcribes the audio at the URL typed into "transcribeEnglishURLText" and shows
 * the JSON-formatted result in "outputArea".
 *
 * Instead of throwing, this handler writes a short message to the output area when
 * the model has not finished loading, when the URL field is empty, or when
 * transcription fails (failures are also logged to the console).
 *
 * @returns {Promise<void>} Settles once the output area has been updated.
 */
async function transcribeEnglish() {
    const outputArea = document.getElementById("outputArea");
    const audioUrl = document.getElementById("transcribeEnglishURLText").value.trim();

    // The button can be clicked before the model has finished loading.
    if (typeof transcriber !== "function") {
        outputArea.textContent = "The model is still loading. Please try again in a moment.";
        return;
    }
    if (audioUrl === "") {
        outputArea.textContent = "Please enter an audio URL first.";
        return;
    }

    outputArea.textContent = "Transcribing...";
    try {
        const result = await transcriber(audioUrl);
        // textContent writes the text verbatim; the <pre> element keeps the line breaks.
        outputArea.textContent = JSON.stringify(result, null, 2);
    } catch (err) {
        console.error("Transcription failed:", err);
        const reason = err instanceof Error ? err.message : String(err);
        outputArea.textContent = `Transcription failed: ${reason}`;
    }
}
|
|
|
/**
 * Transcribes the audio file chosen in the "transcribeEnglishLocalFile" input and
 * shows the JSON-formatted result in "outputAreaLocal".
 *
 * The file is handed to the model through a temporary object URL. That URL is
 * always revoked once transcription has finished (successfully or not), so repeated
 * runs do not accumulate object URLs.
 *
 * Alerts when no file is selected. Writes a short message to the output area when
 * the model has not finished loading or when transcription fails (failures are
 * also logged to the console).
 *
 * @returns {Promise<void>} Settles once the output area has been updated.
 */
async function transcribeEnglishLocal() {
    const outputArea = document.getElementById("outputAreaLocal");
    const file = document.getElementById("transcribeEnglishLocalFile").files[0];

    if (!file) {
        alert('Please select an audio file first.');
        return;
    }
    // The button can be clicked before the model has finished loading.
    if (typeof transcriber !== "function") {
        outputArea.textContent = "The model is still loading. Please try again in a moment.";
        return;
    }

    const url = URL.createObjectURL(file);
    outputArea.textContent = "Transcribing...";
    try {
        const result = await transcriber(url);
        // textContent writes the text verbatim; the <pre> element keeps the line breaks.
        outputArea.textContent = JSON.stringify(result, null, 2);
    } catch (err) {
        console.error("Transcription failed:", err);
        const reason = err instanceof Error ? err.message : String(err);
        outputArea.textContent = `Transcription failed: ${reason}`;
    } finally {
        // Release the temporary URL whether or not transcription succeeded.
        URL.revokeObjectURL(url);
    }
}
|
|
|
/**
 * Transcribes the audio at the URL typed into "transcribeEnglishTimestampsURLText",
 * passing `{ return_timestamps: true }` to the model, and shows the JSON-formatted
 * result in "outputAreaTimestamps".
 *
 * Instead of throwing, this handler writes a short message to the output area when
 * the model has not finished loading, when the URL field is empty, or when
 * transcription fails (failures are also logged to the console).
 *
 * @returns {Promise<void>} Settles once the output area has been updated.
 */
async function transcribeEnglishTimestamps() {
    const outputArea = document.getElementById("outputAreaTimestamps");
    const audioUrl = document.getElementById("transcribeEnglishTimestampsURLText").value.trim();

    // The button can be clicked before the model has finished loading.
    if (typeof transcriber !== "function") {
        outputArea.textContent = "The model is still loading. Please try again in a moment.";
        return;
    }
    if (audioUrl === "") {
        outputArea.textContent = "Please enter an audio URL first.";
        return;
    }

    outputArea.textContent = "Transcribing...";
    try {
        const result = await transcriber(audioUrl, { return_timestamps: true });
        // textContent writes the text verbatim; the <pre> element keeps the line breaks.
        outputArea.textContent = JSON.stringify(result, null, 2);
    } catch (err) {
        console.error("Transcription failed:", err);
        const reason = err instanceof Error ? err.message : String(err);
        outputArea.textContent = `Transcription failed: ${reason}`;
    }
}
|
|
|
/**
 * Transcribes the audio at the URL typed into
 * "transcribeEnglishWordlevelTimestampsURLText", passing
 * `{ return_timestamps: 'word' }` to the model, and shows the JSON-formatted result
 * in "outputAreaWordlevelTimestamps".
 *
 * Instead of throwing, this handler writes a short message to the output area when
 * the model has not finished loading, when the URL field is empty, or when
 * transcription fails (failures are also logged to the console).
 *
 * @returns {Promise<void>} Settles once the output area has been updated.
 */
async function transcribeEnglishWordlevelTimestamps() {
    const outputArea = document.getElementById("outputAreaWordlevelTimestamps");
    const audioUrl = document.getElementById("transcribeEnglishWordlevelTimestampsURLText").value.trim();

    // The button can be clicked before the model has finished loading.
    if (typeof transcriber !== "function") {
        outputArea.textContent = "The model is still loading. Please try again in a moment.";
        return;
    }
    if (audioUrl === "") {
        outputArea.textContent = "Please enter an audio URL first.";
        return;
    }

    outputArea.textContent = "Transcribing...";
    try {
        const result = await transcriber(audioUrl, { return_timestamps: 'word' });
        // textContent writes the text verbatim; the <pre> element keeps the line breaks.
        outputArea.textContent = JSON.stringify(result, null, 2);
    } catch (err) {
        console.error("Transcription failed:", err);
        const reason = err instanceof Error ? err.message : String(err);
        outputArea.textContent = `Transcription failed: ${reason}`;
    }
}
|
|
|
|
|
/**
 * Transcribes the audio at the URL typed into "transcribeEnglish30URLText", passing
 * `{ chunk_length_s: 30, stride_length_s: 5 }` to the model, and shows the
 * JSON-formatted result in "outputArea30".
 *
 * Instead of throwing, this handler writes a short message to the output area when
 * the model has not finished loading, when the URL field is empty, or when
 * transcription fails (failures are also logged to the console).
 *
 * @returns {Promise<void>} Settles once the output area has been updated.
 */
async function transcribeEnglish30() {
    const outputArea = document.getElementById("outputArea30");
    const audioUrl = document.getElementById("transcribeEnglish30URLText").value.trim();

    // The button can be clicked before the model has finished loading.
    if (typeof transcriber !== "function") {
        outputArea.textContent = "The model is still loading. Please try again in a moment.";
        return;
    }
    if (audioUrl === "") {
        outputArea.textContent = "Please enter an audio URL first.";
        return;
    }

    outputArea.textContent = "Transcribing...";
    try {
        const result = await transcriber(audioUrl, { chunk_length_s: 30, stride_length_s: 5 });
        // textContent writes the text verbatim; the <pre> element keeps the line breaks.
        outputArea.textContent = JSON.stringify(result, null, 2);
    } catch (err) {
        console.error("Transcription failed:", err);
        const reason = err instanceof Error ? err.message : String(err);
        outputArea.textContent = `Transcription failed: ${reason}`;
    }
}
|
|
|
|
|
// Start loading the model once the DOM is ready. The promise returned by
// initializeModel() is handled here so that a failed load is logged with context
// instead of surfacing as an unhandled promise rejection.
window.addEventListener("DOMContentLoaded", () => {
    initializeModel().catch((err) => {
        console.error("Failed to load the speech recognition model:", err);
    });
});
|
</script> |
|
</body> |
|
|
|
</html> |