Spaces:

EACXRLab
/

ECHO_Demo

Runtime error

App Files Community

ItsNotSoftware committed on Aug 5, 2024

Commit

f8cf3ad

verified ·

1 Parent(s): 840c4e3

Upload 11 files

Browse files

Files changed (12) hide show

.gitattributes +1 -0
26.wav +3 -0
app.py +60 -14
checkpoint-60/README.md +202 -0
checkpoint-60/adapter_config.json +32 -0
checkpoint-60/adapter_model.safetensors +3 -0
checkpoint-60/optimizer.pt +3 -0
checkpoint-60/preprocessor_config.json +14 -0
checkpoint-60/rng_state.pth +3 -0
checkpoint-60/scheduler.pt +3 -0
checkpoint-60/trainer_state.json +122 -0
checkpoint-60/training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 ariane6_example.mp3 filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 ariane6_example.mp3 filter=lfs diff=lfs merge=lfs -text
+26.wav filter=lfs diff=lfs merge=lfs -text

26.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09a2d84379a713d2517638b6d188a9493221b863339a8de2b06a9b7baa9de866
+size 14582680

app.py CHANGED Viewed

@@ -1,22 +1,68 @@
 import gradio as gr
-import whisper
-MODEL = whisper.load_model("small.en")
-def transcribe(audio):
-    result = MODEL.transcribe(audio)
-    try:
-        return result["text"]
-    except:
-        return ""
-examples = [["apollo11_example.mp3"], ["ariane6_example.mp3"]]
 ui = gr.Interface(
-    fn=transcribe,
     inputs=gr.Audio(
         sources=["microphone", "upload"],
         type="filepath",
@@ -26,12 +72,12 @@ ui = gr.Interface(
         label="Transcription",
         placeholder="The transcribed text will appear here...",
     ),
-    title="ECHO",
     description="""
-    This is a demo of the transcription capabilities of "ECHO". This could be adapded to run real-time transcription on a live audio stream like ISS communications.
     ### How to use:
-    1. **Record or Upload**: Click on the microphone icon 🎙️ to record audio, usign your microphone, or click on the upload button ⬆️ to upload an audio file.
     You can also use the **Examples** provided below, as inputs, by clicking on them.
     2. **Click Submit**: Clicking the submit button will transcribe the audio.
     3. **Read the Transcription**: The transcribed text will appear in the text box below the audio input section.
@@ -39,4 +85,4 @@ ui = gr.Interface(
     examples=examples,
 )
-ui.launch()

 import gradio as gr
+import torch
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from peft import PeftModel
+import torchaudio
+# Constants
+MODEL = "openai/whisper-small.en"
+ADAPTER_DIR = "./adapter"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+SAMPLE_RATE = 16000
+CHUNK_LENGTH = 30  # Length of each audio chunk in seconds
+# Load processor and model
+processor = WhisperProcessor.from_pretrained(MODEL)
+base_model = WhisperForConditionalGeneration.from_pretrained(MODEL)
+finetuned_model = PeftModel.from_pretrained(base_model, ADAPTER_DIR)
+finetuned_model = finetuned_model.merge_and_unload().to(DEVICE)
+def load_audio(audio_path: str):
+    """Load and preprocess the audio file."""
+    speech_array, sampling_rate = torchaudio.load(audio_path)
+    # Convert stereo to mono by averaging the two channels
+    if speech_array.shape[0] > 1:
+        speech_array = torch.mean(speech_array, dim=0, keepdim=True)
+    # Resample to the model's required sample rate
+    if sampling_rate != SAMPLE_RATE:
+        resampler = torchaudio.transforms.Resample(sampling_rate, SAMPLE_RATE)
+        speech_array = resampler(speech_array)
+    return speech_array.squeeze().numpy()
+def chunk_audio(audio, chunk_length=CHUNK_LENGTH):
+    """Split the audio into chunks of specified length in seconds."""
+    chunk_samples = chunk_length * SAMPLE_RATE
+    return [audio[i : i + chunk_samples] for i in range(0, len(audio), chunk_samples)]
+def transcribe_chunk(chunk):
+    """Transcribe a single audio chunk."""
+    inputs = processor(chunk, sampling_rate=SAMPLE_RATE, return_tensors="pt")
+    input_features = inputs.input_features.to(DEVICE)
+    with torch.no_grad():
+        predicted_ids = finetuned_model.generate(input_features)
+    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+def transcribe_audio(audio_path: str) -> str:
+    """Transcribe the given audio file using the specified Whisper model."""
+    audio = load_audio(audio_path)
+    audio_chunks = chunk_audio(audio)
+    transcriptions = [transcribe_chunk(chunk) for chunk in audio_chunks]
+    return " ".join(transcriptions)
+examples = [["apollo11_example.mp3"], ["mock_operator_example.wav"]]
 ui = gr.Interface(
+    fn=transcribe_audio,
     inputs=gr.Audio(
         sources=["microphone", "upload"],
         type="filepath",
         label="Transcription",
         placeholder="The transcribed text will appear here...",
     ),
+    title="ECHO V0.1",
     description="""
+    This is a demo of the transcription capabilities of "ECHO". This could be adapted to run real-time transcription on a live audio stream like ISS communications.
     ### How to use:
+    1. **Record or Upload**: Click on the microphone icon 🎙️ to record audio, using your microphone, or click on the upload button ⬆️ to upload an audio file.
     You can also use the **Examples** provided below, as inputs, by clicking on them.
     2. **Click Submit**: Clicking the submit button will transcribe the audio.
     3. **Read the Transcription**: The transcribed text will appear in the text box below the audio input section.
     examples=examples,
 )
+ui.launch(share=False)

checkpoint-60/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: openai/whisper-small.en
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

checkpoint-60/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": {
+    "base_model_class": "WhisperForConditionalGeneration",
+    "parent_library": "transformers.models.whisper.modeling_whisper"
+  },
+  "base_model_name_or_path": "openai/whisper-small.en",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-60/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b15e3353f37af4d1d7ea1e44c080cec9113f64cb7c6a493493c4c58d2bc1727
+size 14176064

checkpoint-60/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbd9b9489f49bc8b005817941551fb0de48c78b22311090f390a56d914770267
+size 28432570

checkpoint-60/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

checkpoint-60/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:70af4d9ff66efb9b200a49233a3dcb06212cd5c42860c820af8b3b0f975f616d
+size 14244

checkpoint-60/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6574cff4022f3e678230ab559d2aabe9a67f1acea6716a875a3ad2c85bea47c
+size 1064

checkpoint-60/trainer_state.json ADDED Viewed

	@@ -0,0 +1,122 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0,
+      "eval_loss": 2.356511354446411,
+      "eval_runtime": 27.7264,
+      "eval_samples_per_second": 15.4,
+      "eval_steps_per_second": 0.252,
+      "eval_wer": 22.12566844919786,
+      "step": 0
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.6522291302680969,
+      "learning_rate": 0.0008,
+      "loss": 0.8625,
+      "step": 12
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.45175907015800476,
+      "eval_runtime": 27.2705,
+      "eval_samples_per_second": 15.658,
+      "eval_steps_per_second": 0.257,
+      "eval_wer": 17.290552584670234,
+      "step": 12
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.39275336265563965,
+      "learning_rate": 0.0006,
+      "loss": 0.2741,
+      "step": 24
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.4212001860141754,
+      "eval_runtime": 27.8396,
+      "eval_samples_per_second": 15.338,
+      "eval_steps_per_second": 0.251,
+      "eval_wer": 18.137254901960784,
+      "step": 24
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.35100314021110535,
+      "learning_rate": 0.0004,
+      "loss": 0.1835,
+      "step": 36
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.4116327464580536,
+      "eval_runtime": 27.3845,
+      "eval_samples_per_second": 15.593,
+      "eval_steps_per_second": 0.256,
+      "eval_wer": 17.446524064171122,
+      "step": 36
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.23920069634914398,
+      "learning_rate": 0.0002,
+      "loss": 0.1354,
+      "step": 48
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.41445085406303406,
+      "eval_runtime": 27.6194,
+      "eval_samples_per_second": 15.46,
+      "eval_steps_per_second": 0.253,
+      "eval_wer": 18.070409982174688,
+      "step": 48
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.18301299214363098,
+      "learning_rate": 0.0,
+      "loss": 0.1074,
+      "step": 60
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.41686326265335083,
+      "eval_runtime": 27.6106,
+      "eval_samples_per_second": 15.465,
+      "eval_steps_per_second": 0.254,
+      "eval_wer": 17.713903743315505,
+      "step": 60
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 60,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1145212153856e+18,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-60/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2adcdff3799c0e56a840f3713150b8368783a95ba20e632256f7506c42c60001
+size 5240