Slower-whisper

Sleeping

App Files Files Community

aadnk committed on Jul 15, 2023

Commit

43c1beb

•

2 Parent(s): 76110f9 e8a76fc

Merge branch 'main' of https://huggingface.co/spaces/aadnk/whisper-webui

Browse files

Files changed (4) hide show

README.md +1 -1
app.py +1 -0
src/whisper/dummyWhisperContainer.py +101 -0
src/whisper/whisperFactory.py +4 -0

README.md CHANGED Viewed

@@ -71,7 +71,7 @@ pip install -r requirements-fasterWhisper.txt
 ```
 And then run the App or the CLI with the `--whisper_implementation faster-whisper` flag:
 ```
-python app.py --whisper_implementation faster-whisper --input_audio_max_duration -1 --server_name 127.0.0.1 --auto_parallel True
 ```
 You can also select the whisper implementation in `config.json5`:
 ```json5

 ```
 And then run the App or the CLI with the `--whisper_implementation faster-whisper` flag:
 ```
+python app.py --whisper_implementation faster-whisper --input_audio_max_duration -1 --server_name 127.0.0.1 --server_port 7860 --auto_parallel True
 ```
 You can also select the whisper implementation in `config.json5`:
 ```json5

app.py CHANGED Viewed

@@ -624,4 +624,5 @@ if __name__ == '__main__':
     if (threads := args.pop("threads")) > 0:
         torch.set_num_threads(threads)
     create_ui(app_config=updated_config)

     if (threads := args.pop("threads")) > 0:
         torch.set_num_threads(threads)
+    print("Using whisper implementation: " + updated_config.whisper_implementation)
     create_ui(app_config=updated_config)

src/whisper/dummyWhisperContainer.py ADDED Viewed

	@@ -0,0 +1,101 @@

+from typing import List
+import ffmpeg
+from src.config import ModelConfig
+from src.hooks.progressListener import ProgressListener
+from src.modelCache import ModelCache
+from src.prompts.abstractPromptStrategy import AbstractPromptStrategy
+from src.whisper.abstractWhisperContainer import AbstractWhisperCallback, AbstractWhisperContainer
+class DummyWhisperContainer(AbstractWhisperContainer):
+    def __init__(self, model_name: str, device: str = None, compute_type: str = "float16",
+                       download_root: str = None,
+                       cache: ModelCache = None, models: List[ModelConfig] = []):
+        super().__init__(model_name, device, compute_type, download_root, cache, models)
+    def ensure_downloaded(self):
+        """
+        Ensure that the model is downloaded. This is useful if you want to ensure that the model is downloaded before
+        passing the container to a subprocess.
+        """
+        print("[Dummy] Ensuring that the model is downloaded")
+    def _create_model(self):
+        print("[Dummy] Creating dummy whisper model " + self.model_name + " for device " + str(self.device))
+        return None
+    def create_callback(self, language: str = None, task: str = None,
+                        prompt_strategy: AbstractPromptStrategy = None,
+                        **decodeOptions: dict) -> AbstractWhisperCallback:
+        """
+        Create a WhisperCallback object that can be used to transcript audio files.
+        Parameters
+        ----------
+        language: str
+            The target language of the transcription. If not specified, the language will be inferred from the audio content.
+        task: str
+            The task - either translate or transcribe.
+        prompt_strategy: AbstractPromptStrategy
+            The prompt strategy to use. If not specified, the prompt from Whisper will be used.
+        decodeOptions: dict
+            Additional options to pass to the decoder. Must be pickleable.
+        Returns
+        -------
+        A WhisperCallback object.
+        """
+        return DummyWhisperCallback(self, language=language, task=task, prompt_strategy=prompt_strategy, **decodeOptions)
+class DummyWhisperCallback(AbstractWhisperCallback):
+    def __init__(self, model_container: DummyWhisperContainer, **decodeOptions: dict):
+        self.model_container = model_container
+        self.decodeOptions = decodeOptions
+    def invoke(self, audio, segment_index: int, prompt: str, detected_language: str, progress_listener: ProgressListener = None):
+        """
+        Peform the transcription of the given audio file or data.
+        Parameters
+        ----------
+        audio: Union[str, np.ndarray, torch.Tensor]
+            The audio file to transcribe, or the audio data as a numpy array or torch tensor.
+        segment_index: int
+            The target language of the transcription. If not specified, the language will be inferred from the audio content.
+        task: str
+            The task - either translate or transcribe.
+        progress_listener: ProgressListener
+            A callback to receive progress updates.
+        """
+        print("[Dummy] Invoking dummy whisper callback for segment " + str(segment_index))
+        # Estimate length
+        if isinstance(audio, str):
+            audio_length = ffmpeg.probe(audio)["format"]["duration"]
+        # Format is pcm_s16le at a sample rate of 16000, loaded as a float32 array.
+        else:
+            audio_length = len(audio) / 16000
+        # Convert the segments to a format that is easier to serialize
+        whisper_segments = [{
+            "text": "Dummy text for segment " + str(segment_index),
+            "start": 0,
+            "end": audio_length,
+            # Extra fields added by faster-whisper
+            "words": []
+        }]
+        result = {
+            "segments": whisper_segments,
+            "text": "Dummy text for segment " + str(segment_index),
+            "language": "en" if detected_language is None else detected_language,
+            # Extra fields added by faster-whisper
+            "language_probability": 1.0,
+            "duration": audio_length,
+        }
+        if progress_listener is not None:
+            progress_listener.on_finished()
+        return result

src/whisper/whisperFactory.py CHANGED Viewed

@@ -15,5 +15,9 @@ def create_whisper_container(whisper_implementation: str,
     elif (whisper_implementation == "faster-whisper" or whisper_implementation == "faster_whisper"):
         from src.whisper.fasterWhisperContainer import FasterWhisperContainer
         return FasterWhisperContainer(model_name=model_name, device=device, compute_type=compute_type, download_root=download_root, cache=cache, models=models)
     else:
         raise ValueError("Unknown Whisper implementation: " + whisper_implementation)

     elif (whisper_implementation == "faster-whisper" or whisper_implementation == "faster_whisper"):
         from src.whisper.fasterWhisperContainer import FasterWhisperContainer
         return FasterWhisperContainer(model_name=model_name, device=device, compute_type=compute_type, download_root=download_root, cache=cache, models=models)
+    elif (whisper_implementation == "dummy-whisper" or whisper_implementation == "dummy_whisper" or whisper_implementation == "dummy"):
+        # This is useful for testing
+        from src.whisper.dummyWhisperContainer import DummyWhisperContainer
+        return DummyWhisperContainer(model_name=model_name, device=device, compute_type=compute_type, download_root=download_root, cache=cache, models=models)
     else:
         raise ValueError("Unknown Whisper implementation: " + whisper_implementation)