Spaces:

tykiww
/

diarize_and_transcribe

Running

tykiww committed on Aug 22

Commit

6386953

•

1 Parent(s): c19e19e

Create asr.py

Files changed (1) hide show

services/asr.py ADDED Viewed

import logging

import torch
from transformers import pipeline
class Transcriber:
    """Wrap a ``transformers`` pipeline configured from ``conf["model"]["asr"]``.

    The pipeline is built once, when the instance is created, and reused by
    every call to :meth:`run`.
    """

    # Raw pipeline output is reported through this logger at DEBUG level
    # instead of being printed unconditionally to stdout.
    _logger = logging.getLogger(__name__)

    def __init__(self, conf):
        """Store the configuration and build the pipeline.

        Args:
            conf: Nested mapping. This class reads
                ``conf["model"]["asr"]["type"]``,
                ``conf["model"]["asr"]["transcriber"]`` and
                ``conf["model"]["asr"]["max_new_tokens"]``.

        Raises:
            KeyError: If ``"type"`` or ``"transcriber"`` is missing, since the
                pipeline is built here.
        """
        self.conf = conf
        self.pipeline = self.asr_pipeline()

    def asr_pipeline(self):
        """Build the pipeline described by ``conf["model"]["asr"]``.

        Returns:
            The object returned by ``transformers.pipeline``, created with
            ``"type"`` as its first positional argument and ``"transcriber"``
            as ``model``.
        """
        asr_conf = self.conf["model"]["asr"]
        return pipeline(
            asr_conf["type"],
            model=asr_conf["transcriber"],
            # 0 selects the first CUDA device; -1 selects the CPU.
            device=0 if torch.cuda.is_available() else -1,
        )

    def run(self, file_path):
        """Run the pipeline on ``file_path`` with timestamps requested.

        Args:
            file_path: Input forwarded unchanged as the pipeline's first
                positional argument.

        Returns:
            ``output["chunks"]`` when the pipeline result contains that key;
            otherwise the pipeline result itself, unchanged.

        Raises:
            KeyError: If ``conf["model"]["asr"]["max_new_tokens"]`` is missing.
        """
        generate_kwargs = {
            "max_new_tokens": self.conf["model"]["asr"]["max_new_tokens"],
        }
        output = self.pipeline(
            file_path,
            generate_kwargs=generate_kwargs,
            return_timestamps=True,
        )
        # Lazy %-formatting: the (potentially large) result is only rendered
        # when DEBUG logging is actually enabled.
        self._logger.debug("ASR pipeline output: %s", output)
        # Fall back to the whole result when no "chunks" key is present.
        return output.get("chunks", output)