Upload folder using huggingface_hub
Browse files- app.py +10 -4
- model.yml +66 -0
- models/__pycache__/audio.cpython-311.pyc +0 -0
- models/audio.py +4 -0
- routes/InferenceRoute.py +15 -0
- routes/__pycache__/AudioTokenizerRoute.cpython-311.pyc +0 -0
- routes/__pycache__/InferenceRoute.cpython-311.pyc +0 -0
- services/__pycache__/AudioTokenizerService.cpython-311.pyc +0 -0
- utils/__pycache__/custom_component.cpython-311.pyc +0 -0
- utils/__pycache__/utils.cpython-311.pyc +0 -0
- utils/utils.py +34 -0
app.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
import argparse, os,sys
|
2 |
parser = argparse.ArgumentParser(description="WhisperVQ Application")
|
3 |
-
parser.add_argument('--
|
4 |
default='whisper.log', help='The log file path')
|
5 |
-
parser.add_argument('--
|
6 |
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE'], help='The log level')
|
7 |
parser.add_argument('--port', type=int, default=3348,
|
8 |
help='The port to run the WhisperVQ app on')
|
9 |
-
parser.add_argument('--
|
10 |
help='The port to run the WhisperVQ app on')
|
11 |
-
parser.add_argument('--
|
12 |
help='The package-dir to be extended to sys.path')
|
13 |
args = parser.parse_args()
|
14 |
sys.path.insert(0, args.package_dir)
|
@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
|
|
34 |
|
35 |
from services.AudioTokenizerService import get_audio_tokenizer_service
|
36 |
from routes.AudioTokenizerRoute import audio_tokenizer_router
|
|
|
37 |
|
38 |
@asynccontextmanager
|
39 |
async def lifespan(app: FastAPI):
|
@@ -47,6 +48,7 @@ app = FastAPI(lifespan=lifespan)
|
|
47 |
|
48 |
# include the routes
|
49 |
app.include_router(audio_tokenizer_router)
|
|
|
50 |
|
51 |
def self_terminate():
|
52 |
time.sleep(1)
|
@@ -59,6 +61,10 @@ async def destroy():
|
|
59 |
threading.Thread(target=self_terminate, daemon=True).start()
|
60 |
return {"success": True}
|
61 |
|
|
|
|
|
|
|
|
|
62 |
if __name__ == "__main__":
|
63 |
import uvicorn
|
64 |
from uvicorn.config import LOGGING_CONFIG
|
|
|
1 |
import argparse, os, sys

# Command-line configuration for the WhisperVQ server process.
parser = argparse.ArgumentParser(description="WhisperVQ Application")
parser.add_argument('--log_path', type=str,
                    default='whisper.log', help='The log file path')
parser.add_argument('--log_level', type=str, default='INFO',
                    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE'], help='The log level')
parser.add_argument('--port', type=int, default=3348,
                    help='The port to run the WhisperVQ app on')
# Fixed: help text was copy-pasted from --port and wrongly described a port.
parser.add_argument('--device_id', type=str, default="0",
                    help='The GPU device id to run the model on')
parser.add_argument('--package_dir', type=str, default="",
                    help='The package-dir to be extended to sys.path')
args = parser.parse_args()
# Make the configured package directory importable before project imports run.
sys.path.insert(0, args.package_dir)
|
|
|
34 |
|
35 |
from services.AudioTokenizerService import get_audio_tokenizer_service
|
36 |
from routes.AudioTokenizerRoute import audio_tokenizer_router
|
37 |
+
from routes.InferenceRoute import audio_inference_router
|
38 |
|
39 |
@asynccontextmanager
|
40 |
async def lifespan(app: FastAPI):
|
|
|
48 |
|
49 |
# include the routes
|
50 |
app.include_router(audio_tokenizer_router)
|
51 |
+
app.include_router(audio_inference_router)
|
52 |
|
53 |
def self_terminate():
|
54 |
time.sleep(1)
|
|
|
61 |
threading.Thread(target=self_terminate, daemon=True).start()
|
62 |
return {"success": True}
|
63 |
|
64 |
+
@app.get("/health")
async def health():
    """Liveness probe: returns a static OK payload while the server is running."""
    return {"status": "OK"}
|
67 |
+
|
68 |
if __name__ == "__main__":
|
69 |
import uvicorn
|
70 |
from uvicorn.config import LOGGING_CONFIG
|
model.yml
CHANGED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# BEGIN GENERAL GGUF METADATA
|
2 |
+
id: ichigo-whispervq # Model ID unique between models
|
3 |
+
model: ichigo-whispervq # Model ID which is used for request construct - should be unique between models (author / quantization)
|
4 |
+
name: Ichigo WhisperVQ
|
5 |
+
version: 1 # metadata.version
|
6 |
+
|
7 |
+
# END GENERAL METADATA
|
8 |
+
|
9 |
+
# BEGIN INFERENCE PARAMETERS
|
10 |
+
# BEGIN REQUIRED
|
11 |
+
|
12 |
+
load_model: # method to load python model through API
|
13 |
+
method: post
|
14 |
+
path: /loadmodel
|
15 |
+
transform_request: "" # jinja2 template to transform request
|
16 |
+
transform_response: "" # jinja2 template to transform response
|
17 |
+
|
18 |
+
destroy: # method to destroy python process through API
|
19 |
+
method: delete
|
20 |
+
path: /detroy # NOTE(review): looks like a typo for /destroy — confirm against the route registered in app.py
|
21 |
+
|
22 |
+
health_check: # method to check the health of the python process through API
|
23 |
+
method: get
|
24 |
+
path: /health
|
25 |
+
|
26 |
+
inference: # method to do inference python model through API
|
27 |
+
method: post
|
28 |
+
path: /inference # NOTE(review): audio_inference_router uses prefix "/audio", so the served path is /audio/inference — verify this matches
|
29 |
+
transform_request: ""
|
30 |
+
transform_response: ""
|
31 |
+
|
32 |
+
extra_endpoints: # utility methods
|
33 |
+
- method: post
|
34 |
+
path: /tokenize/wav
|
35 |
+
transform_request: ""
|
36 |
+
transform_response: ""
|
37 |
+
- method: get
|
38 |
+
path: /supported_formats
|
39 |
+
|
40 |
+
# END REQUIRED
|
41 |
+
|
42 |
+
# BEGIN OPTIONAL
|
43 |
+
|
44 |
+
# END OPTIONAL
|
45 |
+
# END INFERENCE PARAMETERS
|
46 |
+
|
47 |
+
# BEGIN SERVER START PARAMETERS
|
48 |
+
# BEGIN REQUIRED
|
49 |
+
files: /home/thuan/cortexcpp/models/cortex.so/whispervq/fp16
|
50 |
+
port: 3348
|
51 |
+
log_path: whisper.log
|
52 |
+
log_level: INFO
|
53 |
+
environment: whispervq # python environment to run model
|
54 |
+
script: app.py
|
55 |
+
command: ["python"] # this is the base command, cortex will automatic find the correct location of python in env and add params when execute command
|
56 |
+
|
57 |
+
engine: python-engine
|
58 |
+
# END REQUIRED
|
59 |
+
|
60 |
+
# BEGIN OPTIONAL
|
61 |
+
extra_params:
|
62 |
+
device_id: "0"
|
63 |
+
package_dir: "" # the package directory to be searched
|
64 |
+
|
65 |
+
# END OPTIONAL
|
66 |
+
# END SERVER START PARAMETERS
|
models/__pycache__/audio.cpython-311.pyc
ADDED
Binary file (1.24 kB). View file
|
|
models/audio.py
CHANGED
@@ -20,3 +20,7 @@ FORMAT_BACKENDS = {
|
|
20 |
AudioFormat.OPUS: ["ffmpeg"],
|
21 |
AudioFormat.PCM: ["soundfile"]
|
22 |
}
|
|
|
|
|
|
|
|
|
|
20 |
AudioFormat.OPUS: ["ffmpeg"],
|
21 |
AudioFormat.PCM: ["soundfile"]
|
22 |
}
|
23 |
+
|
24 |
+
class AudioRequest(BaseModel):
    # Base64-encoded audio payload; decoded server-side before tokenization.
    data: str
    # Audio container format of the payload. The default is the string "wav",
    # which pydantic coerces into the AudioFormat enum — presumably a WAV
    # member exists alongside the OPUS/PCM members visible above; confirm.
    format: AudioFormat = "wav"
|
routes/InferenceRoute.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from services.AudioTokenizerService import get_audio_tokenizer_service
|
2 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
3 |
+
from fastapi import File, UploadFile
|
4 |
+
from models.audio import AudioFormat, FORMAT_BACKENDS, AudioRequest
|
5 |
+
from utils.utils import decode_base64_to_audio
|
6 |
+
import base64
|
7 |
+
|
8 |
+
audio_inference_router = APIRouter(prefix="/audio", tags=["audio"])


@audio_inference_router.post("/inference")
async def tokenize_audio(request: AudioRequest):
    """Decode the base64 audio payload and run it through the tokenizer service."""
    raw_audio = decode_base64_to_audio(request.data)
    tokenizer = get_audio_tokenizer_service()
    return tokenizer.tokenize(raw_audio, request.format)
|
routes/__pycache__/AudioTokenizerRoute.cpython-311.pyc
ADDED
Binary file (1.68 kB). View file
|
|
routes/__pycache__/InferenceRoute.cpython-311.pyc
ADDED
Binary file (1.29 kB). View file
|
|
services/__pycache__/AudioTokenizerService.cpython-311.pyc
ADDED
Binary file (8.88 kB). View file
|
|
utils/__pycache__/custom_component.cpython-311.pyc
ADDED
Binary file (12.8 kB). View file
|
|
utils/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (1.8 kB). View file
|
|
utils/utils.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
|
3 |
+
def decode_base64_to_audio(
    base64_string: str
) -> bytes:
    """
    Decode a base64 string into raw audio bytes.

    Args:
        base64_string (str): Base64 encoded audio payload.

    Returns:
        bytes: Decoded audio bytes.

    Raises:
        ValueError: If the base64 string is malformed (e.g. bad padding).
    """
    # NOTE: b64decode() runs with validate=False, so characters outside the
    # base64 alphabet are silently discarded; only padding/length errors raise.
    # The previous docstring mentioned an `output_path` parameter and file
    # writing that never existed; the dead `except IOError` branch (nothing
    # here performs I/O) has been removed accordingly.
    try:
        return base64.b64decode(base64_string)
    except base64.binascii.Error as e:
        raise ValueError(f"Invalid base64 string: {e}") from e
|
27 |
+
|
28 |
+
def encode_audio_to_base64(byte_data: bytes) -> str:
    """
    Encode raw audio bytes as a base64 ASCII string.

    Args:
        byte_data (bytes): Raw audio bytes to encode.

    Returns:
        str: Base64-encoded representation of the input.
    """
    # b64encode operates purely in memory — it raises TypeError for non-bytes
    # input and never IOError, so the previous `except IOError` was dead code.
    return base64.b64encode(byte_data).decode('utf-8')
|