Upload folder using huggingface_hub
Browse files- app.py +10 -4
- model.yml +66 -0
- models/__pycache__/audio.cpython-311.pyc +0 -0
- models/audio.py +4 -0
- routes/InferenceRoute.py +15 -0
- routes/__pycache__/AudioTokenizerRoute.cpython-311.pyc +0 -0
- routes/__pycache__/InferenceRoute.cpython-311.pyc +0 -0
- services/__pycache__/AudioTokenizerService.cpython-311.pyc +0 -0
- utils/__pycache__/custom_component.cpython-311.pyc +0 -0
- utils/__pycache__/utils.cpython-311.pyc +0 -0
- utils/utils.py +34 -0
app.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
import argparse, os,sys
|
2 |
parser = argparse.ArgumentParser(description="WhisperVQ Application")
|
3 |
-
parser.add_argument('--
|
4 |
default='whisper.log', help='The log file path')
|
5 |
-
parser.add_argument('--
|
6 |
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE'], help='The log level')
|
7 |
parser.add_argument('--port', type=int, default=3348,
|
8 |
help='The port to run the WhisperVQ app on')
|
9 |
-
parser.add_argument('--
|
10 |
help='The port to run the WhisperVQ app on')
|
11 |
-
parser.add_argument('--
|
12 |
help='The package-dir to be extended to sys.path')
|
13 |
args = parser.parse_args()
|
14 |
sys.path.insert(0, args.package_dir)
|
@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
|
|
34 |
|
35 |
from services.AudioTokenizerService import get_audio_tokenizer_service
|
36 |
from routes.AudioTokenizerRoute import audio_tokenizer_router
|
|
|
37 |
|
38 |
@asynccontextmanager
|
39 |
async def lifespan(app: FastAPI):
|
@@ -47,6 +48,7 @@ app = FastAPI(lifespan=lifespan)
|
|
47 |
|
48 |
# include the routes
|
49 |
app.include_router(audio_tokenizer_router)
|
|
|
50 |
|
51 |
def self_terminate():
|
52 |
time.sleep(1)
|
@@ -59,6 +61,10 @@ async def destroy():
|
|
59 |
threading.Thread(target=self_terminate, daemon=True).start()
|
60 |
return {"success": True}
|
61 |
|
|
|
|
|
|
|
|
|
62 |
if __name__ == "__main__":
|
63 |
import uvicorn
|
64 |
from uvicorn.config import LOGGING_CONFIG
|
|
|
1 |
import argparse, os, sys

# Command-line configuration for the WhisperVQ server process.
parser = argparse.ArgumentParser(description="WhisperVQ Application")
parser.add_argument('--log_path', type=str,
                    default='whisper.log', help='The log file path')
parser.add_argument('--log_level', type=str, default='INFO',
                    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE'], help='The log level')
parser.add_argument('--port', type=int, default=3348,
                    help='The port to run the WhisperVQ app on')
# Fixed: help text was copy-pasted from --port and wrongly described a port.
parser.add_argument('--device_id', type=str, default="0",
                    help='The GPU device id to run the model on')
parser.add_argument('--package_dir', type=str, default="",
                    help='The package-dir to be extended to sys.path')
args = parser.parse_args()
# Make the configured package directory importable before project imports run.
sys.path.insert(0, args.package_dir)
|
|
|
34 |
|
35 |
from services.AudioTokenizerService import get_audio_tokenizer_service
|
36 |
from routes.AudioTokenizerRoute import audio_tokenizer_router
|
37 |
+
from routes.InferenceRoute import audio_inference_router
|
38 |
|
39 |
@asynccontextmanager
|
40 |
async def lifespan(app: FastAPI):
|
|
|
48 |
|
49 |
# include the routes
|
50 |
app.include_router(audio_tokenizer_router)
|
51 |
+
app.include_router(audio_inference_router)
|
52 |
|
53 |
def self_terminate():
|
54 |
time.sleep(1)
|
|
|
61 |
threading.Thread(target=self_terminate, daemon=True).start()
|
62 |
return {"success": True}
|
63 |
|
64 |
+
@app.get("/health")
async def health():
    """Liveness probe: returns a static OK payload while the server is running."""
    return {"status": "OK"}
|
67 |
+
|
68 |
if __name__ == "__main__":
|
69 |
import uvicorn
|
70 |
from uvicorn.config import LOGGING_CONFIG
|
model.yml
CHANGED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# BEGIN GENERAL GGUF METADATA
|
2 |
+
id: ichigo-whispervq # Model ID unique between models
|
3 |
+
model: ichigo-whispervq # Model ID which is used for request construct - should be unique between models (author / quantization)
|
4 |
+
name: Ichigo WhisperVQ
|
5 |
+
version: 1 # metadata.version
|
6 |
+
|
7 |
+
# END GENERAL METADATA
|
8 |
+
|
9 |
+
# BEGIN INFERENCE PARAMETERS
|
10 |
+
# BEGIN REQUIRED
|
11 |
+
|
12 |
+
load_model: # method to load python model through API
|
13 |
+
method: post
|
14 |
+
path: /loadmodel
|
15 |
+
transform_request: "" # jinja2 template to transform request
|
16 |
+
transform_response: "" # jinja2 template to transform response
|
17 |
+
|
18 |
+
destroy: # method to destroy python process through API
|
19 |
+
method: delete
|
20 |
+
path: /detroy # NOTE(review): looks like a typo for /destroy — confirm against the route registered in app.py
|
21 |
+
|
22 |
+
health_check: # method to check the health of the python process through API
|
23 |
+
method: get
|
24 |
+
path: /health
|
25 |
+
|
26 |
+
inference: # method to do inference python model through API
|
27 |
+
method: post
|
28 |
+
path: /inference # NOTE(review): audio_inference_router uses prefix "/audio", so the served path is /audio/inference — verify this matches
|
29 |
+
transform_request: ""
|
30 |
+
transform_response: ""
|
31 |
+
|
32 |
+
extra_endpoints: # utility methods
|
33 |
+
- method: post
|
34 |
+
path: /tokenize/wav
|
35 |
+
transform_request: ""
|
36 |
+
transform_response: ""
|
37 |
+
- method: get
|
38 |
+
path: /supported_formats
|
39 |
+
|
40 |
+
# END REQUIRED
|
41 |
+
|
42 |
+
# BEGIN OPTIONAL
|
43 |
+
|
44 |
+
# END OPTIONAL
|
45 |
+
# END INFERENCE PARAMETERS
|
46 |
+
|
47 |
+
# BEGIN SERVER START PARAMETERS
|
48 |
+
# BEGIN REQUIRED
|
49 |
+
files: /home/thuan/cortexcpp/models/cortex.so/whispervq/fp16
|
50 |
+
port: 3348
|
51 |
+
log_path: whisper.log
|
52 |
+
log_level: INFO
|
53 |
+
environment: whispervq # python environment to run model
|
54 |
+
script: app.py
|
55 |
+
command: ["python"] # this is the base command, cortex will automatic find the correct location of python in env and add params when execute command
|
56 |
+
|
57 |
+
engine: python-engine
|
58 |
+
# END REQUIRED
|
59 |
+
|
60 |
+
# BEGIN OPTIONAL
|
61 |
+
extra_params:
|
62 |
+
device_id: "0"
|
63 |
+
package_dir: "" # the package directory to be searched
|
64 |
+
|
65 |
+
# END OPTIONAL
|
66 |
+
# END SERVER START PARAMETERS
|
models/__pycache__/audio.cpython-311.pyc
ADDED
Binary file (1.24 kB). View file
|
|
models/audio.py
CHANGED
@@ -20,3 +20,7 @@ FORMAT_BACKENDS = {
|
|
20 |
AudioFormat.OPUS: ["ffmpeg"],
|
21 |
AudioFormat.PCM: ["soundfile"]
|
22 |
}
|
|
|
|
|
|
|
|
|
|
20 |
AudioFormat.OPUS: ["ffmpeg"],
|
21 |
AudioFormat.PCM: ["soundfile"]
|
22 |
}
|
23 |
+
|
24 |
+
class AudioRequest(BaseModel):
    # Base64-encoded audio payload; decoded server-side before tokenization.
    data: str
    # Audio container format of the payload. The default is the string "wav",
    # which pydantic coerces into the AudioFormat enum — presumably a WAV
    # member exists alongside the OPUS/PCM members visible above; confirm.
    format: AudioFormat = "wav"
|
routes/InferenceRoute.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from services.AudioTokenizerService import get_audio_tokenizer_service
|
2 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
3 |
+
from fastapi import File, UploadFile
|
4 |
+
from models.audio import AudioFormat, FORMAT_BACKENDS, AudioRequest
|
5 |
+
from utils.utils import decode_base64_to_audio
|
6 |
+
import base64
|
7 |
+
|
8 |
+
audio_inference_router = APIRouter(prefix="/audio", tags=["audio"])


@audio_inference_router.post("/inference")
async def tokenize_audio(request: AudioRequest):
    """Decode the base64 audio payload and run it through the tokenizer service."""
    raw_audio = decode_base64_to_audio(request.data)
    tokenizer = get_audio_tokenizer_service()
    return tokenizer.tokenize(raw_audio, request.format)
|
routes/__pycache__/AudioTokenizerRoute.cpython-311.pyc
ADDED
Binary file (1.68 kB). View file
|
|
routes/__pycache__/InferenceRoute.cpython-311.pyc
ADDED
Binary file (1.29 kB). View file
|
|
services/__pycache__/AudioTokenizerService.cpython-311.pyc
ADDED
Binary file (8.88 kB). View file
|
|
utils/__pycache__/custom_component.cpython-311.pyc
ADDED
Binary file (12.8 kB). View file
|
|
utils/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (1.8 kB). View file
|
|
utils/utils.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
|
3 |
+
def decode_base64_to_audio(
    base64_string: str
) -> bytes:
    """
    Decode a base64 string into raw audio bytes.

    Args:
        base64_string (str): Base64 encoded audio payload.

    Returns:
        bytes: Decoded audio bytes.

    Raises:
        ValueError: If the base64 string is malformed (e.g. bad padding).
    """
    # NOTE: b64decode() runs with validate=False, so characters outside the
    # base64 alphabet are silently discarded; only padding/length errors raise.
    # The previous docstring mentioned an `output_path` parameter and file
    # writing that never existed; the dead `except IOError` branch (nothing
    # here performs I/O) has been removed accordingly.
    try:
        return base64.b64decode(base64_string)
    except base64.binascii.Error as e:
        raise ValueError(f"Invalid base64 string: {e}") from e
|
27 |
+
|
28 |
+
def encode_audio_to_base64(byte_data: bytes) -> str:
    """
    Encode raw audio bytes as a base64 ASCII string.

    Args:
        byte_data (bytes): Raw audio bytes to encode.

    Returns:
        str: Base64-encoded representation of the input.
    """
    # b64encode operates purely in memory — it raises TypeError for non-bytes
    # input and never IOError, so the previous `except IOError` was dead code.
    return base64.b64encode(byte_data).decode('utf-8')
|