mrchan1207 committed on
Commit 4a55f3c · verified · 1 Parent(s): 0a04551

Upload 5 files (#1)

Files changed (5):
  1. Dockerfile +29 -0
  2. README.md +3 -4
  3. app.py +110 -0
  4. gitattributes +35 -0
  5. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.10-slim
+
+ # Create a world-writable cache directory and point all model caches at it
+ RUN mkdir -p /cache && chmod 777 /cache
+ ENV HF_HOME="/cache"
+ ENV HUGGINGFACE_HUB_CACHE="/cache"
+ ENV TRANSFORMERS_CACHE="/cache"
+
+ ENV LIBROSA_CACHE_DISABLE="1"
+
+ # Set the working directory in the container
+ WORKDIR /code
+
+ # Copy the requirements file into the container
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install any needed packages specified in requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy the app code into the container
+ COPY ./app.py /code/app.py
+
+ # Expose the port the app runs on. HF Spaces uses 7860.
+ EXPOSE 7860
+
+ # Command to run the app using uvicorn
+ # The host must be 0.0.0.0 to be accessible from outside the container
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
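All three cache variables point at the same world-writable directory, so model downloads succeed even though Spaces containers run as a non-root user. A minimal sanity check, run inside the container (a sketch, not part of the commit; it assumes huggingface_hub is present, which it is as a transformers dependency):

import os
from huggingface_hub import constants

# Both should print /cache: HUGGINGFACE_HUB_CACHE overrides the
# default $HF_HOME/hub location resolved by huggingface_hub.
print(os.environ.get("HF_HOME"))
print(constants.HF_HUB_CACHE)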
README.md CHANGED
@@ -1,12 +1,11 @@
  ---
- title: Wav2vec2
- emoji: 🚀
+ title: Phoneme Transcriptor
+ emoji: 💻
  colorFrom: purple
- colorTo: purple
+ colorTo: yellow
  sdk: docker
  pinned: false
  license: mit
- short_description: Phoneme detection from audio
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,110 @@
+ import os
+ print("--- SCRIPT START: app.py v3 ---")
+ os.environ['NUMBA_CACHE_DIR'] = '/tmp/'    # must be set before numba is imported
+ os.environ['LIBROSA_CACHE_DISABLE'] = '1'  # librosa reads this at import time
+ print(f"--- LIBROSA_CACHE_DISABLE is set to: {os.environ.get('LIBROSA_CACHE_DISABLE')} ---")
+
+ import io
+ import torch
+ import librosa
+ import soundfile as sf
+ from fastapi import FastAPI, File, UploadFile, Request
+ from fastapi.responses import JSONResponse
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+
+ # --- 1. Initialize FastAPI App ---
+ app = FastAPI(
+     title="Audio Transcription API",
+     description="An API to transcribe audio files using a Wav2Vec2 model.",
+ )
+
+ # --- 2. Load Model and Processor (with GPU support) ---
+
+ # Detect whether a GPU is available, otherwise fall back to CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ MODEL_ID = "Bluecast/wav2vec2-Phoneme"
+ print(f"Loading model: {MODEL_ID}...")
+ try:
+     processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
+     model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+
+     # Move the model to the selected device (GPU or CPU)
+     model.to(device)
+
+     print("Model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     model = None
+     processor = None
+
+ # --- 3. Define the Transcription Endpoint ---
+ @app.post("/transcribe/")
+ async def transcribe(audio_file: UploadFile = File(...)):
+     if not model or not processor:
+         return JSONResponse(status_code=503, content={"error": "Model is not loaded."})
+
+     try:
+         contents = await audio_file.read()
+         audio_data, original_sr = sf.read(io.BytesIO(contents))
+
+         if audio_data.ndim > 1:
+             audio_data = audio_data.mean(axis=1)
+
+         resampled_audio = librosa.resample(y=audio_data, orig_sr=original_sr, target_sr=16000)
+
+         inputs = processor(resampled_audio, sampling_rate=16000, return_tensors="pt", padding=True)
+
+         # Move the input tensors to the same device as the model
+         inputs = inputs.to(device)
+
+         with torch.no_grad():
+             logits = model(**inputs).logits
+
+         predicted_ids = torch.argmax(logits, dim=-1)
+         transcription = processor.batch_decode(predicted_ids)[0]
+
+         print(f"Transcription complete: {transcription}")
+         return {"transcription": transcription}
+
+     except Exception as e:
+         print(f"Error during transcription: {str(e)}")
+         return JSONResponse(status_code=500, content={"error": f"An error occurred: {str(e)}"})
+
+ @app.post("/transcribe_audio/")
+ async def transcribe_audio(request: Request):
+     if not model or not processor:
+         return JSONResponse(status_code=503, content={"error": "Model is not loaded."})
+
+     try:
+         contents = await request.body()
+
+         audio_data, original_sr = sf.read(io.BytesIO(contents))
+         if audio_data.ndim > 1:
+             audio_data = audio_data.mean(axis=1)
+
+         resampled_audio = librosa.resample(y=audio_data, orig_sr=original_sr, target_sr=16000)
+
+         inputs = processor(resampled_audio, sampling_rate=16000, return_tensors="pt", padding=True)
+
+         # Move the input tensors to the same device as the model
+         inputs = inputs.to(device)
+
+         with torch.no_grad():
+             logits = model(**inputs).logits
+
+         predicted_ids = torch.argmax(logits, dim=-1)
+         transcription = processor.batch_decode(predicted_ids)[0]
+
+         print(f"Transcription complete: {transcription}")
+         return {"transcription": transcription}
+
+     except Exception as e:
+         print(f"Error during transcription: {str(e)}")
+         return JSONResponse(status_code=500, content={"error": f"An error occurred: {str(e)}"})
+
+ # --- 4. Root Endpoint for Health Check ---
+ @app.get("/")
+ def read_root():
+     return {"status": "API is running."}
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ fastapi
+ uvicorn
+ python-multipart==0.0.20
+ soundfile==0.13.1
+ librosa==0.10.1
+ joblib==1.3.2
+ # For Hugging Face and PyTorch
+ transformers==4.40.0
+ --extra-index-url https://download.pytorch.org/whl/cu121
+ torch==2.2.1
+ datasets==2.19.1
+ tokenizers==0.19.1
+ numpy==1.26.4
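A quick post-install sanity check that the pinned stack imports together (a sketch; it assumes only the packages pinned above):

# Import the core libraries and print the resolved versions.
import numpy
import torch
import transformers
import librosa

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("transformers", transformers.__version__)
print("librosa", librosa.__version__, "| numpy", numpy.__version__)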