Hugo Rodrigues committed on
Commit
99797ef
1 Parent(s): b2430fa

audio endpoint

Browse files
Files changed (4) hide show
  1. .gitignore +3 -1
  2. README.md +35 -0
  3. main.py +30 -2
  4. requirements.txt +2 -1
.gitignore CHANGED
@@ -19,4 +19,6 @@ __pycache__/
19
  .gdb_history
20
  .vscode/
21
  # Other
22
- .DS_Store
 
 
 
19
  .gdb_history
20
  .vscode/
21
  # Other
22
+ .DS_Store
23
+
24
+ *.wav
README.md CHANGED
@@ -25,3 +25,38 @@ VS Code Python select interpreter hf
25
  ```
26
  docker compose up --build
27
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ```
26
  docker compose up --build
27
  ```
28
+
29
+ ## Tests
30
+
31
+ Translate from English (eng) to Portuguese (por) the following text: "we the people of the united states in order to form a more perfect union establish justice ensure domestic tranquillity provide for the common defense"
32
+
33
+ MacBook Pro M1 16GB, device = cpu.
34
+
35
+ - Does not run. Not enough memory
36
+
37
+ HF CPU free
38
+
39
+ - 8.99 sec
40
+ - 9.06 sec
41
+ - 8.77 sec
42
+
43
+ T4 small
44
+
45
+ - 1.18 sec
46
+ - 1.12 sec
47
+ - 1.12 sec
48
+
49
+ A10G small
50
+
51
+ - 1.02 sec
52
+ - 1.00 sec
53
+ - 1.06 sec
54
+ - 1.01 sec
55
+
56
+ 2xA10G large
57
+
58
+ - 0.97 sec
59
+ - 0.95 sec
60
+ - 0.95 sec
61
+ - 0.95 sec
62
+ - 0.95 sec
main.py CHANGED
@@ -1,9 +1,12 @@
1
  import time
 
 
2
 
3
  # from typing import Union
4
  # from pydantic import BaseModel
5
  from fastapi import FastAPI
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
 
8
  # from fastapi.staticfiles import StaticFiles
9
  # from fastapi.responses import FileResponse
@@ -14,8 +17,11 @@ import torch
14
  from transformers import SeamlessM4Tv2Model
15
  from transformers import AutoProcessor
16
 
17
- processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
18
- model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
 
 
 
19
 
20
 
21
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -63,3 +69,25 @@ def transcribe(inputs, src_lang="eng", tgt_lang="por"):
63
  print("Time took to process the request and return response is {} sec".format(
64
  time.time() - start_time))
65
  return translated_text_from_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import time
2
+ from scipy.io.wavfile import write
3
+
4
 
5
  # from typing import Union
6
  # from pydantic import BaseModel
7
  from fastapi import FastAPI
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import FileResponse
10
 
11
  # from fastapi.staticfiles import StaticFiles
12
  # from fastapi.responses import FileResponse
 
17
  from transformers import SeamlessM4Tv2Model
18
  from transformers import AutoProcessor
19
 
20
+ model_name = "facebook/seamless-m4t-v2-large"
21
+ # model_name = "facebook/hf-seamless-m4t-medium"
22
+
23
+ processor = AutoProcessor.from_pretrained(model_name)
24
+ model = SeamlessM4Tv2Model.from_pretrained(model_name)
25
 
26
 
27
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
69
  print("Time took to process the request and return response is {} sec".format(
70
  time.time() - start_time))
71
  return translated_text_from_text
72
+
73
+
74
+ @app.get("/audio")
75
+ async def audio(inputs, src_lang="eng", tgt_lang="por", speaker_id=5):
76
+ start_time = time.time()
77
+
78
+ if inputs is None:
79
+ raise "No audio file submitted! Please upload or record an audio file before submitting your request."
80
+
81
+ text_inputs = processor(text=inputs,
82
+ src_lang=src_lang, return_tensors="pt").to(device)
83
+
84
+ audio_array_from_text = model.generate(
85
+ **text_inputs, tgt_lang=tgt_lang, speaker_id=int(speaker_id))[0].cpu().numpy().squeeze()
86
+
87
+ print("Time took to process the request and return response is {} sec".format(
88
+ time.time() - start_time))
89
+
90
+ write("output.wav", model.config.sampling_rate,
91
+ audio_array_from_text)
92
+
93
+ return FileResponse('output.wav', media_type="audio/mpeg")
requirements.txt CHANGED
@@ -7,4 +7,5 @@ sentencepiece
7
  protobuf
8
  torch
9
  uvicorn[standard]
10
- ffmpeg
 
 
7
  protobuf
8
  torch
9
  uvicorn[standard]
10
+ ffmpeg
11
+ scipy