Spaces:
Sleeping
Sleeping
Update Traffic_Signs_Classification.py
Browse files
Traffic_Signs_Classification.py
CHANGED
@@ -3,17 +3,17 @@ from PIL import Image
|
|
3 |
import torch
|
4 |
from transformers import AutoImageProcessor
|
5 |
import pandas as pd
|
6 |
-
from transformers import pipeline
|
7 |
from transformers import ViTForImageClassification
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
model= ViTForImageClassification.from_pretrained('Rae1230/Traffic_Signs_Classification')
|
12 |
-
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
|
13 |
|
14 |
# Streamlit application title
|
15 |
st.title("Speech the Traffic Signs")
|
16 |
|
|
|
|
|
|
|
17 |
|
18 |
uploaded_file = st.file_uploader("Choose a PNG image...", type="png", accept_multiple_files=False)
|
19 |
if uploaded_file is not None:
|
@@ -33,4 +33,18 @@ if uploaded_file is not None:
|
|
33 |
text_col = df['Name']
|
34 |
|
35 |
text_value = text_col.loc[num_col == img_class_idx].values[0]
|
36 |
-
st.write("Predicted class:", text_value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# Third-party dependencies for the Streamlit traffic-sign app.
import pandas as pd
import torch  # fix: the original imported torch twice (file lines 3 and 8)
from transformers import (
    AutoImageProcessor,
    AutoTokenizer,
    ViTForImageClassification,
    VitsModel,
)
# NOTE(review): IPython.display.Audio does not render inside a Streamlit app;
# kept only because later code still references it — prefer st.audio instead.
from IPython.display import Audio
|
|
|
|
|
10 |
|
11 |
# Page header for the Streamlit app.
st.title("Speech the Traffic Signs")

# Traffic-sign classification: a fine-tuned ViT checkpoint paired with the
# base ViT image processor for input preprocessing.
model = ViTForImageClassification.from_pretrained('Rae1230/Traffic_Signs_Classification')
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
|
17 |
|
18 |
uploaded_file = st.file_uploader("Choose a PNG image...", type="png", accept_multiple_files=False)
|
19 |
if uploaded_file is not None:
|
|
|
33 |
text_col = df['Name']
|
34 |
|
35 |
text_value = text_col.loc[num_col == img_class_idx].values[0]
|
36 |
+
st.write("Predicted class:", text_value)
|
37 |
+
|
38 |
+
|
39 |
+
# Speech the traffic sign: synthesize the predicted class name with MMS-TTS.
# Fix: `text_value` is assigned only inside the `if uploaded_file is not None:`
# branch above, so running this unconditionally raised NameError before any
# file was uploaded — guard the whole section on the same condition.
if uploaded_file is not None:
    model2 = VitsModel.from_pretrained("facebook/mms-tts-eng")
    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")

    inputs = tokenizer(text_value, return_tensors="pt")

    # Inference only — no gradients needed for synthesis.
    with torch.no_grad():
        output = model2(**inputs).waveform  # (batch, samples) per VITS docs

    # Fix: a bare IPython.display.Audio(...) expression renders nothing in a
    # Streamlit script; st.audio actually embeds a playable audio widget.
    st.audio(output.squeeze().numpy(), sample_rate=model2.config.sampling_rate)
|