akhaliq HF staff committed on
Commit
2029b71
1 Parent(s): a536a59

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import tensorflow_hub as hub
3
+ import numpy as np
4
+ import csv
5
+
6
+ import matplotlib.pyplot as plt
7
+ from IPython.display import Audio
8
+ from scipy.io import wavfile
9
+
10
# Load the YAMNet model from TensorFlow Hub once, at import time, so every
# request served later reuses the same in-memory model object.
YAMNET_HANDLE = 'https://tfhub.dev/google/yamnet/1'
model = hub.load(YAMNET_HANDLE)
12
+
13
+ # Find the name of the class with the top score when mean-aggregated across frames.
14
def class_names_from_csv(class_map_csv_text):
    """Return the class display names listed in a class-map CSV.

    Args:
        class_map_csv_text: Location of the CSV file. Despite the name, the
            value is opened as a path via ``tf.io.gfile.GFile``.

    Returns:
        A list with the ``display_name`` column of every row, in file order.
    """
    with tf.io.gfile.GFile(class_map_csv_text) as class_map_file:
        return [
            record['display_name']
            for record in csv.DictReader(class_map_file)
        ]
23
+
24
# The model reports where its class-map CSV lives; read the labels once at
# import time so each inference call can simply index into `class_names`.
class_map_tensor = model.class_map_path()
class_map_path = class_map_tensor.numpy()
class_names = class_names_from_csv(class_map_path)
26
+
27
+
28
def ensure_sample_rate(original_sample_rate, waveform,
                       desired_sample_rate=16000):
    """Resample ``waveform`` to ``desired_sample_rate`` if required.

    Args:
        original_sample_rate: Sample rate of ``waveform`` in Hz.
        waveform: Sequence of audio samples; resampling is done along its
            first axis.
        desired_sample_rate: Target sample rate in Hz.

    Returns:
        A ``(desired_sample_rate, waveform)`` tuple. When the rates already
        match, ``waveform`` is returned untouched; otherwise it is the
        resampled array produced by ``scipy.signal.resample``.
    """
    if original_sample_rate != desired_sample_rate:
        # The file only imports `scipy.io.wavfile`, which does not bind the
        # name `scipy`; import the submodule here so this branch cannot fail
        # with a NameError.
        import scipy.signal

        # Keep the clip duration constant: new_len = old_len * (new / old).
        desired_length = int(round(float(len(waveform)) /
                                   original_sample_rate * desired_sample_rate))
        waveform = scipy.signal.resample(waveform, desired_length)
    return desired_sample_rate, waveform
36
+
37
# Fetch the sample clip into the working directory at start-up.
# The original shelled out via `os.system("wget ...")`, but `os` was never
# imported (NameError) and a `wget` binary is not guaranteed to exist, so the
# download now uses the standard library instead.
import os.path
import urllib.request

_SAMPLE_WAV_URL = "https://storage.googleapis.com/audioset/miaow_16k.wav"
_SAMPLE_WAV_NAME = "miaow_16k.wav"

if not os.path.exists(_SAMPLE_WAV_NAME):
    try:
        urllib.request.urlretrieve(_SAMPLE_WAV_URL, _SAMPLE_WAV_NAME)
    except OSError as err:
        # Stay best-effort like the original shell call: a failed download
        # must not prevent the app from starting.
        print(f"Could not download {_SAMPLE_WAV_URL}: {err}")
38
+
39
def inference(audio):
    """Name the dominant sound in an audio clip using the YAMNet model.

    Args:
        audio: Either a path (or file object) readable by
            ``scipy.io.wavfile.read``, or a ``(sample_rate, samples)`` tuple
            such as Gradio's audio input delivers in its numpy mode.

    Returns:
        A sentence naming the class whose score, averaged over all frames,
        is the highest.
    """
    if isinstance(audio, tuple):
        sample_rate, wav_data = audio
    else:
        # The second positional parameter of wavfile.read is `mmap`, not a
        # file mode; the former 'rb' argument silently enabled memory-mapping.
        sample_rate, wav_data = wavfile.read(audio)

    wav_data = np.asarray(wav_data)
    if wav_data.ndim > 1:
        # Downmix to mono, since the model takes a 1-D waveform.
        # Assumes a (samples, channels) layout, as wavfile.read returns.
        wav_data = wav_data.mean(axis=1)

    sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data)

    # Scale to roughly [-1.0, 1.0]; assumes 16-bit PCM samples.
    waveform = wav_data / tf.int16.max

    # Run the model; only the per-frame class scores are needed here.
    scores, _embeddings, _spectrogram = model(waveform)

    # Average the scores over frames and pick the best-scoring class.
    mean_scores = scores.numpy().mean(axis=0)
    infered_class = class_names[mean_scores.argmax()]

    return f'The main sound is: {infered_class}'
55
+
56
# `gr` was used without ever being imported, so the script died with a
# NameError before the UI could start; bind it here, right where it is needed.
import gradio as gr

# Serve a minimal web UI: one audio input, one text output.
gr.Interface(inference, "audio", "text").launch()
57
+