Faisal-Data committed on
Commit
cad6415
1 Parent(s): a0d3273

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import CLIPProcessor, CLIPModel, pipeline
3
+ import torch
4
+ from PIL import Image
5
+ import scipy.io.wavfile
6
+
7
# Text-to-audio pipeline backed by the small MusicGen checkpoint.
musicgen = pipeline("text-to-audio", model="facebook/musicgen-small")

# StreetCLIP weights and the matching pre-processor share one checkpoint id.
_STREETCLIP_CHECKPOINT = "geolocal/StreetCLIP"
model = CLIPModel.from_pretrained(_STREETCLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(_STREETCLIP_CHECKPOINT)

# Candidate country names; process_image() passes these as the text prompts
# that the uploaded photo is scored against.
labels = [
    "Albania", "Andorra", "Argentina", "Australia", "Austria",
    "Bangladesh", "Belgium", "Bermuda", "Bhutan", "Bolivia", "Botswana",
    "Brazil", "Bulgaria",
    "Cambodia", "Canada", "Chile", "China", "Colombia", "Croatia",
    "Czech Republic",
    "Denmark", "Dominican Republic",
    "Egypt", "Ecuador", "Estonia",
    "Finland", "France",
    "Germany", "Ghana", "Greece", "Greenland", "Guam", "Guatemala",
    "Hungary",
    "Iceland", "India", "Indonesia", "Ireland", "Israel", "Italy",
    "Japan", "Jordan",
    "Kenya", "Kyrgyzstan",
    "Laos", "Latvia", "Lesotho", "Lithuania", "Luxembourg",
    "Macedonia", "Madagascar", "Malaysia", "Malta", "Mexico", "Monaco",
    "Mongolia", "Montenegro",
    "Netherlands", "New Zealand", "Nigeria", "Norway",
    "Pakistan", "Palestine", "Peru", "Philippines", "Poland", "Portugal",
    "Puerto Rico",
    "Romania", "Russia", "Rwanda",
    "Saudi Arabia", "Senegal", "Serbia", "Singapore", "Slovakia",
    "Slovenia", "South Africa", "South Korea", "Spain", "Sri Lanka",
    "Swaziland", "Sweden", "Switzerland", "Syria",
    "Taiwan", "Thailand", "Tunisia", "Turkey",
    "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom",
    "United States", "Uruguay",
]
15
+
16
def process_image(image, audio_path: str = "musicgen_out.wav") -> "tuple[str, str]":
    """Guess the country shown in a photo and generate music for it.

    The image is scored against every entry of ``labels`` with the
    StreetCLIP model. The best-scoring country name is turned into a text
    prompt for the MusicGen pipeline, and the generated audio is written to
    ``audio_path`` as a WAV file.

    Args:
        image: A PIL image, or a string path to an image file.
        audio_path: Destination of the generated WAV file. The file is
            overwritten on every call.

    Returns:
        A ``(country, audio_path)`` tuple: the predicted country name and
        the path the audio was written to.

    Raises:
        ValueError: If ``image`` is ``None`` (no photo was supplied).
    """
    # Fail early with a readable message instead of letting a missing image
    # propagate into the processor.
    if image is None:
        raise ValueError("No image provided; please upload a photo first.")

    # Accept a file path as well as an already-loaded image.
    if isinstance(image, str):
        # Image.open() keeps the file open until the pixels are read;
        # copy() loads them so the handle can be released right away.
        with Image.open(image) as opened:
            image = opened.copy()

    # Tokenize the candidate country names and pre-process the image together.
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

    # Pick the label with the highest probability.
    country_index = probs.argmax(dim=1).item()
    country = labels[country_index]

    # Generate music from a prompt built around the predicted country.
    music_description = f"Traditional music from {country}"
    music = musicgen(music_description, forward_params={"do_sample": True})

    # Persist the generated audio at the requested location.
    scipy.io.wavfile.write(audio_path, rate=music["sampling_rate"], data=music["audio"])

    return country, audio_path
43
+
44
import os

# Define the Gradio interface: one image input, and a text + audio output
# pair matching the (country, audio_path) tuple returned by process_image.
inputs = gr.Image(type="pil", label="Upload a photo (تحميل صورة)")
outputs = [gr.Textbox(label="Country (البلد)"), gr.Audio(label="Generated Music (الموسيقى المولدة)")]

# The example images are referenced by absolute path. Only offer those that
# are actually present on this machine, so a missing file does not break the
# examples panel; fall back to no examples at all when none are found.
_example_paths = ["/content/Egypt.jfif", "/content/Riyadh.jpeg", "/content/Syria.jfif", "/content/Turkey.jfif"]
_available_examples = [path for path in _example_paths if os.path.exists(path)]

iface = gr.Interface(
    fn=process_image,
    inputs=inputs,
    outputs=outputs,
    title="Photo to Country and Music Generator محدد الموقع من الصور بالاضافة الى انشاء م",
    description="Upload a photo to identify the country and generate traditional music from that country. (قم بتحميل صورة لتحديد البلد وإنشاء موسيقى تقليدية من هذا البلد.)",
    examples=_available_examples or None,
)

# Launch the interface
iface.launch(debug=True)