Josh Cox committed
Commit 158667b
1 Parent(s): 78e3b34
.python-version ADDED
@@ -0,0 +1 @@
+ artist
__pycache__/artist_lib.cpython-311.pyc ADDED
Binary file (11.1 kB).
 
app.py CHANGED
@@ -1,7 +1,161 @@
- def greet(name):
-     return "Hello " + name + "!!"
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
- iface.launch()
+ import argparse
+ import binascii
+ import glob
+ import os
+ import os.path
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import random
+ import sys
+ import tempfile
+ import time
+ import torch
+ from PIL import Image
+ from diffusers import StableDiffusionPipeline
+
 import gradio as gr

+ import artist_lib
+
+ from dotenv import load_dotenv
+ load_dotenv()
+ SERVER_NAME = os.getenv("SERVER_NAME")
+
+ drawdemo = gr.Interface(
+     fn=artist_lib.draw,
+     inputs=[
+         gr.Text(label="Drawing description text", value="hindu mandala neon orange and blue"),
+         gr.Dropdown(label='Model', choices=["stable-diffusion-2", "stable-diffusion-2-1", "stable-diffusion-v1-5"], value="stable-diffusion-v1-5"),
+         gr.Checkbox(label="Force-New"),
+     ],
+     outputs="image",
+     examples=[
+         ['van gogh dogs playing poker', "stable-diffusion-v1-5", False],
+         ['picasso the scream', "stable-diffusion-v1-5", False],
+         ['dali american gothic', "stable-diffusion-v1-5", False],
+         ['matisse mona lisa', "stable-diffusion-v1-5", False],
+         ['maxfield parrish angel in lake', "stable-diffusion-v1-5", False],
+         ['peter max dogs playing poker', "stable-diffusion-v1-5", False],
+         ['hindu mandala copper and patina green', "stable-diffusion-v1-5", False],
+         ['hindu mandala fruit salad', "stable-diffusion-v1-5", False],
+         ['hindu mandala neon green black and purple', "stable-diffusion-v1-5", False],
+         ['astronaut riding a horse on mars', "stable-diffusion-v1-5", False]
+     ],
+ )
+
+ AudioDemo = gr.Interface(
+     fn=artist_lib.generate_tone,
+     inputs=[
+         gr.Dropdown(artist_lib.notes, type="index"),
+         gr.Slider(4, 6, step=1),
+         gr.Textbox(value="1", label="Duration in seconds")
+     ],
+     outputs="audio"
+ )
+
+ imageClassifierDemo = gr.Interface(
+     fn=artist_lib.imageClassifier,
+     inputs="image",
+     outputs="text"
+ )
+
+ audioGeneratorDemo = gr.Interface(
+     fn=artist_lib.audioGenerator,
+     inputs="text",
+     outputs="audio",
+     examples=[
+         ['balsamic beats'],
+         ['dance the night away']
+     ]
+ )
+
+ nameMyPetDemo = gr.Interface(
+     fn=artist_lib.nameMyPet,
+     inputs=[
+         gr.Text(label="What type of animal is your pet?", value="green cat")
+     ],
+     outputs="text",
+     examples=[
+         ['dog'],
+         ['pink dolphin'],
+         ['elevated elephant'],
+         ['green monkey'],
+         ['bionic beaver'],
+         ['felonious fish'],
+         ['delinquent dog'],
+         ['dragging donkey'],
+         ['stinky skunk'],
+         ['pink unicorn'],
+         ['naughty narwhal'],
+         ['blue cat']
+     ],
+ )
+
+ blog_writer_demo = gr.Interface(
+     fn=artist_lib.write_blog,
+     inputs=[
+         gr.Text(label="Blog description text", value="machine learning can be used to track chickens"),
+         gr.Dropdown(label='Model', choices=["gpt-neo-1.3B", "gpt-neo-2.7B"], value="gpt-neo-1.3B"),
+         gr.Number(label='Minimum word count', value=50, precision=0),
+         gr.Number(label='Maximum word count', value=50, precision=0),
+         gr.Checkbox(label="Force-New"),
+     ],
+     outputs="text",
+     examples=[
+         ['machine learning can be used to track chickens', "gpt-neo-1.3B", 50, 50, False],
+         ['music and machine learning', "gpt-neo-2.7B", 50, 50, False]
+     ],
+ )
+
+ generateAudioDemo = gr.Interface(
+     fn=artist_lib.generate_spectrogram_audio_and_loop,
+     title="Audio Diffusion",
+     description="Generate audio using Hugging Face diffusers. \
+ The models without 'latent' or 'ddim' give better results but take about \
+ 20 minutes without a GPU. For a GPU, you can run this app on \
+ [colab](https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/gradio_app.ipynb).",
+     inputs=[
+         gr.Dropdown(label="Model",
+                     choices=[
+                         "teticio/audio-diffusion-256",
+                         "teticio/audio-diffusion-breaks-256",
+                         "teticio/audio-diffusion-instrumental-hiphop-256",
+                         "teticio/audio-diffusion-ddim-256",
+                         "teticio/latent-audio-diffusion-256",
+                         "teticio/latent-audio-diffusion-ddim-256"
+                     ],
+                     value="teticio/latent-audio-diffusion-ddim-256")
+     ],
+     outputs=[
+         gr.Image(label="Mel spectrogram", image_mode="L"),
+         gr.Audio(label="Audio"),
+         gr.Audio(label="Loop"),
+     ],
+     allow_flagging="never")
+
+ with gr.Blocks() as gallerydemo:
+     with gr.Column(variant="panel"):
+         with gr.Row(variant="compact"):
+             text = gr.Textbox(
+                 label="Enter your prompt",
+                 show_label=False,
+                 max_lines=1,
+                 placeholder="Enter your prompt"
+             )
+             btn = gr.Button("Generate image")
+
+         gallery = gr.Gallery(
+             label="Generated images", show_label=False, elem_id="gallery"
+         )
+
+     btn.click(artist_lib.fake_gan, None, gallery)
+
+ # artist = gr.TabbedInterface([drawdemo, blog_writer_demo, gallerydemo], ["Draw", "Bloggr", "Gallery"])
+ # artist = gr.TabbedInterface([drawdemo, blog_writer_demo, imageClassifierDemo, generateAudioDemo, audioGeneratorDemo, AudioDemo, nameMyPetDemo], ["Draw", "Bloggr", "imageClassifier", "generateAudio", "audioGenerator", "AudioDemo", "nameMyPet"])
+ artist = gr.TabbedInterface([drawdemo, imageClassifierDemo, generateAudioDemo, nameMyPetDemo, blog_writer_demo], ["Draw", "imageClassifier", "generateAudio", "nameMyPet", "Bloggr"])

+ artist.queue(
+     max_size=4
+ )
+ artist.launch()
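
Aside: app.py reads SERVER_NAME via python-dotenv but never passes it anywhere; a plausible intent is Gradio's server_name launch parameter. A minimal sketch of that assumed wiring, not part of the commit:

# Hypothetical: bind to the env-configured host if one is set.
if SERVER_NAME:
    artist.launch(server_name=SERVER_NAME)
else:
    artist.launch()
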
artist_lib.py ADDED
@@ -0,0 +1,179 @@
+ import argparse
+ import binascii
+ import glob
+ import openai
+ import os
+ import os.path
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import random
+ import sys
+ import tempfile
+ import time
+ import torch
+ import gradio as gr  # needed for gr.Error in draw() and write_blog()
+ from PIL import Image
+ from IPython.display import Audio
+ from diffusers import StableDiffusionPipeline
+ from diffusers import DiffusionPipeline
+ from transformers import pipeline
+ from transformers import ViTFeatureExtractor, ViTForImageClassification
+ from audiodiffusion import AudioDiffusion
+ import requests
+
+ notes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
+
+ def fake_gan():
+     # Stand-in "GAN" for the gallery demo: returns stock photos with test labels.
+     images = [
+         (random.choice(
+             [
+                 "https://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=387&q=80",
+                 "https://images.unsplash.com/photo-1554151228-14d9def656e4?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=386&q=80",
+                 "https://images.unsplash.com/photo-1542909168-82c3e7fdca5c?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8aHVtYW4lMjBmYWNlfGVufDB8fDB8fA%3D%3D&w=1000&q=80",
+                 "https://images.unsplash.com/photo-1546456073-92b9f0a8d413?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=387&q=80",
+                 "https://images.unsplash.com/photo-1601412436009-d964bd02edbc?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=464&q=80",
+             ]
+         ), f"label {i}" if i != 0 else "label" * 50)
+         for i in range(3)
+     ]
+     return images
+
+ def imageClassifier(inputImage):
+     image = inputImage
+
+     feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
+     model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
+
+     inputs = feature_extractor(images=image, return_tensors="pt")
+     outputs = model(**inputs)
+     logits = outputs.logits
+     # model predicts one of the 1000 ImageNet classes
+     predicted_class_idx = logits.argmax(-1).item()
+     # return a single string: the Interface has one "text" output, so a
+     # tuple would be misread as two return values
+     return "Predicted class: " + model.config.id2label[predicted_class_idx]
+
+ def audioGenerator(inputText):
+     # inputText is currently unused; the pipeline generates unconditionally
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     pipe = DiffusionPipeline.from_pretrained("teticio/audio-diffusion-256").to(device)
+     output = pipe()
+     print("sample rate is ", pipe.mel.get_sample_rate())
+     # Gradio's audio output expects (sample_rate, numpy array), not an
+     # IPython.display.Audio widget
+     sr = int(pipe.mel.get_sample_rate())
+     return sr, output.audios[0]
+
+ def generate_spectrogram_audio_and_loop(model_id):
+     audio_diffusion = AudioDiffusion(model_id=model_id)
+     image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio()
+     loop = AudioDiffusion.loop_it(audio, sample_rate)
+     if loop is None:
+         loop = audio
+     return image, (sample_rate, audio), (sample_rate, loop)
+
+ def generate_tone(note, octave, duration):
+     sr = 48000
+     # Equal temperament: each semitone is a factor of 2**(1/12) from A4 = 440 Hz.
+     a4_freq, tones_from_a4 = 440, 12 * (octave - 4) + (note - 9)
+     frequency = a4_freq * 2 ** (tones_from_a4 / 12)
+     duration = int(duration)  # arrives as a string from the Textbox input
+     audio = np.linspace(0, duration, duration * sr)
+     audio = (20000 * np.sin(audio * (2 * np.pi * frequency))).astype(np.int16)
+     return sr, audio
+
+ def draw(inp, this_model, force_new):
+     drawing = inp
+     if this_model == "stable-diffusion-2":
+         this_model_addr = "stabilityai/stable-diffusion-2"
+         images_dir = 'images2/'
+     elif this_model == "stable-diffusion-2-1":
+         this_model_addr = "stabilityai/stable-diffusion-2-1"
+         images_dir = 'images2-1/'
+     elif this_model == "stable-diffusion-v1-5":
+         this_model_addr = "runwayml/stable-diffusion-v1-5"
+         images_dir = 'images/'
+     else:
+         raise gr.Error("Unknown Model!")
+     mkdir_if_not_exist(images_dir)
+     drawing_filename = images_dir + drawing.replace(' ', '_') + '.png'
+     if os.path.exists(drawing_filename):
+         if force_new:
+             # archive the cached image under a timestamped name and regenerate
+             new_drawing_filename = images_dir + drawing.replace(' ', '_') + '.' + str(time.time()) + '.png'
+             os.replace(drawing_filename, new_drawing_filename)
+         else:
+             print("found drawing ", drawing_filename)
+             return Image.open(drawing_filename)
+     print("generating drawing '", drawing, "'", drawing_filename)
+     pipe = StableDiffusionPipeline.from_pretrained(this_model_addr, torch_dtype=torch.float16)
+     pipe.enable_attention_slicing()
+     pipe = pipe.to("cuda")  # float16 weights require a CUDA device here
+     image = pipe(drawing).images[0]
+     image.save(drawing_filename)
+     return image
+
+ def write_blog(inp, this_model, min_length, max_length, force_new):
+     blog_post_name = inp
+     if this_model == "gpt-neo-1.3B":
+         this_model_addr = "EleutherAI/gpt-neo-1.3B"
+         text_dir = 'text1.3/'
+     elif this_model == "gpt-neo-2.7B":
+         this_model_addr = "EleutherAI/gpt-neo-2.7B"
+         text_dir = 'text2.7/'
+     else:
+         raise gr.Error("Unknown Model!")
+     mkdir_if_not_exist(text_dir)
+     target_filename = text_dir + blog_post_name.replace(' ', '_') + '.txt'
+     if os.path.exists(target_filename):
+         if force_new:
+             new_target_filename = text_dir + blog_post_name.replace(' ', '_') + '.' + str(time.time()) + '.txt'
+             os.replace(target_filename, new_target_filename)
+         else:
+             print("found blog post ", target_filename)
+             with open(target_filename, 'r') as file:
+                 return file.read()
+     print("generating blog '", blog_post_name, "'", target_filename)
+     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+     generator = pipeline('text-generation', model=this_model_addr, device=device, torch_dtype=torch.float16)
+     # note: TextGenerationPipeline has no enable_attention_slicing(); that is a diffusers API
+     res = generator(blog_post_name, min_length=min_length, max_length=max_length, do_sample=True, temperature=0.7)
+     blog_post_text = res[0]['generated_text']
+     with open(target_filename, 'w') as file:
+         file.write(blog_post_text)
+     return blog_post_text
+
+ def nameMyPet(inp):
+     animal = inp
+     # openai>=1.0 (as pinned in requirements.txt) replaced the module-level
+     # Completion API with a client object
+     client = openai.OpenAI()
+     response = client.completions.create(
+         model="text-davinci-003",
+         prompt=generate_prompt(animal),
+         temperature=0.6,
+     )
+     return response.choices[0].text
+
+ def mkdir_if_not_exist(path):
+     if os.path.exists(path):
+         return 0
+     else:
+         os.mkdir(path)
+
+ def generate_prompt(animal):
+     return """Suggest three names for an animal that is a superhero.
+
+ Animal: Cat
+ Names: Captain Sharpclaw, Agent Fluffball, The Incredible Feline
+ Animal: Dog
+ Names: Ruff the Protector, Wonder Canine, Sir Barks-a-Lot
+ Animal: {}
+ Names:""".format(
+         animal.capitalize()
+     )
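
Most of artist_lib.py needs model downloads, but generate_tone is pure NumPy, so a smoke test is cheap (assuming the module's heavy imports resolve; index 9 of notes is "A", so this synthesizes one second of A4 = 440 Hz):

import artist_lib

# note=9 ("A"), octave=4 -> 440 Hz at a 48 kHz sample rate
sr, audio = artist_lib.generate_tone(note=9, octave=4, duration=1)
assert sr == 48000 and audio.shape == (48000,)
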
images/van_gogh_dogs_playing_poker.png ADDED
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ accelerate==0.27.2
+ audiodiffusion==1.5.6
+ diffusers==0.26.3
+ gradio==4.19.1
+ ipython==8.21.0
+ matplotlib==3.8.3
+ numpy==1.26.4
+ openai==1.12.0
+ Pillow==10.2.0
+ python-dotenv==1.0.1
+ Requests==2.31.0
+ torch==2.2.1
+ transformers==4.38.1
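
These pins install on CPU-only machines, but draw() loads float16 weights and calls pipe.to("cuda"), so the Space effectively assumes a GPU. A pre-flight check along these lines (a sketch, not part of the commit) surfaces that early:

import torch

# draw() and write_blog() both expect CUDA for reasonable performance.
print("CUDA available:", torch.cuda.is_available())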