# NOTE(review): the original capture began with "Spaces:" / "Build error" lines —
# artifacts of the hosting page (HF Spaces build log), not Python source.
import base64
import json
import random
import whisper
import gradio as gr

# Whisper checkpoint sizes offered in the UI dropdown (trade speed for accuracy).
WhisperModels = ['tiny', 'base', 'small', 'medium', 'large']

import matplotlib.pyplot as plt
import matplotlib
import requests

# Non-interactive backend: render to buffers only (no display server on the host).
matplotlib.use('AGG')

import io
from PIL import Image
import PIL
from io import BytesIO
import openai
import os

# OpenAI credentials are injected via environment variables
# (presumably HF Spaces secrets — confirm in the Space settings).
openai.organization = os.getenv('organization')
openai.api_key = os.getenv('api_key')
def get_story(dream):
    """Expand a transcribed dream into a 4-section story via OpenAI.

    The prompt asks the model for a JSON array of 4 objects with keys
    ``section`` (number), ``story`` (text) and ``alt_text`` (image prompt,
    kept visually consistent by mapping people to known movie characters).

    Args:
        dream: Raw transcription of the user's dream.

    Returns:
        The completion text (expected to contain the JSON array; parsed
        downstream by ``get_array``).
    """
    response = openai.Completion.create(
        model="text-davinci-003",
        # Fixed leading typo: prompt previously started with "m going" (missing "I'").
        prompt=f"I'm going to tell you of my dream and i want you to make a better more and more detailed story out of in one json array so i can create a booklet with image generation. Can you split it into 4 sections and give it 3 keys: section= nr of section, story= containing the story, alt_text= the alt text(make sure that the alt text is overall consistent and map each person in it to a known movie character):{dream}",
        temperature=0.7,
        max_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response["choices"][0]["text"]
def get_image(text):
    """Generate one 512x512 image for *text* via the Stability AI REST API.

    The prompt is wrapped in a fixed surreal-animation style prefix so all
    four booklet panels share a look.

    Args:
        text: Scene description (the section's ``alt_text``).

    Returns:
        Path of the PNG file written to the working directory.

    Raises:
        Exception: If the Stability API key is missing or the request fails.
    """
    import uuid  # local: only needed for the output filename

    engine_id = "stable-diffusion-xl-beta-v2-2-2"
    api_host = "https://api.stability.ai"
    stability_key = os.getenv('stability_key')
    if stability_key is None:
        raise Exception("Missing Stability API key.")
    response = requests.post(
        f"{api_host}/v1/generation/{engine_id}/text-to-image",
        headers={
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {stability_key}"
        },
        json={
            "text_prompts": [
                {
                    "text": f"animated surreal with colors and creepy faces everything detailed, {text}"
                }
            ],
            "cfg_scale": 25,
            "clip_guidance_preset": "FAST_BLUE",
            "height": 512,
            "width": 512,
            "samples": 1,
            "steps": 50,
            "seed": 4294967295,
        },
        timeout=120,  # image generation is slow; don't hang forever on a dead connection
    )
    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))
    data = response.json()
    # Unique filename instead of random.randint(0, 1000), which could silently
    # overwrite a previously generated panel on collision.
    filename = f"{uuid.uuid4().hex}.png"
    with open(filename, "wb") as f:
        f.write(base64.b64decode(data["artifacts"][0]["base64"]))
    return filename
def get_array(dream):
    """Extract and parse the first JSON array embedded in a model response.

    LLM completions often wrap the requested JSON in extra prose; this slices
    from the first ``[`` to the last ``]`` before parsing.

    Args:
        dream: Completion text expected to contain a JSON array.

    Returns:
        The parsed Python list.

    Raises:
        ValueError: If no JSON array delimiters are found, or (via
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if the
            extracted span is not valid JSON.
    """
    start = dream.find("[")
    # Previously start == -1 silently sliced dream[-1:] and produced a
    # confusing JSON error; fail with a clear message instead.
    if start == -1:
        raise ValueError("No JSON array found in model response")
    end = dream.rfind("]")
    if end < start:
        raise ValueError("Unterminated JSON array in model response")
    # Slice up to the closing bracket so trailing prose no longer breaks parsing.
    return json.loads(dream[start:end + 1])
def SpeechToText(audio, SelectedModel):
    """Full pipeline: audio -> transcription -> story -> four illustrated panels.

    Args:
        audio: Path to the recorded audio file (Gradio ``type="filepath"``).
        SelectedModel: Whisper checkpoint name from ``WhisperModels``.

    Returns:
        Eight values in Gradio output order:
        (img1, img2, img3, img4, text1, text2, text3, text4).
    """
    print('Loading model...')
    model = whisper.load_model(SelectedModel)
    print('Loading audio...')
    audio = whisper.load_audio(audio)
    # NOTE: pad_or_trim limits the input to Whisper's 30 s window — longer
    # recordings are truncated.
    audio = whisper.pad_or_trim(audio)
    print('Creating log-mel spectrogram...')
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    print('Detecting language...')
    _, probs = model.detect_language(mel)
    # Detected language is informational only; decoding below auto-detects too.
    print(f"Language: {max(probs, key=probs.get)}")
    print('Decoding audio to text...')
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    story = get_story(result.text)
    print("Text: " + story)
    sections = get_array(story)
    # One image + one caption per section; was four copy-pasted stanzas.
    images, texts = [], []
    for section in sections[:4]:
        images.append(get_image(section["alt_text"]))
        texts.append(section["story"])
        print('image added')
    return (*images, *texts)
def clean_text(text):
    """Normalize a sentence for word-by-word pictogram lookup.

    Lowercases the text, turns the punctuation characters ``, . ? -`` into
    spaces, splits on whitespace, and re-capitalizes the standalone pronoun
    "i". More punctuation (``!`` etc.) may need handling in the future.

    Args:
        text: Raw sentence to clean.

    Returns:
        Tuple of (list of cleaned words, single space-joined string).
    """
    print("cleaning text: ", text)
    # One C-level pass instead of four chained .replace() calls.
    punctuation_to_space = str.maketrans({",": " ", ".": " ", "?": " ", "-": " "})
    words = [
        "I" if word == "i" else word
        for word in text.lower().translate(punctuation_to_space).split()
    ]
    return words, ' '.join(words)
import nltk

# Fetch the corpora required below: 'punkt' for word_tokenize,
# 'averaged_perceptron_tagger' for pos_tag, 'wordnet'/'omw-1.4' for the
# WordNetLemmatizer. Runs at import time on every start (no-op if cached).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('omw-1.4')
# Container deployments may cache the data under /root — add it to the search path.
nltk.data.path.append('/root/nltk_data')

from nltk import pos_tag, word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
class POS_tagging(): | |
def __init__(self, concatString): | |
self.concatString = concatString | |
def handle_conjugation(self, tags): | |
# here we do the conjugation for verbs | |
new_sentence = [] | |
for index, item in enumerate(tags): | |
if item[1] not in ['VBP', 'DT', 'IN', 'TO', 'VBG', 'VBD', 'VBN', 'VBZ']: | |
new_sentence.append(item[0]) | |
elif item[1] in ['VBP', 'VBG', 'VBD', 'VBN', 'VBZ']: | |
new_verb = WordNetLemmatizer().lemmatize(item[0],'v') | |
if new_verb != "be": | |
new_sentence.append(new_verb) | |
return new_sentence | |
def make_predictions(self): | |
tags = pos_tag(word_tokenize(self.concatString)) | |
return self.handle_conjugation(tags) | |
def generate_pic(text_to_search, ax):
    """Draw a pictogram for *text_to_search* onto the matplotlib axis *ax*.

    First queries the ARASAAC pictogram API (ref:
    https://arasaac.org/developers/api); if no pictogram matches, falls back
    to OpenAI image generation. On fallback failure the axis is titled
    "Error!" instead of raising, so one bad word doesn't kill the whole grid.

    Args:
        text_to_search: Single word/phrase to look up.
        ax: Matplotlib axis to render into.
    """
    search_url = f"https://api.arasaac.org/api/pictograms/en/bestsearch/{text_to_search}"
    search_response = requests.get(search_url, timeout=30)
    search_json = search_response.json()
    if search_json:
        pic_url = f"https://api.arasaac.org/api/pictograms/{search_json[0]['_id']}?download=false"
        pic_response = requests.get(pic_url, timeout=30)
        img = Image.open(BytesIO(pic_response.content))
        ax.imshow(img)
        ax.set_title(text_to_search)
    else:
        try:
            response = openai.Image.create(
                prompt=text_to_search,
                n=2,  # NOTE(review): only data[0] is used below — n=1 would halve cost; confirm before changing
                size="512x512"
            )
            image_url = response['data'][0]['url']
            image_response = requests.get(image_url, timeout=30)
            img = Image.open(BytesIO(image_response.content))
            ax.imshow(img)
            ax.set_title(f"/{text_to_search}/")
        except Exception:
            # Was a bare `except:` — that also swallowed KeyboardInterrupt/SystemExit.
            ax.set_title("Error!")
    # Pictograms don't need axis ticks/labels.
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
# UI: record audio -> pick Whisper model -> button runs SpeechToText, which
# fills a 2x2 grid of (image, caption) pairs, one per story section.
with gr.Blocks(title="The Dream Steamer") as demo:
    gr.Markdown("# The Dream Steamer")
    gr.Markdown("This Application transforms your dreams into really cool pictures and makes it a more memorable experience.")
    gr.Markdown("With this application you can save your dreams and share them with your friends and family.")
    with gr.Row():
        # filepath type matches SpeechToText's whisper.load_audio(path) call
        audio = gr.Audio(label="Record your dream here", source="microphone", type="filepath")
    with gr.Row():
        dropdown = gr.Dropdown(label="Whisper Model", choices=WhisperModels, value='base')
    with gr.Row():
        btn1 = gr.Button("Show me my dream!")
    with gr.Column():
        with gr.Row():
            image1 = gr.Image(label="1", shape=(200, 200))
            text1 = gr.Text(label="1")
            image2 = gr.Image(label="2", shape=(200, 200))
            text2 = gr.Text(label="2")
        with gr.Row():
            image3 = gr.Image(label="3", shape=(200, 200))
            text3 = gr.Text(label="3")
            image4 = gr.Image(label="4", shape=(200, 200))
            text4 = gr.Text(label="4")
    # Output order must match SpeechToText's return: four images, then four texts.
    btn1.click(SpeechToText, inputs=[audio, dropdown], outputs=[image1, image2, image3, image4, text1, text2, text3, text4])
    gr.Markdown("Made by the Dreamers [Alireza](https://github.com/golali) [Erfan](https://github.com/golchini) and [Omidreza](https://github.com/omidreza-amrollahi)")
demo.launch()