Spaces:
Build error
Build error
init commit
Browse files- README.md +4 -12
- app.py +243 -0
- packages.txt +1 -0
- requirements.txt +11 -0
README.md
CHANGED
@@ -1,13 +1,5 @@
|
|
1 |
-
|
2 |
-
title: Dreamsteam
|
3 |
-
emoji: 📊
|
4 |
-
colorFrom: indigo
|
5 |
-
colorTo: yellow
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 3.27.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: cc-by-nc-nd-4.0
|
11 |
-
---
|
12 |
|
13 |
-
|
|
|
|
|
|
1 |
+
# Speech To Pictogram
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
|
4 |
+
## Deployed to:
|
5 |
+
https://huggingface.co/spaces/omidreza/speechtopictogram
|
app.py
ADDED
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import json
|
3 |
+
import random
|
4 |
+
import whisper
|
5 |
+
import gradio as gr
|
6 |
+
WhisperModels = ['tiny', 'base', 'small', 'medium', 'large']
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import matplotlib
|
9 |
+
import requests
|
10 |
+
matplotlib.use('AGG')
|
11 |
+
import io
|
12 |
+
from PIL import Image
|
13 |
+
import PIL
|
14 |
+
from io import BytesIO
|
15 |
+
import openai
|
16 |
+
import os
|
17 |
+
openai.organization = os.getenv('organization')
|
18 |
+
openai.api_key = os.getenv('api_key')
|
19 |
+
|
20 |
+
def get_story(dream):
    """Expand a raw dream transcript into a 4-section illustrated story.

    Asks the OpenAI completion endpoint to rewrite *dream* as a JSON array
    of 4 objects with keys ``section``, ``story`` and ``alt_text``; the
    alt text is later fed to the image generator.

    Args:
        dream: Free-form dream text (the Whisper transcript).

    Returns:
        Raw completion text, expected to contain a JSON array that
        ``get_array`` can parse.
    """
    response = openai.Completion.create(
        model="text-davinci-003",
        # Fixed prompt typos ("m going to" -> "I'm going to",
        # "out of in one" -> "out of it in one") so the instruction reads
        # cleanly for the model.
        prompt=f"I'm going to tell you of my dream and i want you to make a better more and more detailed story out of it in one json array so i can create a booklet with image generation. Can you split it into 4 sections and give it 3 keys: section= nr of section, story= containing the story, alt_text= the alt text(make sure that the alt text is overall consistent and map each person in it to a known movie character):{dream}",
        temperature=0.7,
        max_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response["choices"][0]["text"]
|
31 |
+
|
32 |
+
def get_image(text):
    """Render *text* as a 512x512 PNG via the Stability text-to-image API.

    Args:
        text: Alt-text / scene description to illustrate.

    Returns:
        Path of the PNG file written to the working directory.

    Raises:
        Exception: if the API key is missing or the API returns non-200.
    """
    engine_id = "stable-diffusion-xl-beta-v2-2-2"
    api_host = "https://api.stability.ai"
    stability_key = os.getenv('stability_key')

    if stability_key is None:
        raise Exception("Missing Stability API key.")

    response = requests.post(
        f"{api_host}/v1/generation/{engine_id}/text-to-image",
        headers={
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {stability_key}"
        },
        json={
            "text_prompts": [
                {
                    "text": f"animated surreal with colors and creepy faces everything detailed, {text}"
                }
            ],
            "cfg_scale": 25,
            "clip_guidance_preset": "FAST_BLUE",
            "height": 512,
            "width": 512,
            "samples": 1,
            "steps": 50,
            # Fixed seed keeps generations reproducible for the same prompt.
            "seed": 4294967295,
        },
    )

    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))

    data = response.json()

    # Fixed: random.randint(0, 1000) could collide between the 4 images of
    # one dream and silently overwrite files from earlier runs (the old
    # "#To change:" note). A uuid4 hex name is effectively unique.
    import uuid
    filename = f"{uuid.uuid4().hex}.png"

    with open(filename, "wb") as f:
        f.write(base64.b64decode(data["artifacts"][0]["base64"]))

    return filename
|
75 |
+
|
76 |
+
def get_array(dream):
    """Extract and parse the JSON array embedded in a completion string.

    The model reply usually has chatter before (and sometimes after) the
    JSON payload, so we slice from the first ``[`` to the matching last
    ``]`` before parsing.

    Args:
        dream: Raw completion text from ``get_story``.

    Returns:
        The parsed list of section objects.

    Raises:
        ValueError: if the string contains no JSON array at all.
    """
    start = dream.find("[")
    end = dream.rfind("]")
    # Fixed: find() returning -1 used to feed garbage into json.loads;
    # trailing chatter after the closing bracket also used to break parsing.
    if start == -1 or end < start:
        raise ValueError("No JSON array found in model response.")
    return json.loads(dream[start:end + 1])
|
87 |
+
|
88 |
+
def SpeechToText(audio, SelectedModel):
    """Transcribe a dream recording and turn it into a 4-part picture story.

    Pipeline: Whisper transcription -> GPT story expansion (``get_story``)
    -> JSON parsing (``get_array``) -> one Stability image per section
    (``get_image``).

    Args:
        audio: Path of the recorded audio file (gradio mic widget).
        SelectedModel: Whisper model size, one of ``WhisperModels``.

    Returns:
        img1..img4 file paths followed by text1..text4 story strings, in
        the order the gradio outputs expect.
    """
    print('Loading model...')
    model = whisper.load_model(SelectedModel)

    print('Loading audio...')
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    print('Creating log-mel spectrogram...')
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    print('Detecting language...')
    _, probs = model.detect_language(mel)
    lang = f"Language: {max(probs, key=probs.get)}"  # currently unused; kept for debugging

    print('Decoding audio to text...')
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    story = get_story(result.text)
    print("Text: " + story)
    sections = get_array(story)

    # Generate one image + caption per section instead of the copy-pasted
    # img1/img2/img3/img4 blocks; only the first 4 sections are used.
    images = []
    texts = []
    for section in sections[:4]:
        images.append(get_image(section["alt_text"]))
        texts.append(section["story"])
        print('image added')

    return images[0], images[1], images[2], images[3], texts[0], texts[1], texts[2], texts[3]
|
128 |
+
|
129 |
+
def clean_text(text):
    """Lower-case *text*, strip basic punctuation and re-capitalise "i".

    Commas, dots, question marks and hyphens become spaces, the text is
    split into words, and the standalone pronoun "i" is restored to "I".
    More punctuation (e.g. "!") may need handling in the future.

    Args:
        text: Raw sentence, e.g. a Whisper transcript.

    Returns:
        Tuple of (list of cleaned words, the words joined by single spaces).
    """
    print("cleaning text: ", text)
    # One C-level translate pass instead of four chained .replace() calls;
    # split() with no argument drops the resulting empty runs.
    words = text.lower().translate(str.maketrans(",.?-", "    ")).split()
    cleaned = ["I" if word == "i" else word for word in words]
    return cleaned, ' '.join(cleaned)
|
152 |
+
|
153 |
+
# NLP setup: NLTK data needed by POS_tagging below. Downloads run at
# import time so the Space has the corpora before the first request.
import nltk
nltk.download('punkt')  # tokenizer models for word_tokenize
nltk.download('averaged_perceptron_tagger')  # tagger used by pos_tag
nltk.download('wordnet')  # lemmatizer dictionary
nltk.download('omw-1.4')  # open multilingual wordnet data
# Also search the path where the Docker/Spaces image stores NLTK data.
nltk.data.path.append('/root/nltk_data')
from nltk import pos_tag, word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
|
161 |
+
|
162 |
+
class POS_tagging():
    """POS-tag a sentence and strip/normalise function words.

    Determiners (DT), prepositions (IN) and "to" (TO) are dropped; verbs
    are reduced to their lemma (except "be", which is dropped) so each
    remaining word maps more reliably to a pictogram.
    """

    def __init__(self, concatString):
        # Sentence to process, as produced by clean_text().
        self.concatString = concatString

    def handle_conjugation(self, tags):
        """Filter (word, tag) pairs into a list of pictogram-friendly words.

        Args:
            tags: list of (token, Penn Treebank tag) tuples from pos_tag.

        Returns:
            Kept and lemmatised words, in original order.
        """
        new_sentence = []
        # Fixed: the original used enumerate() but never read the index.
        for word, tag in tags:
            if tag not in ['VBP', 'DT', 'IN', 'TO', 'VBG', 'VBD', 'VBN', 'VBZ']:
                new_sentence.append(word)
            elif tag in ['VBP', 'VBG', 'VBD', 'VBN', 'VBZ']:
                # Conjugated verb -> base form; drop the copula "be".
                new_verb = WordNetLemmatizer().lemmatize(word, 'v')
                if new_verb != "be":
                    new_sentence.append(new_verb)
        return new_sentence

    def make_predictions(self):
        """Tokenize + tag self.concatString and return the filtered words."""
        tags = pos_tag(word_tokenize(self.concatString))
        return self.handle_conjugation(tags)
|
179 |
+
|
180 |
+
def generate_pic(text_to_search, ax):
    """Draw a pictogram for *text_to_search* onto matplotlib axes *ax*.

    Tries the ARASAAC pictogram API first (ref:
    https://arasaac.org/developers/api); when no pictogram matches, falls
    back to the OpenAI image API. The image is drawn on *ax* with the
    search text as title ("/text/" marks the fallback, "Error!" marks a
    failed fallback), and the axis ticks are hidden either way.

    Args:
        text_to_search: Single word/phrase to look up.
        ax: matplotlib Axes to draw on (mutated in place).
    """
    search_url = f"https://api.arasaac.org/api/pictograms/en/bestsearch/{text_to_search}"
    search_response = requests.get(search_url)
    search_json = search_response.json()
    if search_json:
        pic_url = f"https://api.arasaac.org/api/pictograms/{search_json[0]['_id']}?download=false"
        pic_response = requests.get(pic_url)
        img = Image.open(BytesIO(pic_response.content))
        ax.imshow(img)
        ax.set_title(text_to_search)
    else:
        try:
            response = openai.Image.create(
                prompt=text_to_search,
                n=2,
                size="512x512"
            )
            image_url = response['data'][0]['url']
            image_response = requests.get(image_url)
            img = Image.open(BytesIO(image_response.content))
            ax.imshow(img)
            ax.set_title(f"/{text_to_search}/")
        # Fixed: bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        except Exception:
            ax.set_title("Error!")
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
|
215 |
+
|
216 |
+
# Gradio UI: record a dream, pick a Whisper model size, and get back a
# 4-panel illustrated story (one image + one caption per section).
# NOTE(review): `source=` on Audio and `shape=` on Image are gradio 3.x
# kwargs — confirm against the pinned gradio version before upgrading.
with gr.Blocks(title="The Dream Steamer") as demo:
    gr.Markdown("# The Dream Steamer")
    gr.Markdown("This Application transforms your dreams into really cool pictures and makes it a more memorable experience.")
    gr.Markdown("With this application you can save your dreams and share them with your friends and family.")
    with gr.Row():
        # Microphone input; type="filepath" so SpeechToText receives a path.
        audio = gr.Audio(label="Record your dream here",source="microphone", type="filepath")
    with gr.Row():
        dropdown = gr.Dropdown(label="Whisper Model", choices=WhisperModels, value='base')
    with gr.Row():
        btn1 = gr.Button("Show me my dream!")
    with gr.Column():
        # 2x2 grid: each row holds two (image, caption) pairs.
        with gr.Row():
            image1 = gr.Image(label="1", shape=(200,200))
            text1= gr.Text(label="1")
            image2 = gr.Image(label="2", shape=(200,200))
            text2= gr.Text(label="2")
        with gr.Row():
            image3 = gr.Image(label="3", shape=(200,200))
            text3= gr.Text(label="3")
            image4 = gr.Image(label="4", shape=(200,200))
            text4= gr.Text(label="4")

    # Wire the button to the full pipeline; the outputs list must match
    # SpeechToText's return order (four images, then four texts).
    btn1.click(SpeechToText, inputs=[audio, dropdown], outputs=[image1, image2, image3, image4, text1, text2, text3, text4])


    gr.Markdown("Made by the Dreamers [Alireza](https://github.com/golali) [Erfan](https://github.com/golchini) and [Omidreza](https://github.com/omidreza-amrollahi)")

demo.launch()
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.5.2
|
2 |
+
Pillow==9.2.0
|
3 |
+
git+https://github.com/openai/whisper.git
|
4 |
+
nltk==3.7
|
5 |
+
requests==2.28.1
|
6 |
+
streamlit==1.14
|
7 |
+
ffmpeg-python
|
8 |
+
pydub==0.25.1
|
9 |
+
setuptools-rust==1.5.2
|
10 |
+
openai
|
11 |
+
gradio==3.9.1
|