File size: 5,260 Bytes
c64ff21
 
 
 
 
 
 
 
55a50d4
c64ff21
 
 
 
b7b153d
 
c64ff21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7b153d
 
c64ff21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cff3161
c64ff21
 
 
 
 
 
 
 
 
 
 
 
 
b89a48d
c64ff21
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import torch
from diffusers import DiffusionPipeline

# Text-generation pipelines, one per supported poem language.
pipe_ar = pipeline('text-generation', framework='pt', model='akhooli/ap2023', tokenizer='akhooli/ap2023')
pipe_en = pipeline("text-generation", model="ismaelfaro/gpt2-poems.en")
# Text-to-image pipeline used to illustrate the poem starter.
pipe_image = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
# Arabic -> English translation; Arabic starters are translated before being
# handed to the image pipeline.
pipe_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ar-en")

# Initialize text-to-speech models for Arabic and English
# Arabic: text-to-speech
synthesiser_arabic = pipeline("text-to-speech", model="facebook/mms-tts-ara")


# English: text-to-speech
synthesiser_english = pipeline("text-to-speech", model="microsoft/speecht5_tts")
# A single x-vector from the dataset is reused as the `speaker_embeddings`
# argument for every English synthesis call; unsqueeze(0) adds a leading
# dimension to the vector.
embeddings_dataset_english = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding_english = torch.tensor(embeddings_dataset_english[7306]["xvector"]).unsqueeze(0)

# Generate poem based on language and convert it to audio and image
def generate_poem(selected_language, text):
    """Generate a poem, its spoken audio and an illustrative image.

    Args:
        selected_language: Either "English" or "Arabic".
        text: Starter sentence used as the prompt for the poem. The image is
            generated from this starter (translated to English first when the
            language is Arabic), not from the generated poem.

    Returns:
        A tuple ``(poem, (sampling_rate, audio_data), image)``.

    Raises:
        ValueError: If ``selected_language`` is not a supported language
            (for example ``None`` when nothing was selected).
    """
    if selected_language == "English":
        poem = generate_poem_english(text)
        sampling_rate, audio_data = text_to_speech_english(poem)
        image_prompt = text
    elif selected_language == "Arabic":
        poem = generate_poem_arabic(text)
        sampling_rate, audio_data = text_to_speech_arabic(poem)
        # The image prompt is the starter translated from Arabic to English.
        image_prompt = translate_arabic_to_english(text)
    else:
        # Without this branch the function would fail later with an
        # UnboundLocalError that hides the real cause.
        raise ValueError(
            f"Unsupported language: {selected_language!r}; "
            "expected 'English' or 'Arabic'."
        )

    image = generate_image_from_poem(image_prompt)
    return poem, (sampling_rate, audio_data), image

# Poem generation for Arabic
def generate_poem_arabic(text):
    """Continue `text` with the Arabic text-generation pipeline.

    Returns the first generated sequence (the pipeline is called with
    ``return_full_text=True``) with every "-" character removed.
    """
    sampling_options = dict(
        do_sample=True,
        max_length=96,
        top_k=50,
        top_p=1.0,
        temperature=1.0,
        num_return_sequences=1,
        no_repeat_ngram_size=3,
        return_full_text=True,
    )
    candidates = pipe_ar(text, **sampling_options)
    raw_poem = candidates[0]["generated_text"]
    # The model emits dashes that are not wanted in the final poem.
    return raw_poem.replace("-", "")

# Poem generation for English
def generate_poem_english(text):
    """Continue `text` with the English text-generation pipeline.

    Returns the first generated sequence with dashes removed and every
    literal backslash-n sequence replaced by a space.
    """
    candidates = pipe_en(text, do_sample=True, max_length=50)
    raw_poem = candidates[0]['generated_text']
    without_dashes = raw_poem.replace("-", "")
    # The pattern below is the two-character sequence backslash + "n", not a
    # newline character.
    # NOTE(review): if real newline characters were meant, the pattern would
    # need to be "\n" - confirm against the model's actual output.
    return without_dashes.replace("\\n", " ")

# Text-to-speech conversion for Arabic
def text_to_speech_arabic(text):
    """Synthesise `text` with the Arabic text-to-speech pipeline.

    Returns a ``(sampling_rate, audio_data)`` tuple, where ``audio_data`` is
    the first element of the pipeline's "audio" output.
    """
    result = synthesiser_arabic(text)
    # Take element 0 of the audio output (the original author's note:
    # "flatten to 1D").
    waveform = result["audio"][0]
    return (result["sampling_rate"], waveform)

# Text-to-speech conversion for English
def text_to_speech_english(text):
    """Synthesise `text` with the English text-to-speech pipeline.

    The module-level speaker embedding is forwarded to the model as
    ``speaker_embeddings``. Returns a ``(sampling_rate, audio_data)`` tuple.
    """
    voice_params = {"speaker_embeddings": speaker_embedding_english}
    result = synthesiser_english(text, forward_params=voice_params)
    waveform = result["audio"]
    return (result["sampling_rate"], waveform)

# Image function
def generate_image_from_poem(poem_text):
    """Run the image pipeline on `poem_text` and return its first image."""
    pipeline_output = pipe_image(poem_text)
    return pipeline_output.images[0]

# Translation function from Arabic to English
def translate_arabic_to_english(text):
    """Return the translation pipeline's first 'translation_text' for `text`."""
    first_result = pipe_translator(text)[0]
    return first_result['translation_text']

# Stylesheet handed to the Gradio interface through its `css` argument:
# page colours, container border/shadow, and label, text input, button and
# dropdown styling.
custom_css = """
body {
    background-color: #f4f4f9;
    color: #333;
}
.gradio-container {
    border-radius: 10px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    background-color: #fff;
}
label {
    color: #4A90E2;
    font-weight: bold;
}

input[type="text"],
textarea {
    border: 1px solid #4A90E2;
}
textarea {
    height: 150px;
}

button {
    background-color: #4A90E2;
    color: #fff;
    border-radius: 5px;
    cursor: pointer;
}
button:hover {
    background-color: #357ABD;
}

.dropdown {
    border: 1px solid #4A90E2;
    border-radius: 4px;
}

"""
# Each example row is [language, poem starter], in the same order as the
# interface inputs below.
examples = [["English", "The night sky is filled with stars and dreams"]]


my_model = gr.Interface(
    fn=generate_poem,  # Primary function that receives the inputs (language and poem starter)
    inputs=[
        gr.Dropdown(["English", "Arabic"], label="Select Language"),  # Language of the poem: "English" or "Arabic"
        gr.Textbox(label="Enter a sentence"),  # Sentence or phrase that starts the poem
    ],
    outputs=[
        gr.Textbox(label="Generated Poem", lines=10),  # Displays the generated poem
        gr.Audio(label="Generated Audio", type="numpy"),  # Audio rendering of the generated poem
        gr.Image(label="Generated Image"),  # Image generated from the poem starter
    ],
    examples=examples,  # Predefined examples showing how to use the interface
    css=custom_css,  # Apply the custom CSS
)

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. for testing) does not launch the app.
if __name__ == "__main__":
    my_model.launch()