Spaces:

pain
/

Arabic_story_generator

Sleeping

App Files Files Community

pain committed on Jun 2

Commit

87727f0

•

1 Parent(s): 33beb7e

Upload 5 files

Browse files

Files changed (6) hide show

.gitattributes +1 -0
app.py +86 -0
image_generator.py +72 -0
llm_models.py +128 -0
requirements.txt +6 -0
تطبيق الراوي.pdf +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+تطبيق[[:space:]]الراوي.pdf filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import gradio as gr
+from llm_models import get_text_image_pairs
+import time
+from tqdm import tqdm
+title_markdown = ("""
+<div style="display: flex; justify-content: center; align-items: center; text-align: center; direction: rtl;">
+  <img src="https://s11.ax1x.com/2023/12/28/piqvDMV.png" alt="MoE-LLaVA🚀" style="max-width: 120px; height: auto; margin-right: 20px;">
+  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center;">
+    <h1 style="margin: 0; font-size: 4em;">الراوي</h1>
+                  <br>
+    <h2 style="margin: 0; font-size: 1.5em;">صانع القصص بالذكاء الاصطناعي التوليدي</h2>
+  </div>
+</div>
+""")
+def get_text_images_values(k, input_prompt):
+    pages = int(k)
+    segments_list, images_names =  get_text_image_pairs(pages,input_prompt)
+    return segments_list, images_names
+css = """
+.gradio-container {direction: rtl}
+.gradio-container-4-18-0 .prose h1 {direction: rtl};
+}
+"""
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(title_markdown)
+    prompt = gr.Textbox(label="معلومات بسيطة عن القصة",
+                info="أدخل بعض المعلومات عن القصة، مثلاً: خالد صبي في الرابعة من عمره، ويحب أن يصبح طياراً في المستقبل",
+                placeholder="خالد صبي في الرابعة من عمره، ويحب أن يصبح طياراً في المستقبل",
+                text_align="right",
+                rtl=True,
+                elem_classes="rtl-textbox",
+                elem_id="rtl-textbox")
+    with gr.Row():
+        max_textboxes = 10 # Define the max number of textboxed, so we will add the max number of textboxes and images to the layout
+        def variable_outputs(k, segments_list):
+            k = int(k)
+            return [gr.Textbox(label= f"الصفحة رقم {i+1}", value=item, text_align="right", visible=True) for i, item in enumerate(segments_list)] + [gr.Textbox(visible=False, text_align="right", rtl=True)]*(max_textboxes-k)
+        def variable_outputs_image(k,images_names):
+            k = int(k)
+            return [gr.Image(value=item, scale=1, visible=True) for item in images_names] + [gr.Image(scale=1,visible=False)]*(max_textboxes-k)
+        with gr.Column():
+            s = gr.Slider(1, max_textboxes, value=1, step=1, info="أقصى عدد صفحات يمكن توليده هو 10 صفحات",label="كم عدد صفحات القصة التي تريدها؟")
+            textboxes = []
+            imageboxes = []
+            for i in tqdm(range(max_textboxes)):
+                with gr.Row():
+                    i_t = gr.Image(visible=False)
+                    t = gr.Textbox(visible=False)
+                    imageboxes.append(i_t)
+                    textboxes.append(t)
+            segment_list = gr.JSON(value=[],visible=False)
+            images_list = gr.JSON(value=[], visible=False)
+    submit = gr.Button(value="أنشئ القصة الآن")
+    submit.click(
+        fn=get_text_images_values,
+        inputs=[s,prompt],
+        outputs=[segment_list, images_list]
+    ).then(
+        fn=variable_outputs,
+        inputs=[s, segment_list],
+        outputs=textboxes,
+    ).then(
+        fn=variable_outputs_image,
+        inputs=[s, images_list],
+        outputs=imageboxes,
+    )
+demo.launch()

image_generator.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import os
+import io
+import warnings
+from PIL import Image
+from stability_sdk import client
+import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+import uuid
+# Our Host URL should not be prepended with "https" nor should it have a trailing slash.
+os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'
+# Sign up for an account at the following link to get an API Key.
+# https://platform.stability.ai/
+# Click on the following link once you have created an account to be taken to your API Key.
+# https://platform.stability.ai/account/keys
+# Paste your API Key below.
+os.environ['STABILITY_KEY'] = 'key_here'
+# Set up our connection to the API.
+stability_api = client.StabilityInference(
+    key=os.environ['STABILITY_KEY'], # API Key reference.
+    verbose=True, # Print debug messages.
+    engine="stable-diffusion-xl-1024-v1-0", # Set the engine to use for generation.
+    # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
+)
+def get_image(prompt):
+    # Set up our initial generation parameters.
+    answers = stability_api.generate(
+        prompt=prompt, # The prompt we want to generate an image from.
+        seed=4253978046, # If a seed is provided, the resulting generated image will be deterministic.
+                        # What this means is that as long as all generation parameters remain the same, you can always recall the same image simply by generating it again.
+                        # Note: This isn't quite the case for Clip Guided generations, which we'll tackle in a future example notebook.
+        steps=30, # Amount of inference steps performed on image generation. Defaults to 30.
+        cfg_scale=8.0, # Influences how strongly your generation is guided to match your prompt.
+                    # Setting this value higher increases the strength in which it tries to match your prompt.
+                    # Defaults to 7.0 if not specified.
+        width=512, # Generation width, defaults to 512 if not included.
+        height=512, # Generation height, defaults to 512 if not included.
+        samples=1, # Number of images to generate, defaults to 1 if not included.
+        sampler=generation.SAMPLER_K_DPMPP_2M # Choose which sampler we want to denoise our generation with.
+                                                    # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
+                                                    # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
+    )
+    # print("Finish the prompt")
+    # Set up our warning to print to the console if the adult content classifier is tripped.
+    # If adult content classifier is not tripped, save generated images.
+    for resp in answers:
+        for artifact in resp.artifacts:
+            # if artifact.finish_reason == generation.FILTER:
+            #     print(artifact.finish_reason)
+            #     print("Warning")
+            #     warnings.warn(
+            #         "Your request activated the API's safety filters and could not be processed."
+            #         "Please modify the prompt and try again.")
+            if artifact.type == generation.ARTIFACT_IMAGE:
+                img = Image.open(io.BytesIO(artifact.binary))
+                unique_filename = str(uuid.uuid4())
+                img.save(str(unique_filename)+ ".png") # Save our generated images with their seed number as the filename.
+    return unique_filename + ".png"

llm_models.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import json
+import os
+from typing import List
+from openai import OpenAI
+from pydantic import BaseModel
+from image_generator import get_image
+# Structured story payload. generate_story sends this model's JSON schema as the
+# function parameters and parses the returned arguments back into it.
+# Documented with comments rather than a class docstring so the schema produced
+# by model_json_schema() stays unchanged (assumption: pydantic would copy a
+# docstring into the schema description — confirm before adding one).
+class StepByStepAIResponse(BaseModel):
+    title: str
+    story_segments: List[str]  # story text; passed to get_Arabic_translation
+    image_prompts: List[str]  # prompts; each one is passed to get_image
+# Structured translation payload. get_Arabic_translation sends this model's JSON
+# schema as the function parameters and parses the returned arguments into it.
+class GetTranslation(BaseModel):
+    translated_text: List[str]  # translated segments returned to the UI
+client = OpenAI(api_key="key_here")
+def generate_story(k, prompt):
+        """ Generate a story with k segments and initial prompt"""
+        response = client.chat.completions.create(
+        model="gpt-4-turbo-preview",
+        messages=[
+            {
+            "role": "system",
+            "content": f"""
+            Your expertise lies in weaving captivating narratives for children, complemented by images that vividly bring each tale to life. Embark on a creative endeavor to construct a story segmented into {k} distinct chapters, each a cornerstone of an enchanting journey for a young audience.
+            The input prompt will be on Arabic, but the output must be in English.
+            **Task Overview**:
+            1. **Story Development**:
+            - Craft a narrative divided into {k} parts, with a strict 50-word limit for each.
+            - Start with an engaging introduction that lays the foundation for the adventure.
+            - Ensure each part naturally progresses from the previous, crafting a fluid story that escalates to an exhilarating climax.
+            - Wrap up the narrative with a gratifying conclusion that ties all story threads together.
+            - Keep character continuity intact across the story, with consistent presence from beginning to end.
+            - You must describe the characters in details in every image prompt.
+            - Use language and themes that are child-friendly, imbued with wonder, and easy to visualize.
+            - The story will talk about {prompt}
+            2. **Image Generation Instructions for Image Models**:
+            - For every story part, create a comprehensive prompt for generating an image that encapsulates the scene's essence. Each prompt should:
+                - Offer a detailed description of the scene, characters, and critical elements, providing enough specificity for the image model to create a consistent and coherent visual.
+                - Request the images be in an anime style to ensure visual consistency throughout.
+                - Given the image model's isolated processing, reintroduce characters, settings, and pivotal details in each prompt to maintain narrative and visual continuity.
+                - Focus on visual storytelling components that enhance the story segments, steering clear of direct text inclusion in the images.
+            **Key Points**:
+            - Due to the image model's lack of recall, stress the need for self-contained prompts that reintroduce crucial elements each time. This strategy guarantees that, although generated independently, each image mirrors a continuous and cohesive visual story.
+            Through your skill in melding textual and visual storytelling, you will breathe life into this magical tale, offering young readers a journey to remember through both prose and illustration.
+            """
+            },
+        ],
+        functions=[
+            {
+                "name": "get_story_segments_and_image_prompts",
+                "description": "Get user answer in series of segment and image prompts",
+                "parameters": StepByStepAIResponse.model_json_schema(),
+            }
+        ],
+        function_call={"name": "get_story_segments_and_image_prompts"},  # Corrected to match the defined function name
+        temperature=1,
+        max_tokens=1000,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0
+        )
+        output = json.loads(response.choices[0].message.function_call.arguments)
+        sbs = StepByStepAIResponse(**output)
+        return sbs
+def get_Arabic_translation(story_segments):
+        response = client.chat.completions.create(
+        model="gpt-4-turbo-preview",
+        messages=[
+            {
+            "role": "system",
+            "content":
+            f"""
+            You are an expert translator of text from English to Arabic.
+            On the following, you can find the input text that you need to translate to Arabic:
+            {story_segments}
+            Translate it from English to Arabic.
+            """
+            },
+        ],
+        functions=[
+            {
+                "name": "translate_text_from_english_to_arabic",
+                "description": "Translate the text from English to Arabic.",
+                "parameters": GetTranslation.model_json_schema(),
+            }
+        ],
+        function_call={"name": "translate_text_from_english_to_arabic"},  # Corrected to match the defined function name
+        temperature=1,
+        max_tokens=1000,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0
+        )
+        output = json.loads(response.choices[0].message.function_call.arguments)
+        sbs = GetTranslation(**output)
+        return sbs
+def get_text_image_pairs(k, prompt):
+    describtion = generate_story(k, prompt)
+    segements_translation = get_Arabic_translation(describtion.story_segments)
+    images_names = [get_image(itm) for itm in describtion.image_prompts]
+    return (segements_translation.translated_text, images_names)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio==4.18.0
+gradio_client==0.10.0
+openai==1.12.0
+pydantic==2.6.1
+rich==13.7.0
+stability-sdk==0.8.5

تطبيق الراوي.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:509a398d5629418376c7797bd4ffc369cc8c428b78ced09ebcc6b28462bbce24
+size 4607682