pain committed on
Commit
87727f0
1 Parent(s): 33beb7e

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ تطبيق[[:space:]]الراوي.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from llm_models import get_text_image_pairs
import time
from tqdm import tqdm

# Landing-page header, rendered as raw HTML by gr.Markdown: RTL layout with
# the app logo and the Arabic title "الراوي" (The Narrator) plus subtitle.
title_markdown = ("""
<div style="display: flex; justify-content: center; align-items: center; text-align: center; direction: rtl;">
<img src="https://s11.ax1x.com/2023/12/28/piqvDMV.png" alt="MoE-LLaVA🚀" style="max-width: 120px; height: auto; margin-right: 20px;">
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center;">
<h1 style="margin: 0; font-size: 4em;">الراوي</h1>
<br>
<h2 style="margin: 0; font-size: 1.5em;">صانع القصص بالذكاء الاصطناعي التوليدي</h2>
</div>
</div>
""")

def get_text_images_values(k, input_prompt):
    """Generate a k-page story from an Arabic prompt.

    Args:
        k: Requested number of pages (comes from a gr.Slider, so it may
           arrive as a float; coerced to int here).
        input_prompt: Free-text Arabic description of the story.

    Returns:
        Tuple ``(segments_list, images_names)``: the Arabic story segments
        and the generated image file names, one per page.
    """
    pages = int(k)

    segments_list, images_names = get_text_image_pairs(pages, input_prompt)
    return segments_list, images_names

# Global CSS forcing right-to-left rendering for the Arabic UI.
# Fixed: the original second rule ended with an invalid "};" followed by a
# stray "}" — malformed CSS that browsers would drop.
css = """
.gradio-container {direction: rtl}
.gradio-container-4-18-0 .prose h1 {direction: rtl}
"""
with gr.Blocks(css=css) as demo:

    gr.Markdown(title_markdown)

    prompt = gr.Textbox(label="معلومات بسيطة عن القصة",
                        info="أدخل بعض المعلومات عن القصة، مثلاً: خالد صبي في الرابعة من عمره، ويحب أن يصبح طياراً في المستقبل",
                        placeholder="خالد صبي في الرابعة من عمره، ويحب أن يصبح طياراً في المستقبل",
                        text_align="right",
                        rtl=True,
                        elem_classes="rtl-textbox",
                        elem_id="rtl-textbox")

    with gr.Row():

        # Maximum number of story pages. Gradio needs a fixed layout, so we
        # pre-create this many (image, textbox) rows and toggle visibility.
        max_textboxes = 10

        def variable_outputs(k, segments_list):
            """Return exactly max_textboxes Textbox updates: one visible box
            per story segment, the rest hidden.

            Padding is computed from the ACTUAL number of segments rather
            than the slider value k — if the LLM returns more or fewer
            segments than requested, the original `max_textboxes - k`
            padding produced the wrong total and broke the event outputs.
            """
            shown = [gr.Textbox(label=f"الصفحة رقم {i+1}", value=item,
                                text_align="right", rtl=True, visible=True)
                     for i, item in enumerate(segments_list)]
            # rtl=True added to visible boxes for consistency with the
            # hidden placeholders below.
            hidden = [gr.Textbox(visible=False, text_align="right",
                                 rtl=True)] * (max_textboxes - len(shown))
            return shown + hidden

        def variable_outputs_image(k, images_names):
            """Return exactly max_textboxes Image updates: one visible image
            per generated file, the rest hidden (padding mirrors
            variable_outputs and uses the real list length)."""
            shown = [gr.Image(value=item, scale=1, visible=True)
                     for item in images_names]
            return shown + [gr.Image(scale=1, visible=False)] * (max_textboxes - len(shown))

        with gr.Column():
            s = gr.Slider(1, max_textboxes, value=1, step=1,
                          info="أقصى عدد صفحات يمكن توليده هو 10 صفحات",
                          label="كم عدد صفحات القصة التي تريدها؟")
            textboxes = []
            imageboxes = []
            # Build the hidden placeholder rows once at startup.
            for i in range(max_textboxes):
                with gr.Row():
                    i_t = gr.Image(visible=False)
                    t = gr.Textbox(visible=False)
                    imageboxes.append(i_t)
                    textboxes.append(t)

            # Hidden JSON holders used to hand the generation results from
            # the first callback to the two chained rendering callbacks.
            segment_list = gr.JSON(value=[], visible=False)
            images_list = gr.JSON(value=[], visible=False)

            submit = gr.Button(value="أنشئ القصة الآن")

            submit.click(
                fn=get_text_images_values,
                inputs=[s, prompt],
                outputs=[segment_list, images_list]
            ).then(
                fn=variable_outputs,
                inputs=[s, segment_list],
                outputs=textboxes,
            ).then(
                fn=variable_outputs_image,
                inputs=[s, images_list],
                outputs=imageboxes,
            )

demo.launch()
image_generator.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import io
import warnings
from PIL import Image
from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
import uuid


# Our Host URL should not be prepended with "https" nor should it have a
# trailing slash.
os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'

# Sign up at https://platform.stability.ai/ to get an API key, then find it
# at https://platform.stability.ai/account/keys
#
# SECURITY NOTE: never commit a real key. Inject it via the STABILITY_KEY
# environment variable. setdefault() keeps any key already present in the
# environment — the original code unconditionally overwrote a real key with
# the 'key_here' placeholder.
os.environ.setdefault('STABILITY_KEY', 'key_here')

# Set up our connection to the Stability API.
stability_api = client.StabilityInference(
    key=os.environ['STABILITY_KEY'],        # API key reference.
    verbose=True,                           # Print debug messages.
    engine="stable-diffusion-xl-1024-v1-0", # Generation engine.
    # Available engines:
    # https://platform.stability.ai/docs/features/api-parameters#engine
)
def get_image(prompt):
    """Generate one image for *prompt* via Stability AI and save it as a PNG.

    Args:
        prompt: English text description of the desired image.

    Returns:
        The saved file name, ``"<uuid4>.png"``.

    Raises:
        RuntimeError: if the API returned no image artifact (e.g. the
            request tripped the content safety filter).
    """
    # Initial generation parameters.
    answers = stability_api.generate(
        prompt=prompt,
        seed=4253978046,  # Fixed seed: identical parameters reproduce the same image.
        steps=30,         # Inference steps (API default is 30).
        cfg_scale=8.0,    # Prompt-guidance strength (API default is 7.0).
        width=512,        # Generation width (API default 512).
        height=512,       # Generation height (API default 512).
        samples=1,        # One image per call.
        sampler=generation.SAMPLER_K_DPMPP_2M  # Denoising sampler (default k_dpmpp_2m).
        # (Available: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2,
        # k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
    )

    # Save the first image artifact under a collision-free uuid4 filename.
    # The original silently fell through and returned None when the safety
    # filter blocked generation, crashing the caller later; now we warn and
    # fail loudly instead.
    for resp in answers:
        for artifact in resp.artifacts:
            if artifact.finish_reason == generation.FILTER:
                warnings.warn(
                    "Your request activated the API's safety filters and could not be "
                    "processed. Please modify the prompt and try again.")

            if artifact.type == generation.ARTIFACT_IMAGE:
                img = Image.open(io.BytesIO(artifact.binary))
                unique_filename = str(uuid.uuid4()) + ".png"
                img.save(unique_filename)
                return unique_filename

    raise RuntimeError("Stability API returned no image for the given prompt.")
+
llm_models.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import os
from openai import OpenAI
from pydantic import BaseModel
from typing import List
from image_generator import get_image

class StepByStepAIResponse(BaseModel):
    """Structured story output forced via OpenAI function calling.

    Parallel lists: one English story segment and one image-generation
    prompt per page, plus an overall title.
    """
    title: str                # Story title.
    story_segments: List[str] # English text of each page (≤ 50 words each).
    image_prompts: List[str]  # Self-contained image prompt for each page.

class GetTranslation(BaseModel):
    """Structured translation output: Arabic rendering of each segment,
    in the same order as the input."""
    translated_text: List[str]


# SECURITY NOTE: never commit a real key. Prefer the OPENAI_API_KEY
# environment variable; the placeholder is only a last-resort fallback so
# the module still imports without configuration.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "key_here"))
def generate_story(k, prompt):
    """Generate a children's story with *k* segments from an Arabic prompt.

    Forces a structured response via OpenAI function calling so the output
    parses directly into StepByStepAIResponse (title, English story
    segments, and one image prompt per segment).

    Args:
        k: Number of story segments (pages) to generate.
        prompt: Arabic free-text description of the desired story.

    Returns:
        StepByStepAIResponse parsed from the model's function-call arguments.

    Raises:
        json.JSONDecodeError: if the returned arguments are not valid JSON.
    """
    # NOTE(review): `functions`/`function_call` are deprecated in openai>=1.x
    # in favour of `tools`/`tool_choice`; they still work with the pinned
    # openai==1.12.0, so they are kept for compatibility.
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {
                "role": "system",
                "content": f"""
                Your expertise lies in weaving captivating narratives for children, complemented by images that vividly bring each tale to life. Embark on a creative endeavor to construct a story segmented into {k} distinct chapters, each a cornerstone of an enchanting journey for a young audience.
                The input prompt will be in Arabic, but the output must be in English.

                **Task Overview**:

                1. **Story Development**:
                - Craft a narrative divided into {k} parts, with a strict 50-word limit for each.
                - Start with an engaging introduction that lays the foundation for the adventure.
                - Ensure each part naturally progresses from the previous, crafting a fluid story that escalates to an exhilarating climax.
                - Wrap up the narrative with a gratifying conclusion that ties all story threads together.
                - Keep character continuity intact across the story, with consistent presence from beginning to end.
                - You must describe the characters in details in every image prompt.
                - Use language and themes that are child-friendly, imbued with wonder, and easy to visualize.
                - The story will talk about {prompt}

                2. **Image Generation Instructions for Image Models**:
                - For every story part, create a comprehensive prompt for generating an image that encapsulates the scene's essence. Each prompt should:
                - Offer a detailed description of the scene, characters, and critical elements, providing enough specificity for the image model to create a consistent and coherent visual.
                - Request the images be in an anime style to ensure visual consistency throughout.
                - Given the image model's isolated processing, reintroduce characters, settings, and pivotal details in each prompt to maintain narrative and visual continuity.
                - Focus on visual storytelling components that enhance the story segments, steering clear of direct text inclusion in the images.

                **Key Points**:
                - Due to the image model's lack of recall, stress the need for self-contained prompts that reintroduce crucial elements each time. This strategy guarantees that, although generated independently, each image mirrors a continuous and cohesive visual story.

                Through your skill in melding textual and visual storytelling, you will breathe life into this magical tale, offering young readers a journey to remember through both prose and illustration.

                """
            },
        ],
        functions=[
            {
                "name": "get_story_segments_and_image_prompts",
                "description": "Get user answer in series of segment and image prompts",
                "parameters": StepByStepAIResponse.model_json_schema(),
            }
        ],
        function_call={"name": "get_story_segments_and_image_prompts"},
        temperature=1,
        # Fixed: 1000 tokens is too small for 10 segments plus 10 detailed
        # image prompts; the truncated JSON arguments then crashed
        # json.loads below. 4000 leaves comfortable headroom.
        max_tokens=4000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    # The model is forced to "call" our function; its arguments field carries
    # the JSON payload matching StepByStepAIResponse's schema.
    output = json.loads(response.choices[0].message.function_call.arguments)

    sbs = StepByStepAIResponse(**output)

    return sbs
def get_Arabic_translation(story_segments):
    """Translate the English story segments to Arabic via function calling.

    Args:
        story_segments: List of English story-segment strings (interpolated
            into the system prompt).

    Returns:
        GetTranslation whose ``translated_text`` holds the Arabic
        translation of each segment, in order.

    Raises:
        json.JSONDecodeError: if the returned arguments are not valid JSON.
    """
    # NOTE(review): `functions`/`function_call` are deprecated in openai>=1.x
    # but still supported by the pinned openai==1.12.0.
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {
                "role": "system",
                "content":

                f"""
                You are an expert translator of text from English to Arabic.

                On the following, you can find the input text that you need to translate to Arabic:
                {story_segments}

                Translate it from English to Arabic.

                """
            },
        ],
        functions=[
            {
                "name": "translate_text_from_english_to_arabic",
                "description": "Translate the text from English to Arabic.",
                "parameters": GetTranslation.model_json_schema(),
            }
        ],
        function_call={"name": "translate_text_from_english_to_arabic"},
        temperature=1,
        # Fixed: 1000 tokens cannot hold the Arabic translation of up to
        # ten 50-word segments; truncation broke json.loads below.
        max_tokens=4000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    # Parse the forced function call's JSON arguments into the schema.
    output = json.loads(response.choices[0].message.function_call.arguments)

    sbs = GetTranslation(**output)

    return sbs
def get_text_image_pairs(k, prompt):
    """Produce a k-page story: Arabic text plus one image per page.

    Pipeline: generate the story in English, translate the segments to
    Arabic, then render one image per image prompt.

    Args:
        k: Number of pages to generate.
        prompt: Arabic description of the desired story.

    Returns:
        Tuple ``(translated_segments, image_file_names)`` — parallel lists,
        one entry per page.
    """
    story = generate_story(k, prompt)

    translation = get_Arabic_translation(story.story_segments)

    image_files = [get_image(image_prompt) for image_prompt in story.image_prompts]

    return (translation.translated_text, image_files)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==4.18.0
2
+ gradio_client==0.10.0
3
+ openai==1.12.0
4
+ pydantic==2.6.1
5
+ rich==13.7.0
6
+ stability-sdk==0.8.5
تطبيق الراوي.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:509a398d5629418376c7797bd4ffc369cc8c428b78ced09ebcc6b28462bbce24
3
+ size 4607682