diegopacheco committed on
Commit
42bc318
1 Parent(s): f0011e3
Files changed (9) hide show
  1. app.py +99 -0
  2. comics.png +0 -0
  3. comics_0.png +0 -0
  4. e1_comics_0.png +0 -0
  5. e2_comics_0.png +0 -0
  6. e3_comics_0.png +0 -0
  7. install-deps.sh +3 -0
  8. requirements.txt +18 -0
  9. run.sh +3 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import DiffusionPipeline
2
+ from torchvision.transforms.functional import to_tensor
3
+ import torch
4
+ import matplotlib.pyplot as plt
5
+ from torchvision.transforms.functional import to_pil_image
6
+ import gradio as gr
7
+ from torchvision.utils import save_image
8
+ from PIL import Image
9
+ import os
10
+ from gtts import gTTS
11
+ import torch
12
+ import gradio as gr
13
+ from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
14
+ from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer
15
+
16
# Canned prompts offered as clickable examples in the UI; text_to_comics
# answers these from pre-rendered PNG files instead of running the model.
example_1: str = "ninja turtles fighting against a mosquito, in the sea"
example_2: str = "warrior fighting zombies with a sword, in the forest"
example_3: str = "western cowboy fighting against a dragon, in the desert"
19
+
20
def load_image(image_path):
    """Load an image file and return it as a peak-normalised PIL image.

    The pixel data is converted to a tensor, rescaled so that its largest
    value becomes 1.0, and converted back to a PIL image.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A PIL image built from the normalised tensor.
    """
    # The context manager releases the file handle as soon as the pixel
    # data has been copied into the tensor.
    with Image.open(image_path) as image:
        image_tensor = to_tensor(image)

    peak = image_tensor.max()
    # An all-black image has a peak of 0; dividing by it would fill the
    # tensor with NaNs, so such images are returned unscaled.
    if peak > 0:
        image_tensor = image_tensor / peak
    return to_pil_image(image_tensor)
27
+
28
def text_to_comics(text):
    """Return a comic-style image for *text*.

    The three example prompts are answered from pre-rendered PNG files;
    any other prompt is rendered with the "ogkalu/Comic-Diffusion"
    pipeline. A freshly generated image is also written to
    ``comics_0.png``.

    Args:
        text: Prompt describing the scene to draw.

    Returns:
        A PIL image.
    """
    prerendered = {
        example_1: "e1_comics_0.png",
        example_2: "e2_comics_0.png",
        example_3: "e3_comics_0.png",
    }
    prerendered_path = prerendered.get(text)
    if prerendered_path is not None:
        return load_image(prerendered_path)

    # Deliberately not called `pipeline`, so the transformers `pipeline`
    # factory imported at file level is not shadowed inside this function.
    comic_pipeline = DiffusionPipeline.from_pretrained("ogkalu/Comic-Diffusion")
    # NOTE(review): the earlier call passed prompt_len/num_images/
    # return_tensors, which are not documented arguments of the Stable
    # Diffusion pipeline call; num_images_per_prompt is the documented way
    # to request a single image. Confirm against the installed diffusers
    # version.
    output = comic_pipeline(text, num_images_per_prompt=1)

    image_tensor = to_tensor(output.images[0])
    peak = image_tensor.max()
    # Guard against an all-black result: dividing by a zero peak yields NaNs.
    if peak > 0:
        image_tensor = image_tensor / peak
    save_image(image_tensor, "comics_0.png")
    return to_pil_image(image_tensor)
47
+
48
+ #text_to_comics(example_1)
49
+ #text_to_comics(example_2)
50
+ #text_to_comics(example_3)
51
+
52
def generate_story(description):
    """Generate a short story from *description* with GPT-2.

    Args:
        description: Seed text; " a thriller/action story." is appended
            before it is fed to the model.

    Returns:
        The decoded text (prompt plus generated continuation).
    """
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    encoded = tokenizer(description + " a thriller/action story.", return_tensors="pt")
    outputs = model.generate(
        input_ids=encoded["input_ids"],
        # Passing the mask explicitly avoids the "attention mask not set"
        # warning and any ambiguity about padding.
        attention_mask=encoded["attention_mask"],
        max_length=200,  # total length cap, prompt tokens included
        num_return_sequences=1,
        # temperature only takes effect when sampling is enabled; without
        # do_sample=True generation is greedy and the value is ignored.
        do_sample=True,
        temperature=0.7,
        no_repeat_ngram_size=2,
        # GPT-2 has no pad token; reuse EOS so generate() does not have to
        # guess (and warn) on every call.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
63
+
64
def convert_to_audio(text, audio_file_path="audio.mp3"):
    """Synthesise *text* to speech with gTTS and save it as an MP3 file.

    Args:
        text: Text to be spoken.
        audio_file_path: Destination of the MP3 file. Defaults to
            "audio.mp3", the path this function has always written to.

    Returns:
        The path the audio was written to.
    """
    tts = gTTS(text)
    tts.save(audio_file_path)
    return audio_file_path
69
+
70
def audio_to_text(audio_file_path):
    """Transcribe an audio file with the "openai/whisper-large-v2" model.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text (the ``'text'`` entry of the pipeline result).
    """
    transcriber = pipeline("automatic-speech-recognition", "openai/whisper-large-v2")
    # Transcribe the file the caller asked for; previously the argument was
    # ignored and "audio.mp3" was always read.
    result = transcriber(audio_file_path)
    print(result)
    return result['text']
75
+
76
def sentiment_analysis(text):
    """Run the default "sentiment-analysis" pipeline on *text*.

    The raw pipeline result is printed and then returned unchanged.
    """
    verdict = pipeline("sentiment-analysis")(text)
    print(verdict)
    return verdict
81
+
82
def app(text):
    """Run the full demo chain for one prompt.

    Steps, in order: draw the comic, generate a story, speak the story to
    an audio file, transcribe that audio, and score the transcript's
    sentiment.

    Returns:
        A tuple ``(comic image, audio file path, transcript, sentiment)``.
    """
    comic_image = text_to_comics(text)
    narration_path = convert_to_audio(generate_story(text))
    transcript = audio_to_text(narration_path)
    mood = sentiment_analysis(transcript)
    return comic_image, narration_path, transcript, mood
89
+
90
# Gradio front end: one text box in; image, audio and two text fields out.
ui = gr.Interface(
    fn=app,
    inputs="text",
    outputs=["image", "audio", "text", "text"],
    title="GenAI Multi-model LLM comics: Type some text get comics!",
    description=(
        "This model generates comics based on the text(max 70 chars) you provide."
        "<BR/>It does not work on mobile(timeout issue) click on examples if dont want to wait. "
        "<BR/>It may take ~10-20min to generate the comics."
    ),
    examples=[example_1, example_2, example_3],
)
ui.launch()
comics.png ADDED
comics_0.png ADDED
e1_comics_0.png ADDED
e2_comics_0.png ADDED
e3_comics_0.png ADDED
install-deps.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
#!/bin/bash
# Install the Python packages listed in requirements.txt.

/bin/pip install -r requirements.txt
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
numpy
transformers
sentence-transformers
seaborn
torch
torchvision
matplotlib
pandas
scikit-learn
nltk
gensim
tensorflow
keras
opencv-python
fastapi
uvicorn
gradio
# Imported by app.py but previously missing from this list.
diffusers
gTTS
Pillow
18
+
run.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
#!/bin/bash
# Start the application with the system Python interpreter.
# NOTE(review): this commit adds app.py at the repository root; confirm that
# src/main.py is still the intended entry point.

/bin/python src/main.py