gstaff committed on
Commit
8907ae5
·
1 Parent(s): ebf6ce9

Update to transformers-based model.

Browse files
app.py CHANGED
@@ -1,67 +1,47 @@
1
- import gradio as gr
2
-
3
- import pathlib
4
  import base64
5
  import re
 
 
 
6
  import time
7
  from io import BytesIO
8
 
 
 
9
  import imgkit
10
- import os
11
  from PIL import Image
12
- from fastai.callback.core import Callback
13
- from fastai.learner import *
14
- from fastai.torch_core import TitledStr
15
- from torch import tensor, Tensor
16
- from torch.distributions import Transform
17
- import random
18
 
19
- # These utility functions need to be in main (or otherwise where created) because fastai loads from that module, see:
20
- # https://docs.fast.ai/learner.html#load_learner
21
- from transformers import GPT2TokenizerFast
22
 
23
- import torch
24
- from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
25
 
26
  gpu = False
27
 
28
  AUTH_TOKEN = os.environ.get('AUTH_TOKEN')
29
-
 
30
 
31
  if gpu:
32
- pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", custom_pipeline="stable_diffusion_mega", torch_dtype=torch.float16, revision="fp16", use_auth_token=AUTH_TOKEN)
33
- scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
34
- pipeline.scheduler = scheduler
35
- pipeline.to("cuda")
 
36
  else:
37
- pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
38
- custom_pipeline="stable_diffusion_mega", use_auth_token=AUTH_TOKEN)
39
- scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
40
- pipeline.scheduler = scheduler
41
 
42
  # Huggingface Spaces have 16GB RAM and 8 CPU cores
43
  # See https://huggingface.co/docs/hub/spaces-overview#hardware-resources
44
 
45
- pretrained_weights = 'gpt2'
46
- tokenizer = GPT2TokenizerFast.from_pretrained(pretrained_weights)
47
-
48
-
49
- def tokenize(text):
50
- toks = tokenizer.tokenize(text)
51
- return tensor(tokenizer.convert_tokens_to_ids(toks))
52
-
53
-
54
- class TransformersTokenizer(Transform):
55
- def __init__(self, tokenizer): self.tokenizer = tokenizer
56
-
57
- def encodes(self, x):
58
- return x if isinstance(x, Tensor) else tokenize(x)
59
-
60
- def decodes(self, x): return TitledStr(self.tokenizer.decode(x.cpu().numpy()))
61
-
62
-
63
- class DropOutput(Callback):
64
- def after_pred(self): self.learn.pred = self.pred[0]
65
 
66
 
67
  def gen_card_text(name):
@@ -70,15 +50,9 @@ def gen_card_text(name):
70
  else:
71
  prompt = f"Name: {name}\r\n"
72
  print(f'GENERATING CARD TEXT with prompt: {prompt}')
73
- prompt_ids = tokenizer.encode(prompt)
74
- if gpu:
75
- inp = tensor(prompt_ids)[None].cuda() # Use .cuda() for torch GPU
76
- else:
77
- inp = tensor(prompt_ids)[None]
78
- preds = learner.model.generate(inp, max_length=512, num_beams=5, temperature=1.5, do_sample=True,
79
- repetition_penalty=1.2)
80
- result = tokenizer.decode(preds[0].cpu().numpy())
81
- result = result.split('###')[0].replace(r'\r\n', '\n').replace('\r', '').replace(r'\r', '')
82
  print(f'GENERATING CARD COMPLETE')
83
  print(result)
84
  if name == '':
@@ -87,10 +61,6 @@ def gen_card_text(name):
87
  return name, result
88
 
89
 
90
- # init only once
91
- learner = load_learner('./colab-data-test/export.pkl',
92
- cpu=not gpu) # cpu=False uses GPU; make sure installed torch is GPU e.g. `pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116`
93
-
94
  pathlib.Path('card_data').mkdir(parents=True, exist_ok=True)
95
  pathlib.Path('card_images').mkdir(parents=True, exist_ok=True)
96
  pathlib.Path('card_html').mkdir(parents=True, exist_ok=True)
@@ -109,7 +79,7 @@ def run(name):
109
  prompt_template = f"fantasy illustration of a {card_type} {name}, by Greg Rutkowski"
110
  print(f"GENERATING IMAGE FOR {prompt_template}")
111
  # Regarding sizing see https://huggingface.co/blog/stable_diffusion#:~:text=When%20choosing%20image%20sizes%2C%20we%20advise%20the%20following%3A
112
- images = pipeline.text2img(prompt_template, width=512, height=368, num_inference_steps=20).images
113
  card_image = None
114
  for image in images:
115
  save_name = get_savename('card_images', name, 'png')
 
 
 
 
1
  import base64
2
  import re
3
+ import os
4
+ import pathlib
5
+ import random
6
  import time
7
  from io import BytesIO
8
 
9
+ from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
10
+ import gradio as gr
11
  import imgkit
 
12
  from PIL import Image
13
+ import torch
14
+ from transformers import GPT2LMHeadModel, GPT2TokenizerFast, pipeline
 
 
 
 
15
 
 
 
 
16
 
17
+ temp = pathlib.PosixPath
18
+ pathlib.PosixPath = pathlib.WindowsPath
19
 
20
  gpu = False
21
 
22
  AUTH_TOKEN = os.environ.get('AUTH_TOKEN')
23
+ BASE_MODEL = "gpt2"
24
+ MERGED_MODEL = "gpt2-magic-card"
25
 
26
  if gpu:
27
+ image_pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16,
28
+ revision="fp16", use_auth_token=AUTH_TOKEN)
29
+ scheduler = EulerAncestralDiscreteScheduler.from_config(image_pipeline.scheduler.config)
30
+ image_pipeline.scheduler = scheduler
31
+ image_pipeline.to("cuda")
32
  else:
33
+ image_pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", use_auth_token=AUTH_TOKEN)
34
+ scheduler = EulerAncestralDiscreteScheduler.from_config(image_pipeline.scheduler.config)
35
+ image_pipeline.scheduler = scheduler
 
36
 
37
  # Huggingface Spaces have 16GB RAM and 8 CPU cores
38
  # See https://huggingface.co/docs/hub/spaces-overview#hardware-resources
39
 
40
+ model = GPT2LMHeadModel.from_pretrained(MERGED_MODEL)
41
+ tokenizer = GPT2TokenizerFast.from_pretrained(BASE_MODEL)
42
+ END_TOKEN = '###'
43
+ eos_id = tokenizer.encode(END_TOKEN)
44
+ text_pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  def gen_card_text(name):
 
50
  else:
51
  prompt = f"Name: {name}\r\n"
52
  print(f'GENERATING CARD TEXT with prompt: {prompt}')
53
+ output = text_pipeline(prompt, max_length=512, num_return_sequences=1, num_beams=5, temperature=1.5, do_sample=True,
54
+ repetition_penalty=1.2, eos_token_id=eos_id)
55
+ result = output[0]['generated_text'].split("###")[0].replace(r'\r\n', '\n').replace('\r', '').replace(r'\r', '')
 
 
 
 
 
 
56
  print(f'GENERATING CARD COMPLETE')
57
  print(result)
58
  if name == '':
 
61
  return name, result
62
 
63
 
 
 
 
 
64
  pathlib.Path('card_data').mkdir(parents=True, exist_ok=True)
65
  pathlib.Path('card_images').mkdir(parents=True, exist_ok=True)
66
  pathlib.Path('card_html').mkdir(parents=True, exist_ok=True)
 
79
  prompt_template = f"fantasy illustration of a {card_type} {name}, by Greg Rutkowski"
80
  print(f"GENERATING IMAGE FOR {prompt_template}")
81
  # Regarding sizing see https://huggingface.co/blog/stable_diffusion#:~:text=When%20choosing%20image%20sizes%2C%20we%20advise%20the%20following%3A
82
+ images = image_pipeline(prompt_template, width=512, height=368, num_inference_steps=20).images
83
  card_image = None
84
  for image in images:
85
  save_name = get_savename('card_images', name, 'png')
gpt2-magic-card/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float16",
36
+ "transformers_version": "4.32.1",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
gpt2-magic-card/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.32.1"
6
+ }
colab-data-test/export.pkl → gpt2-magic-card/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81e1bb9267d4e38f1de3eb4f103ec49537640ce7536e3dfcbce8cd673a38fb85
3
- size 512717919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3b0f3c17995a67032a94e998a790f5ea1ae1dbfd9d231c8bc7625b6556cb7d1
3
+ size 248927581
gpt2-magic-card/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,6 +1,5 @@
1
  accelerate
2
  diffusers
3
- fastai
4
  ftfy
5
  gradio
6
  imgkit
 
1
  accelerate
2
  diffusers
 
3
  ftfy
4
  gradio
5
  imgkit