Garrett Goon committed on
Commit
1da6b3f
1 Parent(s): 7b17c3f

updated syntax to match reviewed repo

Browse files
__pycache__/utils.cpython-38.pyc CHANGED
Binary files a/__pycache__/utils.cpython-38.pyc and b/__pycache__/utils.cpython-38.pyc differ
 
app.py CHANGED
@@ -34,37 +34,37 @@ pipeline = StableDiffusionPipeline.from_pretrained(
34
  CONCEPT_PATH = pathlib.Path("learned_embeddings_dict.pt")
35
  learned_embeddings_dict = torch.load(CONCEPT_PATH)
36
 
37
- concept_to_dummy_tokens_map = {}
38
  for concept_token, embedding_dict in learned_embeddings_dict.items():
39
- initializer_tokens = embedding_dict["initializer_tokens"]
40
  learned_embeddings = embedding_dict["learned_embeddings"]
41
  (
42
  initializer_ids,
43
  dummy_placeholder_ids,
44
- dummy_placeholder_tokens,
45
  ) = utils.add_new_tokens_to_tokenizer(
46
- concept_token=concept_token,
47
- initializer_tokens=initializer_tokens,
48
  tokenizer=pipeline.tokenizer,
49
  )
50
  pipeline.text_encoder.resize_token_embeddings(len(pipeline.tokenizer))
51
  token_embeddings = pipeline.text_encoder.get_input_embeddings().weight.data
52
  for d_id, tensor in zip(dummy_placeholder_ids, learned_embeddings):
53
  token_embeddings[d_id] = tensor
54
- concept_to_dummy_tokens_map[concept_token] = dummy_placeholder_tokens
55
 
56
 
57
- def replace_concept_tokens(text: str):
58
- for concept_token, dummy_tokens in concept_to_dummy_tokens_map.items():
59
- text = text.replace(concept_token, dummy_tokens)
60
  return text
61
 
62
  def inference(prompt: str, guidance_scale: int, num_inference_steps: int, seed: int):
63
  if not prompt:
64
  raise ValueError("Please enter a prompt.")
65
- if '<det-logo>' not in prompt:
66
- raise ValueError('"<det-logo>" must be included in the prompt.')
67
- prompt = replace_concept_tokens(prompt)
68
  generator = torch.Generator(device=device).manual_seed(seed)
69
  output = pipeline(
70
  prompt=[prompt] * BATCH_SIZE,
@@ -275,35 +275,35 @@ block = gr.Blocks(css=css)
275
 
276
  examples = [
277
  [
278
- "a Van Gogh painting of a <det-logo> with thick strokes, masterful composition",
279
  # 4,
280
  # 45,
281
  # 7.5,
282
  # 1024,
283
  ],
284
  [
285
- "Futuristic <det-logo> in a desert, painting, octane render, 4 k, anime sky, warm colors",
286
  # 4,
287
  # 45,
288
  # 7,
289
  # 1024,
290
  ],
291
  [
292
- "cell shaded cartoon of a <det-logo>, subtle colors, post grunge, concept art by josan gonzales and wlop, by james jean, victo ngai, david rubin, mike mignola, deviantart, art by artgem",
293
  # 4,
294
  # 45,
295
  # 7,
296
  # 1024,
297
  ],
298
  [
299
- "a surreal Salvador Dali painting of a <det-logo>, soft blended colors",
300
  # 4,
301
  # 45,
302
  # 7,
303
  # 1024,
304
  ],
305
  [
306
- "Beautiful tarot illustration of a <det-logo>, in the style of james jean and victo ngai, mystical colors, trending on artstation",
307
  # 4,
308
  # 45,
309
  # 7,
@@ -334,10 +334,10 @@ with block:
334
  with gr.Box():
335
  with gr.Row(elem_id="prompt-container").style(equal_height=True):
336
  prompt = gr.Textbox(
337
- label='Enter a prompt including "<det-logo>"',
338
  show_label=False,
339
  max_lines=1,
340
- placeholder='Enter a prompt including "<det-logo>"',
341
  elem_id="prompt-text-input",
342
  ).style(
343
  container=False,
 
34
  CONCEPT_PATH = pathlib.Path("learned_embeddings_dict.pt")
35
  learned_embeddings_dict = torch.load(CONCEPT_PATH)
36
 
37
+ concept_to_dummy_strs_map = {}
38
  for concept_token, embedding_dict in learned_embeddings_dict.items():
39
+ initializer_strs = embedding_dict["initializer_strs"]
40
  learned_embeddings = embedding_dict["learned_embeddings"]
41
  (
42
  initializer_ids,
43
  dummy_placeholder_ids,
44
+ dummy_placeholder_strs,
45
  ) = utils.add_new_tokens_to_tokenizer(
46
+ concept_str=concept_token,
47
+ initializer_strs=initializer_strs,
48
  tokenizer=pipeline.tokenizer,
49
  )
50
  pipeline.text_encoder.resize_token_embeddings(len(pipeline.tokenizer))
51
  token_embeddings = pipeline.text_encoder.get_input_embeddings().weight.data
52
  for d_id, tensor in zip(dummy_placeholder_ids, learned_embeddings):
53
  token_embeddings[d_id] = tensor
54
+ concept_to_dummy_strs_map[concept_token] = dummy_placeholder_strs
55
 
56
 
57
def replace_concept_strs(text: str):
    """Swap every known concept token in *text* for its dummy-token string.

    Relies on the module-level ``concept_to_dummy_strs_map`` built at load
    time, which maps each concept token to the space-joined dummy
    placeholder strings whose embeddings were learned for it.
    """
    for concept_str, replacement in concept_to_dummy_strs_map.items():
        text = text.replace(concept_str, replacement)
    return text
61
 
62
  def inference(prompt: str, guidance_scale: int, num_inference_steps: int, seed: int):
63
  if not prompt:
64
  raise ValueError("Please enter a prompt.")
65
+ if 'det-logo' not in prompt:
66
+ raise ValueError('"det-logo" must be included in the prompt.')
67
+ prompt = replace_concept_strs(prompt)
68
  generator = torch.Generator(device=device).manual_seed(seed)
69
  output = pipeline(
70
  prompt=[prompt] * BATCH_SIZE,
 
275
 
276
  examples = [
277
  [
278
+ "a Van Gogh painting of a det-logo with thick strokes, masterful composition",
279
  # 4,
280
  # 45,
281
  # 7.5,
282
  # 1024,
283
  ],
284
  [
285
+ "Futuristic det-logo in a desert, painting, octane render, 4 k, anime sky, warm colors",
286
  # 4,
287
  # 45,
288
  # 7,
289
  # 1024,
290
  ],
291
  [
292
+ "cell shaded cartoon of a det-logo, subtle colors, post grunge, concept art by josan gonzales and wlop, by james jean, victo ngai, david rubin, mike mignola, deviantart, art by artgem",
293
  # 4,
294
  # 45,
295
  # 7,
296
  # 1024,
297
  ],
298
  [
299
+ "a surreal Salvador Dali painting of a det-logo, soft blended colors",
300
  # 4,
301
  # 45,
302
  # 7,
303
  # 1024,
304
  ],
305
  [
306
+ "Beautiful tarot illustration of a det-logo, in the style of james jean and victo ngai, mystical colors, trending on artstation",
307
  # 4,
308
  # 45,
309
  # 7,
 
334
  with gr.Box():
335
  with gr.Row(elem_id="prompt-container").style(equal_height=True):
336
  prompt = gr.Textbox(
337
+ label='Enter a prompt including "det-logo"',
338
  show_label=False,
339
  max_lines=1,
340
+ placeholder='Enter a prompt including "det-logo"',
341
  elem_id="prompt-text-input",
342
  ).style(
343
  container=False,
learned_embeddings_dict.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73ab240e6ef7b16a70e14b4625882d8f63050f1d96ffc0eef6e0e0caa2844109
3
  size 16235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5184c747567ac6240bd45b701cb29416752fcc925b2a967a811c28729451b942
3
  size 16235
utils.py CHANGED
@@ -1,59 +1,49 @@
1
- from typing import List, Sequence, Tuple
2
 
3
  import torch
4
  import torch.nn as nn
5
 
6
 
7
  def add_new_tokens_to_tokenizer(
8
- concept_token: str,
9
- initializer_tokens: Sequence[str],
10
  tokenizer: nn.Module,
11
- ) -> Tuple[List[int], List[int], str]:
12
  """Helper function for adding new tokens to the tokenizer and extending the corresponding
13
  embeddings appropriately, given a single concept token and its sequence of corresponding
14
- initializer tokens. Returns the lists of ids for the initializer tokens and their dummy
15
  replacements, as well as the string representation of the dummies.
16
  """
 
 
 
 
17
  initializer_ids = tokenizer(
18
- initializer_tokens,
19
- padding="max_length",
20
- truncation=True,
21
- max_length=tokenizer.model_max_length,
22
  return_tensors="pt",
23
  add_special_tokens=False,
24
- ).input_ids
25
-
26
- try:
27
- special_token_ids = tokenizer.all_special_ids
28
- except AttributeError:
29
- special_token_ids = []
30
-
31
- non_special_initializer_locations = torch.isin(
32
- initializer_ids, torch.tensor(special_token_ids), invert=True
33
- )
34
- non_special_initializer_ids = initializer_ids[non_special_initializer_locations]
35
- if len(non_special_initializer_ids) == 0:
36
- raise ValueError(
37
- f'"{initializer_tokens}" maps to trivial tokens, please choose a different initializer.'
38
- )
39
 
40
  # Add a dummy placeholder token for every token in the initializer.
41
- dummy_placeholder_token_list = [
42
- f"{concept_token}_{n}" for n in range(len(non_special_initializer_ids))
43
- ]
44
- dummy_placeholder_tokens = " ".join(dummy_placeholder_token_list)
45
- num_added_tokens = tokenizer.add_tokens(dummy_placeholder_token_list)
46
- if num_added_tokens != len(dummy_placeholder_token_list):
47
- raise ValueError(
48
- f"Subset of {dummy_placeholder_token_list} tokens already exist in tokenizer."
49
- )
50
-
51
- dummy_placeholder_ids = tokenizer.convert_tokens_to_ids(
52
- dummy_placeholder_token_list
53
- )
54
- # Sanity check
55
  assert len(dummy_placeholder_ids) == len(
56
- non_special_initializer_ids
57
- ), 'Length of "dummy_placeholder_ids" and "non_special_initializer_ids" must match.'
 
 
 
58
 
59
- return non_special_initializer_ids, dummy_placeholder_ids, dummy_placeholder_tokens
 
 
 
1
+ from typing import List, Tuple
2
 
3
  import torch
4
  import torch.nn as nn
5
 
6
 
7
def add_new_tokens_to_tokenizer(
    concept_str: str,
    initializer_strs: str,
    tokenizer: nn.Module,
) -> Tuple[torch.Tensor, List[int], str]:
    """Add dummy placeholder tokens for a concept to ``tokenizer``.

    Given a single concept token and the string of its initializer tokens,
    registers one dummy token (``<concept_str>_n``) per initializer id and
    returns everything needed to wire up the learned embeddings.

    Args:
        concept_str: The concept token; must not already exist in the
            tokenizer.
        initializer_strs: Space-separated initializer tokens that seed the
            concept's embeddings.
        tokenizer: A Hugging-Face-style tokenizer (callable, with
            ``add_tokens`` / ``convert_tokens_to_ids`` / ``unk_token_id``).

    Returns:
        A tuple of (initializer id tensor, dummy placeholder id list,
        space-joined dummy placeholder string).

    Raises:
        ValueError: If the concept or any generated dummy token already
            exists in the tokenizer, or if the id counts disagree.
    """
    # Validate with explicit raises, not `assert`: asserts vanish under -O.
    if token_exists_in_tokenizer(concept_str, tokenizer):
        raise ValueError(f"concept_str {concept_str} already exists in tokenizer.")

    # [0] drops the batch dimension of the returned (1, seq_len) id tensor.
    initializer_ids = tokenizer(
        initializer_strs,
        return_tensors="pt",
        add_special_tokens=False,
    ).input_ids[0]

    # Add a dummy placeholder token for every token in the initializer.
    dummy_placeholder_str_list = [
        f"<{concept_str}>_{n}" for n in range(len(initializer_ids))
    ]
    # Sanity check: none of the dummies may collide with existing vocab.
    for dummy in dummy_placeholder_str_list:
        if token_exists_in_tokenizer(dummy, tokenizer):
            raise ValueError(f"dummy {dummy} already exists in tokenizer.")

    dummy_placeholder_strs = " ".join(dummy_placeholder_str_list)

    tokenizer.add_tokens(dummy_placeholder_str_list)
    dummy_placeholder_ids = tokenizer.convert_tokens_to_ids(dummy_placeholder_str_list)
    # Sanity check that the dummies correspond to the correct number of ids.
    if len(dummy_placeholder_ids) != len(initializer_ids):
        raise ValueError(
            'Length of "dummy_placeholder_ids" and "initializer_ids" must match.'
        )

    return initializer_ids, dummy_placeholder_ids, dummy_placeholder_strs


def token_exists_in_tokenizer(token: str, tokenizer: nn.Module) -> bool:
    """Return True if ``token`` maps to a real (non-unk) id in ``tokenizer``."""
    exists = tokenizer.convert_tokens_to_ids([token]) != [tokenizer.unk_token_id]
    return exists