
SDv1.5 sd15-arthur-blip-captions: a Stable Diffusion v1.5 model fine-tuned by Norod78 with the Hugging Face Diffusers train_text_to_image script.

Trained on screenshot images from the "ארתור ללא הקשר" ("Arthur with no context") account by @GadiAleks (both are in Hebrew).


The sample images above and below were generated using the sample script below.


The dataset comes from a "No context Arthur screenshots with Hebrew subtitles" account. The model renders convincing Hebrew letters, but the words themselves are gibberish.
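For a minimal quick start (a sketch only, assuming a CUDA device is available; the prompt and output filename are illustrative), the pipeline can be loaded directly from the Hub:

from diffusers import StableDiffusionPipeline
import torch

# Load the fine-tuned checkpoint from the Hub (fp16 assumes a CUDA device)
pipe = StableDiffusionPipeline.from_pretrained("Norod78/sd15-arthur-blip-captions", torch_dtype=torch.float16).to("cuda")
image = pipe("Gal Gadot as wonderwoman, Very detailed, clean, high quality, sharp image").images[0]
image.save("arthur-quickstart.jpg")

The longer script below additionally configures a DPM-Solver++ scheduler and batches several prompts.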

Sample code


from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch

#////////////////////////////////////////////////////////////////
guidance_scale=8.0
steps=40
width=512
height=512

prompt_suffix = ", Very detailed, clean, high quality, sharp image"
#////////////////////////////////////////////////////////////////
custom_model_id = "Norod78/sd15-arthur-blip-captions"
#////////////////////////////////////////////////////////////////
custom_model_id_str = custom_model_id.replace("/","_")
custom_model_pipe = None

def generate(prompt, file_prefix, samples, seed):
    # Generate `samples` images for the given prompt and save them to disk
    global custom_model_pipe

    torch.manual_seed(seed)
    prompt += prompt_suffix
    file_prefix += "Arthur"
    custom_model_images = custom_model_pipe([prompt] * samples, num_inference_steps=steps, guidance_scale=guidance_scale, height=height, width=width)["images"]
    for idx, image in enumerate(custom_model_images):
        image.save(f"{file_prefix}-{idx}-{seed}--{width}x{height}-{custom_model_id_str}.jpg")

def load():
    # Build the scheduler and load the fine-tuned pipeline into the module-level global
    global custom_model_pipe
    
    # DPM-Solver++ multistep scheduler with Stable Diffusion v1.5's default beta schedule
    scheduler = DPMSolverMultistepScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        num_train_timesteps=1000,
        trained_betas=None,
        thresholding=False,
        algorithm_type="dpmsolver++",
        solver_type="midpoint",
        lower_order_final=True,
    )

    # Use fp16 on GPU, fp32 on CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    custom_model_pipe = StableDiffusionPipeline.from_pretrained(custom_model_id, scheduler=scheduler, torch_dtype=dtype).to(device)

def main():
    load()

    generate("A livingroom", "01_LivingRoom", 4, 555)
    generate("Nicolas Cage, in \"The Minions\" movie", "02_NicolasCage", 2, 42)
    generate("Gal Gadot as wonderwoman", "03_GalGadot", 2, 42)
    generate("Gal Gadot in Avatar", "04_GalGadotAvatar", 2, 777)
    generate("Family guy taking selfies at the beach", "05_FamilyGuy", 2, 555)
    generate("Pikachu as Rick and morty, Eric Wallis", "06_PikachuRnM", 2, 777)
    generate("Pikachu as Spongebob, Eric Wallis", "07_PikachuSpongeBob", 2, 42)
    generate("An oil painting of Miss. Piggy from the muppets as the Mona Lisa", "08_MsPiggyMonaLisa", 2, 42)
    generate("Rick Sanchez from the TV show \"Rick and Morty\"", "09_RickSanchez", 2, 42)
    generate("An paiting of Southpark with rainbow", "10_Southpark", 2, 777)
    generate("A psychedelic image of Bojack Horseman", "11_Bojack", 2, 777)
    generate("A movie poster for Gravity Falls Cthulhu stories", "12_GravityFalls", 2, 777)
    generate("A vibrant oil painting portrait of She-Ra", "13_Shira", 2, 512)
#

if __name__ == '__main__':
    main()
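The script above constructs the DPM-Solver++ scheduler by hand using Stable Diffusion v1.5's default beta values. A simpler variant (a sketch, assuming the scheduler config bundled with the checkpoint) rebuilds the scheduler from the pipeline's own config, which keeps the beta schedule consistent with whatever the model was trained on:

from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch

pipe = StableDiffusionPipeline.from_pretrained("Norod78/sd15-arthur-blip-captions", torch_dtype=torch.float16).to("cuda")
# Derive the DPM-Solver++ scheduler from the config shipped with the checkpoint instead of hard-coding betas
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)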