diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 0000000000000000000000000000000000000000..935fedcf2312cd2bcb0296366f69980eb418ace3
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1 @@
+* @AUTOMATIC1111
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe0ce321fa05a75c1f787604c768b7de710ac22b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,137 @@
+import os
+import threading
+import time
+import importlib
+import signal
+import threading
+
+from fastapi.middleware.gzip import GZipMiddleware
+
+from modules.paths import script_path
+
+from modules import devices, sd_samplers
+import modules.codeformer_model as codeformer
+import modules.extras
+import modules.face_restoration
+import modules.gfpgan_model as gfpgan
+import modules.img2img
+
+import modules.lowvram
+import modules.paths
+import modules.scripts
+import modules.sd_hijack
+import modules.sd_models
+import modules.shared as shared
+import modules.txt2img
+
+import modules.ui
+from modules import devices
+from modules import modelloader
+from modules.paths import script_path
+from modules.shared import cmd_opts
+import modules.hypernetworks.hypernetwork
+
+
+queue_lock = threading.Lock()
+
+
+def wrap_queued_call(func):
+    def f(*args, **kwargs):
+        with queue_lock:
+            res = func(*args, **kwargs)
+
+        return res
+
+    return f
+
+
+def wrap_gradio_gpu_call(func, extra_outputs=None):
+    def f(*args, **kwargs):
+        devices.torch_gc()
+
+        shared.state.sampling_step = 0
+        shared.state.job_count = -1
+        shared.state.job_no = 0
+        shared.state.job_timestamp = shared.state.get_job_timestamp()
+        shared.state.current_latent = None
+        shared.state.current_image = None
+        shared.state.current_image_sampling_step = 0
+        shared.state.skipped = False
+        shared.state.interrupted = False
+        shared.state.textinfo = None
+
+        with queue_lock:
+            res = func(*args, **kwargs)
+
+        shared.state.job = ""
+        shared.state.job_count = 0
+
+        devices.torch_gc()
+
+        return res
+
+    return modules.ui.wrap_gradio_call(f, extra_outputs=extra_outputs)
+
+def initialize():
+    modelloader.cleanup_models()
+    modules.sd_models.setup_model()
+    codeformer.setup_model(cmd_opts.codeformer_models_path)
+    gfpgan.setup_model(cmd_opts.gfpgan_models_path)
+    shared.face_restorers.append(modules.face_restoration.FaceRestoration())
+    modelloader.load_upscalers()
+
+    modules.scripts.load_scripts(os.path.join(script_path, "scripts"))
+
+    shared.sd_model = modules.sd_models.load_model()
+    shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model)))
+    shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork)))
+    shared.opts.onchange("sd_hypernetwork_strength", modules.hypernetworks.hypernetwork.apply_strength)
+
+
+def webui():
+    initialize()
+
+    # make the program just exit at ctrl+c without waiting for anything
+    def sigint_handler(sig, frame):
+        print(f'Interrupted with signal {sig} in {frame}')
+        os._exit(0)
+
+    signal.signal(signal.SIGINT, sigint_handler)
+
+    while 1:
+
+        demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call)
+
+        app, local_url, share_url = demo.launch(
+            share=cmd_opts.share,
+            server_name="0.0.0.0" if cmd_opts.listen else None,
+            server_port=cmd_opts.port,
+            debug=cmd_opts.gradio_debug,
+            auth=[tuple(cred.split(':')) for cred in cmd_opts.gradio_auth.strip('"').split(',')] if cmd_opts.gradio_auth else None,
+            inbrowser=cmd_opts.autolaunch,
+            prevent_thread_lock=True
+        )
+
+        app.add_middleware(GZipMiddleware, minimum_size=1000)
+
+        while 1:
+            time.sleep(0.5)
+            if getattr(demo, 'do_restart', False):
+                time.sleep(0.5)
+                demo.close()
+                time.sleep(0.5)
+                break
+
+        sd_samplers.set_samplers()
+
+        print('Reloading Custom Scripts')
+        modules.scripts.reload_scripts(os.path.join(script_path, "scripts"))
+        print('Reloading modules: modules.ui')
+        importlib.reload(modules.ui)
+        print('Refreshing Model List')
+        modules.sd_models.list_models()
+        print('Restarting Gradio')
+
+
+if __name__ == "__main__":
+    webui()
diff --git a/artists.csv b/artists.csv
new file mode 100644
index 0000000000000000000000000000000000000000..99cdbdc60990113eaccaca5296e33a145eac1bc4
--- /dev/null
+++ b/artists.csv
@@ -0,0 +1,3048 @@
+artist,score,category
+Peter Max,0.99715996,weird +Roy Lichtenstein,0.98272276,cartoon +Romero Britto,0.9498342,scribbles +Keith Haring,0.9431302,weird +Hiroshige,0.93995106,ukioe +Joan Miró,0.9169429,scribbles +Jean-Michel Basquiat,0.90080947,scribbles +Katsushika Hokusai,0.8887236,ukioe +Paul Klee,0.8868682,scribbles +Marc Chagall,0.8868168,scribbles +Karl Schmidt-Rottluff,0.88444495,scribbles +Howard Hodgkin,0.8808578,scribbles +Jean Metzinger,0.88056004,scribbles +Alma Thomas,0.87658304,weird +Rufino Tamayo,0.8749848,scribbles +Utagawa Hiroshige,0.8728796,ukioe +Chagall,0.8718535,scribbles +Harumi Hironaka,0.86914605,scribbles +Hans Hofmann,0.8686159,scribbles +Kawanabe Kyōsai,0.86612236,ukioe +Andy Warhol,0.8654825,scribbles +Barbara Takenaga,0.86223894,scribbles +Tatsuro Kiuchi,0.8597267,cartoon +Vincent Van Gogh,0.85538065,scribbles +Wassily Kandinsky,0.85490596,scribbles +Georges Seurat,0.8534801,scribbles +Karel Appel,0.8529153,scribbles +Sonia Delaunay,0.8506156,scribbles +Hokusai,0.85046995,ukioe +Eduardo Kobra,0.85036755,weird +Fra Angelico,0.84984255,fineart +Milton Avery,0.849746,scribbles +David Hockney,0.8496144,scribbles +Hiroshi Nagai,0.847129,cartoon +Aristarkh Lentulov,0.846537,scribbles +Lyonel Feininger,0.84573764,scribbles +Mary Blair,0.845709,scribbles +Ellsworth Kelly,0.8455428,scribbles +Jun Kaneko,0.8448367,scribbles +Roz Chast,0.8432013,weird +Ida Rentoul Outhwaite,0.84275174,scribbles +Robert Motherwell,0.8409468,scribbles +Garry Winogrand,0.83994275,black-white +Andrei Rublev,0.83950496,fineart +Alexander Calder,0.83832693,scribbles +Tomokazu Matsuyama,0.8376121,scribbles +August Macke,0.8362022,scribbles +Kazimir Malevich,0.8356527,scribbles +Richard Scarry,0.83554685,scribbles +Victor Vasarely,0.8335438,scribbles +Kitagawa Utamaro,0.83333457,ukioe +Matt Bors,0.83252287,scribbles +Emil Nolde,0.8323225,scribbles +Patrick Caulfield,0.8322225,scribbles +Charles Blackman,0.83200824,scribbles +Peter Doig,0.83111644,scribbles +Alexej von Jawlensky,0.8308932,scribbles +Rumiko Takahashi,0.8301817,anime +Eileen Agar,0.82945526,scribbles +Ernst Ludwig Kirchner,0.82756275,scribbles +Nicolas Delort,0.8261329,scribbles +Marsden Hartley,0.8250993,scribbles +Keith Negley,0.8212553,scribbles +Jamini Roy,0.8212199,scribbles +Quentin Blake,0.82115215,scribbles +Andy Kehoe,0.82063186,cartoon +George barbier,0.82046914,fineart +Frans Masereel,0.81997275,scribbles +Umberto Boccioni,0.81921184,scribbles +Conrad Roset,0.8190752,cartoon +Paul Ranson,0.81903255,scribbles +Yayoi Kusama,0.81886625,weird +Tomi Ungerer,0.81848705,scribbles +Saul Steinberg,0.81778854,scribbles +Jon Klassen,0.81773067,scribbles +W.W.
Denslow,0.81708044,fineart +Helen Frankenthaler,0.81704986,scribbles +Jean Jullien,0.816437,scribbles +Brett Whiteley,0.81601924,scribbles +Giotto Di Bondone,0.81427747,fineart +Takashi Murakami,0.81338763,weird +Howard Finster,0.81333554,scribbles +Eduardo Paolozzi,0.81312317,scribbles +Charles Rennie Mackintosh,0.81297064,scribbles +Brandon Mably,0.8128239,weird +Rebecca Louise Law,0.81214285,weird +Victo Ngai,0.81195843,cartoon +Hanabusa Itchō II,0.81187993,ukioe +Edmund Dulac,0.81104875,scribbles +Ben Shahn,0.8104582,scribbles +Howard Arkley,0.8103746,scribbles +Wilfredo Lam,0.8096211,scribbles +Michael Deforge,0.8095954,scribbles +John Hoyland,0.8094592,fineart +Francesco Clemente,0.8090387,scribbles +Leonetto Cappiello,0.8087691,scribbles +Norman Ackroyd,0.80788493,scribbles +Bhupen Khakhar,0.8077607,scribbles +Jeremiah Ketner,0.8075384,cartoon +Chris Ofili,0.8073793,scribbles +Banksy,0.80695426,scribbles +Tom Whalen,0.805867,scribbles +Ernst Wilhelm Nay,0.805295,scribbles +Henri Rousseau,0.8049866,scribbles +Kunisada,0.80493814,ukioe +Naoko Takeuchi,0.80482674,anime +Kaethe Butcher,0.80406916,scribbles +Hasui Kawase,0.8040483,ukioe +Alvin Langdon Coburn,0.8035004,black-white +Stanley Donwood,0.8033054,scribbles +Agnes Martin,0.8028028,scribbles +Osamu Tezuka,0.8005524,cartoon +Frank Stella,0.80049455,scribbles +Dale Chihuly,0.79982775,digipa-high-impact +Evgeni Gordiets,0.79967916,scribbles +Janek Sedlar,0.7993992,fineart +Alasdair Gray,0.7992301,scribbles +Yasuo Kuniyoshi,0.79870003,ukioe +Edward Gorey,0.7984938,scribbles +Johannes Itten,0.798481,scribbles +Cuno Amiet,0.7979497,scribbles +M.C. Escher,0.7976657,scribbles +Albert Irvin,0.79688835,scribbles +Jack Gaughan,0.79443675,scribbles +Ravi Zupa,0.7939542,scribbles +Kay Nielsen,0.79385525,scribbles +Agnolo Gaddi,0.79369193,fineart +Alessandro Gottardo,0.79321593,scribbles +Paul Laffoley,0.79196846,scribbles +Giovanni Battista Piranesi,0.79111177,fineart +Adrian Tomine,0.79109013,scribbles +Adolph Gottlieb,0.79061794,scribbles +Milton Caniff,0.7905358,cartoon +Philip Guston,0.78994095,scribbles +Debbie Criswell,0.7895031,cartoon +Alice Pasquini,0.78949904,cartoon +Johannes Vermeer,0.78931487,fineart +Lisa Frank,0.7892591,cartoon +Patrick Heron,0.78889126,scribbles +Mikhail Nesterov,0.78814346,fineart +Cézanne,0.7879481,scribbles +Tristan Eaton,0.787513,scribbles +Jillian Tamaki,0.7868066,scribbles +Takato Yamamoto,0.78460765,ukioe +Martiros Saryan,0.7844924,scribbles +Emil Orlik,0.7842625,scribbles +Armand Guillaumin,0.7840431,scribbles +Jane Newland,0.7837676,scribbles +Paul Cézanne,0.78368753,scribbles +Tove Jansson,0.78356475,scribbles +Guido Crepax,0.7835321,cartoon +OSGEMEOS,0.7829088,weird +Albert Watson,0.48901254,digipa-med-impact +Emory Douglas,0.78179604,scribbles +Chris Van Allsburg,0.66413003,fineart +Ohara Koson,0.78132576,ukioe +Nicolas de Stael,0.7802779,scribbles +Aubrey Beardsley,0.77970016,scribbles +Hishikawa Moronobu,0.7794119,ukioe +Alfred Wallis,0.77926695,scribbles +Friedensreich Hundertwasser,0.7791805,scribbles +Eyvind Earle,0.7788089,scribbles +Giotto,0.7785216,fineart +Simone Martini,0.77843,fineart +Ivan Bilibin,0.77720606,fineart +Karl Blossfeldt,0.77652574,black-white +Duy Huynh,0.77634746,scribbles +Giovanni da Udina,0.7763063,fineart +Henri-Edmond Cross,0.7762994,fineart +Barry McGee,0.77618384,scribbles +William Kentridge,0.77615225,scribbles +Alexander Archipenko,0.7759824,scribbles +Jaume Plensa,0.7756799,weird +Bill Jacklin,0.77504414,fineart +Alberto Vargas,0.7747376,cartoon +Jean 
Dubuffet,0.7744374,scribbles +Eugène Grasset,0.7741958,fineart +Arthur Rackham,0.77418125,fineart +Yves Tanguy,0.77380997,scribbles +Elsa Beskow,0.7736908,fineart +Georgia O’Keeffe,0.77368987,scribbles +Georgia O'Keeffe,0.77368987,scribbles +Henri Cartier-Bresson,0.7735415,black-white +Andrea del Verrocchio,0.77307427,fineart +Mark Rothko,0.77294236,scribbles +Bruce Gilden,0.7256681,black-white +Gino Severini,0.77247965,scribbles +Delphin Enjolras,0.5594248,fineart +Alena Aenami,0.77210015,cartoon +Ed Freeman,0.42526615,digipa-low-impact +Apollonia Saintclair,0.7718383,anime +László Moholy-Nagy,0.771497,scribbles +Louis Glackens,0.7713224,fineart +Fang Lijun,0.77097225,fineart +Alfred Kubin,0.74409986,fineart +David Wojnarowicz,0.7705802,scribbles +Tara McPherson,0.77023256,scribbles +Gustav Doré,0.7367536,fineart +Patricia Polacco,0.7696109,scribbles +Norman Bluhm,0.7692634,fineart +Elizabeth Gadd,0.7691194,digipa-high-impact +Gabriele Münter,0.7690926,scribbles +David Inshaw,0.76905304,scribbles +Maurice Sendak,0.7690118,cartoon +Harry Clarke,0.7688428,cartoon +Howardena Pindell,0.7686921,n +Jamie Hewlett,0.7680373,scribbles +Steve Ditko,0.76725733,scribbles +Annie Soudain,0.7671485,scribbles +Albert Gleizes,0.76658314,scribbles +Henry Fuseli,0.69147265,fineart +Alain Laboile,0.67634284,c +Albrecht Altdorfer,0.7663378,fineart +Jack Butler Yeats,0.7661406,fineart +Yue Minjun,0.76583517,scribbles +Art Spiegelman,0.7656343,scribbles +Grete Stern,0.7656276,fineart +Mordecai Ardon,0.7648692,scribbles +Joel Sternfeld,0.76456416,digipa-high-impact +Milton Glaser,0.7641823,scribbles +Eishōsai Chōki,0.7639659,scribbles +Domenico Ghirlandaio,0.76372653,fineart +Alex Timmermans,0.64443207,digipa-high-impact +Andreas Vesalius,0.763446,fineart +Bruce McLean,0.76335883,scribbles +Jacob Lawrence,0.76330304,scribbles +Alex Katz,0.76317835,scribbles +Henri de Toulouse-Lautrec,0.76268333,scribbles +Franz Sedlacek,0.762062,scribbles +Paul Lehr,0.70854837,cartoon +Nicholas Roerich,0.76117516,scribbles +Henri Matisse,0.76110923,scribbles +Colin McCahon,0.76086944,scribbles +Max Dupain,0.6661642,black-white +Stephen Gammell,0.74001735,weird +Alberto Giacometti,0.7596302,scribbles +Goyō Hashiguchi,0.7595048,ukioe +Gustave Doré,0.7018832,fineart +Butcher Billy,0.7593378,cartoon +Pieter de Hooch,0.75916564,fineart +Gaetano Pesce,0.75906265,scribbles +Winsor McCay,0.7589382,scribbles +Claude Cahun,0.7588153,weird +Roger Ballen,0.64683115,black-white +Ellen Gallagher,0.758621,scribbles +Anton Corbijn,0.5550669,digipa-high-impact +Margaret Macdonald Mackintosh,0.75781375,fineart +Franz Kline,0.7576461,scribbles +Cimabue,0.75720495,fineart +André Kertész,0.7319392,black-white +Hans Hartung,0.75718236,scribbles +J. J. Grandville,0.7321584,fineart +David Octavius Hill,0.6333561,digipa-high-impact +teamLab,0.7566472,digipa-high-impact +Paul Gauguin,0.75635266,scribbles +Etel Adnan,0.75631833,scribbles +Barbara Kruger,0.7562784,scribbles +Franz Marc,0.75538874,scribbles +Saul Bass,0.75496316,scribbles +El Lissitzky,0.7549487,scribbles +Thomas Moran,0.6507399,fineart +Claude Monet,0.7541377,fineart +David Young Cameron,0.7541016,scribbles +W. 
Heath Robinson,0.75374347,cartoon +Yves Klein,0.7536262,fineart +Albert Pinkham Ryder,0.7338848,fineart +Elizabeth Shippen Green,0.7533686,fineart +Robert Stivers,0.5516287,fineart +Emily Kame Kngwarreye,0.7532016,weird +Charline von Heyl,0.753142,scribbles +Frida Kahlo,0.75303876,scribbles +Amy Sillman,0.752921,scribbles +Emperor Huizong of Song,0.7525214,ukioe +Edward Burne-Jones,0.75220466,fineart +Brett Weston,0.6891357,black-white +Charles E. Burchfield,0.75174403,scribbles +Hishida Shunsō,0.751617,fareast +Elaine de Kooning,0.7514996,scribbles +Gary Panter,0.7514598,scribbles +Frederick Hammersley,0.7514268,scribbles +Gustave Dore,0.6735896,fineart +Ephraim Moses Lilien,0.7510494,fineart +Hannah Hoch,0.7509496,scribbles +Shepard Fairey,0.7508583,scribbles +Richard Burlet,0.7506659,scribbles +Bill Brandt,0.6833408,black-white +Herbert List,0.68455493,black-white +Joseph Cornell,0.75023884,nudity +Nathan Wirth,0.6436741,black-white +John Kenn Mortensen,0.74758303,anime +Andre De Dienes,0.5683014,digipa-high-impact +Albert Robida,0.7485741,cartoon +Shintaro Kago,0.7484431,anime +Sidney Nolan,0.74809414,scribbles +Patrice Murciano,0.61973965,fineart +Brian Stelfreeze,0.7478351,scribbles +Francisco De Goya,0.6954584,fineart +William Morris,0.7478111,fineart +Honoré Daumier,0.74767774,scribbles +Hubert Robert,0.6863421,fineart +Marianne von Werefkin,0.7475825,fineart +Edvard Munch,0.74719715,scribbles +Victor Brauner,0.74719006,scribbles +George Inness,0.7470588,fineart +Naoki Urasawa,0.7469665,anime +Kilian Eng,0.7468486,scribbles +Bordalo II,0.7467364,digipa-high-impact +Katsuhiro Otomo,0.746364,anime +Maximilien Luce,0.74609685,fineart +Amy Earles,0.74603415,fineart +Jeanloup Sieff,0.7196009,black-white +William Zorach,0.74574494,scribbles +Pascale Campion,0.74516207,fineart +Dorothy Lathrop,0.74418795,fineart +Sofonisba Anguissola,0.74418664,fineart +Natalia Goncharova,0.74414873,scribbles +August Sander,0.6644566,black-white +Jasper Johns,0.74395454,scribbles +Arthur Dove,0.74383533,scribbles +Darwyn Cooke,0.7435789,scribbles +Leonardo Da Vinci,0.6825216,fineart +Fra Filippo Lippi,0.7433891,fineart +Pierre-Auguste Renoir,0.742464,fineart +Jeff Lemire,0.7422893,scribbles +Al Williamson,0.742113,cartoon +Childe Hassam,0.7418015,fineart +Francisco Goya,0.69522625,fineart +Alphonse Mucha,0.74171394,special +Cleon Peterson,0.74163914,scribbles +J.M.W. Turner,0.65582645,fineart +Walter Crane,0.74146044,fineart +Brassaï,0.6361966,digipa-high-impact +Virgil Finlay,0.74133486,fineart +Fernando Botero,0.7412504,nudity +Ben Nicholson,0.7411573,scribbles +Robert Rauschenberg,0.7410054,fineart +David Wiesner,0.7406237,scribbles +Bartolome Esteban Murillo,0.6933951,fineart +Jean Arp,0.7403873,scribbles +Andre Kertesz,0.7228358,black-white +Simeon Solomon,0.66441345,fineart +Hugh Ferriss,0.72443527,black-white +Agnes Lawrence Pelton,0.73960555,scribbles +Charles Camoin,0.7395686,scribbles +Paul Strand,0.7080332,black-white +Charles Gwathmey,0.7394747,scribbles +Bartolomé Esteban Murillo,0.7011274,fineart +Oskar Kokoschka,0.7392038,scribbles +Bruno Munari,0.73918355,weird +Willem de Kooning,0.73916197,scribbles +Hans Memling,0.7387886,fineart +Chris Mars,0.5861489,digipa-high-impact +Hiroshi Yoshida,0.73787534,ukioe +Hundertwasser,0.7377672,fineart +David Bowie,0.73773724,weird +Ettore Sottsass,0.7376095,digipa-high-impact +Antanas Sutkus,0.7369492,black-white +Leonora Carrington,0.73726475,scribbles +Hieronymus Bosch,0.7369955,scribbles +A. J. 
Casson,0.73666203,scribbles +Chaim Soutine,0.73662066,scribbles +Artur Bordalo,0.7364549,weird +Thomas Allom,0.68792284,fineart +Louis Comfort Tiffany,0.7363504,fineart +Philippe Druillet,0.7363382,cartoon +Jan Van Eyck,0.7360621,fineart +Sandro Botticelli,0.7359395,fineart +Hieronim Bosch,0.7359308,scribbles +Everett Shinn,0.7355817,fineart +Camille Corot,0.7355603,fineart +Nick Sharratt,0.73470485,scribbles +Fernand Léger,0.7079839,scribbles +Robert S. Duncanson,0.7346282,fineart +Hieronymous Bosch,0.73453265,scribbles +Charles Addams,0.7344034,scribbles +Studio Ghibli,0.73439026,anime +Archibald Motley,0.7343683,scribbles +Anton Fadeev,0.73433846,cartoon +Uemura Shoen,0.7342118,ukioe +Ando Fuchs,0.73406494,black-white +Jessie Willcox Smith,0.73398125,fineart +Alex Garant,0.7333658,scribbles +Lawren Harris,0.73331416,scribbles +Anne Truitt,0.73297834,scribbles +Richard Lindner,0.7328564,scribbles +Sailor Moon,0.73281246,anime +Bridget Bate Tichenor,0.73274165,scribbles +Ralph Steadman,0.7325864,scribbles +Annibale Carracci,0.73251307,fineart +Dürer,0.7324789,fineart +Abigail Larson,0.7319012,cartoon +Bill Traylor,0.73189163,scribbles +Louis Rhead,0.7318623,fineart +David Burliuk,0.731803,scribbles +Camille Pissarro,0.73172396,fineart +Catrin Welz-Stein,0.73117495,scribbles +William Etty,0.6497544,nudity +Pierre Bonnard,0.7310132,scribbles +Benoit B. Mandelbrot,0.5033001,digipa-med-impact +Théodore Géricault,0.692039,fineart +Andy Goldsworthy,0.7307565,digipa-high-impact +Alfred Sisley,0.7306032,fineart +Charles-Francois Daubigny,0.73057353,fineart +Karel Thole,0.7305395,cartoon +Andre Derain,0.73050404,scribbles +Larry Poons,0.73023695,fineart +Beauford Delaney,0.72999024,scribbles +Ruth Bernhard,0.72990334,black-white +David Alfaro Siqueiros,0.7297947,scribbles +Gaugin,0.729636,fineart +Carl Larsson,0.7296195,cartoon +Albrecht Dürer,0.72946966,fineart +Henri De Toulouse Lautrec,0.7294263,cartoon +Shotaro Ishinomori,0.7292093,anime +Hope Gangloff,0.729082,scribbles +Vivian Maier,0.72897506,digipa-high-impact +Alex Andreev,0.6442978,digipa-high-impact +Julie Blackmon,0.72862685,c +Arthur Melville,0.7286146,fineart +Henri Michaux,0.599607,fineart +William Steig,0.7283096,scribbles +Octavio Ocampo,0.72814554,scribbles +Cy Twombly,0.72814107,scribbles +Guy Denning,0.67375445,fineart +Maxfield Parrish,0.7280283,fineart +Randolph Caldecott,0.7279564,fineart +Duccio,0.72795,fineart +Ray Donley,0.5837457,fineart +Hiroshi Sugimoto,0.6497892,digipa-high-impact +Daniela Uhlig,0.4691466,special +Go Nagai,0.72770613,anime +Carlo Crivelli,0.72764605,fineart +Helmut Newton,0.44433144,digipa-low-impact +Josef Albers,0.7061394,scribbles +Henry Moret,0.7274567,fineart +André Masson,0.727404,scribbles +Henri Fantin Latour,0.72732764,fineart +Theo van Rysselberghe,0.7272843,fineart +John Wayne Gacy,0.72686327,scribbles +Carlos Schwabe,0.7267612,fineart +Herbert Bayer,0.7094297,scribbles +Domenichino,0.72667265,fineart +Liam Wong,0.7262276,special +George Caleb Bingham,0.7262154,digipa-high-impact +Gigadō Ashiyuki,0.7261864,fineart +Chaïm Soutine,0.72603923,scribbles +Ary Scheffer,0.64913243,fineart +Rockwell Kent,0.7257272,scribbles +Jean-Paul Riopelle,0.72570604,fineart +Ed Mell,0.6637067,cartoon +Ismail Inceoglu,0.72561014,special +Edgar Degas,0.72538006,fineart +Giorgione,0.7252798,fineart +Charles-François Daubigny,0.7252482,fineart +Arthur Lismer,0.7251765,scribbles +Aaron Siskind,0.4852289,digipa-med-impact +Arkhip Kuindzhi,0.7249981,fineart +Joseph Mallord William Turner,0.6834406,fineart +Dante 
Gabriel Rossetti,0.7244541,fineart +Ernst Haeckel,0.6660129,fineart +Rebecca Guay,0.72439146,cartoon +Anthony Gerace,0.636678,digipa-high-impact +Martin Kippenberger,0.72418386,scribbles +Diego Giacometti,0.72415763,scribbles +Dmitry Kustanovich,0.7241322,cartoon +Dora Carrington,0.7239633,scribbles +Shusei Nagaoko,0.7238965,anime +Odilon Redon,0.72381747,scribbles +Shohei Otomo,0.7132803,nudity +Barnett Newman,0.7236389,scribbles +Jean Fouquet,0.7235963,fineart +Gustav Klimt,0.72356784,nudity +Francisco Josè de Goya,0.6589663,fineart +Bonnard Pierre,0.72309464,nudity +Brooke Shaden,0.61281693,digipa-high-impact +Mao Hamaguchi,0.7228292,scribbles +Frederick Edwin Church,0.64416,fineart +Asher Brown Durand,0.72264796,fineart +George Baselitz,0.7223453,scribbles +Sam Bosma,0.7223237,fineart +Asaf Hanuka,0.72222745,scribbles +David Teniers the Younger,0.7221168,fineart +Nicola Samori,0.68747556,nudity +Claude Lorrain,0.7217102,fineart +Hermenegildo Anglada Camarasa,0.7214374,nudity +Pablo Picasso,0.72142905,scribbles +Howard Chaykin,0.7213998,cartoon +Ferdinand Hodler,0.7213758,nudity +Farel Dalrymple,0.7213298,fineart +Lyubov Popova,0.7213024,scribbles +Albin Egger-Lienz,0.72120845,fineart +Geertgen tot Sint Jans,0.72107565,fineart +Kate Greenaway,0.72069687,fineart +Louise Bourgeois,0.7206516,fineart +Miriam Schapiro,0.72026414,fineart +Pieter Claesz,0.7200939,fineart +George B. Bridgman,0.5592567,fineart +Piet Mondrian,0.71990657,scribbles +Michelangelo Merisi Da Caravaggio,0.7094674,fineart +Marie Spartali Stillman,0.71986604,fineart +Gertrude Abercrombie,0.7196962,scribbles +Louis Icart,0.7195913,fineart +David Driskell,0.719564,scribbles +Paula Modersohn-Becker,0.7193769,scribbles +George Hurrell,0.57496595,digipa-high-impact +Andrea Mantegna,0.7190254,fineart +Silvestro Lega,0.71891177,fineart +Junji Ito,0.7188978,anime +Jacob Hashimoto,0.7186867,digipa-high-impact +Benjamin West,0.6642946,fineart +David Teniers the Elder,0.7181293,fineart +Roberto Matta,0.71808386,fineart +Chiho Aoshima,0.71801454,anime +Amedeo Modigliani,0.71788836,scribbles +Raja Ravi Varma,0.71788085,fineart +Roberto Ferri,0.538221,nudity +Winslow Homer,0.7176876,fineart +Horace Vernet,0.65729,fineart +Lucas Cranach the Elder,0.71738195,fineart +Godfried Schalcken,0.625893,fineart +Affandi,0.7170285,nudity +Diane Arbus,0.655138,digipa-high-impact +Joseph Ducreux,0.65247905,digipa-high-impact +Berthe Morisot,0.7165984,fineart +Hilma AF Klint,0.71643853,scribbles +Hilma af Klint,0.71643853,scribbles +Filippino Lippi,0.7163017,fineart +Leonid Afremov,0.7163005,fineart +Chris Ware,0.71628594,scribbles +Marius Borgeaud,0.7162446,scribbles +M.W. 
Kaluta,0.71612585,cartoon +Govert Flinck,0.68975246,fineart +Charles Demuth,0.71605396,scribbles +Coles Phillips,0.7158309,scribbles +Oskar Fischinger,0.6721027,digipa-high-impact +David Teniers III,0.71569765,fineart +Jean Delville,0.7156771,fineart +Antonio Saura,0.7155949,scribbles +Bridget Riley,0.7155669,fineart +Gordon Parks,0.5759978,digipa-high-impact +Anselm Kiefer,0.71514887,scribbles +Remedios Varo,0.7150927,weird +Franz Hegi,0.71495223,scribbles +Kati Horna,0.71486115,black-white +Arshile Gorky,0.71459055,scribbles +David LaChapelle,0.7144903,scribbles +Fritz von Dardel,0.71446383,scribbles +Edward Ruscha,0.71438885,fineart +Blanche Hoschedé Monet,0.7143073,fineart +Alexandre Calame,0.5735474,fineart +Sean Scully,0.714154,fineart +Alexandre Benois,0.7141515,fineart +Sally Mann,0.6534312,black-white +Thomas Eakins,0.7141104,fineart +Arnold Böcklin,0.71407956,fineart +Alfonse Mucha,0.7139052,special +Damien Hirst,0.7136273,scribbles +Lee Krasner,0.71362555,scribbles +Dorothea Lange,0.71361613,black-white +Juan Gris,0.7132987,scribbles +Bernardo Bellotto,0.70720065,fineart +John Martin,0.5376847,fineart +Harriet Backer,0.7131594,fineart +Arnold Newman,0.5736342,digipa-high-impact +Gjon Mili,0.46520913,digipa-low-impact +Asger Jorn,0.7129575,scribbles +Chesley Bonestell,0.6063316,fineart +Agostino Carracci,0.7128167,fineart +Peter Wileman,0.71271706,cartoon +Chen Hongshou,0.71268153,ukioe +Catherine Hyde,0.71266896,scribbles +Andrea Pozzo,0.626546,fineart +Kitty Lange Kielland,0.7125735,fineart +Cornelis Saftleven,0.6684047,fineart +Félix Vallotton,0.71237606,fineart +Albrecht Durer,0.7122327,fineart +Jackson Pollock,0.71222305,scribbles +John Bratby,0.7122171,scribbles +Beksinski,0.71218586,fineart +James Thomas Watts,0.5959548,fineart +Konstantin Korovin,0.71188873,fineart +Gustave Caillebotte,0.71181154,fineart +Dean Ellis,0.50233585,fineart +Friedrich von Amerling,0.6420181,fineart +Christopher Balaskas,0.67935324,special +Alexander Rodchenko,0.67415404,scribbles +Alfred Cheney Johnston,0.6647291,fineart +Mikalojus Konstantinas Ciurlionis,0.710677,scribbles +Jean-Antoine Watteau,0.71061164,fineart +Paul Delvaux,0.7105914,scribbles +Francesco del Cossa,0.7104901,nudity +Isaac Cordal,0.71046066,weird +Hikari Shimoda,0.7104546,weird +François Boucher,0.67153126,fineart +Akos Major,0.7103802,digipa-high-impact +Bernard Buffet,0.7103491,cartoon +Brandon Woelfel,0.6727086,digipa-high-impact +Edouard Manet,0.7101296,fineart +Auguste Herbin,0.6866145,scribbles +Eugene Delacroix,0.70995826,fineart +L. Birge Harrison,0.70989627,fineart +Howard Pyle,0.70979863,fineart +Diane Dillon,0.70968723,scribbles +Hans Erni,0.7096618,scribbles +Richard Diebenkorn,0.7096184,scribbles +Thomas Gainsborough,0.6759419,fineart +Maria Sibylla Merian,0.7093275,fineart +François Joseph Heim,0.6175854,fineart +E. H. 
Shepard,0.7091189,cartoon +Hsiao-Ron Cheng,0.7090618,scribbles +Canaletto,0.7090392,fineart +John Atkinson Grimshaw,0.7087531,fineart +Giovanni Battista Tiepolo,0.6754107,fineart +Cornelis van Poelenburgh,0.69821274,fineart +Raina Telgemeier,0.70846486,scribbles +Francesco Hayez,0.6960006,fineart +Gilbert Stuart,0.659772,fineart +Konstantin Yuon,0.7081486,fineart +Antonello da Messina,0.70806944,fineart +Austin Osman Spare,0.7079903,fineart +James Ensor,0.70781446,scribbles +Claude Bonin-Pissarro,0.70739406,fineart +Mikhail Vrubel,0.70738363,fineart +Angelica Kauffman,0.6748828,fineart +Viktor Vasnetsov,0.7072422,fineart +Alphonse Osbert,0.70724136,fineart +Tsutomu Nihei,0.7070495,anime +Harvey Quaytman,0.63613266,fineart +Jamie Hawkesworth,0.706914,digipa-high-impact +Francesco Guardi,0.70682615,fineart +Jean-Honoré Fragonard,0.6518248,fineart +Brice Marden,0.70673287,digipa-high-impact +Charles-Amédée-Philippe van Loo,0.6725916,fineart +Mati Klarwein,0.7066092,n +Gerard ter Borch,0.706589,fineart +Dan Hillier,0.48966256,digipa-med-impact +Federico Barocci,0.682664,fineart +Henri Le Sidaner,0.70637953,fineart +Olivier Bonhomme,0.7063748,scribbles +Edward Weston,0.7061382,black-white +Giovanni Paolo Cavagna,0.6840265,fineart +Germaine Krull,0.6621777,black-white +Hans Holbein the Younger,0.70590156,fineart +François Bocion,0.6272365,fineart +Georg Baselitz,0.7053314,scribbles +Caravaggio,0.7050303,fineart +Anne Rothenstein,0.70502245,scribbles +Wadim Kashin,0.43714935,digipa-low-impact +Heinrich Lefler,0.7048054,fineart +Jacob van Ruisdael,0.7047918,fineart +Bartholomeus van Bassen,0.6676872,fineart +Jeffrey Smith art,0.56750107,fineart +Anne Packard,0.7046703,weird +Jean-François Millet,0.7045456,fineart +Andrey Remnev,0.7041204,digipa-high-impact +Fujiwara Takanobu,0.70410216,ukioe +Elliott Erwitt,0.69950557,black-white +Fern Coppedge,0.7036215,fineart +Bartholomeus van der Helst,0.66411966,fineart +Rembrandt Van Rijn,0.6979987,fineart +Rene Magritte,0.703457,scribbles +Aelbert Cuyp,0.7033657,fineart +Gerda Wegener,0.70319015,scribbles +Graham Sutherland,0.7031714,scribbles +Gerrit Dou,0.7029986,fineart +August Friedrich Schenck,0.6801586,fineart +George Herriman,0.7028568,scribbles +Stanisław Szukalski,0.6903354,fineart +Slim Aarons,0.70222545,digipa-high-impact +Ernst Thoms,0.70221686,fineart +Louis Wain,0.702186,fineart +Artemisia Gentileschi,0.70198226,fineart +Eugène Delacroix,0.70155394,fineart +Peter Bagge,0.70127463,scribbles +Jeffrey Catherine Jones,0.7012148,cartoon +Eugène Carrière,0.65272695,fineart +Alexander Millar,0.7011144,scribbles +Nobuyoshi Araki,0.70108867,fareast +Tintoretto,0.6702795,fineart +André Derain,0.7009005,scribbles +Charles Maurice Detmold,0.70079994,fineart +Francisco de Zurbarán,0.7007234,fineart +Laurie Greasley,0.70072114,cartoon +Lynda Benglis,0.7006948,digipa-high-impact +Cecil Beaton,0.66362655,black-white +Gustaf Tenggren,0.7006041,cartoon +Abdur Rahman Chughtai,0.7004994,ukioe +Constantin Brancusi,0.7004367,scribbles +Mikhail Larionov,0.7004066,fineart +Jan van Kessel the Elder,0.70040506,fineart +Chantal Joffe,0.70036674,scribbles +Charles-André van Loo,0.6830367,fineart +Reginald Marsh,0.6301042,fineart +Elsa Bleda,0.70005083,digipa-high-impact +Peter Paul Rubens,0.65745676,fineart +Eugène Boudin,0.70001304,fineart +Charles Willson Peale,0.66907954,fineart +Brian Mashburn,0.63395154,digipa-high-impact +Barkley L. 
Hendricks,0.69986427,n +Yoshiyuki Tomino,0.6998095,anime +Guido Reni,0.6416875,fineart +Lynd Ward,0.69958556,fineart +John Constable,0.6907788,fineart +František Kupka,0.6993329,fineart +Pieter Bruegel The Elder,0.6992879,scribbles +Benjamin Gerritsz Cuyp,0.6992173,fineart +Nicolas Mignard,0.6988214,fineart +Augustus Edwin Mulready,0.6482165,fineart +Andrea del Sarto,0.698532,fineart +Edward Steichen,0.69837445,black-white +James Abbott McNeill Whistler,0.69836813,fineart +Alphonse Legros,0.6983243,fineart +Ivan Aivazovsky,0.64588225,fineart +Giovanni Francesco Barbieri,0.6981316,fineart +Grace Cossington Smith,0.69811064,fineart +Bert Stern,0.53411555,scribbles +Mary Cassatt,0.6980135,fineart +Jules Bastien-Lepage,0.69796044,fineart +Max Ernst,0.69777006,fineart +Kentaro Miura,0.697743,anime +Georges Rouault,0.69758564,scribbles +Josephine Wall,0.6973667,fineart +Anne-Louis Girodet,0.58104825,nudity +Bert Hardy,0.6972966,black-white +Adriaen van de Velde,0.69716156,fineart +Andreas Achenbach,0.61108655,fineart +Hayv Kahraman,0.69705284,fineart +Beatrix Potter,0.6969851,fineart +Elmer Bischoff,0.6968948,fineart +Cornelis de Heem,0.6968436,fineart +Inio Asano,0.6965007,anime +Alfred Henry Maurer,0.6964837,fineart +Gottfried Helnwein,0.6962953,digipa-high-impact +Paul Barson,0.54196984,digipa-high-impact +Roger de La Fresnaye,0.69620967,fineart +Abraham Mignon,0.60605425,fineart +Albert Bloch,0.69573116,nudity +Charles Dana Gibson,0.67155975,fineart +Alexandre-Évariste Fragonard,0.6507174,fineart +Alexandre-Évariste Fragonard,0.6507174,fineart +Ernst Fuchs,0.6953538,nudity +Alfredo Jaar,0.6952965,digipa-high-impact +Judy Chicago,0.6952246,weird +Frans van Mieris the Younger,0.6951849,fineart +Aertgen van Leyden,0.6951305,fineart +Emily Carr,0.69512105,fineart +Frances Macdonald,0.6950408,scribbles +Frances MacDonald,0.6950408,scribbles +Hannah Höch,0.69495845,scribbles +Gillis Rombouts,0.58770025,fineart +Käthe Kollwitz,0.6947756,fineart +Barbara Stauffacher Solomon,0.6920825,fineart +Georges Lacombe,0.6944455,fineart +Gwen John,0.6944161,fineart +Terada Katsuya,0.6944026,cartoon +James Gillray,0.6871335,fineart +Robert Crumb,0.69420326,fineart +Bruce Pennington,0.6545669,fineart +David Firth,0.69400465,scribbles +Arthur Boyd,0.69399726,fineart +Antonin Artaud,0.67321455,fineart +Giuseppe Arcimboldo,0.6937329,fineart +Jim Mahfood,0.6936606,cartoon +Ossip Zadkine,0.6494374,scribbles +Atelier Olschinsky,0.69349927,fineart +Carl Frederik von Breda,0.57274634,fineart +Ken Sugimori,0.6932626,anime +Chris Friel,0.5399168,fineart +Andrew Macara,0.69307995,fineart +Alexander Jansson,0.69298327,scribbles +Anne Brigman,0.6865817,black-white +George Ault,0.66756654,fineart +Arkhyp Kuindzhi,0.6928072,digipa-high-impact +Emiliano Ponzi,0.69278395,scribbles +William Holman Hunt,0.6927663,fineart +Tamara Lempicka,0.6386007,scribbles +Mark Ryden,0.69259655,fineart +Giovanni Paolo Pannini,0.6802902,fineart +Carl Barks,0.6923666,cartoon +Fritz Bultman,0.6318746,fineart +Salomon van Ruysdael,0.690313,fineart +Carrie Mae Weems,0.6645416,n +Agostino Arrivabene,0.61166185,fineart +Gustave Boulanger,0.655797,fineart +Henry Justice Ford,0.51214355,fareast +Bernardo Strozzi,0.63510317,fineart +André Lhote,0.68718815,scribbles +Paul Corfield,0.6915611,scribbles +Gifford Beal,0.6914777,fineart +Hirohiko Araki,0.6914078,anime +Emil Carlsen,0.691326,fineart +Frans van Mieris the Elder,0.6912799,fineart +Simon Stalenhag,0.6912775,special +Henry van de Velde,0.64838886,fineart +Eleanor 
Fortescue-Brickdale,0.6909729,fineart +Thomas W Schaller,0.69093937,special +NHK Animation,0.6907677,cartoon +Euan Uglow,0.69060403,scribbles +Hendrick Goltzius,0.69058937,fineart +William Blake,0.69038224,fineart +Vito Acconci,0.58409876,digipa-high-impact +Billy Childish,0.6902057,scribbles +Ben Quilty,0.6875855,fineart +Mark Briscoe,0.69010437,fineart +Adriaen van de Venne,0.6899867,fineart +Alasdair McLellan,0.6898454,digipa-high-impact +Ed Paschke,0.68974686,scribbles +Guy Rose,0.68960273,fineart +Barbara Hepworth,0.68958247,fineart +Edward Henry Potthast,0.6895703,fineart +Francis Bacon,0.6895397,scribbles +Pawel Kuczynski,0.6894536,fineart +Bjarke Ingels,0.68933153,digipa-high-impact +Henry Ossawa Tanner,0.68932164,fineart +Alessandro Allori,0.6892961,fineart +Abraham van Calraet,0.63841593,fineart +Egon Schiele,0.6891415,scribbles +Tim Doyle,0.5474768,digipa-high-impact +Grandma Moses,0.6890782,fineart +John Frederick Kensett,0.61981744,fineart +Giacomo Balla,0.68893707,fineart +Jamie Baldridge,0.6546651,digipa-high-impact +Max Beckmann,0.6884731,scribbles +Cornelis van Haarlem,0.6677613,fineart +Edward Hopper,0.6884258,special +Barkley Hendricks,0.6883637,n +Patrick Dougherty,0.688321,digipa-high-impact +Karol Bak,0.6367705,fineart +Pierre Puvis de Chavannes,0.6880703,fineart +Antoni Tàpies,0.685689,fineart +Alexander Nasmyth,0.57695735,fineart +Laurent Grasso,0.5793272,fineart +Camille Walala,0.6076875,digipa-high-impact +Fairfield Porter,0.68790644,fineart +Alex Colville,0.68787855,fineart +Herb Ritts,0.51471305,scribbles +Gerhard Munthe,0.687658,fineart +Susan Seddon Boulet,0.68762136,scribbles +Liu Ye,0.68760437,fineart +Robert Antoine Pinchon,0.68744636,fineart +Fujiwara Nobuzane,0.6873439,fineart +Frederick Carl Frieseke,0.6873361,fineart +Aert van der Neer,0.6159286,fineart +Allen Jones,0.6869935,scribbles +Anja Millen,0.6064488,digipa-high-impact +Esaias van de Velde,0.68673944,fineart +Gyoshū Hayami,0.68665624,anime +William Hogarth,0.6720842,fineart +Frederic Church,0.6865637,fineart +Cyril Rolando,0.68644965,cartoon +Frederic Edwin Church,0.6863009,fineart +Thomas Rowlandson,0.66726154,fineart +Joachim Brohm,0.68601763,digipa-high-impact +Cristofano Allori,0.6858083,fineart +Adrianus Eversen,0.58259964,fineart +Richard Dadd,0.68546164,fineart +Ambrosius Bosschaert II,0.6854217,fineart +Paolo Veronese,0.68422073,fineart +Abraham van den Tempel,0.66463804,fineart +Duncan Grant,0.6852565,scribbles +Hendrick Cornelisz. 
van Vliet,0.6851691,fineart +Geof Darrow,0.6851174,scribbles +Émile Bernard,0.6850957,fineart +Brian Bolland,0.68496394,scribbles +James Gilleard,0.6849431,cartoon +Anton Raphael Mengs,0.6689196,fineart +Augustus Jansson,0.6845705,digipa-high-impact +Hendrik Goltzius,0.6843367,fineart +Domenico Quaglio the Younger,0.65769434,fineart +Cicely Mary Barker,0.6841806,fineart +William Eggleston,0.6840795,digipa-high-impact +David Choe,0.6840449,scribbles +Adam Elsheimer,0.6716068,fineart +Heinrich Danioth,0.5390186,fineart +Franz Stuck,0.6836468,fineart +Bernie Wrightson,0.64101505,fineart +Dorina Costras,0.6835419,fineart +El Greco,0.68343943,fineart +Gatōken Shunshi,0.6833314,anime +Giovanni Bellini,0.67622876,fineart +Aron Wiesenfeld,0.68331146,nudity +Boris Kustodiev,0.68329334,fineart +Alec Soth,0.5597321,digipa-high-impact +Artus Scheiner,0.6313348,fineart +Kelly Vivanco,0.6830933,scribbles +Shaun Tan,0.6830649,fineart +Anthony van Dyck,0.6577681,fineart +Neil Welliver,0.68297863,nudity +Robert McCall,0.68294585,fineart +Sandra Chevrier,0.68284667,scribbles +Yinka Shonibare,0.68256056,n +Arthur Tress,0.6301861,digipa-high-impact +Richard McGuire,0.6820089,scribbles +Anni Albers,0.65708244,digipa-high-impact +Aleksey Savrasov,0.65207493,fineart +Wayne Barlowe,0.6537874,fineart +Giorgio De Chirico,0.6815907,fineart +Giorgio de Chirico,0.6815907,fineart +Ernest Procter,0.6815795,fineart +Adriaen Brouwer,0.6815058,fineart +Ilya Glazunov,0.6813533,fineart +Alison Bechdel,0.68096143,scribbles +Carl Holsoe,0.68082225,fineart +Alfred Edward Chalon,0.6464571,fineart +Gerard David,0.68058,fineart +Basil Blackshaw,0.6805679,fineart +Gerrit Adriaenszoon Berckheyde,0.67340267,fineart +George Hendrik Breitner,0.6804209,fineart +Abraham Bloemaert,0.68036544,fineart +Ferdinand Van Kessel,0.67742276,fineart +Hugo Simberg,0.68031186,fineart +Gaston Bussière,0.665221,fineart +Shawn Coss,0.42407864,digipa-low-impact +Hanabusa Itchō,0.68023074,ukioe +Magnus Enckell,0.6801553,fineart +Gary Larson,0.6801336,scribbles +George Manson,0.68013126,digipa-high-impact +Hayao Miyazaki,0.6800754,anime +Carl Spitzweg,0.66581815,fineart +Ambrosius Holbein,0.6798341,fineart +Domenico Pozzi,0.6434162,fineart +Dorothea Tanning,0.6797955,fineart +Jeannette Guichard-Bunel,0.5251578,digipa-high-impact +Victor Moscoso,0.62962687,fineart +Francis Picabia,0.6795391,scribbles +Charles W. Bartlett,0.67947805,fineart +David A Hardy,0.5554935,fineart +C. R. W. 
Nevinson,0.67946506,fineart +Man Ray,0.6507145,scribbles +Albert Bierstadt,0.67935765,fineart +Charles Le Brun,0.6758479,fineart +Lovis Corinth,0.67913896,fineart +Herbert Abrams,0.5507507,digipa-high-impact +Giorgio Morandi,0.6789025,fineart +Agnolo Bronzino,0.6787985,fineart +Abraham Pether,0.66922426,fineart +John Bauer,0.6786695,fineart +Arthur Stanley Wilkinson,0.67860866,fineart +Arthur Wardle,0.5510789,fineart +George Romney,0.62868094,fineart +Laurie Lipton,0.5201844,fineart +Mickalene Thomas,0.45433685,digipa-low-impact +Alice Rahon,0.6777824,scribbles +Gustave Van de Woestijne,0.6777346,scribbles +Laurel Burch,0.67766285,fineart +Hendrik Gerritsz Pot,0.67750573,fineart +John William Waterhouse,0.677472,fineart +Conor Harrington,0.5967809,fineart +Gabriel Ba,0.6773366,cartoon +Franz Xaver Winterhalter,0.62229514,fineart +George Cruikshank,0.6473593,fineart +Hyacinthe Rigaud,0.67717785,fineart +Cornelis Claesz van Wieringen,0.6770269,fineart +Adriaen van Outrecht,0.67682564,fineart +Yaacov Agam,0.6767926,fineart +Franz von Lenbach,0.61948,fineart +Clyfford Still,0.67667866,fineart +Alexander Roslin,0.66719526,fineart +Barry Windsor Smith,0.6765375,cartoon +Takeshi Obata,0.67643225,anime +John Harris,0.47712502,fineart +Bruce Davidson,0.6763525,digipa-high-impact +Hendrik Willem Mesdag,0.6762745,fineart +Makoto Shinkai,0.67610705,anime +Andreas Gursky,0.67610145,digipa-high-impact +Mike Winkelmann (Beeple),0.6510196,digipa-high-impact +Gustave Moreau,0.67607844,fineart +Frank Weston Benson,0.6760142,fineart +Eduardo Kingman,0.6759026,fineart +Benjamin Williams Leader,0.5611925,fineart +Hervé Guibert,0.55973417,black-white +Cornelis Dusart,0.6753622,fineart +Amédée Guillemin,0.6752696,fineart +Alessio Albi,0.6752633,digipa-high-impact +Matthias Grünewald,0.6751779,fineart +Fujishima Takeji,0.6751577,anime +Georges Braque,0.67514753,scribbles +John Salminen,0.67498183,fineart +Atey Ghailan,0.674873,scribbles +Giovanni Antonio Galli,0.657484,fineart +Julie Mehretu,0.6748382,fineart +Jean Auguste Dominique Ingres,0.6746286,fineart +Francesco Albani,0.6621554,fineart +Anato Finnstark,0.6744919,digipa-high-impact +Giovanni Bernardino Mazzolini,0.64416045,fineart +Antoine Le Nain,0.6233709,fineart +Ford Madox Brown,0.6743224,fineart +Gerhard Richter,0.67426133,fineart +theCHAMBA,0.6742506,cartoon +Edward Julius Detmold,0.67421955,fineart +George Stubbs,0.6209227,fineart +George Tooker,0.6740602,scribbles +Faith Ringgold,0.6739976,scribbles +Giambattista Pittoni,0.5792371,fineart +George Bellows,0.6737008,fineart +Aldus Manutius,0.67366326,fineart +Ambrosius Bosschaert,0.67364097,digipa-high-impact +Michael Parkes,0.6133628,fineart +Hans Bellmer,0.6735973,nudity +Sir James Guthrie,0.67359626,fineart +Charles Spencelayh,0.67356884,fineart +Ivan Shishkin,0.6734136,fineart +Hans Holbein the Elder,0.6733856,fineart +Filip Hodas,0.60053295,digipa-high-impact +Herman Saftleven,0.6732188,digipa-high-impact +Dirck de Quade van Ravesteyn,0.67309594,fineart +Joe Fenton,0.6730916,scribbles +Arnold Bocklin,0.6730706,fineart +Baiōken Eishun,0.6730663,anime +Giovanni Giacometti,0.6730505,fineart +Giovanni Battista Gaulli,0.65036476,fineart +William Stout,0.672887,fineart +Gavin Hamilton,0.5982757,fineart +John Stezaker,0.6726847,black-white +Frederick McCubbin,0.67263377,fineart +Christoph Ludwig Agricola,0.62750757,fineart +Alice Neel,0.67255914,scribbles +Giovanni Battista Venanzi,0.61996603,fineart +Miho Hirano,0.6724092,anime +Tom Thomson,0.6723876,fineart +Alfred Munnings,0.6723851,fineart +David 
Wilkie,0.6722781,fineart +Adriaen van Ostade,0.67220736,fineart +Alfred Eisenstaedt,0.67213774,black-white +Leon Kossoff,0.67208946,fineart +Georges de La Tour,0.6421979,fineart +Chuck Close,0.6719756,digipa-high-impact +Herbert MacNair,0.6719506,scribbles +Edward Atkinson Hornel,0.6719265,fineart +Becky Cloonan,0.67192084,cartoon +Gian Lorenzo Bernini,0.58210254,fineart +Hein Gorny,0.4982776,digipa-med-impact +Joe Webb,0.6714884,fineart +Cornelis Pietersz Bega,0.64423996,fineart +Christian Krohg,0.6713641,fineart +Cornelia Parker,0.6712246,fineart +Anna Mary Robertson Moses,0.6709144,fineart +Quentin Tarantino,0.6708354,digipa-high-impact +Frederic Remington,0.67074275,fineart +Barent Fabritius,0.6707407,fineart +Oleg Oprisco,0.6707388,digipa-high-impact +Hendrick van Streeck,0.670666,fineart +Bakemono Zukushi,0.67051035,anime +Lucy Madox Brown,0.67032814,fineart +Paul Wonner,0.6700563,scribbles +Guido Borelli Da Caluso,0.66966087,digipa-high-impact +Emil Alzamora,0.5844039,nudity +Heinrich Brocksieper,0.64469147,fineart +Dan Smith,0.669563,digipa-high-impact +Lois van Baarle,0.6695091,scribbles +Arthur Garfield Dove,0.6694996,scribbles +Matthias Jung,0.66936135,digipa-high-impact +José Clemente Orozco,0.6693544,scribbles +Don Bluth,0.6693046,cartoon +Akseli Gallen-Kallela,0.66927314,fineart +Alex Howitt,0.52858865,digipa-high-impact +Giovanni Bernardino Asoleni,0.6635405,fineart +Frederick Goodall,0.6690712,fineart +Francesco Bartolozzi,0.63431,fineart +Edmund Leighton,0.6689639,fineart +Abraham Willaerts,0.5966594,fineart +François Louis Thomas Francia,0.6207474,fineart +Carel Fabritius,0.6688478,fineart +Flora Macdonald Reid,0.6687404,fineart +Bartholomeus Breenbergh,0.6163084,fineart +Bernardino Mei,0.6486895,fineart +Carel Weight,0.6684968,fineart +Aristide Maillol,0.66843045,scribbles +Chris Leib,0.60567486,fineart +Giovanni Battista Piazzetta,0.65012705,fineart +Daniel Maclise,0.6678073,fineart +Giovanni Bernardino Azzolini,0.65774256,fineart +Aaron Horkey,0.6676864,fineart +Otto Dix,0.667294,scribbles +Ferdinand Bol,0.6414797,fineart +Adriaen Coorte,0.6670663,fineart +William Gropper,0.6669881,scribbles +Gerard de Lairesse,0.6639489,fineart +Mab Graves,0.6668356,scribbles +Fernando Amorsolo,0.66683346,fineart +Pixar Concept Artists,0.6667752,cartoon +Alfred Augustus Glendening,0.64009607,fineart +Diego Velázquez,0.6666799,fineart +Jerry Pinkney,0.6665478,fineart +Antoine Wiertz,0.6143825,fineart +Alberto Burri,0.6618252,scribbles +Max Weber,0.6664029,fineart +Hans Baluschek,0.66636246,fineart +Annie Swynnerton,0.6663346,fineart +Albert Dubois-Pillet,0.57526016,fineart +Dora Maar,0.62862253,digipa-high-impact +Kay Sage,0.5614823,fineart +David A. 
Hardy,0.51376164,fineart +Alberto Biasi,0.42917693,digipa-low-impact +Fra Bartolomeo,0.6661105,fineart +Hendrick van Balen,0.65754294,fineart +Edwin Austin Abbey,0.66596496,fineart +George Frederic Watts,0.66595024,fineart +Alexei Kondratyevich Savrasov,0.6470352,fineart +Anna Ancher,0.66581213,fineart +Irma Stern,0.66580737,fineart +Frédéric Bazille,0.6657115,fineart +Awataguchi Takamitsu,0.6656272,anime +Edward Sorel,0.6655388,fineart +Edward Lear,0.6655078,fineart +Gabriel Metsu,0.6654555,fineart +Giovanni Battista Innocenzo Colombo,0.6653655,fineart +Scott Naismith,0.6650656,fineart +John Perceval,0.6650283,fineart +Girolamo Muziano,0.64234406,fineart +Cornelis de Man,0.66494393,fineart +Cornelis Bisschop,0.64119905,digipa-high-impact +Hans Leu the Elder,0.64770013,fineart +Michael Hutter,0.62479556,fineart +Cornelia MacIntyre Foley,0.6510235,fineart +Todd McFarlane,0.6647763,cartoon +John James Audubon,0.6279882,digipa-high-impact +William Henry Hunt,0.57340264,fineart +John Anster Fitzgerald,0.6644317,fineart +Tomer Hanuka,0.6643152,cartoon +Alex Prager,0.6641814,fineart +Heinrich Kley,0.6641148,fineart +Anne Redpath,0.66407835,scribbles +Marianne North,0.6640104,fineart +Daniel Merriam,0.6639365,fineart +Bill Carman,0.66390574,fineart +Méret Oppenheim,0.66387725,digipa-high-impact +Erich Heckel,0.66384083,fineart +Iryna Yermolova,0.663623,fineart +Antoine Ignace Melling,0.61502695,fineart +Akira Toriyama,0.6635002,anime +Gregory Crewdson,0.59810174,digipa-high-impact +Helene Schjerfbeck,0.66333634,fineart +Antonio Mancini,0.6631618,fineart +Zanele Muholi,0.58554715,n +Balthasar van der Ast,0.66294503,fineart +Toei Animations,0.6629127,anime +Arthur Quartley,0.6628106,fineart +Diego Rivera,0.6625808,fineart +Hendrik van Steenwijk II,0.6623777,fineart +James Tissot,0.6623415,fineart +Kehinde Wiley,0.66218376,n +Chiharu Shiota,0.6621249,digipa-high-impact +George Grosz,0.6620224,fineart +Peter De Seve,0.6616659,cartoon +Ryan Hewett,0.6615638,fineart +Hasegawa Tōhaku,0.66146004,anime +Apollinary Vasnetsov,0.6613177,fineart +Francis Cadell,0.66119456,fineart +Henri Harpignies,0.6611012,fineart +Henry Macbeth-Raeburn,0.6213787,fineart +Christoffel van den Berghe,0.6609149,fineart +Leiji Matsumoto,0.66089404,anime +Adriaen van der Werff,0.638286,fineart +Ramon Casas,0.6606529,fineart +Arthur Hacker,0.66062653,fineart +Edward Willis Redfield,0.66058433,fineart +Carl Gustav Carus,0.65355223,fineart +Francesca Woodman,0.60435605,digipa-high-impact +Hans Makart,0.5881955,fineart +Carne Griffiths,0.660091,weird +Will Barnet,0.65995145,scribbles +Fitz Henry Lane,0.659841,fineart +Masaaki Sasamoto,0.6597158,anime +Salvador Dali,0.6290813,scribbles +Walt Kelly,0.6596993,digipa-high-impact +Charlotte Nasmyth,0.56481636,fineart +Ferdinand Knab,0.6596528,fineart +Steve Lieber,0.6596117,scribbles +Zhang Kechun,0.6595939,fareast +Olivier Valsecchi,0.5324838,digipa-high-impact +Joel Meyerowitz,0.65937585,digipa-high-impact +Arthur Streeton,0.6592294,fineart +Henriett Seth F.,0.6592273,fineart +Genndy Tartakovsky,0.6591695,scribbles +Otto Marseus van Schrieck,0.65890455,fineart +Hanna-Barbera,0.6588123,cartoon +Mary Anning,0.6588001,fineart +Pamela Colman Smith,0.6587648,fineart +Anton Mauve,0.6586873,fineart +Hendrick Avercamp,0.65866685,fineart +Max Pechstein,0.65860206,scribbles +Franciszek Żmurko,0.56855476,fineart +Felice Casorati,0.6584761,fineart +Louis Janmot,0.65298057,fineart +Thomas Cole,0.5408042,fineart +Peter Mohrbacher,0.58273685,fineart +Arnold Franz Brasz,0.65834284,nudity +Christian 
Rohlfs,0.6582814,fineart +Basil Gogos,0.658105,fineart +Fitz Hugh Lane,0.657923,fineart +Liubov Sergeevna Popova,0.62325525,fineart +Elizabeth MacNicol,0.65773135,fineart +Zinaida Serebriakova,0.6577016,fineart +Ernest Lawson,0.6575238,fineart +Bruno Catalano,0.6574354,fineart +Albert Namatjira,0.6573372,fineart +Fritz von Uhde,0.6572697,fineart +Edwin Henry Landseer,0.62363374,fineart +Naoto Hattori,0.621745,fareast +Reylia Slaby,0.65709853,fineart +Arthur Burdett Frost,0.6147318,fineart +Frank Miller,0.65707314,digipa-high-impact +Algernon Talmage,0.65702903,fineart +Itō Jakuchū,0.6570199,digipa-high-impact +Billie Waters,0.65684533,digipa-high-impact +Ingrid Baars,0.58558,digipa-high-impact +Pieter Jansz Saenredam,0.6566058,fineart +Egbert van Heemskerck,0.6125889,fineart +John French Sloan,0.6362145,fineart +Craola,0.65639997,scribbles +Benjamin Marra,0.61809736,nudity +Anthony Thieme,0.65609205,fineart +Satoshi Kon,0.65606606,anime +Masamune Shirow,0.65592873,anime +Alfred Stevens,0.6557321,fineart +Hariton Pushwagner,0.6556745,anime +Carlo Carrà,0.6556279,fineart +Stuart Davis,0.6050534,digipa-high-impact +David Shrigley,0.6553904,digipa-high-impact +Albrecht Anker,0.65531695,fineart +Anton Semenov,0.6552501,digipa-high-impact +Fabio Hurtado,0.5955889,fineart +Donald Judd,0.6552257,fineart +Francisco de Burgos Mantilla,0.65516514,fineart +Barthel Bruyn the Younger,0.6551433,fineart +Abram Arkhipov,0.6550962,fineart +Paulus Potter,0.65498203,fineart +Edward Lamson Henry,0.6549521,fineart +Audrey Kawasaki,0.654843,fineart +George Catlin,0.6547183,fineart +Adélaïde Labille-Guiard,0.6066263,fineart +Sandy Skoglund,0.6546999,digipa-high-impact +Hans Baldung,0.654431,fineart +Ethan Van Sciver,0.65442884,cartoon +Frans Hals,0.6542338,fineart +Caspar David Friedrich,0.6542175,fineart +Charles Conder,0.65420866,fineart +Betty Churcher,0.65387225,fineart +Claes Corneliszoon Moeyaert,0.65386075,fineart +David Bomberg,0.6537477,fineart +Abraham Bosschaert,0.6535562,fineart +Giuseppe De Nittis,0.65354455,fineart +Giuseppe de Nittis,0.65354455,fineart +John La Farge,0.65342575,fineart +Frits Thaulow,0.65341854,fineart +John Duncan,0.6532379,fineart +Floris van Dyck,0.64900756,fineart +Anton Pieck,0.65310377,fineart +Roger Dean,0.6529647,nudity +Maximilian Pirner,0.65280807,fineart +Dorothy Johnstone,0.65267503,fineart +Govert Dircksz Camphuysen,0.65258145,fineart +Ryohei Hase,0.6168618,fineart +Hans von Aachen,0.62437224,fineart +Gustaf Munch-Petersen,0.6522485,fineart +Earnst Haeckel,0.6344333,fineart +Giovanni Battista Bracelli,0.62635326,fineart +Hendrick Goudt,0.6521433,fineart +Aneurin Jones,0.65191466,fineart +Bryan Hitch,0.6518333,cartoon +Coby Whitmore,0.6515695,fineart +Barthélemy d'Eyck,0.65156406,fineart +Quint Buchholz,0.65151155,fineart +Adriaen Hanneman,0.6514815,fineart +Tom Roberts,0.5855832,fineart +Fernand Khnopff,0.6512954,nudity +Charles Vess,0.6512271,cartoon +Carlo Galli Bibiena,0.6511681,nudity +Alexander Milne Calder,0.6081027,fineart +Josan Gonzalez,0.6193469,cartoon +Barthel Bruyn the Elder,0.6509954,fineart +Jon Whitcomb,0.6046063,fineart +Arcimboldo,0.6509897,fineart +Hendrik van Steenwijk I,0.65086293,fineart +Albert Joseph Pénot,0.65085316,fineart +Edward Wadsworth,0.6308917,scribbles +Andrew Wyeth,0.6507103,fineart +Correggio,0.650689,fineart +Frances Currey,0.65068,fineart +Henryk Siemiradzki,0.56721973,fineart +Worthington Whittredge,0.6504713,fineart +Federico Zandomeneghi,0.65033823,fineart +Isaac Levitan,0.6503356,fineart +Russ Mills,0.65012795,fineart +Edith 
Lawrence,0.65010095,fineart +Gil Elvgren,0.5614284,digipa-high-impact +Chris Foss,0.56495357,fineart +Francesco Zuccarelli,0.612805,fineart +Hendrick Bloemaert,0.64962655,fineart +Egon von Vietinghoff,0.57180583,fineart +Pixar,0.6495793,cartoon +Daniel Clowes,0.6495775,fineart +Friedrich Ritter von Friedländer-Malheim,0.6493772,fineart +Rebecca Sugar,0.6492679,scribbles +Chen Daofu,0.6492026,fineart +Dustin Nguyen,0.64909416,cartoon +Raymond Duchamp-Villon,0.6489605,nudity +Daniel Garber,0.6489332,fineart +Antonio Canova,0.58764786,fineart +Algernon Blackwood,0.59256804,fineart +Betye Saar,0.64877665,fineart +William S. Burroughs,0.5505619,fineart +Rodney Matthews,0.64844495,fineart +Michelangelo Buonarroti,0.6484401,fineart +Posuka Demizu,0.64843124,anime +Joao Ruas,0.6484134,fineart +Andy Fairhurst,0.6480388,special +"Andries Stock, Dutch Baroque painter",0.6479797,fineart +Antonio de la Gandara,0.6479292,fineart +Bruce Timm,0.6477877,scribbles +Harvey Kurtzman,0.64772683,cartoon +Eiichiro Oda,0.64772165,anime +Edwin Landseer,0.6166703,fineart +Carl Heinrich Bloch,0.64755356,fineart +Adriaen Isenbrant,0.6475428,fineart +Santiago Caruso,0.6473954,fineart +Alfred Guillou,0.6472603,fineart +Clara Peeters,0.64725095,fineart +Kim Jung Gi,0.6472225,cartoon +Milo Manara,0.6471776,cartoon +Phil Noto,0.6470769,anime +Kaws,0.6470336,cartoon +Desmond Morris,0.5951916,fineart +Gediminas Pranckevicius,0.6467787,fineart +Jack Kirby,0.6467424,cartoon +Claes Jansz. Visscher,0.6466888,fineart +Augustin Meinrad Bächtiger,0.6465789,fineart +John Lavery,0.64643383,fineart +Anne Bachelier,0.6464065,fineart +Giuseppe Bernardino Bison,0.64633006,fineart +E. T. A. Hoffmann,0.5887251,fineart +Ambrosius Benson,0.6457839,fineart +Cornelis Verbeeck,0.645782,fineart +H. R. Giger,0.6456823,weird +Adolph Menzel,0.6455246,fineart +Aliza Razell,0.5863178,digipa-high-impact +Gerard Seghers,0.6205679,fineart +David Aja,0.62812066,scribbles +Gustave Courbet,0.64476407,fineart +Alexandre Cabanel,0.63849115,fineart +Albert Marquet,0.64471006,fineart +Harold Harvey,0.64464307,fineart +William Wegman,0.6446265,scribbles +Harold Gilman,0.6445966,fineart +Jeremy Geddes,0.57839495,digipa-high-impact +Abraham van Beijeren,0.6356113,fineart +Eugène Isabey,0.6160607,fineart +Jorge Jacinto,0.58618563,fineart +Frederic Leighton,0.64383554,fineart +Dave McKean,0.6438012,cartoon +Hiromu Arakawa,0.64371413,anime +Aaron Douglas,0.6437089,fineart +Adolf Dietrich,0.590169,fineart +Frederik de Moucheron,0.6435952,fineart +Siya Oum,0.6435919,cartoon +Alberto Morrocco,0.64352196,fineart +Robert Vonnoh,0.6433115,fineart +Tom Bagshaw,0.5322264,fineart +Guerrilla Girls,0.64309967,digipa-high-impact +Johann Wolfgang von Goethe,0.6429888,fineart +Charles Le Roux,0.6426594,fineart +Auguste Toulmouche,0.64261353,fineart +Cindy Sherman,0.58666563,digipa-high-impact +Federico Zuccari,0.6425021,fineart +Mike Mignola,0.642346,cartoon +Cecily Brown,0.6421981,fineart +Brian K. 
Vaughan,0.64147836,cartoon +RETNA (Marquis Lewis),0.47963,n +Klaus Janson,0.64129144,cartoon +Alessandro Galli Bibiena,0.6412889,fineart +Jeremy Lipking,0.64123213,fineart +Stephen Shore,0.64108944,digipa-high-impact +Heinz Edelmann,0.51325977,digipa-med-impact +Joaquín Sorolla,0.6409732,fineart +Bella Kotak,0.6409608,digipa-high-impact +Cornelis Engebrechtsz,0.64091057,fineart +Bruce Munro,0.64084166,digipa-high-impact +Marjane Satrapi,0.64076495,fineart +Jeremy Mann,0.557744,digipa-high-impact +Heinrich Maria Davringhausen,0.6403986,fineart +Kengo Kuma,0.6402023,digipa-high-impact +Alfred Manessier,0.640153,fineart +Antonio Galli Bibiena,0.6399247,digipa-high-impact +Eduard von Grützner,0.6397164,fineart +Bunny Yeager,0.5455078,digipa-high-impact +Adolphe Willette,0.6396935,fineart +Wangechi Mutu,0.6394607,n +Peter Milligan,0.6391612,digipa-high-impact +Dalí,0.45400402,digipa-low-impact +Élisabeth Vigée Le Brun,0.6388982,fineart +Beth Conklin,0.6388204,digipa-high-impact +Charles Alphonse du Fresnoy,0.63881266,fineart +Thomas Benjamin Kennington,0.56668127,fineart +Jim Woodring,0.5625168,fineart +Francisco Oller,0.63846034,fineart +Csaba Markus,0.6384506,fineart +Botero,0.63843524,scribbles +Bill Henson,0.5394536,digipa-high-impact +Anna Bocek,0.6382304,scribbles +Hugo van der Goes,0.63822484,fineart +Robert William Hume,0.5433574,fineart +Chip Zdarsky,0.6381826,cartoon +Daniel Seghers,0.53494316,fineart +Richard Doyle,0.6377541,fineart +Hendrick Terbrugghen,0.63773805,fineart +Joe Madureira,0.6377177,special +Floris van Schooten,0.6376191,fineart +Jeff Simpson,0.3959046,fineart +Albert Joseph Moore,0.6374316,fineart +Arthur Merric Boyd,0.6373228,fineart +Amadeo de Souza Cardoso,0.5927926,fineart +Os Gemeos,0.6368859,digipa-high-impact +Giovanni Boldini,0.6368698,fineart +Albert Goodwin,0.6368695,fineart +Hans Eduard von Berlepsch-Valendas,0.61562145,fineart +Edmond Xavier Kapp,0.5758474,fineart +François Quesnel,0.6365935,fineart +Nathan Coley,0.6365817,digipa-high-impact +Jasmine Becket-Griffith,0.6365083,digipa-high-impact +Raphaelle Peale,0.6364422,fineart +Candido Portinari,0.63634276,fineart +Edward Dugmore,0.63179636,fineart +Anders Zorn,0.6361722,fineart +Ed Emshwiller,0.63615763,fineart +Francis Coates Jones,0.6361159,fineart +Ernst Haas,0.6361123,digipa-high-impact +Dirck van Baburen,0.6213001,fineart +René Lalique,0.63594735,fineart +Sydney Prior Hall,0.6359345,fineart +Brad Kunkle,0.5659712,fineart +Corneille,0.6356381,fineart +Henry Lamb,0.63560975,fineart +Dirck Hals,0.63559663,fineart +Alex Grey,0.62908936,nudity +Michael Heizer,0.63555753,fineart +Yiannis Moralis,0.61731136,fineart +Emily Murray Paterson,0.4392335,fineart +Georg Friedrich Kersting,0.6256248,fineart +Frances Hodgkins,0.6352128,fineart +Charles Cundall,0.6349486,fineart +Henry Wallis,0.63478243,fineart +Goro Fujita,0.6346491,cartoon +Jean-Léon Gérôme,0.5954844,fineart +August von Pettenkofen,0.60910493,fineart +Abbott Handerson Thayer,0.63428533,fineart +Martin John Heade,0.5926603,fineart +Ellen Jewett,0.63420236,digipa-high-impact +Hidari Jingorō,0.63388014,fareast +Taiyō Matsumoto,0.63372946,special +Emanuel Leutze,0.6007246,fineart +Adam Martinakis,0.48973057,digipa-med-impact +Will Eisner,0.63349223,cartoon +Alexander Stirling Calder,0.6331682,fineart +Saturno Butto,0.6331184,nudity +Cecilia Beaux,0.6330725,fineart +Amandine Van Ray,0.6174208,digipa-high-impact +Bob Eggleton,0.63277495,digipa-high-impact +Sherree Valentine Daines,0.63274443,fineart +Frederick Lord Leighton,0.6299176,fineart +Daniel 
Ridgway Knight,0.63251615,fineart +Gaetano Previati,0.61743724,fineart +John Berkey,0.63226986,fineart +Richard Misrach,0.63201725,digipa-high-impact +Aaron Jasinski,0.57948315,fineart +"Edward Otho Cresap Ord, II",0.6317712,fineart +Evelyn De Morgan,0.6317376,fineart +Noelle Stevenson,0.63159716,digipa-high-impact +Edward Robert Hughes,0.6315573,fineart +Allan Ramsay,0.63150716,fineart +Balthus,0.6314323,scribbles +Hendrick Cornelisz Vroom,0.63143134,digipa-high-impact +Ilya Repin,0.6313043,fineart +George Lambourn,0.6312267,fineart +Arthur Hughes,0.6310194,fineart +Antonio J. Manzanedo,0.53841716,fineart +John Singleton Copley,0.6264835,fineart +Dennis Miller Bunker,0.63078755,fineart +Ernie Barnes,0.6307126,cartoon +Alison Kinnaird,0.6306353,digipa-high-impact +Alex Toth,0.6305541,digipa-high-impact +Henry Raeburn,0.6155551,fineart +Alice Bailly,0.6305177,fineart +Brian Kesinger,0.63037646,scribbles +Antoine Blanchard,0.63036835,fineart +Ron Walotsky,0.63035095,fineart +Kent Monkman,0.63027304,fineart +Naomi Okubo,0.5782754,fareast +Hercules Seghers,0.62957174,fineart +August Querfurt,0.6295643,fineart +Samuel Melton Fisher,0.6283333,fineart +David Burdeny,0.62950236,digipa-high-impact +George Bain,0.58519644,fineart +Peter Holme III,0.62938106,fineart +Grayson Perry,0.62928164,digipa-high-impact +Chris Claremont,0.6292076,digipa-high-impact +Dod Procter,0.6291759,fineart +Huang Tingjian,0.6290358,fareast +Dorothea Warren O'Hara,0.6290113,fineart +Ivan Albright,0.6289551,fineart +Hubert von Herkomer,0.6288955,fineart +Barbara Nessim,0.60589516,digipa-high-impact +Henry Scott Tuke,0.6286309,fineart +Ditlev Blunck,0.6282925,fineart +Sven Nordqvist,0.62828535,fineart +Lee Madgwick,0.6281731,fineart +Hubert van Eyck,0.6281529,fineart +Edmond Bille,0.62339354,fineart +Ejnar Nielsen,0.6280824,fineart +Arturo Souto,0.6280583,fineart +Jean Giraud,0.6279888,fineart +Storm Thorgerson,0.6277394,digipa-high-impact +Ed Benedict,0.62764007,digipa-high-impact +Christoffer Wilhelm Eckersberg,0.6014842,fineart +Clarence Holbrook Carter,0.5514105,fineart +Dorothy Lockwood,0.6273235,fineart +John Singer Sargent,0.6272487,fineart +Brigid Derham,0.6270125,digipa-high-impact +Henricus Hondius II,0.6268505,fineart +Gertrude Harvey,0.5903887,fineart +Grant Wood,0.6266253,fineart +Fyodor Vasilyev,0.5234919,digipa-med-impact +Cagnaccio di San Pietro,0.6261671,fineart +Cagnaccio Di San Pietro,0.6261671,fineart +Doris Boulton-Maude,0.62593174,fineart +Adolf Hirémy-Hirschl,0.5946784,fineart +Harold von Schmidt,0.6256755,fineart +Martine Johanna,0.6256161,digipa-high-impact +Gerald Kelly,0.5579602,digipa-high-impact +Ub Iwerks,0.625396,cartoon +Dirck van der Lisse,0.6253871,fineart +Edouard Riou,0.6250113,fineart +Ilya Yefimovich Repin,0.62491584,fineart +Martin Johnson Heade,0.59421235,fineart +Afarin Sajedi,0.62475824,scribbles +Alfred Thompson Bricher,0.6247515,fineart +Edwin G. 
Lucas,0.5553578,fineart +Georges Emile Lebacq,0.56175387,fineart +Francis Davis Millet,0.5988504,fineart +Bill Sienkiewicz,0.6125557,digipa-high-impact +Giocondo Albertolli,0.62441677,fineart +Victor Nizovtsev,0.6242258,fineart +Squeak Carnwath,0.62416434,digipa-high-impact +Bill Viola,0.62409425,digipa-high-impact +Annie Abernethie Pirie Quibell,0.6240767,fineart +Jason Edmiston,0.62405366,fineart +Al Capp,0.6239494,fineart +Kobayashi Kiyochika,0.6239368,anime +Albert Anker,0.62389827,fineart +Iain Faulkner,0.62376785,fineart +Todd Schorr,0.6237408,fineart +Charles Ginner,0.62370133,fineart +Emile Auguste Carolus-Duran,0.62353987,fineart +John Philip Falter,0.623418,cartoon +Chizuko Yoshida,0.6233001,fareast +Anna Dittmann,0.62327325,cartoon +Henry Snell Gamley,0.62319934,fineart +Edmund Charles Tarbell,0.6230626,fineart +Rob Gonsalves,0.62298363,fineart +Gladys Dawson,0.6228511,fineart +Tomma Abts,0.61153626,fineart +Kate Beaton,0.53993124,digipa-high-impact +Gustave Buchet,0.62243867,fineart +Gareth Pugh,0.6223551,digipa-high-impact +Caspar van Wittel,0.57871693,fineart +Anton Otto Fischer,0.6222941,fineart +Albert Guillaume,0.56529653,fineart +Felix Octavius Carr Darley,0.62223387,fineart +Bernard van Orley,0.62221646,fineart +Edward John Poynter,0.60147405,fineart +Walter Percy Day,0.62207425,fineart +Franciszek Starowieyski,0.5709621,fineart +Auguste Baud-Bovy,0.6219854,fineart +Chris LaBrooy,0.45497298,digipa-low-impact +Abraham de Vries,0.5859101,fineart +Antoni Gaudi,0.62162614,fineart +Joe Jusko,0.62156093,digipa-high-impact +Lynda Barry,0.62154603,digipa-high-impact +Michal Karcz,0.62154436,digipa-high-impact +Raymond Briggs,0.62150294,fineart +Herbert James Gunn,0.6210927,fineart +Dwight William Tryon,0.620984,fineart +Paul Henry,0.5752968,fineart +Helio Oiticica,0.6203739,digipa-high-impact +Sebastian Errazuriz,0.62036186,digipa-high-impact +Lucian Freud,0.6203146,nudity +Frank Auerbach,0.6201102,weird +Andre-Charles Boulle,0.6200789,fineart +Franz Fedier,0.5669752,fineart +Austin Briggs,0.57675314,fineart +Hugo Sánchez Bonilla,0.61978436,digipa-high-impact +Caroline Chariot-Dayez,0.6195682,digipa-high-impact +Bill Ward,0.61953044,digipa-high-impact +Charles Bird King,0.6194487,fineart +Adrian Ghenie,0.6193521,digipa-high-impact +Agnes Cecile,0.6192814,digipa-high-impact +Augustus John,0.6191995,fineart +Jeffrey T. 
Larson,0.61913544,fineart +Alexis Simon Belle,0.3190395,digipa-low-impact +Jean-Baptiste Monge,0.5758537,fineart +Adolf Bierbrauer,0.56129396,fineart +Ayako Rokkaku,0.61891204,fareast +Lisa Keene,0.54570895,digipa-high-impact +Edmond Aman-Jean,0.57168096,fineart +Marc Davis,0.61837333,cartoon +Cerith Wyn Evans,0.61829346,digipa-high-impact +George Wyllie,0.61829203,fineart +George Luks,0.6182724,fineart +William-Adolphe Bouguereau,0.618265,c +Grigoriy Myasoyedov,0.61801606,fineart +Hashimoto Gahō,0.61795104,fineart +Charles Ragland Bunnell,0.61772746,fineart +Ambrose McCarthy Patterson,0.61764514,fineart +Bill Brauer,0.5824066,fineart +Mikko Lagerstedt,0.591015,digipa-high-impact +Koson Ohara,0.53635323,fineart +Evaristo Baschenis,0.5857368,fineart +Martin Ansin,0.5294119,fineart +Cory Loftis,0.6168619,cartoon +Joseph Stella,0.6166778,fineart +André Pijet,0.5768274,fineart +Jeff Wall,0.6162895,digipa-high-impact +Eleanor Layfield Davis,0.6158844,fineart +Saul Tepper,0.61579347,fineart +Alex Hirsch,0.6157384,cartoon +Alexandre Falguière,0.55011404,fineart +Malcolm Liepke,0.6155646,fineart +Georg Friedrich Schmidt,0.60364646,fineart +Hendrik Kerstens,0.55099905,digipa-high-impact +Félix Bódog Widder,0.6153954,fineart +Marie Guillemine Benoist,0.61532974,fineart +Kelly Mckernan,0.60047054,digipa-high-impact +Ignacio Zuloaga,0.6151608,fineart +Hubert van Ravesteyn,0.61489964,fineart +Angus McKie,0.61487424,digipa-high-impact +Colin Campbell Cooper,0.6147882,fineart +Pieter Aertsen,0.61454165,fineart +Jan Brett,0.6144608,fineart +Kazuo Koike,0.61438507,fineart +Edith Grace Wheatley,0.61428297,fineart +Ogawa Kazumasa,0.61427975,fareast +Giovanni Battista Cipriani,0.6022825,fineart +André Bauchant,0.57124996,fineart +George Abe,0.6140447,digipa-high-impact +Georges Lemmen,0.6139967,scribbles +Frank Leonard Brooks,0.6139327,fineart +Gai Qi,0.613744,anime +Frank Gehry,0.6136776,digipa-high-impact +Anton Domenico Gabbiani,0.55471313,fineart +Cassandra Austen,0.6135781,fineart +Paul Gustav Fischer,0.613273,fineart +Emiliano Di Cavalcanti,0.6131207,fineart +Meryl McMaster,0.6129995,digipa-high-impact +Domenico di Pace Beccafumi,0.6129922,fineart +Ludwig Mies van der Rohe,0.6126692,fineart +Étienne-Louis Boullée,0.6126158,fineart +Dali,0.5928694,nudity +Shinji Aramaki,0.61246127,anime +Giovanni Fattori,0.59544694,fineart +Bapu,0.6122084,c +Raphael Lacoste,0.5539114,digipa-high-impact +Scarlett Hooft Graafland,0.6119631,digipa-high-impact +Rene Laloux,0.61190474,fineart +Julius Horsthuis,0.59037095,fineart +Gerald van Honthorst,0.6115939,fineart +Dino Valls,0.611533,fineart +Tony DiTerlizzi,0.6114657,cartoon +Michael Cheval,0.61138546,anime +Charles Schulz,0.6113759,digipa-high-impact +Alvar Aalto,0.61122143,digipa-high-impact +Gu Kaizhi,0.6110798,fareast +Eugene von Guerard,0.6109776,fineart +John Cassaday,0.610949,fineart +Elizabeth Forbes,0.61092335,fineart +Edmund Greacen,0.6109115,fineart +Eugène Burnand,0.6107876,fineart +Boris Grigoriev,0.6107853,scribbles +Norman Rockwell,0.6107638,fineart +Barthélemy Menn,0.61064315,fineart +George Biddle,0.61058354,fineart +Edgar Ainsworth,0.5525424,digipa-high-impact +Alfred Leyman,0.5887217,fineart +Tex Avery,0.6104007,cartoon +Beatrice Ethel Lithiby,0.61030364,fineart +Grace Pailthorpe,0.61026484,digipa-high-impact +Brian Oldham,0.396231,digipa-low-impact +Android Jones,0.61023116,fareast +François Girardon,0.5830649,fineart +Ib Eisner,0.61016303,digipa-high-impact +Armand Point,0.610156,fineart +Henri Alphonse Barnoin,0.59465057,fineart +Jean Marc 
Nattier,0.60987425,fineart +Francisco de Holanda,0.6091294,fineart +Marco Mazzoni,0.60970783,fineart +Esaias Boursse,0.6093308,fineart +Alexander Deyneka,0.55000365,fineart +John Totleben,0.60883725,fineart +Al Feldstein,0.6087723,fineart +Adam Hughes,0.60854626,anime +Ernest Zobole,0.6085073,fineart +Alex Gross,0.60837066,digipa-high-impact +George Jamesone,0.6079673,fineart +Frank Lloyd Wright,0.60793245,scribbles +Brooke DiDonato,0.47680336,digipa-med-impact +Hans Gude,0.60780364,fineart +Ethel Schwabacher,0.60748273,fineart +Gladys Kathleen Bell,0.60747695,fineart +Adolf Fényes,0.54192233,fineart +Carel Willink,0.58120143,fineart +George Henry,0.6070727,digipa-high-impact +Ronald Balfour,0.60697085,fineart +Elsie Dalton Hewland,0.6067718,digipa-high-impact +Alex Maleev,0.6067118,fineart +Anish Kapoor,0.6067015,digipa-high-impact +Aleksandr Ivanovich Laktionov,0.606544,fineart +Kim Keever,0.6037775,digipa-high-impact +Aleksi Briclot,0.46056762,fineart +Raymond Leech,0.6062721,fineart +Richard Eurich,0.6062664,fineart +Phil Jimenez,0.60625625,cartoon +Gao Cen,0.60618126,nudity +Mike Deodato,0.6061201,cartoon +Charles Haslewood Shannon,0.6060581,fineart +Alexandre Jacovleff,0.3991747,digipa-low-impact +André Beauneveu,0.584062,fineart +Hiroshi Honda,0.60507596,digipa-high-impact +Charles Joshua Chaplin,0.60498774,fineart +Domenico Zampieri,0.6049726,fineart +Gusukuma Seihō,0.60479784,fareast +Nikolina Petolas,0.46318632,digipa-low-impact +Casey Weldon,0.6047672,cartoon +Elmyr de Hory,0.6046374,fineart +Nan Goldin,0.6046119,digipa-high-impact +Charles McAuley,0.6045995,fineart +Archibald Skirving,0.6044234,fineart +Elizabeth York Brunton,0.6043737,fineart +Dugald Sutherland MacColl,0.6042907,fineart +Titian,0.60426414,fineart +Ignacy Witkiewicz,0.6042259,fineart +Allie Brosh,0.6042061,digipa-high-impact +H.P. Lovecraft,0.6039597,digipa-high-impact +Andrée Ruellan,0.60395086,fineart +Ralph McQuarrie,0.60380936,fineart +Mead Schaeffer,0.6036558,fineart +Henri-Julien Dumont,0.571257,fineart +Kieron Gillen,0.6035093,fineart +Maginel Wright Enright Barney,0.6034306,nudity +Vincent Di Fate,0.6034131,fineart +Briton Rivière,0.6032918,fineart +Hajime Sorayama,0.60325956,nudity +Béla Czóbel,0.6031023,fineart +Edmund Blampied,0.603072,fineart +E. Simms Campbell,0.6030443,fineart +Hisui Sugiura,0.603034,fareast +Alan Davis,0.6029676,fineart +Glen Keane,0.60287905,cartoon +Frank Holl,0.6027312,fineart +Abbott Fuller Graves,0.6025608,fineart +Albert Servaes,0.60250103,black-white +Hovsep Pushman,0.5937487,fineart +Brian M. Viveros,0.60233414,fineart +Charles Fremont Conner,0.6023278,fineart +Francesco Furini,0.6022654,digipa-high-impact +Camille-Pierre Pambu Bodo,0.60191673,fineart +Yasushi Nirasawa,0.6016714,nudity +Charles Uzzell-Edwards,0.6014683,fineart +Abram Efimovich Arkhipov,0.60128385,fineart +Hedda Sterne,0.6011857,digipa-high-impact +Ben Aronson,0.6011548,fineart +Frank Frazetta,0.551121,nudity +Elizabeth Durack,0.6010842,fineart +Ian Miller,0.42153555,fareast +Charlie Bowater,0.4410439,special +Michael Carson,0.60039437,fineart +Walter Langley,0.6002273,fineart +Cornelis Anthonisz,0.6001956,fineart +Dorothy Elizabeth Bradford,0.6001929,fineart +J.C. 
Leyendecker,0.5791972,fineart +Willem van Haecht,0.59990716,fineart +Anna and Elena Balbusso,0.59955937,digipa-low-impact +Harrison Fisher,0.59952044,fineart +Bill Medcalf,0.59950054,fineart +Edward Arthur Walton,0.59945667,fineart +Alois Arnegger,0.5991994,fineart +Ray Caesar,0.59902894,digipa-high-impact +Karen Wallis,0.5990094,fineart +Emmanuel Shiu,0.51082766,digipa-med-impact +Thomas Struth,0.5988324,digipa-high-impact +Barbara Longhi,0.5985706,fineart +Richard Deacon,0.59851056,fineart +Constantin Hansen,0.5984213,fineart +Harold Shapinsky,0.5984175,fineart +George Dionysus Ehret,0.5983857,fineart +Doug Wildey,0.5983639,digipa-high-impact +Fernand Toussaint,0.5982694,fineart +Horatio Nelson Poole,0.5982614,fineart +Caesar van Everdingen,0.5981566,fineart +Eva Gonzalès,0.5981396,fineart +Franz Vohwinkel,0.5448179,fineart +Margaret Mee,0.5979592,fineart +Francis Focer Brown,0.59779185,fineart +Henry Moore,0.59767926,nudity +Scott Listfield,0.58795893,fineart +Nikolai Ge,0.5973643,fineart +Jacek Yerka,0.58198756,fineart +Margaret Brundage,0.5969077,fineart +JC Leyendecker,0.5620243,fineart +Ben Templesmith,0.5498991,digipa-high-impact +Armin Hansen,0.59669334,anime +Jean-Louis Prevost,0.5966897,fineart +Daphne Allen,0.59666026,fineart +Franz Karl Basler-Kopp,0.59663445,fineart +"Henry Ives Cobb, Jr.",0.596385,fineart +Michael Sowa,0.546285,fineart +Anna Füssli,0.59600973,fineart +György Rózsahegyi,0.59580946,fineart +Luis Royo,0.59566617,fineart +Émile Gallé,0.5955559,fineart +Antonio Mora,0.5334297,digipa-high-impact +Edward P. Beard Jr.,0.59543866,fineart +Jessica Rossier,0.54958373,special +André Thomkins,0.5343785,digipa-high-impact +David Macbeth Sutherland,0.5949968,fineart +Charles Liu,0.5949787,digipa-high-impact +Edi Rama,0.5949226,digipa-high-impact +Jacques Le Moyne,0.5948843,fineart +Egbert van der Poel,0.59488285,fineart +Georg Jensen,0.594782,digipa-high-impact +Anne Sudworth,0.5947539,fineart +Jan Pietersz Saenredam,0.59472525,fineart +Henryk Stażewski,0.5945748,fineart +André François,0.58402044,fineart +Alexander Runciman,0.5944449,digipa-high-impact +Thomas Kinkade,0.594391,fineart +Robert Williams,0.5567989,digipa-high-impact +George Gardner Symons,0.57431924,fineart +D. Alexander Gregory,0.5334464,fineart +Gerald Brom,0.52473724,fineart +Robert Hagan,0.59406,fineart +Ernest Crichlow,0.5940588,fineart +Viviane Sassen,0.5939927,digipa-high-impact +Enrique Simonet,0.5937546,fineart +Esther Blaikie MacKinnon,0.593747,digipa-high-impact +Jeff Kinney,0.59372896,scribbles +Igor Morski,0.5936732,digipa-high-impact +John Currin,0.5936216,fineart +Bob Ringwood,0.5935273,digipa-high-impact +Jordan Grimmer,0.44948143,digipa-low-impact +François Barraud,0.5933471,fineart +Helen Binyon,0.59331006,digipa-high-impact +Brenda Chamberlain,0.5932333,fineart +Candido Bido,0.59310603,fineart +Abraham Storck,0.5929502,fineart +Raphael,0.59278333,fineart +Larry Sultan,0.59273386,digipa-high-impact +Agostino Tassi,0.59265685,fineart +Alexander V. 
Kuprin,0.5925917,fineart +Frans Koppelaar,0.5658725,fineart +Richard Corben,0.59251785,fineart +David Gilmour Blythe,0.5924247,digipa-high-impact +František Kaván,0.5924211,fineart +Rob Liefeld,0.5921167,fineart +Ernő Rubik,0.5920297,fineart +Byeon Sang-byeok,0.59200096,fareast +Johfra Bosschart,0.5919376,fineart +Emil Lindenfeld,0.5761086,fineart +Howard Mehring,0.5917471,fineart +Gwenda Morgan,0.5915571,digipa-high-impact +Henry Asencio,0.5915404,fineart +"George Barret, Sr.",0.5914306,fineart +Andrew Ferez,0.5911011,fineart +Ed Brubaker,0.5910869,digipa-high-impact +George Reid,0.59095883,digipa-high-impact +Derek Gores,0.51769906,digipa-med-impact +Charles Rollier,0.5539186,fineart +Terry Oakes,0.590443,fineart +Thomas Blackshear,0.5078616,fineart +Albert Benois,0.5902705,nudity +Krenz Cushart,0.59026587,special +Jeff Koons,0.5902637,digipa-high-impact +Akihiko Yoshida,0.5901294,special +Anja Percival,0.45039332,digipa-low-impact +Eduard von Steinle,0.59008586,fineart +Alex Russell Flint,0.5900352,digipa-high-impact +Edward Okuń,0.5897297,fineart +Emma Lampert Cooper,0.5894849,fineart +Stuart Haygarth,0.58132994,digipa-high-impact +George French Angas,0.5434376,fineart +Edmund F. Ward,0.5892848,fineart +Eleanor Vere Boyle,0.58925456,digipa-high-impact +Evelyn Cheston,0.58924586,fineart +Edwin Dickinson,0.58921975,digipa-high-impact +Christophe Vacher,0.47325426,fineart +Anne Dewailly,0.58905107,fineart +Gertrude Greene,0.5862596,digipa-high-impact +Boris Groh,0.5888809,digipa-high-impact +Douglas Smith,0.588804,digipa-high-impact +Ian Hamilton Finlay,0.5887713,fineart +Derek Jarman,0.5887292,digipa-high-impact +Archibald Thorburn,0.5882001,fineart +Gillis d'Hondecoeter,0.58813053,fineart +I Ketut Soki,0.58801544,digipa-high-impact +Alex Schomburg,0.46614102,digipa-low-impact +Bastien L. Deharme,0.583349,special +František Jakub Prokyš,0.58782333,fineart +Jesper Ejsing,0.58782053,fineart +Odd Nerdrum,0.53551745,digipa-high-impact +Tom Lovell,0.5877577,fineart +Ayami Kojima,0.5877416,fineart +Peter Sculthorpe,0.5875696,fineart +Bernard D’Andrea,0.5874042,fineart +Denis Eden,0.58739066,digipa-high-impact +Alfons Walde,0.58728385,fineart +Jovana Rikalo,0.47006977,digipa-low-impact +Franklin Booth,0.5870834,fineart +Mat Collishaw,0.5870676,digipa-high-impact +Joseph Lorusso,0.586858,fineart +Helen Stevenson,0.454647,digipa-low-impact +Delaunay,0.58657396,fineart +H.R. Millar,0.58655745,fineart +E. Charlton Fortune,0.586376,fineart +Alson Skinner Clark,0.58631575,fineart +Stan And Jan Berenstain,0.5862361,digipa-high-impact +Howard Lyon,0.5862271,fineart +John Blanche,0.586182,fineart +Bernardo Cavallino,0.5858575,fineart +Tomasz Alen Kopera,0.5216588,fineart +Peter Gric,0.58583695,fineart +Guo Pei,0.5857794,fareast +James Turrell,0.5853901,digipa-high-impact +Alexandr Averin,0.58533764,fineart +Bertalan Székely,0.5548113,digipa-high-impact +Brothers Hildebrandt,0.5850233,fineart +Ed Roth,0.5849769,digipa-high-impact +Enki Bilal,0.58492255,fineart +Alan Lee,0.5848701,fineart +Charles H. Woodbury,0.5848688,fineart +André Charles Biéler,0.5847876,fineart +Annie Rose Laing,0.5597829,fineart +Matt Fraction,0.58463776,cartoon +Charles Alston,0.58453286,fineart +Frank Xavier Leyendecker,0.545465,fineart +Alfred Richard Gurrey,0.584306,fineart +Dan Mumford,0.5843051,cartoon +Francisco Martín,0.5842005,fineart +Alvaro Siza,0.58406967,digipa-high-impact +Frank J. Girardin,0.5839858,fineart +Henry Carr,0.58397424,digipa-high-impact +Charles Furneaux,0.58394694,fineart +Daniel F. 
Gerhartz,0.58389103,fineart +Gilberto Soren Zaragoza,0.5448442,fineart +Bart Sears,0.5838427,cartoon +Allison Bechdel,0.58383805,digipa-high-impact +Frank O'Meara,0.5837992,fineart +Charles Codman,0.5836579,fineart +Francisco Zúñiga,0.58359766,fineart +Vladimir Kush,0.49075457,fineart +Arnold Mesches,0.5834257,fineart +Frank McKelvey,0.5831641,fineart +Allen Butler Talcott,0.5830911,fineart +Eric Zener,0.58300316,fineart +Noah Bradley,0.44176096,digipa-low-impact +Robert Childress,0.58289623,fineart +Frances C. Fairman,0.5827239,fineart +Kathryn Morris Trotter,0.465856,digipa-low-impact +Everett Raymond Kinstler,0.5824819,fineart +Edward Mitchell Bannister,0.5804899,fineart +"George Barret, Jr.",0.5823128,fineart +Greg Hildebrandt,0.4271311,fineart +Anka Zhuravleva,0.5822078,digipa-high-impact +Rolf Armstrong,0.58217514,fineart +Eric Wallis,0.58191466,fineart +Clemens Ascher,0.5480207,digipa-high-impact +Hugo Kārlis Grotuss,0.5818766,fineart +Albert Paris Gütersloh,0.5817827,fineart +Hilda May Gordon,0.5817449,fineart +Hendrik Martenszoon Sorgh,0.5817126,fineart +Pipilotti Rist,0.5816868,digipa-high-impact +Hiroyuki Tajima,0.5816242,fareast +Igor Zenin,0.58159757,digipa-high-impact +Genevieve Springston Lynch,0.4979099,digipa-med-impact +Dan Witz,0.44476372,fineart +David Roberts,0.5255326,fineart +Frieke Janssens,0.5706969,digipa-high-impact +Arnold Schoenberg,0.56520367,fineart +Inoue Naohisa,0.5809933,fareast +Elfriede Lohse-Wächtler,0.58097905,fineart +Alex Ross,0.42460668,digipa-low-impact +Robert Irwin,0.58078,c +Charles Angrand,0.58077514,fineart +Anne Nasmyth,0.54221964,fineart +Henri Bellechose,0.5773891,fineart +De Hirsh Margules,0.58059025,fineart +Hiromitsu Takahashi,0.5805599,fareast +Ilya Kuvshinov,0.5805521,special +Cassius Marcellus Coolidge,0.5805516,c +Dorothy Burroughes,0.5804835,fineart +Emanuel de Witte,0.58027405,fineart +George Herbert Baker,0.5799624,digipa-high-impact +Cheng Zhengkui,0.57990086,fareast +Bernard Fleetwood-Walker,0.57987773,digipa-high-impact +Philippe Parreno,0.57985014,digipa-high-impact +Thornton Oakley,0.57969713,fineart +Greg Rutkowski,0.5203395,special +Ike no Taiga,0.5795857,anime +Eduardo Lefebvre Scovell,0.5795808,fineart +Adolfo Müller-Ury,0.57944727,fineart +Patrick Woodroffe,0.5228063,fineart +Wim Crouwel,0.57933235,digipa-high-impact +Colijn de Coter,0.5792779,fineart +François Boquet,0.57924724,fineart +Gerbrand van den Eeckhout,0.57897866,fineart +Eugenio Granell,0.5392264,fineart +Kuang Hong,0.5782304,digipa-high-impact +Justin Gerard,0.46685404,fineart +Tokujin Yoshioka,0.5779153,digipa-high-impact +Alan Bean,0.57788515,fineart +Ernest Biéler,0.5778079,fineart +Martin Deschambault,0.44401115,digipa-low-impact +Anna Boch,0.577735,fineart +Jack Davis,0.5775291,fineart +Félix Labisse,0.5775142,fineart +Greg Simkins,0.5679761,fineart +David Lynch,0.57751054,digipa-low-impact +Eizō Katō,0.5774127,digipa-high-impact +Grethe Jürgens,0.5773412,digipa-high-impact +Heinrich Bichler,0.5770147,fineart +Barbara Nasmyth,0.5446056,fineart +Domenico Induno,0.5583946,fineart +Gustave Baumann,0.5607866,fineart +Mike Mayhew,0.5765857,cartoon +Delmer J. 
Yoakum,0.576538,fineart +Aykut Aydogdu,0.43111503,digipa-low-impact +George Barker,0.5763551,fineart +Ernő Grünbaum,0.57634187,fineart +Eliseu Visconti,0.5763241,fineart +Esao Andrews,0.5761547,fineart +JennyBird Alcantara,0.49165845,digipa-med-impact +Joan Tuset,0.5761051,fineart +Angela Barrett,0.55976534,digipa-high-impact +Syd Mead,0.5758396,fineart +Ignacio Bazan-Lazcano,0.5757512,fineart +Franciszek Kostrzewski,0.57570386,fineart +Eero Järnefelt,0.57540673,fineart +Loretta Lux,0.56217635,digipa-high-impact +Gaudi,0.57519895,fineart +Charles Gleyre,0.57490873,fineart +Antoine Verney-Carron,0.56386137,fineart +Albert Edelfelt,0.57466495,fineart +Fabian Perez,0.57444525,fineart +Kevin Sloan,0.5737548,fineart +Stanislav Poltavsky,0.57434607,fineart +Abraham Hondius,0.574326,fineart +Tadao Ando,0.57429105,fareast +Fyodor Slavyansky,0.49796474,digipa-med-impact +David Brewster,0.57385933,digipa-high-impact +Cliff Chiang,0.57375133,digipa-high-impact +Drew Struzan,0.5317983,digipa-high-impact +Henry O. Tanner,0.5736586,fineart +Alberto Sughi,0.5736495,fineart +Albert J. Welti,0.5736257,fineart +Charles Mahoney,0.5735923,digipa-high-impact +Exekias,0.5734506,fineart +Felipe Seade,0.57342744,digipa-high-impact +Henriette Wyeth,0.57330644,digipa-high-impact +Harold Sandys Williamson,0.5443646,fineart +Eddie Campbell,0.57329535,digipa-high-impact +Gao Fenghan,0.5732926,fareast +Cynthia Sheppard,0.51099646,fineart +Henriette Grindat,0.573179,fineart +Yasutomo Oka,0.5731342,fareast +Celia Frances Bedford,0.57313216,fineart +Les Edwards,0.42068473,fineart +Edwin Deakin,0.5031717,fineart +Eero Saarinen,0.5725142,digipa-high-impact +Franciszek Smuglewicz,0.5722554,fineart +Doris Blair,0.57221186,fineart +Seb Mckinnon,0.51721895,digipa-med-impact +Gregorio Lazzarini,0.57204294,fineart +Gerard Sekoto,0.5719927,fineart +Francis Ernest Jackson,0.5506009,fineart +Simon Birch,0.57171595,digipa-high-impact +Bayard Wu,0.57171166,fineart +François Clouet,0.57162094,fineart +Christopher Wren,0.5715372,fineart +Evgeny Lushpin,0.5714827,special +Art Green,0.5714495,digipa-high-impact +Amy Judd,0.57142305,digipa-high-impact +Art Brenner,0.42619684,digipa-low-impact +Travis Louie,0.43916368,digipa-low-impact +James Jean,0.5457318,digipa-high-impact +Ewald Rübsamen,0.57083976,fineart +Donato Giancola,0.57052535,fineart +Carl Arnold Gonzenbach,0.5703996,fineart +Bastien Lecouffe-Deharme,0.5201288,fineart +Howard Chandler Christy,0.5702813,nudity +Dean Cornwell,0.56977296,fineart +Don Maitz,0.4743015,fineart +James Montgomery Flagg,0.56974065,fineart +Andreas Levers,0.42125136,digipa-low-impact +Edgar Schofield Baum,0.56965977,fineart +Alan Parry,0.5694952,digipa-high-impact +An Zhengwen,0.56942475,fareast +Alayna Lemmer,0.48293802,fineart +Edward Marshall Boehm,0.5530143,fineart +Henri Biva,0.54013556,nudity +Fiona Rae,0.4646715,digipa-low-impact +Elizabeth Jane Lloyd,0.5688463,digipa-high-impact +Franklin Carmichael,0.5687844,digipa-high-impact +Dionisius,0.56875896,fineart +Edwin Georgi,0.56868523,fineart +Jenny Saville,0.5686633,fineart +Ernest Hébert,0.56859314,fineart +Stephan Martiniere,0.56856346,digipa-high-impact +Huang Binhong,0.56841767,fineart +August Lemmer,0.5683548,fineart +Camille Bouvagne,0.5678048,fineart +Olga Skomorokhova,0.39401102,digipa-low-impact +Sacha Goldberger,0.5675477,digipa-high-impact +Hilda Annetta Walker,0.5675261,digipa-high-impact +Harvey Pratt,0.51314723,digipa-med-impact +Jean Bourdichon,0.5670543,fineart +Noriyoshi Ohrai,0.56690073,fineart +Kadir Nelson,0.5669006,n +Ilya 
Ostroukhov,0.5668801,fineart +Eugène Brands,0.56681967,fineart +Achille Leonardi,0.56674325,fineart +Franz Cižek,0.56670356,fineart +George Paul Chalmers,0.5665988,digipa-high-impact +Serge Marshennikov,0.5665971,digipa-high-impact +Mike Worrall,0.56641084,fineart +Dirck van Delen,0.5661764,fineart +Peter Andrew Jones,0.5661655,fineart +Rafael Albuquerque,0.56541103,fineart +Daniel Buren,0.5654043,fineart +Giuseppe Grisoni,0.5432699,fineart +George Fiddes Watt,0.55861616,fineart +Stan Lee,0.5651268,digipa-high-impact +Dorning Rasbotham,0.56511617,fineart +Albert Lynch,0.56497896,fineart +Lorenz Hideyoshi,0.56494075,fineart +Fenghua Zhong,0.56492203,fareast +Caroline Lucy Scott,0.49190843,digipa-med-impact +Victoria Crowe,0.5647996,digipa-high-impact +Hasegawa Settan,0.5647092,fareast +Dennis H. Farber,0.56453323,digipa-high-impact +Dick Bickenbach,0.5644289,fineart +Art Frahm,0.56439924,fineart +Edith Edmonds,0.5643151,fineart +Alfred Heber Hutty,0.56419206,fineart +Henry Tonks,0.56410825,fineart +Peter Howson,0.5640759,fineart +Albert Dorne,0.56395364,fineart +Arthur Adams,0.5639404,fineart +Bernt Tunold,0.56383425,digipa-high-impact +Gianluca Foli,0.5637317,digipa-high-impact +Vittorio Matteo Corcos,0.5636767,fineart +Béla Iványi-Grünwald,0.56355745,nudity +Feng Zhu,0.5634973,fineart +Sam Kieth,0.47251505,digipa-low-impact +Charles Crodel,0.5633834,fineart +Elsie Henderson,0.56310076,digipa-high-impact +George Earl Ortman,0.56295705,fineart +Tari Márk Dávid,0.562937,fineart +Betty Merken,0.56281745,digipa-high-impact +Cecile Walton,0.46672013,digipa-low-impact +Bracha L. Ettinger,0.56237936,fineart +Ken Fairclough,0.56230986,digipa-high-impact +Phil Koch,0.56224954,digipa-high-impact +George Pirie,0.56213045,digipa-high-impact +Chad Knight,0.56194013,digipa-high-impact +Béla Kondor,0.5427164,digipa-high-impact +Barclay Shaw,0.53689134,digipa-high-impact +Tim Hildebrandt,0.47194147,fineart +Hermann Rüdisühli,0.56104004,digipa-high-impact +Ian McQue,0.5342066,digipa-high-impact +Yanjun Cheng,0.5607171,fineart +Heinrich Hofmann,0.56060636,fineart +Henry Raleigh,0.5605958,fineart +Ernest Buckmaster,0.5605704,fineart +Charles Ricketts,0.56055415,fineart +Juergen Teller,0.56051147,digipa-high-impact +Auguste Mambour,0.5604873,fineart +Sean Yoro,0.5601486,digipa-high-impact +Sheilah Beckett,0.55995446,digipa-high-impact +Eugene Tertychnyi,0.5598978,fineart +Dr. Seuss,0.5597466,c +Adolf Wölfli,0.5372333,digipa-high-impact +Enrique Tábara,0.559323,fineart +Dionisio Baixeras Verdaguer,0.5590695,fineart +Aleksander Gierymski,0.5590013,fineart +Augustus Dunbier,0.55872476,fineart +Adolf Born,0.55848217,fineart +Chris Turnham,0.5584234,digipa-high-impact +James C Christensen,0.55837405,fineart +Daphne Fedarb,0.5582459,digipa-high-impact +Andre Kohn,0.5581832,special +Ron Mueck,0.5581811,nudity +Glenn Fabry,0.55786383,fineart +Elizabeth Polunin,0.5578102,digipa-high-impact +Charles S. 
Kaelin,0.5577954,fineart +Arthur Radebaugh,0.5577016,fineart +Ai Yazawa,0.55768114,fareast +Charles Roka,0.55762553,fineart +Ai Weiwei,0.5576034,digipa-high-impact +Dorothy Bradford,0.55760014,digipa-high-impact +Alfred Leslie,0.557555,fineart +Heinrich Herzig,0.5574423,fineart +Eliot Hodgkin,0.55740607,digipa-high-impact +Albert Kotin,0.55737317,fineart +Carlo Carlone,0.55729353,fineart +Chen Rong,0.5571221,fineart +Ikuo Hirayama,0.5570225,digipa-high-impact +Edward Corbett,0.55701995,nudity +Eugeniusz Żak,0.556925,nudity +Ettore Tito,0.556875,fineart +Helene Knoop,0.5567731,fineart +Amanda Sage,0.37731662,fareast +Annick Bouvattier,0.54647046,fineart +Harvey Dunn,0.55663586,fineart +Hans Sandreuter,0.5562575,digipa-high-impact +Ruan Jia,0.5398549,special +Anton Räderscheidt,0.55618906,fineart +Tyler Shields,0.4081434,digipa-low-impact +Darek Zabrocki,0.49975997,digipa-med-impact +Frank Montague Moore,0.5556432,fineart +Greg Staples,0.5555332,fineart +Endre Bálint,0.5553731,fineart +Augustus Vincent Tack,0.5136602,fineart +Marc Simonetti,0.48602036,fineart +Carlo Randanini,0.55493265,digipa-high-impact +Diego Dayer,0.5549119,fineart +Kelly Freas,0.55476534,fineart +Thomas Saliot,0.5139967,digipa-med-impact +Gijsbert d'Hondecoeter,0.55455256,fineart +Walter Kim,0.554521,digipa-high-impact +Francesco Cozza,0.5155097,digipa-med-impact +Bill Watterson,0.5542879,digipa-high-impact +Mark Keathley,0.4824056,fineart +Béni Ferenczy,0.55405354,digipa-high-impact +Amadou Opa Bathily,0.5536976,n +Giuseppe Antonio Petrini,0.55340284,fineart +Enzo Cucchi,0.55331933,digipa-high-impact +Adolf Schrödter,0.55316544,fineart +George Benjamin Luks,0.548566,fineart +Glenys Cour,0.55304,digipa-high-impact +Andrew Robertson,0.5529603,digipa-high-impact +Claude Rogers,0.55272067,digipa-high-impact +Alexandre Antigna,0.5526737,fineart +Aimé Barraud,0.55265915,digipa-high-impact +György Vastagh,0.55258965,fineart +Bruce Nauman,0.55257386,digipa-high-impact +Benjamin Block,0.55251944,digipa-high-impact +Gonzalo Endara Crow,0.552346,digipa-high-impact +Dirck de Bray,0.55221736,fineart +Gerald Kelley,0.5521059,digipa-high-impact +Dave Gibbons,0.5520954,digipa-high-impact +Béla Nagy Abodi,0.5520624,digipa-high-impact +Faith 47,0.5517006,digipa-high-impact +Anna Razumovskaya,0.5229187,digipa-med-impact +Archibald Robertson,0.55129635,digipa-high-impact +Louise Dahl-Wolfe,0.55120385,digipa-high-impact +Simon Bisley,0.55119276,digipa-high-impact +Eric Fischl,0.55107886,fineart +Hu Zaobin,0.5510481,fareast +Béla Pállik,0.5507963,digipa-high-impact +Eugene J. 
Martin,0.55078864,fineart +Friedrich Gauermann,0.55063415,fineart +Fritz Baumann,0.5341434,fineart +Michal Lisowski,0.5505639,fineart +Paolo Roversi,0.5503342,digipa-high-impact +Andrew Atroshenko,0.55009747,fineart +Gyula Derkovits,0.5500315,fineart +Hugh Adam Crawford,0.55000615,digipa-high-impact +Béla Apáti Abkarovics,0.5499799,digipa-high-impact +Paul Chadeisson,0.389151,digipa-low-impact +Aurél Bernáth,0.54968774,fineart +Albert Henry Krehbiel,0.54952574,fineart +Piet Hein Eek,0.54918796,digipa-high-impact +Yoshitaka Amano,0.5491855,fareast +Antonio Rotta,0.54909515,fineart +Józef Mehoffer,0.50760424,fineart +Donald Sherwood,0.5490415,digipa-high-impact +Catrin G Grosse,0.5489286,digipa-high-impact +Arthur Webster Emerson,0.5478842,fineart +Incarcerated Jerkfaces,0.5488423,digipa-high-impact +Emanuel Büchel,0.5487217,fineart +Andrew Loomis,0.54854584,fineart +Charles Hopkinson,0.54853606,fineart +Gabor Szikszai,0.5485203,digipa-high-impact +Archibald Standish Hartrick,0.54850936,digipa-high-impact +Aleksander Orłowski,0.546705,nudity +Hans Hinterreiter,0.5483628,fineart +Fred Williams,0.54544824,fineart +Fred A. Precht,0.5481606,fineart +Camille Souter,0.5213742,fineart +Emil Fuchs,0.54807395,fineart +Francesco Bonsignori,0.5478936,fineart +H. R. (Hans Ruedi) Giger,0.547799,fineart +Harriet Zeitlin,0.5477388,digipa-high-impact +Christian Jane Fergusson,0.5396168,fineart +Edward Kemble,0.5476892,fineart +Bernard Aubertin,0.5475396,fineart +Augustyn Mirys,0.5474162,fineart +Alejandro Burdisio,0.47482288,special +Erin Hanson,0.4343264,digipa-low-impact +Amalia Lindegren,0.5471987,digipa-high-impact +Alberto Seveso,0.47735062,fineart +Bartholomeus Strobel,0.54703736,fineart +Jim Davis,0.54703003,digipa-high-impact +Antony Gormley,0.54696125,digipa-high-impact +Charles Marion Russell,0.54696095,fineart +George B. Sutherland,0.5467901,fineart +Almada Negreiros,0.54670584,fineart +Edward Armitage,0.54358315,fineart +Bruno Walpoth,0.546167,digipa-high-impact +Richard Hamilton,0.5461275,nudity +Charles Harold Davis,0.5460415,digipa-high-impact +Fernand Verhaegen,0.54601514,fineart +Bernard Meninsky,0.5302034,digipa-high-impact +Fede Galizia,0.5456873,digipa-high-impact +Alfred Kelsner,0.5455753,nudity +Fritz Puempin,0.5452847,fineart +Alfred Charles Parker,0.54521024,fineart +Ahmed Yacoubi,0.544767,digipa-high-impact +Arthur B. Carles,0.54447794,fineart +Alice Prin,0.54435575,digipa-high-impact +Carl Gustaf Pilo,0.5443212,digipa-high-impact +Ross Tran,0.5259248,special +Hideyuki Kikuchi,0.544193,fareast +Art Fitzpatrick,0.49847245,fineart +Cherryl Fountain,0.5440454,fineart +Skottie Young,0.5440119,cartoon +NC Wyeth,0.54382974,digipa-high-impact +Rudolf Freund,0.5437342,fineart +Mort Kunstler,0.5433619,digipa-high-impact +Ben Goossens,0.53002644,digipa-high-impact +Andreas Rocha,0.49621177,special +Gérard Ernest Schneider,0.5429964,fineart +Francesco Filippini,0.5429598,digipa-high-impact +Alejandro Jodorowsky,0.5429065,digipa-high-impact +Friedrich Traffelet,0.5428817,fineart +Honor C. Appleton,0.5428735,digipa-high-impact +Jason A. 
Engle,0.542821,fineart +Henry Otto Wix,0.54271996,fineart +Gregory Manchess,0.54270375,fineart +Ann Stookey,0.54269934,digipa-high-impact +Henryk Rodakowski,0.542589,fineart +Albert Welti,0.5425134,digipa-high-impact +Gerard Houckgeest,0.5424413,digipa-high-impact +Dorothy Hood,0.54226196,digipa-high-impact +Frank Schoonover,0.51056194,fineart +Erlund Hudson,0.5422107,digipa-high-impact +Alexander Litovchenko,0.54210097,fineart +Sakai Hōitsu,0.5420294,digipa-high-impact +Benito Quinquela Martín,0.54194224,fineart +David Watson Stevenson,0.54191554,fineart +Ann Thetis Blacker,0.5416629,digipa-high-impact +Frank DuMond,0.51004076,digipa-med-impact +David Dougal Williams,0.5410126,digipa-high-impact +Robert Mcginnis,0.54098356,fineart +Ernest Briggs,0.5408636,fineart +Ferenc Joachim,0.5408625,fineart +Carlos Saenz de Tejada,0.47332364,digipa-low-impact +David Burton-Richardson,0.49659324,digipa-med-impact +Ernest Heber Thompson,0.54039246,digipa-high-impact +Albert Bertelsen,0.54038215,nudity +Giorgio Giulio Clovio,0.5403708,fineart +Eugene Leroy,0.54019785,digipa-high-impact +Anna Findlay,0.54018176,digipa-high-impact +Roy Gjertson,0.54012,digipa-high-impact +Charmion von Wiegand,0.5400893,fineart +Arnold Bronckhorst,0.526247,fineart +Boris Vallejo,0.487253,fineart +Adélaïde Victoire Hall,0.539939,fineart +Earl Norem,0.5398575,fineart +Sanford Kossin,0.53977877,digipa-high-impact +Aert de Gelder,0.519166,digipa-med-impact +Carl Eugen Keel,0.539739,digipa-high-impact +Francis Bourgeois,0.5397272,digipa-high-impact +Bojan Jevtic,0.41141546,fineart +Edward Avedisian,0.5393925,fineart +Gao Xiang,0.5392419,fareast +Charles Hinman,0.53911865,digipa-high-impact +Frits Van den Berghe,0.53896487,fineart +Carlo Martini,0.5384833,digipa-high-impact +Elina Karimova,0.5384318,digipa-high-impact +Anto Carte,0.4708289,digipa-low-impact +Andrey Yefimovich Martynov,0.537721,fineart +Frances Jetter,0.5376904,fineart +Yuri Ivanovich Pimenov,0.5342793,fineart +Gaston Anglade,0.537608,digipa-high-impact +Albert Swinden,0.5375844,fineart +Bob Byerley,0.5375774,fineart +A.B. Frost,0.5375025,fineart +Jaya Suberg,0.5372893,digipa-high-impact +Josh Keyes,0.53654516,digipa-high-impact +Juliana Huxtable,0.5364195,n +Everett Warner,0.53641814,digipa-high-impact +Hugh Kretschmer,0.45171157,digipa-low-impact +Arnold Blanch,0.535774,fineart +Ryan McGinley,0.53572595,digipa-high-impact +Alfons Karpiński,0.53564656,fineart +George Aleef,0.5355317,digipa-high-impact +Hal Foster,0.5351446,fineart +Stuart Immonen,0.53501946,digipa-high-impact +Craig Thompson,0.5346844,digipa-high-impact +Bartolomeo Vivarini,0.53465015,fineart +Hermann Feierabend,0.5346168,digipa-high-impact +Antonio Donghi,0.4610982,digipa-low-impact +Adonna Khare,0.4858036,digipa-med-impact +James Stokoe,0.5015107,digipa-med-impact +Art & Language,0.5341332,digipa-high-impact +Agustín Fernández,0.53403986,fineart +Germán Londoño,0.5338712,fineart +Emmanuelle Moureaux,0.5335641,digipa-high-impact +Conrad Marca-Relli,0.5148334,digipa-med-impact +Gyula Batthyány,0.5332407,fineart +Francesco Raibolini,0.53314835,fineart +Apelles,0.5166026,fineart +Marat Latypov,0.45811993,fineart +Andrei Markin,0.5328752,fineart +Einar Hakonarson,0.5328311,digipa-high-impact +Beatrice Huntington,0.5328165,digipa-high-impact +Coppo di Marcovaldo,0.5327443,fineart +Gregorio Prestopino,0.53250784,fineart +A.D.M. 
Cooper,0.53244877,digipa-high-impact +Horatio McCulloch,0.53244334,digipa-high-impact +Wes Anderson,0.5318741,digipa-high-impact +Moebius,0.53178746,digipa-high-impact +Gerard Soest,0.53160626,fineart +Charles Ellison,0.53152347,digipa-high-impact +Wojciech Ostrycharz,0.5314213,fineart +Doug Chiang,0.5313724,fineart +Anne Savage,0.5310638,digipa-high-impact +Cor Melchers,0.53099334,fineart +Gordon Browne,0.5308195,digipa-high-impact +Augustus Earle,0.49196815,fineart +Carlos Francisco Chang Marín,0.5304734,fineart +Larry Elmore,0.53032553,fineart +Adolf Hölzel,0.5303149,fineart +David Ligare,0.5301894,fineart +Jan Luyken,0.52985555,fineart +Earle Bergey,0.5298525,fineart +David Ramsay Hay,0.52974963,digipa-high-impact +Alfred East,0.5296565,digipa-high-impact +A. R. Middleton Todd,0.50988734,fineart +Giorgio De Vincenzi,0.5291678,fineart +Hugh William Williams,0.5291014,digipa-high-impact +Erwin Bowien,0.52895796,digipa-high-impact +Victor Adame Minguez,0.5288686,fineart +Yoji Shinkawa,0.5287015,anime +Clara Weaver Parrish,0.5284487,digipa-high-impact +Albert Eckhout,0.5284096,fineart +Dorothy Coke,0.5282345,digipa-high-impact +Jerzy Duda-Gracz,0.5279943,digipa-high-impact +Byron Galvez,0.39178842,fareast +Alson S. Clark,0.5278568,digipa-high-impact +Adolf Ulric Wertmüller,0.5278296,digipa-high-impact +Bruce Coville,0.5277226,digipa-high-impact +Gong Kai,0.5276811,digipa-high-impact +Andréi Arinouchkine,0.52763486,digipa-high-impact +Florence Engelbach,0.5273161,digipa-high-impact +Brian Froud,0.5270276,fineart +Charles Thomson,0.5270127,digipa-high-impact +Bessie Wheeler,0.5269164,digipa-high-impact +Anton Lehmden,0.5268611,fineart +Emilia Wilk,0.5264961,fineart +Carl Eytel,0.52646196,digipa-high-impact +Alfred Janes,0.5264481,digipa-high-impact +Julie Bell,0.49962538,fineart +Eugenio de Arriba,0.52613926,digipa-high-impact +Samuel and Joseph Newsom,0.52595663,digipa-high-impact +Hans Falk,0.52588874,digipa-high-impact +Guillermo del Toro,0.52565175,digipa-high-impact +Félix Arauz,0.52555984,digipa-high-impact +Gyula Basch,0.52524436,digipa-high-impact +Haroon Mirza,0.5252279,digipa-high-impact +Du Jin,0.5249934,digipa-med-impact +Harry Shoulberg,0.5249456,digipa-med-impact +Arie Smit,0.5249027,fineart +Ahmed Karahisari,0.4259451,digipa-low-impact +Brian and Wendy Froud,0.5246335,fineart +E. William Gollings,0.52461207,digipa-med-impact +Bo Bartlett,0.51341593,digipa-med-impact +Hans Burgkmair,0.52416867,digipa-med-impact +David Macaulay,0.5241233,digipa-med-impact +Benedetto Caliari,0.52370214,digipa-med-impact +Eliott Lilly,0.5235398,digipa-med-impact +Vincent Tanguay,0.48578292,digipa-med-impact +Ada Hill Walker,0.52207166,fineart +Christopher Wood,0.49360397,digipa-med-impact +Kris Kuksi,0.43938053,digipa-low-impact +Chen Yifei,0.5217867,fineart +Margaux Valonia,0.5217782,digipa-med-impact +Antoni Pitxot,0.40582713,digipa-low-impact +Jhonen Vasquez,0.5216471,digipa-med-impact +Emilio Grau Sala,0.52156484,fineart +Henry B. 
Christian,0.52153796,fineart +Jacques Nathan-Garamond,0.52144086,digipa-med-impact +Eddie Mendoza,0.4949638,digipa-med-impact +Grzegorz Rutkowski,0.48906532,special +Beeple,0.40085253,digipa-low-impact +Giorgio Cavallon,0.5209209,digipa-med-impact +Godfrey Blow,0.52062386,digipa-med-impact +Gabriel Dawe,0.5204431,fineart +Emile Lahner,0.5202367,digipa-med-impact +Steve Dillon,0.5201676,digipa-med-impact +Lee Quinones,0.4626683,digipa-low-impact +Hale Woodruff,0.52000225,digipa-med-impact +Tom Hammick,0.5032626,digipa-med-impact +Hamilton Sloan,0.5197798,digipa-med-impact +Caesar Andrade Faini,0.51971483,digipa-med-impact +Sam Spratt,0.48991,digipa-med-impact +Chris Cold,0.4753577,fineart +Alejandro Obregón,0.5190562,digipa-med-impact +Dan Flavin,0.51901346,digipa-med-impact +Arthur Sarnoff,0.5189428,fineart +Elenore Abbott,0.5187141,digipa-med-impact +Andrea Kowch,0.51822996,digipa-med-impact +Demetrios Farmakopoulos,0.5181248,digipa-med-impact +Alexis Grimou,0.41958088,digipa-low-impact +Lesley Vance,0.5177536,digipa-med-impact +Gyula Aggházy,0.517747,fineart +Georgina Hunt,0.46105456,digipa-low-impact +Christian W. Staudinger,0.4684662,digipa-low-impact +Abraham Begeyn,0.5172538,digipa-med-impact +Charles Mozley,0.5171356,digipa-med-impact +Elias Ravanetti,0.38719344,digipa-low-impact +Herman van Swanevelt,0.5168748,digipa-med-impact +David Paton,0.4842217,digipa-med-impact +Hans Werner Schmidt,0.51671976,digipa-med-impact +Bob Ross,0.51628315,fineart +Sou Fujimoto,0.5162528,fareast +Balcomb Greene,0.5162045,digipa-med-impact +Glen Angus,0.51609933,digipa-med-impact +Buckminster Fuller,0.51607454,digipa-med-impact +Andrei Ryabushkin,0.5158933,fineart +Almeida Júnior,0.515856,digipa-med-impact +Tim White,0.4182697,digipa-low-impact +Hans Beat Wieland,0.51553553,digipa-med-impact +Jakub Różalski,0.5154904,digipa-med-impact +John Whitcomb,0.51523805,digipa-med-impact +Dorothy King,0.5150925,digipa-med-impact +Richard S. Johnson,0.51500344,fineart +Aniello Falcone,0.51475304,digipa-med-impact +Henning Jakob Henrik Lund,0.5147134,c +Robert M Cunningham,0.5144858,digipa-med-impact +Nick Knight,0.51447505,digipa-med-impact +David Chipperfield,0.51424,digipa-med-impact +Bartolomeo Cesi,0.5136737,digipa-med-impact +Bettina Heinen-Ayech,0.51334465,digipa-med-impact +Annabel Kidston,0.51327646,digipa-med-impact +Charles Schridde,0.51308405,digipa-med-impact +Samuel Earp,0.51305825,digipa-med-impact +Eugene Montgomery,0.5128343,digipa-med-impact +Alfred Parsons,0.5127445,digipa-med-impact +Anton Möller,0.5127209,digipa-med-impact +Craig Davison,0.499598,special +Cricorps Grégoire,0.51267076,fineart +Celia Fiennes,0.51266706,digipa-med-impact +Raymond Swanland,0.41350424,fineart +Howard Knotts,0.5122062,digipa-med-impact +Helmut Federle,0.51201206,digipa-med-impact +Tyler Edlin,0.44028252,digipa-high-impact +Elwood H. 
Smith,0.5119027,digipa-med-impact +Ralph Horsley,0.51142794,fineart +Alexander Ivanov,0.4539051,digipa-low-impact +Cedric Peyravernay,0.4200587,digipa-low-impact +Annabel Eyres,0.51136214,digipa-med-impact +Zack Snyder,0.51129746,digipa-med-impact +Gentile Bellini,0.511102,digipa-med-impact +Giovanni Pelliccioli,0.4868688,digipa-med-impact +Fikret Muallâ Saygı,0.510694,digipa-med-impact +Bauhaus,0.43454266,digipa-low-impact +Charles Williams,0.510406,digipa-med-impact +Georg Arnold-Graboné,0.5103381,digipa-med-impact +Fedot Sychkov,0.47935224,digipa-med-impact +Alberto Magnelli,0.5103212,digipa-med-impact +Aloysius O'Kelly,0.5102891,digipa-med-impact +Alexander McQueen,0.5101986,digipa-med-impact +Cam Sykes,0.510071,digipa-med-impact +George Lucas,0.510038,digipa-med-impact +Eglon van der Neer,0.5099339,digipa-med-impact +Christian August Lorentzen,0.50989646,digipa-med-impact +Eleanor Best,0.50966686,digipa-med-impact +Terry Redlin,0.474244,fineart +Ken Kelly,0.4304738,fineart +David Eugene Henry,0.48173362,fineart +Shin Jeongho,0.5092497,fareast +Flora Borsi,0.5091922,digipa-med-impact +Berndnaut Smilde,0.50864,digipa-med-impact +Art of Brom,0.45828784,fineart +Ernő Tibor,0.50851977,digipa-med-impact +Ancell Stronach,0.5084514,digipa-med-impact +Helen Thomas Dranga,0.45412368,digipa-low-impact +Anita Malfatti,0.5080986,digipa-med-impact +Arnold Brügger,0.5080749,digipa-med-impact +Edward Ben Avram,0.50778764,digipa-med-impact +Antonio Ciseri,0.5073538,fineart +Alyssa Monks,0.50734174,digipa-med-impact +Chen Zhen,0.5071876,digipa-med-impact +Francis Helps,0.50707847,digipa-med-impact +Georg Karl Pfahler,0.50700235,digipa-med-impact +Henry Woods,0.506811,digipa-med-impact +Barbara Greg,0.50674164,digipa-med-impact +Guan Daosheng,0.506712,fareast +Guy Billout,0.5064906,digipa-med-impact +Basuki Abdullah,0.50613165,digipa-med-impact +Thomas Visscher,0.5059943,digipa-med-impact +Edward Simmons,0.50598735,digipa-med-impact +Arabella Rankin,0.50572735,digipa-med-impact +Lady Pink,0.5056634,digipa-high-impact +Christopher Williams,0.5052288,digipa-med-impact +Fuyuko Matsui,0.5051116,fareast +Edward Baird,0.5049874,digipa-med-impact +Georges Stein,0.5049069,digipa-med-impact +Alex Alemany,0.43974748,digipa-low-impact +Emanuel Schongut,0.5047326,digipa-med-impact +Hans Bol,0.5045265,digipa-med-impact +Kurzgesagt,0.5043725,digipa-med-impact +Harald Giersing,0.50410193,digipa-med-impact +Antonín Slavíček,0.5040368,fineart +Carl Rahl,0.5040115,digipa-med-impact +Etienne Delessert,0.5037818,fineart +Americo Makk,0.5034161,digipa-med-impact +Fernand Pelez,0.5027561,digipa-med-impact +Alexey Merinov,0.4469615,digipa-low-impact +Caspar Netscher,0.5019529,digipa-med-impact +Walt Disney,0.50178146,digipa-med-impact +Qian Xuan,0.50150526,fareast +Geoffrey Dyer,0.50120556,digipa-med-impact +Andre Norton,0.5007602,digipa-med-impact +Daphne McClure,0.5007391,digipa-med-impact +Dieric Bouts,0.5005882,fineart +Aguri Uchida,0.5005107,fareast +Hugo Scheiber,0.50004864,digipa-med-impact +Kenne Gregoire,0.46421963,digipa-low-impact +Wolfgang Tillmans,0.4999767,fineart +Carl-Henning Pedersen,0.4998986,digipa-med-impact +Alison Debenham,0.4998683,digipa-med-impact +Eppo Doeve,0.49975222,digipa-med-impact +Christen Købke,0.49961317,digipa-med-impact +Aron Demetz,0.49895018,digipa-med-impact +Alesso Baldovinetti,0.49849576,digipa-med-impact +Jimmy Lawlor,0.4475271,fineart +Carl Walter Liner,0.49826378,fineart +Gwenny Griffiths,0.45598924,digipa-low-impact +David Cooke Gibson,0.4976222,digipa-med-impact +Howard 
Butterworth,0.4974621,digipa-med-impact +Bob Thompson,0.49743804,fineart +Enguerrand Quarton,0.49711192,fineart +Abdel Hadi Al Gazzar,0.49631482,digipa-med-impact +Gu Zhengyi,0.49629828,digipa-med-impact +Aleksander Kotsis,0.4953621,digipa-med-impact +Alexander Sharpe Ross,0.49519226,digipa-med-impact +Carlos Enríquez Gómez,0.49494863,digipa-med-impact +Abed Abdi,0.4948855,digipa-med-impact +Elaine Duillo,0.49474388,digipa-med-impact +Anne Said,0.49473995,digipa-med-impact +Istvan Banyai,0.4947369,digipa-med-impact +Bouchta El Hayani,0.49455142,digipa-med-impact +Chinwe Chukwuogo-Roy,0.49445248,n +George Claessen,0.49412063,digipa-med-impact +Axel Törneman,0.49401706,digipa-med-impact +Avigdor Arikha,0.49384058,digipa-med-impact +Gloria Stoll Karn,0.4937976,digipa-med-impact +Alfredo Volpi,0.49367586,digipa-med-impact +Raffaello Sanizo,0.49365884,digipa-med-impact +Jeff Easley,0.49344411,digipa-med-impact +Aileen Eagleton,0.49318358,digipa-med-impact +Gaetano Sabatini,0.49307147,digipa-med-impact +Bertalan Pór,0.4930132,digipa-med-impact +Alfred Jensen,0.49291304,digipa-med-impact +Huang Guangjian,0.49286693,fareast +Emil Ferris,0.49282396,digipa-med-impact +Derek Chittock,0.492694,digipa-med-impact +Alonso Vázquez,0.49205148,digipa-med-impact +Kelly Sue Deconnick,0.4919476,digipa-med-impact +Clive Madgwick,0.4749857,fineart +Edward George Handel Lucas,0.49166748,digipa-med-impact +Dorothea Braby,0.49161923,digipa-med-impact +Sangyeob Park,0.49150884,fareast +Heinz Edelman,0.49140438,digipa-med-impact +Mark Seliger,0.4912073,digipa-med-impact +Camilo Egas,0.4586727,digipa-low-impact +Craig Mullins,0.49085408,fineart +Dong Kingman,0.49063343,digipa-med-impact +Douglas Robertson Bisset,0.49031347,digipa-med-impact +Blek Le Rat,0.49008566,digipa-med-impact +Anton Ažbe,0.48984748,fineart +Olafur Eliasson,0.48971075,digipa-med-impact +Elinor Proby Adams,0.48967826,digipa-med-impact +Cándido López,0.48915705,digipa-med-impact +D. Howard Hitchcock,0.48902267,digipa-med-impact +Cheng Jiasui,0.48889247,fareast +Jean Nouvel,0.4888183,digipa-med-impact +Bill Gekas,0.48848945,digipa-med-impact +Hermione Hammond,0.48845994,digipa-med-impact +Fernando Gerassi,0.48841453,digipa-med-impact +Frank Barrington Craig,0.4883762,digipa-med-impact +A. B. Jackson,0.4883623,digipa-med-impact +Bernie D’Andrea,0.48813275,digipa-med-impact +Clarice Beckett,0.487809,digipa-med-impact +Dosso Dossi,0.48775777,digipa-med-impact +Donald Roller Wilson,0.48767656,digipa-med-impact +Ernest William Christmas,0.4876317,digipa-med-impact +Aleksandr Gerasimov,0.48736423,digipa-med-impact +Edward Clark,0.48703307,digipa-med-impact +Georg Schrimpf,0.48697302,digipa-med-impact +John Wilhelm,0.48696536,digipa-med-impact +Aries Moross,0.4863676,digipa-med-impact +Bill Lewis,0.48635158,digipa-med-impact +Huang Ji,0.48611963,fareast +F. Scott Hess,0.43634564,fineart +Gao Qipei,0.4860631,fareast +Albert Tucker,0.4854299,digipa-med-impact +Barbara Balmer,0.48528513,fineart +Anne Ryan,0.48511976,digipa-med-impact +Helen Edwards,0.48484707,digipa-med-impact +Alexander Bogen,0.48421195,digipa-med-impact +David Annand,0.48418126,digipa-med-impact +Du Qiong,0.48414314,fareast +Fred Cress,0.4837878,digipa-med-impact +David B. 
Mattingly,0.48370445,digipa-med-impact +Hristofor Žefarović,0.4837008,digipa-med-impact +Wim Wenders,0.44484183,digipa-low-impact +Alexander Fedosav,0.48360944,digipa-med-impact +Anne Rigney,0.48357943,digipa-med-impact +Bertalan Karlovszky,0.48338628,digipa-med-impact +George Frederick Harris,0.4833259,fineart +Toshiharu Mizutani,0.48315164,fareast +David McClellan,0.39739317,digipa-low-impact +Eugeen Van Mieghem,0.48270774,digipa-med-impact +Alexei Harlamoff,0.48255378,digipa-med-impact +Jeff Legg,0.48249072,digipa-med-impact +Elizabeth Murray,0.48227608,digipa-med-impact +Hugo Heyrman,0.48213717,digipa-med-impact +Adrian Paul Allinson,0.48211843,digipa-med-impact +Altoon Sultan,0.4820177,digipa-med-impact +Alice Mason,0.48188528,fareast +Harriet Powers,0.48181778,digipa-med-impact +Aaron Bohrod,0.48175076,digipa-med-impact +Chris Saunders,0.41429797,digipa-low-impact +Clara Miller Burd,0.47797233,digipa-med-impact +David G. Sorensen,0.38101727,digipa-low-impact +Iwan Baan,0.4806739,digipa-med-impact +Anatoly Metlan,0.48020265,digipa-med-impact +Alfons von Czibulka,0.4801954,digipa-med-impact +Amedee Ozenfant,0.47950014,digipa-med-impact +Valerie Hegarty,0.47947168,digipa-med-impact +Hugo Anton Fisher,0.4793551,digipa-med-impact +Antonio Roybal,0.4792729,digipa-med-impact +Cui Zizhong,0.47902682,fareast +F Scott Hess,0.42582104,fineart +Julien Delval,0.47888556,digipa-med-impact +Marcin Jakubowski,0.4788583,digipa-med-impact +Anne Stokes,0.4786997,digipa-med-impact +David Palumbo,0.47632077,fineart +Hallsteinn Sigurðsson,0.47858906,digipa-med-impact +Mike Campau,0.47850558,digipa-med-impact +Giuseppe Avanzi,0.47846943,digipa-med-impact +Harry Morley,0.47836518,digipa-med-impact +Constance-Anne Parker,0.47832203,digipa-med-impact +Albert Keller,0.47825447,digipa-med-impact +Daniel Chodowiecki,0.47825167,digipa-med-impact +Alasdair Grant Taylor,0.47802624,digipa-med-impact +Maria Pascual Alberich,0.4779718,fineart +Rebeca Saray,0.41697127,digipa-low-impact +Ernő Bánk,0.47753686,digipa-med-impact +Shaddy Safadi,0.47724134,digipa-med-impact +André Castro,0.4771826,digipa-med-impact +Amiet Cuno,0.41975892,digipa-low-impact +Adi Granov,0.40670198,fineart +Allen Williams,0.47675848,digipa-med-impact +Anna Haifisch,0.47672725,digipa-med-impact +Clovis Trouille,0.47669724,digipa-med-impact +Jane Graverol,0.47655866,digipa-med-impact +Conroy Maddox,0.47645602,digipa-med-impact +Božidar Jakac,0.4763106,digipa-med-impact +George Morrison,0.47533786,digipa-med-impact +Douglas Bourgeois,0.47527707,digipa-med-impact +Cao Zhibai,0.47476804,fareast +Bradley Walker Tomlin,0.47462896,digipa-low-impact +Dave Dorman,0.46852386,fineart +Stevan Dohanos,0.47452107,fineart +John Howe,0.44144905,fineart +Fanny McIan,0.47406268,digipa-low-impact +Bholekar Srihari,0.47387534,digipa-low-impact +Giovanni Lanfranco,0.4737344,digipa-low-impact +Fred Marcellino,0.47346023,digipa-low-impact +Clyde Caldwell,0.47305286,fineart +Haukur Halldórsson,0.47275954,digipa-low-impact +Huang Gongwang,0.47269204,fareast +Brothers Grimm,0.47249007,digipa-low-impact +Ollie Hoff,0.47240657,digipa-low-impact +RHADS,0.4722166,digipa-low-impact +Constance Gordon-Cumming,0.47219282,digipa-low-impact +Anne Mccaffrey,0.4719924,digipa-low-impact +Henry Heerup,0.47190166,digipa-low-impact +Adrian Smith,0.4716923,digipa-high-impact +Harold Elliott,0.4714101,digipa-low-impact +Eric Peterson,0.47106332,digipa-low-impact +David Garner,0.47106326,digipa-low-impact +Edward Hicks,0.4708863,digipa-low-impact +Alfred Krupa,0.47052455,digipa-low-impact 
+Breyten Breytenbach,0.4699338,digipa-low-impact +Douglas Shuler,0.4695691,digipa-low-impact +Elaine Hamilton,0.46941522,digipa-low-impact +Kapwani Kiwanga,0.46917036,digipa-low-impact +Dan Scott,0.46897763,digipa-low-impact +Allan Brooks,0.46882123,digipa-low-impact +Ian Fairweather,0.46878594,digipa-low-impact +Arlington Nelson Lindenmuth,0.4683814,digipa-low-impact +Russell Ayto,0.4681503,digipa-low-impact +Allan Linder,0.46812692,digipa-low-impact +Bohumil Kubista,0.4679809,digipa-low-impact +Christopher Jin Baron,0.4677839,digipa-low-impact +Eero Snellman,0.46777654,digipa-low-impact +Christabel Dennison,0.4677633,digipa-low-impact +Amelia Peláez,0.46764764,digipa-low-impact +James Gurney,0.46740666,digipa-low-impact +Carles Delclaux Is,0.46734855,digipa-low-impact +George Papazov,0.42420334,digipa-low-impact +Mark Brooks,0.4672415,fineart +Anne Dunn,0.46722376,digipa-low-impact +Klaus Wittmann,0.4670704,fineart +Arvid Nyholm,0.46697336,digipa-low-impact +Georg Scholz,0.46674117,digipa-low-impact +David Spriggs,0.46671993,digipa-low-impact +Ernest Morgan,0.4665036,digipa-low-impact +Ella Guru,0.46619284,digipa-low-impact +Helen Berman,0.46614346,digipa-low-impact +Gen Paul,0.4658785,digipa-low-impact +Auseklis Ozols,0.46569023,digipa-low-impact +Amelia Robertson Hill,0.4654411,fineart +Jim Lee,0.46544096,digipa-low-impact +Anson Maddocks,0.46539295,digipa-low-impact +Chen Hong,0.46516004,fareast +Haddon Sundblom,0.46490777,digipa-low-impact +Eva Švankmajerová,0.46454152,digipa-low-impact +Antonio Cavallucci,0.4645282,digipa-low-impact +Herve Groussin,0.40050638,digipa-low-impact +Gwen Barnard,0.46400994,digipa-low-impact +Grace English,0.4638674,digipa-low-impact +Carl Critchlow,0.4636,digipa-low-impact +Ayshia Taşkın,0.463412,digipa-low-impact +Alison Watt,0.43141022,digipa-low-impact +Andre de Krayewski,0.4628024,digipa-low-impact +Hamish MacDonald,0.462645,digipa-low-impact +Ni Chuanjing,0.46254826,fareast +Frank Mason,0.46254665,digipa-low-impact +Steve Henderson,0.43113405,fineart +Eileen Aldridge,0.46210572,digipa-low-impact +Brad Rigney,0.28446302,digipa-low-impact +Ching Yeh,0.46177,fareast +Bertram Brooker,0.46176457,digipa-low-impact +Henry Bright,0.46150023,digipa-low-impact +Claire Dalby,0.46117848,digipa-low-impact +Brian Despain,0.41538632,digipa-low-impact +Anna Maria Barbara Abesch,0.4611045,digipa-low-impact +Bernardo Daddi,0.46088326,digipa-low-impact +Abraham Mintchine,0.46088243,digipa-high-impact +Alexander Carse,0.46078917,digipa-low-impact +Doc Hammer,0.46075988,digipa-low-impact +Yuumei,0.46072406,digipa-low-impact +Teophilus Tetteh,0.46064255,n +Bess Hamiti,0.46062252,digipa-low-impact +Ceferí Olivé,0.46058378,digipa-low-impact +Enrique Grau,0.46046937,digipa-low-impact +Eleanor Hughes,0.46007007,digipa-low-impact +Elizabeth Charleston,0.46001568,digipa-low-impact +Félix Ziem,0.45987016,digipa-low-impact +Eugeniusz Zak,0.45985222,digipa-low-impact +Dain Yoon,0.45977795,fareast +Gong Xian,0.4595083,digipa-low-impact +Flavia Blois,0.45950204,digipa-low-impact +Frederik Vermehren,0.45949826,digipa-low-impact +Gang Se-hwang,0.45937777,digipa-low-impact +Bjørn Wiinblad,0.45934483,digipa-low-impact +Alex Horley-Orlandelli,0.42623433,digipa-low-impact +Dr. 
Atl,0.459287,digipa-low-impact +Hu Jieqing,0.45889485,fareast +Amédée Ozenfant,0.4585215,digipa-low-impact +Warren Ellis,0.4584044,digipa-low-impact +Helen Dahm,0.45804346,digipa-low-impact +Anne Geddes,0.45785287,digipa-low-impact +Bikash Bhattacharjee,0.45775396,digipa-low-impact +Phil Foglio,0.457582,digipa-low-impact +Evelyn Abelson,0.4574563,digipa-low-impact +Alan Moore,0.4573369,digipa-low-impact +Josh Kao,0.45725146,fareast +Bertil Nilsson,0.45724383,digipa-low-impact +Hristofor Zhefarovich,0.457089,fineart +Edward Bailey,0.45659882,digipa-low-impact +Christopher Moeller,0.45648077,digipa-low-impact +Dóra Keresztes,0.4558745,fineart +Cory Arcangel,0.4558071,digipa-low-impact +Aleksander Kobzdej,0.45552525,digipa-low-impact +Tim Burton,0.45541722,digipa-high-impact +Chen Jiru,0.4553378,fareast +George Passantino,0.4552104,digipa-low-impact +Fuller Potter,0.4552072,digipa-low-impact +Warwick Globe,0.45516664,digipa-low-impact +Heinz Anger,0.45466962,digipa-low-impact +Elias Goldberg,0.45416242,digipa-low-impact +tokyogenso,0.45406622,fareast +Zeen Chin,0.45404464,digipa-low-impact +Albert Koetsier,0.45385844,fineart +Giuseppe Camuncoli,0.45377725,digipa-low-impact +Elsie Vera Cole,0.45377362,digipa-low-impact +Andreas Franke,0.4300047,digipa-low-impact +Constantine Andreou,0.4533816,digipa-low-impact +Elisabeth Collins,0.45337808,digipa-low-impact +Ted Nasmith,0.45302224,fineart +Antônio Parreiras,0.45269623,digipa-low-impact +Gwilym Prichard,0.45256525,digipa-low-impact +Fang Congyi,0.45240825,fareast +Huang Ding,0.45233482,fareast +Hans von Bartels,0.45200723,digipa-low-impact +Peter Elson,0.4121406,fineart +Fan Kuan,0.4513034,digipa-low-impact +Dean Roger,0.45112592,digipa-low-impact +Bernat Sanjuan,0.45074993,fareast +Fletcher Martin,0.45055175,digipa-low-impact +Gentile Tondino,0.45043385,digipa-low-impact +Ei-Q,0.45038772,digipa-low-impact +Chen Lin,0.45035738,fareast +Ted Wallace,0.4500007,digipa-low-impact +"Cornelisz Hendriksz Vroom, the Younger",0.4499252,digipa-low-impact +Alpo Jaakola,0.44981295,digipa-low-impact +Clark Voorhees,0.4495309,digipa-low-impact +Cleve Gray,0.449188,digipa-low-impact +Wolf Kahn,0.4489858,digipa-low-impact +Choi Buk,0.44892842,fareast +Frank Tinsley,0.4480373,digipa-low-impact +George Bell,0.44779524,digipa-low-impact +Fiona Stephenson,0.44761062,fineart +Carlos Trillo Name,0.4470371,digipa-low-impact +Jamie McKelvie,0.44696707,digipa-low-impact +Dennis Flanders,0.44673377,digipa-low-impact +Dulah Marie Evans,0.44662604,digipa-low-impact +Hans Schwarz,0.4463275,digipa-low-impact +Steve McCurry,0.44620228,digipa-low-impact +Bedwyr Williams,0.44616276,digipa-low-impact +Anton Graff,0.38569996,digipa-low-impact +Leticia Gillett,0.44578317,digipa-low-impact +Rafał Olbiński,0.44561762,digipa-low-impact +Artgerm,0.44555497,fineart +Adrienn Henczné Deák,0.445518,digipa-low-impact +Gu Hongzhong,0.4454906,fareast +Matt Groening,0.44518438,digipa-low-impact +Sue Bryce,0.4447164,digipa-low-impact +Armin Baumgarten,0.444061,digipa-low-impact +Araceli Gilbert,0.44399196,digipa-low-impact +Carey Morris,0.44388965,digipa-low-impact +Ignat Bednarik,0.4438085,digipa-low-impact +Frank Buchser,0.44373792,digipa-low-impact +Ben Zoeller,0.44368798,digipa-low-impact +Adam Szentpétery,0.4434548,fineart +Gene Davis,0.44343877,digipa-low-impact +Fei Danxu,0.4433627,fareast +Andrei Kolkoutine,0.44328922,digipa-low-impact +Bruce Onobrakpeya,0.42588046,n +Christoph Amberger,0.38912287,digipa-low-impact +"Fred Mitchell,",0.4432277,digipa-low-impact +Klaus 
Burgle,0.44295216,digipa-low-impact +Carl Hoppe,0.44270635,digipa-low-impact +Caroline Gotch,0.44263047,digipa-low-impact +Hans Mertens,0.44260004,digipa-low-impact +Mandy Disher,0.44219893,fineart +Sarah Lucas,0.4420507,digipa-low-impact +Sydney Edmunds,0.44198513,digipa-low-impact +Amos Ferguson,0.4418735,digipa-low-impact +Alton Tobey,0.4416385,digipa-low-impact +Clifford Ross,0.44139367,digipa-low-impact +Henric Trenk,0.4412782,digipa-low-impact +Claire Hummel,0.44119984,digipa-low-impact +Norman Foster,0.4411899,digipa-low-impact +Carmen Saldana,0.44076762,digipa-low-impact +Michael Whelan,0.4372847,digipa-low-impact +Carlos Berlanga,0.440354,digipa-low-impact +Gilles Beloeil,0.43997732,digipa-low-impact +Ashley Wood,0.4398396,digipa-low-impact +David Allan,0.43969798,digipa-low-impact +Mark Lovett,0.43922082,digipa-low-impact +Jed Henry,0.43882954,digipa-low-impact +Adam Bruce Thomson,0.43847767,digipa-low-impact +Horst Antes,0.4384303,digipa-low-impact +Fritz Glarner,0.43787453,digipa-low-impact +Harold McCauley,0.43760818,digipa-low-impact +Estuardo Maldonado,0.437594,digipa-low-impact +Dai Jin,0.4375449,fareast +Fabien Charuau,0.43688047,digipa-low-impact +Chica Macnab,0.4365166,digipa-low-impact +Jim Burns,0.3975072,digipa-low-impact +Santiago Calatrava,0.43651623,digipa-low-impact +Robert Maguire,0.40926617,digipa-low-impact +Cliff Childs,0.43611953,digipa-low-impact +Charles Martin,0.43582463,fareast +Elbridge Ayer Burbank,0.43572164,digipa-low-impact +Anita Kunz,0.4356005,digipa-low-impact +Colin Geller,0.43559563,digipa-low-impact +Allen Tupper True,0.43556124,digipa-low-impact +Jef Wu,0.43555313,digipa-low-impact +Jon McCoy,0.4147122,digipa-low-impact +Cedric Seaut,0.43521535,digipa-low-impact +Emily Shanks,0.43519047,digipa-low-impact +Andrew Whem,0.43512022,digipa-low-impact +Ibrahim Kodra,0.43471518,digipa-low-impact +Harrington Mann,0.4345901,digipa-low-impact +Jerry Siegel,0.43458986,digipa-low-impact +Howard Kanovitz,0.4345178,digipa-low-impact +Cicely Hey,0.43449926,digipa-low-impact +Ben Thompson,0.43436068,digipa-low-impact +Joe Bowler,0.43413073,digipa-low-impact +Lori Earley,0.43389612,digipa-low-impact +Arent Arentsz,0.43373522,digipa-low-impact +David Bailly,0.43371305,digipa-low-impact +Hans Arnold,0.4335214,digipa-low-impact +Constance Copeman,0.4334836,digipa-low-impact +Brent Heighton,0.4333118,fineart +Eric Taylor,0.43312082,digipa-low-impact +Aleksander Gine,0.4326849,digipa-low-impact +Alexander Johnston,0.4326589,digipa-low-impact +David Park,0.43235332,digipa-low-impact +Balázs Diószegi,0.432244,digipa-low-impact +Ed Binkley,0.43222216,digipa-low-impact +Eric Dinyer,0.4321258,digipa-low-impact +Susan Luo,0.43198025,fareast +Cedric Seaut (Keos Masons),0.4317356,digipa-low-impact +Lorena Alvarez Gómez,0.431683,digipa-low-impact +Fred Ludekens,0.431662,digipa-low-impact +David Begbie,0.4316218,digipa-low-impact +Ai Xuan,0.43150818,fareast +Felix-Kelly,0.43132153,digipa-low-impact +Antonín Chittussi,0.431248,digipa-low-impact +Ammi Phillips,0.43095884,digipa-low-impact +Elke Vogelsang,0.43092483,digipa-low-impact +Fathi Hassan,0.43090487,digipa-low-impact +Angela Sung,0.391746,fareast +Clément Serveau,0.43050706,digipa-low-impact +Dong Yuan,0.4303865,fareast +Hew Lorimer,0.43035403,digipa-low-impact +David Finch,0.29487437,digipa-low-impact +Bill Durgin,0.4300932,digipa-low-impact +Alexander Robertson,0.4300743,digipa-low-impact diff --git a/environment-wsl2.yaml b/environment-wsl2.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f88727507835d96bfbbfae3ece2996e8506e3760 --- /dev/null +++ b/environment-wsl2.yaml @@ -0,0 +1,11 @@ +name: automatic +channels: + - pytorch + - defaults +dependencies: + - python=3.10 + - pip=22.2.2 + - cudatoolkit=11.3 + - pytorch=1.12.1 + - torchvision=0.13.1 + - numpy=1.23.1 \ No newline at end of file diff --git a/javascript/aspectRatioOverlay.js b/javascript/aspectRatioOverlay.js new file mode 100644 index 0000000000000000000000000000000000000000..96f1c00d0de5b69556918850bd7bf2d9fe14b3ba --- /dev/null +++ b/javascript/aspectRatioOverlay.js @@ -0,0 +1,119 @@ + +let currentWidth = null; +let currentHeight = null; +let arFrameTimeout = setTimeout(function(){},0); + +function dimensionChange(e,dimname){ + + if(dimname == 'Width'){ + currentWidth = e.target.value*1.0 + } + if(dimname == 'Height'){ + currentHeight = e.target.value*1.0 + } + + var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200")) + + if(!inImg2img){ + return; + } + + var img2imgMode = gradioApp().querySelector('#mode_img2img.tabs > div > button.rounded-t-lg.border-gray-200') + if(img2imgMode){ + img2imgMode=img2imgMode.innerText + }else{ + return; + } + + var redrawImage = gradioApp().querySelector('div[data-testid=image] img'); + var inpaintImage = gradioApp().querySelector('#img2maskimg div[data-testid=image] img') + + var targetElement = null; + + if(img2imgMode=='img2img' && redrawImage){ + targetElement = redrawImage; + }else if(img2imgMode=='Inpaint' && inpaintImage){ + targetElement = inpaintImage; + } + + if(targetElement){ + + var arPreviewRect = gradioApp().querySelector('#imageARPreview'); + if(!arPreviewRect){ + arPreviewRect = document.createElement('div') + arPreviewRect.id = "imageARPreview"; + gradioApp().getRootNode().appendChild(arPreviewRect) + } + + + + var viewportOffset = targetElement.getBoundingClientRect(); + + viewportscale = Math.min( targetElement.clientWidth/targetElement.naturalWidth, targetElement.clientHeight/targetElement.naturalHeight ) + + scaledx = targetElement.naturalWidth*viewportscale + scaledy = targetElement.naturalHeight*viewportscale + + cleintRectTop = (viewportOffset.top+window.scrollY) + cleintRectLeft = (viewportOffset.left+window.scrollX) + cleintRectCentreY = cleintRectTop + (targetElement.clientHeight/2) + cleintRectCentreX = cleintRectLeft + (targetElement.clientWidth/2) + + viewRectTop = cleintRectCentreY-(scaledy/2) + viewRectLeft = cleintRectCentreX-(scaledx/2) + arRectWidth = scaledx + arRectHeight = scaledy + + arscale = Math.min( arRectWidth/currentWidth, arRectHeight/currentHeight ) + arscaledx = currentWidth*arscale + arscaledy = currentHeight*arscale + + arRectTop = cleintRectCentreY-(arscaledy/2) + arRectLeft = cleintRectCentreX-(arscaledx/2) + arRectWidth = arscaledx + arRectHeight = arscaledy + + arPreviewRect.style.top = arRectTop+'px'; + arPreviewRect.style.left = arRectLeft+'px'; + arPreviewRect.style.width = arRectWidth+'px'; + arPreviewRect.style.height = arRectHeight+'px'; + + clearTimeout(arFrameTimeout); + arFrameTimeout = setTimeout(function(){ + arPreviewRect.style.display = 'none'; + },2000); + + arPreviewRect.style.display = 'block'; + + } + +} + + +onUiUpdate(function(){ + var arPreviewRect = gradioApp().querySelector('#imageARPreview'); + if(arPreviewRect){ + arPreviewRect.style.display = 'none'; + } + var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200")) + if(inImg2img){ + let inputs = gradioApp().querySelectorAll('input'); + 
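+            // For each input on the img2img tab, attach a one-time 'input' listener to the Width and Height fields (matched by their label text) and seed the current dimension values.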
inputs.forEach(function(e){ + let parentLabel = e.parentElement.querySelector('label') + if(parentLabel && parentLabel.innerText){ + if(!e.classList.contains('scrollwatch')){ + if(parentLabel.innerText == 'Width' || parentLabel.innerText == 'Height'){ + e.addEventListener('input', function(e){dimensionChange(e,parentLabel.innerText)} ) + e.classList.add('scrollwatch') + } + if(parentLabel.innerText == 'Width'){ + currentWidth = e.value*1.0 + } + if(parentLabel.innerText == 'Height'){ + currentHeight = e.value*1.0 + } + } + } + }) + } +}); diff --git a/javascript/contextMenus.js b/javascript/contextMenus.js new file mode 100644 index 0000000000000000000000000000000000000000..fe67c42e185160b1bb76386a943246bf181196fa --- /dev/null +++ b/javascript/contextMenus.js @@ -0,0 +1,177 @@ + +contextMenuInit = function(){ + let eventListenerApplied=false; + let menuSpecs = new Map(); + + const uid = function(){ + return Date.now().toString(36) + Math.random().toString(36).substr(2); + } + + function showContextMenu(event,element,menuEntries){ + let posx = event.clientX + document.body.scrollLeft + document.documentElement.scrollLeft; + let posy = event.clientY + document.body.scrollTop + document.documentElement.scrollTop; + + let oldMenu = gradioApp().querySelector('#context-menu') + if(oldMenu){ + oldMenu.remove() + } + + let tabButton = uiCurrentTab + let baseStyle = window.getComputedStyle(tabButton) + + const contextMenu = document.createElement('nav') + contextMenu.id = "context-menu" + contextMenu.style.background = baseStyle.background + contextMenu.style.color = baseStyle.color + contextMenu.style.fontFamily = baseStyle.fontFamily + contextMenu.style.top = posy+'px' + contextMenu.style.left = posx+'px' + + + + const contextMenuList = document.createElement('ul') + contextMenuList.className = 'context-menu-items'; + contextMenu.append(contextMenuList); + + menuEntries.forEach(function(entry){ + let contextMenuEntry = document.createElement('a') + contextMenuEntry.innerHTML = entry['name'] + contextMenuEntry.addEventListener("click", function(e) { + entry['func'](); + }) + contextMenuList.append(contextMenuEntry); + + }) + + gradioApp().getRootNode().appendChild(contextMenu) + + let menuWidth = contextMenu.offsetWidth + 4; + let menuHeight = contextMenu.offsetHeight + 4; + + let windowWidth = window.innerWidth; + let windowHeight = window.innerHeight; + + if ( (windowWidth - posx) < menuWidth ) { + contextMenu.style.left = windowWidth - menuWidth + "px"; + } + + if ( (windowHeight - posy) < menuHeight ) { + contextMenu.style.top = windowHeight - menuHeight + "px"; + } + + } + + function appendContextMenuOption(targetEmementSelector,entryName,entryFunction){ + + currentItems = menuSpecs.get(targetEmementSelector) + + if(!currentItems){ + currentItems = [] + menuSpecs.set(targetEmementSelector,currentItems); + } + let newItem = {'id':targetEmementSelector+'_'+uid(), + 'name':entryName, + 'func':entryFunction, + 'isNew':true} + + currentItems.push(newItem) + return newItem['id'] + } + + function removeContextMenuOption(uid){ + menuSpecs.forEach(function(v,k) { + let index = -1 + v.forEach(function(e,ei){if(e['id']==uid){index=ei}}) + if(index>=0){ + v.splice(index, 1); + } + }) + } + + function addContextMenuEventListener(){ + if(eventListenerApplied){ + return; + } + gradioApp().addEventListener("click", function(e) { + let source = e.composedPath()[0] + if(source.id && source.id.indexOf('check_progress')>-1){ + return + } + + let oldMenu = gradioApp().querySelector('#context-menu') + 
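+            // A plain click anywhere else (the hidden progress-polling buttons are filtered out above) dismisses any open context menu.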
if(oldMenu){ + oldMenu.remove() + } + }); + gradioApp().addEventListener("contextmenu", function(e) { + let oldMenu = gradioApp().querySelector('#context-menu') + if(oldMenu){ + oldMenu.remove() + } + menuSpecs.forEach(function(v,k) { + if(e.composedPath()[0].matches(k)){ + showContextMenu(e,e.composedPath()[0],v) + e.preventDefault() + return + } + }) + }); + eventListenerApplied=true + + } + + return [appendContextMenuOption, removeContextMenuOption, addContextMenuEventListener] +} + +initResponse = contextMenuInit(); +appendContextMenuOption = initResponse[0]; +removeContextMenuOption = initResponse[1]; +addContextMenuEventListener = initResponse[2]; + +(function(){ + //Start example Context Menu Items + let generateOnRepeat = function(genbuttonid,interruptbuttonid){ + let genbutton = gradioApp().querySelector(genbuttonid); + let interruptbutton = gradioApp().querySelector(interruptbuttonid); + if(!interruptbutton.offsetParent){ + genbutton.click(); + } + clearInterval(window.generateOnRepeatInterval) + window.generateOnRepeatInterval = setInterval(function(){ + if(!interruptbutton.offsetParent){ + genbutton.click(); + } + }, + 500) + } + + appendContextMenuOption('#txt2img_generate','Generate forever',function(){ + generateOnRepeat('#txt2img_generate','#txt2img_interrupt'); + }) + appendContextMenuOption('#img2img_generate','Generate forever',function(){ + generateOnRepeat('#img2img_generate','#img2img_interrupt'); + }) + + let cancelGenerateForever = function(){ + clearInterval(window.generateOnRepeatInterval) + } + + appendContextMenuOption('#txt2img_interrupt','Cancel generate forever',cancelGenerateForever) + appendContextMenuOption('#txt2img_generate', 'Cancel generate forever',cancelGenerateForever) + appendContextMenuOption('#img2img_interrupt','Cancel generate forever',cancelGenerateForever) + appendContextMenuOption('#img2img_generate', 'Cancel generate forever',cancelGenerateForever) + + appendContextMenuOption('#roll','Roll three', + function(){ + let rollbutton = get_uiCurrentTabContent().querySelector('#roll'); + setTimeout(function(){rollbutton.click()},100) + setTimeout(function(){rollbutton.click()},200) + setTimeout(function(){rollbutton.click()},300) + } + ) +})(); +//End example Context Menu Items + +onUiUpdate(function(){ + addContextMenuEventListener() +}); diff --git a/javascript/dragdrop.js b/javascript/dragdrop.js new file mode 100644 index 0000000000000000000000000000000000000000..5aac57f77b93cae9bc176f1f602d7462986826c3 --- /dev/null +++ b/javascript/dragdrop.js @@ -0,0 +1,86 @@ +// allows drag-dropping files into gradio image elements, and also pasting images from clipboard + +function isValidImageList( files ) { + return files && files?.length === 1 && ['image/png', 'image/gif', 'image/jpeg'].includes(files[0].type); +} + +function dropReplaceImage( imgWrap, files ) { + if ( ! 
isValidImageList( files ) ) { + return; + } + + imgWrap.querySelector('.modify-upload button + button, .touch-none + div button + button')?.click(); + const callback = () => { + const fileInput = imgWrap.querySelector('input[type="file"]'); + if ( fileInput ) { + fileInput.files = files; + fileInput.dispatchEvent(new Event('change')); + } + }; + + if ( imgWrap.closest('#pnginfo_image') ) { + // special treatment for PNG Info tab, wait for fetch request to finish + const oldFetch = window.fetch; + window.fetch = async (input, options) => { + const response = await oldFetch(input, options); + if ( 'api/predict/' === input ) { + const content = await response.text(); + window.fetch = oldFetch; + window.requestAnimationFrame( () => callback() ); + return new Response(content, { + status: response.status, + statusText: response.statusText, + headers: response.headers + }) + } + return response; + }; + } else { + window.requestAnimationFrame( () => callback() ); + } +} + +window.document.addEventListener('dragover', e => { + const target = e.composedPath()[0]; + const imgWrap = target.closest('[data-testid="image"]'); + if ( !imgWrap ) { + return; + } + e.stopPropagation(); + e.preventDefault(); + e.dataTransfer.dropEffect = 'copy'; +}); + +window.document.addEventListener('drop', e => { + const target = e.composedPath()[0]; + const imgWrap = target.closest('[data-testid="image"]'); + if ( !imgWrap ) { + return; + } + e.stopPropagation(); + e.preventDefault(); + const files = e.dataTransfer.files; + dropReplaceImage( imgWrap, files ); +}); + +window.addEventListener('paste', e => { + const files = e.clipboardData.files; + if ( ! isValidImageList( files ) ) { + return; + } + + const visibleImageFields = [...gradioApp().querySelectorAll('[data-testid="image"]')] + .filter(el => uiElementIsVisible(el)); + if ( ! visibleImageFields.length ) { + return; + } + + const firstFreeImageField = visibleImageFields + .filter(el => el.querySelector('input[type=file]'))?.[0]; + + dropReplaceImage( + firstFreeImageField ? 
+ firstFreeImageField : + visibleImageFields[visibleImageFields.length - 1] + , files ); +}); diff --git a/javascript/edit-attention.js b/javascript/edit-attention.js new file mode 100644 index 0000000000000000000000000000000000000000..3f1d2fbbd9d51b4d65c069486d34b9529407e535 --- /dev/null +++ b/javascript/edit-attention.js @@ -0,0 +1,45 @@ +addEventListener('keydown', (event) => { + let target = event.originalTarget || event.composedPath()[0]; + if (!target.hasAttribute("placeholder")) return; + if (!target.placeholder.toLowerCase().includes("prompt")) return; + + let plus = "ArrowUp" + let minus = "ArrowDown" + if (event.key != plus && event.key != minus) return; + + selectionStart = target.selectionStart; + selectionEnd = target.selectionEnd; + if(selectionStart == selectionEnd) return; + + event.preventDefault(); + + if (selectionStart == 0 || target.value[selectionStart - 1] != "(") { + target.value = target.value.slice(0, selectionStart) + + "(" + target.value.slice(selectionStart, selectionEnd) + ":1.0)" + + target.value.slice(selectionEnd); + + target.focus(); + target.selectionStart = selectionStart + 1; + target.selectionEnd = selectionEnd + 1; + + } else { + end = target.value.slice(selectionEnd + 1).indexOf(")") + 1; + weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end)); + if (isNaN(weight)) return; + if (event.key == minus) weight -= 0.1; + if (event.key == plus) weight += 0.1; + + weight = parseFloat(weight.toPrecision(12)); + + target.value = target.value.slice(0, selectionEnd + 1) + + weight + + target.value.slice(selectionEnd + 1 + end - 1); + + target.focus(); + target.selectionStart = selectionStart; + target.selectionEnd = selectionEnd; + } + // Since we've modified a Gradio Textbox component manually, we need to simulate an `input` DOM event to ensure its + // internal Svelte data binding remains in sync. 
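+  // The event is created with bubbles: true so listeners attached higher up the DOM tree also observe the change.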
+ target.dispatchEvent(new Event("input", { bubbles: true })); +}); diff --git a/javascript/hints.js b/javascript/hints.js new file mode 100644 index 0000000000000000000000000000000000000000..f65e7b882a0cc16fe1ddba935d28e3f234cd831c --- /dev/null +++ b/javascript/hints.js @@ -0,0 +1,121 @@ +// mouseover tooltips for various UI elements + +titles = { + "Sampling steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results", + "Sampling method": "Which algorithm to use to produce the image", + "GFPGAN": "Restore low quality faces using GFPGAN neural network", + "Euler a": "Euler Ancestral - very creative, each can get a completely different picture depending on step count, setting steps to higher than 30-40 does not help", + "DDIM": "Denoising Diffusion Implicit Models - best at inpainting", + + "Batch count": "How many batches of images to create", + "Batch size": "How many image to create in a single batch", + "CFG Scale": "Classifier Free Guidance Scale - how strongly the image should conform to prompt - lower values produce more creative results", + "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result", + "\u{1f3b2}\ufe0f": "Set seed to -1, which will cause a new random number to be used every time", + "\u267b\ufe0f": "Reuse seed from last generation, mostly useful if it was randomed", + "\u{1f3a8}": "Add a random artist to the prompt.", + "\u2199\ufe0f": "Read generation parameters from prompt or last generation if prompt is empty into user interface.", + "\u{1f4c2}": "Open images output directory", + + "Inpaint a part of image": "Draw a mask over an image, and the script will regenerate the masked area with content according to prompt", + "SD upscale": "Upscale image normally, split result into tiles, improve each tile using img2img, merge whole image back", + + "Just resize": "Resize image to target resolution. Unless height and width match, you will get incorrect aspect ratio.", + "Crop and resize": "Resize the image so that entirety of target resolution is filled with the image. Crop parts that stick out.", + "Resize and fill": "Resize the image so that entirety of image is inside target resolution. Fill empty space with image's colors.", + + "Mask blur": "How much to blur the mask before processing, in pixels.", + "Masked content": "What to put inside the masked area before processing it with Stable Diffusion.", + "fill": "fill it with colors of the image", + "original": "keep whatever was there originally", + "latent noise": "fill it with latent space noise", + "latent nothing": "fill it with latent space zeroes", + "Inpaint at full resolution": "Upscale masked region to target resolution, do inpainting, downscale back and paste into original image", + + "Denoising strength": "Determines how little respect the algorithm should have for image's content. At 0, nothing will change, and at 1 you'll get an unrelated image. With values below 1.0, processing will take less steps than the Sampling Steps slider specifies.", + "Denoising strength change factor": "In loopback mode, on each loop the denoising strength is multiplied by this value. <1 means decreasing variety so your sequence will converge on a fixed picture. 
>1 means increasing variety so your sequence will become more and more chaotic.", + + "Skip": "Stop processing current image and continue processing.", + "Interrupt": "Stop processing images and return any results accumulated so far.", + "Save": "Write image to a directory (default - log/images) and generation parameters into csv file.", + + "X values": "Separate values for X axis using commas.", + "Y values": "Separate values for Y axis using commas.", + + "None": "Do not do anything special", + "Prompt matrix": "Separate prompts into parts using vertical pipe character (|) and the script will create a picture for every combination of them (except for the first part, which will be present in all combinations)", + "X/Y plot": "Create a grid where images will have different parameters. Use inputs below to specify which parameters will be shared by columns and rows", + "Custom code": "Run Python code. Advanced user only. Must run program with --allow-code for this to work", + + "Prompt S/R": "Separate a list of words with commas, and the first word will be used as a keyword: script will search for this word in the prompt, and replace it with others", + "Prompt order": "Separate a list of words with commas, and the script will make a variation of prompt with those words for their every possible order", + + "Tiling": "Produce an image that can be tiled.", + "Tile overlap": "For SD upscale, how much overlap in pixels should there be between tiles. Tiles overlap so that when they are merged back into one picture, there is no clearly visible seam.", + + "Variation seed": "Seed of a different picture to be mixed into the generation.", + "Variation strength": "How strong of a variation to produce. At 0, there will be no effect. At 1, you will get the complete picture with variation seed (except for ancestral samplers, where you will just get something).", + "Resize seed from height": "Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution", + "Resize seed from width": "Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution", + + "Interrogate": "Reconstruct prompt from existing image and put it into the prompt field.", + + "Images filename pattern": "Use following tags to define how filenames for images are chosen: [steps], [cfg], [prompt], [prompt_no_styles], [prompt_spaces], [width], [height], [styles], [sampler], [seed], [model_hash], [prompt_words], [date], [datetime], [job_timestamp]; leave empty for default.", + "Directory name pattern": "Use following tags to define how subdirectories for images and grids are chosen: [steps], [cfg], [prompt], [prompt_no_styles], [prompt_spaces], [width], [height], [styles], [sampler], [seed], [model_hash], [prompt_words], [date], [datetime], [job_timestamp]; leave empty for default.", + "Max prompt words": "Set the maximum number of words to be used in the [prompt_words] option; ATTENTION: If the words are too long, they may exceed the maximum length of the file path that the system can handle", + + "Loopback": "Process an image, use it as an input, repeat.", + "Loops": "How many times to repeat processing an image and using it as input for the next iteration", + + "Style 1": "Style to apply; styles have components for both positive and negative prompts and apply to both", + "Style 2": "Style to apply; styles have components for both positive and negative prompts and apply to both", + "Apply style": "Insert selected styles into prompt 
fields", + "Create style": "Save current prompts as a style. If you add the token {prompt} to the text, the style use that as placeholder for your prompt when you use the style in the future.", + + "Checkpoint name": "Loads weights from checkpoint before making images. You can either use hash or a part of filename (as seen in settings) for checkpoint name. Recommended to use with Y axis for less switching.", + + "vram": "Torch active: Peak amount of VRAM used by Torch during generation, excluding cached data.\nTorch reserved: Peak amount of VRAM allocated by Torch, including all active and cached data.\nSys VRAM: Peak amount of VRAM allocation across all applications / total GPU VRAM (peak utilization%).", + + "Highres. fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition", + "Scale latent": "Uscale the image in latent space. Alternative is to produce the full image from latent representation, upscale that, and then move it back to latent space.", + + "Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.", + "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be behaving in an unethical manner.", + + "Filename word regex": "This regular expression will be used extract words from filename, and they will be joined using the option below into label text used for training. Leave empty to keep filename text as it is.", + "Filename join string": "This string will be used to hoin split words into a single line if the option above is enabled.", + + "Quicksettings list": "List of setting names, separated by commas, for settings that should go to the quick access bar at the top, rather than the usual setting tab. See modules/shared.py for setting names. Requires restarting to apply." +} + + +onUiUpdate(function(){ + gradioApp().querySelectorAll('span, button, select, p').forEach(function(span){ + tooltip = titles[span.textContent]; + + if(!tooltip){ + tooltip = titles[span.value]; + } + + if(!tooltip){ + for (const c of span.classList) { + if (c in titles) { + tooltip = titles[c]; + break; + } + } + } + + if(tooltip){ + span.title = tooltip; + } + }) + + gradioApp().querySelectorAll('select').forEach(function(select){ + if (select.onchange != null) return; + + select.onchange = function(){ + select.title = titles[select.value] || ""; + } + }) +}) diff --git a/javascript/imageMaskFix.js b/javascript/imageMaskFix.js new file mode 100644 index 0000000000000000000000000000000000000000..3d77bfe9b45c13ef602098b4bd81586767430360 --- /dev/null +++ b/javascript/imageMaskFix.js @@ -0,0 +1,45 @@ +/** + * temporary fix for https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/668 + * @see https://github.com/gradio-app/gradio/issues/1721 + */ +window.addEventListener( 'resize', () => imageMaskResize()); +function imageMaskResize() { + const canvases = gradioApp().querySelectorAll('#img2maskimg .touch-none canvas'); + if ( ! canvases.length ) { + canvases_fixed = false; + window.removeEventListener( 'resize', imageMaskResize ); + return; + } + + const wrapper = canvases[0].closest('.touch-none'); + const previewImage = wrapper.previousElementSibling; + + if ( ! 
previewImage.complete ) { + previewImage.addEventListener( 'load', () => imageMaskResize()); + return; + } + + const w = previewImage.width; + const h = previewImage.height; + const nw = previewImage.naturalWidth; + const nh = previewImage.naturalHeight; + const portrait = nh > nw; + const factor = portrait; + + const wW = Math.min(w, portrait ? h/nh*nw : w/nw*nw); + const wH = Math.min(h, portrait ? h/nh*nh : w/nw*nh); + + wrapper.style.width = `${wW}px`; + wrapper.style.height = `${wH}px`; + wrapper.style.left = `${(w-wW)/2}px`; + wrapper.style.top = `${(h-wH)/2}px`; + + canvases.forEach( c => { + c.style.width = c.style.height = ''; + c.style.maxWidth = '100%'; + c.style.maxHeight = '100%'; + c.style.objectFit = 'contain'; + }); + } + + onUiUpdate(() => imageMaskResize()); \ No newline at end of file diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js new file mode 100644 index 0000000000000000000000000000000000000000..65a33dd78deb66f9b63dc283a1ffc085df60e4f5 --- /dev/null +++ b/javascript/imageviewer.js @@ -0,0 +1,236 @@ +// A full size 'lightbox' preview modal shown when left clicking on gallery previews +function closeModal() { + gradioApp().getElementById("lightboxModal").style.display = "none"; +} + +function showModal(event) { + const source = event.target || event.srcElement; + const modalImage = gradioApp().getElementById("modalImage") + const lb = gradioApp().getElementById("lightboxModal") + modalImage.src = source.src + if (modalImage.style.display === 'none') { + lb.style.setProperty('background-image', 'url(' + source.src + ')'); + } + lb.style.display = "block"; + lb.focus() + event.stopPropagation() +} + +function negmod(n, m) { + return ((n % m) + m) % m; +} + +function updateOnBackgroundChange() { + const modalImage = gradioApp().getElementById("modalImage") + if (modalImage && modalImage.offsetParent) { + let allcurrentButtons = gradioApp().querySelectorAll(".gallery-item.transition-all.\\!ring-2") + let currentButton = null + allcurrentButtons.forEach(function(elem) { + if (elem.parentElement.offsetParent) { + currentButton = elem; + } + }) + + if (modalImage.src != currentButton.children[0].src) { + modalImage.src = currentButton.children[0].src; + if (modalImage.style.display === 'none') { + modal.style.setProperty('background-image', `url(${modalImage.src})`) + } + } + } +} + +function modalImageSwitch(offset) { + var allgalleryButtons = gradioApp().querySelectorAll(".gallery-item.transition-all") + var galleryButtons = [] + allgalleryButtons.forEach(function(elem) { + if (elem.parentElement.offsetParent) { + galleryButtons.push(elem); + } + }) + + if (galleryButtons.length > 1) { + var allcurrentButtons = gradioApp().querySelectorAll(".gallery-item.transition-all.\\!ring-2") + var currentButton = null + allcurrentButtons.forEach(function(elem) { + if (elem.parentElement.offsetParent) { + currentButton = elem; + } + }) + + var result = -1 + galleryButtons.forEach(function(v, i) { + if (v == currentButton) { + result = i + } + }) + + if (result != -1) { + nextButton = galleryButtons[negmod((result + offset), galleryButtons.length)] + nextButton.click() + const modalImage = gradioApp().getElementById("modalImage"); + const modal = gradioApp().getElementById("lightboxModal"); + modalImage.src = nextButton.children[0].src; + if (modalImage.style.display === 'none') { + modal.style.setProperty('background-image', `url(${modalImage.src})`) + } + setTimeout(function() { + modal.focus() + }, 10) + } + } +} + +function modalNextImage(event) { + 
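+    // Step the lightbox forward by one gallery image; modalPrevImage below passes -1 to step backwards.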
modalImageSwitch(1) + event.stopPropagation() +} + +function modalPrevImage(event) { + modalImageSwitch(-1) + event.stopPropagation() +} + +function modalKeyHandler(event) { + switch (event.key) { + case "ArrowLeft": + modalPrevImage(event) + break; + case "ArrowRight": + modalNextImage(event) + break; + case "Escape": + closeModal(); + break; + } +} + +function showGalleryImage() { + setTimeout(function() { + fullImg_preview = gradioApp().querySelectorAll('img.w-full.object-contain') + + if (fullImg_preview != null) { + fullImg_preview.forEach(function function_name(e) { + if (e.dataset.modded) + return; + e.dataset.modded = true; + if(e && e.parentElement.tagName == 'DIV'){ + e.style.cursor='pointer' + e.addEventListener('click', function (evt) { + if(!opts.js_modal_lightbox) return; + modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initially_zoomed) + showModal(evt) + }, true); + } + }); + } + + }, 100); +} + +function modalZoomSet(modalImage, enable) { + if (enable) { + modalImage.classList.add('modalImageFullscreen'); + } else { + modalImage.classList.remove('modalImageFullscreen'); + } +} + +function modalZoomToggle(event) { + modalImage = gradioApp().getElementById("modalImage"); + modalZoomSet(modalImage, !modalImage.classList.contains('modalImageFullscreen')) + event.stopPropagation() +} + +function modalTileImageToggle(event) { + const modalImage = gradioApp().getElementById("modalImage"); + const modal = gradioApp().getElementById("lightboxModal"); + const isTiling = modalImage.style.display === 'none'; + if (isTiling) { + modalImage.style.display = 'block'; + modal.style.setProperty('background-image', 'none') + } else { + modalImage.style.display = 'none'; + modal.style.setProperty('background-image', `url(${modalImage.src})`) + } + + event.stopPropagation() +} + +function galleryImageHandler(e) { + if (e && e.parentElement.tagName == 'BUTTON') { + e.onclick = showGalleryImage; + } +} + +onUiUpdate(function() { + fullImg_preview = gradioApp().querySelectorAll('img.w-full') + if (fullImg_preview != null) { + fullImg_preview.forEach(galleryImageHandler); + } + updateOnBackgroundChange(); +}) + +document.addEventListener("DOMContentLoaded", function() { + const modalFragment = document.createDocumentFragment(); + const modal = document.createElement('div') + modal.onclick = closeModal; + modal.id = "lightboxModal"; + modal.tabIndex = 0 + modal.addEventListener('keydown', modalKeyHandler, true) + + const modalControls = document.createElement('div') + modalControls.className = 'modalControls gradio-container'; + modal.append(modalControls); + + const modalZoom = document.createElement('span') + modalZoom.className = 'modalZoom cursor'; + modalZoom.innerHTML = '⤡' + modalZoom.addEventListener('click', modalZoomToggle, true) + modalZoom.title = "Toggle zoomed view"; + modalControls.appendChild(modalZoom) + + const modalTileImage = document.createElement('span') + modalTileImage.className = 'modalTileImage cursor'; + modalTileImage.innerHTML = '⊞' + modalTileImage.addEventListener('click', modalTileImageToggle, true) + modalTileImage.title = "Preview tiling"; + modalControls.appendChild(modalTileImage) + + const modalClose = document.createElement('span') + modalClose.className = 'modalClose cursor'; + modalClose.innerHTML = '×' + modalClose.onclick = closeModal; + modalClose.title = "Close image viewer"; + modalControls.appendChild(modalClose) + + const modalImage = document.createElement('img') + modalImage.id = 'modalImage'; + modalImage.onclick = 
closeModal; + modalImage.tabIndex = 0 + modalImage.addEventListener('keydown', modalKeyHandler, true) + modal.appendChild(modalImage) + + const modalPrev = document.createElement('a') + modalPrev.className = 'modalPrev'; + modalPrev.innerHTML = '❮' + modalPrev.tabIndex = 0 + modalPrev.addEventListener('click', modalPrevImage, true); + modalPrev.addEventListener('keydown', modalKeyHandler, true) + modal.appendChild(modalPrev) + + const modalNext = document.createElement('a') + modalNext.className = 'modalNext'; + modalNext.innerHTML = '❯' + modalNext.tabIndex = 0 + modalNext.addEventListener('click', modalNextImage, true); + modalNext.addEventListener('keydown', modalKeyHandler, true) + + modal.appendChild(modalNext) + + + gradioApp().getRootNode().appendChild(modal) + + document.body.appendChild(modalFragment); + +}); diff --git a/javascript/notification.js b/javascript/notification.js new file mode 100644 index 0000000000000000000000000000000000000000..f96de313b6a11d9695e8ff487804f25f11c17e75 --- /dev/null +++ b/javascript/notification.js @@ -0,0 +1,49 @@ +// Monitors the gallery and sends a browser notification when the leading image is new. + +let lastHeadImg = null; + +notificationButton = null + +onUiUpdate(function(){ + if(notificationButton == null){ + notificationButton = gradioApp().getElementById('request_notifications') + + if(notificationButton != null){ + notificationButton.addEventListener('click', function (evt) { + Notification.requestPermission(); + },true); + } + } + + const galleryPreviews = gradioApp().querySelectorAll('img.h-full.w-full.overflow-hidden'); + + if (galleryPreviews == null) return; + + const headImg = galleryPreviews[0]?.src; + + if (headImg == null || headImg == lastHeadImg) return; + + lastHeadImg = headImg; + + // play notification sound if available + gradioApp().querySelector('#audio_notification audio')?.play(); + + if (document.hasFocus()) return; + + // Multiple copies of the images are in the DOM when one is selected. Dedup with a Set to get the real number generated. + const imgs = new Set(Array.from(galleryPreviews).map(img => img.src)); + + const notification = new Notification( + 'Stable Diffusion', + { + body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 's' : ''}`, + icon: headImg, + image: headImg, + } + ); + + notification.onclick = function(_){ + parent.focus(); + this.close(); + }; +}); diff --git a/javascript/progressbar.js b/javascript/progressbar.js new file mode 100644 index 0000000000000000000000000000000000000000..4395a2159a4508cbc7b9427314e650342bd5ba62 --- /dev/null +++ b/javascript/progressbar.js @@ -0,0 +1,76 @@ +// code related to showing and updating progressbar shown as the image is being made +global_progressbars = {} + +function check_progressbar(id_part, id_progressbar, id_progressbar_span, id_skip, id_interrupt, id_preview, id_gallery){ + var progressbar = gradioApp().getElementById(id_progressbar) + var skip = id_skip ? 
gradioApp().getElementById(id_skip) : null + var interrupt = gradioApp().getElementById(id_interrupt) + + if(opts.show_progress_in_title && progressbar && progressbar.offsetParent){ + if(progressbar.innerText){ + let newtitle = 'Stable Diffusion - ' + progressbar.innerText + if(document.title != newtitle){ + document.title = newtitle; + } + }else{ + let newtitle = 'Stable Diffusion' + if(document.title != newtitle){ + document.title = newtitle; + } + } + } + + if(progressbar!= null && progressbar != global_progressbars[id_progressbar]){ + global_progressbars[id_progressbar] = progressbar + + var mutationObserver = new MutationObserver(function(m){ + preview = gradioApp().getElementById(id_preview) + gallery = gradioApp().getElementById(id_gallery) + + if(preview != null && gallery != null){ + preview.style.width = gallery.clientWidth + "px" + preview.style.height = gallery.clientHeight + "px" + + var progressDiv = gradioApp().querySelectorAll('#' + id_progressbar_span).length > 0; + if(!progressDiv){ + if (skip) { + skip.style.display = "none" + } + interrupt.style.display = "none" + } + } + + window.setTimeout(function() { requestMoreProgress(id_part, id_progressbar_span, id_skip, id_interrupt) }, 500) + }); + mutationObserver.observe( progressbar, { childList:true, subtree:true }) + } +} + +onUiUpdate(function(){ + check_progressbar('txt2img', 'txt2img_progressbar', 'txt2img_progress_span', 'txt2img_skip', 'txt2img_interrupt', 'txt2img_preview', 'txt2img_gallery') + check_progressbar('img2img', 'img2img_progressbar', 'img2img_progress_span', 'img2img_skip', 'img2img_interrupt', 'img2img_preview', 'img2img_gallery') + check_progressbar('ti', 'ti_progressbar', 'ti_progress_span', '', 'ti_interrupt', 'ti_preview', 'ti_gallery') +}) + +function requestMoreProgress(id_part, id_progressbar_span, id_skip, id_interrupt){ + btn = gradioApp().getElementById(id_part+"_check_progress"); + if(btn==null) return; + + btn.click(); + var progressDiv = gradioApp().querySelectorAll('#' + id_progressbar_span).length > 0; + var skip = id_skip ? 
gradioApp().getElementById(id_skip) : null + var interrupt = gradioApp().getElementById(id_interrupt) + if(progressDiv && interrupt){ + if (skip) { + skip.style.display = "block" + } + interrupt.style.display = "block" + } +} + +function requestProgress(id_part){ + btn = gradioApp().getElementById(id_part+"_check_progress_initial"); + if(btn==null) return; + + btn.click(); +} diff --git a/javascript/textualInversion.js b/javascript/textualInversion.js new file mode 100644 index 0000000000000000000000000000000000000000..8061be08988ca58fddf52feff1d0056db820ca51 --- /dev/null +++ b/javascript/textualInversion.js @@ -0,0 +1,8 @@ + + +function start_training_textual_inversion(){ + requestProgress('ti') + gradioApp().querySelector('#ti_error').innerHTML='' + + return args_to_array(arguments) +} diff --git a/javascript/ui.js b/javascript/ui.js new file mode 100644 index 0000000000000000000000000000000000000000..0f8fe68efc7844f8d94f1854d219cbd8cf8c74ac --- /dev/null +++ b/javascript/ui.js @@ -0,0 +1,234 @@ +// various functions for interation with ui.py not large enough to warrant putting them in separate files + +function selected_gallery_index(){ + var buttons = gradioApp().querySelectorAll('[style="display: block;"].tabitem .gallery-item') + var button = gradioApp().querySelector('[style="display: block;"].tabitem .gallery-item.\\!ring-2') + + var result = -1 + buttons.forEach(function(v, i){ if(v==button) { result = i } }) + + return result +} + +function extract_image_from_gallery(gallery){ + if(gallery.length == 1){ + return gallery[0] + } + + index = selected_gallery_index() + + if (index < 0 || index >= gallery.length){ + return [null] + } + + return gallery[index]; +} + +function args_to_array(args){ + res = [] + for(var i=0;i label > textarea"); + txt2img_textarea?.addEventListener("input", () => update_token_counter("txt2img_token_button")); + txt2img_textarea?.addEventListener("keyup", (event) => submit_prompt(event, "txt2img_generate")); + } + if (!img2img_textarea) { + img2img_textarea = gradioApp().querySelector("#img2img_prompt > label > textarea"); + img2img_textarea?.addEventListener("input", () => update_token_counter("img2img_token_button")); + img2img_textarea?.addEventListener("keyup", (event) => submit_prompt(event, "img2img_generate")); + } +}) + +let txt2img_textarea, img2img_textarea = undefined; +let wait_time = 800 +let token_timeout; + +function update_txt2img_tokens(...args) { + update_token_counter("txt2img_token_button") + if (args.length == 2) + return args[0] + return args; +} + +function update_img2img_tokens(...args) { + update_token_counter("img2img_token_button") + if (args.length == 2) + return args[0] + return args; +} + +function update_token_counter(button_id) { + if (token_timeout) + clearTimeout(token_timeout); + token_timeout = setTimeout(() => gradioApp().getElementById(button_id)?.click(), wait_time); +} + +function submit_prompt(event, generate_button_id) { + if (event.altKey && event.keyCode === 13) { + event.preventDefault(); + gradioApp().getElementById(generate_button_id).click(); + return; + } +} + +function restart_reload(){ + document.body.innerHTML='
Reloading...
'; + setTimeout(function(){location.reload()},2000) +} diff --git a/launch.py b/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..16627a032db8ca3bebf8f0e82aec693af3b3a633 --- /dev/null +++ b/launch.py @@ -0,0 +1,169 @@ +# this scripts installs necessary requirements and launches main program in webui.py +import subprocess +import os +import sys +import importlib.util +import shlex +import platform + +dir_repos = "repositories" +python = sys.executable +git = os.environ.get('GIT', "git") + + +def extract_arg(args, name): + return [x for x in args if x != name], name in args + + +def run(command, desc=None, errdesc=None): + if desc is not None: + print(desc) + + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + + if result.returncode != 0: + + message = f"""{errdesc or 'Error running command'}. +Command: {command} +Error code: {result.returncode} +stdout: {result.stdout.decode(encoding="utf8", errors="ignore") if len(result.stdout)>0 else ''} +stderr: {result.stderr.decode(encoding="utf8", errors="ignore") if len(result.stderr)>0 else ''} +""" + raise RuntimeError(message) + + return result.stdout.decode(encoding="utf8", errors="ignore") + + +def check_run(command): + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + return result.returncode == 0 + + +def is_installed(package): + try: + spec = importlib.util.find_spec(package) + except ModuleNotFoundError: + return False + + return spec is not None + + +def repo_dir(name): + return os.path.join(dir_repos, name) + + +def run_python(code, desc=None, errdesc=None): + return run(f'"{python}" -c "{code}"', desc, errdesc) + + +def run_pip(args, desc=None): + return run(f'"{python}" -m pip {args} --prefer-binary', desc=f"Installing {desc}", errdesc=f"Couldn't install {desc}") + + +def check_run_python(code): + return check_run(f'"{python}" -c "{code}"') + + +def git_clone(url, dir, name, commithash=None): + # TODO clone into temporary dir and move if successful + + if os.path.exists(dir): + if commithash is None: + return + + current_hash = run(f'"{git}" -C {dir} rev-parse HEAD', None, f"Couldn't determine {name}'s hash: {commithash}").strip() + if current_hash == commithash: + return + + run(f'"{git}" -C {dir} fetch', f"Fetching updates for {name}...", f"Couldn't fetch {name}") + run(f'"{git}" -C {dir} checkout {commithash}', f"Checking out commint for {name} with hash: {commithash}...", f"Couldn't checkout commit {commithash} for {name}") + return + + run(f'"{git}" clone "{url}" "{dir}"', f"Cloning {name} into {dir}...", f"Couldn't clone {name}") + + if commithash is not None: + run(f'"{git}" -C {dir} checkout {commithash}', None, "Couldn't checkout {name}'s hash: {commithash}") + + +def prepare_enviroment(): + torch_command = os.environ.get('TORCH_COMMAND', "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113") + requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt") + commandline_args = os.environ.get('COMMANDLINE_ARGS', "") + + gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379") + clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1") + + stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc") + 
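+    # These commit hashes pin the auxiliary repositories cloned further down; each can be overridden via its environment variable.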
taming_transformers_commit_hash = os.environ.get('TAMING_TRANSFORMERS_COMMIT_HASH', "24268930bf1dce879235a7fddd0b2355b84d7ea6") + k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "f4e99857772fc3a126ba886aadf795a332774878") + codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af") + blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9") + + args = shlex.split(commandline_args) + + args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test') + xformers = '--xformers' in args + deepdanbooru = '--deepdanbooru' in args + ngrok = '--ngrok' in args + + try: + commit = run(f"{git} rev-parse HEAD").strip() + except Exception: + commit = "" + + print(f"Python {sys.version}") + print(f"Commit hash: {commit}") + + if not is_installed("torch") or not is_installed("torchvision"): + run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch") + + if not skip_torch_cuda_test: + run_python("import torch; assert torch.cuda.is_available(), 'Torch is not able to use GPU; add --skip-torch-cuda-test to COMMANDLINE_ARGS variable to disable this check'") + + if not is_installed("gfpgan"): + run_pip(f"install {gfpgan_package}", "gfpgan") + + if not is_installed("clip"): + run_pip(f"install {clip_package}", "clip") + + if not is_installed("xformers") and xformers and platform.python_version().startswith("3.10"): + if platform.system() == "Windows": + run_pip("install https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/c/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl", "xformers") + elif platform.system() == "Linux": + run_pip("install xformers", "xformers") + + if not is_installed("deepdanbooru") and deepdanbooru: + run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru") + + if not is_installed("pyngrok") and ngrok: + run_pip("install pyngrok", "ngrok") + + os.makedirs(dir_repos, exist_ok=True) + + git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash) + git_clone("https://github.com/CompVis/taming-transformers.git", repo_dir('taming-transformers'), "Taming Transformers", taming_transformers_commit_hash) + git_clone("https://github.com/crowsonkb/k-diffusion.git", repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash) + git_clone("https://github.com/sczhou/CodeFormer.git", repo_dir('CodeFormer'), "CodeFormer", codeformer_commit_hash) + git_clone("https://github.com/salesforce/BLIP.git", repo_dir('BLIP'), "BLIP", blip_commit_hash) + + if not is_installed("lpips"): + run_pip(f"install -r {os.path.join(repo_dir('CodeFormer'), 'requirements.txt')}", "requirements for CodeFormer") + + run_pip(f"install -r {requirements_file}", "requirements for Web UI") + + sys.argv += args + + if "--exit" in args: + print("Exiting because of --exit argument") + exit(0) + + +def start_webui(): + print(f"Launching Web UI with arguments: {' '.join(sys.argv[1:])}") + import webui + webui.webui() + + +if __name__ == "__main__": + prepare_enviroment() + start_webui() diff --git a/modules/artists.py b/modules/artists.py new file mode 100644 index 0000000000000000000000000000000000000000..3612758b4ac2409db067ecd3a991e514c46d263b --- /dev/null +++ b/modules/artists.py @@ -0,0 +1,25 @@ +import os.path +import csv +from 
collections import namedtuple + +Artist = namedtuple("Artist", ['name', 'weight', 'category']) + + +class ArtistsDatabase: + def __init__(self, filename): + self.cats = set() + self.artists = [] + + if not os.path.exists(filename): + return + + with open(filename, "r", newline='', encoding="utf8") as file: + reader = csv.DictReader(file) + + for row in reader: + artist = Artist(row["artist"], float(row["score"]), row["category"]) + self.artists.append(artist) + self.cats.add(artist.category) + + def categories(self): + return sorted(self.cats) diff --git a/modules/bsrgan_model.py b/modules/bsrgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..737e1a7613c1911a46073d2900c3648213401b58 --- /dev/null +++ b/modules/bsrgan_model.py @@ -0,0 +1,76 @@ +import os.path +import sys +import traceback + +import PIL.Image +import numpy as np +import torch +from basicsr.utils.download_util import load_file_from_url + +import modules.upscaler +from modules import devices, modelloader +from modules.bsrgan_model_arch import RRDBNet + + +class UpscalerBSRGAN(modules.upscaler.Upscaler): + def __init__(self, dirname): + self.name = "BSRGAN" + self.model_name = "BSRGAN 4x" + self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth" + self.user_path = dirname + super().__init__() + model_paths = self.find_models(ext_filter=[".pt", ".pth"]) + scalers = [] + if len(model_paths) == 0: + scaler_data = modules.upscaler.UpscalerData(self.model_name, self.model_url, self, 4) + scalers.append(scaler_data) + for file in model_paths: + if "http" in file: + name = self.model_name + else: + name = modelloader.friendly_name(file) + try: + scaler_data = modules.upscaler.UpscalerData(name, file, self, 4) + scalers.append(scaler_data) + except Exception: + print(f"Error loading BSRGAN model: {file}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + self.scalers = scalers + + def do_upscale(self, img: PIL.Image, selected_file): + torch.cuda.empty_cache() + model = self.load_model(selected_file) + if model is None: + return img + model.to(devices.device_bsrgan) + torch.cuda.empty_cache() + img = np.array(img) + img = img[:, :, ::-1] + img = np.moveaxis(img, 2, 0) / 255 + img = torch.from_numpy(img).float() + img = img.unsqueeze(0).to(devices.device_bsrgan) + with torch.no_grad(): + output = model(img) + output = output.squeeze().float().cpu().clamp_(0, 1).numpy() + output = 255. 
* np.moveaxis(output, 0, 2) + output = output.astype(np.uint8) + output = output[:, :, ::-1] + torch.cuda.empty_cache() + return PIL.Image.fromarray(output, 'RGB') + + def load_model(self, path: str): + if "http" in path: + filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name, + progress=True) + else: + filename = path + if not os.path.exists(filename) or filename is None: + print(f"BSRGAN: Unable to load model from {filename}", file=sys.stderr) + return None + model = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4) # define network + model.load_state_dict(torch.load(filename), strict=True) + model.eval() + for k, v in model.named_parameters(): + v.requires_grad = False + return model + diff --git a/modules/bsrgan_model_arch.py b/modules/bsrgan_model_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..cb4d1c133c1e72bb565bf1fa825bfde7006413d5 --- /dev/null +++ b/modules/bsrgan_model_arch.py @@ -0,0 +1,102 @@ +import functools +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init + + +def initialize_weights(net_l, scale=1): + if not isinstance(net_l, list): + net_l = [net_l] + for net in net_l: + for m in net.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale # for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + init.constant_(m.weight, 1) + init.constant_(m.bias.data, 0.0) + + +def make_layer(block, n_layers): + layers = [] + for _ in range(n_layers): + layers.append(block()) + return nn.Sequential(*layers) + + +class ResidualDenseBlock_5C(nn.Module): + def __init__(self, nf=64, gc=32, bias=True): + super(ResidualDenseBlock_5C, self).__init__() + # gc: growth channel, i.e. 
intermediate channels + self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias) + self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias) + self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias) + self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias) + self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + # initialization + initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1) + + def forward(self, x): + x1 = self.lrelu(self.conv1(x)) + x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1))) + x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1))) + x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1))) + x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) + return x5 * 0.2 + x + + +class RRDB(nn.Module): + '''Residual in Residual Dense Block''' + + def __init__(self, nf, gc=32): + super(RRDB, self).__init__() + self.RDB1 = ResidualDenseBlock_5C(nf, gc) + self.RDB2 = ResidualDenseBlock_5C(nf, gc) + self.RDB3 = ResidualDenseBlock_5C(nf, gc) + + def forward(self, x): + out = self.RDB1(x) + out = self.RDB2(out) + out = self.RDB3(out) + return out * 0.2 + x + + +class RRDBNet(nn.Module): + def __init__(self, in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4): + super(RRDBNet, self).__init__() + RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc) + self.sf = sf + + self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True) + self.RRDB_trunk = make_layer(RRDB_block_f, nb) + self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + #### upsampling + self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + if self.sf==4: + self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True) + + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x): + fea = self.conv_first(x) + trunk = self.trunk_conv(self.RRDB_trunk(fea)) + fea = fea + trunk + + fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) + if self.sf==4: + fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) + out = self.conv_last(self.lrelu(self.HRconv(fea))) + + return out \ No newline at end of file diff --git a/modules/codeformer/codeformer_arch.py b/modules/codeformer/codeformer_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..11dcc3ee76511218c64977c2ecbb306cecd892c3 --- /dev/null +++ b/modules/codeformer/codeformer_arch.py @@ -0,0 +1,278 @@ +# this file is copied from CodeFormer repository. Please see comment in modules/codeformer_model.py + +import math +import numpy as np +import torch +from torch import nn, Tensor +import torch.nn.functional as F +from typing import Optional, List + +from modules.codeformer.vqgan_arch import * +from basicsr.utils import get_root_logger +from basicsr.utils.registry import ARCH_REGISTRY + +def calc_mean_std(feat, eps=1e-5): + """Calculate mean and std for adaptive_instance_normalization. + + Args: + feat (Tensor): 4D tensor. + eps (float): A small value added to the variance to avoid + divide-by-zero. Default: 1e-5. + """ + size = feat.size() + assert len(size) == 4, 'The input feature should be 4D tensor.' 
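+    # Mean and std are taken per sample and per channel over the flattened spatial dimensions; eps keeps the sqrt numerically stable.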
+ b, c = size[:2] + feat_var = feat.view(b, c, -1).var(dim=2) + eps + feat_std = feat_var.sqrt().view(b, c, 1, 1) + feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1) + return feat_mean, feat_std + + +def adaptive_instance_normalization(content_feat, style_feat): + """Adaptive instance normalization. + + Adjust the reference features to have the similar color and illuminations + as those in the degradate features. + + Args: + content_feat (Tensor): The reference feature. + style_feat (Tensor): The degradate features. + """ + size = content_feat.size() + style_mean, style_std = calc_mean_std(style_feat) + content_mean, content_std = calc_mean_std(content_feat) + normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size) + return normalized_feat * style_std.expand(size) + style_mean.expand(size) + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x, mask=None): + if mask is None: + mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) + not_mask = ~mask + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack( + (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos_y = torch.stack( + (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") + + +class TransformerSALayer(nn.Module): + def __init__(self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"): + super().__init__() + self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout) + # Implementation of Feedforward model - MLP + self.linear1 = nn.Linear(embed_dim, dim_mlp) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_mlp, embed_dim) + + self.norm1 = nn.LayerNorm(embed_dim) + self.norm2 = nn.LayerNorm(embed_dim) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward(self, tgt, + tgt_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + query_pos: 
Optional[Tensor] = None): + + # self attention + tgt2 = self.norm1(tgt) + q = k = self.with_pos_embed(tgt2, query_pos) + tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask, + key_padding_mask=tgt_key_padding_mask)[0] + tgt = tgt + self.dropout1(tgt2) + + # ffn + tgt2 = self.norm2(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2)))) + tgt = tgt + self.dropout2(tgt2) + return tgt + +class Fuse_sft_block(nn.Module): + def __init__(self, in_ch, out_ch): + super().__init__() + self.encode_enc = ResBlock(2*in_ch, out_ch) + + self.scale = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1), + nn.LeakyReLU(0.2, True), + nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)) + + self.shift = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1), + nn.LeakyReLU(0.2, True), + nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)) + + def forward(self, enc_feat, dec_feat, w=1): + enc_feat = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1)) + scale = self.scale(enc_feat) + shift = self.shift(enc_feat) + residual = w * (dec_feat * scale + shift) + out = dec_feat + residual + return out + + +@ARCH_REGISTRY.register() +class CodeFormer(VQAutoEncoder): + def __init__(self, dim_embd=512, n_head=8, n_layers=9, + codebook_size=1024, latent_size=256, + connect_list=['32', '64', '128', '256'], + fix_modules=['quantize','generator']): + super(CodeFormer, self).__init__(512, 64, [1, 2, 2, 4, 4, 8], 'nearest',2, [16], codebook_size) + + if fix_modules is not None: + for module in fix_modules: + for param in getattr(self, module).parameters(): + param.requires_grad = False + + self.connect_list = connect_list + self.n_layers = n_layers + self.dim_embd = dim_embd + self.dim_mlp = dim_embd*2 + + self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd)) + self.feat_emb = nn.Linear(256, self.dim_embd) + + # transformer + self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0) + for _ in range(self.n_layers)]) + + # logits_predict head + self.idx_pred_layer = nn.Sequential( + nn.LayerNorm(dim_embd), + nn.Linear(dim_embd, codebook_size, bias=False)) + + self.channels = { + '16': 512, + '32': 256, + '64': 256, + '128': 128, + '256': 128, + '512': 64, + } + + # after second residual block for > 16, before attn layer for ==16 + self.fuse_encoder_block = {'512':2, '256':5, '128':8, '64':11, '32':14, '16':18} + # after first residual block for > 16, before attn layer for ==16 + self.fuse_generator_block = {'16':6, '32': 9, '64':12, '128':15, '256':18, '512':21} + + # fuse_convs_dict + self.fuse_convs_dict = nn.ModuleDict() + for f_size in self.connect_list: + in_ch = self.channels[f_size] + self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch) + + def _init_weights(self, module): + if isinstance(module, (nn.Linear, nn.Embedding)): + module.weight.data.normal_(mean=0.0, std=0.02) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + def forward(self, x, w=0, detach_16=True, code_only=False, adain=False): + # ################### Encoder ##################### + enc_feat_dict = {} + out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list] + for i, block in enumerate(self.encoder.blocks): + x = block(x) + if i in out_list: + enc_feat_dict[str(x.shape[-1])] = x.clone() + + lq_feat = x + # ################# Transformer 
################### + # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat) + pos_emb = self.position_emb.unsqueeze(1).repeat(1,x.shape[0],1) + # BCHW -> BC(HW) -> (HW)BC + feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2,0,1)) + query_emb = feat_emb + # Transformer encoder + for layer in self.ft_layers: + query_emb = layer(query_emb, query_pos=pos_emb) + + # output logits + logits = self.idx_pred_layer(query_emb) # (hw)bn + logits = logits.permute(1,0,2) # (hw)bn -> b(hw)n + + if code_only: # for training stage II + # logits doesn't need softmax before cross_entropy loss + return logits, lq_feat + + # ################# Quantization ################### + # if self.training: + # quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight]) + # # b(hw)c -> bc(hw) -> bchw + # quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape) + # ------------ + soft_one_hot = F.softmax(logits, dim=2) + _, top_idx = torch.topk(soft_one_hot, 1, dim=2) + quant_feat = self.quantize.get_codebook_feat(top_idx, shape=[x.shape[0],16,16,256]) + # preserve gradients + # quant_feat = lq_feat + (quant_feat - lq_feat).detach() + + if detach_16: + quant_feat = quant_feat.detach() # for training stage III + if adain: + quant_feat = adaptive_instance_normalization(quant_feat, lq_feat) + + # ################## Generator #################### + x = quant_feat + fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list] + + for i, block in enumerate(self.generator.blocks): + x = block(x) + if i in fuse_list: # fuse after i-th block + f_size = str(x.shape[-1]) + if w>0: + x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w) + out = x + # logits doesn't need softmax before cross_entropy loss + return out, logits, lq_feat \ No newline at end of file diff --git a/modules/codeformer/vqgan_arch.py b/modules/codeformer/vqgan_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..c06c590ca611f46404d1756b1652adc4c7397532 --- /dev/null +++ b/modules/codeformer/vqgan_arch.py @@ -0,0 +1,437 @@ +# this file is copied from CodeFormer repository. 
Please see comment in modules/codeformer_model.py + +''' +VQGAN code, adapted from the original created by the Unleashing Transformers authors: +https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py + +''' +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import copy +from basicsr.utils import get_root_logger +from basicsr.utils.registry import ARCH_REGISTRY + +def normalize(in_channels): + return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) + + +@torch.jit.script +def swish(x): + return x*torch.sigmoid(x) + + +# Define VQVAE classes +class VectorQuantizer(nn.Module): + def __init__(self, codebook_size, emb_dim, beta): + super(VectorQuantizer, self).__init__() + self.codebook_size = codebook_size # number of embeddings + self.emb_dim = emb_dim # dimension of embedding + self.beta = beta # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2 + self.embedding = nn.Embedding(self.codebook_size, self.emb_dim) + self.embedding.weight.data.uniform_(-1.0 / self.codebook_size, 1.0 / self.codebook_size) + + def forward(self, z): + # reshape z -> (batch, height, width, channel) and flatten + z = z.permute(0, 2, 3, 1).contiguous() + z_flattened = z.view(-1, self.emb_dim) + + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + d = (z_flattened ** 2).sum(dim=1, keepdim=True) + (self.embedding.weight**2).sum(1) - \ + 2 * torch.matmul(z_flattened, self.embedding.weight.t()) + + mean_distance = torch.mean(d) + # find closest encodings + # min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1) + min_encoding_scores, min_encoding_indices = torch.topk(d, 1, dim=1, largest=False) + # [0-1], higher score, higher confidence + min_encoding_scores = torch.exp(-min_encoding_scores/10) + + min_encodings = torch.zeros(min_encoding_indices.shape[0], self.codebook_size).to(z) + min_encodings.scatter_(1, min_encoding_indices, 1) + + # get quantized latent vectors + z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape) + # compute loss for embedding + loss = torch.mean((z_q.detach()-z)**2) + self.beta * torch.mean((z_q - z.detach()) ** 2) + # preserve gradients + z_q = z + (z_q - z).detach() + + # perplexity + e_mean = torch.mean(min_encodings, dim=0) + perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10))) + # reshape back to match original input shape + z_q = z_q.permute(0, 3, 1, 2).contiguous() + + return z_q, loss, { + "perplexity": perplexity, + "min_encodings": min_encodings, + "min_encoding_indices": min_encoding_indices, + "min_encoding_scores": min_encoding_scores, + "mean_distance": mean_distance + } + + def get_codebook_feat(self, indices, shape): + # input indices: batch*token_num -> (batch*token_num)*1 + # shape: batch, height, width, channel + indices = indices.view(-1,1) + min_encodings = torch.zeros(indices.shape[0], self.codebook_size).to(indices) + min_encodings.scatter_(1, indices, 1) + # get quantized latent vectors + z_q = torch.matmul(min_encodings.float(), self.embedding.weight) + + if shape is not None: # reshape back to match original input shape + z_q = z_q.view(shape).permute(0, 3, 1, 2).contiguous() + + return z_q + + +class GumbelQuantizer(nn.Module): + def __init__(self, codebook_size, emb_dim, num_hiddens, straight_through=False, kl_weight=5e-4, temp_init=1.0): + super().__init__() + self.codebook_size = codebook_size # number of embeddings + self.emb_dim = emb_dim # dimension of embedding + self.straight_through = 
straight_through + self.temperature = temp_init + self.kl_weight = kl_weight + self.proj = nn.Conv2d(num_hiddens, codebook_size, 1) # projects last encoder layer to quantized logits + self.embed = nn.Embedding(codebook_size, emb_dim) + + def forward(self, z): + hard = self.straight_through if self.training else True + + logits = self.proj(z) + + soft_one_hot = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=hard) + + z_q = torch.einsum("b n h w, n d -> b d h w", soft_one_hot, self.embed.weight) + + # + kl divergence to the prior loss + qy = F.softmax(logits, dim=1) + diff = self.kl_weight * torch.sum(qy * torch.log(qy * self.codebook_size + 1e-10), dim=1).mean() + min_encoding_indices = soft_one_hot.argmax(dim=1) + + return z_q, diff, { + "min_encoding_indices": min_encoding_indices + } + + +class Downsample(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0) + + def forward(self, x): + pad = (0, 1, 0, 1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + return x + + +class Upsample(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + x = self.conv(x) + + return x + + +class ResBlock(nn.Module): + def __init__(self, in_channels, out_channels=None): + super(ResBlock, self).__init__() + self.in_channels = in_channels + self.out_channels = in_channels if out_channels is None else out_channels + self.norm1 = normalize(in_channels) + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + self.norm2 = normalize(out_channels) + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + if self.in_channels != self.out_channels: + self.conv_out = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x_in): + x = x_in + x = self.norm1(x) + x = swish(x) + x = self.conv1(x) + x = self.norm2(x) + x = swish(x) + x = self.conv2(x) + if self.in_channels != self.out_channels: + x_in = self.conv_out(x_in) + + return x + x_in + + +class AttnBlock(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = normalize(in_channels) + self.q = torch.nn.Conv2d( + in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0 + ) + self.k = torch.nn.Conv2d( + in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0 + ) + self.v = torch.nn.Conv2d( + in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0 + ) + self.proj_out = torch.nn.Conv2d( + in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0 + ) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = q.reshape(b, c, h*w) + q = q.permute(0, 2, 1) + k = k.reshape(b, c, h*w) + w_ = torch.bmm(q, k) + w_ = w_ * (int(c)**(-0.5)) + w_ = F.softmax(w_, dim=2) + + # attend to values + v = v.reshape(b, c, h*w) + w_ = w_.permute(0, 2, 1) + h_ = torch.bmm(v, w_) + h_ = h_.reshape(b, c, h, w) + + h_ = self.proj_out(h_) + + return x+h_ + + +class Encoder(nn.Module): + def __init__(self, in_channels, nf, emb_dim, ch_mult, num_res_blocks, resolution, attn_resolutions): + super().__init__() + self.nf = nf + self.num_resolutions = 
len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.attn_resolutions = attn_resolutions + + curr_res = self.resolution + in_ch_mult = (1,)+tuple(ch_mult) + + blocks = [] + # initial convultion + blocks.append(nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1)) + + # residual and downsampling blocks, with attention on smaller res (16x16) + for i in range(self.num_resolutions): + block_in_ch = nf * in_ch_mult[i] + block_out_ch = nf * ch_mult[i] + for _ in range(self.num_res_blocks): + blocks.append(ResBlock(block_in_ch, block_out_ch)) + block_in_ch = block_out_ch + if curr_res in attn_resolutions: + blocks.append(AttnBlock(block_in_ch)) + + if i != self.num_resolutions - 1: + blocks.append(Downsample(block_in_ch)) + curr_res = curr_res // 2 + + # non-local attention block + blocks.append(ResBlock(block_in_ch, block_in_ch)) + blocks.append(AttnBlock(block_in_ch)) + blocks.append(ResBlock(block_in_ch, block_in_ch)) + + # normalise and convert to latent size + blocks.append(normalize(block_in_ch)) + blocks.append(nn.Conv2d(block_in_ch, emb_dim, kernel_size=3, stride=1, padding=1)) + self.blocks = nn.ModuleList(blocks) + + def forward(self, x): + for block in self.blocks: + x = block(x) + + return x + + +class Generator(nn.Module): + def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions): + super().__init__() + self.nf = nf + self.ch_mult = ch_mult + self.num_resolutions = len(self.ch_mult) + self.num_res_blocks = res_blocks + self.resolution = img_size + self.attn_resolutions = attn_resolutions + self.in_channels = emb_dim + self.out_channels = 3 + block_in_ch = self.nf * self.ch_mult[-1] + curr_res = self.resolution // 2 ** (self.num_resolutions-1) + + blocks = [] + # initial conv + blocks.append(nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1)) + + # non-local attention block + blocks.append(ResBlock(block_in_ch, block_in_ch)) + blocks.append(AttnBlock(block_in_ch)) + blocks.append(ResBlock(block_in_ch, block_in_ch)) + + for i in reversed(range(self.num_resolutions)): + block_out_ch = self.nf * self.ch_mult[i] + + for _ in range(self.num_res_blocks): + blocks.append(ResBlock(block_in_ch, block_out_ch)) + block_in_ch = block_out_ch + + if curr_res in self.attn_resolutions: + blocks.append(AttnBlock(block_in_ch)) + + if i != 0: + blocks.append(Upsample(block_in_ch)) + curr_res = curr_res * 2 + + blocks.append(normalize(block_in_ch)) + blocks.append(nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1)) + + self.blocks = nn.ModuleList(blocks) + + + def forward(self, x): + for block in self.blocks: + x = block(x) + + return x + + +@ARCH_REGISTRY.register() +class VQAutoEncoder(nn.Module): + def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256, + beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None): + super().__init__() + logger = get_root_logger() + self.in_channels = 3 + self.nf = nf + self.n_blocks = res_blocks + self.codebook_size = codebook_size + self.embed_dim = emb_dim + self.ch_mult = ch_mult + self.resolution = img_size + self.attn_resolutions = attn_resolutions + self.quantizer_type = quantizer + self.encoder = Encoder( + self.in_channels, + self.nf, + self.embed_dim, + self.ch_mult, + self.n_blocks, + self.resolution, + self.attn_resolutions + ) + if self.quantizer_type == "nearest": + self.beta = beta #0.25 + self.quantize = 
VectorQuantizer(self.codebook_size, self.embed_dim, self.beta) + elif self.quantizer_type == "gumbel": + self.gumbel_num_hiddens = emb_dim + self.straight_through = gumbel_straight_through + self.kl_weight = gumbel_kl_weight + self.quantize = GumbelQuantizer( + self.codebook_size, + self.embed_dim, + self.gumbel_num_hiddens, + self.straight_through, + self.kl_weight + ) + self.generator = Generator( + self.nf, + self.embed_dim, + self.ch_mult, + self.n_blocks, + self.resolution, + self.attn_resolutions + ) + + if model_path is not None: + chkpt = torch.load(model_path, map_location='cpu') + if 'params_ema' in chkpt: + self.load_state_dict(torch.load(model_path, map_location='cpu')['params_ema']) + logger.info(f'vqgan is loaded from: {model_path} [params_ema]') + elif 'params' in chkpt: + self.load_state_dict(torch.load(model_path, map_location='cpu')['params']) + logger.info(f'vqgan is loaded from: {model_path} [params]') + else: + raise ValueError(f'Wrong params!') + + + def forward(self, x): + x = self.encoder(x) + quant, codebook_loss, quant_stats = self.quantize(x) + x = self.generator(quant) + return x, codebook_loss, quant_stats + + + +# patch based discriminator +@ARCH_REGISTRY.register() +class VQGANDiscriminator(nn.Module): + def __init__(self, nc=3, ndf=64, n_layers=4, model_path=None): + super().__init__() + + layers = [nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)] + ndf_mult = 1 + ndf_mult_prev = 1 + for n in range(1, n_layers): # gradually increase the number of filters + ndf_mult_prev = ndf_mult + ndf_mult = min(2 ** n, 8) + layers += [ + nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=2, padding=1, bias=False), + nn.BatchNorm2d(ndf * ndf_mult), + nn.LeakyReLU(0.2, True) + ] + + ndf_mult_prev = ndf_mult + ndf_mult = min(2 ** n_layers, 8) + + layers += [ + nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=1, padding=1, bias=False), + nn.BatchNorm2d(ndf * ndf_mult), + nn.LeakyReLU(0.2, True) + ] + + layers += [ + nn.Conv2d(ndf * ndf_mult, 1, kernel_size=4, stride=1, padding=1)] # output 1 channel prediction map + self.main = nn.Sequential(*layers) + + if model_path is not None: + chkpt = torch.load(model_path, map_location='cpu') + if 'params_d' in chkpt: + self.load_state_dict(torch.load(model_path, map_location='cpu')['params_d']) + elif 'params' in chkpt: + self.load_state_dict(torch.load(model_path, map_location='cpu')['params']) + else: + raise ValueError(f'Wrong params!') + + def forward(self, x): + return self.main(x) \ No newline at end of file diff --git a/modules/codeformer_model.py b/modules/codeformer_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e6d9fa4f4c33222d860092e63f31a2e925bc4359 --- /dev/null +++ b/modules/codeformer_model.py @@ -0,0 +1,140 @@ +import os +import sys +import traceback + +import cv2 +import torch + +import modules.face_restoration +import modules.shared +from modules import shared, devices, modelloader +from modules.paths import script_path, models_path + +# codeformer people made a choice to include modified basicsr library to their project which makes +# it utterly impossible to use it alongside with other libraries that also use basicsr, like GFPGAN. +# I am making a choice to include some files from codeformer to work around this issue. 
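+#
+# A hedged usage sketch for this module (all names are defined below in this file; the
+# weight value is only illustrative):
+#
+#   setup_model(dirname)                     # registers FaceRestorerCodeFormer in shared.face_restorers
+#   if have_codeformer:
+#       restored = codeformer.restore(np_image, w=0.5)   # np_image: HxWx3 uint8 RGB array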
+model_dir = "Codeformer" +model_path = os.path.join(models_path, model_dir) +model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth' + +have_codeformer = False +codeformer = None + + +def setup_model(dirname): + global model_path + if not os.path.exists(model_path): + os.makedirs(model_path) + + path = modules.paths.paths.get("CodeFormer", None) + if path is None: + return + + try: + from torchvision.transforms.functional import normalize + from modules.codeformer.codeformer_arch import CodeFormer + from basicsr.utils.download_util import load_file_from_url + from basicsr.utils import imwrite, img2tensor, tensor2img + from facelib.utils.face_restoration_helper import FaceRestoreHelper + from modules.shared import cmd_opts + + net_class = CodeFormer + + class FaceRestorerCodeFormer(modules.face_restoration.FaceRestoration): + def name(self): + return "CodeFormer" + + def __init__(self, dirname): + self.net = None + self.face_helper = None + self.cmd_dir = dirname + + def create_models(self): + + if self.net is not None and self.face_helper is not None: + self.net.to(devices.device_codeformer) + return self.net, self.face_helper + model_paths = modelloader.load_models(model_path, model_url, self.cmd_dir, download_name='codeformer-v0.1.0.pth') + if len(model_paths) != 0: + ckpt_path = model_paths[0] + else: + print("Unable to load codeformer model.") + return None, None + net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(devices.device_codeformer) + checkpoint = torch.load(ckpt_path)['params_ema'] + net.load_state_dict(checkpoint) + net.eval() + + face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=devices.device_codeformer) + + self.net = net + self.face_helper = face_helper + + return net, face_helper + + def send_model_to(self, device): + self.net.to(device) + self.face_helper.face_det.to(device) + self.face_helper.face_parse.to(device) + + def restore(self, np_image, w=None): + np_image = np_image[:, :, ::-1] + + original_resolution = np_image.shape[0:2] + + self.create_models() + if self.net is None or self.face_helper is None: + return np_image + + self.send_model_to(devices.device_codeformer) + + self.face_helper.clean_all() + self.face_helper.read_image(np_image) + self.face_helper.get_face_landmarks_5(only_center_face=False, resize=640, eye_dist_threshold=5) + self.face_helper.align_warp_face() + + for idx, cropped_face in enumerate(self.face_helper.cropped_faces): + cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True) + normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True) + cropped_face_t = cropped_face_t.unsqueeze(0).to(devices.device_codeformer) + + try: + with torch.no_grad(): + output = self.net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0] + restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1)) + del output + torch.cuda.empty_cache() + except Exception as error: + print(f'\tFailed inference for CodeFormer: {error}', file=sys.stderr) + restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1)) + + restored_face = restored_face.astype('uint8') + self.face_helper.add_restored_face(restored_face) + + self.face_helper.get_inverse_affine(None) + + restored_img = self.face_helper.paste_faces_to_input_image() + restored_img = restored_img[:, :, ::-1] + + if original_resolution != 
restored_img.shape[0:2]: + restored_img = cv2.resize(restored_img, (0, 0), fx=original_resolution[1]/restored_img.shape[1], fy=original_resolution[0]/restored_img.shape[0], interpolation=cv2.INTER_LINEAR) + + self.face_helper.clean_all() + + if shared.opts.face_restoration_unload: + self.send_model_to(devices.cpu) + + return restored_img + + global have_codeformer + have_codeformer = True + + global codeformer + codeformer = FaceRestorerCodeFormer(dirname) + shared.face_restorers.append(codeformer) + + except Exception: + print("Error setting up CodeFormer:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + # sys.path = stored_sys_path diff --git a/modules/deepbooru.py b/modules/deepbooru.py new file mode 100644 index 0000000000000000000000000000000000000000..f34f37882157c177fa1305dc97d0013573aa00cd --- /dev/null +++ b/modules/deepbooru.py @@ -0,0 +1,173 @@ +import os.path +from concurrent.futures import ProcessPoolExecutor +import multiprocessing +import time +import re + +re_special = re.compile(r'([\\()])') + +def get_deepbooru_tags(pil_image): + """ + This method is for running only one image at a time for simple use. Used to the img2img interrogate. + """ + from modules import shared # prevents circular reference + + try: + create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts()) + return get_tags_from_process(pil_image) + finally: + release_process() + + +OPT_INCLUDE_RANKS = "include_ranks" +def create_deepbooru_opts(): + from modules import shared + + return { + "use_spaces": shared.opts.deepbooru_use_spaces, + "use_escape": shared.opts.deepbooru_escape, + "alpha_sort": shared.opts.deepbooru_sort_alpha, + OPT_INCLUDE_RANKS: shared.opts.interrogate_return_ranks, + } + + +def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts): + model, tags = get_deepbooru_tags_model() + while True: # while process is running, keep monitoring queue for new image + pil_image = queue.get() + if pil_image == "QUIT": + break + else: + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts) + + +def create_deepbooru_process(threshold, deepbooru_opts): + """ + Creates deepbooru process. A queue is created to send images into the process. This enables multiple images + to be processed in a row without reloading the model or creating a new process. To return the data, a shared + dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned + to the dictionary and the method adding the image to the queue should wait for this value to be updated with + the tags. 
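+
+    A rough usage sketch, mirroring get_deepbooru_tags above (the threshold value is illustrative):
+
+        create_deepbooru_process(0.5, create_deepbooru_opts())
+        try:
+            tags = get_tags_from_process(pil_image)   # blocks while polling the shared return dict
+        finally:
+            release_process()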
+ """ + from modules import shared # prevents circular reference + shared.deepbooru_process_manager = multiprocessing.Manager() + shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() + shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts)) + shared.deepbooru_process.start() + + +def get_tags_from_process(image): + from modules import shared + + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = shared.deepbooru_process_return["value"] + shared.deepbooru_process_return["value"] = -1 + + return caption + + +def release_process(): + """ + Stops the deepbooru process to return used memory + """ + from modules import shared # prevents circular reference + shared.deepbooru_process_queue.put("QUIT") + shared.deepbooru_process.join() + shared.deepbooru_process_queue = None + shared.deepbooru_process = None + shared.deepbooru_process_return = None + shared.deepbooru_process_manager = None + +def get_deepbooru_tags_model(): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np + this_folder = os.path.dirname(__file__) + model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) + if not os.path.exists(os.path.join(model_path, 'project.json')): + # there is no point importing these every time + import zipfile + from basicsr.utils.download_util import load_file_from_url + load_file_from_url( + r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", + model_path) + with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: + zip_ref.extractall(model_path) + os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) + + tags = dd.project.load_tags_from_project(model_path) + model = dd.project.load_model_from_project( + model_path, compile_model=True + ) + return model, tags + + +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np + + alpha_sort = deepbooru_opts['alpha_sort'] + use_spaces = deepbooru_opts['use_spaces'] + use_escape = deepbooru_opts['use_escape'] + include_ranks = deepbooru_opts['include_ranks'] + + width = model.input_shape[2] + height = model.input_shape[1] + image = np.array(pil_image) + image = tf.image.resize( + image, + size=(height, width), + method=tf.image.ResizeMethod.AREA, + preserve_aspect_ratio=True, + ) + image = image.numpy() # EagerTensor to np.array + image = dd.image.transform_and_pad_image(image, width, height) + image = image / 255.0 + image_shape = image.shape + image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2])) + + y = model.predict(image)[0] + + result_dict = {} + + for i, tag in enumerate(tags): + result_dict[tag] = y[i] + + unsorted_tags_in_theshold = [] + result_tags_print = [] + for tag in tags: + if result_dict[tag] >= threshold: + if tag.startswith("rating:"): + continue + unsorted_tags_in_theshold.append((result_dict[tag], tag)) + result_tags_print.append(f'{result_dict[tag]} {tag}') + + # sort tags + result_tags_out = [] + sort_ndx = 0 + if alpha_sort: + sort_ndx = 1 + + # 
sort by reverse by likelihood and normal for alpha, and format tag text as requested + unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) + for weight, tag in unsorted_tags_in_theshold: + # note: tag_outformat will still have a colon if include_ranks is True + tag_outformat = tag.replace(':', ' ') + if use_spaces: + tag_outformat = tag_outformat.replace('_', ' ') + if use_escape: + tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) + if include_ranks: + tag_outformat = f"({tag_outformat}:{weight:.3f})" + + result_tags_out.append(tag_outformat) + + print('\n'.join(sorted(result_tags_print, reverse=True))) + + return ', '.join(result_tags_out) diff --git a/modules/devices.py b/modules/devices.py new file mode 100644 index 0000000000000000000000000000000000000000..03ef58f19457357faadfa0ccdeb001a58037b5a0 --- /dev/null +++ b/modules/devices.py @@ -0,0 +1,72 @@ +import contextlib + +import torch + +from modules import errors + +# has_mps is only available in nightly pytorch (for now), `getattr` for compatibility +has_mps = getattr(torch, 'has_mps', False) + +cpu = torch.device("cpu") + + +def get_optimal_device(): + if torch.cuda.is_available(): + return torch.device("cuda") + + if has_mps: + return torch.device("mps") + + return cpu + + +def torch_gc(): + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + +def enable_tf32(): + if torch.cuda.is_available(): + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + + +errors.run(enable_tf32, "Enabling TF32") + +device = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device() +dtype = torch.float16 +dtype_vae = torch.float16 + +def randn(seed, shape): + # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used. + if device.type == 'mps': + generator = torch.Generator(device=cpu) + generator.manual_seed(seed) + noise = torch.randn(shape, generator=generator, device=cpu).to(device) + return noise + + torch.manual_seed(seed) + return torch.randn(shape, device=device) + + +def randn_without_seed(shape): + # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used. 
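+    # Workaround: draw the noise with a CPU generator and move the result to the MPS device,
+    # rather than calling torch.randn directly on the 'mps' device.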
+ if device.type == 'mps': + generator = torch.Generator(device=cpu) + noise = torch.randn(shape, generator=generator, device=cpu).to(device) + return noise + + return torch.randn(shape, device=device) + + +def autocast(disable=False): + from modules import shared + + if disable: + return contextlib.nullcontext() + + if dtype == torch.float32 or shared.cmd_opts.precision == "full": + return contextlib.nullcontext() + + return torch.autocast("cuda") diff --git a/modules/errors.py b/modules/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..372dc51a0b95eca18d2474d333509b92eacbe92e --- /dev/null +++ b/modules/errors.py @@ -0,0 +1,10 @@ +import sys +import traceback + + +def run(code, task): + try: + code() + except Exception as e: + print(f"{task}: {type(e).__name__}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..46ad0da3caca92d214822eb40b572a45e4cbe8fb --- /dev/null +++ b/modules/esrgan_model.py @@ -0,0 +1,158 @@ +import os + +import numpy as np +import torch +from PIL import Image +from basicsr.utils.download_util import load_file_from_url + +import modules.esrgan_model_arch as arch +from modules import shared, modelloader, images, devices +from modules.upscaler import Upscaler, UpscalerData +from modules.shared import opts + + +def fix_model_layers(crt_model, pretrained_net): + # this code is adapted from https://github.com/xinntao/ESRGAN + if 'conv_first.weight' in pretrained_net: + return pretrained_net + + if 'model.0.weight' not in pretrained_net: + is_realesrgan = "params_ema" in pretrained_net and 'body.0.rdb1.conv1.weight' in pretrained_net["params_ema"] + if is_realesrgan: + raise Exception("The file is a RealESRGAN model, it can't be used as a ESRGAN model.") + else: + raise Exception("The file is not a ESRGAN model.") + + crt_net = crt_model.state_dict() + load_net_clean = {} + for k, v in pretrained_net.items(): + if k.startswith('module.'): + load_net_clean[k[7:]] = v + else: + load_net_clean[k] = v + pretrained_net = load_net_clean + + tbd = [] + for k, v in crt_net.items(): + tbd.append(k) + + # directly copy + for k, v in crt_net.items(): + if k in pretrained_net and pretrained_net[k].size() == v.size(): + crt_net[k] = pretrained_net[k] + tbd.remove(k) + + crt_net['conv_first.weight'] = pretrained_net['model.0.weight'] + crt_net['conv_first.bias'] = pretrained_net['model.0.bias'] + + for k in tbd.copy(): + if 'RDB' in k: + ori_k = k.replace('RRDB_trunk.', 'model.1.sub.') + if '.weight' in k: + ori_k = ori_k.replace('.weight', '.0.weight') + elif '.bias' in k: + ori_k = ori_k.replace('.bias', '.0.bias') + crt_net[k] = pretrained_net[ori_k] + tbd.remove(k) + + crt_net['trunk_conv.weight'] = pretrained_net['model.1.sub.23.weight'] + crt_net['trunk_conv.bias'] = pretrained_net['model.1.sub.23.bias'] + crt_net['upconv1.weight'] = pretrained_net['model.3.weight'] + crt_net['upconv1.bias'] = pretrained_net['model.3.bias'] + crt_net['upconv2.weight'] = pretrained_net['model.6.weight'] + crt_net['upconv2.bias'] = pretrained_net['model.6.bias'] + crt_net['HRconv.weight'] = pretrained_net['model.8.weight'] + crt_net['HRconv.bias'] = pretrained_net['model.8.bias'] + crt_net['conv_last.weight'] = pretrained_net['model.10.weight'] + crt_net['conv_last.bias'] = pretrained_net['model.10.bias'] + + return crt_net + +class UpscalerESRGAN(Upscaler): + def __init__(self, dirname): + self.name = "ESRGAN" + 
self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/ESRGAN.pth" + self.model_name = "ESRGAN_4x" + self.scalers = [] + self.user_path = dirname + super().__init__() + model_paths = self.find_models(ext_filter=[".pt", ".pth"]) + scalers = [] + if len(model_paths) == 0: + scaler_data = UpscalerData(self.model_name, self.model_url, self, 4) + scalers.append(scaler_data) + for file in model_paths: + if "http" in file: + name = self.model_name + else: + name = modelloader.friendly_name(file) + + scaler_data = UpscalerData(name, file, self, 4) + self.scalers.append(scaler_data) + + def do_upscale(self, img, selected_model): + model = self.load_model(selected_model) + if model is None: + return img + model.to(devices.device_esrgan) + img = esrgan_upscale(model, img) + return img + + def load_model(self, path: str): + if "http" in path: + filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, + file_name="%s.pth" % self.model_name, + progress=True) + else: + filename = path + if not os.path.exists(filename) or filename is None: + print("Unable to load %s from %s" % (self.model_path, filename)) + return None + + pretrained_net = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None) + crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32) + + pretrained_net = fix_model_layers(crt_model, pretrained_net) + crt_model.load_state_dict(pretrained_net) + crt_model.eval() + + return crt_model + + +def upscale_without_tiling(model, img): + img = np.array(img) + img = img[:, :, ::-1] + img = np.moveaxis(img, 2, 0) / 255 + img = torch.from_numpy(img).float() + img = img.unsqueeze(0).to(devices.device_esrgan) + with torch.no_grad(): + output = model(img) + output = output.squeeze().float().cpu().clamp_(0, 1).numpy() + output = 255. * np.moveaxis(output, 0, 2) + output = output.astype(np.uint8) + output = output[:, :, ::-1] + return Image.fromarray(output, 'RGB') + + +def esrgan_upscale(model, img): + if opts.ESRGAN_tile == 0: + return upscale_without_tiling(model, img) + + grid = images.split_grid(img, opts.ESRGAN_tile, opts.ESRGAN_tile, opts.ESRGAN_tile_overlap) + newtiles = [] + scale_factor = 1 + + for y, h, row in grid.tiles: + newrow = [] + for tiledata in row: + x, w, tile = tiledata + + output = upscale_without_tiling(model, tile) + scale_factor = output.width // tile.width + + newrow.append([x * scale_factor, w * scale_factor, output]) + newtiles.append([y * scale_factor, h * scale_factor, newrow]) + + newgrid = images.Grid(newtiles, grid.tile_w * scale_factor, grid.tile_h * scale_factor, grid.image_w * scale_factor, grid.image_h * scale_factor, grid.overlap * scale_factor) + output = images.combine_grid(newgrid) + return output diff --git a/modules/esrgan_model_arch.py b/modules/esrgan_model_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..e413d36ed0cdd15e38bbeb731e8f9bdd54df29e0 --- /dev/null +++ b/modules/esrgan_model_arch.py @@ -0,0 +1,80 @@ +# this file is taken from https://github.com/xinntao/ESRGAN + +import functools +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def make_layer(block, n_layers): + layers = [] + for _ in range(n_layers): + layers.append(block()) + return nn.Sequential(*layers) + + +class ResidualDenseBlock_5C(nn.Module): + def __init__(self, nf=64, gc=32, bias=True): + super(ResidualDenseBlock_5C, self).__init__() + # gc: growth channel, i.e. 
intermediate channels + self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias) + self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias) + self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias) + self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias) + self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + # initialization + # mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1) + + def forward(self, x): + x1 = self.lrelu(self.conv1(x)) + x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1))) + x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1))) + x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1))) + x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) + return x5 * 0.2 + x + + +class RRDB(nn.Module): + '''Residual in Residual Dense Block''' + + def __init__(self, nf, gc=32): + super(RRDB, self).__init__() + self.RDB1 = ResidualDenseBlock_5C(nf, gc) + self.RDB2 = ResidualDenseBlock_5C(nf, gc) + self.RDB3 = ResidualDenseBlock_5C(nf, gc) + + def forward(self, x): + out = self.RDB1(x) + out = self.RDB2(out) + out = self.RDB3(out) + return out * 0.2 + x + + +class RRDBNet(nn.Module): + def __init__(self, in_nc, out_nc, nf, nb, gc=32): + super(RRDBNet, self).__init__() + RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc) + + self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True) + self.RRDB_trunk = make_layer(RRDB_block_f, nb) + self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + #### upsampling + self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True) + + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x): + fea = self.conv_first(x) + trunk = self.trunk_conv(self.RRDB_trunk(fea)) + fea = fea + trunk + + fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) + fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) + out = self.conv_last(self.lrelu(self.HRconv(fea))) + + return out diff --git a/modules/extras.py b/modules/extras.py new file mode 100644 index 0000000000000000000000000000000000000000..b24d7de3f8a6f3b8f798efe7a9b1eb96c38ca9f1 --- /dev/null +++ b/modules/extras.py @@ -0,0 +1,222 @@ +import math +import os + +import numpy as np +from PIL import Image + +import torch +import tqdm + +from modules import processing, shared, images, devices, sd_models +from modules.shared import opts +import modules.gfpgan_model +from modules.ui import plaintext_to_html +import modules.codeformer_model +import piexif +import piexif.helper +import gradio as gr + + +cached_images = {} + + +def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility): + devices.torch_gc() + + imageArr = [] + # Also keep track of original file names + imageNameArr = [] + + if extras_mode == 1: + #convert file to pillow image + for img in image_folder: + image = Image.open(img) + imageArr.append(image) + imageNameArr.append(os.path.splitext(img.orig_name)[0]) + else: + imageArr.append(image) + imageNameArr.append(None) + + outpath = opts.outdir_samples or opts.outdir_extras_samples + + outputs = [] + for image, image_name in zip(imageArr, 
imageNameArr): + if image is None: + return outputs, "Please select an input image.", '' + existing_pnginfo = image.info or {} + + image = image.convert("RGB") + info = "" + + if gfpgan_visibility > 0: + restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(image, dtype=np.uint8)) + res = Image.fromarray(restored_img) + + if gfpgan_visibility < 1.0: + res = Image.blend(image, res, gfpgan_visibility) + + info += f"GFPGAN visibility:{round(gfpgan_visibility, 2)}\n" + image = res + + if codeformer_visibility > 0: + restored_img = modules.codeformer_model.codeformer.restore(np.array(image, dtype=np.uint8), w=codeformer_weight) + res = Image.fromarray(restored_img) + + if codeformer_visibility < 1.0: + res = Image.blend(image, res, codeformer_visibility) + + info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n" + image = res + + if resize_mode == 1: + upscaling_resize = max(upscaling_resize_w/image.width, upscaling_resize_h/image.height) + crop_info = " (crop)" if upscaling_crop else "" + info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n" + + if upscaling_resize != 1.0: + def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop): + small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10)) + pixels = tuple(np.array(small).flatten().tolist()) + key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight) + pixels + + c = cached_images.get(key) + if c is None: + upscaler = shared.sd_upscalers[scaler_index] + c = upscaler.scaler.upscale(image, resize, upscaler.data_path) + if mode == 1 and crop: + cropped = Image.new("RGB", (resize_w, resize_h)) + cropped.paste(c, box=(resize_w // 2 - c.width // 2, resize_h // 2 - c.height // 2)) + c = cropped + cached_images[key] = c + + return c + + info += f"Upscale: {round(upscaling_resize, 3)}, model:{shared.sd_upscalers[extras_upscaler_1].name}\n" + res = upscale(image, extras_upscaler_1, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop) + + if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0: + res2 = upscale(image, extras_upscaler_2, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop) + info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {round(extras_upscaler_2_visibility, 3)}, model:{shared.sd_upscalers[extras_upscaler_2].name}\n" + res = Image.blend(res, res2, extras_upscaler_2_visibility) + + image = res + + while len(cached_images) > 2: + del cached_images[next(iter(cached_images.keys()))] + + images.save_image(image, path=outpath, basename="", seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True, + no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, + forced_filename=image_name if opts.use_original_name_batch else None) + + if opts.enable_pnginfo: + image.info = existing_pnginfo + image.info["extras"] = info + + outputs.append(image) + + devices.torch_gc() + + return outputs, plaintext_to_html(info), '' + + +def run_pnginfo(image): + if image is None: + return '', '', '' + + items = image.info + geninfo = '' + + if "exif" in image.info: + exif = piexif.load(image.info["exif"]) + exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b'') + try: + exif_comment = piexif.helper.UserComment.load(exif_comment) + except ValueError: + exif_comment = 
exif_comment.decode('utf8', errors="ignore") + + items['exif comment'] = exif_comment + geninfo = exif_comment + + for field in ['jfif', 'jfif_version', 'jfif_unit', 'jfif_density', 'dpi', 'exif', + 'loop', 'background', 'timestamp', 'duration']: + items.pop(field, None) + + geninfo = items.get('parameters', geninfo) + + info = '' + for key, text in items.items(): + info += f""" +
<div>
+<p><b>{plaintext_to_html(str(key))}</b></p>
+<p>{plaintext_to_html(str(text))}</p>
+</div>
+""".strip()+"\n" + + if len(info) == 0: + message = "Nothing found in the image." + info = f"

<div><p>{message}</p></div>
" + + return '', geninfo, info + + +def run_modelmerger(primary_model_name, secondary_model_name, interp_method, interp_amount, save_as_half, custom_name): + # Linear interpolation (https://en.wikipedia.org/wiki/Linear_interpolation) + def weighted_sum(theta0, theta1, alpha): + return ((1 - alpha) * theta0) + (alpha * theta1) + + # Smoothstep (https://en.wikipedia.org/wiki/Smoothstep) + def sigmoid(theta0, theta1, alpha): + alpha = alpha * alpha * (3 - (2 * alpha)) + return theta0 + ((theta1 - theta0) * alpha) + + # Inverse Smoothstep (https://en.wikipedia.org/wiki/Smoothstep) + def inv_sigmoid(theta0, theta1, alpha): + import math + alpha = 0.5 - math.sin(math.asin(1.0 - 2.0 * alpha) / 3.0) + return theta0 + ((theta1 - theta0) * alpha) + + primary_model_info = sd_models.checkpoints_list[primary_model_name] + secondary_model_info = sd_models.checkpoints_list[secondary_model_name] + + print(f"Loading {primary_model_info.filename}...") + primary_model = torch.load(primary_model_info.filename, map_location='cpu') + + print(f"Loading {secondary_model_info.filename}...") + secondary_model = torch.load(secondary_model_info.filename, map_location='cpu') + + theta_0 = sd_models.get_state_dict_from_checkpoint(primary_model) + theta_1 = sd_models.get_state_dict_from_checkpoint(secondary_model) + + theta_funcs = { + "Weighted Sum": weighted_sum, + "Sigmoid": sigmoid, + "Inverse Sigmoid": inv_sigmoid, + } + theta_func = theta_funcs[interp_method] + + print(f"Merging...") + for key in tqdm.tqdm(theta_0.keys()): + if 'model' in key and key in theta_1: + theta_0[key] = theta_func(theta_0[key], theta_1[key], (float(1.0) - interp_amount)) # Need to reverse the interp_amount to match the desired mix ration in the merged checkpoint + if save_as_half: + theta_0[key] = theta_0[key].half() + + for key in theta_1.keys(): + if 'model' in key and key not in theta_0: + theta_0[key] = theta_1[key] + if save_as_half: + theta_0[key] = theta_0[key].half() + + ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path + + filename = primary_model_info.model_name + '_' + str(round(interp_amount, 2)) + '-' + secondary_model_info.model_name + '_' + str(round((float(1.0) - interp_amount), 2)) + '-' + interp_method.replace(" ", "_") + '-merged.ckpt' + filename = filename if custom_name == '' else (custom_name + '.ckpt') + output_modelname = os.path.join(ckpt_dir, filename) + + print(f"Saving to {output_modelname}...") + torch.save(primary_model, output_modelname) + + sd_models.list_models() + + print(f"Checkpoint saved.") + return ["Checkpoint saved to " + output_modelname] + [gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(3)] diff --git a/modules/face_restoration.py b/modules/face_restoration.py new file mode 100644 index 0000000000000000000000000000000000000000..4ae53d21bef3e0783481d7c3cf3a9b2fedc4c092 --- /dev/null +++ b/modules/face_restoration.py @@ -0,0 +1,19 @@ +from modules import shared + + +class FaceRestoration: + def name(self): + return "None" + + def restore(self, np_image): + return np_image + + +def restore_faces(np_image): + face_restorers = [x for x in shared.face_restorers if x.name() == shared.opts.face_restoration_model or shared.opts.face_restoration_model is None] + if len(face_restorers) == 0: + return np_image + + face_restorer = face_restorers[0] + + return face_restorer.restore(np_image) diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py new file mode 100644 index 
0000000000000000000000000000000000000000..c27826b6b5a3d2dccfb277c649500becc6632462 --- /dev/null +++ b/modules/generation_parameters_copypaste.py @@ -0,0 +1,101 @@ +import os +import re +import gradio as gr +from modules.shared import script_path +from modules import shared + +re_param_code = r"\s*([\w ]+):\s*([^,]+)(?:,|$)" +re_param = re.compile(re_param_code) +re_params = re.compile(r"^(?:" + re_param_code + "){3,}$") +re_imagesize = re.compile(r"^(\d+)x(\d+)$") +type_of_gr_update = type(gr.update()) + + +def parse_generation_parameters(x: str): + """parses generation parameters string, the one you see in text field under the picture in UI: +``` +girl with an artist's beret, determined, blue eyes, desert scene, computer monitors, heavy makeup, by Alphonse Mucha and Charlie Bowater, ((eyeshadow)), (coquettish), detailed, intricate +Negative prompt: ugly, fat, obese, chubby, (((deformed))), [blurry], bad anatomy, disfigured, poorly drawn face, mutation, mutated, (extra_limb), (ugly), (poorly drawn hands), messy drawing +Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model hash: 45dee52b +``` + + returns a dict with field values + """ + + res = {} + + prompt = "" + negative_prompt = "" + + done_with_prompt = False + + *lines, lastline = x.strip().split("\n") + if not re_params.match(lastline): + lines.append(lastline) + lastline = '' + + for i, line in enumerate(lines): + line = line.strip() + if line.startswith("Negative prompt:"): + done_with_prompt = True + line = line[16:].strip() + + if done_with_prompt: + negative_prompt += ("" if negative_prompt == "" else "\n") + line + else: + prompt += ("" if prompt == "" else "\n") + line + + if len(prompt) > 0: + res["Prompt"] = prompt + + if len(negative_prompt) > 0: + res["Negative prompt"] = negative_prompt + + for k, v in re_param.findall(lastline): + m = re_imagesize.match(v) + if m is not None: + res[k+"-1"] = m.group(1) + res[k+"-2"] = m.group(2) + else: + res[k] = v + + return res + + +def connect_paste(button, paste_fields, input_comp, js=None): + def paste_func(prompt): + if not prompt and not shared.cmd_opts.hide_ui_dir_config: + filename = os.path.join(script_path, "params.txt") + if os.path.exists(filename): + with open(filename, "r", encoding="utf8") as file: + prompt = file.read() + + params = parse_generation_parameters(prompt) + res = [] + + for output, key in paste_fields: + if callable(key): + v = key(params) + else: + v = params.get(key, None) + + if v is None: + res.append(gr.update()) + elif isinstance(v, type_of_gr_update): + res.append(v) + else: + try: + valtype = type(output.value) + val = valtype(v) + res.append(gr.update(value=val)) + except Exception: + res.append(gr.update()) + + return res + + button.click( + fn=paste_func, + _js=js, + inputs=[input_comp], + outputs=[x[0] for x in paste_fields], + ) diff --git a/modules/gfpgan_model.py b/modules/gfpgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..a9452dce5e83b0f280ca319e4e319d71dc222fe2 --- /dev/null +++ b/modules/gfpgan_model.py @@ -0,0 +1,115 @@ +import os +import sys +import traceback + +import facexlib +import gfpgan + +import modules.face_restoration +from modules import shared, devices, modelloader +from modules.paths import models_path + +model_dir = "GFPGAN" +user_path = None +model_path = os.path.join(models_path, model_dir) +model_url = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth" +have_gfpgan = False +loaded_gfpgan_model = None + + +def gfpgann(): + global 
loaded_gfpgan_model + global model_path + if loaded_gfpgan_model is not None: + loaded_gfpgan_model.gfpgan.to(devices.device_gfpgan) + return loaded_gfpgan_model + + if gfpgan_constructor is None: + return None + + models = modelloader.load_models(model_path, model_url, user_path, ext_filter="GFPGAN") + if len(models) == 1 and "http" in models[0]: + model_file = models[0] + elif len(models) != 0: + latest_file = max(models, key=os.path.getctime) + model_file = latest_file + else: + print("Unable to load gfpgan model!") + return None + model = gfpgan_constructor(model_path=model_file, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None) + loaded_gfpgan_model = model + + return model + + +def send_model_to(model, device): + model.gfpgan.to(device) + model.face_helper.face_det.to(device) + model.face_helper.face_parse.to(device) + + +def gfpgan_fix_faces(np_image): + model = gfpgann() + if model is None: + return np_image + + send_model_to(model, devices.device_gfpgan) + + np_image_bgr = np_image[:, :, ::-1] + cropped_faces, restored_faces, gfpgan_output_bgr = model.enhance(np_image_bgr, has_aligned=False, only_center_face=False, paste_back=True) + np_image = gfpgan_output_bgr[:, :, ::-1] + + model.face_helper.clean_all() + + if shared.opts.face_restoration_unload: + send_model_to(model, devices.cpu) + + return np_image + + +gfpgan_constructor = None + + +def setup_model(dirname): + global model_path + if not os.path.exists(model_path): + os.makedirs(model_path) + + try: + from gfpgan import GFPGANer + from facexlib import detection, parsing + global user_path + global have_gfpgan + global gfpgan_constructor + + load_file_from_url_orig = gfpgan.utils.load_file_from_url + facex_load_file_from_url_orig = facexlib.detection.load_file_from_url + facex_load_file_from_url_orig2 = facexlib.parsing.load_file_from_url + + def my_load_file_from_url(**kwargs): + return load_file_from_url_orig(**dict(kwargs, model_dir=model_path)) + + def facex_load_file_from_url(**kwargs): + return facex_load_file_from_url_orig(**dict(kwargs, save_dir=model_path, model_dir=None)) + + def facex_load_file_from_url2(**kwargs): + return facex_load_file_from_url_orig2(**dict(kwargs, save_dir=model_path, model_dir=None)) + + gfpgan.utils.load_file_from_url = my_load_file_from_url + facexlib.detection.load_file_from_url = facex_load_file_from_url + facexlib.parsing.load_file_from_url = facex_load_file_from_url2 + user_path = dirname + have_gfpgan = True + gfpgan_constructor = GFPGANer + + class FaceRestorerGFPGAN(modules.face_restoration.FaceRestoration): + def name(self): + return "GFPGAN" + + def restore(self, np_image): + return gfpgan_fix_faces(np_image) + + shared.face_restorers.append(FaceRestorerGFPGAN()) + except Exception: + print("Error setting up GFPGAN:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py new file mode 100644 index 0000000000000000000000000000000000000000..f1248bb7b5a377b86f571fdaed8deaf86745d62f --- /dev/null +++ b/modules/hypernetworks/hypernetwork.py @@ -0,0 +1,314 @@ +import datetime +import glob +import html +import os +import sys +import traceback +import tqdm + +import torch + +from ldm.util import default +from modules import devices, shared, processing, sd_models +import torch +from torch import einsum +from einops import rearrange, repeat +import modules.textual_inversion.dataset +from modules.textual_inversion.learn_schedule import LearnRateScheduler + + +class 
HypernetworkModule(torch.nn.Module): + multiplier = 1.0 + + def __init__(self, dim, state_dict=None): + super().__init__() + + self.linear1 = torch.nn.Linear(dim, dim * 2) + self.linear2 = torch.nn.Linear(dim * 2, dim) + + if state_dict is not None: + self.load_state_dict(state_dict, strict=True) + else: + + self.linear1.weight.data.normal_(mean=0.0, std=0.01) + self.linear1.bias.data.zero_() + self.linear2.weight.data.normal_(mean=0.0, std=0.01) + self.linear2.bias.data.zero_() + + self.to(devices.device) + + def forward(self, x): + return x + (self.linear2(self.linear1(x))) * self.multiplier + + +def apply_strength(value=None): + HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength + + +class Hypernetwork: + filename = None + name = None + + def __init__(self, name=None, enable_sizes=None): + self.filename = None + self.name = name + self.layers = {} + self.step = 0 + self.sd_checkpoint = None + self.sd_checkpoint_name = None + + for size in enable_sizes or []: + self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size)) + + def weights(self): + res = [] + + for k, layers in self.layers.items(): + for layer in layers: + layer.train() + res += [layer.linear1.weight, layer.linear1.bias, layer.linear2.weight, layer.linear2.bias] + + return res + + def save(self, filename): + state_dict = {} + + for k, v in self.layers.items(): + state_dict[k] = (v[0].state_dict(), v[1].state_dict()) + + state_dict['step'] = self.step + state_dict['name'] = self.name + state_dict['sd_checkpoint'] = self.sd_checkpoint + state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name + + torch.save(state_dict, filename) + + def load(self, filename): + self.filename = filename + if self.name is None: + self.name = os.path.splitext(os.path.basename(filename))[0] + + state_dict = torch.load(filename, map_location='cpu') + + for size, sd in state_dict.items(): + if type(size) == int: + self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1])) + + self.name = state_dict.get('name', self.name) + self.step = state_dict.get('step', 0) + self.sd_checkpoint = state_dict.get('sd_checkpoint', None) + self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) + + +def list_hypernetworks(path): + res = {} + for filename in glob.iglob(os.path.join(path, '**/*.pt'), recursive=True): + name = os.path.splitext(os.path.basename(filename))[0] + res[name] = filename + return res + + +def load_hypernetwork(filename): + path = shared.hypernetworks.get(filename, None) + if path is not None: + print(f"Loading hypernetwork {filename}") + try: + shared.loaded_hypernetwork = Hypernetwork() + shared.loaded_hypernetwork.load(path) + + except Exception: + print(f"Error loading hypernetwork {path}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + else: + if shared.loaded_hypernetwork is not None: + print(f"Unloading hypernetwork") + + shared.loaded_hypernetwork = None + + +def find_closest_hypernetwork_name(search: str): + if not search: + return None + search = search.lower() + applicable = [name for name in shared.hypernetworks if search in name.lower()] + if not applicable: + return None + applicable = sorted(applicable, key=lambda name: len(name)) + return applicable[0] + + +def apply_hypernetwork(hypernetwork, context, layer=None): + hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None) + + if hypernetwork_layers is None: + return context, context + + if layer is not 
None: + layer.hyper_k = hypernetwork_layers[0] + layer.hyper_v = hypernetwork_layers[1] + + context_k = hypernetwork_layers[0](context) + context_v = hypernetwork_layers[1](context) + return context_k, context_v + + +def attention_CrossAttention_forward(self, x, context=None, mask=None): + h = self.heads + + q = self.to_q(x) + context = default(context, x) + + context_k, context_v = apply_hypernetwork(shared.loaded_hypernetwork, context, self) + k = self.to_k(context_k) + v = self.to_v(context_v) + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) + + sim = einsum('b i d, b j d -> b i j', q, k) * self.scale + + if mask is not None: + mask = rearrange(mask, 'b ... -> b (...)') + max_neg_value = -torch.finfo(sim.dtype).max + mask = repeat(mask, 'b j -> (b h) () j', h=h) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + attn = sim.softmax(dim=-1) + + out = einsum('b i j, b j d -> b i d', attn, v) + out = rearrange(out, '(b h) n d -> b n (h d)', h=h) + return self.to_out(out) + + +def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, steps, create_image_every, save_hypernetwork_every, template_file, preview_image_prompt): + assert hypernetwork_name, 'hypernetwork not selected' + + path = shared.hypernetworks.get(hypernetwork_name, None) + shared.loaded_hypernetwork = Hypernetwork() + shared.loaded_hypernetwork.load(path) + + shared.state.textinfo = "Initializing hypernetwork training..." + shared.state.job_count = steps + + filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') + + log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name) + unload = shared.opts.unload_models_when_training + + if save_hypernetwork_every > 0: + hypernetwork_dir = os.path.join(log_directory, "hypernetworks") + os.makedirs(hypernetwork_dir, exist_ok=True) + else: + hypernetwork_dir = None + + if create_image_every > 0: + images_dir = os.path.join(log_directory, "images") + os.makedirs(images_dir, exist_ok=True) + else: + images_dir = None + + shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." 
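The HypernetworkModule defined above is just a two-layer residual MLP applied to the cross-attention context, and apply_hypernetwork() runs one instance for the keys and one for the values. Below is a minimal, self-contained sketch of that idea; the toy class name and tensor shapes are illustrative, not part of the patch:

```python
# Sketch of the residual projection performed by HypernetworkModule above:
# the context fed to cross-attention is passed through two linear layers and
# added back to itself, scaled by a global multiplier. Shapes are illustrative.
import torch

class TinyHypernetworkModule(torch.nn.Module):
    multiplier = 1.0

    def __init__(self, dim):
        super().__init__()
        self.linear1 = torch.nn.Linear(dim, dim * 2)
        self.linear2 = torch.nn.Linear(dim * 2, dim)

    def forward(self, x):
        # residual connection: with near-zero weights the context passes through unchanged
        return x + self.linear2(self.linear1(x)) * self.multiplier

context = torch.randn(2, 77, 768)                 # (batch, tokens, embedding dim)
hyper_k, hyper_v = TinyHypernetworkModule(768), TinyHypernetworkModule(768)
context_k, context_v = hyper_k(context), hyper_v(context)   # separate projections for k and v
print(context_k.shape, context_v.shape)           # torch.Size([2, 77, 768]) twice
```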
+ with torch.autocast("cuda"): + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=1, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True) + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + hypernetwork = shared.loaded_hypernetwork + weights = hypernetwork.weights() + for weight in weights: + weight.requires_grad = True + + losses = torch.zeros((32,)) + + last_saved_file = "" + last_saved_image = "" + + ititial_step = hypernetwork.step or 0 + if ititial_step > steps: + return hypernetwork, filename + + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + + pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) + for i, entry in pbar: + hypernetwork.step = i + ititial_step + + scheduler.apply(optimizer, hypernetwork.step) + if scheduler.finished: + break + + if shared.state.interrupted: + break + + with torch.autocast("cuda"): + cond = entry.cond.to(devices.device) + x = entry.latent.to(devices.device) + loss = shared.sd_model(x.unsqueeze(0), cond)[0] + del x + del cond + + losses[hypernetwork.step % losses.shape[0]] = loss.item() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + pbar.set_description(f"loss: {losses.mean():.7f}") + + if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt') + hypernetwork.save(last_saved_file) + + if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: + last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') + + preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt + + optimizer.zero_grad() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + prompt=preview_text, + steps=20, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + if image is not None: + shared.state.current_image = image + image.save(last_saved_image) + last_saved_image += f", prompt: {preview_text}" + + shared.state.job_no = hypernetwork.step + + shared.state.textinfo = f""" +

+<p>
+Loss: {losses.mean():.7f}<br/>
+Step: {hypernetwork.step}<br/>
+Last prompt: {html.escape(entry.cond_text)}<br/>
+Last saved embedding: {html.escape(last_saved_file)}<br/>
+Last saved image: {html.escape(last_saved_image)}<br/>
+</p>
+""" + + checkpoint = sd_models.select_checkpoint() + + hypernetwork.sd_checkpoint = checkpoint.hash + hypernetwork.sd_checkpoint_name = checkpoint.model_name + hypernetwork.save(filename) + + return hypernetwork, filename + + diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..dfa599afa42b705b0e80385b1f27925ab9794c5a --- /dev/null +++ b/modules/hypernetworks/ui.py @@ -0,0 +1,47 @@ +import html +import os + +import gradio as gr + +import modules.textual_inversion.textual_inversion +import modules.textual_inversion.preprocess +from modules import sd_hijack, shared, devices +from modules.hypernetworks import hypernetwork + + +def create_hypernetwork(name, enable_sizes): + fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") + assert not os.path.exists(fn), f"file {fn} already exists" + + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=[int(x) for x in enable_sizes]) + hypernet.save(fn) + + shared.reload_hypernetworks() + + return gr.Dropdown.update(choices=sorted([x for x in shared.hypernetworks.keys()])), f"Created: {fn}", "" + + +def train_hypernetwork(*args): + + initial_hypernetwork = shared.loaded_hypernetwork + + assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible' + + try: + sd_hijack.undo_optimizations() + + hypernetwork, filename = modules.hypernetworks.hypernetwork.train_hypernetwork(*args) + + res = f""" +Training {'interrupted' if shared.state.interrupted else 'finished'} at {hypernetwork.step} steps. +Hypernetwork saved to {html.escape(filename)} +""" + return res, "" + except Exception: + raise + finally: + shared.loaded_hypernetwork = initial_hypernetwork + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + sd_hijack.apply_optimizations() + diff --git a/modules/images.py b/modules/images.py new file mode 100644 index 0000000000000000000000000000000000000000..c0a906762eace7bad990005c4954b2f657ad1be3 --- /dev/null +++ b/modules/images.py @@ -0,0 +1,465 @@ +import datetime +import math +import os +from collections import namedtuple +import re + +import numpy as np +import piexif +import piexif.helper +from PIL import Image, ImageFont, ImageDraw, PngImagePlugin +from fonts.ttf import Roboto +import string + +from modules import sd_samplers, shared +from modules.shared import opts, cmd_opts + +LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) + + +def image_grid(imgs, batch_size=1, rows=None): + if rows is None: + if opts.n_rows > 0: + rows = opts.n_rows + elif opts.n_rows == 0: + rows = batch_size + else: + rows = math.sqrt(len(imgs)) + rows = round(rows) + + cols = math.ceil(len(imgs) / rows) + + w, h = imgs[0].size + grid = Image.new('RGB', size=(cols * w, rows * h), color='black') + + for i, img in enumerate(imgs): + grid.paste(img, box=(i % cols * w, i // cols * h)) + + return grid + + +Grid = namedtuple("Grid", ["tiles", "tile_w", "tile_h", "image_w", "image_h", "overlap"]) + + +def split_grid(image, tile_w=512, tile_h=512, overlap=64): + w = image.width + h = image.height + + non_overlap_width = tile_w - overlap + non_overlap_height = tile_h - overlap + + cols = math.ceil((w - overlap) / non_overlap_width) + rows = math.ceil((h - overlap) / non_overlap_height) + + dx = (w - tile_w) / (cols - 1) if cols > 1 else 0 + dy = (h - tile_h) / (rows - 1) if rows > 1 else 0 + + grid = Grid([], tile_w, tile_h, w, h, overlap) + 
for row in range(rows): + row_images = [] + + y = int(row * dy) + + if y + tile_h >= h: + y = h - tile_h + + for col in range(cols): + x = int(col * dx) + + if x + tile_w >= w: + x = w - tile_w + + tile = image.crop((x, y, x + tile_w, y + tile_h)) + + row_images.append([x, tile_w, tile]) + + grid.tiles.append([y, tile_h, row_images]) + + return grid + + +def combine_grid(grid): + def make_mask_image(r): + r = r * 255 / grid.overlap + r = r.astype(np.uint8) + return Image.fromarray(r, 'L') + + mask_w = make_mask_image(np.arange(grid.overlap, dtype=np.float32).reshape((1, grid.overlap)).repeat(grid.tile_h, axis=0)) + mask_h = make_mask_image(np.arange(grid.overlap, dtype=np.float32).reshape((grid.overlap, 1)).repeat(grid.image_w, axis=1)) + + combined_image = Image.new("RGB", (grid.image_w, grid.image_h)) + for y, h, row in grid.tiles: + combined_row = Image.new("RGB", (grid.image_w, h)) + for x, w, tile in row: + if x == 0: + combined_row.paste(tile, (0, 0)) + continue + + combined_row.paste(tile.crop((0, 0, grid.overlap, h)), (x, 0), mask=mask_w) + combined_row.paste(tile.crop((grid.overlap, 0, w, h)), (x + grid.overlap, 0)) + + if y == 0: + combined_image.paste(combined_row, (0, 0)) + continue + + combined_image.paste(combined_row.crop((0, 0, combined_row.width, grid.overlap)), (0, y), mask=mask_h) + combined_image.paste(combined_row.crop((0, grid.overlap, combined_row.width, h)), (0, y + grid.overlap)) + + return combined_image + + +class GridAnnotation: + def __init__(self, text='', is_active=True): + self.text = text + self.is_active = is_active + self.size = None + + +def draw_grid_annotations(im, width, height, hor_texts, ver_texts): + def wrap(drawing, text, font, line_length): + lines = [''] + for word in text.split(): + line = f'{lines[-1]} {word}'.strip() + if drawing.textlength(line, font=font) <= line_length: + lines[-1] = line + else: + lines.append(word) + return lines + + def draw_texts(drawing, draw_x, draw_y, lines): + for i, line in enumerate(lines): + drawing.multiline_text((draw_x, draw_y + line.size[1] / 2), line.text, font=fnt, fill=color_active if line.is_active else color_inactive, anchor="mm", align="center") + + if not line.is_active: + drawing.line((draw_x - line.size[0] // 2, draw_y + line.size[1] // 2, draw_x + line.size[0] // 2, draw_y + line.size[1] // 2), fill=color_inactive, width=4) + + draw_y += line.size[1] + line_spacing + + fontsize = (width + height) // 25 + line_spacing = fontsize // 2 + + try: + fnt = ImageFont.truetype(opts.font or Roboto, fontsize) + except Exception: + fnt = ImageFont.truetype(Roboto, fontsize) + + color_active = (0, 0, 0) + color_inactive = (153, 153, 153) + + pad_left = 0 if sum([sum([len(line.text) for line in lines]) for lines in ver_texts]) == 0 else width * 3 // 4 + + cols = im.width // width + rows = im.height // height + + assert cols == len(hor_texts), f'bad number of horizontal texts: {len(hor_texts)}; must be {cols}' + assert rows == len(ver_texts), f'bad number of vertical texts: {len(ver_texts)}; must be {rows}' + + calc_img = Image.new("RGB", (1, 1), "white") + calc_d = ImageDraw.Draw(calc_img) + + for texts, allowed_width in zip(hor_texts + ver_texts, [width] * len(hor_texts) + [pad_left] * len(ver_texts)): + items = [] + texts + texts.clear() + + for line in items: + wrapped = wrap(calc_d, line.text, fnt, allowed_width) + texts += [GridAnnotation(x, line.is_active) for x in wrapped] + + for line in texts: + bbox = calc_d.multiline_textbbox((0, 0), line.text, font=fnt) + line.size = (bbox[2] - bbox[0], bbox[3] - 
bbox[1]) + + hor_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing for lines in hor_texts] + ver_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing * len(lines) for lines in + ver_texts] + + pad_top = max(hor_text_heights) + line_spacing * 2 + + result = Image.new("RGB", (im.width + pad_left, im.height + pad_top), "white") + result.paste(im, (pad_left, pad_top)) + + d = ImageDraw.Draw(result) + + for col in range(cols): + x = pad_left + width * col + width / 2 + y = pad_top / 2 - hor_text_heights[col] / 2 + + draw_texts(d, x, y, hor_texts[col]) + + for row in range(rows): + x = pad_left / 2 + y = pad_top + height * row + height / 2 - ver_text_heights[row] / 2 + + draw_texts(d, x, y, ver_texts[row]) + + return result + + +def draw_prompt_matrix(im, width, height, all_prompts): + prompts = all_prompts[1:] + boundary = math.ceil(len(prompts) / 2) + + prompts_horiz = prompts[:boundary] + prompts_vert = prompts[boundary:] + + hor_texts = [[GridAnnotation(x, is_active=pos & (1 << i) != 0) for i, x in enumerate(prompts_horiz)] for pos in range(1 << len(prompts_horiz))] + ver_texts = [[GridAnnotation(x, is_active=pos & (1 << i) != 0) for i, x in enumerate(prompts_vert)] for pos in range(1 << len(prompts_vert))] + + return draw_grid_annotations(im, width, height, hor_texts, ver_texts) + + +def resize_image(resize_mode, im, width, height): + def resize(im, w, h): + if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None" or im.mode == 'L': + return im.resize((w, h), resample=LANCZOS) + + scale = max(w / im.width, h / im.height) + + if scale > 1.0: + upscalers = [x for x in shared.sd_upscalers if x.name == opts.upscaler_for_img2img] + assert len(upscalers) > 0, f"could not find upscaler named {opts.upscaler_for_img2img}" + + upscaler = upscalers[0] + im = upscaler.scaler.upscale(im, scale, upscaler.data_path) + + if im.width != w or im.height != h: + im = im.resize((w, h), resample=LANCZOS) + + return im + + if resize_mode == 0: + res = resize(im, width, height) + + elif resize_mode == 1: + ratio = width / height + src_ratio = im.width / im.height + + src_w = width if ratio > src_ratio else im.width * height // im.height + src_h = height if ratio <= src_ratio else im.height * width // im.width + + resized = resize(im, src_w, src_h) + res = Image.new("RGB", (width, height)) + res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2)) + + else: + ratio = width / height + src_ratio = im.width / im.height + + src_w = width if ratio < src_ratio else im.width * height // im.height + src_h = height if ratio >= src_ratio else im.height * width // im.width + + resized = resize(im, src_w, src_h) + res = Image.new("RGB", (width, height)) + res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2)) + + if ratio < src_ratio: + fill_height = height // 2 - src_h // 2 + res.paste(resized.resize((width, fill_height), box=(0, 0, width, 0)), box=(0, 0)) + res.paste(resized.resize((width, fill_height), box=(0, resized.height, width, resized.height)), box=(0, fill_height + src_h)) + elif ratio > src_ratio: + fill_width = width // 2 - src_w // 2 + res.paste(resized.resize((fill_width, height), box=(0, 0, 0, height)), box=(0, 0)) + res.paste(resized.resize((fill_width, height), box=(resized.width, 0, resized.width, height)), box=(fill_width + src_w, 0)) + + return res + + +invalid_filename_chars = '<>:"/\\|?*\n' +invalid_filename_prefix = ' ' +invalid_filename_postfix = ' .' 
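The split_grid()/combine_grid() helpers above implement tiled processing: fixed-size tiles are laid out so neighbours share an overlap band, and the overlap is later blended back with a gradient mask. A small sketch of just the tile-placement arithmetic, using the same 512-pixel tile and 64-pixel overlap defaults (the helper name below is illustrative):

```python
# Tiling arithmetic as in split_grid(): tiles of `tile` pixels are spaced so that
# neighbours share `overlap` pixels, and the last tile is clamped to the edge.
import math

def tile_origins(size, tile, overlap):
    non_overlap = tile - overlap
    count = math.ceil((size - overlap) / non_overlap)
    step = (size - tile) / (count - 1) if count > 1 else 0
    return [min(int(i * step), size - tile) for i in range(count)]

# a 1280-pixel edge split into 512-pixel tiles with 64 pixels of overlap
print(tile_origins(1280, 512, 64))   # [0, 384, 768] -> three overlapping tiles
```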
+re_nonletters = re.compile(r'[\s' + string.punctuation + ']+') +max_filename_part_length = 128 + + +def sanitize_filename_part(text, replace_spaces=True): + if replace_spaces: + text = text.replace(' ', '_') + + text = text.translate({ord(x): '_' for x in invalid_filename_chars}) + text = text.lstrip(invalid_filename_prefix)[:max_filename_part_length] + text = text.rstrip(invalid_filename_postfix) + return text + + +def apply_filename_pattern(x, p, seed, prompt): + max_prompt_words = opts.directories_max_prompt_words + + if seed is not None: + x = x.replace("[seed]", str(seed)) + + if p is not None: + x = x.replace("[steps]", str(p.steps)) + x = x.replace("[cfg]", str(p.cfg_scale)) + x = x.replace("[width]", str(p.width)) + x = x.replace("[height]", str(p.height)) + x = x.replace("[styles]", sanitize_filename_part(", ".join([x for x in p.styles if not x == "None"]) or "None", replace_spaces=False)) + x = x.replace("[sampler]", sanitize_filename_part(sd_samplers.samplers[p.sampler_index].name, replace_spaces=False)) + + x = x.replace("[model_hash]", getattr(p, "sd_model_hash", shared.sd_model.sd_model_hash)) + x = x.replace("[date]", datetime.date.today().isoformat()) + x = x.replace("[datetime]", datetime.datetime.now().strftime("%Y%m%d%H%M%S")) + x = x.replace("[job_timestamp]", getattr(p, "job_timestamp", shared.state.job_timestamp)) + + # Apply [prompt] at last. Because it may contain any replacement word.^M + if prompt is not None: + x = x.replace("[prompt]", sanitize_filename_part(prompt)) + if "[prompt_no_styles]" in x: + prompt_no_style = prompt + for style in shared.prompt_styles.get_style_prompts(p.styles): + if len(style) > 0: + style_parts = [y for y in style.split("{prompt}")] + for part in style_parts: + prompt_no_style = prompt_no_style.replace(part, "").replace(", ,", ",").strip().strip(',') + prompt_no_style = prompt_no_style.replace(style, "").strip().strip(',').strip() + x = x.replace("[prompt_no_styles]", sanitize_filename_part(prompt_no_style, replace_spaces=False)) + + x = x.replace("[prompt_spaces]", sanitize_filename_part(prompt, replace_spaces=False)) + if "[prompt_words]" in x: + words = [x for x in re_nonletters.split(prompt or "") if len(x) > 0] + if len(words) == 0: + words = ["empty"] + x = x.replace("[prompt_words]", sanitize_filename_part(" ".join(words[0:max_prompt_words]), replace_spaces=False)) + + if cmd_opts.hide_ui_dir_config: + x = re.sub(r'^[\\/]+|\.{2,}[\\/]+|[\\/]+\.{2,}', '', x) + + return x + + +def get_next_sequence_number(path, basename): + """ + Determines and returns the next sequence number to use when saving an image in the specified directory. + + The sequence starts at 0. + """ + result = -1 + if basename != '': + basename = basename + "-" + + prefix_length = len(basename) + for p in os.listdir(path): + if p.startswith(basename): + l = os.path.splitext(p[prefix_length:])[0].split('-') # splits the filename (removing the basename first if one is defined, so the sequence number is always the first element) + try: + result = max(int(l[0]), result) + except ValueError: + pass + + return result + 1 + + +def save_image(image, path, basename, seed=None, prompt=None, extension='png', info=None, short_filename=False, no_prompt=False, grid=False, pnginfo_section_name='parameters', p=None, existing_info=None, forced_filename=None, suffix="", save_to_dirs=None): + '''Save an image. + + Args: + image (`PIL.Image`): + The image to be saved. + path (`str`): + The directory to save the image. 
Note, the option `save_to_dirs` will make the image to be saved into a sub directory. + basename (`str`): + The base filename which will be applied to `filename pattern`. + seed, prompt, short_filename, + extension (`str`): + Image file extension, default is `png`. + pngsectionname (`str`): + Specify the name of the section which `info` will be saved in. + info (`str` or `PngImagePlugin.iTXt`): + PNG info chunks. + existing_info (`dict`): + Additional PNG info. `existing_info == {pngsectionname: info, ...}` + no_prompt: + TODO I don't know its meaning. + p (`StableDiffusionProcessing`) + forced_filename (`str`): + If specified, `basename` and filename pattern will be ignored. + save_to_dirs (bool): + If true, the image will be saved into a subdirectory of `path`. + + Returns: (fullfn, txt_fullfn) + fullfn (`str`): + The full path of the saved imaged. + txt_fullfn (`str` or None): + If a text file is saved for this image, this will be its full path. Otherwise None. + ''' + if short_filename or prompt is None or seed is None: + file_decoration = "" + elif opts.save_to_dirs: + file_decoration = opts.samples_filename_pattern or "[seed]" + else: + file_decoration = opts.samples_filename_pattern or "[seed]-[prompt_spaces]" + + if file_decoration != "": + file_decoration = "-" + file_decoration.lower() + + file_decoration = apply_filename_pattern(file_decoration, p, seed, prompt) + suffix + + if extension == 'png' and opts.enable_pnginfo and info is not None: + pnginfo = PngImagePlugin.PngInfo() + + if existing_info is not None: + for k, v in existing_info.items(): + pnginfo.add_text(k, str(v)) + + pnginfo.add_text(pnginfo_section_name, info) + else: + pnginfo = None + + if save_to_dirs is None: + save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt) + + if save_to_dirs: + dirname = apply_filename_pattern(opts.directories_filename_pattern or "[prompt_words]", p, seed, prompt).strip('\\ /') + path = os.path.join(path, dirname) + + os.makedirs(path, exist_ok=True) + + if forced_filename is None: + basecount = get_next_sequence_number(path, basename) + fullfn = "a.png" + fullfn_without_extension = "a" + for i in range(500): + fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}" + fullfn = os.path.join(path, f"{fn}{file_decoration}.{extension}") + fullfn_without_extension = os.path.join(path, f"{fn}{file_decoration}") + if not os.path.exists(fullfn): + break + else: + fullfn = os.path.join(path, f"{forced_filename}.{extension}") + fullfn_without_extension = os.path.join(path, forced_filename) + + def exif_bytes(): + return piexif.dump({ + "Exif": { + piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(info or "", encoding="unicode") + }, + }) + + if extension.lower() in ("jpg", "jpeg", "webp"): + image.save(fullfn, quality=opts.jpeg_quality) + if opts.enable_pnginfo and info is not None: + piexif.insert(exif_bytes(), fullfn) + else: + image.save(fullfn, quality=opts.jpeg_quality, pnginfo=pnginfo) + + target_side_length = 4000 + oversize = image.width > target_side_length or image.height > target_side_length + if opts.export_for_4chan and (oversize or os.stat(fullfn).st_size > 4 * 1024 * 1024): + ratio = image.width / image.height + + if oversize and ratio > 1: + image = image.resize((target_side_length, image.height * target_side_length // image.width), LANCZOS) + elif oversize: + image = image.resize((image.width * target_side_length // image.height, target_side_length), LANCZOS) + + 
image.save(fullfn_without_extension + ".jpg", quality=opts.jpeg_quality) + if opts.enable_pnginfo and info is not None: + piexif.insert(exif_bytes(), fullfn_without_extension + ".jpg") + + if opts.save_txt and info is not None: + txt_fullfn = f"{fullfn_without_extension}.txt" + with open(txt_fullfn, "w", encoding="utf8") as file: + file.write(info + "\n") + else: + txt_fullfn = None + + return fullfn, txt_fullfn diff --git a/modules/img2img.py b/modules/img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..24126774540d14ba71ae993f541e674d75f7c183 --- /dev/null +++ b/modules/img2img.py @@ -0,0 +1,137 @@ +import math +import os +import sys +import traceback + +import numpy as np +from PIL import Image, ImageOps, ImageChops + +from modules import devices +from modules.processing import Processed, StableDiffusionProcessingImg2Img, process_images +from modules.shared import opts, state +import modules.shared as shared +import modules.processing as processing +from modules.ui import plaintext_to_html +import modules.images as images +import modules.scripts + + +def process_batch(p, input_dir, output_dir, args): + processing.fix_seed(p) + + images = [file for file in [os.path.join(input_dir, x) for x in os.listdir(input_dir)] if os.path.isfile(file)] + + print(f"Will process {len(images)} images, creating {p.n_iter * p.batch_size} new images for each.") + + save_normally = output_dir == '' + + p.do_not_save_grid = True + p.do_not_save_samples = not save_normally + + state.job_count = len(images) * p.n_iter + + for i, image in enumerate(images): + state.job = f"{i+1} out of {len(images)}" + if state.skipped: + state.skipped = False + + if state.interrupted: + break + + img = Image.open(image) + p.init_images = [img] * p.batch_size + + proc = modules.scripts.scripts_img2img.run(p, *args) + if proc is None: + proc = process_images(p) + + for n, processed_image in enumerate(proc.images): + filename = os.path.basename(image) + + if n > 0: + left, right = os.path.splitext(filename) + filename = f"{left}-{n}{right}" + + if not save_normally: + processed_image.save(os.path.join(output_dir, filename)) + + +def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, *args): + is_inpaint = mode == 1 + is_batch = mode == 2 + + if is_inpaint: + if mask_mode == 0: + image = init_img_with_mask['image'] + mask = init_img_with_mask['mask'] + alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1') + mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L') + image = image.convert('RGB') + else: + image = init_img_inpaint + mask = init_mask_inpaint + else: + image = init_img + mask = None + + assert 0. 
<= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]' + + p = StableDiffusionProcessingImg2Img( + sd_model=shared.sd_model, + outpath_samples=opts.outdir_samples or opts.outdir_img2img_samples, + outpath_grids=opts.outdir_grids or opts.outdir_img2img_grids, + prompt=prompt, + negative_prompt=negative_prompt, + styles=[prompt_style, prompt_style2], + seed=seed, + subseed=subseed, + subseed_strength=subseed_strength, + seed_resize_from_h=seed_resize_from_h, + seed_resize_from_w=seed_resize_from_w, + seed_enable_extras=seed_enable_extras, + sampler_index=sampler_index, + batch_size=batch_size, + n_iter=n_iter, + steps=steps, + cfg_scale=cfg_scale, + width=width, + height=height, + restore_faces=restore_faces, + tiling=tiling, + init_images=[image], + mask=mask, + mask_blur=mask_blur, + inpainting_fill=inpainting_fill, + resize_mode=resize_mode, + denoising_strength=denoising_strength, + inpaint_full_res=inpaint_full_res, + inpaint_full_res_padding=inpaint_full_res_padding, + inpainting_mask_invert=inpainting_mask_invert, + ) + + if shared.cmd_opts.enable_console_prompts: + print(f"\nimg2img: {prompt}", file=shared.progress_print_out) + + p.extra_generation_params["Mask blur"] = mask_blur + + if is_batch: + assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled" + + process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, args) + + processed = Processed(p, [], p.seed, "") + else: + processed = modules.scripts.scripts_img2img.run(p, *args) + if processed is None: + processed = process_images(p) + + shared.total_tqdm.clear() + + generation_info_js = processed.js() + if opts.samples_log_stdout: + print(generation_info_js) + + if opts.do_not_show_images: + processed.images = [] + + return processed.images, generation_info_js, plaintext_to_html(processed.info) diff --git a/modules/interrogate.py b/modules/interrogate.py new file mode 100644 index 0000000000000000000000000000000000000000..af858cc09d75fe01bff12ce3b2b5a5271e66745d --- /dev/null +++ b/modules/interrogate.py @@ -0,0 +1,171 @@ +import contextlib +import os +import sys +import traceback +from collections import namedtuple +import re + +import torch + +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +import modules.shared as shared +from modules import devices, paths, lowvram + +blip_image_eval_size = 384 +blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' +clip_model_name = 'ViT-L/14' + +Category = namedtuple("Category", ["name", "topn", "items"]) + +re_topn = re.compile(r"\.top(\d+)\.") + + +class InterrogateModels: + blip_model = None + clip_model = None + clip_preprocess = None + categories = None + dtype = None + + def __init__(self, content_dir): + self.categories = [] + + if os.path.exists(content_dir): + for filename in os.listdir(content_dir): + m = re_topn.search(filename) + topn = 1 if m is None else int(m.group(1)) + + with open(os.path.join(content_dir, filename), "r", encoding="utf8") as file: + lines = [x.strip() for x in file.readlines()] + + self.categories.append(Category(name=filename, topn=topn, items=lines)) + + def load_blip_model(self): + import models.blip + + blip_model = models.blip.blip_decoder(pretrained=blip_model_url, image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json")) + blip_model.eval() + + return blip_model + + def 
load_clip_model(self): + import clip + + model, preprocess = clip.load(clip_model_name) + model.eval() + model = model.to(shared.device) + + return model, preprocess + + def load(self): + if self.blip_model is None: + self.blip_model = self.load_blip_model() + if not shared.cmd_opts.no_half: + self.blip_model = self.blip_model.half() + + self.blip_model = self.blip_model.to(shared.device) + + if self.clip_model is None: + self.clip_model, self.clip_preprocess = self.load_clip_model() + if not shared.cmd_opts.no_half: + self.clip_model = self.clip_model.half() + + self.clip_model = self.clip_model.to(shared.device) + + self.dtype = next(self.clip_model.parameters()).dtype + + def send_clip_to_ram(self): + if not shared.opts.interrogate_keep_models_in_memory: + if self.clip_model is not None: + self.clip_model = self.clip_model.to(devices.cpu) + + def send_blip_to_ram(self): + if not shared.opts.interrogate_keep_models_in_memory: + if self.blip_model is not None: + self.blip_model = self.blip_model.to(devices.cpu) + + def unload(self): + self.send_clip_to_ram() + self.send_blip_to_ram() + + devices.torch_gc() + + def rank(self, image_features, text_array, top_count=1): + import clip + + if shared.opts.interrogate_clip_dict_limit != 0: + text_array = text_array[0:int(shared.opts.interrogate_clip_dict_limit)] + + top_count = min(top_count, len(text_array)) + text_tokens = clip.tokenize([text for text in text_array], truncate=True).to(shared.device) + text_features = self.clip_model.encode_text(text_tokens).type(self.dtype) + text_features /= text_features.norm(dim=-1, keepdim=True) + + similarity = torch.zeros((1, len(text_array))).to(shared.device) + for i in range(image_features.shape[0]): + similarity += (100.0 * image_features[i].unsqueeze(0) @ text_features.T).softmax(dim=-1) + similarity /= image_features.shape[0] + + top_probs, top_labels = similarity.cpu().topk(top_count, dim=-1) + return [(text_array[top_labels[0][i].numpy()], (top_probs[0][i].numpy()*100)) for i in range(top_count)] + + def generate_caption(self, pil_image): + gpu_image = transforms.Compose([ + transforms.Resize((blip_image_eval_size, blip_image_eval_size), interpolation=InterpolationMode.BICUBIC), + transforms.ToTensor(), + transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)) + ])(pil_image).unsqueeze(0).type(self.dtype).to(shared.device) + + with torch.no_grad(): + caption = self.blip_model.generate(gpu_image, sample=False, num_beams=shared.opts.interrogate_clip_num_beams, min_length=shared.opts.interrogate_clip_min_length, max_length=shared.opts.interrogate_clip_max_length) + + return caption[0] + + def interrogate(self, pil_image, include_ranks=False): + res = None + + try: + + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + lowvram.send_everything_to_cpu() + devices.torch_gc() + + self.load() + + caption = self.generate_caption(pil_image) + self.send_blip_to_ram() + devices.torch_gc() + + res = caption + + clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device) + + precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext + with torch.no_grad(), precision_scope("cuda"): + image_features = self.clip_model.encode_image(clip_image).type(self.dtype) + + image_features /= image_features.norm(dim=-1, keepdim=True) + + if shared.opts.interrogate_use_builtin_artists: + artist = self.rank(image_features, ["by " + artist.name for artist in shared.artist_db.artists])[0] + + res += ", " + 
artist[0] + + for name, topn, items in self.categories: + matches = self.rank(image_features, items, top_count=topn) + for match, score in matches: + if include_ranks: + res += ", " + match + else: + res += f", ({match}:{score})" + + except Exception: + print(f"Error interrogating", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + res += "" + + self.unload() + + return res diff --git a/modules/ldsr_model.py b/modules/ldsr_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8c4db44adcc75ef2dadfc9089e14a9affa12b207 --- /dev/null +++ b/modules/ldsr_model.py @@ -0,0 +1,54 @@ +import os +import sys +import traceback + +from basicsr.utils.download_util import load_file_from_url + +from modules.upscaler import Upscaler, UpscalerData +from modules.ldsr_model_arch import LDSR +from modules import shared + + +class UpscalerLDSR(Upscaler): + def __init__(self, user_path): + self.name = "LDSR" + self.user_path = user_path + self.model_url = "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1" + self.yaml_url = "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1" + super().__init__() + scaler_data = UpscalerData("LDSR", None, self) + self.scalers = [scaler_data] + + def load_model(self, path: str): + # Remove incorrect project.yaml file if too big + yaml_path = os.path.join(self.model_path, "project.yaml") + old_model_path = os.path.join(self.model_path, "model.pth") + new_model_path = os.path.join(self.model_path, "model.ckpt") + if os.path.exists(yaml_path): + statinfo = os.stat(yaml_path) + if statinfo.st_size >= 10485760: + print("Removing invalid LDSR YAML file.") + os.remove(yaml_path) + if os.path.exists(old_model_path): + print("Renaming model from model.pth to model.ckpt") + os.rename(old_model_path, new_model_path) + model = load_file_from_url(url=self.model_url, model_dir=self.model_path, + file_name="model.ckpt", progress=True) + yaml = load_file_from_url(url=self.yaml_url, model_dir=self.model_path, + file_name="project.yaml", progress=True) + + try: + return LDSR(model, yaml) + + except Exception: + print("Error importing LDSR:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + return None + + def do_upscale(self, img, path): + ldsr = self.load_model(path) + if ldsr is None: + print("NO LDSR!") + return img + ddim_steps = shared.opts.ldsr_steps + return ldsr.super_resolution(img, ddim_steps, self.scale) diff --git a/modules/ldsr_model_arch.py b/modules/ldsr_model_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..14db507668ca6eed685e2f058a36a242a5bb1603 --- /dev/null +++ b/modules/ldsr_model_arch.py @@ -0,0 +1,222 @@ +import gc +import time +import warnings + +import numpy as np +import torch +import torchvision +from PIL import Image +from einops import rearrange, repeat +from omegaconf import OmegaConf + +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.util import instantiate_from_config, ismap + +warnings.filterwarnings("ignore", category=UserWarning) + + +# Create LDSR Class +class LDSR: + def load_model_from_config(self, half_attention): + print(f"Loading model from {self.modelPath}") + pl_sd = torch.load(self.modelPath, map_location="cpu") + sd = pl_sd["state_dict"] + config = OmegaConf.load(self.yamlPath) + model = instantiate_from_config(config.model) + model.load_state_dict(sd, strict=False) + model.cuda() + if half_attention: + model = model.half() + + model.eval() + return {"model": model} + + def __init__(self, model_path, yaml_path): + self.modelPath = 
model_path + self.yamlPath = yaml_path + + @staticmethod + def run(model, selected_path, custom_steps, eta): + example = get_cond(selected_path) + + n_runs = 1 + guider = None + ckwargs = None + ddim_use_x0_pred = False + temperature = 1. + eta = eta + custom_shape = None + + height, width = example["image"].shape[1:3] + split_input = height >= 128 and width >= 128 + + if split_input: + ks = 128 + stride = 64 + vqf = 4 # + model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride), + "vqf": vqf, + "patch_distributed_vq": True, + "tie_braker": False, + "clip_max_weight": 0.5, + "clip_min_weight": 0.01, + "clip_max_tie_weight": 0.5, + "clip_min_tie_weight": 0.01} + else: + if hasattr(model, "split_input_params"): + delattr(model, "split_input_params") + + x_t = None + logs = None + for n in range(n_runs): + if custom_shape is not None: + x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device) + x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0]) + + logs = make_convolutional_sample(example, model, + custom_steps=custom_steps, + eta=eta, quantize_x0=False, + custom_shape=custom_shape, + temperature=temperature, noise_dropout=0., + corrector=guider, corrector_kwargs=ckwargs, x_T=x_t, + ddim_use_x0_pred=ddim_use_x0_pred + ) + return logs + + def super_resolution(self, image, steps=100, target_scale=2, half_attention=False): + model = self.load_model_from_config(half_attention) + + # Run settings + diffusion_steps = int(steps) + eta = 1.0 + + down_sample_method = 'Lanczos' + + gc.collect() + torch.cuda.empty_cache() + + im_og = image + width_og, height_og = im_og.size + # If we can adjust the max upscale size, then the 4 below should be our variable + down_sample_rate = target_scale / 4 + wd = width_og * down_sample_rate + hd = height_og * down_sample_rate + width_downsampled_pre = int(wd) + height_downsampled_pre = int(hd) + + if down_sample_rate != 1: + print( + f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]') + im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS) + else: + print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)") + logs = self.run(model["model"], im_og, diffusion_steps, eta) + + sample = logs["sample"] + sample = sample.detach().cpu() + sample = torch.clamp(sample, -1., 1.) + sample = (sample + 1.) / 2. * 255 + sample = sample.numpy().astype(np.uint8) + sample = np.transpose(sample, (0, 2, 3, 1)) + a = Image.fromarray(sample[0]) + + del model + gc.collect() + torch.cuda.empty_cache() + return a + + +def get_cond(selected_path): + example = dict() + up_f = 4 + c = selected_path.convert('RGB') + c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0) + c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]], + antialias=True) + c_up = rearrange(c_up, '1 c h w -> 1 h w c') + c = rearrange(c, '1 c h w -> 1 h w c') + c = 2. * c - 1. 
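get_cond() above maps ToTensor() output from [0, 1] into the [-1, 1] range the latent diffusion model expects, and super_resolution() maps the sampled result back to 8-bit pixels for PIL. A minimal round-trip sketch (the tensor shape is illustrative):

```python
# Value-range round trip used by the LDSR code above: [0, 1] -> [-1, 1] for the
# model input, then clamp and rescale the output back to [0, 255].
import torch

x = torch.rand(1, 3, 8, 8)                 # ToTensor()-style input, values in [0, 1]
c = 2. * x - 1.                            # model input range [-1, 1]

sample = torch.clamp(c, -1., 1.)           # stand-in for the diffusion output
img = ((sample + 1.) / 2. * 255).to(torch.uint8)
print(img.min().item() >= 0, img.max().item() <= 255)   # True True
```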
+ + c = c.to(torch.device("cuda")) + example["LR_image"] = c + example["image"] = c_up + + return example + + +@torch.no_grad() +def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None, + mask=None, x0=None, quantize_x0=False, temperature=1., score_corrector=None, + corrector_kwargs=None, x_t=None + ): + ddim = DDIMSampler(model) + bs = shape[0] + shape = shape[1:] + print(f"Sampling with eta = {eta}; steps: {steps}") + samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback, + normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta, + mask=mask, x0=x0, temperature=temperature, verbose=False, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, x_t=x_t) + + return samples, intermediates + + +@torch.no_grad() +def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None, + corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False): + log = dict() + + z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=not (hasattr(model, 'split_input_params') + and model.cond_stage_key == 'coordinates_bbox'), + return_original_cond=True) + + if custom_shape is not None: + z = torch.randn(custom_shape) + print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}") + + z0 = None + + log["input"] = x + log["reconstruction"] = xrec + + if ismap(xc): + log["original_conditioning"] = model.to_rgb(xc) + if hasattr(model, 'cond_stage_key'): + log[model.cond_stage_key] = model.to_rgb(xc) + + else: + log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x) + if model.cond_stage_model: + log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x) + if model.cond_stage_key == 'class_label': + log[model.cond_stage_key] = xc[model.cond_stage_key] + + with model.ema_scope("Plotting"): + t0 = time.time() + + sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape, + eta=eta, + quantize_x0=quantize_x0, mask=None, x0=z0, + temperature=temperature, score_corrector=corrector, corrector_kwargs=corrector_kwargs, + x_t=x_T) + t1 = time.time() + + if ddim_use_x0_pred: + sample = intermediates['pred_x0'][-1] + + x_sample = model.decode_first_stage(sample) + + try: + x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True) + log["sample_noquant"] = x_sample_noquant + log["sample_diff"] = torch.abs(x_sample_noquant - x_sample) + except: + pass + + log["sample"] = x_sample + log["time"] = t1 - t0 + + return log diff --git a/modules/lowvram.py b/modules/lowvram.py new file mode 100644 index 0000000000000000000000000000000000000000..7eba1349c1fbf7ab6bdaf00f0d5ba474e8ee11ee --- /dev/null +++ b/modules/lowvram.py @@ -0,0 +1,82 @@ +import torch +from modules.devices import get_optimal_device + +module_in_gpu = None +cpu = torch.device("cpu") +device = gpu = get_optimal_device() + + +def send_everything_to_cpu(): + global module_in_gpu + + if module_in_gpu is not None: + module_in_gpu.to(cpu) + + module_in_gpu = None + + +def setup_for_low_vram(sd_model, use_medvram): + parents = {} + + def send_me_to_gpu(module, _): + """send this module to GPU; send whatever tracked module was previous in GPU to CPU; + we add this as forward_pre_hook to a lot of modules and this way all but one of them will + be in CPU + """ + global module_in_gpu + + module = parents.get(module, 
module) + + if module_in_gpu == module: + return + + if module_in_gpu is not None: + module_in_gpu.to(cpu) + + module.to(gpu) + module_in_gpu = module + + # see below for register_forward_pre_hook; + # first_stage_model does not use forward(), it uses encode/decode, so register_forward_pre_hook is + # useless here, and we just replace those methods + def first_stage_model_encode_wrap(self, encoder, x): + send_me_to_gpu(self, None) + return encoder(x) + + def first_stage_model_decode_wrap(self, decoder, z): + send_me_to_gpu(self, None) + return decoder(z) + + # remove three big modules, cond, first_stage, and unet from the model and then + # send the model to GPU. Then put modules back. the modules will be in CPU. + stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model + sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = None, None, None + sd_model.to(device) + sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = stored + + # register hooks for those the first two models + sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu) + sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu) + sd_model.first_stage_model.encode = lambda x, en=sd_model.first_stage_model.encode: first_stage_model_encode_wrap(sd_model.first_stage_model, en, x) + sd_model.first_stage_model.decode = lambda z, de=sd_model.first_stage_model.decode: first_stage_model_decode_wrap(sd_model.first_stage_model, de, z) + parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model + + if use_medvram: + sd_model.model.register_forward_pre_hook(send_me_to_gpu) + else: + diff_model = sd_model.model.diffusion_model + + # the third remaining model is still too big for 4 GB, so we also do the same for its submodules + # so that only one of them is in GPU at a time + stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed + diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None + sd_model.model.to(device) + diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored + + # install hooks for bits of third model + diff_model.time_embed.register_forward_pre_hook(send_me_to_gpu) + for block in diff_model.input_blocks: + block.register_forward_pre_hook(send_me_to_gpu) + diff_model.middle_block.register_forward_pre_hook(send_me_to_gpu) + for block in diff_model.output_blocks: + block.register_forward_pre_hook(send_me_to_gpu) diff --git a/modules/masking.py b/modules/masking.py new file mode 100644 index 0000000000000000000000000000000000000000..fd8d92418ee898f703f3051c948b78f05b49a147 --- /dev/null +++ b/modules/masking.py @@ -0,0 +1,99 @@ +from PIL import Image, ImageFilter, ImageOps + + +def get_crop_region(mask, pad=0): + """finds a rectangular region that contains all masked ares in an image. Returns (x1, y1, x2, y2) coordinates of the rectangle. 
+ For example, if a user has painted the top-right part of a 512x512 image", the result may be (256, 0, 512, 256)""" + + h, w = mask.shape + + crop_left = 0 + for i in range(w): + if not (mask[:, i] == 0).all(): + break + crop_left += 1 + + crop_right = 0 + for i in reversed(range(w)): + if not (mask[:, i] == 0).all(): + break + crop_right += 1 + + crop_top = 0 + for i in range(h): + if not (mask[i] == 0).all(): + break + crop_top += 1 + + crop_bottom = 0 + for i in reversed(range(h)): + if not (mask[i] == 0).all(): + break + crop_bottom += 1 + + return ( + int(max(crop_left-pad, 0)), + int(max(crop_top-pad, 0)), + int(min(w - crop_right + pad, w)), + int(min(h - crop_bottom + pad, h)) + ) + + +def expand_crop_region(crop_region, processing_width, processing_height, image_width, image_height): + """expands crop region get_crop_region() to match the ratio of the image the region will processed in; returns expanded region + for example, if user drew mask in a 128x32 region, and the dimensions for processing are 512x512, the region will be expanded to 128x128.""" + + x1, y1, x2, y2 = crop_region + + ratio_crop_region = (x2 - x1) / (y2 - y1) + ratio_processing = processing_width / processing_height + + if ratio_crop_region > ratio_processing: + desired_height = (x2 - x1) * ratio_processing + desired_height_diff = int(desired_height - (y2-y1)) + y1 -= desired_height_diff//2 + y2 += desired_height_diff - desired_height_diff//2 + if y2 >= image_height: + diff = y2 - image_height + y2 -= diff + y1 -= diff + if y1 < 0: + y2 -= y1 + y1 -= y1 + if y2 >= image_height: + y2 = image_height + else: + desired_width = (y2 - y1) * ratio_processing + desired_width_diff = int(desired_width - (x2-x1)) + x1 -= desired_width_diff//2 + x2 += desired_width_diff - desired_width_diff//2 + if x2 >= image_width: + diff = x2 - image_width + x2 -= diff + x1 -= diff + if x1 < 0: + x2 -= x1 + x1 -= x1 + if x2 >= image_width: + x2 = image_width + + return x1, y1, x2, y2 + + +def fill(image, mask): + """fills masked regions with colors from image using blur. 
Not extremely effective.""" + + image_mod = Image.new('RGBA', (image.width, image.height)) + + image_masked = Image.new('RGBa', (image.width, image.height)) + image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(mask.convert('L'))) + + image_masked = image_masked.convert('RGBa') + + for radius, repeats in [(256, 1), (64, 1), (16, 2), (4, 4), (2, 2), (0, 1)]: + blurred = image_masked.filter(ImageFilter.GaussianBlur(radius)).convert('RGBA') + for _ in range(repeats): + image_mod.alpha_composite(blurred) + + return image_mod.convert("RGB") + diff --git a/modules/memmon.py b/modules/memmon.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb9b687caf20eaa87b95676b500eccd578cf23e --- /dev/null +++ b/modules/memmon.py @@ -0,0 +1,85 @@ +import threading +import time +from collections import defaultdict + +import torch + + +class MemUsageMonitor(threading.Thread): + run_flag = None + device = None + disabled = False + opts = None + data = None + + def __init__(self, name, device, opts): + threading.Thread.__init__(self) + self.name = name + self.device = device + self.opts = opts + + self.daemon = True + self.run_flag = threading.Event() + self.data = defaultdict(int) + + try: + torch.cuda.mem_get_info() + torch.cuda.memory_stats(self.device) + except Exception as e: # AMD or whatever + print(f"Warning: caught exception '{e}', memory monitor disabled") + self.disabled = True + + def run(self): + if self.disabled: + return + + while True: + self.run_flag.wait() + + torch.cuda.reset_peak_memory_stats() + self.data.clear() + + if self.opts.memmon_poll_rate <= 0: + self.run_flag.clear() + continue + + self.data["min_free"] = torch.cuda.mem_get_info()[0] + + while self.run_flag.is_set(): + free, total = torch.cuda.mem_get_info() # calling with self.device errors, torch bug? + self.data["min_free"] = min(self.data["min_free"], free) + + time.sleep(1 / self.opts.memmon_poll_rate) + + def dump_debug(self): + print(self, 'recorded data:') + for k, v in self.read().items(): + print(k, -(v // -(1024 ** 2))) + + print(self, 'raw torch memory stats:') + tm = torch.cuda.memory_stats(self.device) + for k, v in tm.items(): + if 'bytes' not in k: + continue + print('\t' if 'peak' in k else '', k, -(v // -(1024 ** 2))) + + print(torch.cuda.memory_summary()) + + def monitor(self): + self.run_flag.set() + + def read(self): + if not self.disabled: + free, total = torch.cuda.mem_get_info() + self.data["total"] = total + + torch_stats = torch.cuda.memory_stats(self.device) + self.data["active_peak"] = torch_stats["active_bytes.all.peak"] + self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"] + self.data["system_peak"] = total - self.data["min_free"] + + return self.data + + def stop(self): + self.run_flag.clear() + return self.read() diff --git a/modules/modelloader.py b/modules/modelloader.py new file mode 100644 index 0000000000000000000000000000000000000000..b0f2f33d22a11d7f7419541c7f7daa6fee68af0c --- /dev/null +++ b/modules/modelloader.py @@ -0,0 +1,153 @@ +import glob +import os +import shutil +import importlib +from urllib.parse import urlparse + +from basicsr.utils.download_util import load_file_from_url +from modules import shared +from modules.upscaler import Upscaler +from modules.paths import script_path, models_path + + +def load_models(model_path: str, model_url: str = None, command_path: str = None, ext_filter=None, download_name=None) -> list: + """ + A one-and done loader to try finding the desired models in specified directories. 
+ + @param download_name: Specify to download from model_url immediately. + @param model_url: If no other models are found, this will be downloaded on upscale. + @param model_path: The location to store/find models in. + @param command_path: A command-line argument to search for models in first. + @param ext_filter: An optional list of filename extensions to filter by + @return: A list of paths containing the desired model(s) + """ + output = [] + + if ext_filter is None: + ext_filter = [] + + try: + places = [] + + if command_path is not None and command_path != model_path: + pretrained_path = os.path.join(command_path, 'experiments/pretrained_models') + if os.path.exists(pretrained_path): + print(f"Appending path: {pretrained_path}") + places.append(pretrained_path) + elif os.path.exists(command_path): + places.append(command_path) + + places.append(model_path) + + for place in places: + if os.path.exists(place): + for file in glob.iglob(place + '**/**', recursive=True): + full_path = file + if os.path.isdir(full_path): + continue + if len(ext_filter) != 0: + model_name, extension = os.path.splitext(file) + if extension not in ext_filter: + continue + if file not in output: + output.append(full_path) + + if model_url is not None and len(output) == 0: + if download_name is not None: + dl = load_file_from_url(model_url, model_path, True, download_name) + output.append(dl) + else: + output.append(model_url) + + except Exception: + pass + + return output + + +def friendly_name(file: str): + if "http" in file: + file = urlparse(file).path + + file = os.path.basename(file) + model_name, extension = os.path.splitext(file) + return model_name + + +def cleanup_models(): + # This code could probably be more efficient if we used a tuple list or something to store the src/destinations + # and then enumerate that, but this works for now. In the future, it'd be nice to just have every "model" scaler + # somehow auto-register and just do these things... 
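+    # What this currently does: stray .ckpt files in models/ are moved to models/Stable-diffusion, and the legacy
+    # ESRGAN, gfpgan, SwinIR and latent-diffusion pretrained_models folders are moved under models/.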
+ root_path = script_path + src_path = models_path + dest_path = os.path.join(models_path, "Stable-diffusion") + move_files(src_path, dest_path, ".ckpt") + src_path = os.path.join(root_path, "ESRGAN") + dest_path = os.path.join(models_path, "ESRGAN") + move_files(src_path, dest_path) + src_path = os.path.join(root_path, "gfpgan") + dest_path = os.path.join(models_path, "GFPGAN") + move_files(src_path, dest_path) + src_path = os.path.join(root_path, "SwinIR") + dest_path = os.path.join(models_path, "SwinIR") + move_files(src_path, dest_path) + src_path = os.path.join(root_path, "repositories/latent-diffusion/experiments/pretrained_models/") + dest_path = os.path.join(models_path, "LDSR") + move_files(src_path, dest_path) + + +def move_files(src_path: str, dest_path: str, ext_filter: str = None): + try: + if not os.path.exists(dest_path): + os.makedirs(dest_path) + if os.path.exists(src_path): + for file in os.listdir(src_path): + fullpath = os.path.join(src_path, file) + if os.path.isfile(fullpath): + if ext_filter is not None: + if ext_filter not in file: + continue + print(f"Moving {file} from {src_path} to {dest_path}.") + try: + shutil.move(fullpath, dest_path) + except: + pass + if len(os.listdir(src_path)) == 0: + print(f"Removing empty folder: {src_path}") + shutil.rmtree(src_path, True) + except: + pass + + +def load_upscalers(): + sd = shared.script_path + # We can only do this 'magic' method to dynamically load upscalers if they are referenced, + # so we'll try to import any _model.py files before looking in __subclasses__ + modules_dir = os.path.join(sd, "modules") + for file in os.listdir(modules_dir): + if "_model.py" in file: + model_name = file.replace("_model.py", "") + full_model = f"modules.{model_name}_model" + try: + importlib.import_module(full_model) + except: + pass + datas = [] + c_o = vars(shared.cmd_opts) + for cls in Upscaler.__subclasses__(): + name = cls.__name__ + module_name = cls.__module__ + module = importlib.import_module(module_name) + class_ = getattr(module, name) + cmd_name = f"{name.lower().replace('upscaler', '')}_models_path" + opt_string = None + try: + if cmd_name in c_o: + opt_string = c_o[cmd_name] + except: + pass + scaler = class_(opt_string) + for child in scaler.scalers: + datas.append(child) + + shared.sd_upscalers = datas diff --git a/modules/ngrok.py b/modules/ngrok.py new file mode 100644 index 0000000000000000000000000000000000000000..7d03a6df59eeeff48795c6fe2993a02cb7c84332 --- /dev/null +++ b/modules/ngrok.py @@ -0,0 +1,15 @@ +from pyngrok import ngrok, conf, exception + + +def connect(token, port): + if token == None: + token = 'None' + conf.get_default().auth_token = token + try: + public_url = ngrok.connect(port).public_url + except exception.PyngrokNgrokError: + print(f'Invalid ngrok authtoken, ngrok connection aborted.\n' + f'Your token: {token}, get the right one on https://dashboard.ngrok.com/get-started/your-authtoken') + else: + print(f'ngrok connected to localhost:{port}! 
URL: {public_url}\n' + 'You can use this link after the launch is complete.') diff --git a/modules/paths.py b/modules/paths.py new file mode 100644 index 0000000000000000000000000000000000000000..1e7a2fbcf9f6faf48e565344598e5095966342aa --- /dev/null +++ b/modules/paths.py @@ -0,0 +1,40 @@ +import argparse +import os +import sys +import modules.safe + +script_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +models_path = os.path.join(script_path, "models") +sys.path.insert(0, script_path) + +# search for directory of stable diffusion in following places +sd_path = None +possible_sd_paths = [os.path.join(script_path, 'repositories/stable-diffusion'), '.', os.path.dirname(script_path)] +for possible_sd_path in possible_sd_paths: + if os.path.exists(os.path.join(possible_sd_path, 'ldm/models/diffusion/ddpm.py')): + sd_path = os.path.abspath(possible_sd_path) + break + +assert sd_path is not None, "Couldn't find Stable Diffusion in any of: " + str(possible_sd_paths) + +path_dirs = [ + (sd_path, 'ldm', 'Stable Diffusion', []), + (os.path.join(sd_path, '../taming-transformers'), 'taming', 'Taming Transformers', []), + (os.path.join(sd_path, '../CodeFormer'), 'inference_codeformer.py', 'CodeFormer', []), + (os.path.join(sd_path, '../BLIP'), 'models/blip.py', 'BLIP', []), + (os.path.join(sd_path, '../k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]), +] + +paths = {} + +for d, must_exist, what, options in path_dirs: + must_exist_path = os.path.abspath(os.path.join(script_path, d, must_exist)) + if not os.path.exists(must_exist_path): + print(f"Warning: {what} not found at path {must_exist_path}", file=sys.stderr) + else: + d = os.path.abspath(d) + if "atstart" in options: + sys.path.insert(0, d) + else: + sys.path.append(d) + paths[what] = d diff --git a/modules/processing.py b/modules/processing.py new file mode 100644 index 0000000000000000000000000000000000000000..d5172f008c675638c869d2be81352b90b3151917 --- /dev/null +++ b/modules/processing.py @@ -0,0 +1,721 @@ +import json +import math +import os +import sys + +import torch +import numpy as np +from PIL import Image, ImageFilter, ImageOps +import random +import cv2 +from skimage import exposure + +import modules.sd_hijack +from modules import devices, prompt_parser, masking, sd_samplers, lowvram +from modules.sd_hijack import model_hijack +from modules.shared import opts, cmd_opts, state +import modules.shared as shared +import modules.face_restoration +import modules.images as images +import modules.styles +import logging + + +# some of those options should not be changed at all because they would break the model, so I removed them from options. 
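+# opt_C is the number of latent channels and opt_f the VAE downsampling factor used by Stable Diffusion,
+# so a 512x512 image corresponds to a 4x64x64 latent tensor.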
+opt_C = 4 +opt_f = 8 + + +def setup_color_correction(image): + logging.info("Calibrating color correction.") + correction_target = cv2.cvtColor(np.asarray(image.copy()), cv2.COLOR_RGB2LAB) + return correction_target + + +def apply_color_correction(correction, image): + logging.info("Applying color correction.") + image = Image.fromarray(cv2.cvtColor(exposure.match_histograms( + cv2.cvtColor( + np.asarray(image), + cv2.COLOR_RGB2LAB + ), + correction, + channel_axis=2 + ), cv2.COLOR_LAB2RGB).astype("uint8")) + + return image + + +def get_correct_sampler(p): + if isinstance(p, modules.processing.StableDiffusionProcessingTxt2Img): + return sd_samplers.samplers + elif isinstance(p, modules.processing.StableDiffusionProcessingImg2Img): + return sd_samplers.samplers_for_img2img + +class StableDiffusionProcessing: + def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None): + self.sd_model = sd_model + self.outpath_samples: str = outpath_samples + self.outpath_grids: str = outpath_grids + self.prompt: str = prompt + self.prompt_for_display: str = None + self.negative_prompt: str = (negative_prompt or "") + self.styles: list = styles or [] + self.seed: int = seed + self.subseed: int = subseed + self.subseed_strength: float = subseed_strength + self.seed_resize_from_h: int = seed_resize_from_h + self.seed_resize_from_w: int = seed_resize_from_w + self.sampler_index: int = sampler_index + self.batch_size: int = batch_size + self.n_iter: int = n_iter + self.steps: int = steps + self.cfg_scale: float = cfg_scale + self.width: int = width + self.height: int = height + self.restore_faces: bool = restore_faces + self.tiling: bool = tiling + self.do_not_save_samples: bool = do_not_save_samples + self.do_not_save_grid: bool = do_not_save_grid + self.extra_generation_params: dict = extra_generation_params or {} + self.overlay_images = overlay_images + self.eta = eta + self.paste_to = None + self.color_corrections = None + self.denoising_strength: float = 0 + self.sampler_noise_scheduler_override = None + self.ddim_discretize = opts.ddim_discretize + self.s_churn = opts.s_churn + self.s_tmin = opts.s_tmin + self.s_tmax = float('inf') # not representable as a standard ui option + self.s_noise = opts.s_noise + + if not seed_enable_extras: + self.subseed = -1 + self.subseed_strength = 0 + self.seed_resize_from_h = 0 + self.seed_resize_from_w = 0 + + def init(self, all_prompts, all_seeds, all_subseeds): + pass + + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): + raise NotImplementedError() + + +class Processed: + def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None): + self.images = images_list + self.prompt = p.prompt + self.negative_prompt = p.negative_prompt + self.seed = seed + self.subseed = subseed + self.subseed_strength = p.subseed_strength + self.info = info + self.width = p.width + self.height = p.height + self.sampler_index = p.sampler_index + self.sampler = sd_samplers.samplers[p.sampler_index].name + self.cfg_scale = 
p.cfg_scale + self.steps = p.steps + self.batch_size = p.batch_size + self.restore_faces = p.restore_faces + self.face_restoration_model = opts.face_restoration_model if p.restore_faces else None + self.sd_model_hash = shared.sd_model.sd_model_hash + self.seed_resize_from_w = p.seed_resize_from_w + self.seed_resize_from_h = p.seed_resize_from_h + self.denoising_strength = getattr(p, 'denoising_strength', None) + self.extra_generation_params = p.extra_generation_params + self.index_of_first_image = index_of_first_image + self.styles = p.styles + self.job_timestamp = state.job_timestamp + self.clip_skip = opts.CLIP_stop_at_last_layers + + self.eta = p.eta + self.ddim_discretize = p.ddim_discretize + self.s_churn = p.s_churn + self.s_tmin = p.s_tmin + self.s_tmax = p.s_tmax + self.s_noise = p.s_noise + self.sampler_noise_scheduler_override = p.sampler_noise_scheduler_override + self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0] + self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0] + self.seed = int(self.seed if type(self.seed) != list else self.seed[0]) + self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 + + self.all_prompts = all_prompts or [self.prompt] + self.all_seeds = all_seeds or [self.seed] + self.all_subseeds = all_subseeds or [self.subseed] + self.infotexts = infotexts or [info] + + def js(self): + obj = { + "prompt": self.prompt, + "all_prompts": self.all_prompts, + "negative_prompt": self.negative_prompt, + "seed": self.seed, + "all_seeds": self.all_seeds, + "subseed": self.subseed, + "all_subseeds": self.all_subseeds, + "subseed_strength": self.subseed_strength, + "width": self.width, + "height": self.height, + "sampler_index": self.sampler_index, + "sampler": self.sampler, + "cfg_scale": self.cfg_scale, + "steps": self.steps, + "batch_size": self.batch_size, + "restore_faces": self.restore_faces, + "face_restoration_model": self.face_restoration_model, + "sd_model_hash": self.sd_model_hash, + "seed_resize_from_w": self.seed_resize_from_w, + "seed_resize_from_h": self.seed_resize_from_h, + "denoising_strength": self.denoising_strength, + "extra_generation_params": self.extra_generation_params, + "index_of_first_image": self.index_of_first_image, + "infotexts": self.infotexts, + "styles": self.styles, + "job_timestamp": self.job_timestamp, + "clip_skip": self.clip_skip, + } + + return json.dumps(obj) + + def infotext(self, p: StableDiffusionProcessing, index): + return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size) + + +# from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3 +def slerp(val, low, high): + low_norm = low/torch.norm(low, dim=1, keepdim=True) + high_norm = high/torch.norm(high, dim=1, keepdim=True) + dot = (low_norm*high_norm).sum(1) + + if dot.mean() > 0.9995: + return low * val + high * (1 - val) + + omega = torch.acos(dot) + so = torch.sin(omega) + res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high + return res + + +def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None): + xs = [] + + # if we have multiple seeds, this means we are working with batch size>1; this then + # enables the generation of additional tensors with noise 
that the sampler will use during its processing. + # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to + # produce the same images as with two batches [100], [101]. + if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0): + sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))] + else: + sampler_noises = None + + for i, seed in enumerate(seeds): + noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8) + + subnoise = None + if subseeds is not None: + subseed = 0 if i >= len(subseeds) else subseeds[i] + + subnoise = devices.randn(subseed, noise_shape) + + # randn results depend on device; gpu and cpu get different results for same seed; + # the way I see it, it's better to do this on CPU, so that everyone gets same result; + # but the original script had it like this, so I do not dare change it for now because + # it will break everyone's seeds. + noise = devices.randn(seed, noise_shape) + + if subnoise is not None: + noise = slerp(subseed_strength, noise, subnoise) + + if noise_shape != shape: + x = devices.randn(seed, shape) + dx = (shape[2] - noise_shape[2]) // 2 + dy = (shape[1] - noise_shape[1]) // 2 + w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx + h = noise_shape[1] if dy >= 0 else noise_shape[1] + 2 * dy + tx = 0 if dx < 0 else dx + ty = 0 if dy < 0 else dy + dx = max(-dx, 0) + dy = max(-dy, 0) + + x[:, ty:ty+h, tx:tx+w] = noise[:, dy:dy+h, dx:dx+w] + noise = x + + if sampler_noises is not None: + cnt = p.sampler.number_of_needed_noises(p) + + if opts.eta_noise_seed_delta > 0: + torch.manual_seed(seed + opts.eta_noise_seed_delta) + + for j in range(cnt): + sampler_noises[j].append(devices.randn_without_seed(tuple(noise_shape))) + + xs.append(noise) + + if sampler_noises is not None: + p.sampler.sampler_noises = [torch.stack(n).to(shared.device) for n in sampler_noises] + + x = torch.stack(xs).to(shared.device) + return x + + +def decode_first_stage(model, x): + with devices.autocast(disable=x.dtype == devices.dtype_vae): + x = model.decode_first_stage(x) + + return x + + +def get_fixed_seed(seed): + if seed is None or seed == '' or seed == -1: + return int(random.randrange(4294967294)) + + return seed + + +def fix_seed(p): + p.seed = get_fixed_seed(p.seed) + p.subseed = get_fixed_seed(p.subseed) + + +def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration=0, position_in_batch=0): + index = position_in_batch + iteration * p.batch_size + + clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers) + + generation_params = { + "Steps": p.steps, + "Sampler": get_correct_sampler(p)[p.sampler_index].name, + "CFG scale": p.cfg_scale, + "Seed": all_seeds[index], + "Face restoration": (opts.face_restoration_model if p.restore_faces else None), + "Size": f"{p.width}x{p.height}", + "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), + "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')), + "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace(':', '')), + "Batch size": (None if p.batch_size < 2 else p.batch_size), + "Batch pos": 
(None if p.batch_size < 2 else position_in_batch), + "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), + "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength), + "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), + "Denoising strength": getattr(p, 'denoising_strength', None), + "Eta": (None if p.sampler is None or p.sampler.eta == p.sampler.default_eta else p.sampler.eta), + "Clip skip": None if clip_skip <= 1 else clip_skip, + "ENSD": None if opts.eta_noise_seed_delta == 0 else opts.eta_noise_seed_delta, + } + + generation_params.update(p.extra_generation_params) + + generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None]) + + negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else "" + + return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip() + + +def process_images(p: StableDiffusionProcessing) -> Processed: + """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" + + if type(p.prompt) == list: + assert(len(p.prompt) > 0) + else: + assert p.prompt is not None + + with open(os.path.join(shared.script_path, "params.txt"), "w", encoding="utf8") as file: + processed = Processed(p, [], p.seed, "") + file.write(processed.infotext(p, 0)) + + devices.torch_gc() + + seed = get_fixed_seed(p.seed) + subseed = get_fixed_seed(p.subseed) + + if p.outpath_samples is not None: + os.makedirs(p.outpath_samples, exist_ok=True) + + if p.outpath_grids is not None: + os.makedirs(p.outpath_grids, exist_ok=True) + + modules.sd_hijack.model_hijack.apply_circular(p.tiling) + modules.sd_hijack.model_hijack.clear_comments() + + comments = {} + + shared.prompt_styles.apply_styles(p) + + if type(p.prompt) == list: + all_prompts = p.prompt + else: + all_prompts = p.batch_size * p.n_iter * [p.prompt] + + if type(seed) == list: + all_seeds = seed + else: + all_seeds = [int(seed) + (x if p.subseed_strength == 0 else 0) for x in range(len(all_prompts))] + + if type(subseed) == list: + all_subseeds = subseed + else: + all_subseeds = [int(subseed) + x for x in range(len(all_prompts))] + + def infotext(iteration=0, position_in_batch=0): + return create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration, position_in_batch) + + if os.path.exists(cmd_opts.embeddings_dir): + model_hijack.embedding_db.load_textual_inversion_embeddings() + + infotexts = [] + output_images = [] + + with torch.no_grad(), p.sd_model.ema_scope(): + with devices.autocast(): + p.init(all_prompts, all_seeds, all_subseeds) + + if state.job_count == -1: + state.job_count = p.n_iter + + for n in range(p.n_iter): + if state.skipped: + state.skipped = False + + if state.interrupted: + break + + prompts = all_prompts[n * p.batch_size:(n + 1) * p.batch_size] + seeds = all_seeds[n * p.batch_size:(n + 1) * p.batch_size] + subseeds = all_subseeds[n * p.batch_size:(n + 1) * p.batch_size] + + if (len(prompts) == 0): + break + + #uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) + #c = p.sd_model.get_learned_conditioning(prompts) + with devices.autocast(): + uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps) + c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps) + + if 
len(model_hijack.comments) > 0: + for comment in model_hijack.comments: + comments[comment] = 1 + + if p.n_iter > 1: + shared.state.job = f"Batch {n+1} out of {p.n_iter}" + + with devices.autocast(): + samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength) + + if state.interrupted or state.skipped: + + # if we are interrupted, sample returns just noise + # use the image collected previously in sampler loop + samples_ddim = shared.state.current_latent + + samples_ddim = samples_ddim.to(devices.dtype_vae) + x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim) + x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) + + del samples_ddim + + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + lowvram.send_everything_to_cpu() + + devices.torch_gc() + + if opts.filter_nsfw: + import modules.safety as safety + x_samples_ddim = modules.safety.censor_batch(x_samples_ddim) + + for i, x_sample in enumerate(x_samples_ddim): + x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + + if p.restore_faces: + if opts.save and not p.do_not_save_samples and opts.save_images_before_face_restoration: + images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration") + + devices.torch_gc() + + x_sample = modules.face_restoration.restore_faces(x_sample) + devices.torch_gc() + + image = Image.fromarray(x_sample) + + if p.color_corrections is not None and i < len(p.color_corrections): + if opts.save and not p.do_not_save_samples and opts.save_images_before_color_correction: + images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-color-correction") + image = apply_color_correction(p.color_corrections[i], image) + + if p.overlay_images is not None and i < len(p.overlay_images): + overlay = p.overlay_images[i] + + if p.paste_to is not None: + x, y, w, h = p.paste_to + base_image = Image.new('RGBA', (overlay.width, overlay.height)) + image = images.resize_image(1, image, w, h) + base_image.paste(image, (x, y)) + image = base_image + + image = image.convert('RGBA') + image.alpha_composite(overlay) + image = image.convert('RGB') + + if opts.samples_save and not p.do_not_save_samples: + images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p) + + text = infotext(n, i) + infotexts.append(text) + if opts.enable_pnginfo: + image.info["parameters"] = text + output_images.append(image) + + del x_samples_ddim + + devices.torch_gc() + + state.nextjob() + + p.color_corrections = None + + index_of_first_image = 0 + unwanted_grid_because_of_img_count = len(output_images) < 2 and opts.grid_only_if_multiple + if (opts.return_grid or opts.grid_save) and not p.do_not_save_grid and not unwanted_grid_because_of_img_count: + grid = images.image_grid(output_images, p.batch_size) + + if opts.return_grid: + text = infotext() + infotexts.insert(0, text) + if opts.enable_pnginfo: + grid.info["parameters"] = text + output_images.insert(0, grid) + index_of_first_image = 1 + + if opts.grid_save: + images.save_image(grid, p.outpath_grids, "grid", all_seeds[0], all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True) + + devices.torch_gc() + return Processed(p, output_images, 
all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), subseed=all_subseeds[0], all_prompts=all_prompts, all_seeds=all_seeds, all_subseeds=all_subseeds, index_of_first_image=index_of_first_image, infotexts=infotexts) + + +class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): + sampler = None + firstphase_width = 0 + firstphase_height = 0 + firstphase_width_truncated = 0 + firstphase_height_truncated = 0 + + def __init__(self, enable_hr=False, scale_latent=True, denoising_strength=0.75, **kwargs): + super().__init__(**kwargs) + self.enable_hr = enable_hr + self.scale_latent = scale_latent + self.denoising_strength = denoising_strength + + def init(self, all_prompts, all_seeds, all_subseeds): + if self.enable_hr: + if state.job_count == -1: + state.job_count = self.n_iter * 2 + else: + state.job_count = state.job_count * 2 + + desired_pixel_count = 512 * 512 + actual_pixel_count = self.width * self.height + scale = math.sqrt(desired_pixel_count / actual_pixel_count) + + self.firstphase_width = math.ceil(scale * self.width / 64) * 64 + self.firstphase_height = math.ceil(scale * self.height / 64) * 64 + self.firstphase_width_truncated = int(scale * self.width) + self.firstphase_height_truncated = int(scale * self.height) + + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) + + if not self.enable_hr: + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) + return samples + + x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) + + truncate_x = (self.firstphase_width - self.firstphase_width_truncated) // opt_f + truncate_y = (self.firstphase_height - self.firstphase_height_truncated) // opt_f + + samples = samples[:, :, truncate_y//2:samples.shape[2]-truncate_y//2, truncate_x//2:samples.shape[3]-truncate_x//2] + + if self.scale_latent: + samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + else: + decoded_samples = decode_first_stage(self.sd_model, samples) + + if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None": + decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear") + else: + lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) + + batch_images = [] + for i, x_sample in enumerate(lowres_samples): + x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + image = Image.fromarray(x_sample) + image = images.resize_image(0, image, self.width, self.height) + image = np.array(image).astype(np.float32) / 255.0 + image = np.moveaxis(image, 2, 0) + batch_images.append(image) + + decoded_samples = torch.from_numpy(np.array(batch_images)) + decoded_samples = decoded_samples.to(shared.device) + decoded_samples = 2. * decoded_samples - 1. 
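+                # decoded_samples is now the upscaled image scaled back to [-1, 1]; it is re-encoded through
+                # the VAE below so the second, img2img-style pass can run on latents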
+ + samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples)) + + shared.state.nextjob() + + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) + + noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + + # GC now before running the next img2img to prevent running out of memory + x = None + devices.torch_gc() + + samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps) + + return samples + + +class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): + sampler = None + + def __init__(self, init_images=None, resize_mode=0, denoising_strength=0.75, mask=None, mask_blur=4, inpainting_fill=0, inpaint_full_res=True, inpaint_full_res_padding=0, inpainting_mask_invert=0, **kwargs): + super().__init__(**kwargs) + + self.init_images = init_images + self.resize_mode: int = resize_mode + self.denoising_strength: float = denoising_strength + self.init_latent = None + self.image_mask = mask + #self.image_unblurred_mask = None + self.latent_mask = None + self.mask_for_overlay = None + self.mask_blur = mask_blur + self.inpainting_fill = inpainting_fill + self.inpaint_full_res = inpaint_full_res + self.inpaint_full_res_padding = inpaint_full_res_padding + self.inpainting_mask_invert = inpainting_mask_invert + self.mask = None + self.nmask = None + + def init(self, all_prompts, all_seeds, all_subseeds): + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model) + crop_region = None + + if self.image_mask is not None: + self.image_mask = self.image_mask.convert('L') + + if self.inpainting_mask_invert: + self.image_mask = ImageOps.invert(self.image_mask) + + #self.image_unblurred_mask = self.image_mask + + if self.mask_blur > 0: + self.image_mask = self.image_mask.filter(ImageFilter.GaussianBlur(self.mask_blur)) + + if self.inpaint_full_res: + self.mask_for_overlay = self.image_mask + mask = self.image_mask.convert('L') + crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding) + crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) + x1, y1, x2, y2 = crop_region + + mask = mask.crop(crop_region) + self.image_mask = images.resize_image(2, mask, self.width, self.height) + self.paste_to = (x1, y1, x2-x1, y2-y1) + else: + self.image_mask = images.resize_image(self.resize_mode, self.image_mask, self.width, self.height) + np_mask = np.array(self.image_mask) + np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8) + self.mask_for_overlay = Image.fromarray(np_mask) + + self.overlay_images = [] + + latent_mask = self.latent_mask if self.latent_mask is not None else self.image_mask + + add_color_corrections = opts.img2img_color_correction and self.color_corrections is None + if add_color_corrections: + self.color_corrections = [] + imgs = [] + for img in self.init_images: + image = img.convert("RGB") + + if crop_region is None: + image = images.resize_image(self.resize_mode, image, self.width, self.height) + + if self.image_mask is not None: + image_masked = Image.new('RGBa', (image.width, image.height)) + image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + + 
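+                # keep a copy of the original pixels with the masked area cut out; process_images()
+                # alpha-composites it back over the generated image so only the masked region changes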
self.overlay_images.append(image_masked.convert('RGBA')) + + if crop_region is not None: + image = image.crop(crop_region) + image = images.resize_image(2, image, self.width, self.height) + + if self.image_mask is not None: + if self.inpainting_fill != 1: + image = masking.fill(image, latent_mask) + + if add_color_corrections: + self.color_corrections.append(setup_color_correction(image)) + + image = np.array(image).astype(np.float32) / 255.0 + image = np.moveaxis(image, 2, 0) + + imgs.append(image) + + if len(imgs) == 1: + batch_images = np.expand_dims(imgs[0], axis=0).repeat(self.batch_size, axis=0) + if self.overlay_images is not None: + self.overlay_images = self.overlay_images * self.batch_size + elif len(imgs) <= self.batch_size: + self.batch_size = len(imgs) + batch_images = np.array(imgs) + else: + raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less") + + image = torch.from_numpy(batch_images) + image = 2. * image - 1. + image = image.to(shared.device) + + self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image)) + + if self.image_mask is not None: + init_mask = latent_mask + latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) + latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 + latmask = latmask[0] + latmask = np.around(latmask) + latmask = np.tile(latmask[None], (4, 1, 1)) + + self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype) + self.nmask = torch.asarray(latmask).to(shared.device).type(self.sd_model.dtype) + + # this needs to be fixed to be done in sample() using actual seeds for batches + if self.inpainting_fill == 2: + self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask + elif self.inpainting_fill == 3: + self.init_latent = self.init_latent * self.mask + + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + + samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning) + + if self.mask is not None: + samples = samples * self.nmask + self.init_latent * self.mask + + del x + devices.torch_gc() + + return samples diff --git a/modules/prompt_parser.py b/modules/prompt_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..919d5d31a26dbf89e66e9442fd26df2985c784db --- /dev/null +++ b/modules/prompt_parser.py @@ -0,0 +1,366 @@ +import re +from collections import namedtuple +from typing import List +import lark + +# a prompt like this: "fantasy landscape with a [mountain:lake:0.25] and [an oak:a christmas tree:0.75][ in foreground::0.6][ in background:0.25] [shoddy:masterful:0.5]" +# will be represented with prompt_schedule like this (assuming steps=100): +# [25, 'fantasy landscape with a mountain and an oak in foreground shoddy'] +# [50, 'fantasy landscape with a lake and an oak in foreground in background shoddy'] +# [60, 'fantasy landscape with a lake and an oak in foreground in background masterful'] +# [75, 'fantasy landscape with a lake and an oak in background masterful'] +# [100, 'fantasy landscape with a lake and a christmas tree in 
background masterful'] + +schedule_parser = lark.Lark(r""" +!start: (prompt | /[][():]/+)* +prompt: (emphasized | scheduled | alternate | plain | WHITESPACE)* +!emphasized: "(" prompt ")" + | "(" prompt ":" prompt ")" + | "[" prompt "]" +scheduled: "[" [prompt ":"] prompt ":" [WHITESPACE] NUMBER "]" +alternate: "[" prompt ("|" prompt)+ "]" +WHITESPACE: /\s+/ +plain: /([^\\\[\]():|]|\\.)+/ +%import common.SIGNED_NUMBER -> NUMBER +""") + +def get_learned_conditioning_prompt_schedules(prompts, steps): + """ + >>> g = lambda p: get_learned_conditioning_prompt_schedules([p], 10)[0] + >>> g("test") + [[10, 'test']] + >>> g("a [b:3]") + [[3, 'a '], [10, 'a b']] + >>> g("a [b: 3]") + [[3, 'a '], [10, 'a b']] + >>> g("a [[[b]]:2]") + [[2, 'a '], [10, 'a [[b]]']] + >>> g("[(a:2):3]") + [[3, ''], [10, '(a:2)']] + >>> g("a [b : c : 1] d") + [[1, 'a b d'], [10, 'a c d']] + >>> g("a[b:[c:d:2]:1]e") + [[1, 'abe'], [2, 'ace'], [10, 'ade']] + >>> g("a [unbalanced") + [[10, 'a [unbalanced']] + >>> g("a [b:.5] c") + [[5, 'a c'], [10, 'a b c']] + >>> g("a [{b|d{:.5] c") # not handling this right now + [[5, 'a c'], [10, 'a {b|d{ c']] + >>> g("((a][:b:c [d:3]") + [[3, '((a][:b:c '], [10, '((a][:b:c d']] + """ + + def collect_steps(steps, tree): + l = [steps] + class CollectSteps(lark.Visitor): + def scheduled(self, tree): + tree.children[-1] = float(tree.children[-1]) + if tree.children[-1] < 1: + tree.children[-1] *= steps + tree.children[-1] = min(steps, int(tree.children[-1])) + l.append(tree.children[-1]) + def alternate(self, tree): + l.extend(range(1, steps+1)) + CollectSteps().visit(tree) + return sorted(set(l)) + + def at_step(step, tree): + class AtStep(lark.Transformer): + def scheduled(self, args): + before, after, _, when = args + yield before or () if step <= when else after + def alternate(self, args): + yield next(args[(step - 1)%len(args)]) + def start(self, args): + def flatten(x): + if type(x) == str: + yield x + else: + for gen in x: + yield from flatten(gen) + return ''.join(flatten(args)) + def plain(self, args): + yield args[0].value + def __default__(self, data, children, meta): + for child in children: + yield from child + return AtStep().transform(tree) + + def get_schedule(prompt): + try: + tree = schedule_parser.parse(prompt) + except lark.exceptions.LarkError as e: + if 0: + import traceback + traceback.print_exc() + return [[steps, prompt]] + return [[t, at_step(t, tree)] for t in collect_steps(steps, tree)] + + promptdict = {prompt: get_schedule(prompt) for prompt in set(prompts)} + return [promptdict[prompt] for prompt in prompts] + + +ScheduledPromptConditioning = namedtuple("ScheduledPromptConditioning", ["end_at_step", "cond"]) + + +def get_learned_conditioning(model, prompts, steps): + """converts a list of prompts into a list of prompt schedules - each schedule is a list of ScheduledPromptConditioning, specifying the comdition (cond), + and the sampling step at which this condition is to be replaced by the next one. 
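+    Conditioning for each unique prompt text is computed once and cached, so duplicate prompts in a batch are not re-encoded.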
+ + Input: + (model, ['a red crown', 'a [blue:green:5] jeweled crown'], 20) + + Output: + [ + [ + ScheduledPromptConditioning(end_at_step=20, cond=tensor([[-0.3886, 0.0229, -0.0523, ..., -0.4901, -0.3066, 0.0674], ..., [ 0.3317, -0.5102, -0.4066, ..., 0.4119, -0.7647, -1.0160]], device='cuda:0')) + ], + [ + ScheduledPromptConditioning(end_at_step=5, cond=tensor([[-0.3886, 0.0229, -0.0522, ..., -0.4901, -0.3067, 0.0673], ..., [-0.0192, 0.3867, -0.4644, ..., 0.1135, -0.3696, -0.4625]], device='cuda:0')), + ScheduledPromptConditioning(end_at_step=20, cond=tensor([[-0.3886, 0.0229, -0.0522, ..., -0.4901, -0.3067, 0.0673], ..., [-0.7352, -0.4356, -0.7888, ..., 0.6994, -0.4312, -1.2593]], device='cuda:0')) + ] + ] + """ + res = [] + + prompt_schedules = get_learned_conditioning_prompt_schedules(prompts, steps) + cache = {} + + for prompt, prompt_schedule in zip(prompts, prompt_schedules): + + cached = cache.get(prompt, None) + if cached is not None: + res.append(cached) + continue + + texts = [x[1] for x in prompt_schedule] + conds = model.get_learned_conditioning(texts) + + cond_schedule = [] + for i, (end_at_step, text) in enumerate(prompt_schedule): + cond_schedule.append(ScheduledPromptConditioning(end_at_step, conds[i])) + + cache[prompt] = cond_schedule + res.append(cond_schedule) + + return res + + +re_AND = re.compile(r"\bAND\b") +re_weight = re.compile(r"^(.*?)(?:\s*:\s*([-+]?(?:\d+\.?|\d*\.\d+)))?\s*$") + +def get_multicond_prompt_list(prompts): + res_indexes = [] + + prompt_flat_list = [] + prompt_indexes = {} + + for prompt in prompts: + subprompts = re_AND.split(prompt) + + indexes = [] + for subprompt in subprompts: + match = re_weight.search(subprompt) + + text, weight = match.groups() if match is not None else (subprompt, 1.0) + + weight = float(weight) if weight is not None else 1.0 + + index = prompt_indexes.get(text, None) + if index is None: + index = len(prompt_flat_list) + prompt_flat_list.append(text) + prompt_indexes[text] = index + + indexes.append((index, weight)) + + res_indexes.append(indexes) + + return res_indexes, prompt_flat_list, prompt_indexes + + +class ComposableScheduledPromptConditioning: + def __init__(self, schedules, weight=1.0): + self.schedules: List[ScheduledPromptConditioning] = schedules + self.weight: float = weight + + +class MulticondLearnedConditioning: + def __init__(self, shape, batch): + self.shape: tuple = shape # the shape field is needed to send this object to DDIM/PLMS + self.batch: List[List[ComposableScheduledPromptConditioning]] = batch + +def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearnedConditioning: + """same as get_learned_conditioning, but returns a list of ScheduledPromptConditioning along with the weight objects for each prompt. + For each prompt, the list is obtained by splitting the prompt using the AND separator. 
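+    Each AND-separated sub-prompt may end with an optional ':weight' suffix (a float); the weight defaults to 1.0 when omitted.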
+ + https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/ + """ + + res_indexes, prompt_flat_list, prompt_indexes = get_multicond_prompt_list(prompts) + + learned_conditioning = get_learned_conditioning(model, prompt_flat_list, steps) + + res = [] + for indexes in res_indexes: + res.append([ComposableScheduledPromptConditioning(learned_conditioning[i], weight) for i, weight in indexes]) + + return MulticondLearnedConditioning(shape=(len(prompts),), batch=res) + + +def reconstruct_cond_batch(c: List[List[ScheduledPromptConditioning]], current_step): + param = c[0][0].cond + res = torch.zeros((len(c),) + param.shape, device=param.device, dtype=param.dtype) + for i, cond_schedule in enumerate(c): + target_index = 0 + for current, (end_at, cond) in enumerate(cond_schedule): + if current_step <= end_at: + target_index = current + break + res[i] = cond_schedule[target_index].cond + + return res + + +def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step): + param = c.batch[0][0].schedules[0].cond + + tensors = [] + conds_list = [] + + for batch_no, composable_prompts in enumerate(c.batch): + conds_for_batch = [] + + for cond_index, composable_prompt in enumerate(composable_prompts): + target_index = 0 + for current, (end_at, cond) in enumerate(composable_prompt.schedules): + if current_step <= end_at: + target_index = current + break + + conds_for_batch.append((len(tensors), composable_prompt.weight)) + tensors.append(composable_prompt.schedules[target_index].cond) + + conds_list.append(conds_for_batch) + + # if prompts have wildly different lengths above the limit we'll get tensors fo different shapes + # and won't be able to torch.stack them. So this fixes that. + token_count = max([x.shape[0] for x in tensors]) + for i in range(len(tensors)): + if tensors[i].shape[0] != token_count: + last_vector = tensors[i][-1:] + last_vector_repeated = last_vector.repeat([token_count - tensors[i].shape[0], 1]) + tensors[i] = torch.vstack([tensors[i], last_vector_repeated]) + + return conds_list, torch.stack(tensors).to(device=param.device, dtype=param.dtype) + + +re_attention = re.compile(r""" +\\\(| +\\\)| +\\\[| +\\]| +\\\\| +\\| +\(| +\[| +:([+-]?[.\d]+)\)| +\)| +]| +[^\\()\[\]:]+| +: +""", re.X) + + +def parse_prompt_attention(text): + """ + Parses a string with attention tokens and returns a list of pairs: text and its assoicated weight. 
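+    Bracket weights compound multiplicatively; in the last example below, 'house' ends up at 1.1 * 1.1 * 1.3 = 1.573.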
+ Accepted tokens are: + (abc) - increases attention to abc by a multiplier of 1.1 + (abc:3.12) - increases attention to abc by a multiplier of 3.12 + [abc] - decreases attention to abc by a multiplier of 1.1 + \( - literal character '(' + \[ - literal character '[' + \) - literal character ')' + \] - literal character ']' + \\ - literal character '\' + anything else - just text + + >>> parse_prompt_attention('normal text') + [['normal text', 1.0]] + >>> parse_prompt_attention('an (important) word') + [['an ', 1.0], ['important', 1.1], [' word', 1.0]] + >>> parse_prompt_attention('(unbalanced') + [['unbalanced', 1.1]] + >>> parse_prompt_attention('\(literal\]') + [['(literal]', 1.0]] + >>> parse_prompt_attention('(unnecessary)(parens)') + [['unnecessaryparens', 1.1]] + >>> parse_prompt_attention('a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).') + [['a ', 1.0], + ['house', 1.5730000000000004], + [' ', 1.1], + ['on', 1.0], + [' a ', 1.1], + ['hill', 0.55], + [', sun, ', 1.1], + ['sky', 1.4641000000000006], + ['.', 1.1]] + """ + + res = [] + round_brackets = [] + square_brackets = [] + + round_bracket_multiplier = 1.1 + square_bracket_multiplier = 1 / 1.1 + + def multiply_range(start_position, multiplier): + for p in range(start_position, len(res)): + res[p][1] *= multiplier + + for m in re_attention.finditer(text): + text = m.group(0) + weight = m.group(1) + + if text.startswith('\\'): + res.append([text[1:], 1.0]) + elif text == '(': + round_brackets.append(len(res)) + elif text == '[': + square_brackets.append(len(res)) + elif weight is not None and len(round_brackets) > 0: + multiply_range(round_brackets.pop(), float(weight)) + elif text == ')' and len(round_brackets) > 0: + multiply_range(round_brackets.pop(), round_bracket_multiplier) + elif text == ']' and len(square_brackets) > 0: + multiply_range(square_brackets.pop(), square_bracket_multiplier) + else: + res.append([text, 1.0]) + + for pos in round_brackets: + multiply_range(pos, round_bracket_multiplier) + + for pos in square_brackets: + multiply_range(pos, square_bracket_multiplier) + + if len(res) == 0: + res = [["", 1.0]] + + # merge runs of identical weights + i = 0 + while i + 1 < len(res): + if res[i][1] == res[i + 1][1]: + res[i][0] += res[i + 1][0] + res.pop(i + 1) + else: + i += 1 + + return res + +if __name__ == "__main__": + import doctest + doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE) +else: + import torch # doctest faster diff --git a/modules/realesrgan_model.py b/modules/realesrgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..3ac0b97aef383e2f27a50ad7996c3a95069ae9a2 --- /dev/null +++ b/modules/realesrgan_model.py @@ -0,0 +1,133 @@ +import os +import sys +import traceback + +import numpy as np +from PIL import Image +from basicsr.utils.download_util import load_file_from_url +from realesrgan import RealESRGANer + +from modules.upscaler import Upscaler, UpscalerData +from modules.shared import cmd_opts, opts + + +class UpscalerRealESRGAN(Upscaler): + def __init__(self, path): + self.name = "RealESRGAN" + self.user_path = path + super().__init__() + try: + from basicsr.archs.rrdbnet_arch import RRDBNet + from realesrgan import RealESRGANer + from realesrgan.archs.srvgg_arch import SRVGGNetCompact + self.enable = True + self.scalers = [] + scalers = self.load_models(path) + for scaler in scalers: + if scaler.name in opts.realesrgan_enabled_models: + self.scalers.append(scaler) + + except Exception: + print("Error importing Real-ESRGAN:", file=sys.stderr) + 
print(traceback.format_exc(), file=sys.stderr) + self.enable = False + self.scalers = [] + + def do_upscale(self, img, path): + if not self.enable: + return img + + info = self.load_model(path) + if not os.path.exists(info.data_path): + print("Unable to load RealESRGAN model: %s" % info.name) + return img + + upsampler = RealESRGANer( + scale=info.scale, + model_path=info.data_path, + model=info.model(), + half=not cmd_opts.no_half, + tile=opts.ESRGAN_tile, + tile_pad=opts.ESRGAN_tile_overlap, + ) + + upsampled = upsampler.enhance(np.array(img), outscale=info.scale)[0] + + image = Image.fromarray(upsampled) + return image + + def load_model(self, path): + try: + info = None + for scaler in self.scalers: + if scaler.data_path == path: + info = scaler + + if info is None: + print(f"Unable to find model info: {path}") + return None + + model_file = load_file_from_url(url=info.data_path, model_dir=self.model_path, progress=True) + info.data_path = model_file + return info + except Exception as e: + print(f"Error making Real-ESRGAN models list: {e}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + return None + + def load_models(self, _): + return get_realesrgan_models(self) + + +def get_realesrgan_models(scaler): + try: + from basicsr.archs.rrdbnet_arch import RRDBNet + from realesrgan.archs.srvgg_arch import SRVGGNetCompact + models = [ + UpscalerData( + name="R-ESRGAN General 4xV3", + path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth", + scale=4, + upscaler=scaler, + model=lambda: SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu') + ), + UpscalerData( + name="R-ESRGAN General WDN 4xV3", + path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth", + scale=4, + upscaler=scaler, + model=lambda: SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu') + ), + UpscalerData( + name="R-ESRGAN AnimeVideo", + path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth", + scale=4, + upscaler=scaler, + model=lambda: SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu') + ), + UpscalerData( + name="R-ESRGAN 4x+", + path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth", + scale=4, + upscaler=scaler, + model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + ), + UpscalerData( + name="R-ESRGAN 4x+ Anime6B", + path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth", + scale=4, + upscaler=scaler, + model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) + ), + UpscalerData( + name="R-ESRGAN 2x+", + path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth", + scale=2, + upscaler=scaler, + model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) + ), + ] + return models + except Exception as e: + print("Error making Real-ESRGAN models list:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) diff --git a/modules/safe.py b/modules/safe.py new file mode 100644 index 0000000000000000000000000000000000000000..20be16a502e230ac9dd90b31917d159c18a0eed8 --- /dev/null +++ b/modules/safe.py @@ -0,0 +1,110 @@ +# this code is adapted from the script contributed by anon from /h/ + 
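+# torch.load is replaced at the bottom of this module with a version that first runs the checkpoint through
+# RestrictedUnpickler, which only resolves an allow-list of classes (torch storages/tensor rebuilds, OrderedDict,
+# a few numpy and pytorch_lightning types), so loading a malicious pickle cannot execute arbitrary code.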
+import io +import pickle +import collections +import sys +import traceback + +import torch +import numpy +import _codecs +import zipfile +import re + + +# PyTorch 1.13 and later have _TypedStorage renamed to TypedStorage +TypedStorage = torch.storage.TypedStorage if hasattr(torch.storage, 'TypedStorage') else torch.storage._TypedStorage + + +def encode(*args): + out = _codecs.encode(*args) + return out + + +class RestrictedUnpickler(pickle.Unpickler): + def persistent_load(self, saved_id): + assert saved_id[0] == 'storage' + return TypedStorage() + + def find_class(self, module, name): + if module == 'collections' and name == 'OrderedDict': + return getattr(collections, name) + if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']: + return getattr(torch._utils, name) + if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage']: + return getattr(torch, name) + if module == 'torch.nn.modules.container' and name in ['ParameterDict']: + return getattr(torch.nn.modules.container, name) + if module == 'numpy.core.multiarray' and name == 'scalar': + return numpy.core.multiarray.scalar + if module == 'numpy' and name == 'dtype': + return numpy.dtype + if module == '_codecs' and name == 'encode': + return encode + if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint': + import pytorch_lightning.callbacks + return pytorch_lightning.callbacks.model_checkpoint + if module == "pytorch_lightning.callbacks.model_checkpoint" and name == 'ModelCheckpoint': + import pytorch_lightning.callbacks.model_checkpoint + return pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint + if module == "__builtin__" and name == 'set': + return set + + # Forbid everything else. 
+ raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden") + + +allowed_zip_names = ["archive/data.pkl", "archive/version"] +allowed_zip_names_re = re.compile(r"^archive/data/\d+$") + + +def check_zip_filenames(filename, names): + for name in names: + if name in allowed_zip_names: + continue + if allowed_zip_names_re.match(name): + continue + + raise Exception(f"bad file inside {filename}: {name}") + + +def check_pt(filename): + try: + + # new pytorch format is a zip file + with zipfile.ZipFile(filename) as z: + check_zip_filenames(filename, z.namelist()) + + with z.open('archive/data.pkl') as file: + unpickler = RestrictedUnpickler(file) + unpickler.load() + + except zipfile.BadZipfile: + + # if it's not a zip file, it's an olf pytorch format, with five objects written to pickle + with open(filename, "rb") as file: + unpickler = RestrictedUnpickler(file) + for i in range(5): + unpickler.load() + + +def load(filename, *args, **kwargs): + from modules import shared + + try: + if not shared.cmd_opts.disable_safe_unpickle: + check_pt(filename) + + except Exception: + print(f"Error verifying pickled file from {filename}:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + print(f"\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr) + print(f"You can skip this check with --disable-safe-unpickle commandline argument.", file=sys.stderr) + return None + + return unsafe_torch_load(filename, *args, **kwargs) + + +unsafe_torch_load = torch.load +torch.load = load diff --git a/modules/safety.py b/modules/safety.py new file mode 100644 index 0000000000000000000000000000000000000000..cff4b27834701d0c092eca557948d8f0ee896377 --- /dev/null +++ b/modules/safety.py @@ -0,0 +1,42 @@ +import torch +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker +from transformers import AutoFeatureExtractor +from PIL import Image + +import modules.shared as shared + +safety_model_id = "CompVis/stable-diffusion-safety-checker" +safety_feature_extractor = None +safety_checker = None + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] 
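+    # scale [0, 1] floats to uint8 before constructing the PIL images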
+ images = (images * 255).round().astype("uint8") + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + +# check and replace nsfw content +def check_safety(x_image): + global safety_feature_extractor, safety_checker + + if safety_feature_extractor is None: + safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id) + safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id) + + safety_checker_input = safety_feature_extractor(numpy_to_pil(x_image), return_tensors="pt") + x_checked_image, has_nsfw_concept = safety_checker(images=x_image, clip_input=safety_checker_input.pixel_values) + + return x_checked_image, has_nsfw_concept + + +def censor_batch(x): + x_samples_ddim_numpy = x.cpu().permute(0, 2, 3, 1).numpy() + x_checked_image, has_nsfw_concept = check_safety(x_samples_ddim_numpy) + x = torch.from_numpy(x_checked_image).permute(0, 3, 1, 2) + + return x diff --git a/modules/scripts.py b/modules/scripts.py new file mode 100644 index 0000000000000000000000000000000000000000..45230f9a14cec878ad5859e66870722a5e7a9ef7 --- /dev/null +++ b/modules/scripts.py @@ -0,0 +1,201 @@ +import os +import sys +import traceback + +import modules.ui as ui +import gradio as gr + +from modules.processing import StableDiffusionProcessing +from modules import shared + +class Script: + filename = None + args_from = None + args_to = None + + # The title of the script. This is what will be displayed in the dropdown menu. + def title(self): + raise NotImplementedError() + + # How the script is displayed in the UI. See https://gradio.app/docs/#components + # for the different UI components you can use and how to create them. + # Most UI components can return a value, such as a boolean for a checkbox. + # The returned values are passed to the run method as parameters. + def ui(self, is_img2img): + pass + + # Determines when the script should be shown in the dropdown menu via the + # returned value. As an example: + # is_img2img is True if the current tab is img2img, and False if it is txt2img. + # Thus, return is_img2img to only show the script on the img2img tab. + def show(self, is_img2img): + return True + + # This is where the additional processing is implemented. The parameters include + # self, the model object "p" (a StableDiffusionProcessing class, see + # processing.py), and the parameters returned by the ui method. + # Custom functions can be defined here, and additional libraries can be imported + # to be used in processing. The return value should be a Processed object, which is + # what is returned by the process_images method. + def run(self, *args): + raise NotImplementedError() + + # The description method is currently unused. + # To add a description that appears when hovering over the title, amend the "titles" + # dict in script.js to include the script title (returned by title) as a key, and + # your description as the value. 
+ def describe(self): + return "" + + +scripts_data = [] + + +def load_scripts(basedir): + if not os.path.exists(basedir): + return + + for filename in sorted(os.listdir(basedir)): + path = os.path.join(basedir, filename) + + if not os.path.isfile(path): + continue + + try: + with open(path, "r", encoding="utf8") as file: + text = file.read() + + from types import ModuleType + compiled = compile(text, path, 'exec') + module = ModuleType(filename) + exec(compiled, module.__dict__) + + for key, script_class in module.__dict__.items(): + if type(script_class) == type and issubclass(script_class, Script): + scripts_data.append((script_class, path)) + + except Exception: + print(f"Error loading script: {filename}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + +def wrap_call(func, filename, funcname, *args, default=None, **kwargs): + try: + res = func(*args, **kwargs) + return res + except Exception: + print(f"Error calling: {filename}/{funcname}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + return default + + +class ScriptRunner: + def __init__(self): + self.scripts = [] + + def setup_ui(self, is_img2img): + for script_class, path in scripts_data: + script = script_class() + script.filename = path + + if not script.show(is_img2img): + continue + + self.scripts.append(script) + + titles = [wrap_call(script.title, script.filename, "title") or f"{script.filename} [error]" for script in self.scripts] + + dropdown = gr.Dropdown(label="Script", choices=["None"] + titles, value="None", type="index") + inputs = [dropdown] + + for script in self.scripts: + script.args_from = len(inputs) + script.args_to = len(inputs) + + controls = wrap_call(script.ui, script.filename, "ui", is_img2img) + + if controls is None: + continue + + for control in controls: + control.custom_script_source = os.path.basename(script.filename) + control.visible = False + + inputs += controls + script.args_to = len(inputs) + + def select_script(script_index): + if 0 < script_index <= len(self.scripts): + script = self.scripts[script_index-1] + args_from = script.args_from + args_to = script.args_to + else: + args_from = 0 + args_to = 0 + + return [ui.gr_show(True if i == 0 else args_from <= i < args_to) for i in range(len(inputs))] + + dropdown.change( + fn=select_script, + inputs=[dropdown], + outputs=inputs + ) + + return inputs + + def run(self, p: StableDiffusionProcessing, *args): + script_index = args[0] + + if script_index == 0: + return None + + script = self.scripts[script_index-1] + + if script is None: + return None + + script_args = args[script.args_from:script.args_to] + processed = script.run(p, *script_args) + + shared.total_tqdm.clear() + + return processed + + def reload_sources(self): + for si, script in list(enumerate(self.scripts)): + with open(script.filename, "r", encoding="utf8") as file: + args_from = script.args_from + args_to = script.args_to + filename = script.filename + text = file.read() + + from types import ModuleType + + compiled = compile(text, filename, 'exec') + module = ModuleType(script.filename) + exec(compiled, module.__dict__) + + for key, script_class in module.__dict__.items(): + if type(script_class) == type and issubclass(script_class, Script): + self.scripts[si] = script_class() + self.scripts[si].filename = filename + self.scripts[si].args_from = args_from + self.scripts[si].args_to = args_to + +scripts_txt2img = ScriptRunner() +scripts_img2img = ScriptRunner() + +def reload_script_body_only(): + scripts_txt2img.reload_sources() + 
scripts_img2img.reload_sources() + + +def reload_scripts(basedir): + global scripts_txt2img, scripts_img2img + + scripts_data.clear() + load_scripts(basedir) + + scripts_txt2img = ScriptRunner() + scripts_img2img = ScriptRunner() diff --git a/modules/scunet_model.py b/modules/scunet_model.py new file mode 100644 index 0000000000000000000000000000000000000000..36a996bf0b7f2f824975595e18a9a775476377c0 --- /dev/null +++ b/modules/scunet_model.py @@ -0,0 +1,88 @@ +import os.path +import sys +import traceback + +import PIL.Image +import numpy as np +import torch +from basicsr.utils.download_util import load_file_from_url + +import modules.upscaler +from modules import devices, modelloader +from modules.scunet_model_arch import SCUNet as net + + +class UpscalerScuNET(modules.upscaler.Upscaler): + def __init__(self, dirname): + self.name = "ScuNET" + self.model_name = "ScuNET GAN" + self.model_name2 = "ScuNET PSNR" + self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_gan.pth" + self.model_url2 = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_psnr.pth" + self.user_path = dirname + super().__init__() + model_paths = self.find_models(ext_filter=[".pth"]) + scalers = [] + add_model2 = True + for file in model_paths: + if "http" in file: + name = self.model_name + else: + name = modelloader.friendly_name(file) + if name == self.model_name2 or file == self.model_url2: + add_model2 = False + try: + scaler_data = modules.upscaler.UpscalerData(name, file, self, 4) + scalers.append(scaler_data) + except Exception: + print(f"Error loading ScuNET model: {file}", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + if add_model2: + scaler_data2 = modules.upscaler.UpscalerData(self.model_name2, self.model_url2, self) + scalers.append(scaler_data2) + self.scalers = scalers + + def do_upscale(self, img: PIL.Image, selected_file): + torch.cuda.empty_cache() + + model = self.load_model(selected_file) + if model is None: + return img + + device = devices.device_scunet + img = np.array(img) + img = img[:, :, ::-1] + img = np.moveaxis(img, 2, 0) / 255 + img = torch.from_numpy(img).float() + img = img.unsqueeze(0).to(device) + + img = img.to(device) + with torch.no_grad(): + output = model(img) + output = output.squeeze().float().cpu().clamp_(0, 1).numpy() + output = 255. 
* np.moveaxis(output, 0, 2) + output = output.astype(np.uint8) + output = output[:, :, ::-1] + torch.cuda.empty_cache() + return PIL.Image.fromarray(output, 'RGB') + + def load_model(self, path: str): + device = devices.device_scunet + if "http" in path: + filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name, + progress=True) + else: + filename = path + if not os.path.exists(os.path.join(self.model_path, filename)) or filename is None: + print(f"ScuNET: Unable to load model from {filename}", file=sys.stderr) + return None + + model = net(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64) + model.load_state_dict(torch.load(filename), strict=True) + model.eval() + for k, v in model.named_parameters(): + v.requires_grad = False + model = model.to(device) + + return model + diff --git a/modules/scunet_model_arch.py b/modules/scunet_model_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..43ca8d36fe57a12dcad58e8b06ee2e0774494b0e --- /dev/null +++ b/modules/scunet_model_arch.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange +from einops.layers.torch import Rearrange +from timm.models.layers import trunc_normal_, DropPath + + +class WMSA(nn.Module): + """ Self-attention module in Swin Transformer + """ + + def __init__(self, input_dim, output_dim, head_dim, window_size, type): + super(WMSA, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.head_dim = head_dim + self.scale = self.head_dim ** -0.5 + self.n_heads = input_dim // head_dim + self.window_size = window_size + self.type = type + self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True) + + self.relative_position_params = nn.Parameter( + torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads)) + + self.linear = nn.Linear(self.input_dim, self.output_dim) + + trunc_normal_(self.relative_position_params, std=.02) + self.relative_position_params = torch.nn.Parameter( + self.relative_position_params.view(2 * window_size - 1, 2 * window_size - 1, self.n_heads).transpose(1, + 2).transpose( + 0, 1)) + + def generate_mask(self, h, w, p, shift): + """ generating the mask of SW-MSA + Args: + shift: shift parameters in CyclicShift. + Returns: + attn_mask: should be (1 1 w p p), + """ + # supporting square. + attn_mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device) + if self.type == 'W': + return attn_mask + + s = p - shift + attn_mask[-1, :, :s, :, s:, :] = True + attn_mask[-1, :, s:, :, :s, :] = True + attn_mask[:, -1, :, :s, :, s:] = True + attn_mask[:, -1, :, s:, :, :s] = True + attn_mask = rearrange(attn_mask, 'w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)') + return attn_mask + + def forward(self, x): + """ Forward pass of Window Multi-head Self-attention module. 
+ Args: + x: input tensor with shape of [b h w c]; + attn_mask: attention mask, fill -inf where the value is True; + Returns: + output: tensor shape [b h w c] + """ + if self.type != 'W': x = torch.roll(x, shifts=(-(self.window_size // 2), -(self.window_size // 2)), dims=(1, 2)) + x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size) + h_windows = x.size(1) + w_windows = x.size(2) + # square validation + # assert h_windows == w_windows + + x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size) + qkv = self.embedding_layer(x) + q, k, v = rearrange(qkv, 'b nw np (threeh c) -> threeh b nw np c', c=self.head_dim).chunk(3, dim=0) + sim = torch.einsum('hbwpc,hbwqc->hbwpq', q, k) * self.scale + # Adding learnable relative embedding + sim = sim + rearrange(self.relative_embedding(), 'h p q -> h 1 1 p q') + # Using Attn Mask to distinguish different subwindows. + if self.type != 'W': + attn_mask = self.generate_mask(h_windows, w_windows, self.window_size, shift=self.window_size // 2) + sim = sim.masked_fill_(attn_mask, float("-inf")) + + probs = nn.functional.softmax(sim, dim=-1) + output = torch.einsum('hbwij,hbwjc->hbwic', probs, v) + output = rearrange(output, 'h b w p c -> b w p (h c)') + output = self.linear(output) + output = rearrange(output, 'b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c', w1=h_windows, p1=self.window_size) + + if self.type != 'W': output = torch.roll(output, shifts=(self.window_size // 2, self.window_size // 2), + dims=(1, 2)) + return output + + def relative_embedding(self): + cord = torch.tensor(np.array([[i, j] for i in range(self.window_size) for j in range(self.window_size)])) + relation = cord[:, None, :] - cord[None, :, :] + self.window_size - 1 + # negative is allowed + return self.relative_position_params[:, relation[:, :, 0].long(), relation[:, :, 1].long()] + + +class Block(nn.Module): + def __init__(self, input_dim, output_dim, head_dim, window_size, drop_path, type='W', input_resolution=None): + """ SwinTransformer Block + """ + super(Block, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + assert type in ['W', 'SW'] + self.type = type + if input_resolution <= window_size: + self.type = 'W' + + self.ln1 = nn.LayerNorm(input_dim) + self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type) + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.ln2 = nn.LayerNorm(input_dim) + self.mlp = nn.Sequential( + nn.Linear(input_dim, 4 * input_dim), + nn.GELU(), + nn.Linear(4 * input_dim, output_dim), + ) + + def forward(self, x): + x = x + self.drop_path(self.msa(self.ln1(x))) + x = x + self.drop_path(self.mlp(self.ln2(x))) + return x + + +class ConvTransBlock(nn.Module): + def __init__(self, conv_dim, trans_dim, head_dim, window_size, drop_path, type='W', input_resolution=None): + """ SwinTransformer and Conv Block + """ + super(ConvTransBlock, self).__init__() + self.conv_dim = conv_dim + self.trans_dim = trans_dim + self.head_dim = head_dim + self.window_size = window_size + self.drop_path = drop_path + self.type = type + self.input_resolution = input_resolution + + assert self.type in ['W', 'SW'] + if self.input_resolution <= self.window_size: + self.type = 'W' + + self.trans_block = Block(self.trans_dim, self.trans_dim, self.head_dim, self.window_size, self.drop_path, + self.type, self.input_resolution) + self.conv1_1 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True) + self.conv1_2 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True) + + self.conv_block = nn.Sequential( + nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False), + nn.ReLU(True), + nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False) + ) + + def forward(self, x): + conv_x, trans_x = torch.split(self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1) + conv_x = self.conv_block(conv_x) + conv_x + trans_x = Rearrange('b c h w -> b h w c')(trans_x) + trans_x = self.trans_block(trans_x) + trans_x = Rearrange('b h w c -> b c h w')(trans_x) + res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1)) + x = x + res + + return x + + +class SCUNet(nn.Module): + # def __init__(self, in_nc=3, config=[2, 2, 2, 2, 2, 2, 2], dim=64, drop_path_rate=0.0, input_resolution=256): + def __init__(self, in_nc=3, config=None, dim=64, drop_path_rate=0.0, input_resolution=256): + super(SCUNet, self).__init__() + if config is None: + config = [2, 2, 2, 2, 2, 2, 2] + self.config = config + self.dim = dim + self.head_dim = 32 + self.window_size = 8 + + # drop path rate for each layer + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))] + + self.m_head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)] + + begin = 0 + self.m_down1 = [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 else 'SW', input_resolution) + for i in range(config[0])] + \ + [nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False)] + + begin += config[0] + self.m_down2 = [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 else 'SW', input_resolution // 2) + for i in range(config[1])] + \ + [nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False)] + + begin += config[1] + self.m_down3 = [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 else 'SW', input_resolution // 4) + for i in range(config[2])] + \ + [nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False)] + + begin += config[2] + self.m_body = [ConvTransBlock(4 * dim, 4 * dim, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 else 'SW', input_resolution // 8) + for i in range(config[3])] + + begin += config[3] + self.m_up3 = [nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False), ] + \ + [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 
else 'SW', input_resolution // 4) + for i in range(config[4])] + + begin += config[4] + self.m_up2 = [nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False), ] + \ + [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 else 'SW', input_resolution // 2) + for i in range(config[5])] + + begin += config[5] + self.m_up1 = [nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False), ] + \ + [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin], + 'W' if not i % 2 else 'SW', input_resolution) + for i in range(config[6])] + + self.m_tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)] + + self.m_head = nn.Sequential(*self.m_head) + self.m_down1 = nn.Sequential(*self.m_down1) + self.m_down2 = nn.Sequential(*self.m_down2) + self.m_down3 = nn.Sequential(*self.m_down3) + self.m_body = nn.Sequential(*self.m_body) + self.m_up3 = nn.Sequential(*self.m_up3) + self.m_up2 = nn.Sequential(*self.m_up2) + self.m_up1 = nn.Sequential(*self.m_up1) + self.m_tail = nn.Sequential(*self.m_tail) + # self.apply(self._init_weights) + + def forward(self, x0): + + h, w = x0.size()[-2:] + paddingBottom = int(np.ceil(h / 64) * 64 - h) + paddingRight = int(np.ceil(w / 64) * 64 - w) + x0 = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x0) + + x1 = self.m_head(x0) + x2 = self.m_down1(x1) + x3 = self.m_down2(x2) + x4 = self.m_down3(x3) + x = self.m_body(x4) + x = self.m_up3(x + x4) + x = self.m_up2(x + x3) + x = self.m_up1(x + x2) + x = self.m_tail(x + x1) + + x = x[..., :h, :w] + + return x + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) \ No newline at end of file diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py new file mode 100644 index 0000000000000000000000000000000000000000..c81722a0aae6bc46c83f92196e3c8ab1655a5dbe --- /dev/null +++ b/modules/sd_hijack.py @@ -0,0 +1,405 @@ +import math +import os +import sys +import traceback +import torch +import numpy as np +from torch import einsum +from torch.nn.functional import silu + +import modules.textual_inversion.textual_inversion +from modules import prompt_parser, devices, sd_hijack_optimizations, shared +from modules.shared import opts, device, cmd_opts +from modules.sd_hijack_optimizations import invokeAI_mps_available + +import ldm.modules.attention +import ldm.modules.diffusionmodules.model + +attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward +diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity +diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward + +def apply_optimizations(): + undo_optimizations() + + ldm.modules.diffusionmodules.model.nonlinearity = silu + + if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (8, 6)): + print("Applying xformers cross attention optimization.") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward + elif cmd_opts.opt_split_attention_v1: + print("Applying v1 cross attention optimization.") + ldm.modules.attention.CrossAttention.forward = 
sd_hijack_optimizations.split_cross_attention_forward_v1 + elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): + if not invokeAI_mps_available and shared.device.type == 'mps': + print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") + print("Applying v1 cross attention optimization.") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 + else: + print("Applying cross attention optimization (InvokeAI).") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI + elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()): + print("Applying cross attention optimization (Doggettx).") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.cross_attention_attnblock_forward + + +def undo_optimizations(): + from modules.hypernetworks import hypernetwork + + ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward + ldm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity + ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward + + +def get_target_prompt_token_count(token_count): + return math.ceil(max(token_count, 1) / 75) * 75 + + +class StableDiffusionModelHijack: + fixes = None + comments = [] + layers = None + circular_enabled = False + clip = None + + embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir) + + def hijack(self, m): + model_embeddings = m.cond_stage_model.transformer.text_model.embeddings + + model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self) + m.cond_stage_model = FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self) + + self.clip = m.cond_stage_model + + apply_optimizations() + + def flatten(el): + flattened = [flatten(children) for children in el.children()] + res = [el] + for c in flattened: + res += c + return res + + self.layers = flatten(m) + + def undo_hijack(self, m): + if type(m.cond_stage_model) == FrozenCLIPEmbedderWithCustomWords: + m.cond_stage_model = m.cond_stage_model.wrapped + + model_embeddings = m.cond_stage_model.transformer.text_model.embeddings + if type(model_embeddings.token_embedding) == EmbeddingsWithFixes: + model_embeddings.token_embedding = model_embeddings.token_embedding.wrapped + + def apply_circular(self, enable): + if self.circular_enabled == enable: + return + + self.circular_enabled = enable + + for layer in [layer for layer in self.layers if type(layer) == torch.nn.Conv2d]: + layer.padding_mode = 'circular' if enable else 'zeros' + + def clear_comments(self): + self.comments = [] + + def tokenize(self, text): + _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text]) + return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count) + + +class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): + def __init__(self, wrapped, hijack): + super().__init__() + self.wrapped = wrapped + self.hijack: StableDiffusionModelHijack = hijack + self.tokenizer = wrapped.tokenizer + self.token_mults = {} + + self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0] + + 
tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k] + for text, ident in tokens_with_parens: + mult = 1.0 + for c in text: + if c == '[': + mult /= 1.1 + if c == ']': + mult *= 1.1 + if c == '(': + mult *= 1.1 + if c == ')': + mult /= 1.1 + + if mult != 1.0: + self.token_mults[ident] = mult + + def tokenize_line(self, line, used_custom_terms, hijack_comments): + id_end = self.wrapped.tokenizer.eos_token_id + + if opts.enable_emphasis: + parsed = prompt_parser.parse_prompt_attention(line) + else: + parsed = [[line, 1.0]] + + tokenized = self.wrapped.tokenizer([text for text, _ in parsed], truncation=False, add_special_tokens=False)["input_ids"] + + fixes = [] + remade_tokens = [] + multipliers = [] + last_comma = -1 + + for tokens, (text, weight) in zip(tokenized, parsed): + i = 0 + while i < len(tokens): + token = tokens[i] + + embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i) + + if token == self.comma_token: + last_comma = len(remade_tokens) + elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), 1) % 75 == 0 and last_comma != -1 and len(remade_tokens) - last_comma <= opts.comma_padding_backtrack: + last_comma += 1 + reloc_tokens = remade_tokens[last_comma:] + reloc_mults = multipliers[last_comma:] + + remade_tokens = remade_tokens[:last_comma] + length = len(remade_tokens) + + rem = int(math.ceil(length / 75)) * 75 - length + remade_tokens += [id_end] * rem + reloc_tokens + multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults + + if embedding is None: + remade_tokens.append(token) + multipliers.append(weight) + i += 1 + else: + emb_len = int(embedding.vec.shape[0]) + iteration = len(remade_tokens) // 75 + if (len(remade_tokens) + emb_len) // 75 != iteration: + rem = (75 * (iteration + 1) - len(remade_tokens)) + remade_tokens += [id_end] * rem + multipliers += [1.0] * rem + iteration += 1 + fixes.append((iteration, (len(remade_tokens) % 75, embedding))) + remade_tokens += [0] * emb_len + multipliers += [weight] * emb_len + used_custom_terms.append((embedding.name, embedding.checksum())) + i += embedding_length_in_tokens + + token_count = len(remade_tokens) + prompt_target_length = get_target_prompt_token_count(token_count) + tokens_to_add = prompt_target_length - len(remade_tokens) + + remade_tokens = remade_tokens + [id_end] * tokens_to_add + multipliers = multipliers + [1.0] * tokens_to_add + + return remade_tokens, fixes, multipliers, token_count + + def process_text(self, texts): + used_custom_terms = [] + remade_batch_tokens = [] + hijack_comments = [] + hijack_fixes = [] + token_count = 0 + + cache = {} + batch_multipliers = [] + for line in texts: + if line in cache: + remade_tokens, fixes, multipliers = cache[line] + else: + remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments) + token_count = max(current_token_count, token_count) + + cache[line] = (remade_tokens, fixes, multipliers) + + remade_batch_tokens.append(remade_tokens) + hijack_fixes.append(fixes) + batch_multipliers.append(multipliers) + + return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count + + + def process_text_old(self, text): + id_start = self.wrapped.tokenizer.bos_token_id + id_end = self.wrapped.tokenizer.eos_token_id + maxlen = self.wrapped.max_length # you get to stay at 77 + used_custom_terms = [] + remade_batch_tokens = [] + 
overflowing_words = [] + hijack_comments = [] + hijack_fixes = [] + token_count = 0 + + cache = {} + batch_tokens = self.wrapped.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"] + batch_multipliers = [] + for tokens in batch_tokens: + tuple_tokens = tuple(tokens) + + if tuple_tokens in cache: + remade_tokens, fixes, multipliers = cache[tuple_tokens] + else: + fixes = [] + remade_tokens = [] + multipliers = [] + mult = 1.0 + + i = 0 + while i < len(tokens): + token = tokens[i] + + embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i) + + mult_change = self.token_mults.get(token) if opts.enable_emphasis else None + if mult_change is not None: + mult *= mult_change + i += 1 + elif embedding is None: + remade_tokens.append(token) + multipliers.append(mult) + i += 1 + else: + emb_len = int(embedding.vec.shape[0]) + fixes.append((len(remade_tokens), embedding)) + remade_tokens += [0] * emb_len + multipliers += [mult] * emb_len + used_custom_terms.append((embedding.name, embedding.checksum())) + i += embedding_length_in_tokens + + if len(remade_tokens) > maxlen - 2: + vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()} + ovf = remade_tokens[maxlen - 2:] + overflowing_words = [vocab.get(int(x), "") for x in ovf] + overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words)) + hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n") + + token_count = len(remade_tokens) + remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens)) + remade_tokens = [id_start] + remade_tokens[0:maxlen-2] + [id_end] + cache[tuple_tokens] = (remade_tokens, fixes, multipliers) + + multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers)) + multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0] + + remade_batch_tokens.append(remade_tokens) + hijack_fixes.append(fixes) + batch_multipliers.append(multipliers) + return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count + + def forward(self, text): + use_old = opts.use_old_emphasis_implementation + if use_old: + batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text) + else: + batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text(text) + + self.hijack.comments += hijack_comments + + if len(used_custom_terms) > 0: + self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms])) + + if use_old: + self.hijack.fixes = hijack_fixes + return self.process_tokens(remade_batch_tokens, batch_multipliers) + + z = None + i = 0 + while max(map(len, remade_batch_tokens)) != 0: + rem_tokens = [x[75:] for x in remade_batch_tokens] + rem_multipliers = [x[75:] for x in batch_multipliers] + + self.hijack.fixes = [] + for unfiltered in hijack_fixes: + fixes = [] + for fix in unfiltered: + if fix[0] == i: + fixes.append(fix[1]) + self.hijack.fixes.append(fixes) + + tokens = [] + multipliers = [] + for j in range(len(remade_batch_tokens)): + if len(remade_batch_tokens[j]) > 0: + tokens.append(remade_batch_tokens[j][:75]) + multipliers.append(batch_multipliers[j][:75]) + else: + tokens.append([self.wrapped.tokenizer.eos_token_id] * 75) + multipliers.append([1.0] * 75) + + z1 = self.process_tokens(tokens, multipliers) 
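+            # each 75-token chunk is encoded separately and concatenated along the token axis below,
+            # which is how prompts longer than 75 tokens are handled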
+ z = z1 if z is None else torch.cat((z, z1), axis=-2) + + remade_batch_tokens = rem_tokens + batch_multipliers = rem_multipliers + i += 1 + + return z + + + def process_tokens(self, remade_batch_tokens, batch_multipliers): + if not opts.use_old_emphasis_implementation: + remade_batch_tokens = [[self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in remade_batch_tokens] + batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers] + + tokens = torch.asarray(remade_batch_tokens).to(device) + outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) + + if opts.CLIP_stop_at_last_layers > 1: + z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers] + z = self.wrapped.transformer.text_model.final_layer_norm(z) + else: + z = outputs.last_hidden_state + + # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise + batch_multipliers_of_same_length = [x + [1.0] * (75 - len(x)) for x in batch_multipliers] + batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(device) + original_mean = z.mean() + z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape) + new_mean = z.mean() + z *= original_mean / new_mean + + return z + + +class EmbeddingsWithFixes(torch.nn.Module): + def __init__(self, wrapped, embeddings): + super().__init__() + self.wrapped = wrapped + self.embeddings = embeddings + + def forward(self, input_ids): + batch_fixes = self.embeddings.fixes + self.embeddings.fixes = None + + inputs_embeds = self.wrapped(input_ids) + + if batch_fixes is None or len(batch_fixes) == 0 or max([len(x) for x in batch_fixes]) == 0: + return inputs_embeds + + vecs = [] + for fixes, tensor in zip(batch_fixes, inputs_embeds): + for offset, embedding in fixes: + emb = embedding.vec + emb_len = min(tensor.shape[0]-offset-1, emb.shape[0]) + tensor = torch.cat([tensor[0:offset+1], emb[0:emb_len], tensor[offset+1+emb_len:]]) + + vecs.append(tensor) + + return torch.stack(vecs) + + +def add_circular_option_to_conv_2d(): + conv2d_constructor = torch.nn.Conv2d.__init__ + + def conv2d_constructor_circular(self, *args, **kwargs): + return conv2d_constructor(self, *args, padding_mode='circular', **kwargs) + + torch.nn.Conv2d.__init__ = conv2d_constructor_circular + + +model_hijack = StableDiffusionModelHijack() diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py new file mode 100644 index 0000000000000000000000000000000000000000..79405525e6c1c20c3f99e11c1abf9bfd2c8a1202 --- /dev/null +++ b/modules/sd_hijack_optimizations.py @@ -0,0 +1,306 @@ +import math +import sys +import traceback +import importlib + +import torch +from torch import einsum + +from ldm.util import default +from einops import rearrange + +from modules import shared +from modules.hypernetworks import hypernetwork + + +if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers: + try: + import xformers.ops + shared.xformers_available = True + except Exception: + print("Cannot import xformers", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + +# see https://github.com/basujindal/stable-diffusion/pull/117 for discussion +def split_cross_attention_forward_v1(self, x, context=None, mask=None): + h = self.heads + + q_in = self.to_q(x) + context = default(context, x) + + context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context) + k_in = self.to_k(context_k) + v_in = 
self.to_v(context_v) + del context, context_k, context_v, x + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in)) + del q_in, k_in, v_in + + r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device) + for i in range(0, q.shape[0], 2): + end = i + 2 + s1 = einsum('b i d, b j d -> b i j', q[i:end], k[i:end]) + s1 *= self.scale + + s2 = s1.softmax(dim=-1) + del s1 + + r1[i:end] = einsum('b i j, b j d -> b i d', s2, v[i:end]) + del s2 + del q, k, v + + r2 = rearrange(r1, '(b h) n d -> b n (h d)', h=h) + del r1 + + return self.to_out(r2) + + +# taken from https://github.com/Doggettx/stable-diffusion and modified +def split_cross_attention_forward(self, x, context=None, mask=None): + h = self.heads + + q_in = self.to_q(x) + context = default(context, x) + + context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context) + k_in = self.to_k(context_k) + v_in = self.to_v(context_v) + + k_in *= self.scale + + del context, x + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in)) + del q_in, k_in, v_in + + r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + + stats = torch.cuda.memory_stats(q.device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device()) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + + gb = 1024 ** 3 + tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() + modifier = 3 if q.element_size() == 2 else 2.5 + mem_required = tensor_size * modifier + steps = 1 + + if mem_required > mem_free_total: + steps = 2 ** (math.ceil(math.log(mem_required / mem_free_total, 2))) + # print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB " + # f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}") + + if steps > 64: + max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64 + raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). 
' + f'Need: {mem_required / 64 / gb:0.1f}GB free, Have:{mem_free_total / gb:0.1f}GB free') + + slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1] + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k) + + s2 = s1.softmax(dim=-1, dtype=q.dtype) + del s1 + + r1[:, i:end] = einsum('b i j, b j d -> b i d', s2, v) + del s2 + + del q, k, v + + r2 = rearrange(r1, '(b h) n d -> b n (h d)', h=h) + del r1 + + return self.to_out(r2) + + +def check_for_psutil(): + try: + spec = importlib.util.find_spec('psutil') + return spec is not None + except ModuleNotFoundError: + return False + +invokeAI_mps_available = check_for_psutil() + +# -- Taken from https://github.com/invoke-ai/InvokeAI -- +if invokeAI_mps_available: + import psutil + mem_total_gb = psutil.virtual_memory().total // (1 << 30) + +def einsum_op_compvis(q, k, v): + s = einsum('b i d, b j d -> b i j', q, k) + s = s.softmax(dim=-1, dtype=s.dtype) + return einsum('b i j, b j d -> b i d', s, v) + +def einsum_op_slice_0(q, k, v, slice_size): + r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + for i in range(0, q.shape[0], slice_size): + end = i + slice_size + r[i:end] = einsum_op_compvis(q[i:end], k[i:end], v[i:end]) + return r + +def einsum_op_slice_1(q, k, v, slice_size): + r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + r[:, i:end] = einsum_op_compvis(q[:, i:end], k, v) + return r + +def einsum_op_mps_v1(q, k, v): + if q.shape[1] <= 4096: # (512x512) max q.shape[1]: 4096 + return einsum_op_compvis(q, k, v) + else: + slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1])) + return einsum_op_slice_1(q, k, v, slice_size) + +def einsum_op_mps_v2(q, k, v): + if mem_total_gb > 8 and q.shape[1] <= 4096: + return einsum_op_compvis(q, k, v) + else: + return einsum_op_slice_0(q, k, v, 1) + +def einsum_op_tensor_mem(q, k, v, max_tensor_mb): + size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20) + if size_mb <= max_tensor_mb: + return einsum_op_compvis(q, k, v) + div = 1 << int((size_mb - 1) / max_tensor_mb).bit_length() + if div <= q.shape[0]: + return einsum_op_slice_0(q, k, v, q.shape[0] // div) + return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1)) + +def einsum_op_cuda(q, k, v): + stats = torch.cuda.memory_stats(q.device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(q.device) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + # Divide factor of safety as there's copying and fragmentation + return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) + +def einsum_op(q, k, v): + if q.device.type == 'cuda': + return einsum_op_cuda(q, k, v) + + if q.device.type == 'mps': + if mem_total_gb >= 32: + return einsum_op_mps_v1(q, k, v) + return einsum_op_mps_v2(q, k, v) + + # Smaller slices are faster due to L2/L3/SLC caches. + # Tested on i7 with 8MB L3 cache. 
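+    # 32 is the per-call tensor budget in megabytes (the max_tensor_mb argument of einsum_op_tensor_mem)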
+ return einsum_op_tensor_mem(q, k, v, 32) + +def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): + h = self.heads + + q = self.to_q(x) + context = default(context, x) + + context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context) + k = self.to_k(context_k) * self.scale + v = self.to_v(context_v) + del context, context_k, context_v, x + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) + r = einsum_op(q, k, v) + return self.to_out(rearrange(r, '(b h) n d -> b n (h d)', h=h)) + +# -- End of code from https://github.com/invoke-ai/InvokeAI -- + +def xformers_attention_forward(self, x, context=None, mask=None): + h = self.heads + q_in = self.to_q(x) + context = default(context, x) + + context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context) + k_in = self.to_k(context_k) + v_in = self.to_v(context_v) + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b n h d', h=h), (q_in, k_in, v_in)) + del q_in, k_in, v_in + out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None) + + out = rearrange(out, 'b n h d -> b n (h d)', h=h) + return self.to_out(out) + +def cross_attention_attnblock_forward(self, x): + h_ = x + h_ = self.norm(h_) + q1 = self.q(h_) + k1 = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q1.shape + + q2 = q1.reshape(b, c, h*w) + del q1 + + q = q2.permute(0, 2, 1) # b,hw,c + del q2 + + k = k1.reshape(b, c, h*w) # b,c,hw + del k1 + + h_ = torch.zeros_like(k, device=q.device) + + stats = torch.cuda.memory_stats(q.device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device()) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + + tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size() + mem_required = tensor_size * 2.5 + steps = 1 + + if mem_required > mem_free_total: + steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2))) + + slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1] + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + + w1 = torch.bmm(q[:, i:end], k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j] + w2 = w1 * (int(c)**(-0.5)) + del w1 + w3 = torch.nn.functional.softmax(w2, dim=2, dtype=q.dtype) + del w2 + + # attend to values + v1 = v.reshape(b, c, h*w) + w4 = w3.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q) + del w3 + + h_[:, :, i:end] = torch.bmm(v1, w4) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j] + del v1, w4 + + h2 = h_.reshape(b, c, h, w) + del h_ + + h3 = self.proj_out(h2) + del h2 + + h3 += x + + return h3 + +def xformers_attnblock_forward(self, x): + try: + h_ = x + h_ = self.norm(h_) + q1 = self.q(h_).contiguous() + k1 = self.k(h_).contiguous() + v = self.v(h_).contiguous() + out = xformers.ops.memory_efficient_attention(q1, k1, v) + out = self.proj_out(out) + return x + out + except NotImplementedError: + return cross_attention_attnblock_forward(self, x) diff --git a/modules/sd_models.py b/modules/sd_models.py new file mode 100644 index 0000000000000000000000000000000000000000..0a55b4c3297e260c65b2566bd19fbdf5f5111e27 --- /dev/null +++ b/modules/sd_models.py @@ -0,0 +1,223 @@ +import glob +import os.path +import sys +from collections import namedtuple +import torch +from omegaconf import OmegaConf + +from ldm.util import instantiate_from_config + +from modules 
import shared, modelloader, devices +from modules.paths import models_path + +model_dir = "Stable-diffusion" +model_path = os.path.abspath(os.path.join(models_path, model_dir)) + +CheckpointInfo = namedtuple("CheckpointInfo", ['filename', 'title', 'hash', 'model_name', 'config']) +checkpoints_list = {} + +try: + # this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start. + + from transformers import logging + + logging.set_verbosity_error() +except Exception: + pass + + +def setup_model(): + if not os.path.exists(model_path): + os.makedirs(model_path) + + list_models() + + +def checkpoint_tiles(): + return sorted([x.title for x in checkpoints_list.values()]) + + +def list_models(): + checkpoints_list.clear() + model_list = modelloader.load_models(model_path=model_path, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt"]) + + def modeltitle(path, shorthash): + abspath = os.path.abspath(path) + + if shared.cmd_opts.ckpt_dir is not None and abspath.startswith(shared.cmd_opts.ckpt_dir): + name = abspath.replace(shared.cmd_opts.ckpt_dir, '') + elif abspath.startswith(model_path): + name = abspath.replace(model_path, '') + else: + name = os.path.basename(path) + + if name.startswith("\\") or name.startswith("/"): + name = name[1:] + + shortname = os.path.splitext(name.replace("/", "_").replace("\\", "_"))[0] + + return f'{name} [{shorthash}]', shortname + + cmd_ckpt = shared.cmd_opts.ckpt + if os.path.exists(cmd_ckpt): + h = model_hash(cmd_ckpt) + title, short_model_name = modeltitle(cmd_ckpt, h) + checkpoints_list[title] = CheckpointInfo(cmd_ckpt, title, h, short_model_name, shared.cmd_opts.config) + shared.opts.data['sd_model_checkpoint'] = title + elif cmd_ckpt is not None and cmd_ckpt != shared.default_sd_model_file: + print(f"Checkpoint in --ckpt argument not found (Possible it was moved to {model_path}: {cmd_ckpt}", file=sys.stderr) + for filename in model_list: + h = model_hash(filename) + title, short_model_name = modeltitle(filename, h) + + basename, _ = os.path.splitext(filename) + config = basename + ".yaml" + if not os.path.exists(config): + config = shared.cmd_opts.config + + checkpoints_list[title] = CheckpointInfo(filename, title, h, short_model_name, config) + + +def get_closet_checkpoint_match(searchString): + applicable = sorted([info for info in checkpoints_list.values() if searchString in info.title], key = lambda x:len(x.title)) + if len(applicable) > 0: + return applicable[0] + return None + + +def model_hash(filename): + try: + with open(filename, "rb") as file: + import hashlib + m = hashlib.sha256() + + file.seek(0x100000) + m.update(file.read(0x10000)) + return m.hexdigest()[0:8] + except FileNotFoundError: + return 'NOFILE' + + +def select_checkpoint(): + model_checkpoint = shared.opts.sd_model_checkpoint + checkpoint_info = checkpoints_list.get(model_checkpoint, None) + if checkpoint_info is not None: + return checkpoint_info + + if len(checkpoints_list) == 0: + print(f"No checkpoints found. When searching for checkpoints, looked at:", file=sys.stderr) + if shared.cmd_opts.ckpt is not None: + print(f" - file {os.path.abspath(shared.cmd_opts.ckpt)}", file=sys.stderr) + print(f" - directory {model_path}", file=sys.stderr) + if shared.cmd_opts.ckpt_dir is not None: + print(f" - directory {os.path.abspath(shared.cmd_opts.ckpt_dir)}", file=sys.stderr) + print(f"Can't run without a checkpoint. Find and place a .ckpt file into any of those locations. 
The program will exit.", file=sys.stderr) + exit(1) + + checkpoint_info = next(iter(checkpoints_list.values())) + if model_checkpoint is not None: + print(f"Checkpoint {model_checkpoint} not found; loading fallback {checkpoint_info.title}", file=sys.stderr) + + return checkpoint_info + + +def get_state_dict_from_checkpoint(pl_sd): + if "state_dict" in pl_sd: + return pl_sd["state_dict"] + + return pl_sd + + +def load_model_weights(model, checkpoint_info): + checkpoint_file = checkpoint_info.filename + sd_model_hash = checkpoint_info.hash + + print(f"Loading weights [{sd_model_hash}] from {checkpoint_file}") + + pl_sd = torch.load(checkpoint_file, map_location="cpu") + if "global_step" in pl_sd: + print(f"Global Step: {pl_sd['global_step']}") + + sd = get_state_dict_from_checkpoint(pl_sd) + + model.load_state_dict(sd, strict=False) + + if shared.cmd_opts.opt_channelslast: + model.to(memory_format=torch.channels_last) + + if not shared.cmd_opts.no_half: + model.half() + + devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16 + devices.dtype_vae = torch.float32 if shared.cmd_opts.no_half or shared.cmd_opts.no_half_vae else torch.float16 + + vae_file = os.path.splitext(checkpoint_file)[0] + ".vae.pt" + + if not os.path.exists(vae_file) and shared.cmd_opts.vae_path is not None: + vae_file = shared.cmd_opts.vae_path + + if os.path.exists(vae_file): + print(f"Loading VAE weights from: {vae_file}") + vae_ckpt = torch.load(vae_file, map_location="cpu") + vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"} + + model.first_stage_model.load_state_dict(vae_dict) + + model.first_stage_model.to(devices.dtype_vae) + + model.sd_model_hash = sd_model_hash + model.sd_model_checkpoint = checkpoint_file + model.sd_checkpoint_info = checkpoint_info + + +def load_model(): + from modules import lowvram, sd_hijack + checkpoint_info = select_checkpoint() + + if checkpoint_info.config != shared.cmd_opts.config: + print(f"Loading config from: {checkpoint_info.config}") + + sd_config = OmegaConf.load(checkpoint_info.config) + sd_model = instantiate_from_config(sd_config.model) + load_model_weights(sd_model, checkpoint_info) + + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + lowvram.setup_for_low_vram(sd_model, shared.cmd_opts.medvram) + else: + sd_model.to(shared.device) + + sd_hijack.model_hijack.hijack(sd_model) + + sd_model.eval() + + print(f"Model loaded.") + return sd_model + + +def reload_model_weights(sd_model, info=None): + from modules import lowvram, devices, sd_hijack + checkpoint_info = info or select_checkpoint() + + if sd_model.sd_model_checkpoint == checkpoint_info.filename: + return + + if sd_model.sd_checkpoint_info.config != checkpoint_info.config: + shared.sd_model = load_model() + return shared.sd_model + + if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: + lowvram.send_everything_to_cpu() + else: + sd_model.to(devices.cpu) + + sd_hijack.model_hijack.undo_hijack(sd_model) + + load_model_weights(sd_model, checkpoint_info) + + sd_hijack.model_hijack.hijack(sd_model) + + if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram: + sd_model.to(devices.device) + + print(f"Weights loaded.") + return sd_model diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..20309e06b4a225dea9ba35b0c64c3b89c2c2cb0d --- /dev/null +++ b/modules/sd_samplers.py @@ -0,0 +1,411 @@ +from collections import namedtuple +import numpy as np +import torch +import tqdm +from PIL import 
Image +import inspect +import k_diffusion.sampling +import ldm.models.diffusion.ddim +import ldm.models.diffusion.plms +from modules import prompt_parser, devices, processing + +from modules.shared import opts, cmd_opts, state +import modules.shared as shared + + +SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options']) + +samplers_k_diffusion = [ + ('Euler a', 'sample_euler_ancestral', ['k_euler_a'], {}), + ('Euler', 'sample_euler', ['k_euler'], {}), + ('LMS', 'sample_lms', ['k_lms'], {}), + ('Heun', 'sample_heun', ['k_heun'], {}), + ('DPM2', 'sample_dpm_2', ['k_dpm_2'], {}), + ('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a'], {}), + ('DPM fast', 'sample_dpm_fast', ['k_dpm_fast'], {}), + ('DPM adaptive', 'sample_dpm_adaptive', ['k_dpm_ad'], {}), + ('LMS Karras', 'sample_lms', ['k_lms_ka'], {'scheduler': 'karras'}), + ('DPM2 Karras', 'sample_dpm_2', ['k_dpm_2_ka'], {'scheduler': 'karras'}), + ('DPM2 a Karras', 'sample_dpm_2_ancestral', ['k_dpm_2_a_ka'], {'scheduler': 'karras'}), +] + +samplers_data_k_diffusion = [ + SamplerData(label, lambda model, funcname=funcname: KDiffusionSampler(funcname, model), aliases, options) + for label, funcname, aliases, options in samplers_k_diffusion + if hasattr(k_diffusion.sampling, funcname) +] + +all_samplers = [ + *samplers_data_k_diffusion, + SamplerData('DDIM', lambda model: VanillaStableDiffusionSampler(ldm.models.diffusion.ddim.DDIMSampler, model), [], {}), + SamplerData('PLMS', lambda model: VanillaStableDiffusionSampler(ldm.models.diffusion.plms.PLMSSampler, model), [], {}), +] + +samplers = [] +samplers_for_img2img = [] + + +def create_sampler_with_index(list_of_configs, index, model): + config = list_of_configs[index] + sampler = config.constructor(model) + sampler.config = config + + return sampler + + +def set_samplers(): + global samplers, samplers_for_img2img + + hidden = set(opts.hide_samplers) + hidden_img2img = set(opts.hide_samplers + ['PLMS']) + + samplers = [x for x in all_samplers if x.name not in hidden] + samplers_for_img2img = [x for x in all_samplers if x.name not in hidden_img2img] + + +set_samplers() + +sampler_extra_params = { + 'sample_euler': ['s_churn', 's_tmin', 's_tmax', 's_noise'], + 'sample_heun': ['s_churn', 's_tmin', 's_tmax', 's_noise'], + 'sample_dpm_2': ['s_churn', 's_tmin', 's_tmax', 's_noise'], +} + +def setup_img2img_steps(p, steps=None): + if opts.img2img_fix_steps or steps is not None: + steps = int((steps or p.steps) / min(p.denoising_strength, 0.999)) if p.denoising_strength > 0 else 0 + t_enc = p.steps - 1 + else: + steps = p.steps + t_enc = int(min(p.denoising_strength, 0.999) * steps) + + return steps, t_enc + + +def sample_to_image(samples): + x_sample = processing.decode_first_stage(shared.sd_model, samples[0:1])[0] + x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) + x_sample = 255. 
* np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + return Image.fromarray(x_sample) + + +def store_latent(decoded): + state.current_latent = decoded + + if opts.show_progress_every_n_steps > 0 and shared.state.sampling_step % opts.show_progress_every_n_steps == 0: + if not shared.parallel_processing_allowed: + shared.state.current_image = sample_to_image(decoded) + + + +def extended_tdqm(sequence, *args, desc=None, **kwargs): + state.sampling_steps = len(sequence) + state.sampling_step = 0 + + seq = sequence if cmd_opts.disable_console_progressbars else tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs) + + for x in seq: + if state.interrupted or state.skipped: + break + + yield x + + state.sampling_step += 1 + shared.total_tqdm.update() + + +ldm.models.diffusion.ddim.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs) +ldm.models.diffusion.plms.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs) + + +class VanillaStableDiffusionSampler: + def __init__(self, constructor, sd_model): + self.sampler = constructor(sd_model) + self.orig_p_sample_ddim = self.sampler.p_sample_ddim if hasattr(self.sampler, 'p_sample_ddim') else self.sampler.p_sample_plms + self.mask = None + self.nmask = None + self.init_latent = None + self.sampler_noises = None + self.step = 0 + self.eta = None + self.default_eta = 0.0 + self.config = None + + def number_of_needed_noises(self, p): + return 0 + + def p_sample_ddim_hook(self, x_dec, cond, ts, unconditional_conditioning, *args, **kwargs): + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) + unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) + + assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers' + cond = tensor + + # for DDIM, shapes must match, we can't just process cond and uncond independently; + # filling unconditional_conditioning with repeats of the last vector to match length is + # not 100% correct but should work well enough + if unconditional_conditioning.shape[1] < cond.shape[1]: + last_vector = unconditional_conditioning[:, -1:] + last_vector_repeated = last_vector.repeat([1, cond.shape[1] - unconditional_conditioning.shape[1], 1]) + unconditional_conditioning = torch.hstack([unconditional_conditioning, last_vector_repeated]) + elif unconditional_conditioning.shape[1] > cond.shape[1]: + unconditional_conditioning = unconditional_conditioning[:, :cond.shape[1]] + + if self.mask is not None: + img_orig = self.sampler.model.q_sample(self.init_latent, ts) + x_dec = img_orig * self.mask + self.nmask * x_dec + + res = self.orig_p_sample_ddim(x_dec, cond, ts, unconditional_conditioning=unconditional_conditioning, *args, **kwargs) + + if self.mask is not None: + store_latent(self.init_latent * self.mask + self.nmask * res[1]) + else: + store_latent(res[1]) + + self.step += 1 + return res + + def initialize(self, p): + self.eta = p.eta if p.eta is not None else opts.eta_ddim + + for fieldname in ['p_sample_ddim', 'p_sample_plms']: + if hasattr(self.sampler, fieldname): + setattr(self.sampler, fieldname, self.p_sample_ddim_hook) + + self.mask = p.mask if hasattr(p, 'mask') else None + self.nmask = p.nmask if hasattr(p, 'nmask') else None + + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None): + steps, t_enc = setup_img2img_steps(p, steps) + + 
self.initialize(p) + + # existing code fails with certain step counts, like 9 + try: + self.sampler.make_schedule(ddim_num_steps=steps, ddim_eta=self.eta, ddim_discretize=p.ddim_discretize, verbose=False) + except Exception: + self.sampler.make_schedule(ddim_num_steps=steps+1, ddim_eta=self.eta, ddim_discretize=p.ddim_discretize, verbose=False) + + x1 = self.sampler.stochastic_encode(x, torch.tensor([t_enc] * int(x.shape[0])).to(shared.device), noise=noise) + + self.init_latent = x + self.step = 0 + + samples = self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning) + + return samples + + def sample(self, p, x, conditioning, unconditional_conditioning, steps=None): + self.initialize(p) + + self.init_latent = None + self.step = 0 + + steps = steps or p.steps + + # existing code fails with certain step counts, like 9 + try: + samples_ddim, _ = self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta) + except Exception: + samples_ddim, _ = self.sampler.sample(S=steps+1, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta) + + return samples_ddim + + +class CFGDenoiser(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + self.mask = None + self.nmask = None + self.init_latent = None + self.step = 0 + + def forward(self, x, sigma, uncond, cond, cond_scale): + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) + uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) + + batch_size = len(conds_list) + repeats = [len(conds_list[i]) for i in range(batch_size)] + + x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) + sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) + + if tensor.shape[1] == uncond.shape[1]: + cond_in = torch.cat([tensor, uncond]) + + if shared.batch_cond_uncond: + x_out = self.inner_model(x_in, sigma_in, cond=cond_in) + else: + x_out = torch.zeros_like(x_in) + for batch_offset in range(0, x_out.shape[0], batch_size): + a = batch_offset + b = a + batch_size + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b]) + else: + x_out = torch.zeros_like(x_in) + batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size + for batch_offset in range(0, tensor.shape[0], batch_size): + a = batch_offset + b = min(a + batch_size, tensor.shape[0]) + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=tensor[a:b]) + + x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=uncond) + + denoised_uncond = x_out[-uncond.shape[0]:] + denoised = torch.clone(denoised_uncond) + + for i, conds in enumerate(conds_list): + for cond_index, weight in conds: + denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale) + + if self.mask is not None: + denoised = self.init_latent * self.mask + self.nmask * denoised + + self.step += 1 + + return denoised + + +def extended_trange(sampler, count, *args, **kwargs): + state.sampling_steps = count + state.sampling_step = 0 + + seq = range(count) if 
cmd_opts.disable_console_progressbars else tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs) + + for x in seq: + if state.interrupted or state.skipped: + break + + if sampler.stop_at is not None and x > sampler.stop_at: + break + + yield x + + state.sampling_step += 1 + shared.total_tqdm.update() + + +class TorchHijack: + def __init__(self, kdiff_sampler): + self.kdiff_sampler = kdiff_sampler + + def __getattr__(self, item): + if item == 'randn_like': + return self.kdiff_sampler.randn_like + + if hasattr(torch, item): + return getattr(torch, item) + + raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, item)) + + +class KDiffusionSampler: + def __init__(self, funcname, sd_model): + self.model_wrap = k_diffusion.external.CompVisDenoiser(sd_model, quantize=shared.opts.enable_quantization) + self.funcname = funcname + self.func = getattr(k_diffusion.sampling, self.funcname) + self.extra_params = sampler_extra_params.get(funcname, []) + self.model_wrap_cfg = CFGDenoiser(self.model_wrap) + self.sampler_noises = None + self.sampler_noise_index = 0 + self.stop_at = None + self.eta = None + self.default_eta = 1.0 + self.config = None + + def callback_state(self, d): + store_latent(d["denoised"]) + + def number_of_needed_noises(self, p): + return p.steps + + def randn_like(self, x): + noise = self.sampler_noises[self.sampler_noise_index] if self.sampler_noises is not None and self.sampler_noise_index < len(self.sampler_noises) else None + + if noise is not None and x.shape == noise.shape: + res = noise + else: + res = torch.randn_like(x) + + self.sampler_noise_index += 1 + return res + + def initialize(self, p): + self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None + self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None + self.model_wrap.step = 0 + self.sampler_noise_index = 0 + self.eta = p.eta or opts.eta_ancestral + + if hasattr(k_diffusion.sampling, 'trange'): + k_diffusion.sampling.trange = lambda *args, **kwargs: extended_trange(self, *args, **kwargs) + + if self.sampler_noises is not None: + k_diffusion.sampling.torch = TorchHijack(self) + + extra_params_kwargs = {} + for param_name in self.extra_params: + if hasattr(p, param_name) and param_name in inspect.signature(self.func).parameters: + extra_params_kwargs[param_name] = getattr(p, param_name) + + if 'eta' in inspect.signature(self.func).parameters: + extra_params_kwargs['eta'] = self.eta + + return extra_params_kwargs + + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None): + steps, t_enc = setup_img2img_steps(p, steps) + + if p.sampler_noise_scheduler_override: + sigmas = p.sampler_noise_scheduler_override(steps) + elif self.config is not None and self.config.options.get('scheduler', None) == 'karras': + sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=0.1, sigma_max=10, device=shared.device) + else: + sigmas = self.model_wrap.get_sigmas(steps) + + sigma_sched = sigmas[steps - t_enc - 1:] + xi = x + noise * sigma_sched[0] + + extra_params_kwargs = self.initialize(p) + if 'sigma_min' in inspect.signature(self.func).parameters: + ## last sigma is zero which isn't allowed by DPM Fast & Adaptive so taking value before last + extra_params_kwargs['sigma_min'] = sigma_sched[-2] + if 'sigma_max' in inspect.signature(self.func).parameters: + extra_params_kwargs['sigma_max'] = sigma_sched[0] + if 'n' in inspect.signature(self.func).parameters: + extra_params_kwargs['n'] = len(sigma_sched) - 1 + 
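The checks above and below only forward a keyword argument when the underlying k-diffusion function actually declares it. A minimal standalone sketch of that inspect.signature pattern, using a hypothetical stand-in for the sampler function:

import inspect

def fake_sampler(model, x, sigmas, eta=1.0, s_churn=0.0):
    # hypothetical stand-in for a k_diffusion.sampling.sample_* function
    return x

candidates = {'eta': 0.5, 's_churn': 0.1, 'sigma_min': 0.03}   # values gathered from p/opts
accepted = inspect.signature(fake_sampler).parameters
extra_params_kwargs = {k: v for k, v in candidates.items() if k in accepted}
print(extra_params_kwargs)  # {'eta': 0.5, 's_churn': 0.1} -- 'sigma_min' is not declared, so it is dropped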
if 'sigma_sched' in inspect.signature(self.func).parameters: + extra_params_kwargs['sigma_sched'] = sigma_sched + if 'sigmas' in inspect.signature(self.func).parameters: + extra_params_kwargs['sigmas'] = sigma_sched + + self.model_wrap_cfg.init_latent = x + + return self.func(self.model_wrap_cfg, xi, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs) + + + def sample(self, p, x, conditioning, unconditional_conditioning, steps=None): + steps = steps or p.steps + + if p.sampler_noise_scheduler_override: + sigmas = p.sampler_noise_scheduler_override(steps) + elif self.config is not None and self.config.options.get('scheduler', None) == 'karras': + sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=0.1, sigma_max=10, device=shared.device) + else: + sigmas = self.model_wrap.get_sigmas(steps) + + x = x * sigmas[0] + + extra_params_kwargs = self.initialize(p) + if 'sigma_min' in inspect.signature(self.func).parameters: + extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item() + extra_params_kwargs['sigma_max'] = self.model_wrap.sigmas[-1].item() + if 'n' in inspect.signature(self.func).parameters: + extra_params_kwargs['n'] = steps + else: + extra_params_kwargs['sigmas'] = sigmas + samples = self.func(self.model_wrap_cfg, x, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs) + return samples + diff --git a/modules/shared.py b/modules/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..5901e605638eaf4d04c79851f31e8b33ec6b7033 --- /dev/null +++ b/modules/shared.py @@ -0,0 +1,405 @@ +import argparse +import datetime +import json +import os +import sys + +import gradio as gr +import tqdm + +import modules.artists +import modules.interrogate +import modules.memmon +import modules.sd_models +import modules.styles +import modules.devices as devices +from modules import sd_samplers, sd_models +from modules.hypernetworks import hypernetwork +from modules.paths import models_path, script_path, sd_path + +sd_model_file = os.path.join(script_path, 'model.ckpt') +default_sd_model_file = sd_model_file +parser = argparse.ArgumentParser() +parser.add_argument("--config", type=str, default=os.path.join(sd_path, "configs/stable-diffusion/v1-inference.yaml"), help="path to config which constructs model",) +parser.add_argument("--ckpt", type=str, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",) +parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to directory with stable diffusion checkpoints") +parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN')) +parser.add_argument("--gfpgan-model", type=str, help="GFPGAN model file name", default=None) +parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats") +parser.add_argument("--no-half-vae", action='store_true', help="do not switch the VAE model to 16-bit floats") +parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)") +parser.add_argument("--max-batch-count", type=int, default=16, 
help="maximum batch count value for the UI") +parser.add_argument("--embeddings-dir", type=str, default=os.path.join(script_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)") +parser.add_argument("--hypernetwork-dir", type=str, default=os.path.join(models_path, 'hypernetworks'), help="hypernetwork directory") +parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui") +parser.add_argument("--medvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a little speed for low VRM usage") +parser.add_argument("--lowvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a lot of speed for very low VRM usage") +parser.add_argument("--always-batch-cond-uncond", action='store_true', help="disables cond/uncond batching that is enabled to save memory with --medvram or --lowvram") +parser.add_argument("--unload-gfpgan", action='store_true', help="does not do anything.") +parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast") +parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site (doesn't work for me but you might have better luck)") +parser.add_argument("--ngrok", type=str, help="ngrok authtoken, alternative to gradio --share", default=None) +parser.add_argument("--codeformer-models-path", type=str, help="Path to directory with codeformer model file(s).", default=os.path.join(models_path, 'Codeformer')) +parser.add_argument("--gfpgan-models-path", type=str, help="Path to directory with GFPGAN model file(s).", default=os.path.join(models_path, 'GFPGAN')) +parser.add_argument("--esrgan-models-path", type=str, help="Path to directory with ESRGAN model file(s).", default=os.path.join(models_path, 'ESRGAN')) +parser.add_argument("--bsrgan-models-path", type=str, help="Path to directory with BSRGAN model file(s).", default=os.path.join(models_path, 'BSRGAN')) +parser.add_argument("--realesrgan-models-path", type=str, help="Path to directory with RealESRGAN model file(s).", default=os.path.join(models_path, 'RealESRGAN')) +parser.add_argument("--scunet-models-path", type=str, help="Path to directory with ScuNET model file(s).", default=os.path.join(models_path, 'ScuNET')) +parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(models_path, 'SwinIR')) +parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR')) +parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers") +parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work") +parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator") +parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.") +parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. 
By default, it's on when cuda is unavailable.") +parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") +parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") +parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU as torch device for specified modules", default=[]) +parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") +parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) +parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False) +parser.add_argument("--ui-config-file", type=str, help="filename to use for ui configuration", default=os.path.join(script_path, 'ui-config.json')) +parser.add_argument("--hide-ui-dir-config", action='store_true', help="hide directory configuration from webui", default=False) +parser.add_argument("--ui-settings-file", type=str, help="filename to use for ui settings", default=os.path.join(script_path, 'config.json')) +parser.add_argument("--gradio-debug", action='store_true', help="launch gradio with --debug option") +parser.add_argument("--gradio-auth", type=str, help='set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--gradio-img2img-tool", type=str, help='gradio image uploader tool: can be either editor for ctopping, or color-sketch for drawing', choices=["color-sketch", "editor"], default="editor") +parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for stable diffusion to channels last") +parser.add_argument("--styles-file", type=str, help="filename to use for styles", default=os.path.join(script_path, 'styles.csv')) +parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False) +parser.add_argument("--use-textbox-seed", action='store_true', help="use textbox for seeds in UI (no up/down, but possible to input long seeds)", default=False) +parser.add_argument("--disable-console-progressbars", action='store_true', help="do not output progressbars to console", default=False) +parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) +parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None) +parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) + + +cmd_opts = parser.parse_args() + +devices.device, devices.device_gfpgan, devices.device_bsrgan, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \ +(devices.cpu if x in cmd_opts.use_cpu else devices.get_optimal_device() for x in ['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer']) + +device = devices.device + +batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram) +parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram +xformers_available = False 
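The generator expression a few lines above picks one torch device per module based on --use-cpu. A minimal sketch of the resulting mapping for a hypothetical `--use-cpu GFPGAN CodeFormer` run, with plain strings standing in for the actual torch devices:

modules_list = ['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer']
use_cpu = ['GFPGAN', 'CodeFormer']  # what argparse would produce for --use-cpu GFPGAN CodeFormer

# 'optimal' stands in for whatever devices.get_optimal_device() returns (e.g. a CUDA device)
chosen = {name: ('cpu' if name in use_cpu else 'optimal') for name in modules_list}
print(chosen)
# {'SD': 'optimal', 'GFPGAN': 'cpu', 'BSRGAN': 'optimal', 'ESRGAN': 'optimal', 'SCUNet': 'optimal', 'CodeFormer': 'cpu'}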
+config_filename = cmd_opts.ui_settings_file + +os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True) +hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) +loaded_hypernetwork = None + + +def reload_hypernetworks(): + global hypernetworks + + hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) + hypernetwork.load_hypernetwork(opts.sd_hypernetwork) + + +class State: + skipped = False + interrupted = False + job = "" + job_no = 0 + job_count = 0 + job_timestamp = '0' + sampling_step = 0 + sampling_steps = 0 + current_latent = None + current_image = None + current_image_sampling_step = 0 + textinfo = None + + def skip(self): + self.skipped = True + + def interrupt(self): + self.interrupted = True + + def nextjob(self): + self.job_no += 1 + self.sampling_step = 0 + self.current_image_sampling_step = 0 + + def get_job_timestamp(self): + return datetime.datetime.now().strftime("%Y%m%d%H%M%S") # shouldn't this return job_timestamp? + + +state = State() + +artist_db = modules.artists.ArtistsDatabase(os.path.join(script_path, 'artists.csv')) + +styles_filename = cmd_opts.styles_file +prompt_styles = modules.styles.StyleDatabase(styles_filename) + +interrogator = modules.interrogate.InterrogateModels("interrogate") + +face_restorers = [] + + +def realesrgan_models_names(): + import modules.realesrgan_model + return [x.name for x in modules.realesrgan_model.get_realesrgan_models(None)] + + +class OptionInfo: + def __init__(self, default=None, label="", component=None, component_args=None, onchange=None, show_on_main_page=False, refresh=None): + self.default = default + self.label = label + self.component = component + self.component_args = component_args + self.onchange = onchange + self.section = None + self.refresh = refresh + + +def options_section(section_identifier, options_dict): + for k, v in options_dict.items(): + v.section = section_identifier + + return options_dict + + +hide_dirs = {"visible": not cmd_opts.hide_ui_dir_config} + +options_templates = {} + +options_templates.update(options_section(('saving-images', "Saving images/grids"), { + "samples_save": OptionInfo(True, "Always save all generated images"), + "samples_format": OptionInfo('png', 'File format for images'), + "samples_filename_pattern": OptionInfo("", "Images filename pattern"), + + "grid_save": OptionInfo(True, "Always save all generated image grids"), + "grid_format": OptionInfo('png', 'File format for grids'), + "grid_extended_filename": OptionInfo(False, "Add extended info (seed, prompt) to filename when saving grid"), + "grid_only_if_multiple": OptionInfo(True, "Do not save grids consisting of one picture"), + "n_rows": OptionInfo(-1, "Grid row count; use -1 for autodetect and 0 for it to be same as batch size", gr.Slider, {"minimum": -1, "maximum": 16, "step": 1}), + + "enable_pnginfo": OptionInfo(True, "Save text information about generation parameters as chunks to png files"), + "save_txt": OptionInfo(False, "Create a text file next to every image with generation parameters."), + "save_images_before_face_restoration": OptionInfo(False, "Save a copy of image before doing face restoration."), + "jpeg_quality": OptionInfo(80, "Quality for saved jpeg images", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}), + "export_for_4chan": OptionInfo(True, "If PNG image is larger than 4MB or any dimension is larger than 4000, downscale and save copy as JPG"), + + "use_original_name_batch": OptionInfo(False, "Use original name for output filename during batch process in extras 
tab"), + "save_selected_only": OptionInfo(True, "When using 'Save' button, only save a single selected image"), + "do_not_add_watermark": OptionInfo(False, "Do not add watermark to images"), +})) + +options_templates.update(options_section(('saving-paths', "Paths for saving"), { + "outdir_samples": OptionInfo("", "Output directory for images; if empty, defaults to three directories below", component_args=hide_dirs), + "outdir_txt2img_samples": OptionInfo("outputs/txt2img-images", 'Output directory for txt2img images', component_args=hide_dirs), + "outdir_img2img_samples": OptionInfo("outputs/img2img-images", 'Output directory for img2img images', component_args=hide_dirs), + "outdir_extras_samples": OptionInfo("outputs/extras-images", 'Output directory for images from extras tab', component_args=hide_dirs), + "outdir_grids": OptionInfo("", "Output directory for grids; if empty, defaults to two directories below", component_args=hide_dirs), + "outdir_txt2img_grids": OptionInfo("outputs/txt2img-grids", 'Output directory for txt2img grids', component_args=hide_dirs), + "outdir_img2img_grids": OptionInfo("outputs/img2img-grids", 'Output directory for img2img grids', component_args=hide_dirs), + "outdir_save": OptionInfo("log/images", "Directory for saving images using the Save button", component_args=hide_dirs), +})) + +options_templates.update(options_section(('saving-to-dirs', "Saving to a directory"), { + "save_to_dirs": OptionInfo(False, "Save images to a subdirectory"), + "grid_save_to_dirs": OptionInfo(False, "Save grids to a subdirectory"), + "use_save_to_dirs_for_ui": OptionInfo(False, "When using \"Save\" button, save images to a subdirectory"), + "directories_filename_pattern": OptionInfo("", "Directory name pattern"), + "directories_max_prompt_words": OptionInfo(8, "Max prompt words for [prompt_words] pattern", gr.Slider, {"minimum": 1, "maximum": 20, "step": 1}), +})) + +options_templates.update(options_section(('upscaling', "Upscaling"), { + "ESRGAN_tile": OptionInfo(192, "Tile size for ESRGAN upscalers. 0 = no tiling.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), + "ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for ESRGAN upscalers. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}), + "realesrgan_enabled_models": OptionInfo(["R-ESRGAN x4+", "R-ESRGAN x4+ Anime6B"], "Select which Real-ESRGAN models to show in the web UI. (Requires restart)", gr.CheckboxGroup, lambda: {"choices": realesrgan_models_names()}), + "SWIN_tile": OptionInfo(192, "Tile size for all SwinIR.", gr.Slider, {"minimum": 16, "maximum": 512, "step": 16}), + "SWIN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for SwinIR. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}), + "ldsr_steps": OptionInfo(100, "LDSR processing steps. 
Lower = faster", gr.Slider, {"minimum": 1, "maximum": 200, "step": 1}), + "upscaler_for_img2img": OptionInfo(None, "Upscaler for img2img", gr.Dropdown, lambda: {"choices": [x.name for x in sd_upscalers]}), +})) + +options_templates.update(options_section(('face-restoration', "Face restoration"), { + "face_restoration_model": OptionInfo(None, "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), + "code_former_weight": OptionInfo(0.5, "CodeFormer weight parameter; 0 = maximum effect; 1 = minimum effect", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), + "face_restoration_unload": OptionInfo(False, "Move face restoration model from VRAM into RAM after processing"), +})) + +options_templates.update(options_section(('system', "System"), { + "memmon_poll_rate": OptionInfo(8, "VRAM usage polls per second during generation. Set to 0 to disable.", gr.Slider, {"minimum": 0, "maximum": 40, "step": 1}), + "samples_log_stdout": OptionInfo(False, "Always print all generation info to standard output"), + "multiple_tqdm": OptionInfo(True, "Add a second progress bar to the console that shows progress for an entire job."), +})) + +options_templates.update(options_section(('training', "Training"), { + "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP from VRAM when training"), + "dataset_filename_word_regex": OptionInfo("", "Filename word regex"), + "dataset_filename_join_string": OptionInfo(" ", "Filename join string"), + "training_image_repeats_per_epoch": OptionInfo(100, "Number of repeats for a single input image per epoch; used only for displaying epoch number", gr.Number, {"precision": 0}), +})) + +options_templates.update(options_section(('sd', "Stable Diffusion"), { + "sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, refresh=sd_models.list_models), + "sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks), + "sd_hypernetwork_strength": OptionInfo(1.0, "Hypernetwork strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.001}), + "img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."), + "save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"), + "img2img_fix_steps": OptionInfo(False, "With img2img, do exactly the amount of steps the slider specifies (normally you'd do less with less denoising)."), + "enable_quantization": OptionInfo(False, "Enable quantization in K samplers for sharper and cleaner results. This may change existing seeds. Requires restart to apply."), + "enable_emphasis": OptionInfo(True, "Emphasis: use (text) to make model pay more attention to text and [text] to make it pay less attention"), + "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. 
Can be useful to reproduce old seeds."), + "enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"), + "comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }), + "filter_nsfw": OptionInfo(False, "Filter NSFW content"), + 'CLIP_stop_at_last_layers': OptionInfo(1, "Stop At last layers of CLIP model", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}), + "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}), + 'quicksettings': OptionInfo("sd_model_checkpoint", "Quicksettings list"), +})) + +options_templates.update(options_section(('interrogate', "Interrogate Options"), { + "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"), + "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"), + "interrogate_return_ranks": OptionInfo(False, "Interrogate: include ranks of model tags matches in results (Has no effect on caption-based interrogators)."), + "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}), + "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}), + "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), + "interrogate_clip_dict_limit": OptionInfo(1500, "CLIP: maximum number of lines in text file (0 = No limit)"), + "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), + "deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"), + "deepbooru_use_spaces": OptionInfo(False, "use spaces for tags in deepbooru"), + "deepbooru_escape": OptionInfo(True, "escape (\\) brackets in deepbooru (so they are used as literal brackets and not for emphasis)"), +})) + +options_templates.update(options_section(('ui', "User interface"), { + "show_progressbar": OptionInfo(True, "Show progressbar"), + "show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. 
Set 0 to disable.", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}), + "return_grid": OptionInfo(True, "Show grid in results for web"), + "do_not_show_images": OptionInfo(False, "Do not show any images in results for web"), + "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"), + "add_model_name_to_info": OptionInfo(False, "Add model name to generation information"), + "font": OptionInfo("", "Font for image grids that have text"), + "js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"), + "js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"), + "show_progress_in_title": OptionInfo(True, "Show generation progress in window title."), +})) + +options_templates.update(options_section(('sampler-params', "Sampler parameters"), { + "hide_samplers": OptionInfo([], "Hide samplers in user interface (requires restart)", gr.CheckboxGroup, lambda: {"choices": [x.name for x in sd_samplers.all_samplers]}), + "eta_ddim": OptionInfo(0.0, "eta (noise multiplier) for DDIM", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + "eta_ancestral": OptionInfo(1.0, "eta (noise multiplier) for ancestral samplers", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + "ddim_discretize": OptionInfo('uniform', "img2img DDIM discretize", gr.Radio, {"choices": ['uniform', 'quad']}), + 's_churn': OptionInfo(0.0, "sigma churn", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + 's_tmin': OptionInfo(0.0, "sigma tmin", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + 's_noise': OptionInfo(1.0, "sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + 'eta_noise_seed_delta': OptionInfo(0, "Eta noise seed delta", gr.Number, {"precision": 0}), +})) + + +class Options: + data = None + data_labels = options_templates + typemap = {int: float} + + def __init__(self): + self.data = {k: v.default for k, v in self.data_labels.items()} + + def __setattr__(self, key, value): + if self.data is not None: + if key in self.data: + self.data[key] = value + + return super(Options, self).__setattr__(key, value) + + def __getattr__(self, item): + if self.data is not None: + if item in self.data: + return self.data[item] + + if item in self.data_labels: + return self.data_labels[item].default + + return super(Options, self).__getattribute__(item) + + def save(self, filename): + with open(filename, "w", encoding="utf8") as file: + json.dump(self.data, file) + + def same_type(self, x, y): + if x is None or y is None: + return True + + type_x = self.typemap.get(type(x), type(x)) + type_y = self.typemap.get(type(y), type(y)) + + return type_x == type_y + + def load(self, filename): + with open(filename, "r", encoding="utf8") as file: + self.data = json.load(file) + + bad_settings = 0 + for k, v in self.data.items(): + info = self.data_labels.get(k, None) + if info is not None and not self.same_type(info.default, v): + print(f"Warning: bad setting value: {k}: {v} ({type(v).__name__}; expected {type(info.default).__name__})", file=sys.stderr) + bad_settings += 1 + + if bad_settings > 0: + print(f"The program is likely to not work with bad settings.\nSettings file: {filename}\nEither fix the file, or delete it and restart.", file=sys.stderr) + + def onchange(self, key, func): + item = self.data_labels.get(key) + item.onchange = func + + func() + + def dumpjson(self): + d = {k: self.data.get(k, self.data_labels.get(k).default) for k in self.data_labels.keys()} + return 
json.dumps(d) + + +opts = Options() +if os.path.exists(config_filename): + opts.load(config_filename) + +sd_upscalers = [] + +sd_model = None + +progress_print_out = sys.stdout + + +class TotalTQDM: + def __init__(self): + self._tqdm = None + + def reset(self): + self._tqdm = tqdm.tqdm( + desc="Total progress", + total=state.job_count * state.sampling_steps, + position=1, + file=progress_print_out + ) + + def update(self): + if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars: + return + if self._tqdm is None: + self.reset() + self._tqdm.update() + + def updateTotal(self, new_total): + if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars: + return + if self._tqdm is None: + self.reset() + self._tqdm.total=new_total + + def clear(self): + if self._tqdm is not None: + self._tqdm.close() + self._tqdm = None + + +total_tqdm = TotalTQDM() + +mem_mon = modules.memmon.MemUsageMonitor("MemMon", device, opts) +mem_mon.start() diff --git a/modules/styles.py b/modules/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..d44dfc1add0205cddcf5d95ce1a419927360d7ca --- /dev/null +++ b/modules/styles.py @@ -0,0 +1,92 @@ +# We need this so Python doesn't complain about the unknown StableDiffusionProcessing-typehint at runtime +from __future__ import annotations + +import csv +import os +import os.path +import typing +import collections.abc as abc +import tempfile +import shutil + +if typing.TYPE_CHECKING: + # Only import this when code is being type-checked, it doesn't have any effect at runtime + from .processing import StableDiffusionProcessing + + +class PromptStyle(typing.NamedTuple): + name: str + prompt: str + negative_prompt: str + + +def merge_prompts(style_prompt: str, prompt: str) -> str: + if "{prompt}" in style_prompt: + res = style_prompt.replace("{prompt}", prompt) + else: + parts = filter(None, (prompt.strip(), style_prompt.strip())) + res = ", ".join(parts) + + return res + + +def apply_styles_to_prompt(prompt, styles): + for style in styles: + prompt = merge_prompts(style, prompt) + + return prompt + + +class StyleDatabase: + def __init__(self, path: str): + self.no_style = PromptStyle("None", "", "") + self.styles = {"None": self.no_style} + + if not os.path.exists(path): + return + + with open(path, "r", encoding="utf8", newline='') as file: + reader = csv.DictReader(file) + for row in reader: + # Support loading old CSV format with "name, text"-columns + prompt = row["prompt"] if "prompt" in row else row["text"] + negative_prompt = row.get("negative_prompt", "") + self.styles[row["name"]] = PromptStyle(row["name"], prompt, negative_prompt) + + def get_style_prompts(self, styles): + return [self.styles.get(x, self.no_style).prompt for x in styles] + + def get_negative_style_prompts(self, styles): + return [self.styles.get(x, self.no_style).negative_prompt for x in styles] + + def apply_styles_to_prompt(self, prompt, styles): + return apply_styles_to_prompt(prompt, [self.styles.get(x, self.no_style).prompt for x in styles]) + + def apply_negative_styles_to_prompt(self, prompt, styles): + return apply_styles_to_prompt(prompt, [self.styles.get(x, self.no_style).negative_prompt for x in styles]) + + def apply_styles(self, p: StableDiffusionProcessing) -> None: + if isinstance(p.prompt, list): + p.prompt = [self.apply_styles_to_prompt(prompt, p.styles) for prompt in p.prompt] + else: + p.prompt = self.apply_styles_to_prompt(p.prompt, p.styles) + + if isinstance(p.negative_prompt, list): + p.negative_prompt = 
[self.apply_negative_styles_to_prompt(prompt, p.styles) for prompt in p.negative_prompt] + else: + p.negative_prompt = self.apply_negative_styles_to_prompt(p.negative_prompt, p.styles) + + def save_styles(self, path: str) -> None: + # Write to temporary file first, so we don't nuke the file if something goes wrong + fd, temp_path = tempfile.mkstemp(".csv") + with os.fdopen(fd, "w", encoding="utf8", newline='') as file: + # _fields is actually part of the public API: typing.NamedTuple is a replacement for collections.NamedTuple, + # and collections.NamedTuple has explicit documentation for accessing _fields. Same goes for _asdict() + writer = csv.DictWriter(file, fieldnames=PromptStyle._fields) + writer.writeheader() + writer.writerows(style._asdict() for k, style in self.styles.items()) + + # Always keep a backup file around + if os.path.exists(path): + shutil.move(path, path + ".bak") + shutil.move(temp_path, path) diff --git a/modules/swinir_model.py b/modules/swinir_model.py new file mode 100644 index 0000000000000000000000000000000000000000..baa02e3d1c0c750e8ac17d0e89f67db3ec7796df --- /dev/null +++ b/modules/swinir_model.py @@ -0,0 +1,161 @@ +import contextlib +import os + +import numpy as np +import torch +from PIL import Image +from basicsr.utils.download_util import load_file_from_url +from tqdm import tqdm + +from modules import modelloader +from modules.shared import cmd_opts, opts, device +from modules.swinir_model_arch import SwinIR as net +from modules.swinir_model_arch_v2 import Swin2SR as net2 +from modules.upscaler import Upscaler, UpscalerData + +precision_scope = ( + torch.autocast if cmd_opts.precision == "autocast" else contextlib.nullcontext +) + + +class UpscalerSwinIR(Upscaler): + def __init__(self, dirname): + self.name = "SwinIR" + self.model_url = "https://github.com/JingyunLiang/SwinIR/releases/download/v0.0" \ + "/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR" \ + "-L_x4_GAN.pth " + self.model_name = "SwinIR 4x" + self.user_path = dirname + super().__init__() + scalers = [] + model_files = self.find_models(ext_filter=[".pt", ".pth"]) + for model in model_files: + if "http" in model: + name = self.model_name + else: + name = modelloader.friendly_name(model) + model_data = UpscalerData(name, model, self) + scalers.append(model_data) + self.scalers = scalers + + def do_upscale(self, img, model_file): + model = self.load_model(model_file) + if model is None: + return img + model = model.to(device) + img = upscale(img, model) + try: + torch.cuda.empty_cache() + except: + pass + return img + + def load_model(self, path, scale=4): + if "http" in path: + dl_name = "%s%s" % (self.model_name.replace(" ", "_"), ".pth") + filename = load_file_from_url(url=path, model_dir=self.model_path, file_name=dl_name, progress=True) + else: + filename = path + if filename is None or not os.path.exists(filename): + return None + if filename.endswith(".v2.pth"): + model = net2( + upscale=scale, + in_chans=3, + img_size=64, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler="nearest+conv", + resi_connection="1conv", + ) + params = None + else: + model = net( + upscale=scale, + in_chans=3, + img_size=64, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], + embed_dim=240, + num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8], + mlp_ratio=2, + upsampler="nearest+conv", + resi_connection="3conv", + ) + params = "params_ema" + + pretrained_model = torch.load(filename) + if params is not None: + 
model.load_state_dict(pretrained_model[params], strict=True) + else: + model.load_state_dict(pretrained_model, strict=True) + if not cmd_opts.no_half: + model = model.half() + return model + + +def upscale( + img, + model, + tile=opts.SWIN_tile, + tile_overlap=opts.SWIN_tile_overlap, + window_size=8, + scale=4, +): + img = np.array(img) + img = img[:, :, ::-1] + img = np.moveaxis(img, 2, 0) / 255 + img = torch.from_numpy(img).float() + img = img.unsqueeze(0).to(device) + with torch.no_grad(), precision_scope("cuda"): + _, _, h_old, w_old = img.size() + h_pad = (h_old // window_size + 1) * window_size - h_old + w_pad = (w_old // window_size + 1) * window_size - w_old + img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, : h_old + h_pad, :] + img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, : w_old + w_pad] + output = inference(img, model, tile, tile_overlap, window_size, scale) + output = output[..., : h_old * scale, : w_old * scale] + output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy() + if output.ndim == 3: + output = np.transpose( + output[[2, 1, 0], :, :], (1, 2, 0) + ) # CHW-RGB to HCW-BGR + output = (output * 255.0).round().astype(np.uint8) # float32 to uint8 + return Image.fromarray(output, "RGB") + + +def inference(img, model, tile, tile_overlap, window_size, scale): + # test the image tile by tile + b, c, h, w = img.size() + tile = min(tile, h, w) + assert tile % window_size == 0, "tile size should be a multiple of window_size" + sf = scale + + stride = tile - tile_overlap + h_idx_list = list(range(0, h - tile, stride)) + [h - tile] + w_idx_list = list(range(0, w - tile, stride)) + [w - tile] + E = torch.zeros(b, c, h * sf, w * sf, dtype=torch.half, device=device).type_as(img) + W = torch.zeros_like(E, dtype=torch.half, device=device) + + with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="SwinIR tiles") as pbar: + for h_idx in h_idx_list: + for w_idx in w_idx_list: + in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile] + out_patch = model(in_patch) + out_patch_mask = torch.ones_like(out_patch) + + E[ + ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf + ].add_(out_patch) + W[ + ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf + ].add_(out_patch_mask) + pbar.update(1) + output = E.div_(W) + + return output diff --git a/modules/swinir_model_arch.py b/modules/swinir_model_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..863f42db6f50e5eac70931b8c0e6443f831a6018 --- /dev/null +++ b/modules/swinir_model_arch.py @@ -0,0 +1,867 @@ +# ----------------------------------------------------------------------------------- +# SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257 +# Originally Written by Ze Liu, Modified by Jingyun Liang. 
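A note on the tiled inference() defined above in modules/swinir_model.py: overlapping tiles are upscaled independently, their outputs summed into E, and a coverage count accumulated in W, so E/W averages the overlapping regions. A minimal 1-D sketch of the same accumulation idea, with an identity "model" and arbitrary toy sizes:

import torch

length, tile, overlap = 10, 4, 2
signal = torch.arange(length, dtype=torch.float)

stride = tile - overlap
idx_list = list(range(0, length - tile, stride)) + [length - tile]

E = torch.zeros(length)
W = torch.zeros(length)
for i in idx_list:
    patch = signal[i:i + tile]              # the model call would go here
    E[i:i + tile] += patch
    W[i:i + tile] += torch.ones_like(patch)

output = E / W                              # overlapping regions are averaged out
assert torch.allclose(output, signal)       # the identity "model" recovers the input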
+# ----------------------------------------------------------------------------------- + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Module): + r""" Swin Transformer Block. + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. 
+ num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of 
window size + if self.input_resolution == x_size: + attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device)) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Module): + r""" Patch Merging Layer. + + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.view(B, H, W, C) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.dim + flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + return flops + + +class BasicLayer(nn.Module): + """ A basic Swin Transformer layer for one stage. + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. 
Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, dim, input_resolution, depth, num_heads, window_size, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock(dim=dim, input_resolution=input_resolution, + num_heads=num_heads, window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop, attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, x_size) + else: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + +class RSTB(nn.Module): + """Residual Swin Transformer Block (RSTB). + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. 
+ """ + + def __init__(self, dim, input_resolution, depth, num_heads, window_size, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, + img_size=224, patch_size=4, resi_connection='1conv'): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer(dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop, attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint) + + if resi_connection == '1conv': + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim, 3, 1, 1)) + + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim, + norm_layer=None) + + self.patch_unembed = PatchUnEmbed( + img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim, + norm_layer=None) + + def forward(self, x, x_size): + return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + + +class PatchEmbed(nn.Module): + r""" Image to Patch Embedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = x.flatten(2).transpose(1, 2) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + flops = 0 + H, W = self.img_size + if self.norm is not None: + flops += H * W * self.embed_dim + return flops + + +class PatchUnEmbed(nn.Module): + r""" Image to Patch Unembedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. 
Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.') + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.num_feat * 3 * 9 + return flops + + +class SwinIR(nn.Module): + r""" SwinIR + A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer. + + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. 
or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__(self, img_size=64, patch_size=1, in_chans=3, + embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6], + window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None, + drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, + norm_layer=nn.LayerNorm, ape=False, patch_norm=True, + use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv', + **kwargs): + super(SwinIR, self).__init__() + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed( + img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB(dim=embed_dim, + input_resolution=(patches_resolution[0], + patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection + + ) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == '1conv': + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + 
self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1)) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == 'pixelshuffle': + # for classical SR + self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, + (patches_resolution[0], patches_resolution[1])) + elif self.upsampler == 'nearest+conv': + # for real-world SR (less artifacts) + self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + if self.upscale == 4: + self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'absolute_pos_embed'} + + @torch.jit.ignore + def no_weight_decay_keywords(self): + return {'relative_position_bias_table'} + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.type_as(x) + x = (x - self.mean) * self.img_range + + if self.upsampler == 'pixelshuffle': + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == 'nearest+conv': + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + if 
self.upscale == 4: + x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + + return x[:, :, :H*self.upscale, :W*self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() + return flops + + +if __name__ == '__main__': + upscale = 4 + window_size = 8 + height = (1024 // upscale // window_size + 1) * window_size + width = (720 // upscale // window_size + 1) * window_size + model = SwinIR(upscale=2, img_size=(height, width), + window_size=window_size, img_range=1., depths=[6, 6, 6, 6], + embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect') + print(model) + print(height, width, model.flops() / 1e9) + + x = torch.randn((1, 3, height, width)) + x = model(x) + print(x.shape) diff --git a/modules/swinir_model_arch_v2.py b/modules/swinir_model_arch_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..0e28ae6eefa2f4bc6260b14760907c54ce633876 --- /dev/null +++ b/modules/swinir_model_arch_v2.py @@ -0,0 +1,1017 @@ +# ----------------------------------------------------------------------------------- +# Swin2SR: Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, https://arxiv.org/abs/ +# Written by Conde and Choi et al. 
+# ----------------------------------------------------------------------------------- + +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + +class WindowAttention(nn.Module): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + pretrained_window_size (tuple[int]): The height and width of the window in pre-training. 
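+
+        Note: following SwinV2, attention logits are cosine similarities between the
+        normalized query and key, scaled by a learned per-head logit scale clamped to a
+        maximum of 1/0.01, and the relative position bias is produced by a small MLP
+        (cpb_mlp) over log-spaced relative coordinates instead of a learned lookup table.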
+ """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0., + pretrained_window_size=[0, 0]): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.pretrained_window_size = pretrained_window_size + self.num_heads = num_heads + + self.logit_scale = nn.Parameter(torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True) + + # mlp to generate continuous relative position bias + self.cpb_mlp = nn.Sequential(nn.Linear(2, 512, bias=True), + nn.ReLU(inplace=True), + nn.Linear(512, num_heads, bias=False)) + + # get relative_coords_table + relative_coords_h = torch.arange(-(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32) + relative_coords_w = torch.arange(-(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32) + relative_coords_table = torch.stack( + torch.meshgrid([relative_coords_h, + relative_coords_w])).permute(1, 2, 0).contiguous().unsqueeze(0) # 1, 2*Wh-1, 2*Ww-1, 2 + if pretrained_window_size[0] > 0: + relative_coords_table[:, :, :, 0] /= (pretrained_window_size[0] - 1) + relative_coords_table[:, :, :, 1] /= (pretrained_window_size[1] - 1) + else: + relative_coords_table[:, :, :, 0] /= (self.window_size[0] - 1) + relative_coords_table[:, :, :, 1] /= (self.window_size[1] - 1) + relative_coords_table *= 8 # normalize to -8, 8 + relative_coords_table = torch.sign(relative_coords_table) * torch.log2( + torch.abs(relative_coords_table) + 1.0) / np.log2(8) + + self.register_buffer("relative_coords_table", relative_coords_table) + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(dim)) + self.v_bias = nn.Parameter(torch.zeros(dim)) + else: + self.q_bias = None + self.v_bias = None + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv_bias = None + if self.q_bias is not None: + qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + # cosine attention + attn = (F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1)) + logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. 
/ 0.01)).to(self.logit_scale.device)).exp() + attn = attn * logit_scale + + relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(-1, self.num_heads) + relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + relative_position_bias = 16 * torch.sigmoid(relative_position_bias) + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, ' \ + f'pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}' + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + +class SwinTransformerBlock(nn.Module): + r""" Swin Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + pretrained_window_size (int): Window size in pre-training. + """ + + def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pretrained_window_size=0): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop, + pretrained_window_size=to_2tuple(pretrained_window_size)) + + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + #assert L == H * W, "input feature has wrong size" + + shortcut = x + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device)) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + x = x.view(B, H * W, C) + x = shortcut + self.drop_path(self.norm1(x)) + + # FFN + x = x + self.drop_path(self.norm2(self.mlp(x))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + +class PatchMerging(nn.Module): + r""" Patch Merging Layer. + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(2 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.view(B, H, W, C) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.reduction(x) + x = self.norm(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + flops += H * W * self.dim // 2 + return flops + +class BasicLayer(nn.Module): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + pretrained_window_size (int): Local window size in pre-training. 
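+
+        Note: blocks alternate between regular and shifted window attention (shift_size
+        is 0 for even block indices and window_size // 2 for odd ones), which lets
+        information flow across neighbouring windows.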
+ """ + + def __init__(self, dim, input_resolution, depth, num_heads, window_size, + mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., + drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, + pretrained_window_size=0): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock(dim=dim, input_resolution=input_resolution, + num_heads=num_heads, window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop=drop, attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer, + pretrained_window_size=pretrained_window_size) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, x_size) + else: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + def _init_respostnorm(self): + for blk in self.blocks: + nn.init.constant_(blk.norm1.bias, 0) + nn.init.constant_(blk.norm1.weight, 0) + nn.init.constant_(blk.norm2.bias, 0) + nn.init.constant_(blk.norm2.weight, 0) + +class PatchEmbed(nn.Module): + r""" Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + B, C, H, W = x.shape + # FIXME look at relaxing size constraints + # assert H == self.img_size[0] and W == self.img_size[1], + # f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." + x = self.proj(x).flatten(2).transpose(1, 2) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + Ho, Wo = self.patches_resolution + flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) + if self.norm is not None: + flops += Ho * Wo * self.embed_dim + return flops + +class RSTB(nn.Module): + """Residual Swin Transformer Block (RSTB). + + Args: + dim (int): Number of input channels. 
+ input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. + """ + + def __init__(self, dim, input_resolution, depth, num_heads, window_size, + mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., + drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, + img_size=224, patch_size=4, resi_connection='1conv'): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer(dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop=drop, attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint) + + if resi_connection == '1conv': + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim, 3, 1, 1)) + + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_chans=dim, embed_dim=dim, + norm_layer=None) + + self.patch_unembed = PatchUnEmbed( + img_size=img_size, patch_size=patch_size, in_chans=dim, embed_dim=dim, + norm_layer=None) + + def forward(self, x, x_size): + return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + +class PatchUnEmbed(nn.Module): + r""" Image to Patch Unembedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. 
Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.') + super(Upsample, self).__init__(*m) + +class Upsample_hf(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.') + super(Upsample_hf, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.num_feat * 3 * 9 + return flops + + + +class Swin2SR(nn.Module): + r""" Swin2SR + A PyTorch impl of : `Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration`. + + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + drop_rate (float): Dropout rate. 
Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__(self, img_size=64, patch_size=1, in_chans=3, + embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6], + window_size=7, mlp_ratio=4., qkv_bias=True, + drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, + norm_layer=nn.LayerNorm, ape=False, patch_norm=True, + use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv', + **kwargs): + super(Swin2SR, self).__init__() + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed( + img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB(dim=embed_dim, + input_resolution=(patches_resolution[0], + patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + drop=drop_rate, attn_drop=attn_drop_rate, 
+ drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection + + ) + self.layers.append(layer) + + if self.upsampler == 'pixelshuffle_hf': + self.layers_hf = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB(dim=embed_dim, + input_resolution=(patches_resolution[0], + patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + drop=drop_rate, attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection + + ) + self.layers_hf.append(layer) + + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == '1conv': + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1)) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == 'pixelshuffle': + # for classical SR + self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == 'pixelshuffle_aux': + self.conv_bicubic = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1) + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.conv_aux = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.conv_after_aux = nn.Sequential( + nn.Conv2d(3, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + + elif self.upsampler == 'pixelshuffle_hf': + self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.upsample = Upsample(upscale, num_feat) + self.upsample_hf = Upsample_hf(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.conv_first_hf = nn.Sequential(nn.Conv2d(num_feat, embed_dim, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.conv_after_body_hf = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + self.conv_before_upsample_hf = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.conv_last_hf = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, + (patches_resolution[0], patches_resolution[1])) + elif self.upsampler == 'nearest+conv': + # for real-world SR (less artifacts) + assert self.upscale == 4, 'only support x4 now.' 
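+            # x4 upscaling is performed in forward() by two nearest-neighbour 2x
+            # interpolations, each followed by a 3x3 conv (conv_up1 / conv_up2), then
+            # conv_hr and conv_last; this tends to give fewer checkerboard artifacts
+            # than pixel-shuffle on real-world inputs.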
+ self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'absolute_pos_embed'} + + @torch.jit.ignore + def no_weight_decay_keywords(self): + return {'relative_position_bias_table'} + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward_features_hf(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers_hf: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.type_as(x) + x = (x - self.mean) * self.img_range + + if self.upsampler == 'pixelshuffle': + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == 'pixelshuffle_aux': + bicubic = F.interpolate(x, size=(H * self.upscale, W * self.upscale), mode='bicubic', align_corners=False) + bicubic = self.conv_bicubic(bicubic) + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + aux = self.conv_aux(x) # b, 3, LR_H, LR_W + x = self.conv_after_aux(aux) + x = self.upsample(x)[:, :, :H * self.upscale, :W * self.upscale] + bicubic[:, :, :H * self.upscale, :W * self.upscale] + x = self.conv_last(x) + aux = aux / self.img_range + self.mean + elif self.upsampler == 'pixelshuffle_hf': + # for classical SR with HF + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x_before = self.conv_before_upsample(x) + x_out = self.conv_last(self.upsample(x_before)) + + x_hf = self.conv_first_hf(x_before) + x_hf = self.conv_after_body_hf(self.forward_features_hf(x_hf)) + x_hf + x_hf = self.conv_before_upsample_hf(x_hf) + x_hf = self.conv_last_hf(self.upsample_hf(x_hf)) + x = x_out + x_hf + x_hf = x_hf / self.img_range + self.mean + + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = 
self.upsample(x) + elif self.upsampler == 'nearest+conv': + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + if self.upsampler == "pixelshuffle_aux": + return x[:, :, :H*self.upscale, :W*self.upscale], aux + + elif self.upsampler == "pixelshuffle_hf": + x_out = x_out / self.img_range + self.mean + return x_out[:, :, :H*self.upscale, :W*self.upscale], x[:, :, :H*self.upscale, :W*self.upscale], x_hf[:, :, :H*self.upscale, :W*self.upscale] + + else: + return x[:, :, :H*self.upscale, :W*self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() + return flops + + +if __name__ == '__main__': + upscale = 4 + window_size = 8 + height = (1024 // upscale // window_size + 1) * window_size + width = (720 // upscale // window_size + 1) * window_size + model = Swin2SR(upscale=2, img_size=(height, width), + window_size=window_size, img_range=1., depths=[6, 6, 6, 6], + embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect') + print(model) + print(height, width, model.flops() / 1e9) + + x = torch.randn((1, 3, height, width)) + x = model(x) + print(x.shape) \ No newline at end of file diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..67e90afebaf266c1f15b6777c5231f9b8f9e81b1 --- /dev/null +++ b/modules/textual_inversion/dataset.py @@ -0,0 +1,113 @@ +import os +import numpy as np +import PIL +import torch +from PIL import Image +from torch.utils.data import Dataset +from torchvision import transforms + +import random +import tqdm +from modules import devices, shared +import re + +re_numbers_at_start = re.compile(r"^[-\d]+\s*") + + +class DatasetEntry: + def __init__(self, filename=None, latent=None, filename_text=None): + self.filename = filename + self.latent = latent + self.filename_text = filename_text + self.cond = None + self.cond_text = None + + +class PersonalizedBase(Dataset): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False): + re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex)>0 else None + + self.placeholder_token = placeholder_token + + self.width = width + self.height = height + self.flip = transforms.RandomHorizontalFlip(p=flip_p) + + self.dataset = [] + + with open(template_file, "r") as file: + lines = [x.strip() for x in file.readlines()] + + self.lines = lines + + assert data_root, 'dataset directory not specified' + + cond_model = shared.sd_model.cond_stage_model + + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] + 
print("Preparing dataset...") + for path in tqdm.tqdm(self.image_paths): + try: + image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC) + except Exception: + continue + + text_filename = os.path.splitext(path)[0] + ".txt" + filename = os.path.basename(path) + + if os.path.exists(text_filename): + with open(text_filename, "r", encoding="utf8") as file: + filename_text = file.read() + else: + filename_text = os.path.splitext(filename)[0] + filename_text = re.sub(re_numbers_at_start, '', filename_text) + if re_word: + tokens = re_word.findall(filename_text) + filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens) + + npimage = np.array(image).astype(np.uint8) + npimage = (npimage / 127.5 - 1.0).astype(np.float32) + + torchdata = torch.from_numpy(npimage).to(device=device, dtype=torch.float32) + torchdata = torch.moveaxis(torchdata, 2, 0) + + init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() + init_latent = init_latent.to(devices.cpu) + + entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent) + + if include_cond: + entry.cond_text = self.create_text(filename_text) + entry.cond = cond_model([entry.cond_text]).to(devices.cpu) + + self.dataset.append(entry) + + self.length = len(self.dataset) * repeats + + self.initial_indexes = np.arange(self.length) % len(self.dataset) + self.indexes = None + self.shuffle() + + def shuffle(self): + self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])] + + def create_text(self, filename_text): + text = random.choice(self.lines) + text = text.replace("[name]", self.placeholder_token) + text = text.replace("[filewords]", filename_text) + return text + + def __len__(self): + return self.length + + def __getitem__(self, i): + if i % len(self.dataset) == 0: + self.shuffle() + + index = self.indexes[i % len(self.indexes)] + entry = self.dataset[index] + + if entry.cond is None: + entry.cond_text = self.create_text(entry.filename_text) + + return entry diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..898ce3b3bb3c2ec0c17d459903f2735647a81b14 --- /dev/null +++ b/modules/textual_inversion/image_embedding.py @@ -0,0 +1,219 @@ +import base64 +import json +import numpy as np +import zlib +from PIL import Image, PngImagePlugin, ImageDraw, ImageFont +from fonts.ttf import Roboto +import torch + + +class EmbeddingEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, torch.Tensor): + return {'TORCHTENSOR': obj.cpu().detach().numpy().tolist()} + return json.JSONEncoder.default(self, obj) + + +class EmbeddingDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): + json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + + def object_hook(self, d): + if 'TORCHTENSOR' in d: + return torch.from_numpy(np.array(d['TORCHTENSOR'])) + return d + + +def embedding_to_b64(data): + d = json.dumps(data, cls=EmbeddingEncoder) + return base64.b64encode(d.encode()) + + +def embedding_from_b64(data): + d = base64.b64decode(data) + return json.loads(d, cls=EmbeddingDecoder) + + +def lcg(m=2**32, a=1664525, c=1013904223, seed=0): + while True: + seed = (a * seed + c) % m + yield seed % 255 + + +def xor_block(block): + g = lcg() + randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) + return 
np.bitwise_xor(block.astype(np.uint8), randblock & 0x0F) + + +def style_block(block, sequence): + im = Image.new('RGB', (block.shape[1], block.shape[0])) + draw = ImageDraw.Draw(im) + i = 0 + for x in range(-6, im.size[0], 8): + for yi, y in enumerate(range(-6, im.size[1], 8)): + offset = 0 + if yi % 2 == 0: + offset = 4 + shade = sequence[i % len(sequence)] + i += 1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill=(shade, shade, shade)) + + fg = np.array(im).astype(np.uint8) & 0xF0 + + return block ^ fg + + +def insert_image_data_embed(image, data): + d = 3 + data_compressed = zlib.compress(json.dumps(data, cls=EmbeddingEncoder).encode(), level=9) + data_np_ = np.frombuffer(data_compressed, np.uint8).copy() + data_np_high = data_np_ >> 4 + data_np_low = data_np_ & 0x0F + + h = image.size[1] + next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0] % h)) + next_size = next_size + ((h*d)-(next_size % (h*d))) + + data_np_low.resize(next_size) + data_np_low = data_np_low.reshape((h, -1, d)) + + data_np_high.resize(next_size) + data_np_high = data_np_high.reshape((h, -1, d)) + + edge_style = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] + edge_style = (np.abs(edge_style)/np.max(np.abs(edge_style))*255).astype(np.uint8) + + data_np_low = style_block(data_np_low, sequence=edge_style) + data_np_low = xor_block(data_np_low) + data_np_high = style_block(data_np_high, sequence=edge_style[::-1]) + data_np_high = xor_block(data_np_high) + + im_low = Image.fromarray(data_np_low, mode='RGB') + im_high = Image.fromarray(data_np_high, mode='RGB') + + background = Image.new('RGB', (image.size[0]+im_low.size[0]+im_high.size[0]+2, image.size[1]), (0, 0, 0)) + background.paste(im_low, (0, 0)) + background.paste(image, (im_low.size[0]+1, 0)) + background.paste(im_high, (im_low.size[0]+1+image.size[0]+1, 0)) + + return background + + +def crop_black(img, tol=0): + mask = (img > tol).all(2) + mask0, mask1 = mask.any(0), mask.any(1) + col_start, col_end = mask0.argmax(), mask.shape[1]-mask0[::-1].argmax() + row_start, row_end = mask1.argmax(), mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end, col_start:col_end] + + +def extract_image_data_embed(image): + d = 3 + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1], image.size[0], d).astype(np.uint8)) & 0x0F + black_cols = np.where(np.sum(outarr, axis=(0, 2)) == 0) + if black_cols[0].shape[0] < 2: + print('No Image data blocks found.') + return None + + data_block_lower = outarr[:, :black_cols[0].min(), :].astype(np.uint8) + data_block_upper = outarr[:, black_cols[0].max()+1:, :].astype(np.uint8) + + data_block_lower = xor_block(data_block_lower) + data_block_upper = xor_block(data_block_upper) + + data_block = (data_block_upper << 4) | (data_block_lower) + data_block = data_block.flatten().tobytes() + + data = zlib.decompress(data_block) + return json.loads(data, cls=EmbeddingDecoder) + + +def caption_image_overlay(srcimage, title, footerLeft, footerMid, footerRight, textfont=None): + from math import cos + + image = srcimage.copy() + + if textfont is None: + try: + textfont = ImageFont.truetype(opts.font or Roboto, fontsize) + textfont = opts.font or Roboto + except Exception: + textfont = Roboto + + factor = 1.5 + gradient = Image.new('RGBA', (1, image.size[1]), color=(0, 0, 0, 0)) + for y in range(image.size[1]): + mag = 1-cos(y/image.size[1]*factor) + mag = max(mag, 1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) + gradient.putpixel((0, y), (0, 0, 0, int(mag*255))) 
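+    # the 1-pixel-wide gradient column is stretched to the full image size and
+    # alpha-composited over the image, darkening it most strongly near the top and
+    # bottom edges so the title and footer text drawn below remain readable.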
+ image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) + + draw = ImageDraw.Draw(image) + fontsize = 32 + font = ImageFont.truetype(textfont, fontsize) + padding = 10 + + _, _, w, h = draw.textbbox((0, 0), title, font=font) + fontsize = min(int(fontsize * (((image.size[0]*0.75)-(padding*4))/w)), 72) + font = ImageFont.truetype(textfont, fontsize) + _, _, w, h = draw.textbbox((0, 0), title, font=font) + draw.text((padding, padding), title, anchor='lt', font=font, fill=(255, 255, 255, 230)) + + _, _, w, h = draw.textbbox((0, 0), footerLeft, font=font) + fontsize_left = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + _, _, w, h = draw.textbbox((0, 0), footerMid, font=font) + fontsize_mid = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + _, _, w, h = draw.textbbox((0, 0), footerRight, font=font) + fontsize_right = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + + font = ImageFont.truetype(textfont, min(fontsize_left, fontsize_mid, fontsize_right)) + + draw.text((padding, image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255, 255, 255, 230)) + draw.text((image.size[0]/2, image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255, 255, 255, 230)) + draw.text((image.size[0]-padding, image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255, 255, 255, 230)) + + return image + + +if __name__ == '__main__': + + testEmbed = Image.open('test_embedding.png') + data = extract_image_data_embed(testEmbed) + assert data is not None + + data = embedding_from_b64(testEmbed.text['sd-ti-embedding']) + assert data is not None + + image = Image.new('RGBA', (512, 512), (255, 255, 200, 255)) + cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') + + test_embed = {'string_to_param': {'*': torch.from_numpy(np.random.random((2, 4096)))}} + + embedded_image = insert_image_data_embed(cap_image, test_embed) + + retrived_embed = extract_image_data_embed(embedded_image) + + assert str(retrived_embed) == str(test_embed) + + embedded_image2 = insert_image_data_embed(cap_image, retrived_embed) + + assert embedded_image == embedded_image2 + + g = lcg() + shared_random = np.array([next(g) for _ in range(100)]).astype(np.uint8).tolist() + + reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, + 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, + 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193, + 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, + 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, + 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, + 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, + 204, 86, 73, 222, 44, 198, 118, 240, 97] + + assert shared_random == reference_random + + hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) + + assert 12731374 == hunna_kay_random_sum diff --git a/modules/textual_inversion/learn_schedule.py b/modules/textual_inversion/learn_schedule.py new file mode 100644 index 0000000000000000000000000000000000000000..2062726ad22087a3dc60ac784c3dfcf4aec0f955 --- /dev/null +++ b/modules/textual_inversion/learn_schedule.py @@ -0,0 +1,69 @@ +import tqdm + + +class LearnScheduleIterator: + def __init__(self, learn_rate, max_steps, cur_step=0): + """ + specify learn_rate as "0.001:100, 0.00001:1000, 1e-5:10000" to have lr of 0.001 until step 100, 0.00001 until 1000, 1e-5:10000 until 10000 + """ + + pairs = learn_rate.split(',') + self.rates 
= [] + self.it = 0 + self.maxit = 0 + for i, pair in enumerate(pairs): + tmp = pair.split(':') + if len(tmp) == 2: + step = int(tmp[1]) + if step > cur_step: + self.rates.append((float(tmp[0]), min(step, max_steps))) + self.maxit += 1 + if step > max_steps: + return + elif step == -1: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + else: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + + def __iter__(self): + return self + + def __next__(self): + if self.it < self.maxit: + self.it += 1 + return self.rates[self.it - 1] + else: + raise StopIteration + + +class LearnRateScheduler: + def __init__(self, learn_rate, max_steps, cur_step=0, verbose=True): + self.schedules = LearnScheduleIterator(learn_rate, max_steps, cur_step) + (self.learn_rate, self.end_step) = next(self.schedules) + self.verbose = verbose + + if self.verbose: + print(f'Training at rate of {self.learn_rate} until step {self.end_step}') + + self.finished = False + + def apply(self, optimizer, step_number): + if step_number <= self.end_step: + return + + try: + (self.learn_rate, self.end_step) = next(self.schedules) + except Exception: + self.finished = True + return + + if self.verbose: + tqdm.tqdm.write(f'Training at rate of {self.learn_rate} until step {self.end_step}') + + for pg in optimizer.param_groups: + pg['lr'] = self.learn_rate + diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..886cf0c3b6a4a570dd5d589af47a0568209e670c --- /dev/null +++ b/modules/textual_inversion/preprocess.py @@ -0,0 +1,116 @@ +import os +from PIL import Image, ImageOps +import platform +import sys +import tqdm +import time + +from modules import shared, images +from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + import modules.deepbooru as deepbooru + + +def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): + try: + if process_caption: + shared.interrogator.load() + + if process_caption_deepbooru: + db_opts = deepbooru.create_deepbooru_opts() + db_opts[deepbooru.OPT_INCLUDE_RANKS] = False + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) + + preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) + + finally: + + if process_caption: + shared.interrogator.send_blip_to_ram() + + if process_caption_deepbooru: + deepbooru.release_process() + + + +def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): + width = process_width + height = process_height + src = os.path.abspath(process_src) + dst = os.path.abspath(process_dst) + + assert src != dst, 'same directory specified as source and destination' + + os.makedirs(dst, exist_ok=True) + + files = os.listdir(src) + + shared.state.textinfo = "Preprocessing..." 
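For illustration (not part of this changeset): a hypothetical call to the preprocess() entry point defined above. The directory paths are placeholders, and the webui environment (modules.shared and friends) is assumed to be importable.

import modules.textual_inversion.preprocess as preprocess

preprocess.preprocess(
    process_src="/path/to/raw_images",       # placeholder input directory
    process_dst="/path/to/processed",        # placeholder output directory
    process_width=512,
    process_height=512,
    process_flip=True,        # also save a mirrored copy of each crop
    process_split=True,       # split very tall/wide images into two crops
    process_caption=False,    # True would caption with BLIP
    process_caption_deepbooru=False,
)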
+ shared.state.job_count = len(files) + + def save_pic_with_caption(image, index): + caption = "" + + if process_caption: + caption += shared.interrogator.generate_caption(image) + + if process_caption_deepbooru: + if len(caption) > 0: + caption += ", " + caption += deepbooru.get_tags_from_process(image) + + filename_part = filename + filename_part = os.path.splitext(filename_part)[0] + filename_part = os.path.basename(filename_part) + + basename = f"{index:05}-{subindex[0]}-{filename_part}" + image.save(os.path.join(dst, f"{basename}.png")) + + if len(caption) > 0: + with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file: + file.write(caption) + + subindex[0] += 1 + + def save_pic(image, index): + save_pic_with_caption(image, index) + + if process_flip: + save_pic_with_caption(ImageOps.mirror(image), index) + + for index, imagefile in enumerate(tqdm.tqdm(files)): + subindex = [0] + filename = os.path.join(src, imagefile) + try: + img = Image.open(filename).convert("RGB") + except Exception: + continue + + if shared.state.interrupted: + break + + ratio = img.height / img.width + is_tall = ratio > 1.35 + is_wide = ratio < 1 / 1.35 + + if process_split and is_tall: + img = img.resize((width, height * img.height // img.width)) + + top = img.crop((0, 0, width, height)) + save_pic(top, index) + + bot = img.crop((0, img.height - height, width, img.height)) + save_pic(bot, index) + elif process_split and is_wide: + img = img.resize((width * img.width // img.height, height)) + + left = img.crop((0, 0, width, height)) + save_pic(left, index) + + right = img.crop((img.width - width, 0, img.width, height)) + save_pic(right, index) + else: + img = images.resize_image(1, img, width, height) + save_pic(img, index) + + shared.state.nextjob() diff --git a/modules/textual_inversion/test_embedding.png b/modules/textual_inversion/test_embedding.png new file mode 100644 index 0000000000000000000000000000000000000000..07e2d9afaeaff3751b68a7c0f49d8b3466474282 Binary files /dev/null and b/modules/textual_inversion/test_embedding.png differ diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py new file mode 100644 index 0000000000000000000000000000000000000000..fa0e33a2ae0549916748fd2e6397a5c70e9e3ef0 --- /dev/null +++ b/modules/textual_inversion/textual_inversion.py @@ -0,0 +1,321 @@ +import os +import sys +import traceback + +import torch +import tqdm +import html +import datetime + +from PIL import Image, PngImagePlugin + +from modules import shared, devices, sd_hijack, processing, sd_models +import modules.textual_inversion.dataset +from modules.textual_inversion.learn_schedule import LearnRateScheduler + +from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64, + insert_image_data_embed, extract_image_data_embed, + caption_image_overlay) + +class Embedding: + def __init__(self, vec, name, step=None): + self.vec = vec + self.name = name + self.step = step + self.cached_checksum = None + self.sd_checkpoint = None + self.sd_checkpoint_name = None + + def save(self, filename): + embedding_data = { + "string_to_token": {"*": 265}, + "string_to_param": {"*": self.vec}, + "name": self.name, + "step": self.step, + "sd_checkpoint": self.sd_checkpoint, + "sd_checkpoint_name": self.sd_checkpoint_name, + } + + torch.save(embedding_data, filename) + + def checksum(self): + if self.cached_checksum is not None: + return self.cached_checksum + + def const_hash(a): + r = 0 + for v in a: + r = (r * 281 ^ int(v) * 
997) & 0xFFFFFFFF + return r + + self.cached_checksum = f'{const_hash(self.vec.reshape(-1) * 100) & 0xffff:04x}' + return self.cached_checksum + + +class EmbeddingDatabase: + def __init__(self, embeddings_dir): + self.ids_lookup = {} + self.word_embeddings = {} + self.dir_mtime = None + self.embeddings_dir = embeddings_dir + + def register_embedding(self, embedding, model): + + self.word_embeddings[embedding.name] = embedding + + ids = model.cond_stage_model.tokenizer([embedding.name], add_special_tokens=False)['input_ids'][0] + + first_id = ids[0] + if first_id not in self.ids_lookup: + self.ids_lookup[first_id] = [] + + self.ids_lookup[first_id] = sorted(self.ids_lookup[first_id] + [(ids, embedding)], key=lambda x: len(x[0]), reverse=True) + + return embedding + + def load_textual_inversion_embeddings(self): + mt = os.path.getmtime(self.embeddings_dir) + if self.dir_mtime is not None and mt <= self.dir_mtime: + return + + self.dir_mtime = mt + self.ids_lookup.clear() + self.word_embeddings.clear() + + def process_file(path, filename): + name = os.path.splitext(filename)[0] + + data = [] + + if filename.upper().endswith('.PNG'): + embed_image = Image.open(path) + if 'sd-ti-embedding' in embed_image.text: + data = embedding_from_b64(embed_image.text['sd-ti-embedding']) + name = data.get('name', name) + else: + data = extract_image_data_embed(embed_image) + name = data.get('name', name) + else: + data = torch.load(path, map_location="cpu") + + # textual inversion embeddings + if 'string_to_param' in data: + param_dict = data['string_to_param'] + if hasattr(param_dict, '_parameters'): + param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11 + assert len(param_dict) == 1, 'embedding file has multiple terms in it' + emb = next(iter(param_dict.items()))[1] + # diffuser concepts + elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: + assert len(data.keys()) == 1, 'embedding file has multiple terms in it' + + emb = next(iter(data.values())) + if len(emb.shape) == 1: + emb = emb.unsqueeze(0) + else: + raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.") + + vec = emb.detach().to(devices.device, dtype=torch.float32) + embedding = Embedding(vec, name) + embedding.step = data.get('step', None) + embedding.sd_checkpoint = data.get('hash', None) + embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None) + self.register_embedding(embedding, shared.sd_model) + + for fn in os.listdir(self.embeddings_dir): + try: + fullfn = os.path.join(self.embeddings_dir, fn) + + if os.stat(fullfn).st_size == 0: + continue + + process_file(fullfn, fn) + except Exception: + print(f"Error loading emedding {fn}:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + continue + + print(f"Loaded a total of {len(self.word_embeddings)} textual inversion embeddings.") + + def find_embedding_at_position(self, tokens, offset): + token = tokens[offset] + possible_matches = self.ids_lookup.get(token, None) + + if possible_matches is None: + return None, None + + for ids, embedding in possible_matches: + if tokens[offset:offset + len(ids)] == ids: + return embedding, len(ids) + + return None, None + + +def create_embedding(name, num_vectors_per_token, init_text='*'): + cond_model = shared.sd_model.cond_stage_model + embedding_layer = cond_model.wrapped.transformer.text_model.embeddings + + ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, 
return_tensors="pt", add_special_tokens=False)["input_ids"] + embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0) + vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device) + + for i in range(num_vectors_per_token): + vec[i] = embedded[i * int(embedded.shape[0]) // num_vectors_per_token] + + fn = os.path.join(shared.cmd_opts.embeddings_dir, f"{name}.pt") + assert not os.path.exists(fn), f"file {fn} already exists" + + embedding = Embedding(vec, name) + embedding.step = 0 + embedding.save(fn) + + return fn + + +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): + assert embedding_name, 'embedding not selected' + + shared.state.textinfo = "Initializing textual inversion training..." + shared.state.job_count = steps + + filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') + + log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), embedding_name) + + if save_embedding_every > 0: + embedding_dir = os.path.join(log_directory, "embeddings") + os.makedirs(embedding_dir, exist_ok=True) + else: + embedding_dir = None + + if create_image_every > 0: + images_dir = os.path.join(log_directory, "images") + os.makedirs(images_dir, exist_ok=True) + else: + images_dir = None + + if create_image_every > 0 and save_image_with_stored_embedding: + images_embeds_dir = os.path.join(log_directory, "image_embeddings") + os.makedirs(images_embeds_dir, exist_ok=True) + else: + images_embeds_dir = None + + cond_model = shared.sd_model.cond_stage_model + + shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." 
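For illustration (not part of this changeset): the learn_rate argument accepted by train_embedding() uses the schedule syntax parsed by LearnScheduleIterator in learn_schedule.py above. A small sketch of how a schedule string expands, assuming the webui modules are importable:

from modules.textual_inversion.learn_schedule import LearnScheduleIterator

# "rate:step" pairs are applied in order until each step threshold is passed
print(list(LearnScheduleIterator("0.005:100, 0.001:1000, 1e-5:10000", max_steps=10000)))
# -> [(0.005, 100), (0.001, 1000), (1e-05, 10000)]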
+ with torch.autocast("cuda"): + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + + hijack = sd_hijack.model_hijack + + embedding = hijack.embedding_db.word_embeddings[embedding_name] + embedding.vec.requires_grad = True + + losses = torch.zeros((32,)) + + last_saved_file = "" + last_saved_image = "" + + ititial_step = embedding.step or 0 + if ititial_step > steps: + return embedding, filename + + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate) + + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) + for i, entry in pbar: + embedding.step = i + ititial_step + + scheduler.apply(optimizer, embedding.step) + if scheduler.finished: + break + + if shared.state.interrupted: + break + + with torch.autocast("cuda"): + c = cond_model([entry.cond_text]) + + x = entry.latent.to(devices.device) + loss = shared.sd_model(x.unsqueeze(0), c)[0] + del x + + losses[embedding.step % losses.shape[0]] = loss.item() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + epoch_num = embedding.step // len(ds) + epoch_step = embedding.step - (epoch_num * len(ds)) + 1 + + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{len(ds)}]loss: {losses.mean():.7f}") + + if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: + last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') + embedding.save(last_saved_file) + + if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: + last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') + + preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + prompt=preview_text, + steps=20, + height=training_height, + width=training_width, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + processed = processing.process_images(p) + image = processed.images[0] + + shared.state.current_image = image + + if save_image_with_stored_embedding and os.path.exists(last_saved_file): + + last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png') + + info = PngImagePlugin.PngInfo() + data = torch.load(last_saved_file) + info.add_text("sd-ti-embedding", embedding_to_b64(data)) + + title = "<{}>".format(data.get('name', '???')) + checkpoint = sd_models.select_checkpoint() + footer_left = checkpoint.model_name + footer_mid = '[{}]'.format(checkpoint.hash) + footer_right = '{}'.format(embedding.step) + + captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right) + captioned_image = insert_image_data_embed(captioned_image, data) + + captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) + + image.save(last_saved_image) + + last_saved_image += f", prompt: {preview_text}" + + shared.state.job_no = embedding.step + + shared.state.textinfo = f""" +

+<p>
+Loss: {losses.mean():.7f}<br/>
+Step: {embedding.step}<br/>
+Last prompt: {html.escape(entry.cond_text)}<br/>
+Last saved embedding: {html.escape(last_saved_file)}<br/>
+Last saved image: {html.escape(last_saved_image)}<br/>
+</p>
+""" + + checkpoint = sd_models.select_checkpoint() + + embedding.sd_checkpoint = checkpoint.hash + embedding.sd_checkpoint_name = checkpoint.model_name + embedding.cached_checksum = None + embedding.save(filename) + + return embedding, filename diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..36881e7adf7cbe11ec20732d280f5bf2a2ff0ae3 --- /dev/null +++ b/modules/textual_inversion/ui.py @@ -0,0 +1,42 @@ +import html + +import gradio as gr + +import modules.textual_inversion.textual_inversion +import modules.textual_inversion.preprocess +from modules import sd_hijack, shared + + +def create_embedding(name, initialization_text, nvpt): + filename = modules.textual_inversion.textual_inversion.create_embedding(name, nvpt, init_text=initialization_text) + + sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() + + return gr.Dropdown.update(choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())), f"Created: {filename}", "" + + +def preprocess(*args): + modules.textual_inversion.preprocess.preprocess(*args) + + return "Preprocessing finished.", "" + + +def train_embedding(*args): + + assert not shared.cmd_opts.lowvram, 'Training models with lowvram not possible' + + try: + sd_hijack.undo_optimizations() + + embedding, filename = modules.textual_inversion.textual_inversion.train_embedding(*args) + + res = f""" +Training {'interrupted' if shared.state.interrupted else 'finished'} at {embedding.step} steps. +Embedding saved to {html.escape(filename)} +""" + return res, "" + except Exception: + raise + finally: + sd_hijack.apply_optimizations() + diff --git a/modules/txt2img.py b/modules/txt2img.py new file mode 100644 index 0000000000000000000000000000000000000000..e985242b33022bf7d52a7b3e36aa51ef8e66a232 --- /dev/null +++ b/modules/txt2img.py @@ -0,0 +1,55 @@ +import modules.scripts +from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images +from modules.shared import opts, cmd_opts +import modules.shared as shared +import modules.processing as processing +from modules.ui import plaintext_to_html + + +def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, scale_latent: bool, denoising_strength: float, *args): + p = StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples, + outpath_grids=opts.outdir_grids or opts.outdir_txt2img_grids, + prompt=prompt, + styles=[prompt_style, prompt_style2], + negative_prompt=negative_prompt, + seed=seed, + subseed=subseed, + subseed_strength=subseed_strength, + seed_resize_from_h=seed_resize_from_h, + seed_resize_from_w=seed_resize_from_w, + seed_enable_extras=seed_enable_extras, + sampler_index=sampler_index, + batch_size=batch_size, + n_iter=n_iter, + steps=steps, + cfg_scale=cfg_scale, + width=width, + height=height, + restore_faces=restore_faces, + tiling=tiling, + enable_hr=enable_hr, + scale_latent=scale_latent if enable_hr else None, + denoising_strength=denoising_strength if enable_hr else None, + ) + + if cmd_opts.enable_console_prompts: + 
print(f"\ntxt2img: {prompt}", file=shared.progress_print_out) + + processed = modules.scripts.scripts_txt2img.run(p, *args) + + if processed is None: + processed = process_images(p) + + shared.total_tqdm.clear() + + generation_info_js = processed.js() + if opts.samples_log_stdout: + print(generation_info_js) + + if opts.do_not_show_images: + processed.images = [] + + return processed.images, generation_info_js, plaintext_to_html(processed.info) + diff --git a/modules/ui.py b/modules/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..7446439d01053c988926d889650b2bfb68975b15 --- /dev/null +++ b/modules/ui.py @@ -0,0 +1,1633 @@ +import base64 +import html +import io +import json +import math +import mimetypes +import os +import random +import sys +import time +import traceback +import platform +import subprocess as sp +from functools import reduce + +import numpy as np +import torch +from PIL import Image, PngImagePlugin +import piexif + +import gradio as gr +import gradio.utils +import gradio.routes + +from modules import sd_hijack +from modules.paths import script_path +from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + from modules.deepbooru import get_deepbooru_tags +import modules.shared as shared +from modules.sd_samplers import samplers, samplers_for_img2img +from modules.sd_hijack import model_hijack +import modules.ldsr_model +import modules.scripts +import modules.gfpgan_model +import modules.codeformer_model +import modules.styles +import modules.generation_parameters_copypaste +from modules import prompt_parser +from modules.images import save_image +import modules.textual_inversion.ui +import modules.hypernetworks.ui + +# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI +mimetypes.init() +mimetypes.add_type('application/javascript', '.js') + + +if not cmd_opts.share and not cmd_opts.listen: + # fix gradio phoning home + gradio.utils.version_check = lambda: None + gradio.utils.get_local_ip_address = lambda: '127.0.0.1' + +if cmd_opts.ngrok != None: + import modules.ngrok as ngrok + print('ngrok authtoken detected, trying to connect...') + ngrok.connect(cmd_opts.ngrok, cmd_opts.port if cmd_opts.port != None else 7860) + + +def gr_show(visible=True): + return {"visible": visible, "__type__": "update"} + + +sample_img2img = "assets/stable-samples/img2img/sketch-mountains-input.jpg" +sample_img2img = sample_img2img if os.path.exists(sample_img2img) else None + +css_hide_progressbar = """ +.wrap .m-12 svg { display:none!important; } +.wrap .m-12::before { content:"Loading..." } +.progress-bar { display:none!important; } +.meta-text { display:none!important; } +""" + +# Using constants for these since the variation selector isn't visible. +# Important that they exactly match script.js for tooltip to work. +random_symbol = '\U0001f3b2\ufe0f' # 🎲️ +reuse_symbol = '\u267b\ufe0f' # ♻️ +art_symbol = '\U0001f3a8' # 🎨 +paste_symbol = '\u2199\ufe0f' # ↙ +folder_symbol = '\U0001f4c2' # 📂 +refresh_symbol = '\U0001f504' # 🔄 + + +def plaintext_to_html(text): + text = "
<p>" + "<br>\n".join([f"{html.escape(x)}" for x in text.split('\n')]) + "</p>
" + return text + + +def image_from_url_text(filedata): + if type(filedata) == list: + if len(filedata) == 0: + return None + + filedata = filedata[0] + + if filedata.startswith("data:image/png;base64,"): + filedata = filedata[len("data:image/png;base64,"):] + + filedata = base64.decodebytes(filedata.encode('utf-8')) + image = Image.open(io.BytesIO(filedata)) + return image + + +def send_gradio_gallery_to_image(x): + if len(x) == 0: + return None + + return image_from_url_text(x[0]) + + +def save_files(js_data, images, do_make_zip, index): + import csv + filenames = [] + fullfns = [] + + #quick dictionary to class object conversion. Its necessary due apply_filename_pattern requiring it + class MyObject: + def __init__(self, d=None): + if d is not None: + for key, value in d.items(): + setattr(self, key, value) + + data = json.loads(js_data) + + p = MyObject(data) + path = opts.outdir_save + save_to_dirs = opts.use_save_to_dirs_for_ui + extension: str = opts.samples_format + start_index = 0 + + if index > -1 and opts.save_selected_only and (index >= data["index_of_first_image"]): # ensures we are looking at a specific non-grid picture, and we have save_selected_only + + images = [images[index]] + start_index = index + + os.makedirs(opts.outdir_save, exist_ok=True) + + with open(os.path.join(opts.outdir_save, "log.csv"), "a", encoding="utf8", newline='') as file: + at_start = file.tell() == 0 + writer = csv.writer(file) + if at_start: + writer.writerow(["prompt", "seed", "width", "height", "sampler", "cfgs", "steps", "filename", "negative_prompt"]) + + for image_index, filedata in enumerate(images, start_index): + if filedata.startswith("data:image/png;base64,"): + filedata = filedata[len("data:image/png;base64,"):] + + image = Image.open(io.BytesIO(base64.decodebytes(filedata.encode('utf-8')))) + + is_grid = image_index < p.index_of_first_image + i = 0 if is_grid else (image_index - p.index_of_first_image) + + fullfn, txt_fullfn = save_image(image, path, "", seed=p.all_seeds[i], prompt=p.all_prompts[i], extension=extension, info=p.infotexts[image_index], grid=is_grid, p=p, save_to_dirs=save_to_dirs) + + filename = os.path.relpath(fullfn, path) + filenames.append(filename) + fullfns.append(fullfn) + if txt_fullfn: + filenames.append(os.path.basename(txt_fullfn)) + fullfns.append(txt_fullfn) + + writer.writerow([data["prompt"], data["seed"], data["width"], data["height"], data["sampler"], data["cfg_scale"], data["steps"], filenames[0], data["negative_prompt"]]) + + # Make Zip + if do_make_zip: + zip_filepath = os.path.join(path, "images.zip") + + from zipfile import ZipFile + with ZipFile(zip_filepath, "w") as zip_file: + for i in range(len(fullfns)): + with open(fullfns[i], mode="rb") as f: + zip_file.writestr(filenames[i], f.read()) + fullfns.insert(0, zip_filepath) + + return gr.File.update(value=fullfns, visible=True), '', '', plaintext_to_html(f"Saved: {filenames[0]}") + + +def wrap_gradio_call(func, extra_outputs=None): + def f(*args, extra_outputs_array=extra_outputs, **kwargs): + run_memmon = opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled + if run_memmon: + shared.mem_mon.monitor() + t = time.perf_counter() + + try: + res = list(func(*args, **kwargs)) + except Exception as e: + # When printing out our debug argument list, do not print out more than a MB of text + max_debug_str_len = 131072 # (1024*1024)/8 + + print("Error completing request", file=sys.stderr) + argStr = f"Arguments: {str(args)} {str(kwargs)}" + print(argStr[:max_debug_str_len], file=sys.stderr) + if 
len(argStr) > max_debug_str_len: + print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr) + + print(traceback.format_exc(), file=sys.stderr) + + shared.state.job = "" + shared.state.job_count = 0 + + if extra_outputs_array is None: + extra_outputs_array = [None, ''] + + res = extra_outputs_array + [f"
<div class='error'>{plaintext_to_html(type(e).__name__+': '+str(e))}</div>
"] + + elapsed = time.perf_counter() - t + elapsed_m = int(elapsed // 60) + elapsed_s = elapsed % 60 + elapsed_text = f"{elapsed_s:.2f}s" + if (elapsed_m > 0): + elapsed_text = f"{elapsed_m}m "+elapsed_text + + if run_memmon: + mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()} + active_peak = mem_stats['active_peak'] + reserved_peak = mem_stats['reserved_peak'] + sys_peak = mem_stats['system_peak'] + sys_total = mem_stats['total'] + sys_pct = round(sys_peak/max(sys_total, 1) * 100, 2) + + vram_html = f"

<p class='vram'>Torch active/reserved: {active_peak}/{reserved_peak} MiB, Sys VRAM: {sys_peak}/{sys_total} MiB ({sys_pct}%)</p>
" + else: + vram_html = '' + + # last item is always HTML + res[-1] += f"

<div class='performance'><p class='time'>Time taken: {elapsed_text}</p>{vram_html}</div>
" + + shared.state.skipped = False + shared.state.interrupted = False + shared.state.job_count = 0 + + return tuple(res) + + return f + + +def check_progress_call(id_part): + if shared.state.job_count == 0: + return "", gr_show(False), gr_show(False), gr_show(False) + + progress = 0 + + if shared.state.job_count > 0: + progress += shared.state.job_no / shared.state.job_count + if shared.state.sampling_steps > 0: + progress += 1 / shared.state.job_count * shared.state.sampling_step / shared.state.sampling_steps + + progress = min(progress, 1) + + progressbar = "" + if opts.show_progressbar: + progressbar = f"""
<div class='progressDiv'><div class='progress' style="width:{progress * 100}%">{str(int(progress*100))+"%" if progress > 0.01 else ""}</div></div>
""" + + image = gr_show(False) + preview_visibility = gr_show(False) + + if opts.show_progress_every_n_steps > 0: + if shared.parallel_processing_allowed: + + if shared.state.sampling_step - shared.state.current_image_sampling_step >= opts.show_progress_every_n_steps and shared.state.current_latent is not None: + shared.state.current_image = modules.sd_samplers.sample_to_image(shared.state.current_latent) + shared.state.current_image_sampling_step = shared.state.sampling_step + + image = shared.state.current_image + + if image is None: + image = gr.update(value=None) + else: + preview_visibility = gr_show(True) + + if shared.state.textinfo is not None: + textinfo_result = gr.HTML.update(value=shared.state.textinfo, visible=True) + else: + textinfo_result = gr_show(False) + + return f"

<span id='{id_part}_progress_span' style='display: none'>{time.time()}</span><p>{progressbar}</p>
", preview_visibility, image, textinfo_result + + +def check_progress_call_initial(id_part): + shared.state.job_count = -1 + shared.state.current_latent = None + shared.state.current_image = None + shared.state.textinfo = None + + return check_progress_call(id_part) + + +def roll_artist(prompt): + allowed_cats = set([x for x in shared.artist_db.categories() if len(opts.random_artist_categories)==0 or x in opts.random_artist_categories]) + artist = random.choice([x for x in shared.artist_db.artists if x.category in allowed_cats]) + + return prompt + ", " + artist.name if prompt != '' else artist.name + + +def visit(x, func, path=""): + if hasattr(x, 'children'): + for c in x.children: + visit(c, func, path) + elif x.label is not None: + func(path + "/" + str(x.label), x) + + +def add_style(name: str, prompt: str, negative_prompt: str): + if name is None: + return [gr_show(), gr_show()] + + style = modules.styles.PromptStyle(name, prompt, negative_prompt) + shared.prompt_styles.styles[style.name] = style + # Save all loaded prompt styles: this allows us to update the storage format in the future more easily, because we + # reserialize all styles every time we save them + shared.prompt_styles.save_styles(shared.styles_filename) + + return [gr.Dropdown.update(visible=True, choices=list(shared.prompt_styles.styles)) for _ in range(4)] + + +def apply_styles(prompt, prompt_neg, style1_name, style2_name): + prompt = shared.prompt_styles.apply_styles_to_prompt(prompt, [style1_name, style2_name]) + prompt_neg = shared.prompt_styles.apply_negative_styles_to_prompt(prompt_neg, [style1_name, style2_name]) + + return [gr.Textbox.update(value=prompt), gr.Textbox.update(value=prompt_neg), gr.Dropdown.update(value="None"), gr.Dropdown.update(value="None")] + + +def interrogate(image): + prompt = shared.interrogator.interrogate(image) + + return gr_show(True) if prompt is None else prompt + + +def interrogate_deepbooru(image): + prompt = get_deepbooru_tags(image) + return gr_show(True) if prompt is None else prompt + + +def create_seed_inputs(): + with gr.Row(): + with gr.Box(): + with gr.Row(elem_id='seed_row'): + seed = (gr.Textbox if cmd_opts.use_textbox_seed else gr.Number)(label='Seed', value=-1) + seed.style(container=False) + random_seed = gr.Button(random_symbol, elem_id='random_seed') + reuse_seed = gr.Button(reuse_symbol, elem_id='reuse_seed') + + with gr.Box(elem_id='subseed_show_box'): + seed_checkbox = gr.Checkbox(label='Extra', elem_id='subseed_show', value=False) + + # Components to show/hide based on the 'Extra' checkbox + seed_extras = [] + + with gr.Row(visible=False) as seed_extra_row_1: + seed_extras.append(seed_extra_row_1) + with gr.Box(): + with gr.Row(elem_id='subseed_row'): + subseed = gr.Number(label='Variation seed', value=-1) + subseed.style(container=False) + random_subseed = gr.Button(random_symbol, elem_id='random_subseed') + reuse_subseed = gr.Button(reuse_symbol, elem_id='reuse_subseed') + subseed_strength = gr.Slider(label='Variation strength', value=0.0, minimum=0, maximum=1, step=0.01) + + with gr.Row(visible=False) as seed_extra_row_2: + seed_extras.append(seed_extra_row_2) + seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=64, label="Resize seed from width", value=0) + seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=64, label="Resize seed from height", value=0) + + random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed]) + random_subseed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[subseed]) + + def 
change_visibility(show): + return {comp: gr_show(show) for comp in seed_extras} + + seed_checkbox.change(change_visibility, show_progress=False, inputs=[seed_checkbox], outputs=seed_extras) + + return seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox + + +def connect_reuse_seed(seed: gr.Number, reuse_seed: gr.Button, generation_info: gr.Textbox, dummy_component, is_subseed): + """ Connects a 'reuse (sub)seed' button's click event so that it copies last used + (sub)seed value from generation info the to the seed field. If copying subseed and subseed strength + was 0, i.e. no variation seed was used, it copies the normal seed value instead.""" + def copy_seed(gen_info_string: str, index): + res = -1 + + try: + gen_info = json.loads(gen_info_string) + index -= gen_info.get('index_of_first_image', 0) + + if is_subseed and gen_info.get('subseed_strength', 0) > 0: + all_subseeds = gen_info.get('all_subseeds', [-1]) + res = all_subseeds[index if 0 <= index < len(all_subseeds) else 0] + else: + all_seeds = gen_info.get('all_seeds', [-1]) + res = all_seeds[index if 0 <= index < len(all_seeds) else 0] + + except json.decoder.JSONDecodeError as e: + if gen_info_string != '': + print("Error parsing JSON generation info:", file=sys.stderr) + print(gen_info_string, file=sys.stderr) + + return [res, gr_show(False)] + + reuse_seed.click( + fn=copy_seed, + _js="(x, y) => [x, selected_gallery_index()]", + show_progress=False, + inputs=[generation_info, dummy_component], + outputs=[seed, dummy_component] + ) + + +def update_token_counter(text, steps): + try: + _, prompt_flat_list, _ = prompt_parser.get_multicond_prompt_list([text]) + prompt_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(prompt_flat_list, steps) + + except Exception: + # a parsing error can happen here during typing, and we don't want to bother the user with + # messages related to it in console + prompt_schedules = [[[steps, text]]] + + flat_prompts = reduce(lambda list1, list2: list1+list2, prompt_schedules) + prompts = [prompt_text for step, prompt_text in flat_prompts] + tokens, token_count, max_length = max([model_hijack.tokenize(prompt) for prompt in prompts], key=lambda args: args[1]) + style_class = ' class="red"' if (token_count > max_length) else "" + return f"{token_count}/{max_length}" + + +def create_toprow(is_img2img): + id_part = "img2img" if is_img2img else "txt2img" + + with gr.Row(elem_id="toprow"): + with gr.Column(scale=4): + with gr.Row(): + with gr.Column(scale=80): + with gr.Row(): + prompt = gr.Textbox(label="Prompt", elem_id=f"{id_part}_prompt", show_label=False, placeholder="Prompt", lines=2) + + with gr.Column(scale=1, elem_id="roll_col"): + roll = gr.Button(value=art_symbol, elem_id="roll", visible=len(shared.artist_db.artists) > 0) + paste = gr.Button(value=paste_symbol, elem_id="paste") + token_counter = gr.HTML(value="", elem_id=f"{id_part}_token_counter") + token_button = gr.Button(visible=False, elem_id=f"{id_part}_token_button") + + with gr.Column(scale=10, elem_id="style_pos_col"): + prompt_style = gr.Dropdown(label="Style 1", elem_id=f"{id_part}_style_index", choices=[k for k, v in shared.prompt_styles.styles.items()], value=next(iter(shared.prompt_styles.styles.keys())), visible=len(shared.prompt_styles.styles) > 1) + + with gr.Row(): + with gr.Column(scale=8): + with gr.Row(): + negative_prompt = gr.Textbox(label="Negative prompt", elem_id="negative_prompt", show_label=False, placeholder="Negative prompt", lines=2) + with 
gr.Column(scale=1, elem_id="roll_col"): + sh = gr.Button(elem_id="sh", visible=True) + + with gr.Column(scale=1, elem_id="style_neg_col"): + prompt_style2 = gr.Dropdown(label="Style 2", elem_id=f"{id_part}_style2_index", choices=[k for k, v in shared.prompt_styles.styles.items()], value=next(iter(shared.prompt_styles.styles.keys())), visible=len(shared.prompt_styles.styles) > 1) + + with gr.Column(scale=1): + with gr.Row(): + skip = gr.Button('Skip', elem_id=f"{id_part}_skip") + interrupt = gr.Button('Interrupt', elem_id=f"{id_part}_interrupt") + submit = gr.Button('Generate', elem_id=f"{id_part}_generate", variant='primary') + + skip.click( + fn=lambda: shared.state.skip(), + inputs=[], + outputs=[], + ) + + interrupt.click( + fn=lambda: shared.state.interrupt(), + inputs=[], + outputs=[], + ) + + with gr.Row(scale=1): + if is_img2img: + interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate") + if cmd_opts.deepdanbooru: + deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru") + else: + deepbooru = None + else: + interrogate = None + deepbooru = None + prompt_style_apply = gr.Button('Apply style', elem_id="style_apply") + save_style = gr.Button('Create style', elem_id="style_create") + + return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button + + +def setup_progressbar(progressbar, preview, id_part, textinfo=None): + if textinfo is None: + textinfo = gr.HTML(visible=False) + + check_progress = gr.Button('Check progress', elem_id=f"{id_part}_check_progress", visible=False) + check_progress.click( + fn=lambda: check_progress_call(id_part), + show_progress=False, + inputs=[], + outputs=[progressbar, preview, preview, textinfo], + ) + + check_progress_initial = gr.Button('Check progress (first)', elem_id=f"{id_part}_check_progress_initial", visible=False) + check_progress_initial.click( + fn=lambda: check_progress_call_initial(id_part), + show_progress=False, + inputs=[], + outputs=[progressbar, preview, preview, textinfo], + ) + + +def create_ui(wrap_gradio_gpu_call): + import modules.img2img + import modules.txt2img + + with gr.Blocks(analytics_enabled=False) as txt2img_interface: + txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False) + dummy_component = gr.Label(visible=False) + + with gr.Row(elem_id='txt2img_progress_row'): + with gr.Column(scale=1): + pass + + with gr.Column(scale=1): + progressbar = gr.HTML(elem_id="txt2img_progressbar") + txt2img_preview = gr.Image(elem_id='txt2img_preview', visible=False) + setup_progressbar(progressbar, txt2img_preview, 'txt2img') + + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20) + sampler_index = gr.Radio(label='Sampling method', elem_id="txt2img_sampling", choices=[x.name for x in samplers], value=samplers[0].name, type="index") + + with gr.Group(): + width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) + height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + + with gr.Row(): + restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) + tiling = gr.Checkbox(label='Tiling', value=False) + enable_hr = gr.Checkbox(label='Highres. 
fix', value=False) + + with gr.Row(visible=False) as hr_options: + scale_latent = gr.Checkbox(label='Scale latent', value=False) + denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7) + + with gr.Row(): + batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1) + batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1) + + cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0) + + seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs() + + with gr.Group(): + custom_inputs = modules.scripts.scripts_txt2img.setup_ui(is_img2img=False) + + with gr.Column(variant='panel'): + + with gr.Group(): + txt2img_preview = gr.Image(elem_id='txt2img_preview', visible=False) + txt2img_gallery = gr.Gallery(label='Output', show_label=False, elem_id='txt2img_gallery').style(grid=4) + + with gr.Group(): + with gr.Row(): + save = gr.Button('Save') + send_to_img2img = gr.Button('Send to img2img') + send_to_inpaint = gr.Button('Send to inpaint') + send_to_extras = gr.Button('Send to extras') + button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder' + open_txt2img_folder = gr.Button(folder_symbol, elem_id=button_id) + + with gr.Row(): + do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False) + + with gr.Row(): + download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False) + + with gr.Group(): + html_info = gr.HTML() + generation_info = gr.Textbox(visible=False) + + connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False) + connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True) + + txt2img_args = dict( + fn=wrap_gradio_gpu_call(modules.txt2img.txt2img), + _js="submit", + inputs=[ + txt2img_prompt, + txt2img_negative_prompt, + txt2img_prompt_style, + txt2img_prompt_style2, + steps, + sampler_index, + restore_faces, + tiling, + batch_count, + batch_size, + cfg_scale, + seed, + subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox, + height, + width, + enable_hr, + scale_latent, + denoising_strength, + ] + custom_inputs, + outputs=[ + txt2img_gallery, + generation_info, + html_info + ], + show_progress=False, + ) + + txt2img_prompt.submit(**txt2img_args) + submit.click(**txt2img_args) + + enable_hr.change( + fn=lambda x: gr_show(x), + inputs=[enable_hr], + outputs=[hr_options], + ) + + save.click( + fn=wrap_gradio_call(save_files), + _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]", + inputs=[ + generation_info, + txt2img_gallery, + do_make_zip, + html_info, + ], + outputs=[ + download_files, + html_info, + html_info, + html_info, + ] + ) + + roll.click( + fn=roll_artist, + _js="update_txt2img_tokens", + inputs=[ + txt2img_prompt, + ], + outputs=[ + txt2img_prompt, + ] + ) + + txt2img_paste_fields = [ + (txt2img_prompt, "Prompt"), + (txt2img_negative_prompt, "Negative prompt"), + (steps, "Steps"), + (sampler_index, "Sampler"), + (restore_faces, "Face restoration"), + (cfg_scale, "CFG scale"), + (seed, "Seed"), + (width, "Size-1"), + (height, "Size-2"), + (batch_size, "Batch size"), + (subseed, "Variation seed"), + (subseed_strength, "Variation seed strength"), + (seed_resize_from_w, "Seed resize from-1"), + (seed_resize_from_h, "Seed resize from-2"), + (denoising_strength, "Denoising strength"), + (enable_hr, lambda d: 
"Denoising strength" in d), + (hr_options, lambda d: gr.Row.update(visible="Denoising strength" in d)), + ] + modules.generation_parameters_copypaste.connect_paste(paste, txt2img_paste_fields, txt2img_prompt) + token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter]) + + with gr.Blocks(analytics_enabled=False) as img2img_interface: + img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True) + + with gr.Row(elem_id='img2img_progress_row'): + with gr.Column(scale=1): + pass + + with gr.Column(scale=1): + progressbar = gr.HTML(elem_id="img2img_progressbar") + img2img_preview = gr.Image(elem_id='img2img_preview', visible=False) + setup_progressbar(progressbar, img2img_preview, 'img2img') + + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + + with gr.Tabs(elem_id="mode_img2img") as tabs_img2img_mode: + with gr.TabItem('img2img', id='img2img'): + init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_img2img_tool) + + with gr.TabItem('Inpaint', id='inpaint'): + init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA") + + init_img_inpaint = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_base") + init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_mask") + + mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4) + + with gr.Row(): + mask_mode = gr.Radio(label="Mask mode", show_label=False, choices=["Draw mask", "Upload mask"], type="index", value="Draw mask", elem_id="mask_mode") + inpainting_mask_invert = gr.Radio(label='Masking mode', show_label=False, choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index") + + inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='original', type="index") + + with gr.Row(): + inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution', value=False) + inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=256, step=4, value=32) + + with gr.TabItem('Batch img2img', id='batch'): + hidden = '
<br>Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' +                        gr.HTML(f"
<p class=\"text-gray-500\">Process images in a directory on the same machine where the server is running.<br>Use an empty output directory to save pictures normally instead of writing to the output directory.{hidden}</p>
") + img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs) + img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs) + + with gr.Row(): + resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill"], type="index", value="Just resize") + + steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20) + sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index") + + with gr.Group(): + width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) + height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + + with gr.Row(): + restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) + tiling = gr.Checkbox(label='Tiling', value=False) + + with gr.Row(): + batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1) + batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1) + + with gr.Group(): + cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0) + denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75) + + seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs() + + with gr.Group(): + custom_inputs = modules.scripts.scripts_img2img.setup_ui(is_img2img=True) + + with gr.Column(variant='panel'): + + with gr.Group(): + img2img_preview = gr.Image(elem_id='img2img_preview', visible=False) + img2img_gallery = gr.Gallery(label='Output', show_label=False, elem_id='img2img_gallery').style(grid=4) + + with gr.Group(): + with gr.Row(): + save = gr.Button('Save') + img2img_send_to_img2img = gr.Button('Send to img2img') + img2img_send_to_inpaint = gr.Button('Send to inpaint') + img2img_send_to_extras = gr.Button('Send to extras') + button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder' + open_img2img_folder = gr.Button(folder_symbol, elem_id=button_id) + + with gr.Row(): + do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False) + + with gr.Row(): + download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False) + + with gr.Group(): + html_info = gr.HTML() + generation_info = gr.Textbox(visible=False) + + connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False) + connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True) + + mask_mode.change( + lambda mode, img: { + init_img_with_mask: gr_show(mode == 0), + init_img_inpaint: gr_show(mode == 1), + init_mask_inpaint: gr_show(mode == 1), + }, + inputs=[mask_mode, init_img_with_mask], + outputs=[ + init_img_with_mask, + init_img_inpaint, + init_mask_inpaint, + ], + ) + + img2img_args = dict( + fn=wrap_gradio_gpu_call(modules.img2img.img2img), + _js="submit_img2img", + inputs=[ + dummy_component, + img2img_prompt, + img2img_negative_prompt, + img2img_prompt_style, + img2img_prompt_style2, + init_img, + init_img_with_mask, + init_img_inpaint, + init_mask_inpaint, + mask_mode, + steps, + sampler_index, + mask_blur, + inpainting_fill, + restore_faces, + tiling, + batch_count, + batch_size, + cfg_scale, + denoising_strength, + seed, + subseed, subseed_strength, seed_resize_from_h, 
seed_resize_from_w, seed_checkbox, + height, + width, + resize_mode, + inpaint_full_res, + inpaint_full_res_padding, + inpainting_mask_invert, + img2img_batch_input_dir, + img2img_batch_output_dir, + ] + custom_inputs, + outputs=[ + img2img_gallery, + generation_info, + html_info + ], + show_progress=False, + ) + + img2img_prompt.submit(**img2img_args) + submit.click(**img2img_args) + + img2img_interrogate.click( + fn=interrogate, + inputs=[init_img], + outputs=[img2img_prompt], + ) + + if cmd_opts.deepdanbooru: + img2img_deepbooru.click( + fn=interrogate_deepbooru, + inputs=[init_img], + outputs=[img2img_prompt], + ) + + save.click( + fn=wrap_gradio_call(save_files), + _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]", + inputs=[ + generation_info, + img2img_gallery, + do_make_zip, + html_info, + ], + outputs=[ + download_files, + html_info, + html_info, + html_info, + ] + ) + + roll.click( + fn=roll_artist, + _js="update_img2img_tokens", + inputs=[ + img2img_prompt, + ], + outputs=[ + img2img_prompt, + ] + ) + + prompts = [(txt2img_prompt, txt2img_negative_prompt), (img2img_prompt, img2img_negative_prompt)] + style_dropdowns = [(txt2img_prompt_style, txt2img_prompt_style2), (img2img_prompt_style, img2img_prompt_style2)] + style_js_funcs = ["update_txt2img_tokens", "update_img2img_tokens"] + + for button, (prompt, negative_prompt) in zip([txt2img_save_style, img2img_save_style], prompts): + button.click( + fn=add_style, + _js="ask_for_style_name", + # Have to pass empty dummy component here, because the JavaScript and Python function have to accept + # the same number of parameters, but we only know the style-name after the JavaScript prompt + inputs=[dummy_component, prompt, negative_prompt], + outputs=[txt2img_prompt_style, img2img_prompt_style, txt2img_prompt_style2, img2img_prompt_style2], + ) + + for button, (prompt, negative_prompt), (style1, style2), js_func in zip([txt2img_prompt_style_apply, img2img_prompt_style_apply], prompts, style_dropdowns, style_js_funcs): + button.click( + fn=apply_styles, + _js=js_func, + inputs=[prompt, negative_prompt, style1, style2], + outputs=[prompt, negative_prompt, style1, style2], + ) + + img2img_paste_fields = [ + (img2img_prompt, "Prompt"), + (img2img_negative_prompt, "Negative prompt"), + (steps, "Steps"), + (sampler_index, "Sampler"), + (restore_faces, "Face restoration"), + (cfg_scale, "CFG scale"), + (seed, "Seed"), + (width, "Size-1"), + (height, "Size-2"), + (batch_size, "Batch size"), + (subseed, "Variation seed"), + (subseed_strength, "Variation seed strength"), + (seed_resize_from_w, "Seed resize from-1"), + (seed_resize_from_h, "Seed resize from-2"), + (denoising_strength, "Denoising strength"), + ] + modules.generation_parameters_copypaste.connect_paste(paste, img2img_paste_fields, img2img_prompt) + token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter]) + + with gr.Blocks(analytics_enabled=False) as extras_interface: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="mode_extras"): + with gr.TabItem('Single Image'): + extras_image = gr.Image(label="Source", source="upload", interactive=True, type="pil") + + with gr.TabItem('Batch Process'): + image_batch = gr.File(label="Batch Process", file_count="multiple", interactive=True, type="file") + + with gr.Tabs(elem_id="extras_resize_mode"): + with gr.TabItem('Scale by'): + upscaling_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Resize", value=2) + with 
gr.TabItem('Scale to'): + with gr.Group(): + with gr.Row(): + upscaling_resize_w = gr.Number(label="Width", value=512, precision=0) + upscaling_resize_h = gr.Number(label="Height", value=512, precision=0) + upscaling_crop = gr.Checkbox(label='Crop to fit', value=True) + + with gr.Group(): + extras_upscaler_1 = gr.Radio(label='Upscaler 1', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index") + + with gr.Group(): + extras_upscaler_2 = gr.Radio(label='Upscaler 2', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index") + extras_upscaler_2_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Upscaler 2 visibility", value=1) + + with gr.Group(): + gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN visibility", value=0, interactive=modules.gfpgan_model.have_gfpgan) + + with gr.Group(): + codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer visibility", value=0, interactive=modules.codeformer_model.have_codeformer) + codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer weight (0 = maximum effect, 1 = minimum effect)", value=0, interactive=modules.codeformer_model.have_codeformer) + + submit = gr.Button('Generate', elem_id="extras_generate", variant='primary') + + with gr.Column(variant='panel'): + result_images = gr.Gallery(label="Result", show_label=False) + html_info_x = gr.HTML() + html_info = gr.HTML() + extras_send_to_img2img = gr.Button('Send to img2img') + extras_send_to_inpaint = gr.Button('Send to inpaint') + button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else '' + open_extras_folder = gr.Button('Open output directory', elem_id=button_id) + + submit.click( + fn=wrap_gradio_gpu_call(modules.extras.run_extras), + _js="get_extras_tab_index", + inputs=[ + dummy_component, + dummy_component, + extras_image, + image_batch, + gfpgan_visibility, + codeformer_visibility, + codeformer_weight, + upscaling_resize, + upscaling_resize_w, + upscaling_resize_h, + upscaling_crop, + extras_upscaler_1, + extras_upscaler_2, + extras_upscaler_2_visibility, + ], + outputs=[ + result_images, + html_info_x, + html_info, + ] + ) + + extras_send_to_img2img.click( + fn=lambda x: image_from_url_text(x), + _js="extract_image_from_gallery_img2img", + inputs=[result_images], + outputs=[init_img], + ) + + extras_send_to_inpaint.click( + fn=lambda x: image_from_url_text(x), + _js="extract_image_from_gallery_inpaint", + inputs=[result_images], + outputs=[init_img_with_mask], + ) + + with gr.Blocks(analytics_enabled=False) as pnginfo_interface: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + image = gr.Image(elem_id="pnginfo_image", label="Source", source="upload", interactive=True, type="pil") + + with gr.Column(variant='panel'): + html = gr.HTML() + generation_info = gr.Textbox(visible=False) + html2 = gr.HTML() + + with gr.Row(): + pnginfo_send_to_txt2img = gr.Button('Send to txt2img') + pnginfo_send_to_img2img = gr.Button('Send to img2img') + + image.change( + fn=wrap_gradio_call(modules.extras.run_pnginfo), + inputs=[image], + outputs=[html, generation_info, html2], + ) + + with gr.Blocks() as modelmerger_interface: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + gr.HTML(value="
A merger of the two checkpoints will be generated in your checkpoint directory.
") + + with gr.Row(): + primary_model_name = gr.Dropdown(modules.sd_models.checkpoint_tiles(), elem_id="modelmerger_primary_model_name", label="Primary Model Name") + secondary_model_name = gr.Dropdown(modules.sd_models.checkpoint_tiles(), elem_id="modelmerger_secondary_model_name", label="Secondary Model Name") + custom_name = gr.Textbox(label="Custom Name (Optional)") + interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Interpolation Amount', value=0.3) + interp_method = gr.Radio(choices=["Weighted Sum", "Sigmoid", "Inverse Sigmoid"], value="Weighted Sum", label="Interpolation Method") + save_as_half = gr.Checkbox(value=False, label="Save as float16") + modelmerger_merge = gr.Button(elem_id="modelmerger_merge", label="Merge", variant='primary') + + with gr.Column(variant='panel'): + submit_result = gr.Textbox(elem_id="modelmerger_result", show_label=False) + + sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() + + with gr.Blocks() as train_interface: + with gr.Row().style(equal_height=False): + gr.HTML(value="
See wiki for detailed explanation.
") + + with gr.Row().style(equal_height=False): + with gr.Tabs(elem_id="train_tabs"): + + with gr.Tab(label="Create embedding"): + new_embedding_name = gr.Textbox(label="Name") + initialization_text = gr.Textbox(label="Initialization text", value="*") + nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1) + + with gr.Row(): + with gr.Column(scale=3): + gr.HTML(value="") + + with gr.Column(): + create_embedding = gr.Button(value="Create embedding", variant='primary') + + with gr.Tab(label="Create hypernetwork"): + new_hypernetwork_name = gr.Textbox(label="Name") + new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) + + with gr.Row(): + with gr.Column(scale=3): + gr.HTML(value="") + + with gr.Column(): + create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary') + + with gr.Tab(label="Preprocess images"): + process_src = gr.Textbox(label='Source directory') + process_dst = gr.Textbox(label='Destination directory') + process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) + process_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + + with gr.Row(): + process_flip = gr.Checkbox(label='Create flipped copies') + process_split = gr.Checkbox(label='Split oversized images into two') + process_caption = gr.Checkbox(label='Use BLIP for caption') + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) + + with gr.Row(): + with gr.Column(scale=3): + gr.HTML(value="") + + with gr.Column(): + run_preprocess = gr.Button(value="Preprocess", variant='primary') + + with gr.Tab(label="Train"): + gr.HTML(value="
Train an embedding; must specify a directory with a set of 1:1 ratio images
") + train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) + train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', choices=[x for x in shared.hypernetworks.keys()]) + learn_rate = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005") + dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") + log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") + template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt")) + training_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) + training_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + steps = gr.Number(label='Max steps', value=100000, precision=0) + create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0) + save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0) + save_image_with_stored_embedding = gr.Checkbox(label='Save images with embedding in PNG chunks', value=True) + preview_image_prompt = gr.Textbox(label='Preview prompt', value="") + + with gr.Row(): + interrupt_training = gr.Button(value="Interrupt") + train_hypernetwork = gr.Button(value="Train Hypernetwork", variant='primary') + train_embedding = gr.Button(value="Train Embedding", variant='primary') + + with gr.Column(): + progressbar = gr.HTML(elem_id="ti_progressbar") + ti_output = gr.Text(elem_id="ti_output", value="", show_label=False) + + ti_gallery = gr.Gallery(label='Output', show_label=False, elem_id='ti_gallery').style(grid=4) + ti_preview = gr.Image(elem_id='ti_preview', visible=False) + ti_progress = gr.HTML(elem_id="ti_progress", value="") + ti_outcome = gr.HTML(elem_id="ti_error", value="") + setup_progressbar(progressbar, ti_preview, 'ti', textinfo=ti_progress) + + create_embedding.click( + fn=modules.textual_inversion.ui.create_embedding, + inputs=[ + new_embedding_name, + initialization_text, + nvpt, + ], + outputs=[ + train_embedding_name, + ti_output, + ti_outcome, + ] + ) + + create_hypernetwork.click( + fn=modules.hypernetworks.ui.create_hypernetwork, + inputs=[ + new_hypernetwork_name, + new_hypernetwork_sizes, + ], + outputs=[ + train_hypernetwork_name, + ti_output, + ti_outcome, + ] + ) + + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_width, + process_height, + process_flip, + process_split, + process_caption, + process_caption_deepbooru + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) + + train_embedding.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + train_embedding_name, + learn_rate, + dataset_directory, + log_directory, + training_width, + training_height, + steps, + create_image_every, + save_embedding_every, + template_file, + save_image_with_stored_embedding, + preview_image_prompt, + ], + outputs=[ + ti_output, + ti_outcome, + ] + ) + + train_hypernetwork.click( + fn=wrap_gradio_gpu_call(modules.hypernetworks.ui.train_hypernetwork, 
extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + train_hypernetwork_name, + learn_rate, + dataset_directory, + log_directory, + steps, + create_image_every, + save_embedding_every, + template_file, + preview_image_prompt, + ], + outputs=[ + ti_output, + ti_outcome, + ] + ) + + interrupt_training.click( + fn=lambda: shared.state.interrupt(), + inputs=[], + outputs=[], + ) + + def create_setting_component(key, is_quicksettings=False): + def fun(): + return opts.data[key] if key in opts.data else opts.data_labels[key].default + + info = opts.data_labels[key] + t = type(info.default) + + args = info.component_args() if callable(info.component_args) else info.component_args + + if info.component is not None: + comp = info.component + elif t == str: + comp = gr.Textbox + elif t == int: + comp = gr.Number + elif t == bool: + comp = gr.Checkbox + else: + raise Exception(f'bad options item type: {str(t)} for key {key}') + + if info.refresh is not None: + if is_quicksettings: + res = comp(label=info.label, value=fun, **(args or {})) + refresh_button = gr.Button(value=refresh_symbol, elem_id="refresh_"+key) + else: + with gr.Row(variant="compact"): + res = comp(label=info.label, value=fun, **(args or {})) + refresh_button = gr.Button(value=refresh_symbol, elem_id="refresh_" + key) + + def refresh(): + info.refresh() + refreshed_args = info.component_args() if callable(info.component_args) else info.component_args + + for k, v in refreshed_args.items(): + setattr(res, k, v) + + return gr.update(**(refreshed_args or {})) + + refresh_button.click( + fn=refresh, + inputs=[], + outputs=[res], + ) + else: + res = comp(label=info.label, value=fun, **(args or {})) + + + return res + + components = [] + component_dict = {} + + def open_folder(f): + if not os.path.isdir(f): + print(f""" +WARNING +An open_folder request was made with an argument that is not a folder. +This could be an error or a malicious attempt to run code on your computer. 
+Requested path was: {f} +""", file=sys.stderr) + return + + if not shared.cmd_opts.hide_ui_dir_config: + path = os.path.normpath(f) + if platform.system() == "Windows": + os.startfile(path) + elif platform.system() == "Darwin": + sp.Popen(["open", path]) + else: + sp.Popen(["xdg-open", path]) + + def run_settings(*args): + changed = 0 + + for key, value, comp in zip(opts.data_labels.keys(), args, components): + if comp != dummy_component and not opts.same_type(value, opts.data_labels[key].default): + return f"Bad value for setting {key}: {value}; expecting {type(opts.data_labels[key].default).__name__}", opts.dumpjson() + + for key, value, comp in zip(opts.data_labels.keys(), args, components): + if comp == dummy_component: + continue + + comp_args = opts.data_labels[key].component_args + if comp_args and isinstance(comp_args, dict) and comp_args.get('visible') is False: + continue + + oldval = opts.data.get(key, None) + opts.data[key] = value + + if oldval != value: + if opts.data_labels[key].onchange is not None: + opts.data_labels[key].onchange() + + changed += 1 + + opts.save(shared.config_filename) + + return f'{changed} settings changed.', opts.dumpjson() + + def run_settings_single(value, key): + if not opts.same_type(value, opts.data_labels[key].default): + return gr.update(visible=True), opts.dumpjson() + + oldval = opts.data.get(key, None) + opts.data[key] = value + + if oldval != value: + if opts.data_labels[key].onchange is not None: + opts.data_labels[key].onchange() + + opts.save(shared.config_filename) + + return gr.update(value=value), opts.dumpjson() + + with gr.Blocks(analytics_enabled=False) as settings_interface: + settings_submit = gr.Button(value="Apply settings", variant='primary') + result = gr.HTML() + + settings_cols = 3 + items_per_col = int(len(opts.data_labels) * 0.9 / settings_cols) + + quicksettings_names = [x.strip() for x in opts.quicksettings.split(",")] + quicksettings_names = set(x for x in quicksettings_names if x != 'quicksettings') + + quicksettings_list = [] + + cols_displayed = 0 + items_displayed = 0 + previous_section = None + column = None + with gr.Row(elem_id="settings").style(equal_height=False): + for i, (k, item) in enumerate(opts.data_labels.items()): + + if previous_section != item.section: + if cols_displayed < settings_cols and (items_displayed >= items_per_col or previous_section is None): + if column is not None: + column.__exit__() + + column = gr.Column(variant='panel') + column.__enter__() + + items_displayed = 0 + cols_displayed += 1 + + previous_section = item.section + + gr.HTML(elem_id="settings_header_text_{}".format(item.section[0]), value='
{}
'.format(item.section[1])) + + if k in quicksettings_names: + quicksettings_list.append((i, k, item)) + components.append(dummy_component) + else: + component = create_setting_component(k) + component_dict[k] = component + components.append(component) + items_displayed += 1 + + with gr.Row(): + request_notifications = gr.Button(value='Request browser notifications', elem_id="request_notifications") + reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary') + restart_gradio = gr.Button(value='Restart Gradio and Refresh components (Custom Scripts, ui.py, js and css only)', variant='primary') + + request_notifications.click( + fn=lambda: None, + inputs=[], + outputs=[], + _js='function(){}' + ) + + def reload_scripts(): + modules.scripts.reload_script_body_only() + + reload_script_bodies.click( + fn=reload_scripts, + inputs=[], + outputs=[], + _js='function(){}' + ) + + def request_restart(): + shared.state.interrupt() + settings_interface.gradio_ref.do_restart = True + + restart_gradio.click( + fn=request_restart, + inputs=[], + outputs=[], + _js='function(){restart_reload()}' + ) + + if column is not None: + column.__exit__() + + interfaces = [ + (txt2img_interface, "txt2img", "txt2img"), + (img2img_interface, "img2img", "img2img"), + (extras_interface, "Extras", "extras"), + (pnginfo_interface, "PNG Info", "pnginfo"), + (modelmerger_interface, "Checkpoint Merger", "modelmerger"), + (train_interface, "Train", "ti"), + (settings_interface, "Settings", "settings"), + ] + + with open(os.path.join(script_path, "style.css"), "r", encoding="utf8") as file: + css = file.read() + + if os.path.exists(os.path.join(script_path, "user.css")): + with open(os.path.join(script_path, "user.css"), "r", encoding="utf8") as file: + usercss = file.read() + css += usercss + + if not cmd_opts.no_progressbar_hiding: + css += css_hide_progressbar + + with gr.Blocks(css=css, analytics_enabled=False, title="Stable Diffusion") as demo: + with gr.Row(elem_id="quicksettings"): + for i, k, item in quicksettings_list: + component = create_setting_component(k, is_quicksettings=True) + component_dict[k] = component + + settings_interface.gradio_ref = demo + + with gr.Tabs(elem_id="tabs") as tabs: + for interface, label, ifid in interfaces: + with gr.TabItem(label, id=ifid, elem_id='tab_' + ifid): + interface.render() + + if os.path.exists(os.path.join(script_path, "notification.mp3")): + audio_notification = gr.Audio(interactive=False, value=os.path.join(script_path, "notification.mp3"), elem_id="audio_notification", visible=False) + + text_settings = gr.Textbox(elem_id="settings_json", value=lambda: opts.dumpjson(), visible=False) + settings_submit.click( + fn=run_settings, + inputs=components, + outputs=[result, text_settings], + ) + + for i, k, item in quicksettings_list: + component = component_dict[k] + + component.change( + fn=lambda value, k=k: run_settings_single(value, key=k), + inputs=[component], + outputs=[component, text_settings], + ) + + def modelmerger(*args): + try: + results = modules.extras.run_modelmerger(*args) + except Exception as e: + print("Error loading/saving model file:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + modules.sd_models.list_models() # to remove the potentially missing models from the list + return ["Error loading/saving model file. 
It doesn't exist or the name contains illegal characters"] + [gr.Dropdown.update(choices=modules.sd_models.checkpoint_tiles()) for _ in range(3)] + return results + + modelmerger_merge.click( + fn=modelmerger, + inputs=[ + primary_model_name, + secondary_model_name, + interp_method, + interp_amount, + save_as_half, + custom_name, + ], + outputs=[ + submit_result, + primary_model_name, + secondary_model_name, + component_dict['sd_model_checkpoint'], + ] + ) + paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration', 'Seed', 'Size-1', 'Size-2'] + txt2img_fields = [field for field,name in txt2img_paste_fields if name in paste_field_names] + img2img_fields = [field for field,name in img2img_paste_fields if name in paste_field_names] + send_to_img2img.click( + fn=lambda img, *args: (image_from_url_text(img),*args), + _js="(gallery, ...args) => [extract_image_from_gallery_img2img(gallery), ...args]", + inputs=[txt2img_gallery] + txt2img_fields, + outputs=[init_img] + img2img_fields, + ) + + send_to_inpaint.click( + fn=lambda x, *args: (image_from_url_text(x), *args), + _js="(gallery, ...args) => [extract_image_from_gallery_inpaint(gallery), ...args]", + inputs=[txt2img_gallery] + txt2img_fields, + outputs=[init_img_with_mask] + img2img_fields, + ) + + img2img_send_to_img2img.click( + fn=lambda x: image_from_url_text(x), + _js="extract_image_from_gallery_img2img", + inputs=[img2img_gallery], + outputs=[init_img], + ) + + img2img_send_to_inpaint.click( + fn=lambda x: image_from_url_text(x), + _js="extract_image_from_gallery_inpaint", + inputs=[img2img_gallery], + outputs=[init_img_with_mask], + ) + + send_to_extras.click( + fn=lambda x: image_from_url_text(x), + _js="extract_image_from_gallery_extras", + inputs=[txt2img_gallery], + outputs=[extras_image], + ) + + open_txt2img_folder.click( + fn=lambda: open_folder(opts.outdir_samples or opts.outdir_txt2img_samples), + inputs=[], + outputs=[], + ) + + open_img2img_folder.click( + fn=lambda: open_folder(opts.outdir_samples or opts.outdir_img2img_samples), + inputs=[], + outputs=[], + ) + + open_extras_folder.click( + fn=lambda: open_folder(opts.outdir_samples or opts.outdir_extras_samples), + inputs=[], + outputs=[], + ) + + img2img_send_to_extras.click( + fn=lambda x: image_from_url_text(x), + _js="extract_image_from_gallery_extras", + inputs=[img2img_gallery], + outputs=[extras_image], + ) + + modules.generation_parameters_copypaste.connect_paste(pnginfo_send_to_txt2img, txt2img_paste_fields, generation_info, 'switch_to_txt2img') + modules.generation_parameters_copypaste.connect_paste(pnginfo_send_to_img2img, img2img_paste_fields, generation_info, 'switch_to_img2img_img2img') + + ui_config_file = cmd_opts.ui_config_file + ui_settings = {} + settings_count = len(ui_settings) + error_loading = False + + try: + if os.path.exists(ui_config_file): + with open(ui_config_file, "r", encoding="utf8") as file: + ui_settings = json.load(file) + except Exception: + error_loading = True + print("Error loading settings:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + def loadsave(path, x): + def apply_field(obj, field, condition=None): + key = path + "/" + field + + if getattr(obj,'custom_script_source',None) is not None: + key = 'customscript/' + obj.custom_script_source + '/' + key + + if getattr(obj, 'do_not_save_to_config', False): + return + + saved_value = ui_settings.get(key, None) + if saved_value is None: + ui_settings[key] = getattr(obj, field) + elif condition is None or condition(saved_value): + 
setattr(obj, field, saved_value) + + if type(x) in [gr.Slider, gr.Radio, gr.Checkbox, gr.Textbox, gr.Number] and x.visible: + apply_field(x, 'visible') + + if type(x) == gr.Slider: + apply_field(x, 'value') + apply_field(x, 'minimum') + apply_field(x, 'maximum') + apply_field(x, 'step') + + if type(x) == gr.Radio: + apply_field(x, 'value', lambda val: val in x.choices) + + if type(x) == gr.Checkbox: + apply_field(x, 'value') + + if type(x) == gr.Textbox: + apply_field(x, 'value') + + if type(x) == gr.Number: + apply_field(x, 'value') + + visit(txt2img_interface, loadsave, "txt2img") + visit(img2img_interface, loadsave, "img2img") + visit(extras_interface, loadsave, "extras") + + if not error_loading and (not os.path.exists(ui_config_file) or settings_count != len(ui_settings)): + with open(ui_config_file, "w", encoding="utf8") as file: + json.dump(ui_settings, file, indent=4) + + return demo + + +with open(os.path.join(script_path, "script.js"), "r", encoding="utf8") as jsfile: + javascript = f'' + +jsdir = os.path.join(script_path, "javascript") +for filename in sorted(os.listdir(jsdir)): + with open(os.path.join(jsdir, filename), "r", encoding="utf8") as jsfile: + javascript += f"\n" + + +if 'gradio_routes_templates_response' not in globals(): + def template_response(*args, **kwargs): + res = gradio_routes_templates_response(*args, **kwargs) + res.body = res.body.replace(b'', f'{javascript}'.encode("utf8")) + res.init_headers() + return res + + gradio_routes_templates_response = gradio.routes.templates.TemplateResponse + gradio.routes.templates.TemplateResponse = template_response diff --git a/modules/upscaler.py b/modules/upscaler.py new file mode 100644 index 0000000000000000000000000000000000000000..6ab2fb4082301f9c440f0db7489e0de3e3f93f50 --- /dev/null +++ b/modules/upscaler.py @@ -0,0 +1,122 @@ +import os +from abc import abstractmethod + +import PIL +import numpy as np +import torch +from PIL import Image + +import modules.shared +from modules import modelloader, shared + +LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) +from modules.paths import models_path + + +class Upscaler: + name = None + model_path = None + model_name = None + model_url = None + enable = True + filter = None + model = None + user_path = None + scalers: [] + tile = True + + def __init__(self, create_dirs=False): + self.mod_pad_h = None + self.tile_size = modules.shared.opts.ESRGAN_tile + self.tile_pad = modules.shared.opts.ESRGAN_tile_overlap + self.device = modules.shared.device + self.img = None + self.output = None + self.scale = 1 + self.half = not modules.shared.cmd_opts.no_half + self.pre_pad = 0 + self.mod_scale = None + + if self.model_path is None and self.name: + self.model_path = os.path.join(models_path, self.name) + if self.model_path and create_dirs: + os.makedirs(self.model_path, exist_ok=True) + + try: + import cv2 + self.can_tile = True + except: + pass + + @abstractmethod + def do_upscale(self, img: PIL.Image, selected_model: str): + return img + + def upscale(self, img: PIL.Image, scale: int, selected_model: str = None): + self.scale = scale + dest_w = img.width * scale + dest_h = img.height * scale + for i in range(3): + if img.width >= dest_w and img.height >= dest_h: + break + img = self.do_upscale(img, selected_model) + if img.width != dest_w or img.height != dest_h: + img = img.resize((int(dest_w), int(dest_h)), resample=LANCZOS) + + return img + + @abstractmethod + def load_model(self, path: str): + pass + + def find_models(self, 
ext_filter=None) -> list: + return modelloader.load_models(model_path=self.model_path, model_url=self.model_url, command_path=self.user_path) + + def update_status(self, prompt): + print(f"\nextras: {prompt}", file=shared.progress_print_out) + + +class UpscalerData: + name = None + data_path = None + scale: int = 4 + scaler: Upscaler = None + model: None + + def __init__(self, name: str, path: str, upscaler: Upscaler = None, scale: int = 4, model=None): + self.name = name + self.data_path = path + self.scaler = upscaler + self.scale = scale + self.model = model + + +class UpscalerNone(Upscaler): + name = "None" + scalers = [] + + def load_model(self, path): + pass + + def do_upscale(self, img, selected_model=None): + return img + + def __init__(self, dirname=None): + super().__init__(False) + self.scalers = [UpscalerData("None", None, self)] + + +class UpscalerLanczos(Upscaler): + scalers = [] + + def do_upscale(self, img, selected_model=None): + return img.resize((int(img.width * self.scale), int(img.height * self.scale)), resample=LANCZOS) + + def load_model(self, _): + pass + + def __init__(self, dirname=None): + super().__init__(False) + self.name = "Lanczos" + self.scalers = [UpscalerData("Lanczos", None, self)] + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0d985ce74f4b251ca66d308db727930244cd224 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +basicsr +diffusers +fairscale==0.4.4 +fonts +font-roboto +gfpgan +gradio==3.4.1 +invisible-watermark +numpy +omegaconf +piexif +Pillow +pytorch_lightning +realesrgan +scikit-image>=0.19 +timm==0.4.12 +transformers==4.19.2 +torch +einops +jsonmerge +clean-fid +resize-right +torchdiffeq +kornia +lark diff --git a/requirements_versions.txt b/requirements_versions.txt new file mode 100644 index 0000000000000000000000000000000000000000..2bbea40b48792ef2c11d593e32f55232cc711bf1 --- /dev/null +++ b/requirements_versions.txt @@ -0,0 +1,24 @@ +transformers==4.19.2 +diffusers==0.3.0 +basicsr==1.4.2 +gfpgan==1.3.8 +gradio==3.4.1 +numpy==1.23.3 +Pillow==9.2.0 +realesrgan==0.3.0 +torch +omegaconf==2.2.3 +pytorch_lightning==1.7.6 +scikit-image==0.19.2 +fonts +font-roboto +timm==0.6.7 +fairscale==0.4.9 +piexif==1.1.3 +einops==0.4.1 +jsonmerge==1.8.0 +clean-fid==0.1.29 +resize-right==0.0.2 +torchdiffeq==0.2.3 +kornia==0.6.7 +lark==1.1.2 diff --git a/screenshot.png b/screenshot.png new file mode 100644 index 0000000000000000000000000000000000000000..86c3209fe3a3b92e5afa584e9e6dcd0b3dcf2ecf Binary files /dev/null and b/screenshot.png differ diff --git a/script.js b/script.js new file mode 100644 index 0000000000000000000000000000000000000000..9543cbe686ab7eb449e2d1d678525eb1c75ff0fa --- /dev/null +++ b/script.js @@ -0,0 +1,85 @@ +function gradioApp(){ + return document.getElementsByTagName('gradio-app')[0].shadowRoot; +} + +function get_uiCurrentTab() { + return gradioApp().querySelector('.tabs button:not(.border-transparent)') +} + +function get_uiCurrentTabContent() { + return gradioApp().querySelector('.tabitem[id^=tab_]:not([style*="display: none"])') +} + +uiUpdateCallbacks = [] +uiTabChangeCallbacks = [] +let uiCurrentTab = null + +function onUiUpdate(callback){ + uiUpdateCallbacks.push(callback) +} +function onUiTabChange(callback){ + uiTabChangeCallbacks.push(callback) +} + +function runCallback(x){ + try { + x() + } catch (e) { + (console.error || console.log).call(console, e.message, e); + } +} +function executeCallbacks(queue) { + queue.forEach(runCallback) +} + 
+document.addEventListener("DOMContentLoaded", function() { + var mutationObserver = new MutationObserver(function(m){ + executeCallbacks(uiUpdateCallbacks); + const newTab = get_uiCurrentTab(); + if ( newTab && ( newTab !== uiCurrentTab ) ) { + uiCurrentTab = newTab; + executeCallbacks(uiTabChangeCallbacks); + } + }); + mutationObserver.observe( gradioApp(), { childList:true, subtree:true }) +}); + +/** + * Add a ctrl+enter as a shortcut to start a generation + */ + document.addEventListener('keydown', function(e) { + var handled = false; + if (e.key !== undefined) { + if((e.key == "Enter" && (e.metaKey || e.ctrlKey))) handled = true; + } else if (e.keyCode !== undefined) { + if((e.keyCode == 13 && (e.metaKey || e.ctrlKey))) handled = true; + } + if (handled) { + button = get_uiCurrentTabContent().querySelector('button[id$=_generate]'); + if (button) { + button.click(); + } + e.preventDefault(); + } +}) + +/** + * checks that a UI element is not in another hidden element or tab content + */ +function uiElementIsVisible(el) { + let isVisible = !el.closest('.\\!hidden'); + if ( ! isVisible ) { + return false; + } + + while( isVisible = el.closest('.tabitem')?.style.display !== 'none' ) { + if ( ! isVisible ) { + return false; + } else if ( el.parentElement ) { + el = el.parentElement + } else { + break; + } + } + return isVisible; +} \ No newline at end of file diff --git a/scripts/custom_code.py b/scripts/custom_code.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b10c09a5c9cc1ece634cd0261fbe69884d36e7 --- /dev/null +++ b/scripts/custom_code.py @@ -0,0 +1,42 @@ +import modules.scripts as scripts +import gradio as gr + +from modules.processing import Processed +from modules.shared import opts, cmd_opts, state + +class Script(scripts.Script): + + def title(self): + return "Custom code" + + + def show(self, is_img2img): + return cmd_opts.allow_code + + def ui(self, is_img2img): + code = gr.Textbox(label="Python code", visible=False, lines=1) + + return [code] + + + def run(self, p, code): + assert cmd_opts.allow_code, '--allow-code option must be enabled' + + display_result_data = [[], -1, ""] + + def display(imgs, s=display_result_data[1], i=display_result_data[2]): + display_result_data[0] = imgs + display_result_data[1] = s + display_result_data[2] = i + + from types import ModuleType + compiled = compile(code, '', 'exec') + module = ModuleType("testmodule") + module.__dict__.update(globals()) + module.p = p + module.display = display + exec(compiled, module.__dict__) + + return Processed(p, *display_result_data) + + \ No newline at end of file diff --git a/scripts/img2imgalt.py b/scripts/img2imgalt.py new file mode 100644 index 0000000000000000000000000000000000000000..d438175ced35d1f7a16b1b58f054fc9441a7253e --- /dev/null +++ b/scripts/img2imgalt.py @@ -0,0 +1,211 @@ +from collections import namedtuple + +import numpy as np +from tqdm import trange + +import modules.scripts as scripts +import gradio as gr + +from modules import processing, shared, sd_samplers, prompt_parser +from modules.processing import Processed +from modules.shared import opts, cmd_opts, state + +import torch +import k_diffusion as K + +from PIL import Image +from torch import autocast +from einops import rearrange, repeat + + +def find_noise_for_image(p, cond, uncond, cfg_scale, steps): + x = p.init_latent + + s_in = x.new_ones([x.shape[0]]) + dnw = K.external.CompVisDenoiser(shared.sd_model) + sigmas = dnw.get_sigmas(steps).flip(0) + + shared.state.sampling_steps = steps + + for i in 
trange(1, len(sigmas)): + shared.state.sampling_step += 1 + + x_in = torch.cat([x] * 2) + sigma_in = torch.cat([sigmas[i] * s_in] * 2) + cond_in = torch.cat([uncond, cond]) + + c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)] + t = dnw.sigma_to_t(sigma_in) + + eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in) + denoised_uncond, denoised_cond = (x_in + eps * c_out).chunk(2) + + denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale + + d = (x - denoised) / sigmas[i] + dt = sigmas[i] - sigmas[i - 1] + + x = x + d * dt + + sd_samplers.store_latent(x) + + # This shouldn't be necessary, but solved some VRAM issues + del x_in, sigma_in, cond_in, c_out, c_in, t, + del eps, denoised_uncond, denoised_cond, denoised, d, dt + + shared.state.nextjob() + + return x / x.std() + + +Cached = namedtuple("Cached", ["noise", "cfg_scale", "steps", "latent", "original_prompt", "original_negative_prompt", "sigma_adjustment"]) + + +# Based on changes suggested by briansemrau in https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/736 +def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps): + x = p.init_latent + + s_in = x.new_ones([x.shape[0]]) + dnw = K.external.CompVisDenoiser(shared.sd_model) + sigmas = dnw.get_sigmas(steps).flip(0) + + shared.state.sampling_steps = steps + + for i in trange(1, len(sigmas)): + shared.state.sampling_step += 1 + + x_in = torch.cat([x] * 2) + sigma_in = torch.cat([sigmas[i - 1] * s_in] * 2) + cond_in = torch.cat([uncond, cond]) + + c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)] + + if i == 1: + t = dnw.sigma_to_t(torch.cat([sigmas[i] * s_in] * 2)) + else: + t = dnw.sigma_to_t(sigma_in) + + eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in) + denoised_uncond, denoised_cond = (x_in + eps * c_out).chunk(2) + + denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale + + if i == 1: + d = (x - denoised) / (2 * sigmas[i]) + else: + d = (x - denoised) / sigmas[i - 1] + + dt = sigmas[i] - sigmas[i - 1] + x = x + d * dt + + sd_samplers.store_latent(x) + + # This shouldn't be necessary, but solved some VRAM issues + del x_in, sigma_in, cond_in, c_out, c_in, t, + del eps, denoised_uncond, denoised_cond, denoised, d, dt + + shared.state.nextjob() + + return x / sigmas[-1] + + +class Script(scripts.Script): + def __init__(self): + self.cache = None + + def title(self): + return "img2img alternative test" + + def show(self, is_img2img): + return is_img2img + + def ui(self, is_img2img): + info = gr.Markdown(''' + * `CFG Scale` should be 2 or lower. 
+ ''') + + override_sampler = gr.Checkbox(label="Override `Sampling method` to Euler?(this method is built for it)", value=True) + + override_prompt = gr.Checkbox(label="Override `prompt` to the same value as `original prompt`?(and `negative prompt`)", value=True) + original_prompt = gr.Textbox(label="Original prompt", lines=1) + original_negative_prompt = gr.Textbox(label="Original negative prompt", lines=1) + + override_steps = gr.Checkbox(label="Override `Sampling Steps` to the same value as `Decode steps`?", value=True) + st = gr.Slider(label="Decode steps", minimum=1, maximum=150, step=1, value=50) + + override_strength = gr.Checkbox(label="Override `Denoising strength` to 1?", value=True) + + cfg = gr.Slider(label="Decode CFG scale", minimum=0.0, maximum=15.0, step=0.1, value=1.0) + randomness = gr.Slider(label="Randomness", minimum=0.0, maximum=1.0, step=0.01, value=0.0) + sigma_adjustment = gr.Checkbox(label="Sigma adjustment for finding noise for image", value=False) + + return [ + info, + override_sampler, + override_prompt, original_prompt, original_negative_prompt, + override_steps, st, + override_strength, + cfg, randomness, sigma_adjustment, + ] + + def run(self, p, _, override_sampler, override_prompt, original_prompt, original_negative_prompt, override_steps, st, override_strength, cfg, randomness, sigma_adjustment): + # Override + if override_sampler: + p.sampler_index = [sampler.name for sampler in sd_samplers.samplers].index("Euler") + if override_prompt: + p.prompt = original_prompt + p.negative_prompt = original_negative_prompt + if override_steps: + p.steps = st + if override_strength: + p.denoising_strength = 1.0 + + + def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): + lat = (p.init_latent.cpu().numpy() * 10).astype(int) + + same_params = self.cache is not None and self.cache.cfg_scale == cfg and self.cache.steps == st \ + and self.cache.original_prompt == original_prompt \ + and self.cache.original_negative_prompt == original_negative_prompt \ + and self.cache.sigma_adjustment == sigma_adjustment + same_everything = same_params and self.cache.latent.shape == lat.shape and np.abs(self.cache.latent-lat).sum() < 100 + + if same_everything: + rec_noise = self.cache.noise + else: + shared.state.job_count += 1 + cond = p.sd_model.get_learned_conditioning(p.batch_size * [original_prompt]) + uncond = p.sd_model.get_learned_conditioning(p.batch_size * [original_negative_prompt]) + if sigma_adjustment: + rec_noise = find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg, st) + else: + rec_noise = find_noise_for_image(p, cond, uncond, cfg, st) + self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment) + + rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p) + + combined_noise = ((1 - randomness) * rec_noise + randomness * rand_noise) / ((randomness**2 + (1-randomness)**2) ** 0.5) + + sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, p.sampler_index, p.sd_model) + + sigmas = sampler.model_wrap.get_sigmas(p.steps) + + noise_dt = combined_noise - (p.init_latent / sigmas[0]) + + p.seed = p.seed + 1 + + return sampler.sample_img2img(p, p.init_latent, noise_dt, conditioning, unconditional_conditioning) + + p.sample = sample_extra + + p.extra_generation_params["Decode prompt"] = original_prompt + 
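+ # The "Decode ..." values recorded here and just below are presumably written into the result's
+ # generation-parameters text, so the settings used for noise reconstruction can be read back later.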
p.extra_generation_params["Decode negative prompt"] = original_negative_prompt + p.extra_generation_params["Decode CFG scale"] = cfg + p.extra_generation_params["Decode steps"] = st + p.extra_generation_params["Randomness"] = randomness + p.extra_generation_params["Sigma Adjustment"] = sigma_adjustment + + processed = processing.process_images(p) + + return processed + diff --git a/scripts/loopback.py b/scripts/loopback.py new file mode 100644 index 0000000000000000000000000000000000000000..d8c68af89f39a66ebcb5f523b505daab7de0f80d --- /dev/null +++ b/scripts/loopback.py @@ -0,0 +1,87 @@ +import numpy as np +from tqdm import trange + +import modules.scripts as scripts +import gradio as gr + +from modules import processing, shared, sd_samplers, images +from modules.processing import Processed +from modules.sd_samplers import samplers +from modules.shared import opts, cmd_opts, state + +class Script(scripts.Script): + def title(self): + return "Loopback" + + def show(self, is_img2img): + return is_img2img + + def ui(self, is_img2img): + loops = gr.Slider(minimum=1, maximum=32, step=1, label='Loops', value=4) + denoising_strength_change_factor = gr.Slider(minimum=0.9, maximum=1.1, step=0.01, label='Denoising strength change factor', value=1) + + return [loops, denoising_strength_change_factor] + + def run(self, p, loops, denoising_strength_change_factor): + processing.fix_seed(p) + batch_count = p.n_iter + p.extra_generation_params = { + "Denoising strength change factor": denoising_strength_change_factor, + } + + p.batch_size = 1 + p.n_iter = 1 + + output_images, info = None, None + initial_seed = None + initial_info = None + + grids = [] + all_images = [] + original_init_image = p.init_images + state.job_count = loops * batch_count + + initial_color_corrections = [processing.setup_color_correction(p.init_images[0])] + + for n in range(batch_count): + history = [] + + # Reset to original init image at the start of each batch + p.init_images = original_init_image + + for i in range(loops): + p.n_iter = 1 + p.batch_size = 1 + p.do_not_save_grid = True + + if opts.img2img_color_correction: + p.color_corrections = initial_color_corrections + + state.job = f"Iteration {i + 1}/{loops}, batch {n + 1}/{batch_count}" + + processed = processing.process_images(p) + + if initial_seed is None: + initial_seed = processed.seed + initial_info = processed.info + + init_img = processed.images[0] + + p.init_images = [init_img] + p.seed = processed.seed + 1 + p.denoising_strength = min(max(p.denoising_strength * denoising_strength_change_factor, 0.1), 1) + history.append(processed.images[0]) + + grid = images.image_grid(history, rows=1) + if opts.grid_save: + images.save_image(grid, p.outpath_grids, "grid", initial_seed, p.prompt, opts.grid_format, info=info, short_filename=not opts.grid_extended_filename, grid=True, p=p) + + grids.append(grid) + all_images += history + + if opts.return_grid: + all_images = grids + all_images + + processed = Processed(p, all_images, initial_seed, initial_info) + + return processed diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py new file mode 100644 index 0000000000000000000000000000000000000000..a6468e09ab5d09de1b4d37624d5f9f39a037f2dc --- /dev/null +++ b/scripts/outpainting_mk_2.py @@ -0,0 +1,262 @@ +import math + +import numpy as np +import skimage + +import modules.scripts as scripts +import gradio as gr +from PIL import Image, ImageDraw + +from modules import images, processing, devices +from modules.processing import Processed, process_images +from 
modules.shared import opts, cmd_opts, state + + +# this function is taken from https://github.com/parlance-zz/g-diffuser-bot +def get_matched_noise(_np_src_image, np_mask_rgb, noise_q=1, color_variation=0.05): + # helper fft routines that keep ortho normalization and auto-shift before and after fft + def _fft2(data): + if data.ndim > 2: # has channels + out_fft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128) + for c in range(data.shape[2]): + c_data = data[:, :, c] + out_fft[:, :, c] = np.fft.fft2(np.fft.fftshift(c_data), norm="ortho") + out_fft[:, :, c] = np.fft.ifftshift(out_fft[:, :, c]) + else: # one channel + out_fft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128) + out_fft[:, :] = np.fft.fft2(np.fft.fftshift(data), norm="ortho") + out_fft[:, :] = np.fft.ifftshift(out_fft[:, :]) + + return out_fft + + def _ifft2(data): + if data.ndim > 2: # has channels + out_ifft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128) + for c in range(data.shape[2]): + c_data = data[:, :, c] + out_ifft[:, :, c] = np.fft.ifft2(np.fft.fftshift(c_data), norm="ortho") + out_ifft[:, :, c] = np.fft.ifftshift(out_ifft[:, :, c]) + else: # one channel + out_ifft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128) + out_ifft[:, :] = np.fft.ifft2(np.fft.fftshift(data), norm="ortho") + out_ifft[:, :] = np.fft.ifftshift(out_ifft[:, :]) + + return out_ifft + + def _get_gaussian_window(width, height, std=3.14, mode=0): + window_scale_x = float(width / min(width, height)) + window_scale_y = float(height / min(width, height)) + + window = np.zeros((width, height)) + x = (np.arange(width) / width * 2. - 1.) * window_scale_x + for y in range(height): + fy = (y / height * 2. - 1.) * window_scale_y + if mode == 0: + window[:, y] = np.exp(-(x ** 2 + fy ** 2) * std) + else: + window[:, y] = (1 / ((x ** 2 + 1.) * (fy ** 2 + 1.))) ** (std / 3.14) # hey wait a minute that's not gaussian + + return window + + def _get_masked_window_rgb(np_mask_grey, hardness=1.): + np_mask_rgb = np.zeros((np_mask_grey.shape[0], np_mask_grey.shape[1], 3)) + if hardness != 1.: + hardened = np_mask_grey[:] ** hardness + else: + hardened = np_mask_grey[:] + for c in range(3): + np_mask_rgb[:, :, c] = hardened[:] + return np_mask_rgb + + width = _np_src_image.shape[0] + height = _np_src_image.shape[1] + num_channels = _np_src_image.shape[2] + + np_src_image = _np_src_image[:] * (1. - np_mask_rgb) + np_mask_grey = (np.sum(np_mask_rgb, axis=2) / 3.) + img_mask = np_mask_grey > 1e-6 + ref_mask = np_mask_grey < 1e-3 + + windowed_image = _np_src_image * (1. - _get_masked_window_rgb(np_mask_grey)) + windowed_image /= np.max(windowed_image) + windowed_image += np.average(_np_src_image) * np_mask_rgb # / (1.-np.average(np_mask_rgb)) # rather than leave the masked area black, we get better results from fft by filling the average unmasked color + + src_fft = _fft2(windowed_image) # get feature statistics from masked src img + src_dist = np.absolute(src_fft) + src_phase = src_fft / src_dist + + # create a generator with a static seed to make outpainting deterministic / only follow global seed + rng = np.random.default_rng(0) + + noise_window = _get_gaussian_window(width, height, mode=1) # start with simple gaussian noise + noise_rgb = rng.random((width, height, num_channels)) + noise_grey = (np.sum(noise_rgb, axis=2) / 3.) 
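+ # Roughly speaking, the steps below blend the random noise toward its grey average
+ # (color_variation controls how colourful the seed noise is) and then shape it in frequency
+ # space: the noise FFT is windowed and multiplied by the source image's spectral magnitude
+ # (raised to noise_q) and phase, so the generated noise statistically resembles the source image.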
+ noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter + for c in range(num_channels): + noise_rgb[:, :, c] += (1. - color_variation) * noise_grey + + noise_fft = _fft2(noise_rgb) + for c in range(num_channels): + noise_fft[:, :, c] *= noise_window + noise_rgb = np.real(_ifft2(noise_fft)) + shaped_noise_fft = _fft2(noise_rgb) + shaped_noise_fft[:, :, :] = np.absolute(shaped_noise_fft[:, :, :]) ** 2 * (src_dist ** noise_q) * src_phase # perform the actual shaping + + brightness_variation = 0. # color_variation # todo: temporarily tieing brightness variation to color variation for now + contrast_adjusted_np_src = _np_src_image[:] * (brightness_variation + 1.) - brightness_variation * 2. + + # scikit-image is used for histogram matching, very convenient! + shaped_noise = np.real(_ifft2(shaped_noise_fft)) + shaped_noise -= np.min(shaped_noise) + shaped_noise /= np.max(shaped_noise) + shaped_noise[img_mask, :] = skimage.exposure.match_histograms(shaped_noise[img_mask, :] ** 1., contrast_adjusted_np_src[ref_mask, :], channel_axis=1) + shaped_noise = _np_src_image[:] * (1. - np_mask_rgb) + shaped_noise * np_mask_rgb + + matched_noise = shaped_noise[:] + + return np.clip(matched_noise, 0., 1.) + + + +class Script(scripts.Script): + def title(self): + return "Outpainting mk2" + + def show(self, is_img2img): + return is_img2img + + def ui(self, is_img2img): + if not is_img2img: + return None + + info = gr.HTML("
Recommended settings: Sampling Steps: 80-100, Sampler: Euler a, Denoising strength: 0.8
") + + pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128) + mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8, visible=False) + direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down']) + noise_q = gr.Slider(label="Fall-off exponent (lower=higher detail)", minimum=0.0, maximum=4.0, step=0.01, value=1.0) + color_variation = gr.Slider(label="Color variation", minimum=0.0, maximum=1.0, step=0.01, value=0.05) + + return [info, pixels, mask_blur, direction, noise_q, color_variation] + + def run(self, p, _, pixels, mask_blur, direction, noise_q, color_variation): + initial_seed_and_info = [None, None] + + process_width = p.width + process_height = p.height + + p.mask_blur = mask_blur*4 + p.inpaint_full_res = False + p.inpainting_fill = 1 + p.do_not_save_samples = True + p.do_not_save_grid = True + + left = pixels if "left" in direction else 0 + right = pixels if "right" in direction else 0 + up = pixels if "up" in direction else 0 + down = pixels if "down" in direction else 0 + + init_img = p.init_images[0] + target_w = math.ceil((init_img.width + left + right) / 64) * 64 + target_h = math.ceil((init_img.height + up + down) / 64) * 64 + + if left > 0: + left = left * (target_w - init_img.width) // (left + right) + + if right > 0: + right = target_w - init_img.width - left + + if up > 0: + up = up * (target_h - init_img.height) // (up + down) + + if down > 0: + down = target_h - init_img.height - up + + init_image = p.init_images[0] + + state.job_count = (1 if left > 0 else 0) + (1 if right > 0 else 0) + (1 if up > 0 else 0) + (1 if down > 0 else 0) + + def expand(init, expand_pixels, is_left=False, is_right=False, is_top=False, is_bottom=False): + is_horiz = is_left or is_right + is_vert = is_top or is_bottom + pixels_horiz = expand_pixels if is_horiz else 0 + pixels_vert = expand_pixels if is_vert else 0 + + res_w = init.width + pixels_horiz + res_h = init.height + pixels_vert + process_res_w = math.ceil(res_w / 64) * 64 + process_res_h = math.ceil(res_h / 64) * 64 + + img = Image.new("RGB", (process_res_w, process_res_h)) + img.paste(init, (pixels_horiz if is_left else 0, pixels_vert if is_top else 0)) + mask = Image.new("RGB", (process_res_w, process_res_h), "white") + draw = ImageDraw.Draw(mask) + draw.rectangle(( + expand_pixels + mask_blur if is_left else 0, + expand_pixels + mask_blur if is_top else 0, + mask.width - expand_pixels - mask_blur if is_right else res_w, + mask.height - expand_pixels - mask_blur if is_bottom else res_h, + ), fill="black") + + np_image = (np.asarray(img) / 255.0).astype(np.float64) + np_mask = (np.asarray(mask) / 255.0).astype(np.float64) + noised = get_matched_noise(np_image, np_mask, noise_q, color_variation) + out = Image.fromarray(np.clip(noised * 255., 0., 255.).astype(np.uint8), mode="RGB") + + target_width = min(process_width, init.width + pixels_horiz) if is_horiz else img.width + target_height = min(process_height, init.height + pixels_vert) if is_vert else img.height + + crop_region = ( + 0 if is_left else out.width - target_width, + 0 if is_top else out.height - target_height, + target_width if is_left else out.width, + target_height if is_top else out.height, + ) + + image_to_process = out.crop(crop_region) + mask = mask.crop(crop_region) + + p.width = target_width if is_horiz else img.width + p.height = target_height if is_vert else img.height + p.init_images = [image_to_process] + p.image_mask = mask 
+ + latent_mask = Image.new("RGB", (p.width, p.height), "white") + draw = ImageDraw.Draw(latent_mask) + draw.rectangle(( + expand_pixels + mask_blur * 2 if is_left else 0, + expand_pixels + mask_blur * 2 if is_top else 0, + mask.width - expand_pixels - mask_blur * 2 if is_right else res_w, + mask.height - expand_pixels - mask_blur * 2 if is_bottom else res_h, + ), fill="black") + p.latent_mask = latent_mask + + proc = process_images(p) + proc_img = proc.images[0] + + if initial_seed_and_info[0] is None: + initial_seed_and_info[0] = proc.seed + initial_seed_and_info[1] = proc.info + + out.paste(proc_img, (0 if is_left else out.width - proc_img.width, 0 if is_top else out.height - proc_img.height)) + out = out.crop((0, 0, res_w, res_h)) + return out + + img = init_image + + if left > 0: + img = expand(img, left, is_left=True) + if right > 0: + img = expand(img, right, is_right=True) + if up > 0: + img = expand(img, up, is_top=True) + if down > 0: + img = expand(img, down, is_bottom=True) + + res = Processed(p, [img], initial_seed_and_info[0], initial_seed_and_info[1]) + + if opts.samples_save: + images.save_image(img, p.outpath_samples, "", res.seed, p.prompt, opts.grid_format, info=res.info, p=p) + + return res + diff --git a/scripts/poor_mans_outpainting.py b/scripts/poor_mans_outpainting.py new file mode 100644 index 0000000000000000000000000000000000000000..b046911052d182cc6dd2f44b3dd5fb329f08e059 --- /dev/null +++ b/scripts/poor_mans_outpainting.py @@ -0,0 +1,147 @@ +import math + +import modules.scripts as scripts +import gradio as gr +from PIL import Image, ImageDraw + +from modules import images, processing, devices +from modules.processing import Processed, process_images +from modules.shared import opts, cmd_opts, state + + + +class Script(scripts.Script): + def title(self): + return "Poor man's outpainting" + + def show(self, is_img2img): + return is_img2img + + def ui(self, is_img2img): + if not is_img2img: + return None + + pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128) + mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, visible=False) + inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='fill', type="index", visible=False) + direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down']) + + return [pixels, mask_blur, inpainting_fill, direction] + + def run(self, p, pixels, mask_blur, inpainting_fill, direction): + initial_seed = None + initial_info = None + + p.mask_blur = mask_blur * 2 + p.inpainting_fill = inpainting_fill + p.inpaint_full_res = False + + left = pixels if "left" in direction else 0 + right = pixels if "right" in direction else 0 + up = pixels if "up" in direction else 0 + down = pixels if "down" in direction else 0 + + init_img = p.init_images[0] + target_w = math.ceil((init_img.width + left + right) / 64) * 64 + target_h = math.ceil((init_img.height + up + down) / 64) * 64 + + if left > 0: + left = left * (target_w - init_img.width) // (left + right) + if right > 0: + right = target_w - init_img.width - left + + if up > 0: + up = up * (target_h - init_img.height) // (up + down) + + if down > 0: + down = target_h - init_img.height - up + + img = Image.new("RGB", (target_w, target_h)) + img.paste(init_img, (left, up)) + + mask = Image.new("L", (img.width, img.height), "white") + draw = ImageDraw.Draw(mask) + draw.rectangle(( + left + (mask_blur * 
2 if left > 0 else 0), + up + (mask_blur * 2 if up > 0 else 0), + mask.width - right - (mask_blur * 2 if right > 0 else 0), + mask.height - down - (mask_blur * 2 if down > 0 else 0) + ), fill="black") + + latent_mask = Image.new("L", (img.width, img.height), "white") + latent_draw = ImageDraw.Draw(latent_mask) + latent_draw.rectangle(( + left + (mask_blur//2 if left > 0 else 0), + up + (mask_blur//2 if up > 0 else 0), + mask.width - right - (mask_blur//2 if right > 0 else 0), + mask.height - down - (mask_blur//2 if down > 0 else 0) + ), fill="black") + + devices.torch_gc() + + grid = images.split_grid(img, tile_w=p.width, tile_h=p.height, overlap=pixels) + grid_mask = images.split_grid(mask, tile_w=p.width, tile_h=p.height, overlap=pixels) + grid_latent_mask = images.split_grid(latent_mask, tile_w=p.width, tile_h=p.height, overlap=pixels) + + p.n_iter = 1 + p.batch_size = 1 + p.do_not_save_grid = True + p.do_not_save_samples = True + + work = [] + work_mask = [] + work_latent_mask = [] + work_results = [] + + for (y, h, row), (_, _, row_mask), (_, _, row_latent_mask) in zip(grid.tiles, grid_mask.tiles, grid_latent_mask.tiles): + for tiledata, tiledata_mask, tiledata_latent_mask in zip(row, row_mask, row_latent_mask): + x, w = tiledata[0:2] + + if x >= left and x+w <= img.width - right and y >= up and y+h <= img.height - down: + continue + + work.append(tiledata[2]) + work_mask.append(tiledata_mask[2]) + work_latent_mask.append(tiledata_latent_mask[2]) + + batch_count = len(work) + print(f"Poor man's outpainting will process a total of {len(work)} images tiled as {len(grid.tiles[0][2])}x{len(grid.tiles)}.") + + state.job_count = batch_count + + for i in range(batch_count): + p.init_images = [work[i]] + p.image_mask = work_mask[i] + p.latent_mask = work_latent_mask[i] + + state.job = f"Batch {i + 1} out of {batch_count}" + processed = process_images(p) + + if initial_seed is None: + initial_seed = processed.seed + initial_info = processed.info + + p.seed = processed.seed + 1 + work_results += processed.images + + + image_index = 0 + for y, h, row in grid.tiles: + for tiledata in row: + x, w = tiledata[0:2] + + if x >= left and x+w <= img.width - right and y >= up and y+h <= img.height - down: + continue + + tiledata[2] = work_results[image_index] if image_index < len(work_results) else Image.new("RGB", (p.width, p.height)) + image_index += 1 + + combined_image = images.combine_grid(grid) + + if opts.samples_save: + images.save_image(combined_image, p.outpath_samples, "", initial_seed, p.prompt, opts.grid_format, info=initial_info, p=p) + + processed = Processed(p, [combined_image], initial_seed, initial_info) + + return processed + diff --git a/scripts/prompt_matrix.py b/scripts/prompt_matrix.py new file mode 100644 index 0000000000000000000000000000000000000000..e49c9b205f9f501930d5213e2e72bb4f7219d068 --- /dev/null +++ b/scripts/prompt_matrix.py @@ -0,0 +1,87 @@ +import math +from collections import namedtuple +from copy import copy +import random + +import modules.scripts as scripts +import gradio as gr + +from modules import images +from modules.processing import process_images, Processed +from modules.shared import opts, cmd_opts, state +import modules.sd_samplers + + +def draw_xy_grid(xs, ys, x_label, y_label, cell): + res = [] + + ver_texts = [[images.GridAnnotation(y_label(y))] for y in ys] + hor_texts = [[images.GridAnnotation(x_label(x))] for x in xs] + + first_pocessed = None + + state.job_count = len(xs) * len(ys) + + for iy, y in enumerate(ys): + for ix, x in enumerate(xs): + 
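+ # Each (x, y) cell below runs one full generation via cell(); the results are then stitched
+ # into a single annotated grid with image_grid/draw_grid_annotations after the loops.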
state.job = f"{ix + iy * len(xs) + 1} out of {len(xs) * len(ys)}" + + processed = cell(x, y) + if first_pocessed is None: + first_pocessed = processed + + res.append(processed.images[0]) + + grid = images.image_grid(res, rows=len(ys)) + grid = images.draw_grid_annotations(grid, res[0].width, res[0].height, hor_texts, ver_texts) + + first_pocessed.images = [grid] + + return first_pocessed + + +class Script(scripts.Script): + def title(self): + return "Prompt matrix" + + def ui(self, is_img2img): + put_at_start = gr.Checkbox(label='Put variable parts at start of prompt', value=False) + + return [put_at_start] + + def run(self, p, put_at_start): + modules.processing.fix_seed(p) + + original_prompt = p.prompt[0] if type(p.prompt) == list else p.prompt + + all_prompts = [] + prompt_matrix_parts = original_prompt.split("|") + combination_count = 2 ** (len(prompt_matrix_parts) - 1) + for combination_num in range(combination_count): + selected_prompts = [text.strip().strip(',') for n, text in enumerate(prompt_matrix_parts[1:]) if combination_num & (1 << n)] + + if put_at_start: + selected_prompts = selected_prompts + [prompt_matrix_parts[0]] + else: + selected_prompts = [prompt_matrix_parts[0]] + selected_prompts + + all_prompts.append(", ".join(selected_prompts)) + + p.n_iter = math.ceil(len(all_prompts) / p.batch_size) + p.do_not_save_grid = True + + print(f"Prompt matrix will create {len(all_prompts)} images using a total of {p.n_iter} batches.") + + p.prompt = all_prompts + p.seed = [p.seed for _ in all_prompts] + p.prompt_for_display = original_prompt + processed = process_images(p) + + grid = images.image_grid(processed.images, p.batch_size, rows=1 << ((len(prompt_matrix_parts) - 1) // 2)) + grid = images.draw_prompt_matrix(grid, p.width, p.height, prompt_matrix_parts) + processed.images.insert(0, grid) + + if opts.grid_save: + images.save_image(processed.images[0], p.outpath_grids, "prompt_matrix", prompt=original_prompt, seed=processed.seed, grid=True, p=p) + + return processed diff --git a/scripts/prompts_from_file.py b/scripts/prompts_from_file.py new file mode 100644 index 0000000000000000000000000000000000000000..b24f1a8060437adddc5a34d8e63ad1eaec641f47 --- /dev/null +++ b/scripts/prompts_from_file.py @@ -0,0 +1,57 @@ +import math +import os +import sys +import traceback + +import modules.scripts as scripts +import gradio as gr + +from modules.processing import Processed, process_images +from PIL import Image +from modules.shared import opts, cmd_opts, state + +class Script(scripts.Script): + def title(self): + return "Prompts from file or textbox" + + def ui(self, is_img2img): + # This checkbox would look nicer as two tabs, but there are two problems: + # 1) There is a bug in Gradio 3.3 that prevents visibility from working on Tabs + # 2) Even with Gradio 3.3.1, returning a control (like Tabs) that can't be used as input + # causes a AttributeError: 'Tabs' object has no attribute 'preprocess' assert, + # due to the way Script assumes all controls returned can be used as inputs. + # Therefore, there's no good way to use grouping components right now, + # so we will use a checkbox! 
:) + checkbox_txt = gr.Checkbox(label="Show Textbox", value=False) + file = gr.File(label="File with inputs", type='bytes') + prompt_txt = gr.TextArea(label="Prompts") + checkbox_txt.change(fn=lambda x: [gr.File.update(visible = not x), gr.TextArea.update(visible = x)], inputs=[checkbox_txt], outputs=[file, prompt_txt]) + return [checkbox_txt, file, prompt_txt] + + def on_show(self, checkbox_txt, file, prompt_txt): + return [ gr.Checkbox.update(visible = True), gr.File.update(visible = not checkbox_txt), gr.TextArea.update(visible = checkbox_txt) ] + + def run(self, p, checkbox_txt, data: bytes, prompt_txt: str): + if (checkbox_txt): + lines = [x.strip() for x in prompt_txt.splitlines()] + else: + lines = [x.strip() for x in data.decode('utf8', errors='ignore').split("\n")] + lines = [x for x in lines if len(x) > 0] + + img_count = len(lines) * p.n_iter + batch_count = math.ceil(img_count / p.batch_size) + loop_count = math.ceil(batch_count / p.n_iter) + print(f"Will process {img_count} images in {batch_count} batches.") + + p.do_not_save_grid = True + + state.job_count = batch_count + + images = [] + for loop_no in range(loop_count): + state.job = f"{loop_no + 1} out of {loop_count}" + p.prompt = lines[loop_no*p.batch_size:(loop_no+1)*p.batch_size] * p.n_iter + proc = process_images(p) + images += proc.images + + return Processed(p, images, p.seed, "") diff --git a/scripts/sd_upscale.py b/scripts/sd_upscale.py new file mode 100644 index 0000000000000000000000000000000000000000..cb37ff7e8797c081e622771d87b366cbdfd5aeb1 --- /dev/null +++ b/scripts/sd_upscale.py @@ -0,0 +1,97 @@ +import math + +import modules.scripts as scripts +import gradio as gr +from PIL import Image + +from modules import processing, shared, sd_samplers, images, devices +from modules.processing import Processed +from modules.shared import opts, cmd_opts, state + + +class Script(scripts.Script): + def title(self): + return "SD upscale" + + def show(self, is_img2img): + return is_img2img + + def ui(self, is_img2img): + info = gr.HTML("
Will upscale the image to twice the dimensions; use width and height sliders to set tile size
") + overlap = gr.Slider(minimum=0, maximum=256, step=16, label='Tile overlap', value=64, visible=False) + upscaler_index = gr.Radio(label='Upscaler', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index", visible=False) + + return [info, overlap, upscaler_index] + + def run(self, p, _, overlap, upscaler_index): + processing.fix_seed(p) + upscaler = shared.sd_upscalers[upscaler_index] + + p.extra_generation_params["SD upscale overlap"] = overlap + p.extra_generation_params["SD upscale upscaler"] = upscaler.name + + initial_info = None + seed = p.seed + + init_img = p.init_images[0] + + if(upscaler.name != "None"): + img = upscaler.scaler.upscale(init_img, 2, upscaler.data_path) + else: + img = init_img + + devices.torch_gc() + + grid = images.split_grid(img, tile_w=p.width, tile_h=p.height, overlap=overlap) + + batch_size = p.batch_size + upscale_count = p.n_iter + p.n_iter = 1 + p.do_not_save_grid = True + p.do_not_save_samples = True + + work = [] + + for y, h, row in grid.tiles: + for tiledata in row: + work.append(tiledata[2]) + + batch_count = math.ceil(len(work) / batch_size) + state.job_count = batch_count * upscale_count + + print(f"SD upscaling will process a total of {len(work)} images tiled as {len(grid.tiles[0][2])}x{len(grid.tiles)} per upscale in a total of {state.job_count} batches.") + + result_images = [] + for n in range(upscale_count): + start_seed = seed + n + p.seed = start_seed + + work_results = [] + for i in range(batch_count): + p.batch_size = batch_size + p.init_images = work[i*batch_size:(i+1)*batch_size] + + state.job = f"Batch {i + 1 + n * batch_count} out of {state.job_count}" + processed = processing.process_images(p) + + if initial_info is None: + initial_info = processed.info + + p.seed = processed.seed + 1 + work_results += processed.images + + image_index = 0 + for y, h, row in grid.tiles: + for tiledata in row: + tiledata[2] = work_results[image_index] if image_index < len(work_results) else Image.new("RGB", (p.width, p.height)) + image_index += 1 + + combined_image = images.combine_grid(grid) + result_images.append(combined_image) + + if opts.samples_save: + images.save_image(combined_image, p.outpath_samples, "", start_seed, p.prompt, opts.samples_format, info=initial_info, p=p) + + processed = Processed(p, result_images, seed, initial_info) + + return processed diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..efb63af540db6eee1c406484cddb3fa1811f22e3 --- /dev/null +++ b/scripts/xy_grid.py @@ -0,0 +1,390 @@ +from collections import namedtuple +from copy import copy +from itertools import permutations, chain +import random +import csv +from io import StringIO +from PIL import Image +import numpy as np + +import modules.scripts as scripts +import gradio as gr + +from modules import images +from modules.hypernetworks import hypernetwork +from modules.processing import process_images, Processed, get_correct_sampler +from modules.shared import opts, cmd_opts, state +import modules.shared as shared +import modules.sd_samplers +import modules.sd_models +import re + + +def apply_field(field): + def fun(p, x, xs): + setattr(p, field, x) + + return fun + + +def apply_prompt(p, x, xs): + if xs[0] not in p.prompt and xs[0] not in p.negative_prompt: + raise RuntimeError(f"Prompt S/R did not find {xs[0]} in prompt or negative prompt.") + + p.prompt = p.prompt.replace(xs[0], x) + p.negative_prompt = p.negative_prompt.replace(xs[0], x) + + +def 
apply_order(p, x, xs): + token_order = [] + + # Initally grab the tokens from the prompt, so they can be replaced in order of earliest seen + for token in x: + token_order.append((p.prompt.find(token), token)) + + token_order.sort(key=lambda t: t[0]) + + prompt_parts = [] + + # Split the prompt up, taking out the tokens + for _, token in token_order: + n = p.prompt.find(token) + prompt_parts.append(p.prompt[0:n]) + p.prompt = p.prompt[n + len(token):] + + # Rebuild the prompt with the tokens in the order we want + prompt_tmp = "" + for idx, part in enumerate(prompt_parts): + prompt_tmp += part + prompt_tmp += x[idx] + p.prompt = prompt_tmp + p.prompt + + +def build_samplers_dict(p): + samplers_dict = {} + for i, sampler in enumerate(get_correct_sampler(p)): + samplers_dict[sampler.name.lower()] = i + for alias in sampler.aliases: + samplers_dict[alias.lower()] = i + return samplers_dict + + +def apply_sampler(p, x, xs): + sampler_index = build_samplers_dict(p).get(x.lower(), None) + if sampler_index is None: + raise RuntimeError(f"Unknown sampler: {x}") + + p.sampler_index = sampler_index + + +def confirm_samplers(p, xs): + samplers_dict = build_samplers_dict(p) + for x in xs: + if x.lower() not in samplers_dict.keys(): + raise RuntimeError(f"Unknown sampler: {x}") + + +def apply_checkpoint(p, x, xs): + info = modules.sd_models.get_closet_checkpoint_match(x) + if info is None: + raise RuntimeError(f"Unknown checkpoint: {x}") + modules.sd_models.reload_model_weights(shared.sd_model, info) + + +def confirm_checkpoints(p, xs): + for x in xs: + if modules.sd_models.get_closet_checkpoint_match(x) is None: + raise RuntimeError(f"Unknown checkpoint: {x}") + + +def apply_hypernetwork(p, x, xs): + if x.lower() in ["", "none"]: + name = None + else: + name = hypernetwork.find_closest_hypernetwork_name(x) + if not name: + raise RuntimeError(f"Unknown hypernetwork: {x}") + hypernetwork.load_hypernetwork(name) + + +def apply_hypernetwork_strength(p, x, xs): + hypernetwork.apply_strength(x) + + +def confirm_hypernetworks(p, xs): + for x in xs: + if x.lower() in ["", "none"]: + continue + if not hypernetwork.find_closest_hypernetwork_name(x): + raise RuntimeError(f"Unknown hypernetwork: {x}") + + +def apply_clip_skip(p, x, xs): + opts.data["CLIP_stop_at_last_layers"] = x + + +def format_value_add_label(p, opt, x): + if type(x) == float: + x = round(x, 8) + + return f"{opt.label}: {x}" + + +def format_value(p, opt, x): + if type(x) == float: + x = round(x, 8) + return x + + +def format_value_join_list(p, opt, x): + return ", ".join(x) + + +def do_nothing(p, x, xs): + pass + + +def format_nothing(p, opt, x): + return "" + + +def str_permutations(x): + """dummy function for specifying it in AxisOption's type when you want to get a list of permutations""" + return x + + +AxisOption = namedtuple("AxisOption", ["label", "type", "apply", "format_value", "confirm"]) +AxisOptionImg2Img = namedtuple("AxisOptionImg2Img", ["label", "type", "apply", "format_value", "confirm"]) + + +axis_options = [ + AxisOption("Nothing", str, do_nothing, format_nothing, None), + AxisOption("Seed", int, apply_field("seed"), format_value_add_label, None), + AxisOption("Var. seed", int, apply_field("subseed"), format_value_add_label, None), + AxisOption("Var. 
strength", float, apply_field("subseed_strength"), format_value_add_label, None), + AxisOption("Steps", int, apply_field("steps"), format_value_add_label, None), + AxisOption("CFG Scale", float, apply_field("cfg_scale"), format_value_add_label, None), + AxisOption("Prompt S/R", str, apply_prompt, format_value, None), + AxisOption("Prompt order", str_permutations, apply_order, format_value_join_list, None), + AxisOption("Sampler", str, apply_sampler, format_value, confirm_samplers), + AxisOption("Checkpoint name", str, apply_checkpoint, format_value, confirm_checkpoints), + AxisOption("Hypernetwork", str, apply_hypernetwork, format_value, confirm_hypernetworks), + AxisOption("Hypernet str.", float, apply_hypernetwork_strength, format_value_add_label, None), + AxisOption("Sigma Churn", float, apply_field("s_churn"), format_value_add_label, None), + AxisOption("Sigma min", float, apply_field("s_tmin"), format_value_add_label, None), + AxisOption("Sigma max", float, apply_field("s_tmax"), format_value_add_label, None), + AxisOption("Sigma noise", float, apply_field("s_noise"), format_value_add_label, None), + AxisOption("Eta", float, apply_field("eta"), format_value_add_label, None), + AxisOption("Clip skip", int, apply_clip_skip, format_value_add_label, None), + AxisOptionImg2Img("Denoising", float, apply_field("denoising_strength"), format_value_add_label, None), # as it is now all AxisOptionImg2Img items must go after AxisOption ones +] + + +def draw_xy_grid(p, xs, ys, x_labels, y_labels, cell, draw_legend, include_lone_images): + ver_texts = [[images.GridAnnotation(y)] for y in y_labels] + hor_texts = [[images.GridAnnotation(x)] for x in x_labels] + + # Temporary list of all the images that are generated to be populated into the grid. + # Will be filled with empty images for any individual step that fails to process properly + image_cache = [] + + processed_result = None + cell_mode = "P" + cell_size = (1,1) + + state.job_count = len(xs) * len(ys) * p.n_iter + + for iy, y in enumerate(ys): + for ix, x in enumerate(xs): + state.job = f"{ix + iy * len(xs) + 1} out of {len(xs) * len(ys)}" + + processed:Processed = cell(x, y) + try: + # this dereference will throw an exception if the image was not processed + # (this happens in cases such as if the user stops the process from the UI) + processed_image = processed.images[0] + + if processed_result is None: + # Use our first valid processed result as a template container to hold our full results + processed_result = copy(processed) + cell_mode = processed_image.mode + cell_size = processed_image.size + processed_result.images = [Image.new(cell_mode, cell_size)] + + image_cache.append(processed_image) + if include_lone_images: + processed_result.images.append(processed_image) + processed_result.all_prompts.append(processed.prompt) + processed_result.all_seeds.append(processed.seed) + processed_result.infotexts.append(processed.infotexts[0]) + except: + image_cache.append(Image.new(cell_mode, cell_size)) + + if not processed_result: + print("Unexpected error: draw_xy_grid failed to return even a single processed image") + return Processed() + + grid = images.image_grid(image_cache, rows=len(ys)) + if draw_legend: + grid = images.draw_grid_annotations(grid, cell_size[0], cell_size[1], hor_texts, ver_texts) + + processed_result.images[0] = grid + + return processed_result + + +re_range = re.compile(r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\(([+-]\d+)\s*\))?\s*") +re_range_float = 
re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d*)?)(?:\s*\(([+-]\d+(?:.\d*)?)\s*\))?\s*") + +re_range_count = re.compile(r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\[(\d+)\s*\])?\s*") +re_range_count_float = re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d*)?)(?:\s*\[(\d+(?:.\d*)?)\s*\])?\s*") + +class Script(scripts.Script): + def title(self): + return "X/Y plot" + + def ui(self, is_img2img): + current_axis_options = [x for x in axis_options if type(x) == AxisOption or type(x) == AxisOptionImg2Img and is_img2img] + + with gr.Row(): + x_type = gr.Dropdown(label="X type", choices=[x.label for x in current_axis_options], value=current_axis_options[1].label, visible=False, type="index", elem_id="x_type") + x_values = gr.Textbox(label="X values", visible=False, lines=1) + + with gr.Row(): + y_type = gr.Dropdown(label="Y type", choices=[x.label for x in current_axis_options], value=current_axis_options[0].label, visible=False, type="index", elem_id="y_type") + y_values = gr.Textbox(label="Y values", visible=False, lines=1) + + draw_legend = gr.Checkbox(label='Draw legend', value=True) + include_lone_images = gr.Checkbox(label='Include Separate Images', value=False) + no_fixed_seeds = gr.Checkbox(label='Keep -1 for seeds', value=False) + + return [x_type, x_values, y_type, y_values, draw_legend, include_lone_images, no_fixed_seeds] + + def run(self, p, x_type, x_values, y_type, y_values, draw_legend, include_lone_images, no_fixed_seeds): + if not no_fixed_seeds: + modules.processing.fix_seed(p) + + if not opts.return_grid: + p.batch_size = 1 + + + CLIP_stop_at_last_layers = opts.CLIP_stop_at_last_layers + + def process_axis(opt, vals): + if opt.label == 'Nothing': + return [0] + + valslist = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals)))] + + if opt.type == int: + valslist_ext = [] + + for val in valslist: + m = re_range.fullmatch(val) + mc = re_range_count.fullmatch(val) + if m is not None: + start = int(m.group(1)) + end = int(m.group(2))+1 + step = int(m.group(3)) if m.group(3) is not None else 1 + + valslist_ext += list(range(start, end, step)) + elif mc is not None: + start = int(mc.group(1)) + end = int(mc.group(2)) + num = int(mc.group(3)) if mc.group(3) is not None else 1 + + valslist_ext += [int(x) for x in np.linspace(start=start, stop=end, num=num).tolist()] + else: + valslist_ext.append(val) + + valslist = valslist_ext + elif opt.type == float: + valslist_ext = [] + + for val in valslist: + m = re_range_float.fullmatch(val) + mc = re_range_count_float.fullmatch(val) + if m is not None: + start = float(m.group(1)) + end = float(m.group(2)) + step = float(m.group(3)) if m.group(3) is not None else 1 + + valslist_ext += np.arange(start, end + step, step).tolist() + elif mc is not None: + start = float(mc.group(1)) + end = float(mc.group(2)) + num = int(mc.group(3)) if mc.group(3) is not None else 1 + + valslist_ext += np.linspace(start=start, stop=end, num=num).tolist() + else: + valslist_ext.append(val) + + valslist = valslist_ext + elif opt.type == str_permutations: + valslist = list(permutations(valslist)) + + valslist = [opt.type(x) for x in valslist] + + # Confirm options are valid before starting + if opt.confirm: + opt.confirm(p, valslist) + + return valslist + + x_opt = axis_options[x_type] + xs = process_axis(x_opt, x_values) + + y_opt = axis_options[y_type] + ys = process_axis(y_opt, y_values) + + def fix_axis_seeds(axis_opt, axis_list): + if axis_opt.label == 'Seed': + return [int(random.randrange(4294967294)) if val is 
None or val == '' or val == -1 else val for val in axis_list] + else: + return axis_list + + if not no_fixed_seeds: + xs = fix_axis_seeds(x_opt, xs) + ys = fix_axis_seeds(y_opt, ys) + + if x_opt.label == 'Steps': + total_steps = sum(xs) * len(ys) + elif y_opt.label == 'Steps': + total_steps = sum(ys) * len(xs) + else: + total_steps = p.steps * len(xs) * len(ys) + + print(f"X/Y plot will create {len(xs) * len(ys) * p.n_iter} images on a {len(xs)}x{len(ys)} grid. (Total steps to process: {total_steps * p.n_iter})") + shared.total_tqdm.updateTotal(total_steps * p.n_iter) + + def cell(x, y): + pc = copy(p) + x_opt.apply(pc, x, xs) + y_opt.apply(pc, y, ys) + + return process_images(pc) + + processed = draw_xy_grid( + p, + xs=xs, + ys=ys, + x_labels=[x_opt.format_value(p, x_opt, x) for x in xs], + y_labels=[y_opt.format_value(p, y_opt, y) for y in ys], + cell=cell, + draw_legend=draw_legend, + include_lone_images=include_lone_images + ) + + if opts.grid_save: + images.save_image(processed.images[0], p.outpath_grids, "xy_grid", prompt=p.prompt, seed=processed.seed, grid=True, p=p) + + # restore checkpoint in case it was changed by axes + modules.sd_models.reload_model_weights(shared.sd_model) + + hypernetwork.load_hypernetwork(opts.sd_hypernetwork) + hypernetwork.apply_strength() + + + opts.data["CLIP_stop_at_last_layers"] = CLIP_stop_at_last_layers + + return processed diff --git a/style.css b/style.css new file mode 100644 index 0000000000000000000000000000000000000000..aa3d379c11bc195bfe23f7866796d944ebca12dc --- /dev/null +++ b/style.css @@ -0,0 +1,531 @@ +.container { + max-width: 100%; +} + +#txt2img_token_counter { + height: 0px; +} + +#img2img_token_counter { + height: 0px; +} + +#sh{ + min-width: 2em; + min-height: 2em; + max-width: 2em; + max-height: 2em; + flex-grow: 0; + padding-left: 0.25em; + padding-right: 0.25em; + margin: 0.1em 0; + opacity: 0%; + cursor: default; +} + +.output-html p {margin: 0 0.5em;} + +.row > *, +.row > .gr-form > * { + min-width: min(120px, 100%); + flex: 1 1 0%; +} + +.performance { + font-size: 0.85em; + color: #444; + display: flex; + justify-content: space-between; + white-space: nowrap; +} + +.performance .time { + margin-right: 0; +} + +.performance .vram { + margin-left: 0; + text-align: right; +} + +#txt2img_generate, #img2img_generate { + min-height: 4.5em; +} + +@media screen and (min-width: 2500px) { + #txt2img_gallery, #img2img_gallery { + min-height: 768px; + } +} + +#txt2img_gallery img, #img2img_gallery img{ + object-fit: scale-down; +} + +.justify-center.overflow-x-scroll { + justify-content: left; +} + +.justify-center.overflow-x-scroll button:first-of-type { + margin-left: auto; +} + +.justify-center.overflow-x-scroll button:last-of-type { + margin-right: auto; +} + +#random_seed, #random_subseed, #reuse_seed, #reuse_subseed, #open_folder{ + min-width: auto; + flex-grow: 0; + padding-left: 0.25em; + padding-right: 0.25em; +} + +#hidden_element{ + display: none; +} + +#seed_row, #subseed_row{ + gap: 0.5rem; +} + +#subseed_show_box{ + min-width: auto; + flex-grow: 0; +} + +#subseed_show_box > div{ + border: 0; + height: 100%; +} + +#subseed_show{ + min-width: auto; + flex-grow: 0; + padding: 0; +} + +#subseed_show label{ + height: 100%; +} + +#roll_col{ + min-width: unset !important; + flex-grow: 0 !important; + padding: 0.4em 0; +} + +#roll, #paste{ + min-width: 2em; + min-height: 2em; + max-width: 2em; + max-height: 2em; + flex-grow: 0; + padding-left: 0.25em; + padding-right: 0.25em; + margin: 0.1em 0; +} + +#style_apply, #style_create, 
#interrogate{ + margin: 0.75em 0.25em 0.25em 0.25em; + min-width: 5em; +} + +#style_apply, #style_create, #deepbooru{ + margin: 0.75em 0.25em 0.25em 0.25em; + min-width: 5em; +} + +#style_pos_col, #style_neg_col{ + min-width: 8em !important; +} + +#txt2img_style_index, #txt2img_style2_index, #img2img_style_index, #img2img_style2_index{ + margin-top: 1em; +} + +.gr-form{ + background: transparent; +} + +.my-4{ + margin-top: 0; + margin-bottom: 0; +} + +#toprow div{ + border: none; + gap: 0; + background: transparent; +} + +#resize_mode{ + flex: 1.5; +} + +button{ + align-self: stretch !important; +} + +#prompt, #negative_prompt{ + border: none !important; +} +#prompt textarea, #negative_prompt textarea{ + border: none !important; +} + + +#img2maskimg .h-60{ + height: 30rem; +} + +.overflow-hidden, .gr-panel{ + overflow: visible !important; +} + +#x_type, #y_type{ + max-width: 10em; +} + +#txt2img_preview, #img2img_preview, #ti_preview{ + position: absolute; + width: 320px; + left: 0; + right: 0; + margin-left: auto; + margin-right: auto; + margin-top: 34px; + z-index: 100; + border: none; + border-top-left-radius: 0; + border-top-right-radius: 0; +} + +@media screen and (min-width: 768px) { + #txt2img_preview, #img2img_preview, #ti_preview { + position: absolute; + } +} + +@media screen and (max-width: 767px) { + #txt2img_preview, #img2img_preview, #ti_preview { + position: relative; + } +} + +#txt2img_preview div.left-0.top-0, #img2img_preview div.left-0.top-0, #ti_preview div.left-0.top-0{ + display: none; +} + +fieldset span.text-gray-500, .gr-block.gr-box span.text-gray-500, label.block span{ + position: absolute; + top: -0.6em; + line-height: 1.2em; + padding: 0 0.5em; + margin: 0; + + background-color: white; + border-top: 1px solid #eee; + border-left: 1px solid #eee; + border-right: 1px solid #eee; + + z-index: 300; +} + +.dark fieldset span.text-gray-500, .dark .gr-block.gr-box span.text-gray-500, .dark label.block span{ + background-color: rgb(31, 41, 55); + border-top: 1px solid rgb(55 65 81); + border-left: 1px solid rgb(55 65 81); + border-right: 1px solid rgb(55 65 81); +} + +#settings fieldset span.text-gray-500, #settings .gr-block.gr-box span.text-gray-500, #settings label.block span{ + position: relative; + border: none; + margin-right: 8em; +} + +.gr-panel div.flex-col div.justify-between label span{ + margin: 0; +} + +.gr-panel div.flex-col div.justify-between div{ + position: absolute; + top: -0.1em; + right: 1em; + padding: 0 0.5em; +} + +#settings .gr-panel div.flex-col div.justify-between div{ + position: relative; + z-index: 200; +} + +input[type="range"]{ + margin: 0.5em 0 -0.3em 0; +} + +#txt2img_sampling label{ + padding-left: 0.6em; + padding-right: 0.6em; +} + +#mask_bug_info { + text-align: center; + display: block; + margin-top: -0.75em; + margin-bottom: -0.75em; +} + +#txt2img_negative_prompt, #img2img_negative_prompt{ +} + +#txt2img_progressbar, #img2img_progressbar, #ti_progressbar{ + position: absolute; + z-index: 1000; + right: 0; + padding-left: 5px; + padding-right: 5px; + display: block; +} + +#txt2img_progress_row, #img2img_progress_row{ + margin-bottom: 10px; + margin-top: -18px; +} + +.progressDiv{ + width: 100%; + height: 20px; + background: #b4c0cc; + border-radius: 8px; +} + +.dark .progressDiv{ + background: #424c5b; +} + +.progressDiv .progress{ + width: 0%; + height: 20px; + background: #0060df; + color: white; + font-weight: bold; + line-height: 20px; + padding: 0 8px 0 0; + text-align: right; + border-radius: 8px; +} + +#lightboxModal{ + 
display: none; + position: fixed; + z-index: 1001; + padding-top: 100px; + left: 0; + top: 0; + width: 100%; + height: 100%; + overflow: auto; + background-color: rgba(20, 20, 20, 0.95); +} + +.modalControls { + display: grid; + grid-template-columns: 32px auto 1fr 32px; + grid-template-areas: "zoom tile space close"; + position: absolute; + top: 0; + left: 0; + right: 0; + padding: 16px; + gap: 16px; + background-color: rgba(0,0,0,0.2); +} + +.modalClose { + grid-area: close; +} + +.modalZoom { + grid-area: zoom; +} + +.modalTileImage { + grid-area: tile; +} + +.modalClose, +.modalZoom, +.modalTileImage { + color: white; + font-size: 35px; + font-weight: bold; + cursor: pointer; +} + +.modalClose:hover, +.modalClose:focus, +.modalZoom:hover, +.modalZoom:focus { + color: #999; + text-decoration: none; + cursor: pointer; +} + +#modalImage { + display: block; + margin-left: auto; + margin-right: auto; + margin-top: auto; + width: auto; +} + +.modalImageFullscreen { + object-fit: contain; + height: 90%; +} + +.modalPrev, +.modalNext { + cursor: pointer; + position: absolute; + top: 50%; + width: auto; + padding: 16px; + margin-top: -50px; + color: white; + font-weight: bold; + font-size: 20px; + transition: 0.6s ease; + border-radius: 0 3px 3px 0; + user-select: none; + -webkit-user-select: none; +} + +.modalNext { + right: 0; + border-radius: 3px 0 0 3px; +} + +.modalPrev:hover, +.modalNext:hover { + background-color: rgba(0, 0, 0, 0.8); +} + +#imageARPreview{ + position:absolute; + top:0px; + left:0px; + border:2px solid red; + background:rgba(255, 0, 0, 0.3); + z-index: 900; + pointer-events:none; + display:none +} + +#txt2img_interrupt, #img2img_interrupt{ + position: absolute; + width: 50%; + height: 72px; + background: #b4c0cc; + border-radius: 0px; + display: none; +} + +#txt2img_skip, #img2img_skip{ + position: absolute; + width: 50%; + right: 0px; + height: 72px; + background: #b4c0cc; + border-radius: 0px; + display: none; +} + +.red { + color: red; +} + +.gallery-item { + --tw-bg-opacity: 0 !important; +} + +#img2img_image div.h-60{ + height: 480px; +} + +#context-menu{ + z-index:9999; + position:absolute; + display:block; + padding:0px 0; + border:2px solid #a55000; + border-radius:8px; + box-shadow:1px 1px 2px #CE6400; + width: 200px; +} + +.context-menu-items{ + list-style: none; + margin: 0; + padding: 0; +} + +.context-menu-items a{ + display:block; + padding:5px; + cursor:pointer; +} + +.context-menu-items a:hover{ + background: #a55000; +} + +#quicksettings { + gap: 0.4em; +} + +#quicksettings > div{ + border: none; + background: none; + flex: unset; + gap: 0.5em; +} + +#quicksettings > div > div{ + max-width: 32em; + min-width: 24em; + padding: 0; +} + +#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork{ + max-width: 2.5em; + min-width: 2.5em; + height: 2.4em; +} + + +canvas[key="mask"] { + z-index: 12 !important; + filter: invert(); + mix-blend-mode: multiply; + pointer-events: none; +} + + +/* gradio 3.4.1 stuff for editable scrollbar values */ +.gr-box > div > div > input.gr-text-input{ + position: absolute; + right: 0.5em; + top: -0.6em; + z-index: 200; + width: 8em; +} +#quicksettings .gr-box > div > div > input.gr-text-input { + top: -1.12em; +} + +.row.gr-compact{ + overflow: visible; +} diff --git a/textual_inversion_templates/hypernetwork.txt b/textual_inversion_templates/hypernetwork.txt new file mode 100644 index 0000000000000000000000000000000000000000..91e06890571c7e4974d5a76c30fab62e8587c7d2 --- /dev/null +++ b/textual_inversion_templates/hypernetwork.txt 
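Note: the textual_inversion_templates/*.txt files added below are plain prompt templates, one prompt per line, with [name] standing for the embedding or concept being trained and [filewords] for caption words taken from the training image. A minimal sketch of how such a line is presumably filled in at training time (fill_template is an illustrative helper, not the repo's actual dataset code):

import random

def fill_template(lines, name, filewords):
    # Pick one template line and substitute the two placeholders.
    text = random.choice(lines)
    return text.replace("[name]", name).replace("[filewords]", filewords)

templates = [
    "a photo of a [filewords]",
    "a close-up photo of the [filewords]",
]
print(fill_template(templates, name="my-style", filewords="cat wearing a hat"))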
@@ -0,0 +1,27 @@ +a photo of a [filewords] +a rendering of a [filewords] +a cropped photo of the [filewords] +the photo of a [filewords] +a photo of a clean [filewords] +a photo of a dirty [filewords] +a dark photo of the [filewords] +a photo of my [filewords] +a photo of the cool [filewords] +a close-up photo of a [filewords] +a bright photo of the [filewords] +a cropped photo of a [filewords] +a photo of the [filewords] +a good photo of the [filewords] +a photo of one [filewords] +a close-up photo of the [filewords] +a rendition of the [filewords] +a photo of the clean [filewords] +a rendition of a [filewords] +a photo of a nice [filewords] +a good photo of a [filewords] +a photo of the nice [filewords] +a photo of the small [filewords] +a photo of the weird [filewords] +a photo of the large [filewords] +a photo of a cool [filewords] +a photo of a small [filewords] diff --git a/textual_inversion_templates/none.txt b/textual_inversion_templates/none.txt new file mode 100644 index 0000000000000000000000000000000000000000..f77af4612b289a56b718c3bee62c66a6151f75be --- /dev/null +++ b/textual_inversion_templates/none.txt @@ -0,0 +1 @@ +picture diff --git a/textual_inversion_templates/style.txt b/textual_inversion_templates/style.txt new file mode 100644 index 0000000000000000000000000000000000000000..15af2d6b85f259d0bf41fbe0c8ca7a3340e1b259 --- /dev/null +++ b/textual_inversion_templates/style.txt @@ -0,0 +1,19 @@ +a painting, art by [name] +a rendering, art by [name] +a cropped painting, art by [name] +the painting, art by [name] +a clean painting, art by [name] +a dirty painting, art by [name] +a dark painting, art by [name] +a picture, art by [name] +a cool painting, art by [name] +a close-up painting, art by [name] +a bright painting, art by [name] +a cropped painting, art by [name] +a good painting, art by [name] +a close-up painting, art by [name] +a rendition, art by [name] +a nice painting, art by [name] +a small painting, art by [name] +a weird painting, art by [name] +a large painting, art by [name] diff --git a/textual_inversion_templates/style_filewords.txt b/textual_inversion_templates/style_filewords.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3a8159a869a7890bdd42470664fadf015e0658d --- /dev/null +++ b/textual_inversion_templates/style_filewords.txt @@ -0,0 +1,19 @@ +a painting of [filewords], art by [name] +a rendering of [filewords], art by [name] +a cropped painting of [filewords], art by [name] +the painting of [filewords], art by [name] +a clean painting of [filewords], art by [name] +a dirty painting of [filewords], art by [name] +a dark painting of [filewords], art by [name] +a picture of [filewords], art by [name] +a cool painting of [filewords], art by [name] +a close-up painting of [filewords], art by [name] +a bright painting of [filewords], art by [name] +a cropped painting of [filewords], art by [name] +a good painting of [filewords], art by [name] +a close-up painting of [filewords], art by [name] +a rendition of [filewords], art by [name] +a nice painting of [filewords], art by [name] +a small painting of [filewords], art by [name] +a weird painting of [filewords], art by [name] +a large painting of [filewords], art by [name] diff --git a/textual_inversion_templates/subject.txt b/textual_inversion_templates/subject.txt new file mode 100644 index 0000000000000000000000000000000000000000..79f36aa0543fc2151b7f7e28725309c0c9a4912a --- /dev/null +++ b/textual_inversion_templates/subject.txt @@ -0,0 +1,27 @@ +a photo of a [name] +a 
rendering of a [name] +a cropped photo of the [name] +the photo of a [name] +a photo of a clean [name] +a photo of a dirty [name] +a dark photo of the [name] +a photo of my [name] +a photo of the cool [name] +a close-up photo of a [name] +a bright photo of the [name] +a cropped photo of a [name] +a photo of the [name] +a good photo of the [name] +a photo of one [name] +a close-up photo of the [name] +a rendition of the [name] +a photo of the clean [name] +a rendition of a [name] +a photo of a nice [name] +a good photo of a [name] +a photo of the nice [name] +a photo of the small [name] +a photo of the weird [name] +a photo of the large [name] +a photo of a cool [name] +a photo of a small [name] diff --git a/textual_inversion_templates/subject_filewords.txt b/textual_inversion_templates/subject_filewords.txt new file mode 100644 index 0000000000000000000000000000000000000000..008652a6bf4277f12a1759f5f3c815ae754dcfcf --- /dev/null +++ b/textual_inversion_templates/subject_filewords.txt @@ -0,0 +1,27 @@ +a photo of a [name], [filewords] +a rendering of a [name], [filewords] +a cropped photo of the [name], [filewords] +the photo of a [name], [filewords] +a photo of a clean [name], [filewords] +a photo of a dirty [name], [filewords] +a dark photo of the [name], [filewords] +a photo of my [name], [filewords] +a photo of the cool [name], [filewords] +a close-up photo of a [name], [filewords] +a bright photo of the [name], [filewords] +a cropped photo of a [name], [filewords] +a photo of the [name], [filewords] +a good photo of the [name], [filewords] +a photo of one [name], [filewords] +a close-up photo of the [name], [filewords] +a rendition of the [name], [filewords] +a photo of the clean [name], [filewords] +a rendition of a [name], [filewords] +a photo of a nice [name], [filewords] +a good photo of a [name], [filewords] +a photo of the nice [name], [filewords] +a photo of the small [name], [filewords] +a photo of the weird [name], [filewords] +a photo of the large [name], [filewords] +a photo of a cool [name], [filewords] +a photo of a small [name], [filewords] diff --git a/txt2img_Screenshot.png b/txt2img_Screenshot.png new file mode 100644 index 0000000000000000000000000000000000000000..6e2759a4c8aa2d05e1334e871b2a451f1104ba60 Binary files /dev/null and b/txt2img_Screenshot.png differ diff --git a/webui.bat b/webui.bat new file mode 100644 index 0000000000000000000000000000000000000000..3f1d03f6dd810cc00f7173d438d8218c35e01a5d --- /dev/null +++ b/webui.bat @@ -0,0 +1,62 @@ +@echo off + +if not defined PYTHON (set PYTHON=python) +if not defined VENV_DIR (set VENV_DIR=venv) + +set ERROR_REPORTING=FALSE + +mkdir tmp 2>NUL + +%PYTHON% -c "" >tmp/stdout.txt 2>tmp/stderr.txt +if %ERRORLEVEL% == 0 goto :start_venv +echo Couldn't launch python +goto :show_stdout_stderr + +:start_venv +if [%VENV_DIR%] == [-] goto :skip_venv + +dir %VENV_DIR%\Scripts\Python.exe >tmp/stdout.txt 2>tmp/stderr.txt +if %ERRORLEVEL% == 0 goto :activate_venv + +for /f "delims=" %%i in ('CALL %PYTHON% -c "import sys; print(sys.executable)"') do set PYTHON_FULLNAME="%%i" +echo Creating venv in directory %VENV_DIR% using python %PYTHON_FULLNAME% +%PYTHON_FULLNAME% -m venv %VENV_DIR% >tmp/stdout.txt 2>tmp/stderr.txt +if %ERRORLEVEL% == 0 goto :activate_venv +echo Unable to create venv in directory %VENV_DIR% +goto :show_stdout_stderr + +:activate_venv +set PYTHON="%~dp0%VENV_DIR%\Scripts\Python.exe" +echo venv %PYTHON% +goto :launch + +:skip_venv + +:launch +%PYTHON% launch.py +pause +exit /b + +:show_stdout_stderr + +echo. 
+echo exit code: %errorlevel% + +for /f %%i in ("tmp\stdout.txt") do set size=%%~zi +if %size% equ 0 goto :show_stderr +echo. +echo stdout: +type tmp\stdout.txt + +:show_stderr +for /f %%i in ("tmp\stderr.txt") do set size=%%~zi +if %size% equ 0 goto :show_stderr +echo. +echo stderr: +type tmp\stderr.txt + +:endofscript + +echo. +echo Launch unsuccessful. Exiting. +pause diff --git a/webui.py b/webui.py new file mode 100644 index 0000000000000000000000000000000000000000..fe0ce321fa05a75c1f787604c768b7de710ac22b --- /dev/null +++ b/webui.py @@ -0,0 +1,137 @@ +import os +import threading +import time +import importlib +import signal +import threading + +from fastapi.middleware.gzip import GZipMiddleware + +from modules.paths import script_path + +from modules import devices, sd_samplers +import modules.codeformer_model as codeformer +import modules.extras +import modules.face_restoration +import modules.gfpgan_model as gfpgan +import modules.img2img + +import modules.lowvram +import modules.paths +import modules.scripts +import modules.sd_hijack +import modules.sd_models +import modules.shared as shared +import modules.txt2img + +import modules.ui +from modules import devices +from modules import modelloader +from modules.paths import script_path +from modules.shared import cmd_opts +import modules.hypernetworks.hypernetwork + + +queue_lock = threading.Lock() + + +def wrap_queued_call(func): + def f(*args, **kwargs): + with queue_lock: + res = func(*args, **kwargs) + + return res + + return f + + +def wrap_gradio_gpu_call(func, extra_outputs=None): + def f(*args, **kwargs): + devices.torch_gc() + + shared.state.sampling_step = 0 + shared.state.job_count = -1 + shared.state.job_no = 0 + shared.state.job_timestamp = shared.state.get_job_timestamp() + shared.state.current_latent = None + shared.state.current_image = None + shared.state.current_image_sampling_step = 0 + shared.state.skipped = False + shared.state.interrupted = False + shared.state.textinfo = None + + with queue_lock: + res = func(*args, **kwargs) + + shared.state.job = "" + shared.state.job_count = 0 + + devices.torch_gc() + + return res + + return modules.ui.wrap_gradio_call(f, extra_outputs=extra_outputs) + +def initialize(): + modelloader.cleanup_models() + modules.sd_models.setup_model() + codeformer.setup_model(cmd_opts.codeformer_models_path) + gfpgan.setup_model(cmd_opts.gfpgan_models_path) + shared.face_restorers.append(modules.face_restoration.FaceRestoration()) + modelloader.load_upscalers() + + modules.scripts.load_scripts(os.path.join(script_path, "scripts")) + + shared.sd_model = modules.sd_models.load_model() + shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model))) + shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork))) + shared.opts.onchange("sd_hypernetwork_strength", modules.hypernetworks.hypernetwork.apply_strength) + + +def webui(): + initialize() + + # make the program just exit at ctrl+c without waiting for anything + def sigint_handler(sig, frame): + print(f'Interrupted with signal {sig} in {frame}') + os._exit(0) + + signal.signal(signal.SIGINT, sigint_handler) + + while 1: + + demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) + + app, local_url, share_url = demo.launch( + share=cmd_opts.share, + server_name="0.0.0.0" if cmd_opts.listen else None, + server_port=cmd_opts.port, + debug=cmd_opts.gradio_debug, + 
auth=[tuple(cred.split(':')) for cred in cmd_opts.gradio_auth.strip('"').split(',')] if cmd_opts.gradio_auth else None, + inbrowser=cmd_opts.autolaunch, + prevent_thread_lock=True + ) + + app.add_middleware(GZipMiddleware, minimum_size=1000) + + while 1: + time.sleep(0.5) + if getattr(demo, 'do_restart', False): + time.sleep(0.5) + demo.close() + time.sleep(0.5) + break + + sd_samplers.set_samplers() + + print('Reloading Custom Scripts') + modules.scripts.reload_scripts(os.path.join(script_path, "scripts")) + print('Reloading modules: modules.ui') + importlib.reload(modules.ui) + print('Refreshing Model List') + modules.sd_models.list_models() + print('Restarting Gradio') + + +if __name__ == "__main__": + webui() diff --git a/webui.sh b/webui.sh new file mode 100644 index 0000000000000000000000000000000000000000..05ca497d2e181c9c11409451794b1fe920f7895c --- /dev/null +++ b/webui.sh @@ -0,0 +1,141 @@ +#!/bin/bash +################################################# +# Please do not make any changes to this file, # +# change the variables in webui-user.sh instead # +################################################# +# Read variables from webui-user.sh +# shellcheck source=/dev/null +if [[ -f webui-user.sh ]] +then + source ./webui-user.sh +fi + +# Set defaults +# Install directory without trailing slash +if [[ -z "${install_dir}" ]] +then + install_dir="/home/$(whoami)" +fi + +# Name of the subdirectory (defaults to stable-diffusion-webui) +if [[ -z "${clone_dir}" ]] +then + clone_dir="stable-diffusion-webui" +fi + +# python3 executable +if [[ -z "${python_cmd}" ]] +then + python_cmd="python3" +fi + +# git executable +if [[ -z "${GIT}" ]] +then + export GIT="git" +fi + +# python3 venv without trailing slash (defaults to ${install_dir}/${clone_dir}/venv) +if [[ -z "${venv_dir}" ]] +then + venv_dir="venv" +fi + +if [[ -z "${LAUNCH_SCRIPT}" ]] +then + LAUNCH_SCRIPT="launch.py" +fi + +# Disable sentry logging +export ERROR_REPORTING=FALSE + +# Do not reinstall existing pip packages on Debian/Ubuntu +export PIP_IGNORE_INSTALLED=0 + +# Pretty print +delimiter="################################################################" + +printf "\n%s\n" "${delimiter}" +printf "\e[1m\e[32mInstall script for stable-diffusion + Web UI\n" +printf "\e[1m\e[34mTested on Debian 11 (Bullseye)\e[0m" +printf "\n%s\n" "${delimiter}" + +# Do not run as root +if [[ $(id -u) -eq 0 ]] +then + printf "\n%s\n" "${delimiter}" + printf "\e[1m\e[31mERROR: This script must not be launched as root, aborting...\e[0m" + printf "\n%s\n" "${delimiter}" + exit 1 +else + printf "\n%s\n" "${delimiter}" + printf "Running on \e[1m\e[32m%s\e[0m user" "$(whoami)" + printf "\n%s\n" "${delimiter}" +fi + +if [[ -d .git ]] +then + printf "\n%s\n" "${delimiter}" + printf "Repo already cloned, using it as install directory" + printf "\n%s\n" "${delimiter}" + install_dir="${PWD}/../" + clone_dir="${PWD##*/}" +fi + +# Check prequisites +for preq in git python3 +do + if ! hash "${preq}" &>/dev/null + then + printf "\n%s\n" "${delimiter}" + printf "\e[1m\e[31mERROR: %s is not installed, aborting...\e[0m" "${preq}" + printf "\n%s\n" "${delimiter}" + exit 1 + fi +done + +if ! 
"${python_cmd}" -c "import venv" &>/dev/null +then + printf "\n%s\n" "${delimiter}" + printf "\e[1m\e[31mERROR: python3-venv is not installed, aborting...\e[0m" + printf "\n%s\n" "${delimiter}" + exit 1 +fi + +printf "\n%s\n" "${delimiter}" +printf "Clone or update stable-diffusion-webui" +printf "\n%s\n" "${delimiter}" +cd "${install_dir}"/ || { printf "\e[1m\e[31mERROR: Can't cd to %s/, aborting...\e[0m" "${install_dir}"; exit 1; } +if [[ -d "${clone_dir}" ]] +then + cd "${clone_dir}"/ || { printf "\e[1m\e[31mERROR: Can't cd to %s/%s/, aborting...\e[0m" "${install_dir}" "${clone_dir}"; exit 1; } + "${GIT}" pull +else + "${GIT}" clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git "${clone_dir}" + cd "${clone_dir}"/ || { printf "\e[1m\e[31mERROR: Can't cd to %s/%s/, aborting...\e[0m" "${install_dir}" "${clone_dir}"; exit 1; } +fi + +printf "\n%s\n" "${delimiter}" +printf "Create and activate python venv" +printf "\n%s\n" "${delimiter}" +cd "${install_dir}"/"${clone_dir}"/ || { printf "\e[1m\e[31mERROR: Can't cd to %s/%s/, aborting...\e[0m" "${install_dir}" "${clone_dir}"; exit 1; } +if [[ ! -d "${venv_dir}" ]] +then + "${python_cmd}" -m venv "${venv_dir}" + first_launch=1 +fi +# shellcheck source=/dev/null +if [[ -f "${venv_dir}"/bin/activate ]] +then + source "${venv_dir}"/bin/activate +else + printf "\n%s\n" "${delimiter}" + printf "\e[1m\e[31mERROR: Cannot activate python venv, aborting...\e[0m" + printf "\n%s\n" "${delimiter}" + exit 1 +fi + +printf "\n%s\n" "${delimiter}" +printf "Launching launch.py..." +printf "\n%s\n" "${delimiter}" +"${python_cmd}" "${LAUNCH_SCRIPT}"