Stable Diffusion upper-body inpainting experiments
5 min read · Jul 28, 2023
This continues from here. He would like to see an A.I.-improved version of himself.
I will change his upper body and also update his outfit with inpainting.
Here is my mask.
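If you want to prepare the mask in code rather than in an image editor, a minimal sketch looks like this (the threshold value is an assumption; the path matches the one used later in this post): load the painted mask, force it to a single channel, and binarize it so the region to repaint is pure white.
from PIL import Image

# force the hand-painted mask to a single channel and binarize it
# so that the region to repaint is pure white (255), the rest black (0)
mask = Image.open("sources/koh/mask_koh.png").convert("L")
mask = mask.point(lambda p: 255 if p > 127 else 0)
mask.save("sources/koh/mask_koh.png")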
ControlNet
controlnets = [
ControlNetModel.from_pretrained(
"lllyasviel/control_v11p_sd15_inpaint",
).to(device),
]
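For control_v11p_sd15_inpaint, the control image is the source photo with every masked pixel overwritten by -1.0; the make_inpaint_condition helper in the full source below builds exactly that tensor.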
Pipeline instance
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
model_dir,
controlnet=controlnets[0],
requires_safety_checker=False,
safety_checker=None
).to(device)
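Calling it is then one invocation per image. A minimal sketch, assuming init_image and mask_body_image are already loaded and resized as in the full source below:
control_image = make_inpaint_condition(init_image, mask_body_image)
result = pipe(
    prompt="a topless masculine man",
    image=init_image,
    mask_image=mask_body_image,
    control_image=control_image,
    num_inference_steps=100,
).images[0]
result.save("result.png")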
Full source code
import itertools
import math
import os.path
import random
import diffusers
import numpy as np
import torch
# !pip install transformers accelerate
from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel
from diffusers.utils import load_image
from tqdm import tqdm
from set_seed import seed_everything
from read_lora import load_lora_weights_orig
from solve_77_limits import get_pipeline_embeds
human_name: str = "koh"
out_dir: str = f"{human_name}"
device: str = "mps" if torch.backends.mps.is_available() else "cpu"
# device: str = "cpu" # With this LoRA it can't run with mps. It raises RuntimeError: Invalid buffer size: 58.07 GB
print(device)
init_image = load_image(f"sources/{human_name}/koh.jpeg")
width, height = init_image.size
size_factor: float = 0.5
new_width, new_height = math.floor(width * size_factor / 8) * 8, math.floor(height * size_factor / 8) * 8
init_image = init_image.resize((new_width, new_height))
seed: int = 8811
seed_everything(seed)
generator = torch.Generator(device=device).manual_seed(seed)
mask_body_image = load_image(
f"sources/{human_name}/mask_koh.png"
)
mask_body_image = mask_body_image.resize((new_width, new_height))
def make_inpaint_condition(image, image_mask):
image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
    assert image.shape[0:2] == image_mask.shape[0:2], "image and image_mask must have the same image size"
    image[image_mask > 0.5] = -1.0  # mark masked pixels; the inpaint ControlNet expects -1.0 here
image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
return image
base_prompt = "a topless masculine man"
additional_prompts = [
"(masterpiece:1.2), best quality,PIXIV",
]
negative_prompt: str = "(low quality, worst quality:1.4),"
strengths = [1, ]
# guidance_scales = [round(0.1 * _, 3) for _ in range(70, 252, 2)]
guidance_scales = [30, ]
# eta_list = [0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2.0, 4, 6, 8, 10]
eta_list = [1]
models = [
("stable-diffusion-v1-5", "runwayml/stable-diffusion-v1-5"), # Reference bad
("majicmixRealistic_v6", "../ai_directory/majicmixRealistic_v6"), # Quite good
("realisticVisionV40_v40VAE", "../ai_directory/realisticVisionV40_v40VAE"),
# ("MeinaV10", "../ai_directory/MeinaV10"), # Anime
# ("perfectWorld_v4Baked", "../ai_directory/perfectWorld_v4Baked"), # Ordinary
("chilloutmix_NiPrunedFp32Fix", "../ai_directory/chilloutmix_NiPrunedFp32Fix"),
("henmixrealV10_henmixrealV10", "../ai_directory/henmixrealV10_henmixrealV10"),
("realisticVisionV40_v40VAE", "../ai_directory/realisticVisionV40_v40VAE"),
]
schedulers = [
("LMSDiscreteScheduler", diffusers.schedulers.scheduling_lms_discrete.LMSDiscreteScheduler),
("DDIMScheduler", diffusers.schedulers.scheduling_ddim.DDIMScheduler),
("DPMSolverMultistepScheduler", diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler),
("EulerDiscreteScheduler", diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler),
("PNDMScheduler", diffusers.schedulers.scheduling_pndm.PNDMScheduler),
("DDPMScheduler", diffusers.schedulers.scheduling_ddpm.DDPMScheduler),
("EulerAncestralDiscreteScheduler",
diffusers.schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteScheduler)
]
loras = [
(None, None),
# ("virginie_efira_v01", "../ai_files/virginie_efira_v01.safetensors")
]
lora_multipliers = [0.5, 1.0, 1.5, 2.0]  # only takes effect when a LoRA is enabled above; with (None, None) it just multiplies the run count
combined_list = list(itertools.product(
models, schedulers, loras, lora_multipliers, strengths, guidance_scales, eta_list, additional_prompts)
)
# Shuffle the combined list
random.shuffle(combined_list)
for item in tqdm(combined_list, total=len(combined_list)):
(model_name, model_dir), (scheduler_name, scheduler), (lora_name, lora_file), lora_multiplier, strength, guidance_scale, eta, add_prompt = item
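    # NOTE: the ControlNet and pipeline are reloaded from disk on every
    # iteration; checking os.path.exists(filename) before loading would
    # skip the expensive setup for combinations that are already done.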
controlnets = [
ControlNetModel.from_pretrained(
"lllyasviel/control_v11p_sd15_inpaint",
).to(device),
]
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
model_dir,
controlnet=controlnets[0],
requires_safety_checker=False,
safety_checker=None
).to(device)
    pipe.requires_safety_checker = False
pipe.safety_checker = None
    if lora_name is not None:
pipe = load_lora_weights_orig(pipe, lora_file, lora_multiplier, device, torch.float32)
pipe.scheduler = scheduler.from_config(pipe.scheduler.config)
my_images = [
make_inpaint_condition(init_image, mask_body_image),
]
prompt: str = f"{base_prompt} {add_prompt}"
prompt_embeds, negative_prompt_embeds = get_pipeline_embeds(pipe, prompt, negative_prompt, device)
filename: str = f"{out_dir}/{model_name}_{scheduler_name}_{lora_name}_{lora_multiplier}_{strength}_{guidance_scale}_{eta}_{base_prompt}_{add_prompt[:20]}.png"
print(f"{filename} is running")
    if not os.path.exists(filename):
        # generate the image, then save it (pipe(...).images[0] is a PIL image)
        image = pipe(
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
            image=init_image,
            mask_image=mask_body_image,
            control_image=my_images[0],
            num_inference_steps=100,
            generator=generator,
            eta=eta,
            strength=strength,
            guidance_scale=guidance_scale,
        ).images[0]
        image.save(filename)
    else:
        print(f"{filename} already exists")
Additional code
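These are the helper modules imported at the top: set_seed.py, read_lora.py, and solve_77_limits.py.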
import os
import random
import torch
import numpy as np
from PIL import Image
def seed_everything(seed: int):
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False  # benchmark picks the fastest (possibly non-deterministic) kernels; keep it off for reproducibility
elif torch.backends.mps.is_available():
torch.mps.manual_seed(seed)
"""https://github.com/huggingface/diffusers/issues/3064#issuecomment-1512429695"""
from collections import defaultdict
import torch
from safetensors.torch import load_file
def load_lora_weights_orig(pipeline, checkpoint_path, multiplier, device, dtype):
LORA_PREFIX_UNET = "lora_unet"
LORA_PREFIX_TEXT_ENCODER = "lora_te"
# load LoRA weight from .safetensors
state_dict = load_file(checkpoint_path, device=device)
updates = defaultdict(dict)
for key, value in state_dict.items():
# it is suggested to print out the key, it usually will be something like below
# "lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_down.weight"
layer, elem = key.split('.', 1)
updates[layer][elem] = value
# directly update weight in diffusers model
for layer, elems in updates.items():
if "text" in layer:
layer_infos = layer.split(LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
curr_layer = pipeline.text_encoder
else:
layer_infos = layer.split(LORA_PREFIX_UNET + "_")[-1].split("_")
curr_layer = pipeline.unet
# find the target layer
temp_name = layer_infos.pop(0)
        while True:  # exits via break once the target layer is found
try:
curr_layer = curr_layer.__getattr__(temp_name)
if len(layer_infos) > 0:
temp_name = layer_infos.pop(0)
elif len(layer_infos) == 0:
break
except Exception:
if len(temp_name) > 0:
temp_name += "_" + layer_infos.pop(0)
else:
temp_name = layer_infos.pop(0)
# get elements for this layer
weight_up = elems['lora_up.weight'].to(dtype)
weight_down = elems['lora_down.weight'].to(dtype)
alpha = elems['alpha']
if alpha:
alpha = alpha.item() / weight_up.shape[1]
else:
alpha = 1.0
        # update weight: conv LoRA weights are 4D and need squeeze/unsqueeze, linear ones are a plain matmul
        if len(weight_up.shape) == 4:
            update = torch.mm(
                weight_up.squeeze(3).squeeze(2),
                weight_down.squeeze(3).squeeze(2),
            ).unsqueeze(2).unsqueeze(3)
        else:
            update = torch.mm(weight_up, weight_down)
        curr_layer.weight.data += multiplier * alpha * update
return pipeline
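Usage is a single call that mutates and returns the pipeline; for example, with the LoRA commented out in the model list above (the multiplier is just an example value):
pipe = load_lora_weights_orig(
    pipe, "../ai_files/virginie_efira_v01.safetensors",
    multiplier=1.0, device=device, dtype=torch.float32,
)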
"""
Overcome the 77-token CLIP limit by encoding the prompt in chunks.
https://github.com/huggingface/diffusers/issues/2136
"""
import torch
def get_pipeline_embeds(pipeline, prompt, negative_prompt, device):
""" Get pipeline embeds for prompts bigger than the maxlength of the pipe
:param pipeline:
:param prompt:
:param negative_prompt:
:param device:
:return:
"""
max_length = pipeline.tokenizer.model_max_length
# simple way to determine length of tokens
count_prompt = len(prompt.split(" "))
count_negative_prompt = len(negative_prompt.split(" "))
# create the tensor based on which prompt is longer
if count_prompt >= count_negative_prompt:
input_ids = pipeline.tokenizer(prompt, return_tensors="pt", truncation=False).input_ids.to(device)
shape_max_length = input_ids.shape[-1]
negative_ids = pipeline.tokenizer(negative_prompt, truncation=False, padding="max_length",
max_length=shape_max_length, return_tensors="pt").input_ids.to(device)
else:
negative_ids = pipeline.tokenizer(negative_prompt, return_tensors="pt", truncation=False).input_ids.to(device)
shape_max_length = negative_ids.shape[-1]
input_ids = pipeline.tokenizer(prompt, return_tensors="pt", truncation=False, padding="max_length",
max_length=shape_max_length).input_ids.to(device)
concat_embeds = []
neg_embeds = []
for i in range(0, shape_max_length, max_length):
concat_embeds.append(pipeline.text_encoder(input_ids[:, i: i + max_length])[0])
neg_embeds.append(pipeline.text_encoder(negative_ids[:, i: i + max_length])[0])
return torch.cat(concat_embeds, dim=1), torch.cat(neg_embeds, dim=1)
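The trick is simple: CLIP can only encode max_length (77) tokens at a time, so the token ids are sliced into 77-token windows, each window is encoded separately, and the embeddings are concatenated along the sequence dimension. The pipeline accepts prompt_embeds of any length, as long as the positive and negative embeds have the same shape.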
Enough for the boring code. Let’s get to the results.
The next step is to upscale the picture.
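One possible sketch of that step, using the diffusers x4 upscaler (the model choice and file names here are assumptions, not necessarily what I will use):
import torch
from PIL import Image
from diffusers import StableDiffusionUpscalePipeline

device = "mps" if torch.backends.mps.is_available() else "cpu"
upscaler = StableDiffusionUpscalePipeline.from_pretrained(
    "stabilityai/stable-diffusion-x4-upscaler"
).to(device)
low_res = Image.open("result.png")  # hypothetical: one of the generated files
upscaled = upscaler(prompt="a topless masculine man", image=low_res).images[0]
upscaled.save("result_x4.png")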