import gradio
import av, pathlib, diffusers, torch, transformers, builtins, numpy, re
from animatediff.generate import controlnet_preprocess, img2img_preprocess, wild_card_conversion, region_preprocess, unload_controlnet_models
from animatediff.settings import get_model_config, get_infer_config
from animatediff.utils.pipeline import send_to_device
from animatediff.utils.util import set_tensor_interpolation_method
from animatediff.pipelines import load_text_embeddings
from animatediff.pipelines.lora import load_lcm_lora
import huggingface_hub
import animatediff
import animatediff.models.unet
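# Target video: 432x768 portrait frames, 1440 frames in total (3 minutes at the 8 fps used for encoding below).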
width = 432
height = 768
length = 1440
model_config = get_model_config('config/prompts/prompt_travel.json')
is_sdxl = False
infer_config = get_infer_config(True, is_sdxl)
set_tensor_interpolation_method(model_config.tensor_interpolation_slerp)
device = torch.device('cuda')
save_dir = pathlib.Path('output')
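# Preprocess any ControlNet and img2img inputs declared in the prompt-travel config.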
controlnet_image_map, controlnet_type_map, controlnet_ref_map, controlnet_no_shrink = controlnet_preprocess(model_config.controlnet_map, width, height, length, save_dir, device, is_sdxl)
img2img_map = img2img_preprocess(model_config.img2img_map, width, height, length, save_dir)
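# Cache the SD 1.5 base model locally so the tokenizer, text encoder, and feature extractor can be loaded by subfolder.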
base_model = pathlib.Path('/tmp/base')
diffusers.StableDiffusionPipeline.from_pretrained('stable-diffusion-v1-5/stable-diffusion-v1-5').save_pretrained(base_model)
tokenizer = transformers.CLIPTokenizer.from_pretrained(base_model, subfolder='tokenizer')
text_encoder = transformers.CLIPTextModel.from_pretrained(base_model, subfolder='text_encoder')
vae = diffusers.AutoencoderKL.from_single_file('https://huggingface.co/chaowenguoback/pal/blob/main/vae-ft-mse-840000-ema-pruned.safetensors')
huggingface_hub.hf_hub_download(repo_id='wangfuyun/AnimateLCM', filename='AnimateLCM_sd15_t2v.ckpt', local_dir=pathlib.Path.cwd())
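# Build the AnimateDiff UNet from the SD 1.5 base weights plus the AnimateLCM motion module, then create the feature extractor.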
unet = animatediff.models.unet.UNet2DConditionModel.from_pretrained_2d(
    pretrained_model_path=base_model,
    motion_module_path=pathlib.Path.cwd().joinpath('AnimateLCM_sd15_t2v.ckpt'),
    subfolder='unet',
    unet_additional_kwargs=infer_config.unet_additional_kwargs,
)
feature_extractor = transformers.CLIPImageProcessor.from_pretrained(base_model, subfolder='feature_extractor')
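# Overlay the ChilloutMix checkpoint weights onto the UNet and text encoder; strict=False tolerates the missing motion-module keys.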
pipeline = diffusers.StableDiffusionPipeline.from_single_file('https://huggingface.co/chaowenguoback/15/blob/main/chilloutMix-Ni.safetensors', config='stable-diffusion-v1-5/stable-diffusion-v1-5', safety_checker=None, use_safetensors=True)
unet.load_state_dict(pipeline.unet.state_dict(), strict=False)
text_encoder.load_state_dict(pipeline.text_encoder.state_dict(), strict=False)
del pipeline
unet.enable_xformers_memory_efficient_attention()
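# Assemble the AnimateDiff animation pipeline with an LCM scheduler for few-step sampling.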
pipeline = animatediff.pipelines.AnimationPipeline(
vae=vae,
text_encoder=text_encoder,
tokenizer=tokenizer,
unet=unet,
scheduler=diffusers.LCMScheduler.from_config(infer_config.noise_scheduler_kwargs),
feature_extractor=feature_extractor,
controlnet_map=None,
)
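# Download the AnimateLCM LoRA, apply it with a scale schedule, then stack the detail, bikini, c-string, and character LoRAs.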
lcm_lora = pathlib.Path.cwd().joinpath('data/models/lcm_lora/sd15')
lcm_lora.mkdir(parents=True, exist_ok=True)
huggingface_hub.hf_hub_download(repo_id='wangfuyun/AnimateLCM', filename='AnimateLCM_sd15_t2v_lora.safetensors', local_dir=lcm_lora)
load_lcm_lora(pipeline, {'start_scale':0.15, 'end_scale':0.75, 'gradient_start':0.2, 'gradient_end':0.75}, is_sdxl=is_sdxl)
pipeline.lora_map = None
pipeline.load_lora_weights('chaowenguoback/15', weight_name='add_detail.safetensors', adapter_name='detail')
pipeline.load_lora_weights('chaowenguoback/15', weight_name='b1r1av5-000007.safetensors', adapter_name='bikini')
pipeline.load_lora_weights('chaowenguoback/15', weight_name='btcstr.safetensors', adapter_name='c-string')
pipeline.load_lora_weights('chaowenguoback/15', weight_name='蓝洁瑛.safetensors', adapter_name='character')
pipeline.set_adapters(['detail', 'bikini', 'c-string', 'character'], [1, 0.4, 0.2, 0.8])
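# Cast the UNet and text encoder to fp16, load the text embeddings on the GPU, then move the whole pipeline to the device.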
pipeline.unet = pipeline.unet.half()
pipeline.text_encoder = pipeline.text_encoder.half()
pipeline.text_encoder = pipeline.text_encoder.to(device)
load_text_embeddings(pipeline)
pipeline.text_encoder = pipeline.text_encoder.to('cpu')
pipeline = send_to_device(pipeline, device, freeze=True, force_half=False, compile=False, is_sdxl=is_sdxl)
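# Expand wildcard tokens in the prompts and build the per-region prompt/condition lists; then drop ControlNet region references with no matching region.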
wild_card_conversion(model_config)
is_init_img_exist = img2img_map is not None
region_condi_list, region_list, ip_adapter_config_map, region2index = region_preprocess(model_config, width, height, length, save_dir, is_init_img_exist, is_sdxl)
if controlnet_type_map:
    for c in controlnet_type_map:
        tmp_r = [region2index[r] for r in controlnet_type_map[c]["control_region_list"]]
        controlnet_type_map[c]["control_region_list"] = [r for r in tmp_r if r != -1]
prompt_map = region_condi_list[0]["prompt_map"]
prompt_tags = [re.compile(r"[^\w\-, ]").sub("", tag).strip().replace(" ", "-") for tag in prompt_map[list(prompt_map.keys())[0]].split(",")]
prompt_str = "_".join((prompt_tags[:6]))[:50]
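# Run the animation pipeline: 8 LCM steps, 16-frame context windows with a 4-frame overlap.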
output = pipeline(
    n_prompt='nipple, waistband, back view, monochrome, longbody, lowres, bad anatomy, bad hands, fused fingers, missing fingers, too many fingers, cropped, worst quality, low quality, deformed body, bloated, ugly, unrealistic, extra hands and arms',
    num_inference_steps=8,
    guidance_scale=3,
    unet_batch_size=1,
    width=width,
    height=height,
    video_length=length,
    return_dict=False,
    context_frames=16,
    context_stride=1,
    context_overlap=16 // 4,
    context_schedule='composite',
    clip_skip=2,
    controlnet_type_map=controlnet_type_map,
    controlnet_image_map=controlnet_image_map,
    controlnet_ref_map=controlnet_ref_map,
    controlnet_no_shrink=controlnet_no_shrink,
    controlnet_max_samples_on_vram=model_config.controlnet_map["max_samples_on_vram"] if "max_samples_on_vram" in model_config.controlnet_map else 999,
    controlnet_max_models_on_vram=model_config.controlnet_map["max_models_on_vram"] if "max_models_on_vram" in model_config.controlnet_map else 99,
    controlnet_is_loop=model_config.controlnet_map["is_loop"] if "is_loop" in model_config.controlnet_map else True,
    img2img_map=img2img_map,
    ip_adapter_config_map=ip_adapter_config_map,
    region_list=region_list,
    region_condi_list=region_condi_list,
    interpolation_factor=1,
    is_single_prompt_mode=model_config.is_single_prompt_mode,
    gradual_latent_map=model_config.gradual_latent_hires_fix_map,
    callback=None,
    callback_steps=None,
)
unload_controlnet_models(pipe=pipeline)
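# Rearrange the (B, C, F, H, W) output into per-frame HWC uint8 arrays for encoding.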
frames = output.permute(0, 2, 1, 3, 4).squeeze(0)
frames = frames.mul(255).add_(0.5).clamp_(0, 255).permute(0, 2, 3, 1).to("cpu", torch.uint8).numpy()
del pipeline
torch.cuda.empty_cache()
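# Generate a 180-second music track with AudioLDM2 to accompany the 3-minute video.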
pipeline = diffusers.AudioLDM2Pipeline.from_pretrained('cvssp/audioldm2-music', torch_dtype=torch.float16).to('cuda')
pipeline.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
music = pipeline(prompt='Light rhythm techno', negative_prompt='low quality, average quality', num_inference_steps=20, audio_length_in_s=180).audios[0]
del pipeline
torch.cuda.empty_cache()
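# Mux the frames (h264 at 8 fps) and the music (16 kHz mono AAC) into video.mp4 with PyAV.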
with av.open('video.mp4', mode='w') as writer:
    video = writer.add_stream('h264', rate=8)
    video.width = width * 4
    video.height = height * 4
    video.pix_fmt = 'yuv420p'
    audio = writer.add_stream('aac', rate=16000)
    for frame in frames: writer.mux(video.encode(av.VideoFrame.from_ndarray(frame)))
    writer.mux(video.encode())
    for _ in builtins.range(0, music.shape[0], audio.frame_size):
        frame = av.AudioFrame.from_ndarray(music[_:_ + audio.frame_size][None], format='fltp', layout='mono')
        frame.sample_rate = audio.sample_rate
        frame.pts = _
        writer.mux(audio.encode(frame))
    writer.mux(audio.encode())
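# Minimal Gradio interface so the Space exposes a callable endpoint.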
def greet(name, intensity):
    return "Hello, " + name + "!" * int(intensity)
demo = gradio.Interface(
    fn=greet,
    inputs=["text", "slider"],
    outputs=["text"],
    api_name="predict",
)
demo.launch()