"""Generate a video from a single input image with HunyuanVideo I2V.

The script downloads the "HunyuanVideoI2V" model files and one example image,
loads the checkpoints on the CPU, builds a CUDA pipeline with VRAM management
enabled, runs 50 inference steps for a fixed prompt and seed, and writes the
result to ``video_<resolution>_low_vram.mp4``.
"""

import torch
from diffsynth import ModelManager, HunyuanVideoPipeline, download_models, save_video
from modelscope import dataset_snapshot_download
from PIL import Image


# Fetch the model files referenced by the "models/HunyuanVideoI2V/..." paths below.
download_models(["HunyuanVideoI2V"])
model_manager = ModelManager()

# The transformer checkpoint is loaded in bfloat16 and kept on the CPU.
model_manager.load_models(
    [
        "models/HunyuanVideoI2V/transformers/mp_rank_00_model_states.pt",
    ],
    torch_dtype=torch.bfloat16,
    device="cpu",
)

# The two text encoders and the VAE are loaded in float16, also on the CPU.
model_manager.load_models(
    [
        "models/HunyuanVideoI2V/text_encoder/model.safetensors",
        "models/HunyuanVideoI2V/text_encoder_2",
        "models/HunyuanVideoI2V/vae/pytorch_model.pt",
    ],
    torch_dtype=torch.float16,
    device="cpu",
)

# The pipeline computes on "cuda".
# NOTE(review): enable_vram_management presumably moves weights between CPU and
# GPU on demand to lower peak VRAM (the output file name says "low_vram") —
# confirm against the DiffSynth documentation.
pipe = HunyuanVideoPipeline.from_model_manager(
    model_manager,
    torch_dtype=torch.bfloat16,
    device="cuda",
    enable_vram_management=True,
)

# Download only the example assets this script reads.
dataset_snapshot_download(
    dataset_id="DiffSynth-Studio/examples_in_diffsynth",
    local_dir="./",
    allow_file_pattern="data/examples/hunyuanvideo/*",
)

i2v_resolution = "720p"
prompt = "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick."

# convert() returns a separate RGB image, so the source file handle can be
# closed as soon as the conversion is done.
with Image.open("data/examples/hunyuanvideo/0.jpg") as source_image:
    images = [source_image.convert("RGB")]

video = pipe(
    prompt,
    input_images=images,
    num_inference_steps=50,
    seed=0,
    i2v_resolution=i2v_resolution,
)
save_video(video, f"video_{i2v_resolution}_low_vram.mp4", fps=30, quality=6)