import logging

import gradio as gr
import spaces

from depthcrafter.inference import DepthCrafterInference

logging.basicConfig(level=logging.INFO)

examples = [
    # Examples temporarily removed for Hugging Face Spaces deployment
]

# Initialize the inference class once at import time. On ZeroGPU Spaces the
# model starts on CPU; a GPU is only attached inside @spaces.GPU calls.
depthcrafter_inference = DepthCrafterInference(
    unet_path="tencent/DepthCrafter",
    pre_train_path="stabilityai/stable-video-diffusion-img2vid-xt",
    cpu_offload=None,
    device="cpu",
)


# Request a ZeroGPU slot for up to 120 seconds per call.
@spaces.GPU(duration=120)
def infer_depth(
    video: str,
    num_denoising_steps: int,
    guidance_scale: float,
    max_res: int = 1024,
    process_length: int = -1,
    target_fps: int = -1,
    save_folder: str = "./demo_output",
    window_size: int = 110,
    overlap: int = 25,
    seed: int = 42,
    track_time: bool = False,
    save_npz: bool = False,
):
    """Run DepthCrafter on the uploaded video and return the result paths."""
    res_paths = depthcrafter_inference.infer(
        video_path=video,
        num_denoising_steps=num_denoising_steps,
        guidance_scale=guidance_scale,
        save_folder=save_folder,
        window_size=window_size,
        process_length=process_length,
        overlap=overlap,
        max_res=max_res,
        target_fps=target_fps,
        seed=seed,
        track_time=track_time,
        save_npz=save_npz,
    )
    depthcrafter_inference.clear_cache()
    # The first two paths are the preprocessed input and the depth
    # visualization, matching the two output video players in the UI.
    return res_paths[:2]


def construct_demo():
    with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
        gr.Markdown(
            """

# DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos

Wenbo Hu, Xiangjun Gao, Xiaoyu Li, Sijie Zhao, Xiaodong Cun, Yong Zhang, Long Quan, Ying Shan

If you find DepthCrafter useful, please help ⭐ the [Github Repo], which is important to open-source projects. Thanks! [ArXiv] [Project Page]
""" ) with gr.Row(equal_height=True): with gr.Column(scale=1): input_video = gr.Video(label="Input Video") # with gr.Tab(label="Output"): with gr.Column(scale=2): with gr.Row(equal_height=True): output_video_1 = gr.Video( label="Preprocessed video", interactive=False, autoplay=True, loop=True, scale=5, ) output_video_2 = gr.Video( label="Generated Depth Video", interactive=False, autoplay=True, loop=True, scale=5, ) with gr.Row(equal_height=True): with gr.Column(scale=1): with gr.Row(equal_height=False): with gr.Accordion("Advanced Settings", open=False): num_denoising_steps = gr.Slider( label="num denoising steps", minimum=1, maximum=25, value=5, step=1, ) guidance_scale = gr.Slider( label="cfg scale", minimum=1.0, maximum=1.2, value=1.0, step=0.1, ) max_res = gr.Slider( label="max resolution", minimum=512, maximum=2048, value=1024, step=64, ) process_length = gr.Slider( label="process length", minimum=-1, maximum=280, value=60, step=1, ) process_target_fps = gr.Slider( label="target FPS", minimum=-1, maximum=30, value=15, step=1, ) generate_btn = gr.Button("Generate") with gr.Column(scale=2): pass gr.Examples( examples=examples, inputs=[ input_video, num_denoising_steps, guidance_scale, max_res, process_length, process_target_fps, ], outputs=[output_video_1, output_video_2], fn=infer_depth, cache_examples=False, ) gr.Markdown( """ Note: For time quota consideration, we set the default parameters to be more efficient here, with a trade-off of shorter video length and slightly lower quality. You may adjust the parameters according to our [Github Repo] for better results if you have enough time quota. """ ) generate_btn.click( fn=infer_depth, inputs=[ input_video, num_denoising_steps, guidance_scale, max_res, process_length, process_target_fps, ], outputs=[output_video_1, output_video_2], ) return depthcrafter_iface if __name__ == "__main__": demo = construct_demo() demo.queue() # demo.launch(server_name="0.0.0.0", server_port=12345, # debug=True, share=False) demo.launch(share=True)