diff --git a/cli/src/main.rs b/cli/src/main.rs index 4a94f17..fedd669 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -11,10 +11,10 @@ use diffusion_rs::{ preset::{ Anima2Weight, AnimaWeight, ChromaRadianceWeight, ChromaWeight, DiffInstructStarWeight, ErnieImageWeight, Flux1MiniWeight, Flux1Weight, Flux2Klein4BWeight, Flux2Klein9BWeight, - Flux2KleinBase4BWeight, Flux2KleinBase9BWeight, Flux2Weight, NitroSDRealismWeight, - NitroSDVibrantWeight, OvisImageWeight, Preset, PresetBuilder, PresetDiscriminants, - QwenImageWeight, SDXS512DreamShaperWeight, SSD1BWeight, TwinFlowZImageTurboExpWeight, - WeightType, ZImageTurboWeight, + Flux2KleinBase4BWeight, Flux2KleinBase9BWeight, Flux2Weight, LongCatImageWeight, + NitroSDRealismWeight, NitroSDVibrantWeight, OvisImageWeight, Preset, PresetBuilder, + PresetDiscriminants, QwenImageWeight, SDXS512DreamShaperWeight, SSD1BWeight, + TwinFlowZImageTurboExpWeight, WeightType, ZImageTurboWeight, }, util::set_hf_token, }; @@ -412,6 +412,12 @@ fn get_preset(args: &Args) -> Preset { ), PresetDiscriminants::HiDreamO1ImageDev => Preset::HiDreamO1ImageDev, PresetDiscriminants::HiDreamO1Image => Preset::HiDreamO1Image, + PresetDiscriminants::LongCatImage => Preset::LongCatImage( + args.weights + .unwrap_or_else(|| LongCatImageWeight::default().into()) + .try_into() + .unwrap(), + ), }; preset } diff --git a/src/api.rs b/src/api.rs index 6338af2..fe2ec65 100644 --- a/src/api.rs +++ b/src/api.rs @@ -309,6 +309,9 @@ pub struct HiresParams { /// highres fix second pass denoising strength (default: 0.7) #[builder(default = "0.7")] denoising_strength: f32, + /// Custom sigma values for the highres fix second pass + #[builder(default = "None")] + hires_sigmas: Option<Vec<f32>>, } /// Config struct for a specific diffusion model @@ -561,6 +564,10 @@ pub struct ModelConfig { #[builder(default = "true")] vae_temporal_tiling: bool, + /// Extra VAE tiling args, key=value list. 
LTX video VAE supports + #[builder(default = "(None, CLibString::default())", setter(custom))] + extra_tiling_args: (Option<HashMap<String, String>>, CLibString), + #[builder(default = "None", private)] upscaler_ctx: Option<*mut upscaler_ctx_t>, @@ -711,6 +718,22 @@ impl ModelConfigBuilder { self.params_backend = Some((Some(backend_map), CLibString::from(params_backend_str))); self } + + pub fn extra_tiling_args( + &mut self, + extra_tiling_args_map: HashMap<String, String>, + ) -> &mut Self { + let extra_tiling_args_str = extra_tiling_args_map + .iter() + .map(|(key, value)| format!("{}={}", key, value)) + .collect::<Vec<_>>() + .join(","); + self.extra_tiling_args = Some(( + Some(extra_tiling_args_map), + CLibString::from(extra_tiling_args_str), + )); + self + } } impl ModelConfig { @@ -884,7 +907,8 @@ impl From<&ModelConfig> for ModelConfigBuilder { ) .extra_sample_params(value.extra_sample_params.clone()) .backend(value.backend.0.clone().unwrap_or_default()) - .params_backend(value.params_backend.0.clone().unwrap_or_default()); + .params_backend(value.params_backend.0.clone().unwrap_or_default()) + .extra_tiling_args(value.extra_tiling_args.0.clone().unwrap_or_default()); builder.lora_models_internal(value.lora_models.clone()); @@ -1368,6 +1392,7 @@ fn gen_img_maybe_progress( rel_size_x: model_config.vae_relative_tile_size.0, rel_size_y: model_config.vae_relative_tile_size.1, temporal_tiling: model_config.vae_temporal_tiling, + extra_tiling_args: model_config.extra_tiling_args.1.as_ptr(), }; let pm_params = sd_pm_params_t { id_images: null_mut(), @@ -1506,9 +1531,15 @@ fn gen_img_maybe_progress( } let mut hires_path = null(); + let mut hires_sigmas = null_mut(); + let mut hires_sigmas_count = 0; if let Some(path) = &model_config.hires_params.2 { hires_path = path.as_ptr(); } + if let Some(sigmas) = &mut model_config.hires_params.1.hires_sigmas { + hires_sigmas = sigmas.as_mut_ptr(); + hires_sigmas_count = sigmas.len() as i32; + } let hires = sd_hires_params_t { enabled: model_config.hires_params.0 != 
Upscaler::SD_HIRES_UPSCALER_NONE, @@ -1520,6 +1551,8 @@ fn gen_img_maybe_progress( steps: model_config.hires_params.1.steps, denoising_strength: model_config.hires_params.1.denoising_strength, upscale_tile_size: model_config.hires_params.1.upscale_tile_size, + custom_sigmas: hires_sigmas, + custom_sigmas_count: hires_sigmas_count, }; let sd_img_gen_params = sd_img_gen_params_t { diff --git a/src/preset.rs b/src/preset.rs index 3a42e4b..3836627 100644 --- a/src/preset.rs +++ b/src/preset.rs @@ -9,12 +9,12 @@ use crate::{ anima, anima2, chroma, chroma_radiance, diff_instruct_star, dream_shaper_xl_2_1_turbo, ernie_image, ernie_image_turbo, flux_1_dev, flux_1_mini, flux_1_schnell, flux_2_dev, flux_2_klein_4b, flux_2_klein_9b, flux_2_klein_base_4b, flux_2_klein_base_9b, - hi_dream_o1_image, hi_dream_o1_image_dev, juggernaut_xl_11, nitro_sd_realism, - nitro_sd_vibrant, ovis_image, qwen_image, sd_turbo, sdxl_base_1_0, sdxl_turbo_1_0, - sdxs512_dream_shaper, segmind_vega, ssd_1b, stable_diffusion_1_4, stable_diffusion_1_5, - stable_diffusion_2_1, stable_diffusion_3_5_large, stable_diffusion_3_5_large_turbo, - stable_diffusion_3_5_medium, stable_diffusion_3_medium, twinflow_z_image_turbo, - z_image_turbo, + hi_dream_o1_image, hi_dream_o1_image_dev, juggernaut_xl_11, long_cat_image, + nitro_sd_realism, nitro_sd_vibrant, ovis_image, qwen_image, sd_turbo, sdxl_base_1_0, + sdxl_turbo_1_0, sdxs512_dream_shaper, segmind_vega, ssd_1b, stable_diffusion_1_4, + stable_diffusion_1_5, stable_diffusion_2_1, stable_diffusion_3_5_large, + stable_diffusion_3_5_large_turbo, stable_diffusion_3_5_medium, stable_diffusion_3_medium, + twinflow_z_image_turbo, z_image_turbo, }, }; @@ -41,7 +41,8 @@ use crate::{ AnimaWeight(derive(Default)), Anima2Weight(derive(Default)), SDXS512DreamShaperWeight(derive(Default)), - ErnieImageWeight(derive(Default)) + ErnieImageWeight(derive(Default)), + LongCatImageWeight(derive(Default)) )] #[derive(Debug, Clone, Copy, EnumString, VariantNames)] 
#[strum(ascii_case_insensitive)] @@ -74,10 +75,17 @@ pub enum WeightType { Flux2Klein9BWeight(default), Flux2KleinBase9BWeight(default), AnimaWeight, - ErnieImageWeight(default) + ErnieImageWeight(default), + LongCatImageWeight(default) )] Q4_0, - #[subenum(Flux2Weight, QwenImageWeight, AnimaWeight, ErnieImageWeight)] + #[subenum( + Flux2Weight, + QwenImageWeight, + AnimaWeight, + ErnieImageWeight, + LongCatImageWeight + )] Q4_1, #[subenum( NitroSDRealismWeight, @@ -88,10 +96,17 @@ pub enum WeightType { QwenImageWeight, TwinFlowZImageTurboExpWeight, AnimaWeight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] Q5_0, - #[subenum(Flux2Weight, QwenImageWeight, AnimaWeight, ErnieImageWeight)] + #[subenum( + Flux2Weight, + QwenImageWeight, + AnimaWeight, + ErnieImageWeight, + LongCatImageWeight + )] Q5_1, #[subenum( Flux1Weight, @@ -112,7 +127,8 @@ pub enum WeightType { AnimaWeight(default), Anima2Weight(default), SDXS512DreamShaperWeight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] Q8_0, Q8_1, @@ -139,7 +155,8 @@ pub enum WeightType { QwenImageWeight, TwinFlowZImageTurboExpWeight, AnimaWeight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] Q3_K, #[subenum( @@ -149,7 +166,8 @@ pub enum WeightType { QwenImageWeight, AnimaWeight, Anima2Weight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] Q4_K, #[subenum( @@ -158,7 +176,8 @@ pub enum WeightType { QwenImageWeight, AnimaWeight, Anima2Weight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] Q5_K, #[subenum( @@ -172,7 +191,8 @@ pub enum WeightType { TwinFlowZImageTurboExpWeight, AnimaWeight, Anima2Weight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] Q6_K, Q8_K, @@ -205,12 +225,15 @@ pub enum WeightType { Flux2KleinBase9BWeight, AnimaWeight, Anima2Weight, - ErnieImageWeight + ErnieImageWeight, + LongCatImageWeight )] BF16, TQ1_0, TQ2_0, MXFP4, + NVFP4, + Q1_0, #[subenum(SSD1BWeight(default), QwenImageWeight)] F8_E4M3, } @@ -313,6 
+336,9 @@ pub enum Preset { HiDreamO1ImageDev, /// cfg_scale 1.0. 20 steps 1024x1024. HiDreamO1Image, + /// Requires access rights to <https://huggingface.co/black-forest-labs/FLUX.1-dev> providing a token via [crate::util::set_hf_token] + /// cfg_scale 5.0. Enable [crate::api::SampleMethod::EULER_SAMPLE_METHOD] and Diffusion Flash attention. flow_shift 3.0. 512 x 512. 20 steps + LongCatImage(LongCatImageWeight), } impl Preset { @@ -356,6 +382,7 @@ impl Preset { Preset::ErnieImageTurbo(sd_type_t) => ernie_image_turbo(sd_type_t), Preset::HiDreamO1ImageDev => hi_dream_o1_image_dev(), Preset::HiDreamO1Image => hi_dream_o1_image(), + Preset::LongCatImage(sd_type_t) => long_cat_image(sd_type_t), } } } @@ -682,4 +709,9 @@ mod tests { fn test_ernie_image_turbo() { run(Preset::ErnieImageTurbo(super::ErnieImageWeight::Q4_0)); } + #[ignore] + #[test] + fn long_cat_image() { + run(Preset::LongCatImage(super::LongCatImageWeight::Q4_0)); + } } diff --git a/src/preset_builder.rs b/src/preset_builder.rs index 4084cdd..f4d66f3 100644 --- a/src/preset_builder.rs +++ b/src/preset_builder.rs @@ -10,8 +10,9 @@ use crate::{ Anima2Weight, AnimaWeight, ChromaRadianceWeight, ChromaWeight, ConfigsBuilder, DiffInstructStarWeight, ErnieImageWeight, Flux1MiniWeight, Flux1Weight, Flux2Klein4BWeight, Flux2Klein9BWeight, Flux2KleinBase4BWeight, Flux2KleinBase9BWeight, Flux2Weight, - NitroSDRealismWeight, NitroSDVibrantWeight, OvisImageWeight, QwenImageWeight, - SDXS512DreamShaperWeight, SSD1BWeight, TwinFlowZImageTurboExpWeight, ZImageTurboWeight, + LongCatImageWeight, NitroSDRealismWeight, NitroSDVibrantWeight, OvisImageWeight, + QwenImageWeight, SDXS512DreamShaperWeight, SSD1BWeight, TwinFlowZImageTurboExpWeight, + ZImageTurboWeight, }, }; use diffusion_rs_sys::scheduler_t; @@ -1547,3 +1548,134 @@ pub fn hi_dream_o1_image() -> Result<ConfigsBuilder, ApiError> { Ok((config, model_config)) } + +pub fn long_cat_image(sd_type: LongCatImageWeight) -> Result<ConfigsBuilder, ApiError> { + let (model, llm) = long_cat_image_weight_llm(sd_type)?; + let vae = 
download_file_hf_hub("black-forest-labs/FLUX.1-dev", "ae.safetensors")?; + let mut config = ConfigBuilder::default(); + let mut model_config = ModelConfigBuilder::default(); + + model_config + .diffusion_model(model) + .llm(llm) + .vae(vae) + .flow_shift(3.) + .diffusion_flash_attention(true); + + config + .sampling_method(SampleMethod::EULER_SAMPLE_METHOD) + .cfg_scale(5.) + .steps(20) + .height(512) + .width(512); + + Ok((config, model_config)) +} + +fn long_cat_image_weight_llm(sd_type: LongCatImageWeight) -> Result<(PathBuf, PathBuf), ApiError> { + let (model, llm) = match sd_type { + LongCatImageWeight::Q4_0 => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q4_0.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf", + ), + ), + LongCatImageWeight::Q4_1 => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q4_1.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf", + ), + ), + LongCatImageWeight::Q5_0 => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q5_0.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q5_K_M.gguf", + ), + ), + LongCatImageWeight::Q5_1 => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q5_1.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q5_K_M.gguf", + ), + ), + LongCatImageWeight::Q8_0 => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q8_0.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q8_0.gguf", + ), + ), + LongCatImageWeight::Q3_K => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q3_K_M.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q3_K_M.gguf", + ), + ), + LongCatImageWeight::Q4_K => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q4_K_M.gguf", + ), + ( + 
"mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf", + ), + ), + LongCatImageWeight::Q5_K => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q5_K_M.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q5_K_M.gguf", + ), + ), + LongCatImageWeight::Q6_K => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-Q6_K.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.Q6_K.gguf", + ), + ), + LongCatImageWeight::BF16 => ( + ( + "vantagewithai/LongCat-Image-GGUF", + "comfy/LongCat-Image-BF16.gguf", + ), + ( + "mradermacher/Qwen2.5-VL-7B-Instruct-GGUF", + "Qwen2.5-VL-7B-Instruct.f16.gguf", + ), + ), + }; + let model_path = download_file_hf_hub(model.0, model.1)?; + let llm_path = download_file_hf_hub(llm.0, llm.1)?; + Ok((model_path, llm_path)) +} diff --git a/sys/stable-diffusion.cpp b/sys/stable-diffusion.cpp index baf7eda..72e512a 160000 --- a/sys/stable-diffusion.cpp +++ b/sys/stable-diffusion.cpp @@ -1 +1 @@ -Subproject commit baf7eda1e4eeeefb87a2ca4ad0257977b95cf538 +Subproject commit 72e512a0ccb087ec6f7b3c764e93c9a6e6e3bd20