diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 4cea83a1..2ca46f09 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2454,18 +2454,24 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, LOG_WARN("Turn off PhotoMaker"); sd_ctx->sd->stacked_id = false; } else { - id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask); - int64_t t1 = ggml_time_ms(); - LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0); - if (sd_ctx->sd->free_params_immediately) { - sd_ctx->sd->pmid_model->free_params_buffer(); - } - // Encode input prompt without the trigger word for delayed conditioning - prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt); - // printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str()); - prompt = prompt_text_only; // - if (sample_steps < 50) { - LOG_WARN("It's recommended to use >= 50 steps for photo maker!"); + if (pm_params.id_images_count != id_embeds->ne[1]) { + LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%d). You should run face_detect.py again.",pm_params.id_images_count,id_embeds->ne[1]); + LOG_WARN("Turn off PhotoMaker"); + sd_ctx->sd->stacked_id = false; + } else { + id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask); + int64_t t1 = ggml_time_ms(); + LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->pmid_model->free_params_buffer(); + } + // Encode input prompt without the trigger word for delayed conditioning + prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt); + // printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str()); + prompt = prompt_text_only; // + if (sample_steps < 50) { + LOG_WARN("It's recommended to use >= 50 steps for photo maker!"); + } } } } else {