Address PR feedback: comment fixes, antialias assert, token-limit note

tdakhran · tdakhran · commit b81928f9795d · 2025-11-30T11:27:48.000+01:00
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
@@ -7427,7 +7427,7 @@ static void ggml_compute_forward_upscale_f32(
             return std::max(1.0f - fabsf(x), 0.0f);
         };
 
-        // support and invscale, maximum 1 pixel for bilinear
+        // support and invscale, minimum 1 pixel for bilinear
         const float support1  = std::max(1.0f, 1.0f / sf1);
         const float invscale1 = 1.0f / support1;
         const float support0  = std::max(1.0f, 1.0f / sf0);
diff --git a/ggml/src/ggml-cuda/upscale.cu b/ggml/src/ggml-cuda/upscale.cu
@@ -107,7 +107,7 @@ static __global__ void upscale_f32_bilinear_antialias(const float * src0, float
     const float y = ((float)i11_dst + pixel_offset) / sf1;
     const float x = ((float)i10_dst + pixel_offset) / sf0;
 
-    // support and invscale, maximum 1 pixel for bilinear
+    // support and invscale, minimum 1 pixel for bilinear
     const float support1  = max(1.0f / sf1, 1.0f);
     const float invscale1 = 1.0f / support1;
     const float support0  = max(1.0f / sf0, 1.0f);
@@ -281,7 +281,7 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     if (mode == GGML_SCALE_MODE_NEAREST) {
         upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream);
     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        bool antialias = (mode_flags & GGML_SCALE_FLAG_ANTIALIAS);
+        const bool antialias = (mode_flags & GGML_SCALE_FLAG_ANTIALIAS);
         upscale_f32_bilinear_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
                                  src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
                                  sf0, sf1, sf2, sf3, pixel_offset, antialias, stream);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -4891,6 +4891,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
         int64_t               ne3,
         uint32_t              mode) {
     GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
+    // TODO: implement antialias for modes other than bilinear
+    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
 
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
@@ -2787,6 +2787,7 @@ struct clip_model_loader {
                     {
                         get_u32(KEY_PROJ_SCALE_FACTOR, hparams.n_merge, false);
                         // ref: https://huggingface.co/LiquidAI/LFM2-VL-3B/blob/main/preprocessor_config.json
+                        // the config above specifies the number of tokens after downsampling, whereas the limit here applies before downsampling, so relax the lower bound to 64
                         hparams.set_limit_image_tokens(64, 1024);
                     } break;
                 case PROJECTOR_TYPE_PIXTRAL:

Original file line number	Diff line number	Diff line change
`@@ -2787,6 +2787,7 @@ struct clip_model_loader {`
`2787`	`2787`	`{`
`2788`	`2788`	`get_u32(KEY_PROJ_SCALE_FACTOR, hparams.n_merge, false);`
`2789`	`2789`	`// ref: https://huggingface.co/LiquidAI/LFM2-VL-3B/blob/main/preprocessor_config.json`
	`2790`	`+ // the config above specifies the number of tokens after downsampling, whereas the limit here applies before downsampling, so relax the lower bound to 64`
`2790`	`2791`	`hparams.set_limit_image_tokens(64, 1024);`
`2791`	`2792`	`} break;`
`2792`	`2793`	`case PROJECTOR_TYPE_PIXTRAL:`