@@ -7402,100 +7402,66 @@ static void ggml_compute_forward_upscale_f32(
74027402 sf1 = ne1 > 1 && ne01 > 1 ? (float )(ne1 - 1 ) / (ne01 - 1 ) : sf1;
74037403 }
74047404
7405- // Antialiasing preprocessing step
7406- // Apply antialiasing filter if flag is set and write directly to dst
7407- bool antialiasing_applied = false ;
7408-
7409- if (mode_flags & GGML_SCALE_FLAG_ANTIALIAS) {
7410- // Only apply antialiasing when downsampling (scale < 1.0)
7411- const float scale0 = (float )ne00 / (float )ne0;
7412- const float scale1 = (float )ne01 / (float )ne1;
7413-
7414- if (scale0 > 1 .0f || scale1 > 1 .0f ) {
7415- // Apply antialiasing filter to src0 and write directly to dst
7416- // PyTorch's bilinear filter function: f(x) = max(0, 1 - |x|)
7417- auto bilinear_filter = [](float x) -> float {
7418- x = fabsf (x);
7419- if (x < 1 .0f ) {
7420- return 1 .0f - x;
7421- }
7422- return 0 .0f ;
7423- };
7424-
7425- const int interp_size = 2 ; // bilinear
7426-
7427- for (int64_t i3 = 0 ; i3 < ne3; i3++) {
7428- const int64_t i03 = i3 / sf3;
7429- for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
7430- const int64_t i02 = i2 / sf2;
7431- for (int64_t i1 = 0 ; i1 < ne1; i1++) {
7432- // Compute center position in source coordinates
7433- const float center_y = scale1 * ((float )i1 + 0 .5f );
7434-
7435- // Compute support and invscale for y direction
7436- const float support_y = (scale1 > 1 .0f ) ? (interp_size * 0 .5f ) * scale1 : interp_size * 0 .5f ;
7437- const float invscale_y = (scale1 > 1 .0f ) ? (1 .0f / scale1) : 1 .0f ;
7438-
7439- for (int64_t i0 = 0 ; i0 < ne0; i0++) {
7440- const float center_x = scale0 * ((float )i0 + 0 .5f );
7441-
7442- // Compute support and invscale for x direction
7443- const float support_x = (scale0 > 1 .0f ) ? (interp_size * 0 .5f ) * scale0 : interp_size * 0 .5f ;
7444- const float invscale_x = (scale0 > 1 .0f ) ? (1 .0f / scale0) : 1 .0f ;
7445-
7446- // Calculate the range of source pixels that contribute
7447- const int64_t x_min = std::max (int64_t (0 ), (int64_t )(center_x - support_x + 0 .5f ));
7448- const int64_t x_max = std::min (ne00, (int64_t )(center_x + support_x + 0 .5f ));
7449- const int64_t y_min = std::max (int64_t (0 ), (int64_t )(center_y - support_y + 0 .5f ));
7450- const int64_t y_max = std::min (ne01, (int64_t )(center_y + support_y + 0 .5f ));
7451-
7452- float val = 0 .0f ;
7453- float total_weight = 0 .0f ;
7454-
7455- // Apply bilinear filter with antialiasing
7456- for (int64_t sy = y_min; sy < y_max; sy++) {
7457- const float weight_y = bilinear_filter ((sy - center_y + 0 .5f ) * invscale_y);
7458-
7459- for (int64_t sx = x_min; sx < x_max; sx++) {
7460- const float weight_x = bilinear_filter ((sx - center_x + 0 .5f ) * invscale_x);
7461- const float weight = weight_x * weight_y;
7462-
7463- if (weight > 0 .0f ) {
7464- const float pixel = *(const float *)((const char *)src0->data +
7465- sx*nb00 +
7466- sy*nb01 +
7467- i02*nb02 +
7468- i03*nb03);
7469- val += pixel * weight;
7470- total_weight += weight;
7471- }
7405+ // Similar to F.interpolate(..., mode="bilinear", align_corners=False, antialias=True)
7406+ // https://github.com/pytorch/pytorch/blob/8871ff29b743948d1225389d5b7068f37b22750b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
7407+ if (mode == GGML_SCALE_MODE_BILINEAR && (mode_flags & GGML_SCALE_FLAG_ANTIALIAS)) {
7408+ auto triangle_filter = [](float x) -> float {
7409+ return std::max (1 .0f - fabsf (x), 0 .f );
7410+ };
7411+
7412+ // support and invscale, minimum 1 pixel for bilinear (support grows to 1/sf when sf < 1)
7413+ const float support1 = std::max (1 .f , 1 .f / sf1);
7414+ const float invscale1 = 1.0 / support1;
7415+ const float support0 = std::max (1 .f , 1 .f / sf0);
7416+ const float invscale0 = 1 .f / support0;
7417+
7418+ for (int64_t i3 = 0 ; i3 < ne3; i3++) {
7419+ const int64_t i03 = i3 / sf3;
7420+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
7421+ const int64_t i02 = i2 / sf2;
7422+ for (int64_t i1 = 0 ; i1 < ne1; i1++) {
7423+ const float y = ((float ) i1 + pixel_offset) / sf1;
7424+ for (int64_t i0 = 0 ; i0 < ne0; i0++) {
7425+ const float x = ((float ) i0 + pixel_offset) / sf0;
7426+
7427+ // the range of source pixels that contribute
7428+ const int64_t x_min = std::max (int64_t (0 ), (int64_t ) (x - support0 + pixel_offset));
7429+ const int64_t x_max = std::min (ne00, (int64_t ) (x + support0 + pixel_offset));
7430+ const int64_t y_min = std::max (int64_t (0 ), (int64_t ) (y - support1 + pixel_offset));
7431+ const int64_t y_max = std::min (ne01, (int64_t ) (y + support1 + pixel_offset));
7432+
7433+ // bilinear filter with antialiasing
7434+ float val = 0 .0f ;
7435+ float total_weight = 0 .0f ;
7436+
7437+ for (int64_t sy = y_min; sy < y_max; sy++) {
7438+ const float weight_y = triangle_filter ((sy - y + pixel_offset) * invscale1);
7439+
7440+ for (int64_t sx = x_min; sx < x_max; sx++) {
7441+ const float weight_x = triangle_filter ((sx - x + pixel_offset) * invscale0);
7442+ const float weight = weight_x * weight_y;
7443+
7444+ if (weight <= 0 .0f ) {
7445+ continue ;
74727446 }
7473- }
74747447
7475- // Normalize by total weight
7476- if (total_weight > 0 . 0f ) {
7477- val /= total_weight ;
7448+ const float pixel = *( const float *)(( const char *)src0-> data + sx*nb00 + sy*nb01 + i02*nb02 + i03*nb03);
7449+ val += pixel * weight;
7450+ total_weight += weight ;
74787451 }
7452+ }
74797453
7480- // Write directly to dst
7481- float * dst_ptr = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
7482- *dst_ptr = val;
7454+ if (total_weight > 0 .0f ) {
7455+ val /= total_weight;
74837456 }
7457+
7458+ float * dst_ptr = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
7459+ *dst_ptr = val;
74847460 }
74857461 }
74867462 }
7487-
7488- antialiasing_applied = true ;
74897463 }
7490- }
7491-
7492- // If antialiasing was not applied, proceed with regular interpolation
7493- if (antialiasing_applied) {
7494- // Antialiasing result is already in dst, we're done
7495- return ;
7496- }
7497-
7498- if (mode == GGML_SCALE_MODE_NEAREST) {
7464+ } else if (mode == GGML_SCALE_MODE_NEAREST) {
74997465 for (int64_t i3 = 0 ; i3 < ne3; i3++) {
75007466 const int64_t i03 = i3 / sf3;
75017467 for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
0 commit comments