Skip to content

Commit b1ba04d

Browse files
committed
Reformat
1 parent 1f4f2bd commit b1ba04d

File tree

1 file changed

+51
-85
lines changed

1 file changed

+51
-85
lines changed

ggml/src/ggml-cpu/ops.cpp

Lines changed: 51 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -7402,100 +7402,66 @@ static void ggml_compute_forward_upscale_f32(
74027402
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
74037403
}
74047404

7405-
// Antialiasing preprocessing step
7406-
// Apply antialiasing filter if flag is set and write directly to dst
7407-
bool antialiasing_applied = false;
7408-
7409-
if (mode_flags & GGML_SCALE_FLAG_ANTIALIAS) {
7410-
// Only apply antialiasing when downsampling (scale < 1.0)
7411-
const float scale0 = (float)ne00 / (float)ne0;
7412-
const float scale1 = (float)ne01 / (float)ne1;
7413-
7414-
if (scale0 > 1.0f || scale1 > 1.0f) {
7415-
// Apply antialiasing filter to src0 and write directly to dst
7416-
// PyTorch's bilinear filter function: f(x) = max(0, 1 - |x|)
7417-
auto bilinear_filter = [](float x) -> float {
7418-
x = fabsf(x);
7419-
if (x < 1.0f) {
7420-
return 1.0f - x;
7421-
}
7422-
return 0.0f;
7423-
};
7424-
7425-
const int interp_size = 2; // bilinear
7426-
7427-
for (int64_t i3 = 0; i3 < ne3; i3++) {
7428-
const int64_t i03 = i3 / sf3;
7429-
for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
7430-
const int64_t i02 = i2 / sf2;
7431-
for (int64_t i1 = 0; i1 < ne1; i1++) {
7432-
// Compute center position in source coordinates
7433-
const float center_y = scale1 * ((float)i1 + 0.5f);
7434-
7435-
// Compute support and invscale for y direction
7436-
const float support_y = (scale1 > 1.0f) ? (interp_size * 0.5f) * scale1 : interp_size * 0.5f;
7437-
const float invscale_y = (scale1 > 1.0f) ? (1.0f / scale1) : 1.0f;
7438-
7439-
for (int64_t i0 = 0; i0 < ne0; i0++) {
7440-
const float center_x = scale0 * ((float)i0 + 0.5f);
7441-
7442-
// Compute support and invscale for x direction
7443-
const float support_x = (scale0 > 1.0f) ? (interp_size * 0.5f) * scale0 : interp_size * 0.5f;
7444-
const float invscale_x = (scale0 > 1.0f) ? (1.0f / scale0) : 1.0f;
7445-
7446-
// Calculate the range of source pixels that contribute
7447-
const int64_t x_min = std::max(int64_t(0), (int64_t)(center_x - support_x + 0.5f));
7448-
const int64_t x_max = std::min(ne00, (int64_t)(center_x + support_x + 0.5f));
7449-
const int64_t y_min = std::max(int64_t(0), (int64_t)(center_y - support_y + 0.5f));
7450-
const int64_t y_max = std::min(ne01, (int64_t)(center_y + support_y + 0.5f));
7451-
7452-
float val = 0.0f;
7453-
float total_weight = 0.0f;
7454-
7455-
// Apply bilinear filter with antialiasing
7456-
for (int64_t sy = y_min; sy < y_max; sy++) {
7457-
const float weight_y = bilinear_filter((sy - center_y + 0.5f) * invscale_y);
7458-
7459-
for (int64_t sx = x_min; sx < x_max; sx++) {
7460-
const float weight_x = bilinear_filter((sx - center_x + 0.5f) * invscale_x);
7461-
const float weight = weight_x * weight_y;
7462-
7463-
if (weight > 0.0f) {
7464-
const float pixel = *(const float *)((const char *)src0->data +
7465-
sx*nb00 +
7466-
sy*nb01 +
7467-
i02*nb02 +
7468-
i03*nb03);
7469-
val += pixel * weight;
7470-
total_weight += weight;
7471-
}
7405+
// Similar to F.interpolate(..., mode="bilinear", align_corners=False, antialias=True)
7406+
// https://github.com/pytorch/pytorch/blob/8871ff29b743948d1225389d5b7068f37b22750b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
7407+
if (mode == GGML_SCALE_MODE_BILINEAR && (mode_flags & GGML_SCALE_FLAG_ANTIALIAS)) {
7408+
auto triangle_filter = [](float x) -> float {
7409+
return std::max(1.0f - fabsf(x), 0.f);
7410+
};
7411+
7412+
// support and invscale: the bilinear support is at least 1 pixel and grows to 1/sf when downscaling (sf < 1)
7413+
const float support1 = std::max(1.f, 1.f / sf1);
7414+
const float invscale1 = 1.0 / support1;
7415+
const float support0 = std::max(1.f, 1.f / sf0);
7416+
const float invscale0 = 1.f / support0;
7417+
7418+
for (int64_t i3 = 0; i3 < ne3; i3++) {
7419+
const int64_t i03 = i3 / sf3;
7420+
for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
7421+
const int64_t i02 = i2 / sf2;
7422+
for (int64_t i1 = 0; i1 < ne1; i1++) {
7423+
const float y = ((float) i1 + pixel_offset) / sf1;
7424+
for (int64_t i0 = 0; i0 < ne0; i0++) {
7425+
const float x = ((float) i0 + pixel_offset) / sf0;
7426+
7427+
// the range of source pixels that contribute
7428+
const int64_t x_min = std::max(int64_t(0), (int64_t) (x - support0 + pixel_offset));
7429+
const int64_t x_max = std::min(ne00, (int64_t) (x + support0 + pixel_offset));
7430+
const int64_t y_min = std::max(int64_t(0), (int64_t) (y - support1 + pixel_offset));
7431+
const int64_t y_max = std::min(ne01, (int64_t) (y + support1 + pixel_offset));
7432+
7433+
// bilinear filter with antialiasing
7434+
float val = 0.0f;
7435+
float total_weight = 0.0f;
7436+
7437+
for (int64_t sy = y_min; sy < y_max; sy++) {
7438+
const float weight_y = triangle_filter((sy - y + pixel_offset) * invscale1);
7439+
7440+
for (int64_t sx = x_min; sx < x_max; sx++) {
7441+
const float weight_x = triangle_filter((sx - x + pixel_offset) * invscale0);
7442+
const float weight = weight_x * weight_y;
7443+
7444+
if (weight <= 0.0f) {
7445+
continue;
74727446
}
7473-
}
74747447

7475-
// Normalize by total weight
7476-
if (total_weight > 0.0f) {
7477-
val /= total_weight;
7448+
const float pixel = *(const float *)((const char *)src0->data + sx*nb00 + sy*nb01 + i02*nb02 + i03*nb03);
7449+
val += pixel * weight;
7450+
total_weight += weight;
74787451
}
7452+
}
74797453

7480-
// Write directly to dst
7481-
float * dst_ptr = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
7482-
*dst_ptr = val;
7454+
if (total_weight > 0.0f) {
7455+
val /= total_weight;
74837456
}
7457+
7458+
float * dst_ptr = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
7459+
*dst_ptr = val;
74847460
}
74857461
}
74867462
}
7487-
7488-
antialiasing_applied = true;
74897463
}
7490-
}
7491-
7492-
// If antialiasing was not applied, proceed with regular interpolation
7493-
if (antialiasing_applied) {
7494-
// Antialiasing result is already in dst, we're done
7495-
return;
7496-
}
7497-
7498-
if (mode == GGML_SCALE_MODE_NEAREST) {
7464+
} else if (mode == GGML_SCALE_MODE_NEAREST) {
74997465
for (int64_t i3 = 0; i3 < ne3; i3++) {
75007466
const int64_t i03 = i3 / sf3;
75017467
for (int64_t i2 = ith; i2 < ne2; i2 += nth) {

0 commit comments

Comments
 (0)