imglib
diff --git a/src/main/java/net/imglib2/algorithm/convolution/fast_gauss/FastGaussCalculator.java b/src/main/java/net/imglib2/algorithm/convolution/fast_gauss/FastGaussCalculator.java
Lines changed: 58 additions & 74 deletions
diff --git a/src/main/java/net/imglib2/algorithm/convolution/fast_gauss/FastGaussConvolverRealType.java b/src/main/java/net/imglib2/algorithm/convolution/fast_gauss/FastGaussConvolverRealType.java
Lines changed: 0 additions & 1 deletion
diff --git a/src/main/java/net/imglib2/algorithm/gauss3/Gauss3.java b/src/main/java/net/imglib2/algorithm/gauss3/Gauss3.java
Lines changed: 70 additions & 10 deletions
@@ -15,65 +15,49 @@
  */
 public class FastGaussCalculator
 {
-	private final Parameters fc;
-	// we will utilize ring buffers whose current positions will be driven
-	// by the currently processed index inside the input array -- we will use
-	// bit masking to achieve fast the effect of the operation modulo, furthermore
-	// it does not suffer from sign effect: -5 % 8 = -5 in Java; I need it be +3
-	// to make it work nicely for the ring buffer (which cannot have negative indices)
-    //
-	// remember current plus last two results for every yk-term (there's M of them),
-	// so we need either 3*3 or 3*4=12 values => 4 bits = capacity for 16 values,
-	//
-	// actually, we will use 4 ring sub-buffers (each of length 4, 2 bits) for the
-	// individual yks...: y1, y3, y5, y7 (the order in which they are touched during
-	// the computation of the filter)
-	//
-	private final double[] yk0 = new double[ 4 ];
-
-	private final double[] yk1 = new double[ 4 ];
-
-	private final double[] yk2 = new double[ 4 ];
-
-	private final double[] yk3 = new double[ 4 ];
-
-	private int x;
+	private double[] y_n = new double[ 4 ]; // stores y values for k=1,3,5,7 of the current step
+
+	private double[] y_n_minus_1 = new double[ 4 ]; // stores the y_n value of the previous step
+
+	private double[] y_n_minus_2 = new double[ 4 ]; // stores the y_n value of the step before the previous step
+
+	private final double[] nk_2;
+
+	private final double[] dk_1;
+
+	private final int M;
 
 	public FastGaussCalculator( final Parameters fc )
 	{
-		this.fc = fc;
+		nk_2 = fc.nk_2;
+		dk_1 = fc.dk_1;
+		M = fc.M;
 	}
 
 	public void initialize( final double boundaryValue )
 	{
 		//calculate yk that one would get on constant signal of 1.0
-		//(Vlado's invention by solving eq. (35) assuming yk_n = yk_n-1 = yk_n-2)
-		final double y1_mN = 2.0 * fc.nk_2[ 0 ] / ( fc.dk_1[ 0 ] + 2.0 );
-		final double y3_mN = 2.0 * fc.nk_2[ 1 ] / ( fc.dk_1[ 1 ] + 2.0 );
-		final double y5_mN = 2.0 * fc.nk_2[ 2 ] / ( fc.dk_1[ 2 ] + 2.0 );
-		final double y7_mN = fc.M == 4 ? 2.0 * fc.nk_2[ 3 ] / ( fc.dk_1[ 3 ] + 2.0 ) : 0.0;
-
-		//calculate yk that one would get on constant signal of array[0],
-		//and use this value for yk[x-2] and yk[x-1] when x=-Nm1
-		x = -1;
-		yk0[ x & 3 ] = yk0[ ( x - 1 ) & 3 ] = boundaryValue * y1_mN;
-		yk1[ x & 3 ] = yk1[ ( x - 1 ) & 3 ] = boundaryValue * y3_mN;
-		yk2[ x & 3 ] = yk2[ ( x - 1 ) & 3 ] = boundaryValue * y5_mN;
-		yk3[ x & 3 ] = yk3[ ( x - 1 ) & 3 ] = boundaryValue * y7_mN;
+		//(Vlado's invention by solving eq. (35) assuming y_n = y_n_minus_1 = y_n_minus_2)
+
+		for ( int i = 0; i < M; i++ )
+			y_n[ i ] = y_n_minus_1[ i ] = boundaryValue * 2.0 * nk_2[ i ] / ( dk_1[ i ] + 2.0 );
 	}
 
 	public void update( final double tmp )
 	{
-		++x;
-		yk0[ x & 3 ] = fc.nk_2[ 0 ] * tmp - fc.dk_1[ 0 ] * yk0[ ( x - 1 ) & 3 ] - yk0[ ( x - 2 ) & 3 ];
-		yk1[ x & 3 ] = fc.nk_2[ 1 ] * tmp - fc.dk_1[ 1 ] * yk1[ ( x - 1 ) & 3 ] - yk1[ ( x - 2 ) & 3 ];
-		yk2[ x & 3 ] = fc.nk_2[ 2 ] * tmp - fc.dk_1[ 2 ] * yk2[ ( x - 1 ) & 3 ] - yk2[ ( x - 2 ) & 3 ];
-		yk3[ x & 3 ] = fc.M == 4 ? fc.nk_2[ 3 ] * tmp - fc.dk_1[ 3 ] * yk3[ ( x - 1 ) & 3 ] - yk3[ ( x - 2 ) & 3 ] : 0;
+		double[] t = y_n_minus_2;
+		y_n_minus_2 = y_n_minus_1;
+		y_n_minus_1 = y_n;
+		y_n = t;
+		y_n[ 0 ] = nk_2[ 0 ] * tmp - dk_1[ 0 ] * y_n_minus_1[ 0 ] - y_n_minus_2[ 0 ];
+		y_n[ 1 ] = nk_2[ 1 ] * tmp - dk_1[ 1 ] * y_n_minus_1[ 1 ] - y_n_minus_2[ 1 ];
+		y_n[ 2 ] = nk_2[ 2 ] * tmp - dk_1[ 2 ] * y_n_minus_1[ 2 ] - y_n_minus_2[ 2 ];
+		y_n[ 3 ] = nk_2[ 3 ] * tmp - dk_1[ 3 ] * y_n_minus_1[ 3 ] - y_n_minus_2[ 3 ];
 	}
 
 	public double getValue()
 	{
-		return yk0[ x & 3 ] + yk1[ x & 3 ] + yk2[ x & 3 ] + yk3[ x & 3 ];
+		return y_n[ 0 ] + y_n[ 1 ] + y_n[ 2 ] + y_n[ 3 ];
 	}
 
 	/**
@@ -100,7 +84,7 @@ public static class Parameters
 		/**
 		 * the width of the filter
 		 */
-		public int N = 0;
+		public final int N;
 
 		/**
 		 * the filtering coefficients
@@ -116,36 +100,31 @@ public static class Parameters
 
 		public static Parameters fast( final double sigma )
 		{
-			return new Parameters( 3, sigma );
+			return new Parameters( 3, sigma, round( 3.2795 * sigma + 0.25460 ) );
 		}
 
 		public static Parameters exact( final double sigma )
 		{
-			return new Parameters( 4, sigma );
+			return new Parameters( 4, sigma, round( 3.7210 * sigma + 0.20157 ) );
 		}
 
 		/**
 		 * Construct with the same accuracy as the Gauss1D for which you want to
 		 * use it.
 		 */
-		private Parameters( final int _M, final double sigma )
+		private Parameters( final int _M, final double sigma, final int N )
 		{
 			if ( sigma <= 0 )
 				throw new IllegalArgumentException( "Sigma must be positive." );
 			M = ( _M == 3 || _M == 4 ) ? _M : 3;
 			nk_2 = new double[ 4 ];
 			dk_1 = new double[ 4 ];
 
-			// eq. (57), the filter width
-			final double N1 = ( int ) ( ( M == 3 ? 3.2795 * sigma + 0.25460 // M==3
-					: 3.7210 * sigma + 0.20157 // M==4
-			) + 0.5 );
-
 			// Table I, 1st/top objective
-			final double omega_1 = 1.0 * Math.PI / ( 2.0 * N1 );
-			final double omega_3 = 3.0 * Math.PI / ( 2.0 * N1 );
-			final double omega_5 = 5.0 * Math.PI / ( 2.0 * N1 );
-			final double omega_7 = 7.0 * Math.PI / ( 2.0 * N1 );
+			final double omega_1 = 1.0 * Math.PI / ( 2.0 * N );
+			final double omega_3 = 3.0 * Math.PI / ( 2.0 * N );
+			final double omega_5 = 5.0 * Math.PI / ( 2.0 * N );
+			final double omega_7 = 7.0 * Math.PI / ( 2.0 * N );
 
 			// eq. (37) i=1,3,5,7
 			final double p_1 = 1.0 / Math.tan( 0.5 * omega_1 );
@@ -161,10 +140,10 @@ private Parameters( final int _M, final double sigma )
 
 			// approximate rho_i:
 			// eq. (50) i=1,3,5,7
-			final double rho_1 = Math.exp( -0.5 * sigma * sigma * omega_1 * omega_1 ) / N1;
-			final double rho_3 = Math.exp( -0.5 * sigma * sigma * omega_3 * omega_3 ) / N1;
-			final double rho_5 = Math.exp( -0.5 * sigma * sigma * omega_5 * omega_5 ) / N1;
-			final double rho_7 = Math.exp( -0.5 * sigma * sigma * omega_7 * omega_7 ) / N1;
+			final double rho_1 = Math.exp( -0.5 * sigma * sigma * omega_1 * omega_1 ) / N;
+			final double rho_3 = Math.exp( -0.5 * sigma * sigma * omega_3 * omega_3 ) / N;
+			final double rho_5 = Math.exp( -0.5 * sigma * sigma * omega_5 * omega_5 ) / N;
+			final double rho_7 = Math.exp( -0.5 * sigma * sigma * omega_7 * omega_7 ) / N;
 	/*
 			//accurate rho_i:
 			// eq. (50) i=1,3,5,7
@@ -202,6 +181,8 @@ private Parameters( final int _M, final double sigma )
 
 			// get beta_k
 			double beta_1, beta_3;
+			double gamma_2 = N * N - sigma * sigma;
+			double gamma_3 = C_15 * rho_1 + C_35 * rho_3 + rho_5;
 			if ( M == 3 )
 			{
 				//build 6 minor 2x2 matrices to invA by excluding i-th row and j-th column from invA,
@@ -222,10 +203,8 @@ private Parameters( final int _M, final double sigma )
 				final double det_invA = p_1 * D11 - r_1 * D21 + C_15 * D31;
 
 				// eq. (53), only first two rows of matrix A_N
-				beta_1 = ( +D11 - ( N1 * N1 - sigma * sigma ) * D21
-						+ ( C_15 * rho_1 + C_35 * rho_3 + rho_5 ) * D31 ) / det_invA;
-				beta_3 = ( -D12 + ( N1 * N1 - sigma * sigma ) * D22
-						- ( C_15 * rho_1 + C_35 * rho_3 + rho_5 ) * D32 ) / det_invA;
+				beta_1 = ( +D11 - gamma_2 * D21 + gamma_3 * D31 ) / det_invA;
+				beta_3 = ( -D12 + gamma_2 * D22 - gamma_3 * D32 ) / det_invA;
 			}
 			else
 			{ // M == 4
@@ -239,7 +218,7 @@ private Parameters( final int _M, final double sigma )
 
 				final double D11 = r_3 - r_5 * C_35 - r_7 * C_37;
 				final double D21 = p_3 - p_5 * C_35 - p_7 * C_37;
-				final double D31 = p_3 * r_5 + p_5 * r_7 + C_37 - p_5 * r_3 - p_7 * r_5 * C_37;
+				final double D31 = p_3 * r_5 + p_5 * r_7 * C_37 - p_5 * r_3 - p_7 * r_5 * C_37;
 				final double D41 = p_5 * r_7 * C_35 + p_7 * r_3 - p_3 * r_7 - p_7 * r_5 * C_35;
 
 				final double D12 = r_1 - r_5 * C_15 - r_7 * C_17;
@@ -250,10 +229,9 @@ private Parameters( final int _M, final double sigma )
 				final double det_invA = p_1 * D11 - r_1 * D21 + C_15 * D31 - C_17 * D41;
 
 				// eq. (53), only first two rows of matrix A_N
-				beta_1 = ( +D11 - ( N1 * N1 - sigma * sigma ) * D21 + ( C_15 * rho_1 + C_35 * rho_3 + rho_5 ) * D31
-						- ( C_17 * rho_1 + C_37 * rho_3 + rho_7 ) * D41 ) / det_invA;
-				beta_3 = ( -D12 + ( N1 * N1 - sigma * sigma ) * D22 - ( C_15 * rho_1 + C_35 * rho_3 + rho_5 ) * D32
-						+ ( C_17 * rho_1 + C_37 * rho_3 + rho_7 ) * D42 ) / det_invA;
+				double gamma4 = C_17 * rho_1 + C_37 * rho_3 + rho_7;
+				beta_1 = ( +D11 - gamma_2 * D21 + gamma_3 * D31 - gamma4 * D41 ) / det_invA;
+				beta_3 = ( -D12 + gamma_2 * D22 - gamma_3 * D32 + gamma4 * D42 ) / det_invA;
 			}
 
 			// eq. (49), since I didn't want to continue building A_N to be used in eq. (53),
@@ -263,25 +241,31 @@ private Parameters( final int _M, final double sigma )
 			final double beta_7 = rho_7 + C_17 * ( rho_1 - beta_1 ) + C_37 * ( rho_3 - beta_3 );
 
 			// fill the output container FilteringCoeffs
-			N = ( int ) N1;
+			this.N = N;
 
-			nk_2[ 0 ] = -beta_1 * Math.cos( omega_1 * ( N1 + 1.0 ) );
-			nk_2[ 1 ] = -beta_3 * Math.cos( omega_3 * ( N1 + 1.0 ) );
-			nk_2[ 2 ] = -beta_5 * Math.cos( omega_5 * ( N1 + 1.0 ) );
+			nk_2[ 0 ] = -beta_1 * Math.cos( omega_1 * ( N + 1 ) );
+			nk_2[ 1 ] = -beta_3 * Math.cos( omega_3 * ( N + 1 ) );
+			nk_2[ 2 ] = -beta_5 * Math.cos( omega_5 * ( N + 1 ) );
 
 			dk_1[ 0 ] = -2.0 * Math.cos( omega_1 );
 			dk_1[ 1 ] = -2.0 * Math.cos( omega_3 );
 			dk_1[ 2 ] = -2.0 * Math.cos( omega_5 );
 
 			if ( M == 4 )
 			{
-				nk_2[ 3 ] = -beta_7 * Math.cos( omega_7 * ( N1 + 1.0 ) );
+				nk_2[ 3 ] = -beta_7 * Math.cos( omega_7 * ( N + 1 ) );
 				dk_1[ 3 ] = -2.0 * Math.cos( omega_7 );
 				// NB: array length was guarded at the beginning of this function
 			}
 
 			// declare to what sigma the coefficients belong to
 			Sigma = sigma;
 		}
+
+	}
+
+	private static int round( double value )
+	{
+		return ( int ) ( value + 0.5 );
 	}
 }
@@ -99,7 +99,6 @@ public void run()
 			for ( int i = -offset; i < 0; ++i )
 			{
 				fg.update( boundaryValue + tmpE[ i + offset ] );
-				in.fwd( d );
 			}
 
 			for ( int i = 0; i < lineLength; ++i )
 
@@ -226,26 +226,86 @@ public static int[] halfkernelsizes( final double[] sigma )
 		return size;
 	}
 
+	/**
+	 * Returns a Gaussian half kernel with the given sigma and size.
+	 * <p>
+	 * The edges are smoothed by a second-degree polynomial.
+	 * This improves the first derivative and the Fourier spectrum
+	 * of the Gaussian kernel.
+	 */
 	public static double[] halfkernel( final double sigma, final int size, final boolean normalize )
 	{
-		final double two_sq_sigma = 2 * sigma * sigma;
+		final double two_sq_sigma = 2 * square( sigma );
 		final double[] kernel = new double[ size ];
 
 		kernel[ 0 ] = 1;
 		for ( int x = 1; x < size; ++x )
-			kernel[ x ] = Math.exp( -( x * x ) / two_sq_sigma );
+			kernel[ x ] = Math.exp( -square( x ) / two_sq_sigma );
+
+		smoothEdge( kernel );
 
 		if ( normalize )
-		{
-			double sum = 0.5;
-			for ( int x = 1; x < size; ++x )
-				sum += kernel[ x ];
-			sum *= 2;
+			normalizeHalfkernel( kernel );
 
-			for ( int x = 0; x < size; ++x )
-				kernel[ x ] /= sum;
+		return kernel;
+	}
+
+	/**
+	 * This method smooths the truncated end of the gaussian kernel.
+	 * The code is taken from ImageJ1 "Gaussian Blur ...".
+	 * <p>
+	 * Detailed explanation:
+	 * <p>
+	 * The values {@code kernel[x]} for {@code r < x < L} are replaced by the
+	 * values of a polynomial {@code p(x) = slope * (x - L)^2}, where
+	 * {@code L} equals {@code kernel.length}. The "slope" and "r" are chosen
+	 * such that there is a smooth transition between the kernel and
+	 * the polynomial, i.e. their value and first derivative
+	 * match for x = r: {@code p(r) = kernel[r]} and {@code p'(r) = kernel'[r]}.
+	 */
+	private static void smoothEdge( double[] kernel )
+	{
+		int L = kernel.length;
+		double slope = Double.MAX_VALUE;
+		int r = L;
+		while ( r > L / 2 )
+		{
+			r--;
+			double a = kernel[ r ] / square( L - r );
+			if ( a < slope )
+				slope = a;
+			else
+			{
+				r++;
+				break;
+			}
 		}
+		for ( int x = r + 1; x < L; x++ )
+			kernel[ x ] = slope * square( L - x );
+	}
 
-		return kernel;
+	/**
+	 * Normalizes a half kernel such that the full symmetric kernel (center value once, every other value twice) sums up to 1.
+	 */
+	private static void normalizeHalfkernel( double[] kernel )
+	{
+		double sum = 0.5 * kernel[ 0 ];
+		for ( int x = 1; x < kernel.length; ++x )
+			sum += kernel[ x ];
+		sum *= 2;
+		multiply( kernel, 1 / sum );
+	}
+
+	// -- Helper methods --
+
+	private static double square( double x )
+	{
+		return x * x;
+	}
+
+	private static void multiply( double[] values, double factor )
+	{
+		for ( int x = 0; x < values.length; ++x )
+			values[ x ] *= factor;
 	}
 }
Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,6 @@ public void run()`
`99`	`99`	`for ( int i = -offset; i < 0; ++i )`
`100`	`100`	`{`
`101`	`101`	`fg.update( boundaryValue + tmpE[ i + offset ] );`
`102`		`- in.fwd( d );`
`103`	`102`	`}`
`104`	`103`
`105`	`104`	`for ( int i = 0; i < lineLength; ++i )`