// 1D fft routine Copyright (C) 1984-2000  j.p.lewis  
// this code translated from PL/1 to C, then to java... but it works.
// 
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Library General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
// 
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
// 
// You should have received a copy of the GNU Library General Public
// License along with this library; if not, write to the
// Free Software Foundation, Inc., 59 Temple Place - Suite 330,
// Boston, MA  02111-1307, USA.
//
// Primary author contact info:  www.idiom.com/~zilla  zilla@computer.org

/* fft.c zilla@mit-devo 84 simple 1d fft 
 * checked against example in stearns book, ok.
 * [0]=dc, [n/2]=hc,
 * 1..n/2-1 = cconj( n-1..n/2+1 );
 *
 * compilation options
 * -DRECURSE	use sine recursion
 * -DTESTIT	generate fftTST program
 * -DBENCH	generate fftBENCH program
 *
 * modified:
 *
 * todo:
 *
 * java/c benchmarks(TIMES=20000)  should print 20478976.0
 * ibmjdk116 -O/celeron(433) 26.600u 0.110s
 * gcc/celeron(433)          20.510u 0.010s
 * gcc -O/celeron(433)       11.200u 0.040
 *
 * sgi jdk116/r10k/195        95.931u 0.421s   -O makes little difference
 * sgi jdk12-jun99/r10k/195   76.894u 0.505s   -O makes little difference
 * sgi cc -n32/r10k/195       54.662u 0.040s
 * sgi cc -O -n32/r10k/195    14.528u 0.018s
 *
 * java/c benchmarks(TIMES=4000)  should print 4094976.0
 * gcc/celeron(433)       4.100u 0.000s
 * gcc/celeron(433)       2.210u 0.010s  with -O
 * ibmjdk116/celeron(433) 5.410u 0.070s
 * ibmjdk116/celeron(433) 6.3, 0.0 after adding the print
 * ibmjdk116/celeron(433) 5.6, 0.16 after adding the print, -O
 * jdk116/r10k/195         19.582u 0.391s
 * jdk12jun99/r10k/195     16.258u 0.466s  -O or not little difference
 * cc -n32/r10k/195        10.864u 0.017s
 * cc -O -n32/r10k/195     2.889u 0.013s
 *
 * java/c benchmarks(TIMES=200)
 * 117interp p120         9.830u 0.490s
 * 12sunwjit p120         6.720u 0.440s
 * cc p120                1.330u 0.000s
 * ibm116 celeron(433)   0.430u 0.070
 * gcc celeron(433)      0.190u 0.020s
 *
 * benchmarks w/ -DRECURSE -DBENCH -O
 * sparc2                2.6   27x
 * sparc1		10      7x
 * original
 */

/**
 * 1d fft with fast sine recursion.
 *
 * @author jplewis (from stearns book)
 */
final public class fft
{
  final static void fft(float[] R, float[] I, int pow2)
  //float R[], I[];	/*  r[0:2^pow2-1], i[0:2^pow2-1] */
  //int     pow2;			/* a power of 2 */
  {
    int len,nn,m,l,step;
    int bits,i,j;
    float rl; double a;
    float w_r,w_i;

    double incsin,inccos,dcossin;

    //len = Mipower (2, pow2);
    len = 1<<pow2;
    nn = len - 1;
    bits = 0;

    for (m = 1; m <= nn; m++) {	/* bit reversal */
	l = len / 2;

	while ((bits + l) > nn) {
	    l = l / 2;
	}

	bits = (bits % l) + l;

	if (bits > m) {
	    float swap_r,swap_i;
	    swap_r = R[m];
	    R[m] = R[bits];
	    R[bits] = swap_r;
	    swap_i = I[m];
	    I[m] = I[bits];
	    I[bits] = swap_i;
	}
    } /* bitreversal */

    l = 1;

    while (l < len) {
	step = 2 * l;
	rl = l;

	//#	ifdef RECURSE
	a = Math.PI / l;	// for last step, this is 2pi/n 
	incsin = - Math.sin(a);	// running backwards 
	inccos =   2.0 * Math.sin(0.5 * a) * Math.sin(0.5 * a);
	dcossin = -2.0 * inccos;
	w_r = 1.f;
	w_i = 0.f;
	//#	endif /*RECURSE*/

	for (m = 1; m <= l; m++) {

	    //#	ifndef RECURSE
	    a = Math.PI * (double) (1 - m) / rl;
	    w_r = (float)Math.cos(a);
	    w_i = (float)Math.sin(a);
	    //#	endif /*!RECURSE*/

	    for (i = m - 1; i < len; i += step) {
		float swap_r,swap_i;
		j = i + l;
		swap_r = w_r * R[j] - w_i * I[j]; /* ac-bd */
		swap_i = w_r * I[j] + w_i * R[j]; /* ad+bc */
		R[j] = R[i] - swap_r;
		I[j] = I[i] - swap_i;
		R[i] += swap_r;
		I[i] += swap_i;
	    } /*fori*/

	    //#	ifdef RECURSE
            inccos += dcossin * w_r;
            w_r += inccos;
            incsin += dcossin * w_i;
            w_i += incsin;
	    //#	endif /*RECURSE*/

	} //for

	l = step;
    } //while l<len 

  } //fft


  /*
   *% cc -O % -DTESTIT -lZS -lZ -lm -o fftTST %*
   *@ cc -O % -DTESTIT -lZS -lZ -lm -o fftTST @*
   *% cc -O % -DRECURSE -DTESTIT -O -lZS -lZ -lm -o fftTST %*
    * Compare to stearns p267; results should be:
    0: 1.300423 0.000000
    1: 0.432141 -1.070881
    2: -0.304285 -0.479190
    3: -0.239123 -0.163785
    4: -0.167305 -0.069558
    5: -0.126940 -0.033932
    6: -0.105087 -0.017109
    7: -0.094182 -0.007306
    8: -0.090862 0.000000
    9: -0.094182 0.007306
    10: -0.105087 0.017109
    11: -0.126940 0.033932
    12: -0.167305 0.069558
    13: -0.239123 0.163785
    14: -0.304285 0.479190
    15: 0.432141 1.070881
  */

  public static void test(String[] args)
  {
    float[] fr = new float[16];
    float[] fi = new float[16];
    for (int i = 0; i < 16; i++) {
	double t = 0.375 * (double) i;
	fr[i] = (float)(Math.exp(-t) * Math.sin(t));
	fi[i] = 0.f;
    }
    fft (fr, fi, 4);
    for (int i = 0; i < 16; i++)
	cstdio.printf ("%d: %f %f\n", i, fr[i], fi[i]);
  }

  private static void bench(int TIMES)
  {
    int PO2 = 10;
    int N = 1024;
    float fN = 1024.f;

    float[] R = new float[N];
    float[] I = new float[N];

    for( int i=0; i < TIMES; i++ ) {
	for( int j=0; j < N; j++ ) {
	  R[j] = (float)j / fN;		// trivial modify arrays
	  I[j] = (float)i;		// so that optimizer cannot omit call
	}
	fft(R,I,PO2);
    }

    System.out.println(I[0]);
  } //bench


  public static void main(String[] args)
  {
    final int TIMES = 20000;
    bench(1);   // run once to force jit
    bench(TIMES);
    System.exit(0);
  } //main

} //class fft



