mirror of
				https://github.com/cookiengineer/audacity
				synced 2025-10-31 22:23:54 +01:00 
			
		
		
		
	Using LAME 3.10 Windows project files substantially changed from original, and included into audacity solution.
		
			
				
	
	
		
			489 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			489 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| ; from a new GOGO-no-coda (1999/09)
 | |
| ;	Copyright (C) 1999 shigeo
 | |
| ;	special thanks to Keiichi SAKAI, URURI
 | |
| ; hacked and back-ported to LAME
 | |
| ;	 by Takehiro TOMINAGA Nov 2000
 | |
| 
 | |
| %include "nasm.h"
 | |
| 
 | |
| 	globaldef fht_3DN
 | |
| 
 | |
| 	segment_data
 | |
| 	align	16
 | |
| costab	dd	0x80000000, 0	; low dword = float sign bit, high dword = 0; loaded into mm7 and XORed in to negate the low lane
 | |
| 	dd	1.414213562,1.414213562	; sqrt(2) in both lanes; read via costab+8 as the SQRT2 factor in the .do2 loops
 | |
| 	dd	9.238795283293805e-01, 9.238795283293805e-01	; cos(pi/8), duplicated
 | |
| 	dd	3.826834424611044e-01, 3.826834424611044e-01	; sin(pi/8), duplicated
 | |
| 	dd	9.951847264044178e-01, 9.951847264044178e-01	; cos(pi/32), duplicated
 | |
| 	dd	9.801714304836734e-02, 9.801714304836734e-02	; sin(pi/32), duplicated
 | |
| 	dd	9.996988186794428e-01, 9.996988186794428e-01	; cos(pi/128), duplicated
 | |
| 	dd	2.454122920569705e-02, 2.454122920569705e-02	; sin(pi/128), duplicated
 | |
| 	dd	9.999811752815535e-01, 9.999811752815535e-01	; cos(pi/512), duplicated
 | |
| 	dd	6.135884819898878e-03, 6.135884819898878e-03	; sin(pi/512), duplicated
 | |
| D_1_0_0_0	dd	0.0		, 1.0	; low = 0.0, high = 1.0; subtracted in the .for loops so only the high lane gets the "- 1.0"
 | |
| 
 | |
| 	segment_code
 | |
| 
 | |
| PIC_OFFSETTABLE
 | |
| 
 | |
| 
 | |
| ;void fht_3DN(float *fz, int nn);
 | |
| ; In-place transform of the float array fz (presumably a fast Hartley transform, per the name) using MMX registers with 3DNow! packed-float ops (pfadd/pfsub/pfmul). Both arguments are read from the stack.
 | |
| proc	fht_3DN
 | |
| ; Frame after the prologue: [esp], [esp+8] = spilled rotation pairs, [esp+16] = end pointer fn, [esp+40] = fz, [esp+44] = nn (assumes pushd pushes four dwords -- TODO confirm against nasm.h).
 | |
| 	pushd	ebp, ebx, esi, edi
 | |
| 
 | |
| 	sub	esp, 20
 | |
| ; ebp becomes the base used by the PIC_EBP_REL() data references below (get_pc.bp / PIC_BASE are defined outside this file).
 | |
| 	call	get_pc.bp
 | |
| 	add	ebp, PIC_BASE()
 | |
| ; r0..r5 are register aliases and pmov/pmovd/pupldq/puphdq are macros, presumably from nasm.h (looks like movq/movd/punpckldq/punpckhdq -- confirm).
 | |
| 	mov	r0, [esp+40]		;fi
 | |
| 	mov	r1, [esp+44]		;r1 = nn
 | |
| 	lea	r3, [PIC_EBP_REL(costab)]		;tri = costab
 | |
| 	lea	r4, [r0+r1*8]		;r4 = fn = &fz[n]
 | |
| 	mov	[esp+16], r4		; keep fn in the frame; r4 is reused for kx
 | |
| 	mov	r4, 8			;kx = k1/2
 | |
| ; mm7 = first costab entry (sign-bit mask in the low dword); pxor with it negates the low lane only.
 | |
| 	pmov	mm7, [r3]
 | |
| 
 | |
| 	loopalign 16
 | |
| .do1
 | |
| 	lea	r3, [r3+16]	;tri += 2;
 | |
| 	pmov	mm6, [PIC_EBP_REL(costab+8)]
 | |
| 	lea	r2, [r4+r4*2]		;k3*fsize/2
 | |
| 	mov	r5, 4		;i = 1*fsize
 | |
| ; .do2 walks the whole buffer in steps of r4*8 bytes until r0 reaches fn, combining four elements of the fi group and four of the gi group (gi = fi + kx) using only add/sub and the SQRT2 scale in mm6.
 | |
| 	loopalign 16
 | |
| .do2:
 | |
| 	lea	r1, [r0+r4]		;gi = fi + kx
 | |
| 	;f
 | |
| 	pmov	mm0, [r0]	;fi0
 | |
| 	pmov	mm1, [r0+r4*2]	;fi1
 | |
| 	pmov	mm2, [r0+r2*2]	;fi3
 | |
| 	pmov	mm3, [r0+r4*4]	;fi2
 | |
| 
 | |
| 	pupldq	mm0, mm0	;fi0 | fi0
 | |
| 	pupldq	mm1, mm1	;fi1 | fi1
 | |
| 	pupldq	mm2, mm2	;fi2 | fi2
 | |
| 	pupldq	mm3, mm3	;fi3 | fi3
 | |
| 
 | |
| 	pxor	mm1, mm7	;fi1 | -fi1
 | |
| 	pxor	mm3, mm7	;fi3 | -fi3
 | |
| 
 | |
| 	pfsub	mm0, mm1	;f1 | f0
 | |
| 	pfsub	mm2, mm3	;f3 | f2
 | |
| 
 | |
| 	pmov	mm4, mm0
 | |
| 	pfadd	mm0, mm2	;f1+f3|f0+f2 = fi1 | fi0
 | |
| 	pfsub	mm4, mm2	;f1-f3|f0-f2 = fi3 | fi2
 | |
| 
 | |
| 	pmovd	[r0], mm0	;fi[0]
 | |
| 	puphdq	mm0, mm0
 | |
| 	pmovd	[r0+r4*4], mm4	;fi[k2]
 | |
| 	puphdq	mm4, mm4
 | |
| 
 | |
| 	pmovd	[r0+r4*2], mm4	;fi[k1]
 | |
| 	pmovd	[r0+r2*2], mm0	;fi[k3]
 | |
| 	lea	r0, [r0+r4*8]
 | |
| 
 | |
| 	;g
 | |
| 	pmov	mm0, [r1]	;gi0
 | |
| 	pmov	mm1, [r1+r4*2]	;gi1
 | |
| 	pmov	mm2, [r1+r4*4]	;gi2
 | |
| 	pmov	mm3, [r1+r2*2]	;gi3
 | |
| 
 | |
| 	pupldq	mm1, mm1
 | |
| 	pupldq	mm0, mm0	;gi0 | gi0
 | |
| 	pupldq	mm2, mm3	;gi3 | gi2
 | |
| 
 | |
| 	pxor	mm1, mm7	;gi1 | -gi1
 | |
| 
 | |
| 	pfsub	mm0, mm1	;gi0-gi1|gi0+gi1 = g1 | g0
 | |
| 	pfmul	mm2, mm6	;gi3*SQRT2|gi2*SQRT2 = g3 | g2
 | |
| 
 | |
| 	pmov	mm4, mm0
 | |
| 	pfadd	mm0, mm2	;g1+g3|g0+g2 = gi1 | gi0
 | |
| 	pfsub	mm4, mm2	;g1-g3|g0-g2 = gi3 | gi2
 | |
| 
 | |
| 	pmovd	[r1], mm0	;gi[0]
 | |
| 	puphdq	mm0, mm0
 | |
| 	pmovd	[r1+r4*4], mm4	;gi[k2]
 | |
| 	puphdq	mm4, mm4
 | |
| ; The compare is issued before the last two stores; MMX stores leave EFLAGS alone, so the jb below still tests r0 against fn.
 | |
| 	cmp	r0, [esp + 16]
 | |
| 	pmovd	[r1+r4*2], mm0	;gi[k1]
 | |
| 	pmovd	[r1+r2*2], mm4	;gi[k3]
 | |
| 
 | |
| 	jb near .do2
 | |
| ; Initial rotation pair for .for: r5 is still 4 here, so this reads the dwords at r3+4 and r3+8 (second copy of one table value, first copy of the next).
 | |
| 	pmov	mm6, [r3+r5]	; this is not aligned address!!
 | |
| 
 | |
| 	loopalign 16
 | |
| .for:
 | |
| ;
 | |
| ; mm6 = c1 | s1
 | |
| ; mm7 = 0x80000000 | 0
 | |
| ;
 | |
| 	pmov	mm1, mm6
 | |
| 	mov	r0, [esp+40]	; fz
 | |
| 	puphdq	mm1, mm1	; c1 | c1
 | |
| 	lea	r1, [r0+r4*2]
 | |
| 	pfadd	mm1, mm1	; c1+c1 | c1+c1
 | |
| 	pfmul	mm1, mm6	; 2*c1*c1 | 2*c1*s1
 | |
| 	pfsub	mm1, [PIC_EBP_REL(D_1_0_0_0)] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
 | |
| 
 | |
| 	pmov	mm0, mm1
 | |
| 	pxor	mm7, mm6	; c1 | -s1
 | |
| ; The four unpacks below exchange the two lanes of mm0 and of mm6; mm2/mm3 are scratch (assuming pupldq/puphdq are punpckldq/punpckhdq -- confirm in nasm.h).
 | |
| 	pupldq	mm2, mm0
 | |
| 	pupldq	mm3, mm6	; ** | c1
 | |
| 	puphdq	mm0, mm2	; s2 | c2
 | |
| 	puphdq	mm6, mm3	;-s1 | c1
 | |
| 
 | |
| 	pxor	mm0, [PIC_EBP_REL(costab)]	; c2 | -s2
 | |
| 
 | |
| ; mm0 =  s2| c2
 | |
| ; mm1 = -c2| s2
 | |
| ; mm6 =  c1| s1
 | |
| ; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)
 | |
| ; Spill both double-angle pairs: mm0/mm1 are overwritten with data inside .do3 and reloaded from [esp]/[esp+8] at the end of each iteration.
 | |
| 	pmov	[esp], mm0
 | |
| 	pmov	[esp+8], mm1
 | |
| 
 | |
| 	sub	r1, r5		;r1 = gi
 | |
| 	add	r0, r5		;r0 = fi
 | |
| ; .do3 processes one fi/gi element pair per block: fi walks up from fz+i, gi walks up from fz+2*kx-i, both in steps of r4*8 bytes until r0 reaches fn.
 | |
| 	loopalign 16
 | |
| .do3:
 | |
| 	pmov	mm2, [r0+r4*2] ; fi[k1]
 | |
| 	pmov	mm4, [r1+r4*2] ; gi[k1]
 | |
| 	pmov	mm3, [r0+r2*2] ; fi[k3]
 | |
| 	pmov	mm5, [r1+r2*2] ; gi[k3]
 | |
| 
 | |
| 	pupldq	mm2, mm2	; fi1 | fi1
 | |
| 	pupldq	mm4, mm4	; gi1 | gi1
 | |
| 	pupldq	mm3, mm3	; fi3 | fi3
 | |
| 	pupldq	mm5, mm5	; gi3 | gi3
 | |
| 
 | |
| 	pfmul	mm2, mm0	; s2 * fi1 | c2 * fi1
 | |
| 	pfmul	mm4, mm1	;-c2 * gi1 | s2 * gi1
 | |
| 	pfmul	mm3, mm0	; s2 * fi3 | c2 * fi3
 | |
| 	pfmul	mm5, mm1	;-c2 * gi3 | s2 * gi3
 | |
| 
 | |
| 	pfadd	mm2, mm4		;b | a
 | |
| 	pfadd	mm3, mm5		;d | c
 | |
| 
 | |
| 	pmov	mm0, [r0]
 | |
| 	pmov	mm4, [r1]
 | |
| 	pmov	mm1, [r0+r4*4]
 | |
| 	pmov	mm5, [r1+r4*4]
 | |
| 
 | |
| 	pupldq	mm0, mm4		;gi0 | fi0
 | |
| 	pupldq	mm1, mm5		;gi2 | fi2
 | |
| 
 | |
| 	pmov	mm4, mm2
 | |
| 	pmov	mm5, mm3
 | |
| 
 | |
| 	pfadd	mm2, mm0		;g0 | f0
 | |
| 	pfadd	mm3, mm1		;g2 | f2
 | |
| 
 | |
| 	pfsub	mm0, mm4		;g1 | f1
 | |
| 	pfsub	mm1, mm5		;g3 | f3
 | |
| 
 | |
| 	pmov	mm4, mm3
 | |
| 	pmov	mm5, mm1
 | |
| 
 | |
| 	pupldq	mm4, mm4		;f2 | f2
 | |
| 	puphdq	mm5, mm5		;g3 | g3
 | |
| 	puphdq	mm3, mm3		;g2 | g2
 | |
| 	pupldq	mm1, mm1		;f3 | f3
 | |
| 
 | |
| 	pfmul	mm4, mm6		;f2 * c1 | f2 * s1
 | |
| 	pfmul	mm5, mm7		;g3 * s1 | g3 *-c1
 | |
| 	pfmul	mm3, mm6		;g2 * c1 | g2 * s1
 | |
| 	pfmul	mm1, mm7		;f3 * s1 | f3 *-c1
 | |
| 
 | |
| 	pfadd	mm4, mm5		;a | b
 | |
| 	pfsub	mm3, mm1		;d | c
 | |
| 
 | |
| 	pmov	mm5, mm2
 | |
| 	pmov	mm1, mm0
 | |
| 
 | |
| 	pupldq	mm2, mm2		;f0 | f0
 | |
| 	pupldq	mm0, mm0		;f1 | f1
 | |
| 
 | |
| 	puphdq	mm1, mm2		;f0 | g1
 | |
| 	puphdq	mm5, mm0		;f1 | g0
 | |
| 
 | |
| 	pmov	mm2, mm4
 | |
| 	pmov	mm0, mm3
 | |
| 
 | |
| 	pfadd	mm4, mm1		;fi0 | gi1
 | |
| 	pfadd	mm3, mm5		;fi1 | gi0
 | |
| 	pfsub	mm1, mm2		;fi2 | gi3
 | |
| 	pfsub	mm5, mm0		;fi3 | gi2
 | |
| 
 | |
| 	pmovd	[r1+r4*2], mm4	;gi[k1]
 | |
| 	puphdq	mm4, mm4
 | |
| 	pmovd	[r1], mm3		;gi[0]
 | |
| 	puphdq	mm3, mm3
 | |
| 	pmovd	[r1+r2*2], mm1	;gi[k3]
 | |
| 	puphdq	mm1, mm1
 | |
| 	pmovd	[r1+r4*4], mm5	;gi[k2]
 | |
| 	puphdq	mm5, mm5
 | |
| 
 | |
| 	pmovd	[r0], mm4	;fi[0]
 | |
| 	pmovd	[r0+r4*2], mm3	;fi[k1]
 | |
| 	pmovd	[r0+r4*4], mm1	;fi[k2]
 | |
| 	pmovd	[r0+r2*2], mm5	;fi[k3]
 | |
| 
 | |
| 	lea	r0, [r0+r4*8]
 | |
| 	lea	r1, [r1+r4*8]
 | |
| 	cmp	r0, [esp + 16]
 | |
| 	pmov	mm0, [esp]	; reload the spilled pairs; these moves do not disturb the flags set by cmp
 | |
| 	pmov	mm1, [esp+8]
 | |
| 
 | |
| 	jb near	.do3
 | |
| ; Step i by one float (4 bytes) and update the (mm6, mm7) pair from the two table entries at tri; .for repeats while i < kx.
 | |
| 	add	r5, 4
 | |
| ; mm6 =  c1| s1
 | |
| ; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)
 | |
| 	pfmul	mm6, [r3]	; c1*a | s1*a
 | |
| 	pfmul	mm7, [r3+8]	; s1*b |-c1*b
 | |
| 	cmp	r5, r4
 | |
| 
 | |
| 	pfsub	mm6, mm7	; c1*a-s1*b | s1*a+c1*b
 | |
| 	pupldq	mm7,mm6
 | |
| 	puphdq	mm6,mm7
 | |
| 	pmov	mm7, [PIC_EBP_REL(costab)]
 | |
| 	jb near	.for
 | |
| ; lea leaves the flags untouched, so the jb below tests the old kx against nn while r4 is already scaled to kx*4 for the next pass.
 | |
| 	mov	r0, [esp+40]	;fi
 | |
| 	cmp	r4, [esp+40+4]
 | |
| 	lea	r4, [r4*4]	;kx *= 4
 | |
| 
 | |
| 	jb near	.do1
 | |
| .exitttt
 | |
| 	femms			; leave MMX/3DNow! state before returning
 | |
| 	add	esp,20
 | |
| 	popd	ebp, ebx, esi, edi
 | |
| endproc
 | |
| 
 | |
| 
 | |
| ;void fht_E3DN(float *fz, int nn);
 | |
| ; Same frame layout and loop structure as fht_3DN, but uses the extended 3DNow! instructions pfpnacc and pswapd in place of the unpack/xor sequences. Both arguments are read from the stack.
 | |
| proc	fht_E3DN
 | |
| ; Frame after the prologue: [esp], [esp+8] = spilled rotation pairs, [esp+16] = end pointer fn, [esp+40] = fz, [esp+44] = nn (assumes pushd pushes four dwords -- TODO confirm against nasm.h).
 | |
| 	pushd	ebp, ebx, esi, edi
 | |
| 
 | |
| 	sub	esp, 20
 | |
| ; ebp becomes the base used by the PIC_EBP_REL() data references below (get_pc.bp / PIC_BASE are defined outside this file).
 | |
| 	call	get_pc.bp
 | |
| 	add	ebp, PIC_BASE()
 | |
| ; r0..r5 are register aliases and pmov/pmovd/pupldq/puphdq are macros, presumably from nasm.h (looks like movq/movd/punpckldq/punpckhdq -- confirm).
 | |
| 	mov	r0, [esp+40]		;fi
 | |
| 	mov	r1, [esp+44]		;r1 = nn
 | |
| 	lea	r3, [PIC_EBP_REL(costab)]		;tri = costab
 | |
| 	lea	r4, [r0+r1*8]		;r4 = fn = &fz[n]
 | |
| 	mov	[esp+16], r4		; keep fn in the frame; r4 is reused for kx
 | |
| 	mov	r4, 8			;kx = k1/2
 | |
| ; mm7 = first costab entry (sign-bit mask in the low dword); pxor with it flips the sign of the low lane only.
 | |
| 	pmov	mm7, [r3]
 | |
| 
 | |
| 	loopalign 16
 | |
| .do1
 | |
| 	lea	r3, [r3+16]	;tri += 2;
 | |
| 	pmov	mm6, [PIC_EBP_REL(costab+8)]
 | |
| 	lea	r2, [r4+r4*2]		;k3*fsize/2
 | |
| 	mov	r5, 4		;i = 1*fsize
 | |
| ; .do2 walks the whole buffer in steps of r4*8 bytes until r0 reaches fn; pfpnacc yields the difference in the low lane and the sum in the high lane of each packed pair.
 | |
| 	loopalign 16
 | |
| .do2:
 | |
| 	lea	r1, [r0+r4]		;gi = fi + kx
 | |
| ;f
 | |
| 	pmov	mm0, [r0]	; X  | fi0
 | |
| 	pmov	mm1, [r0+r4*4]	; X  | fi2
 | |
| 	pupldq	mm0, [r0+r4*2]	;fi1 | fi0
 | |
| 	pupldq	mm1, [r0+r2*2]	;fi3 | fi2
 | |
| 	pfpnacc	mm0, mm0	;fi0+fi1 | fi0-fi1 = f0|f1
 | |
| 	pfpnacc	mm1, mm1	;fi2+fi3 | fi2-fi3 = f2|f3
 | |
| 
 | |
| 	pmov	mm2, mm0
 | |
| 	pfadd	mm0, mm1	;f0+f2|f1+f3 = fi0 | fi1
 | |
| 	pfsub	mm2, mm1	;f0-f2|f1-f3 = fi2 | fi3
 | |
| 
 | |
| 	pmovd	[r0+r4*2], mm0	;fi[k1]
 | |
| 	pmovd	[r0+r2*2], mm2	;fi[k3]
 | |
| 
 | |
| 	puphdq	mm0, mm0
 | |
| 	puphdq	mm2, mm2
 | |
| 	pmovd	[r0], mm0	;fi[0]
 | |
| 	pmovd	[r0+r4*4], mm2	;fi[k2]
 | |
| 
 | |
| 	lea	r0, [r0+r4*8]
 | |
| ;g
 | |
| 	pmov	mm3, [r1]	;    gi0
 | |
| 	pmov	mm4, [r1+r2*2]	;    gi3
 | |
| 	pupldq	mm3, [r1+r4*2]	;gi1|gi0
 | |
| 	pupldq	mm4, [r1+r4*4]	;gi2|gi3
 | |
| 
 | |
| 	pfpnacc	mm3, mm3	;gi0+gi1  |gi0-gi1   = f0|f1
 | |
| 	pfmul	mm4, mm6	;gi2*SQRT2|gi3*SQRT2 = f2|f3
 | |
| 
 | |
| 	pmov	mm5, mm3
 | |
| 	pfadd	mm3, mm4	;f0+f2|f1+f3
 | |
| 	pfsub	mm5, mm4	;f0-f2|f1-f3
 | |
| ; The compare is issued before the stores; the MMX moves/unpacks that follow leave EFLAGS alone, so the jb below still tests r0 against fn.
 | |
| 	cmp	r0, [esp + 16]
 | |
| 	pmovd	[r1+r4*2], mm3	;gi[k1]
 | |
| 	pmovd	[r1+r2*2], mm5	;gi[k3]
 | |
| 	puphdq	mm3, mm3
 | |
| 	puphdq	mm5, mm5
 | |
| 	pmovd	[r1], mm3	;gi[0]
 | |
| 	pmovd	[r1+r4*4], mm5	;gi[k2]
 | |
| 
 | |
| 	jb near .do2
 | |
| ; Initial rotation pair for .for: r5 is still 4 here, so this reads the dwords at r3+4 and r3+8 (second copy of one table value, first copy of the next).
 | |
| 	pmov	mm6, [r3+r5]	; this is not aligned address!!
 | |
| 
 | |
| 	loopalign 16
 | |
| .for:
 | |
| ;
 | |
| ; mm6 = c1 | s1
 | |
| ; mm7 = 0x80000000 | 0
 | |
| ;
 | |
| 	pmov	mm5, mm6
 | |
| 	mov	r0, [esp+40]	; fz
 | |
| 	puphdq	mm5, mm5	; c1 | c1
 | |
| 	lea	r1, [r0+r4*2]
 | |
| 	pfadd	mm5, mm5	; c1+c1 | c1+c1
 | |
| 	pfmul	mm5, mm6	; 2*c1*c1 | 2*c1*s1
 | |
| 	pfsub	mm5, [PIC_EBP_REL(D_1_0_0_0)] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
 | |
| 
 | |
| 	pswapd	mm4, mm5	; s2 |-c2
 | |
| 	pxor	mm4, mm7	; s2 | c2
 | |
| 	pxor	mm7, mm6	; c1 |-s1
 | |
| 	pswapd	mm6, mm6	; s1 | c1
 | |
| 
 | |
| ; mm4 =  s2| c2
 | |
| ; mm5 = -c2| s2
 | |
| ; mm6 =  c1| s1
 | |
| ; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)
 | |
| ; Spill both double-angle pairs: mm4/mm5 are overwritten with data inside .do3 and reloaded from [esp]/[esp+8] at the end of each iteration.
 | |
| 	pmov	[esp], mm4
 | |
| 	pmov	[esp+8], mm5
 | |
| 
 | |
| 	sub	r1, r5		;r1 = gi
 | |
| 	add	r0, r5		;r0 = fi
 | |
| ; .do3 processes one fi/gi element pair per block: fi walks up from fz+i, gi walks up from fz+2*kx-i, both in steps of r4*8 bytes until r0 reaches fn.
 | |
| 	loopalign 16
 | |
| .do3:
 | |
| 	pmov	mm0, [r0+r2*2] ; fi[k3]
 | |
| 	pmov	mm2, [r1+r2*2] ; gi[k3]
 | |
| 	pmov	mm1, [r0+r4*2] ; fi[k1]
 | |
| 	pmov	mm3, [r1+r4*2] ; gi[k1]
 | |
| 
 | |
| 	pupldq	mm0, mm0
 | |
| 	pupldq	mm2, mm2
 | |
| 	pupldq	mm1, mm1
 | |
| 	pupldq	mm3, mm3
 | |
| 
 | |
| 	pfmul	mm0, mm4
 | |
| 	pfmul	mm2, mm5
 | |
| 	pfmul	mm1, mm4
 | |
| 	pfmul	mm3, mm5
 | |
| 
 | |
| 	pfadd	mm0, mm2		;d | c
 | |
| 	pfadd	mm1, mm3		;b | a
 | |
| 
 | |
| 	pmov	mm2, [r0+r4*4]		;fi2
 | |
| 	pupldq	mm3, [r1+r4*4]		;gi2 | -
 | |
| 	pmov	mm4, [r0]		;fi0
 | |
| 	pupldq	mm5, [r1]		;gi0 | -
 | |
| 
 | |
| 	pupldq	mm2, mm0		;c | fi2
 | |
| 	puphdq	mm3, mm0		;d | gi2
 | |
| 	pupldq	mm4, mm1		;a | fi0
 | |
| 	puphdq	mm5, mm1		;b | gi0
 | |
| 
 | |
| 	pfpnacc	mm2, mm2		;f2 | f3
 | |
| 	pfpnacc	mm3, mm3		;g2 | g3
 | |
| 	pfpnacc	mm4, mm4		;f0 | f1
 | |
| 	pfpnacc	mm5, mm5		;g0 | g1
 | |
| 
 | |
| 	pmov	mm0, mm2
 | |
| 	pmov	mm1, mm3
 | |
| 	pupldq	mm2, mm2		;f3 | f3
 | |
| 	pupldq	mm3, mm3		;g3 | g3
 | |
| 	puphdq	mm0, mm0		;f2 | f2
 | |
| 	puphdq	mm1, mm1		;g2 | g2
 | |
| 
 | |
| 	pswapd	mm4, mm4		;f1 | f0
 | |
| 	pswapd	mm5, mm5		;g1 | g0
 | |
| 
 | |
| 	pfmul	mm0, mm7		;f2 * s1 | f2 *-c1
 | |
| 	pfmul	mm3, mm6		;g3 * c1 | g3 * s1
 | |
| 	pfmul	mm1, mm6		;g2 * c1 | g2 * s1
 | |
| 	pfmul	mm2, mm7		;f3 * s1 | f3 *-c1
 | |
| 
 | |
| 	pfsub	mm0, mm3		; b |-a
 | |
| 	pfsub	mm1, mm2		; d | c
 | |
| 
 | |
| 	pmov	mm2, mm5
 | |
| 	pmov	mm3, mm4
 | |
| 	pupldq	mm4, mm0		;-a | f0
 | |
| 	pupldq	mm5, mm1		; c | g0
 | |
| 	puphdq	mm2, mm0		; b | g1
 | |
| 	puphdq	mm3, mm1		; d | f1
 | |
| 
 | |
| 	pfpnacc	mm4, mm4		;fi2 | fi0
 | |
| 	pfpnacc	mm5, mm5		;gi0 | gi2
 | |
| 	pfpnacc	mm2, mm2		;gi1 | gi3
 | |
| 	pfpnacc	mm3, mm3		;fi1 | fi3
 | |
| 
 | |
| 	pmovd	[r0], mm4		;fi[0]
 | |
| 	pmovd	[r1+r4*4], mm5		;gi[k2]
 | |
| 	pmovd	[r1+r2*2], mm2		;gi[k3]
 | |
| 	pmovd	[r0+r2*2], mm3		;fi[k3]
 | |
| 
 | |
| 	puphdq	mm4, mm4
 | |
| 	puphdq	mm5, mm5
 | |
| 	puphdq	mm2, mm2
 | |
| 	puphdq	mm3, mm3
 | |
| 	pmovd	[r0+r4*4], mm4		;fi[k2]
 | |
| 	pmovd	[r1], mm5		;gi[0]
 | |
| 	pmovd	[r1+r4*2], mm2		;gi[k1]
 | |
| 	pmovd	[r0+r4*2], mm3		;fi[k1]
 | |
| 
 | |
| 	lea	r0, [r0+r4*8]
 | |
| 	lea	r1, [r1+r4*8]
 | |
| 	cmp	r0, [esp + 16]
 | |
| 	pmov	mm4, [esp]	; reload the spilled pairs; these moves do not disturb the flags set by cmp
 | |
| 	pmov	mm5, [esp+8]
 | |
| 
 | |
| 	jb near	.do3
 | |
| ; Step i by one float (4 bytes) and update the (mm6, mm7) pair from the two table entries at tri; .for repeats while i < kx.
 | |
| 	add	r5, 4
 | |
| ; mm6 =  c1| s1
 | |
| ; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)
 | |
| 	pfmul	mm6, [r3]	; c1*a | s1*a
 | |
| 	pfmul	mm7, [r3+8]	; s1*b |-c1*b
 | |
| 	cmp	r5, r4
 | |
| 
 | |
| 	pfsub	mm6, mm7	; c1*a-s1*b | s1*a+c1*b
 | |
| 	pswapd	mm6, mm6 ; ???	; s1*a+c1*b | c1*a-s1*b
 | |
| 	pmov	mm7, [PIC_EBP_REL(costab)]
 | |
| 	jb near	.for
 | |
| ; lea leaves the flags untouched, so the jb below tests the old kx against nn while r4 is already scaled to kx*4 for the next pass.
 | |
| 	mov	r0, [esp+40]	;fi
 | |
| 	cmp	r4, [esp+40+4]
 | |
| 	lea	r4, [r4*4]	;kx *= 4
 | |
| 
 | |
| 	jb near	.do1
 | |
| .exitttt
 | |
| 	femms			; leave MMX/3DNow! state before returning
 | |
| 	add	esp,20
 | |
| 	popd	ebp, ebx, esi, edi
 | |
| endproc
 |