mirror of
				https://github.com/cookiengineer/audacity
				synced 2025-10-26 15:23:48 +01:00 
			
		
		
		
	Using LAME 3.10 Windows project files substantially changed from original, and included into audacity solution.
		
			
				
	
	
		
			448 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			448 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| ; new count bit routine
 | ||
| ;	part of this code originates from
 | ||
| ;	new GOGO-no-coda (1999, 2000)
 | ||
| ;	Copyright (C) 1999 shigeo
 | ||
| ;	modified by Keiichi SAKAI
 | ||
| 
 | ||
| %include "nasm.h"
 | ||
| 
 | ||
| 	globaldef	choose_table_MMX
 | ||
| 	globaldef	MMX_masking
 | ||
| 
 | ||
| 	externdef	largetbl
 | ||
| 	externdef	t1l
 | ||
| 	externdef	table23
 | ||
| 	externdef	table56
 | ||
| 
 | ||
| 	segment_data
 | ||
| 	align	16
 | ||
| D14_14_14_14:	times 4 dw	14	; four words of 14: pcmpgtw threshold for "value > 14"
 | ||
| D15_15_15_15:	times 4 dw	0xfff0	; four words of 0xfff0: paddusw bias, result saturates at min(v,15)+0xfff0
 | ||
| mul_add:	dw	0x0010, 0x0001, 0x0010, 0x0001	; pmaddwd weights: x*16 + y per (x, y) pair
 | ||
| mul_add23:	dw	0x0003, 0x0001, 0x0003, 0x0001	; pmaddwd weights: x*3 + y per (x, y) pair
 | ||
| mul_add56:	dw	0x0004, 0x0001, 0x0004, 0x0001	; pmaddwd weights: x*4 + y per (x, y) pair
 | ||
| ; tableDEF: 16 x 16 = 256 entries of two dwords (8 bytes each), read by the
 | ||
| ; from3 loop as [ecx+ebx*8] with ebx = x*16 + y for each (x, y) pair.
 | ||
| ; Entry layout, as consumed at .choose3_exit after paddd accumulation:
 | ||
| ;   first dword, low word   -> "sum2", selects table base+2
 | ||
| ;   first dword, high word  -> "sum1", selects table base+1
 | ||
| ;   second dword            -> "sum",  selects table base+0
 | ||
| ; table_MMX.L_case_8_15 uses this table with base 13.
 | ||
| ; NOTE(review): the values look like per-pair Huffman code lengths -- confirm
 | ||
| ; against the C-side tables before editing any entry.
 | ||
| tableDEF:
 | ||
| 	dd	0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
 | ||
| 	dd	0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
 | ||
| 	dd	0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
 | ||
| 	dd	0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
 | ||
| 	dd	0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
 | ||
| 	dd	0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
 | ||
| 	dd	0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
 | ||
| 	dd	0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
 | ||
| 	dd	0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
 | ||
| 	dd	0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
 | ||
| 	dd	0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
 | ||
| 	dd	0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
 | ||
| 	dd	0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
 | ||
| 	dd	0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
 | ||
| 	dd	0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
 | ||
| 	dd	0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
 | ||
| 	dd	0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
 | ||
| 	dd	0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
 | ||
| 	dd	0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
 | ||
| 	dd	0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
 | ||
| 	dd	0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
 | ||
| 	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
 | ||
| 	dd	0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
 | ||
| 	dd	0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
 | ||
| 	dd	0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
 | ||
| 	dd	0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
 | ||
| 	dd	0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
 | ||
| 	dd	0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
 | ||
| 	dd	0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
 | ||
| 	dd	0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
 | ||
| 	dd	0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
 | ||
| 	dd	0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
 | ||
| 	dd	0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
 | ||
| 	dd	0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
 | ||
| 	dd	0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
 | ||
| 	dd	0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
 | ||
| 	dd	0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
 | ||
| 	dd	0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
 | ||
| 	dd	0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
 | ||
| 	dd	0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
 | ||
| 	dd	0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
 | ||
| 	dd	0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
 | ||
| 	dd	0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
 | ||
| 	dd	0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
 | ||
| 	dd	0x000c000f,0x12
 | ||
| 
 | ||
| ; tableABC: 8 rows x 16 columns = 128 entries of two dwords (8 bytes each),
 | ||
| ; read by the from3 loop as [ecx+ebx*8] with ebx = x*16 + y. Two table sets
 | ||
| ; are interleaved in each 16-entry row:
 | ||
| ;   columns 0..7   - reached with ecx = tableABC      (table_MMX.L_case_67, base 10)
 | ||
| ;   columns 9..14  - reached with ecx = tableABC+9*8  (table_MMX.L_case_45, base 7)
 | ||
| ;   columns 8, 15  - zero filler
 | ||
| ; Entry layout is the one from3 expects: first dword = sum1:sum2 (high:low
 | ||
| ; word, tables base+1 / base+2), second dword = sum (table base+0).
 | ||
| ; NOTE(review): the values look like per-pair Huffman code lengths -- confirm
 | ||
| ; against the C-side tables before editing any entry.
 | ||
| tableABC:
 | ||
| 	dd	0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
 | ||
| 	dd	0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
 | ||
| 	dd	0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
 | ||
| 	dd	0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
 | ||
| 	dd	0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
 | ||
| 	dd	0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
 | ||
| 	dd	0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
 | ||
| 	dd	0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
 | ||
| 	dd	0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
 | ||
| 	dd	0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
 | ||
| 	dd	0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
 | ||
| 	dd	0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
 | ||
| 	dd	0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
 | ||
| 	dd	0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
 | ||
| 	dd	0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
 | ||
| 	dd	0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
 | ||
| 	dd	0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
 | ||
| 	dd	0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
 | ||
| 	dd	0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
 | ||
| 	dd	0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
 | ||
| 	dd	0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
 | ||
| 	dd	0x0,0x00000000, 0x0,0x00000000
 | ||
| 
 | ||
| ; linbits32: 13 entries of two dwords, read as [linbits32+eax*8] where
 | ||
| ; eax = bsr(max - 15) in the .with_ESC path of choose_table_MMX.
 | ||
| ; Each dword repeats one 16-bit factor in both halves, so pmaddwd against the
 | ||
| ; replicated pair of "value > 14" counters gives (count0 + count1) * factor.
 | ||
| ; The first dword's product ends up in the low 16 bits of the packed sum
 | ||
| ; ("sum2"), the second dword's product in the high 16 bits ("sum").
 | ||
| linbits32:
 | ||
| 	dd	0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
 | ||
| 	dd	0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
 | ||
| 	dd	0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
 | ||
| 	dd	0x000d000d,0xd000d
 | ||
| 
 | ||
| 
 | ||
| ; choose_table_H: 13 words, read as [choose_table_H+eax*2] with the same
 | ||
| ; index as linbits32. Each word holds two candidate return values:
 | ||
| ;   low byte  - returned when the high-16 sum is <= the low-16 sum
 | ||
| ;   high byte - returned otherwise (the code does shr eax,8)
 | ||
| choose_table_H:
 | ||
| 	dw	0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
 | ||
| 	dw	0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
 | ||
| 
 | ||
| ; Dispatch table for choose_table_MMX, indexed by the largest value (0..15)
 | ||
| ; found in the input. Each dword is a handler address stored as an offset
 | ||
| ; from choose_table_MMX; the dispatcher adds that base back before jumping.
 | ||
| choose_jump_table_L:
 | ||
| 	dd		table_MMX.L_case_0    - choose_table_MMX	; max = 0
 | ||
| 	dd		table_MMX.L_case_1    - choose_table_MMX	; max = 1
 | ||
| 	dd		table_MMX.L_case_2    - choose_table_MMX	; max = 2
 | ||
| 	dd		table_MMX.L_case_3    - choose_table_MMX	; max = 3
 | ||
| 	times 2 dd	table_MMX.L_case_45   - choose_table_MMX	; max = 4, 5
 | ||
| 	times 2 dd	table_MMX.L_case_67   - choose_table_MMX	; max = 6, 7
 | ||
| 	times 8 dd	table_MMX.L_case_8_15 - choose_table_MMX	; max = 8..15
 | ||
| 
 | ||
| 	segment_code
 | ||
| ; choose_table_MMX (MMX version): scans the dwords in [ix, end) for their
 | ||
| ; largest value, then dispatches on it: values 0..15 go through
 | ||
| ; choose_jump_table_L, larger values go to .with_ESC.
 | ||
| ; Input is consumed 8 bytes (one pair of dwords) at a time; an empty range is not handled.
 | ||
| ; Stack after "push ebp": [esp+8] = ix, [esp+12] = end, [esp+16] = s.
 | ||
| PIC_OFFSETTABLE
 | ||
| ; NOTE(review): PIC_OFFSETTABLE, get_pc.bp, PIC_BASE and PIC_EBP_REL are defined outside this file (presumably nasm.h) -- confirm.
 | ||
| 	align	16
 | ||
| ; int choose_table(int *ix, int *end, int *s)
 | ||
| choose_table_MMX:
 | ||
| 	push	ebp
 | ||
| 	call	get_pc.bp
 | ||
| 	add	ebp, PIC_BASE()
 | ||
| ; Set up a negative offset that counts up to zero: element address = edx + ecx.
 | ||
| 	mov	ecx,[esp+8]	;ecx = begin (ix)
 | ||
| 	mov	edx,[esp+12]	;edx = end
 | ||
| 	sub	ecx,edx		;ecx = begin - end (negative byte offset)
 | ||
| 	test	ecx,8		; odd number of 8-byte pairs?
 | ||
|  	pxor	mm0,mm0		;mm0=[0:0]  (MMX ops leave EFLAGS untouched)
 | ||
| 	movq	mm1,[edx+ecx]	; mm1 = first pair
 | ||
| 	jz	.lp		; even count: flags still come from "test ecx,8"
 | ||
| ; Odd count: the first pair is already in mm1, step past it.
 | ||
| 	add	ecx,8
 | ||
| 	jz	.exit		; that single pair was the whole input
 | ||
| ; Main loop: two pairs per iteration, running per-word maxima kept in mm0 and mm1.
 | ||
| 	align	4
 | ||
| .lp:
 | ||
| 	movq	mm4,[edx+ecx]
 | ||
| 	movq	mm5,[edx+ecx+8]
 | ||
| 	add	ecx,16		; sets ZF for the jnz below; the MMX ops in between do not touch flags
 | ||
| 	psubusw	mm4,mm0	; psubusw + paddw = unsigned word max; this should really be a dword operation,
 | ||
| 	psubusw	mm5,mm1	; but there is no such instruction :-p
 | ||
| 	paddw	mm0,mm4 ; ...however the values handled here are at most 8191+15, so words are fine
 | ||
| 	paddw	mm1,mm5
 | ||
| 	jnz	.lp
 | ||
| .exit:
 | ||
| 	psubusw	mm1,mm0	; mm0 = max(mm0, mm1); this too should really be a dword operation
 | ||
| 	paddw	mm0,mm1
 | ||
| ; Fold the high dword of mm0 into the low dword so eax gets the overall maximum.
 | ||
| 	movq	mm4,mm0
 | ||
| 	punpckhdq	mm4,mm4
 | ||
| 	psubusw	mm4,mm0	; this too should really be a dword operation
 | ||
| 	paddw	mm0,mm4
 | ||
| 	movd	eax,mm0		; eax = largest value found
 | ||
| ; Dispatch: maxima above 15 take the .with_ESC path, others index the jump table.
 | ||
| 	cmp	eax,15
 | ||
| 	ja	.with_ESC
 | ||
| 	lea	ecx,[PIC_EBP_REL(choose_table_MMX)]	; base address the table offsets are relative to
 | ||
| 	add	ecx,[PIC_EBP_REL(choose_jump_table_L+eax*4)]	; + handler offset for this maximum
 | ||
| 	jmp 	ecx		; handlers rely on edx = end and on the saved ebp still being on the stack
 | ||
| 
 | ||
| .with_ESC1:	; maximum above 8191+15: no table is selected
 | ||
| 	emms
 | ||
| 	mov	ecx, [esp+16]	; ecx = s
 | ||
| 	mov	[ecx], eax	; *s = the oversized maximum (stored, not added)
 | ||
| 	or	eax,-1		; return -1
 | ||
| 	pop	ebp
 | ||
| 	ret
 | ||
| ; Path for a maximum above 15. On entry eax = maximum and edx = end.
 | ||
| .with_ESC:
 | ||
| 	cmp	eax, 8191+15
 | ||
| 	ja	.with_ESC1
 | ||
| ; eax becomes the index of the highest set bit of (max - 15); it indexes linbits32 and choose_table_H below.
 | ||
| 	sub	eax,15
 | ||
| 	push	ebx
 | ||
| 	push	esi
 | ||
| 	bsr	eax, eax
 | ||
| %assign _P 4*2
 | ||
| 	movq    mm5, [PIC_EBP_REL(D15_15_15_15)]	; mm5 = 0xfff0 in every word
 | ||
| 	movq	mm6, [PIC_EBP_REL(D14_14_14_14)]	; mm6 = 14 in every word
 | ||
| 	movq	mm3, [PIC_EBP_REL(mul_add)]	; mm3 = pmaddwd weights {16, 1, 16, 1}
 | ||
| ; _P is the 8 bytes just pushed (ebx, esi), so the arguments sit _P higher than at entry.
 | ||
| 	mov	ecx, [esp+_P+8]		; = ix
 | ||
| ;	mov	edx, [esp+_P+12]	; = end (not reloaded: edx still holds end from the scan)
 | ||
| 	sub	ecx, edx		; ecx = ix - end (negative byte offset)
 | ||
| 
 | ||
| 	xor	esi, esi	; sum = 0
 | ||
| 	test    ecx, 8		; odd number of 8-byte pairs?
 | ||
| 	pxor	mm7, mm7	; linbits_sum: per-word count of values exceeding 14
 | ||
| 	jz	.H_dual_lp1	; even count: flags still come from "test ecx, 8"
 | ||
| ; Odd count: handle the leading pair on its own.
 | ||
| 	movq	mm0, [edx+ecx]
 | ||
| 	add	ecx,8		; ZF from here is consumed by "jz .H_dual_exit" below
 | ||
| 	packssdw	mm0,mm7	; dwords -> words; upper two words come from mm7 (zero)
 | ||
| 	movq	mm2, mm0
 | ||
| 	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
 | ||
| 	pcmpgtw	mm2, mm6	; greater than 14?  (0xffff per matching word)
 | ||
| 	psubw	mm7, mm2	; when greater than 14: linbits_sum++;
 | ||
| 	pmaddwd	mm0, mm3	; {0, 0, y, x}*{1, 16, 1, 16}
 | ||
| 	movd	ebx, mm0	; ebx = (min(x,15)-16)*16 + (min(y,15)-16) as a signed value
 | ||
| 	mov	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]	; the displacement cancels the -16 biases: entry min(x,15)*16+min(y,15)
 | ||
| 
 | ||
| 	jz	.H_dual_exit	; that single pair was the whole input
 | ||
| ; Main loop: two pairs per iteration.
 | ||
| 	align   4
 | ||
| .H_dual_lp1:
 | ||
| 	movq	mm0, [edx+ecx]
 | ||
| 	movq	mm1, [edx+ecx+8]
 | ||
| 	packssdw	mm0,mm1	; four dwords -> four words
 | ||
| 	movq	mm2, mm0
 | ||
| 	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
 | ||
| 	pcmpgtw	mm2, mm6	; greater than 14?
 | ||
| 	pmaddwd	mm0, mm3	; {y, x, y, x}*{1, 16, 1, 16}
 | ||
| 	movd	ebx, mm0	; index for the first pair
 | ||
| 	punpckhdq	mm0,mm0
 | ||
| 	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
 | ||
| 	movd	ebx, mm0	; index for the second pair
 | ||
| 	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
 | ||
| 	add	ecx, 16		; sets ZF for the jnz below; psubw does not touch flags
 | ||
| 	psubw	mm7, mm2	; when greater than 14: linbits_sum++;
 | ||
| 	jnz	.H_dual_lp1
 | ||
| ; Combine the counters and scale them by the two factors of the linbits32 entry.
 | ||
| .H_dual_exit:
 | ||
| 	pmov	mm1,mm7		; NOTE(review): pmov is a macro defined outside this file, presumably movq -- confirm
 | ||
| 	punpckhdq	mm7,mm7
 | ||
| 	paddd	mm7,mm1		; low dword = counters of the low half + counters of the high half
 | ||
| 	punpckldq	mm7,mm7	; replicate that dword into both halves
 | ||
| 
 | ||
| 	pmaddwd	mm7, [PIC_EBP_REL(linbits32+eax*8)]	; linbits: total count * factor, one product per dword
 | ||
| 	mov	ax, [PIC_EBP_REL(choose_table_H+eax*2)]	; ax = two candidate return values (low / high byte); eax's upper half is already zero after bsr
 | ||
| ; Pack both results into one register: low product in bits 0..15, high product in bits 16..31, plus the largetbl total.
 | ||
| 	movd	ecx, mm7
 | ||
| 	punpckhdq	mm7,mm7
 | ||
| 	movd	edx,mm7
 | ||
| 	emms
 | ||
| 	shl	edx, 16
 | ||
| 	add	ecx, edx
 | ||
| 
 | ||
| 	add	ecx, esi
 | ||
| 
 | ||
| 	pop	esi
 | ||
| 	pop	ebx
 | ||
| ; Split the packed sums again and keep the smaller one.
 | ||
| 	mov	edx, ecx
 | ||
| 	and	ecx, 0xffff	; ecx = sum2
 | ||
| 	shr	edx, 16	; edx = sum
 | ||
| 
 | ||
| 	cmp	edx, ecx
 | ||
| 	jle	.chooseE_s1	; sum <= sum2: keep sum and the low-byte candidate
 | ||
| 	mov	edx, ecx	; otherwise take sum2 ...
 | ||
| 	shr	eax, 8		; ... and the high-byte candidate
 | ||
| .chooseE_s1:
 | ||
| 	mov	ecx, [esp+16] ; ecx = s
 | ||
| 	and	eax, 0xff	; return value = selected byte of the choose_table_H entry
 | ||
| 	add	[ecx], edx	; *s += the smaller sum
 | ||
| 	pop	ebp
 | ||
| 	ret
 | ||
| 
 | ||
| ; Handler for a maximum of 0, reached through choose_jump_table_L.
 | ||
| ; Leaves *s untouched and returns with eax still holding that maximum (0).
 | ||
| table_MMX.L_case_0:
 | ||
| 	pop	ebp		; restore the caller's ebp pushed by choose_table_MMX
 | ||
| 	emms			; leave MMX state before returning
 | ||
| 	ret
 | ||
| 
 | ||
| ; Handler for a maximum of 1, reached through choose_jump_table_L with
 | ||
| ; edx = end. For every (x, y) pair it adds the byte t1l[2*x + y] to *s,
 | ||
| ; then returns 1.
 | ||
| table_MMX.L_case_1:
 | ||
| 	emms				; MMX is not used on this path
 | ||
| 	push	ebx			; callee-saved scratch register
 | ||
| 	mov	eax, [esp+4+16]		; eax = s   (+4 for the ebx just pushed)
 | ||
| 	mov	ecx, [esp+4+8]		; ecx = ix
 | ||
| 	sub	ecx, edx		; ecx = ix - end, counts up to zero
 | ||
| .next_pair:
 | ||
| 	mov	ebx, [edx+ecx]		; x
 | ||
| 	lea	ebx, [ebx+ebx]		; 2*x
 | ||
| 	add	ebx, [edx+ecx+4]	; 2*x + y
 | ||
| 	movzx	ebx, byte [PIC_EBP_REL(ebx+t1l)]
 | ||
| 	add	[eax], ebx		; *s += t1l[2*x + y]
 | ||
| 	add	ecx, 8			; advance one pair; ZF set at the end of the range
 | ||
| 	jnz	.next_pair
 | ||
| 	pop	ebx
 | ||
| 	mov	eax, 1			; return value
 | ||
| 	pop	ebp
 | ||
| 	ret
 | ||
| 
 | ||
| ; Handlers for maxima 4..15, reached through choose_jump_table_L with
 | ||
| ; edx = end and only the saved ebp above the return address. Each stub pushes
 | ||
| ; the number of its first candidate table, points ecx at a table of
 | ||
| ; 8-byte entries and joins the shared code at from3.
 | ||
| ;
 | ||
| ; from3 sums, over every (x, y) pair, the entry at index x*16 + y. An entry
 | ||
| ; carries three 16-bit-range sums: second dword = table base+0, high word of
 | ||
| ; the first dword = base+1, low word of the first dword = base+2. The smallest
 | ||
| ; total is added to *s and the matching table number is returned in eax.
 | ||
| table_MMX.L_case_45:
 | ||
| 	push	dword 7
 | ||
| 	lea	ecx, [PIC_EBP_REL(tableABC+9*8)]	; this set starts 9 entries into tableABC
 | ||
| 	jmp	from3
 | ||
| 
 | ||
| table_MMX.L_case_67:
 | ||
| 	push	dword 10
 | ||
| 	lea	ecx, [PIC_EBP_REL(tableABC)]
 | ||
| 	jmp	from3
 | ||
| 
 | ||
| table_MMX.L_case_8_15:
 | ||
| 	push	dword 13
 | ||
| 	lea	ecx, [PIC_EBP_REL(tableDEF)]
 | ||
| from3:
 | ||
| 	mov	eax,[esp+12]	;eax = ix (begin); +12 skips table number, ebp, return address
 | ||
| ;	mov	edx,[esp+16]	;edx = end (not reloaded: edx still holds end)
 | ||
| 
 | ||
| 	push	ebx
 | ||
| 	sub	eax, edx	;eax = ix - end, counts up to zero
 | ||
| 
 | ||
| 	movq	mm5,[PIC_EBP_REL(mul_add)]	;pmaddwd weights {16, 1, 16, 1}
 | ||
| 	pxor	mm2,mm2	;mm2 = sum
 | ||
| 
 | ||
| 	test	eax, 8
 | ||
| 	jz	.choose3_lp1
 | ||
| ; odd length
 | ||
| 	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
 | ||
| 	add	eax,8		;ZF from here is consumed by "jz .choose3_exit" below
 | ||
| 	packssdw	mm0,mm2
 | ||
| 
 | ||
| 	pmaddwd	mm0,mm5
 | ||
| 	movd	ebx,mm0		;ebx = x*16 + y
 | ||
| 
 | ||
| 	movq	mm2,  [ecx+ebx*8]
 | ||
| 
 | ||
| 	jz	.choose3_exit	;MMX ops and mov leave EFLAGS untouched
 | ||
| 
 | ||
| 	align	4
 | ||
| .choose3_lp1:
 | ||
| 	movq	mm0,[edx+eax]
 | ||
| 	movq	mm1,[edx+eax+8]
 | ||
| 	add	eax,16		;sets ZF for the jnz at the bottom of the loop
 | ||
| 	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
 | ||
| 	pmaddwd	mm0,mm5
 | ||
| 	movd	ebx,mm0		;index of the first pair
 | ||
| 	punpckhdq	mm0,mm0
 | ||
| 	paddd	mm2, [ecx+ebx*8]
 | ||
| 	movd	ebx,mm0		;index of the second pair
 | ||
| 	paddd	mm2, [ecx+ebx*8]
 | ||
| 	jnz	.choose3_lp1
 | ||
| .choose3_exit:
 | ||
| ;	xor	eax,eax		;not needed: eax is already 0 when the range is exhausted
 | ||
| 	movd	ebx, mm2
 | ||
| 	punpckhdq	mm2,mm2
 | ||
| 	mov	ecx, ebx
 | ||
| 	and	ecx, 0xffff	; ecx = sum2
 | ||
| 	shr	ebx, 16	; ebx = sum1
 | ||
| 	movd	edx, mm2	; edx = sum
 | ||
| 
 | ||
| 	cmp	edx, ebx
 | ||
| 	jle	.choose3_s1
 | ||
| 	mov	edx, ebx	; sum1 is smaller: candidate becomes base+1
 | ||
| 	inc	eax
 | ||
| .choose3_s1:
 | ||
| 	emms
 | ||
| 	pop	ebx
 | ||
| 	cmp	edx, ecx
 | ||
| 	jle	.choose3_s2
 | ||
| 	mov	edx, ecx	; sum2 is smaller still: candidate becomes base+2
 | ||
| 	mov	eax, 2
 | ||
| .choose3_s2:
 | ||
| 	pop	ecx		; ecx = base table number pushed by the stub
 | ||
| 	add	eax, ecx	; eax = chosen table number
 | ||
| 	mov	ecx, [esp+16] ; ecx = s
 | ||
| 	add	[ecx], edx	; *s += smallest sum
 | ||
| 	pop	ebp
 | ||
| 	ret
 | ||
| 
 | ||
| ; Handlers for maxima 2 and 3, reached through choose_jump_table_L with
 | ||
| ; edx = end and only the saved ebp above the return address. Each stub pushes
 | ||
| ; the number of its first candidate table, points ecx at a table of dword
 | ||
| ; entries and loads the matching pmaddwd weights, then joins from2.
 | ||
| ;
 | ||
| ; from2 sums, over every (x, y) pair, the dword entry selected by those
 | ||
| ; weights (x*3 + y for table23, x*4 + y for table56). The high word of the
 | ||
| ; total belongs to table base+0 and the low word to base+1; the smaller one is
 | ||
| ; added to *s and the matching table number is returned in eax.
 | ||
| ; NOTE(review): pmov is a macro defined outside this file, presumably movq -- confirm.
 | ||
| table_MMX.L_case_2:
 | ||
| 	push	dword 2
 | ||
| 	lea	ecx,[PIC_EBP_REL(table23)]
 | ||
| 	pmov	mm5,[PIC_EBP_REL(mul_add23)]
 | ||
| 	jmp	from2
 | ||
| table_MMX.L_case_3:
 | ||
| 	push	dword 5
 | ||
| 	lea	ecx,[PIC_EBP_REL(table56)]
 | ||
| 	pmov	mm5,[PIC_EBP_REL(mul_add56)]
 | ||
| from2:
 | ||
| 	mov	eax,[esp+12]	;eax = ix (begin); +12 skips table number, ebp, return address
 | ||
| ;	mov	edx,[esp+16]	;edx = end (not reloaded: edx still holds end)
 | ||
| 	push	ebx
 | ||
| 	push	edi
 | ||
| 
 | ||
| 	sub	eax, edx	;eax = ix - end, counts up to zero
 | ||
| 	xor	edi, edi	;edi = packed sum1:sum2 accumulator
 | ||
| 	test	eax, 8
 | ||
| 	jz	.choose2_lp1
 | ||
| ; odd length
 | ||
| 	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
 | ||
| 	pxor	mm2,mm2		;zero source for the upper words of the pack
 | ||
| 	packssdw	mm0,mm2
 | ||
| 
 | ||
| 	pmaddwd	mm0,mm5
 | ||
| 	movd	ebx,mm0		;ebx = table index of this pair
 | ||
| 
 | ||
| 	mov	edi,  [ecx+ebx*4]
 | ||
| 
 | ||
| 	add	eax,8
 | ||
| 	jz	.choose2_exit
 | ||
| 
 | ||
| 	align	4
 | ||
| .choose2_lp1:
 | ||
| 	movq	mm0,[edx+eax]
 | ||
| 	movq	mm1,[edx+eax+8]
 | ||
| 	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
 | ||
| 	pmaddwd	mm0,mm5
 | ||
| 	movd	ebx,mm0		;index of the first pair
 | ||
| 	punpckhdq	mm0,mm0
 | ||
| 	add	edi, [ecx+ebx*4]
 | ||
| 	movd	ebx, mm0	;index of the second pair
 | ||
| 	add	edi, [ecx+ebx*4]
 | ||
| 	add	eax,16
 | ||
| 	jnc	.choose2_lp1	;eax is a negative multiple of 16 here, so carry is set exactly when it reaches 0
 | ||
| .choose2_exit:
 | ||
| 	mov	ecx, edi
 | ||
| 	pop	edi
 | ||
| 	pop	ebx
 | ||
| 	pop	eax ; table num.
 | ||
| 	emms
 | ||
| 
 | ||
| 	mov	edx, ecx
 | ||
| 	and	ecx, 0xffff	; ecx = sum2
 | ||
| 	shr	edx, 16	; edx = sum1
 | ||
| 
 | ||
| 	cmp	edx, ecx
 | ||
| 	jle	.choose2_s1
 | ||
| 	mov	edx, ecx	; sum2 is smaller: pick table base+1
 | ||
| 	inc	eax
 | ||
| .choose2_s1:
 | ||
| 	mov	ecx, [esp+16] ; ecx = s
 | ||
| 	add	[ecx], edx	; *s += smaller sum
 | ||
| 	pop	ebp
 | ||
| 	ret
 | ||
| 
 | ||
| 	end
 |