mirror of
https://github.com/cookiengineer/audacity
synced 2025-05-01 08:09:41 +02:00
Using LAME 3.10 Windows project files substantially changed from original, and included into audacity solution.
448 lines
13 KiB
Plaintext
448 lines
13 KiB
Plaintext
; new count bit routine
|
||
; part of this code originated from
|
||
; new GOGO-no-coda (1999, 2000)
|
||
; Copyright (C) 1999 shigeo
|
||
; modified by Keiichi SAKAI
|
||
|
||
%include "nasm.h"
|
||
|
||
globaldef choose_table_MMX
|
||
globaldef MMX_masking
|
||
|
||
externdef largetbl
|
||
externdef t1l
|
||
externdef table23
|
||
externdef table56
|
||
|
||
segment_data
	align	16
; Packed 16-bit lane constants (8 bytes each) loaded with movq by the
; MMX code in this file.
D14_14_14_14	dw	14, 14, 14, 14			; pcmpgtw threshold: lane > 14
D15_15_15_15	dw	0xfff0, 0xfff0, 0xfff0, 0xfff0	; paddusw bias: min(v, 15) + 0xfff0
mul_add		dw	16, 1, 16, 1			; pmaddwd weights: even lane*16 + odd lane
mul_add23	dw	3, 1, 3, 1			; pmaddwd weights used with table23
mul_add56	dw	4, 1, 4, 1			; pmaddwd weights used with table56
|
||
; tableDEF: 256 entries of 8 bytes, indexed by ix[0]*16 + ix[1]
; (see the pmaddwd with mul_add followed by [ecx+ebx*8] in from3).
; Each entry is two dwords:
;   dword 0 : two packed 16-bit bit counts (high word / low word)
;   dword 1 : a third bit count
; from3 sums the entries over the range and picks the smallest of the
; three totals; the winner selects base table 13 + 0, 1 or 2
; (dword 1 -> +0, high word of dword 0 -> +1, low word of dword 0 -> +2).
tableDEF:
	dd 0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
	dd 0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
	dd 0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
	dd 0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
	dd 0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
	dd 0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
	dd 0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
	dd 0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd 0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
	dd 0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
	dd 0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd 0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
	dd 0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
	dd 0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd 0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
	dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
	dd 0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
	dd 0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
	dd 0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
	dd 0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
	dd 0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
	dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
	dd 0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
	dd 0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
	dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
	dd 0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
	dd 0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
	dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
	dd 0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
	dd 0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
	dd 0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
	dd 0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
	dd 0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
	dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
	dd 0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
	dd 0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
	dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
	dd 0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
	dd 0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
	dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
	dd 0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
	dd 0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
	dd 0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
	dd 0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
	dd 0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
	dd 0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
	dd 0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
	dd 0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
	dd 0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
	dd 0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
	dd 0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
	dd 0x000c000f,0x12
|
||
|
||
; tableABC: 128 entries of 8 bytes (8 rows of 16), same entry format as
; tableDEF: dword 0 = two packed 16-bit bit counts, dword 1 = a third.
; Indexed by ix[0]*16 + ix[1] via [ecx+ebx*8] in from3.
;   L_case_67 uses base tableABC      (result table = 10 + 0/1/2)
;   L_case_45 uses base tableABC+9*8  (result table =  7 + 0/1/2),
;   i.e. the same rows read starting nine entries further in.
; All-zero entries are padding between the two interleaved sub-tables.
tableABC:
	dd 0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
	dd 0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
	dd 0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
	dd 0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
	dd 0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
	dd 0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
	dd 0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
	dd 0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
	dd 0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
	dd 0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
	dd 0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
	dd 0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
	dd 0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
	dd 0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
	dd 0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
	dd 0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
	dd 0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
	dd 0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
	dd 0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
	dd 0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
	dd 0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
	dd 0x0,0x00000000, 0x0,0x00000000
|
||
|
||
; linbits32: 13 entries of 8 bytes, indexed by bsr(max - 15) (0..12) via
; [linbits32+eax*8] in the ESC path of choose_table_MMX.
; Each dword holds the same 16-bit multiplier in both words so that
; pmaddwd against the duplicated "values > 14" counters yields
; count * multiplier per dword:
;   dword 0 -> added to the low  16 bits of the packed bit sum
;   dword 1 -> added to the high 16 bits of the packed bit sum
linbits32:
	dd 0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
	dd 0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
	dd 0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
	dd 0x000d000d,0xd000d
|
||
|
||
|
||
; choose_table_H: 13 words, indexed by bsr(max - 15) (0..12).
; Each word packs two candidate table numbers for the ESC path:
;   low  byte = table returned when the high-half bit sum is not larger
;   high byte = table returned when the low-half bit sum is smaller
choose_table_H:
	dw 0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
	dw 0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
|
||
|
||
; Dispatch table for choose_table_MMX: entry N is the offset of the
; handler for "largest value in the range == N" (N = 0..15), relative
; to choose_table_MMX so the table itself needs no relocation.
choose_jump_table_L:
	dd		table_MMX.L_case_0 - choose_table_MMX		; 0
	dd		table_MMX.L_case_1 - choose_table_MMX		; 1
	dd		table_MMX.L_case_2 - choose_table_MMX		; 2
	dd		table_MMX.L_case_3 - choose_table_MMX		; 3
	times 2 dd	table_MMX.L_case_45 - choose_table_MMX		; 4, 5
	times 2 dd	table_MMX.L_case_67 - choose_table_MMX		; 6, 7
	times 8 dd	table_MMX.L_case_8_15 - choose_table_MMX	; 8 .. 15
|
||
|
||
segment_code
;
; use MMX
;

PIC_OFFSETTABLE

	align	16
;-----------------------------------------------------------------------
; int choose_table_MMX(int *ix, int *end, int *s)
;
; 32-bit x86, arguments on the stack. After the "push ebp" below:
;	[esp+8]  = ix	first dword of the range
;	[esp+12] = end	one past the last dword of the range
;	[esp+16] = s	bit counter, updated through the pointer
;
; The range is processed in 8-byte (two dword) steps, so it must be
; non-empty and a whole number of pairs.
;
; 1. Find the largest value in [ix, end).
; 2. max <= 15       : jump through choose_jump_table_L to a handler
;                      outside this block (edx = end, ebp = PIC base and
;                      the saved ebp are still live there).
;    max > 8191+15   : *s = max, return -1.
;    otherwise (ESC) : sum table bits plus linbits for values above 14,
;                      *s += the smaller total, return the matching
;                      table number from choose_table_H.
;
; Saves/restores ebp, ebx, esi.  Clobbers eax, ecx, edx, mm0-mm7, flags.
;-----------------------------------------------------------------------
choose_table_MMX:
	push	ebp
	call	get_pc.bp
	add	ebp, PIC_BASE()

	mov	ecx,[esp+8]	;ecx = begin
	mov	edx,[esp+12]	;edx = end
	sub	ecx,edx		;ecx = begin-end (negative byte offset)
	test	ecx,8		;odd number of pairs?
	pxor	mm0,mm0		;mm0=[0:0]  running max, accumulator A
	movq	mm1,[edx+ecx]	;mm1=first pair, running max, accumulator B
	jz	.lp		;flags still from "test": MMX ops leave EFLAGS alone

	add	ecx,8		;odd: first pair already consumed into mm1
	jz	.exit

	align	4
.lp:
	movq	mm4,[edx+ecx]
	movq	mm5,[edx+ecx+8]
	add	ecx,16
	; max(a,b) = a + saturating_sub(b,a), done per 16-bit word
	psubusw	mm4,mm0	; strictly this ought to be a dword operation,
	psubusw	mm5,mm1	; but no such instruction exists :-p
	paddw	mm0,mm4	; it is fine: values handled here are at most 8191+15
	paddw	mm1,mm5
	jnz	.lp		;flags from "add ecx,16"
.exit:
	psubusw	mm1,mm0	; this too ought to be a dword operation
	paddw	mm0,mm1		;mm0 = max(mm0, mm1)

	movq	mm4,mm0
	punpckhdq	mm4,mm4
	psubusw	mm4,mm0	; this too ought to be a dword operation
	paddw	mm0,mm4		;low dword = max(low dword, high dword)
	movd	eax,mm0		;eax = largest value in the range

	cmp	eax,15
	ja	.with_ESC
	; max in 0..15: handler address = choose_table_MMX + table offset
	lea	ecx,[PIC_EBP_REL(choose_table_MMX)]
	add	ecx,[PIC_EBP_REL(choose_jump_table_L+eax*4)]
	jmp	ecx

.with_ESC1:
	; max > 8191+15: store max into *s and return -1
	emms
	mov	ecx, [esp+16]	; *s
	mov	[ecx], eax
	or	eax,-1
	pop	ebp
	ret

.with_ESC:
	cmp	eax, 8191+15
	ja	.with_ESC1

	sub	eax,15
	push	ebx
	push	esi
	bsr	eax, eax	; eax = index (0..12) into linbits32 / choose_table_H
%assign _P 4*2			; bytes pushed since entry (ebx, esi)
	movq	mm5, [PIC_EBP_REL(D15_15_15_15)]
	movq	mm6, [PIC_EBP_REL(D14_14_14_14)]
	movq	mm3, [PIC_EBP_REL(mul_add)]

	mov	ecx, [esp+_P+8]	; = ix
	; edx still = end
	sub	ecx, edx

	xor	esi, esi	; sum = 0
	test	ecx, 8
	pxor	mm7, mm7	; linbits_sum: count of values exceeding 14
	jz	.H_dual_lp1

	; odd number of pairs: handle the first pair on its own
	movq	mm0, [edx+ecx]
	add	ecx,8		; ZF consumed by "jz .H_dual_exit" below
	packssdw	mm0,mm7
	movq	mm2, mm0
	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6	; greater than 14?
	psubw	mm7, mm2	; when greater than 14: linbits_sum++;
	pmaddwd	mm0, mm3	; {0, 0, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	mov	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]

	jz	.H_dual_exit

	align	4
.H_dual_lp1:
	movq	mm0, [edx+ecx]
	movq	mm1, [edx+ecx+8]
	packssdw	mm0,mm1
	movq	mm2, mm0
	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6	; greater than 14?
	pmaddwd	mm0, mm3	; {y, x, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	punpckhdq	mm0,mm0
	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
	movd	ebx, mm0
	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
	add	ecx, 16
	psubw	mm7, mm2	; when greater than 14: linbits_sum++;
	jnz	.H_dual_lp1	; flags from "add ecx, 16"

.H_dual_exit:
	; fold the four word counters into one dword and duplicate it
	pmov	mm1,mm7
	punpckhdq	mm7,mm7
	paddd	mm7,mm1
	punpckldq	mm7,mm7

	pmaddwd	mm7, [PIC_EBP_REL(linbits32+eax*8)]	; linbits
	; movzx instead of "mov ax": eax is 0..12 here, so the result is
	; the same, without a 16-bit partial write ahead of the 32-bit
	; "shr eax" / "and eax" below.
	movzx	eax, word [PIC_EBP_REL(choose_table_H+eax*2)]

	movd	ecx, mm7
	punpckhdq	mm7,mm7
	movd	edx,mm7
	emms
	shl	edx, 16
	add	ecx, edx	; ecx = linbits cost, packed high16 / low16

	add	ecx, esi	; add the packed table bit sums

	pop	esi
	pop	ebx

	mov	edx, ecx
	and	ecx, 0xffff	; ecx = sum2
	shr	edx, 16	; edx = sum

	cmp	edx, ecx
	jle	.chooseE_s1
	mov	edx, ecx	; sum2 is smaller:
	shr	eax, 8		;   take the table number from the high byte
.chooseE_s1:
	mov	ecx, [esp+16]	; *s
	and	eax, 0xff	; table number
	add	[ecx], edx
	pop	ebp
	ret
|
||
|
||
; Handler for max == 0, reached through choose_jump_table_L.
; Leaves *s untouched and returns with eax unchanged (the dispatch
; index, 0).
table_MMX.L_case_0:
	pop	ebp		; undo the push at choose_table_MMX entry
	emms			; leave MMX state
	ret
|
||
|
||
; Handler for max == 1, reached through choose_jump_table_L.
; On entry: edx = end, ebp = PIC base, saved ebp on top of the stack.
; For every pair (x, y) adds t1l[2*x + y] to *s, then returns 1.
table_MMX.L_case_1:
	emms
	mov	ecx, [esp+8]	; ecx = ix
	mov	eax, [esp+16]	; eax = s
	sub	ecx, edx	; ecx = ix - end (negative byte offset)
	push	ebx
.next_pair:
	mov	ebx, [edx+ecx]			; x
	lea	ebx, [ebx+ebx]			; 2*x
	add	ebx, [edx+ecx+4]		; 2*x + y
	movzx	ebx, byte [PIC_EBP_REL(ebx+t1l)]
	add	[eax], ebx			; *s += bits
	add	ecx, 8
	jnz	.next_pair
	pop	ebx
	mov	eax, 1		; table number
	pop	ebp
	ret
|
||
|
||
; Handlers for max in 4..15, reached through choose_jump_table_L.
; On entry: edx = end, ebp = PIC base, saved ebp on top of the stack.
; Each entry point pushes its base table number and loads ecx with the
; 8-byte-per-entry lookup table, then shares the code at from3.
;
; from3 sums the three packed bit counts of table[ix[0]*16 + ix[1]]
; over all pairs, adds the smallest total to *s and returns
; base + 0, 1 or 2 depending on which total won.
table_MMX.L_case_45:
	push	dword 7
	lea	ecx, [PIC_EBP_REL(tableABC+9*8)]
	jmp	from3

table_MMX.L_case_67:
	push	dword 10
	lea	ecx, [PIC_EBP_REL(tableABC)]
	jmp	from3

table_MMX.L_case_8_15:
	push	dword 13
	lea	ecx, [PIC_EBP_REL(tableDEF)]
from3:
	; stack: [esp] = base table, [esp+4] = saved ebp, [esp+8] = return
	mov	eax,[esp+12]	;eax = begin
	; edx still = end

	push	ebx
	sub	eax, edx	;eax = begin-end (negative byte offset)

	movq	mm5,[PIC_EBP_REL(mul_add)]
	pxor	mm2,mm2	;mm2 = sum

	test	eax, 8
	jz	.choose3_lp1
; odd number of pairs: handle the first pair on its own
	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
	add	eax,8		;ZF consumed by "jz .choose3_exit" below
	packssdw	mm0,mm2

	pmaddwd	mm0,mm5	;ix[0]*16 + ix[1]
	movd	ebx,mm0

	movq	mm2, [ecx+ebx*8]

	jz	.choose3_exit

	align	4
.choose3_lp1:
	movq	mm0,[edx+eax]
	movq	mm1,[edx+eax+8]
	add	eax,16
	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0,mm5
	movd	ebx,mm0
	punpckhdq	mm0,mm0
	paddd	mm2, [ecx+ebx*8]
	movd	ebx,mm0
	paddd	mm2, [ecx+ebx*8]
	jnz	.choose3_lp1	;flags from "add eax,16"
.choose3_exit:
	; eax == 0 here (both ways in arrive with ZF set by an add to eax),
	; so it doubles as the "+0" table selector.
	movd	ebx, mm2
	punpckhdq	mm2,mm2
	mov	ecx, ebx
	and	ecx, 0xffff	; ecx = sum2
	shr	ebx, 16	; ebx = sum1
	movd	edx, mm2	; edx = sum

	cmp	edx, ebx
	jle	.choose3_s1
	mov	edx, ebx
	inc	eax		; sum1 wins: selector = 1
.choose3_s1:
	emms
	pop	ebx
	cmp	edx, ecx
	jle	.choose3_s2
	mov	edx, ecx
	mov	eax, 2		; sum2 wins: selector = 2
.choose3_s2:
	pop	ecx		; base table number pushed by the entry point
	add	eax, ecx
	mov	ecx, [esp+16]	; *s
	add	[ecx], edx
	pop	ebp
	ret
|
||
|
||
; Handlers for max == 2 and max == 3, reached through
; choose_jump_table_L.
; On entry: edx = end, ebp = PIC base, saved ebp on top of the stack.
; Each entry point pushes its base table number, loads ecx with a
; 4-byte-per-entry lookup table and mm5 with the matching pmaddwd
; weights, then shares the code at from2.
;
; from2 sums table[ix[0]*w + ix[1]] over all pairs (two 16-bit bit
; counts packed per dword), adds the smaller total to *s and returns
; base + 0 or base + 1.
table_MMX.L_case_2:
	push	dword 2
	lea	ecx,[PIC_EBP_REL(table23)]
	pmov	mm5,[PIC_EBP_REL(mul_add23)]
	jmp	from2
table_MMX.L_case_3:
	push	dword 5
	lea	ecx,[PIC_EBP_REL(table56)]
	pmov	mm5,[PIC_EBP_REL(mul_add56)]
from2:
	; stack: [esp] = base table, [esp+4] = saved ebp, [esp+8] = return
	mov	eax,[esp+12]	;eax = begin
	; edx still = end
	push	ebx
	push	edi

	sub	eax, edx	;eax = begin-end (negative byte offset)
	xor	edi, edi	;edi = packed sum
	test	eax, 8
	jz	.choose2_lp1
; odd number of pairs: handle the first pair on its own
	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
	pxor	mm2,mm2	;zero upper words for the pack
	packssdw	mm0,mm2

	pmaddwd	mm0,mm5
	movd	ebx,mm0

	mov	edi, [ecx+ebx*4]

	add	eax,8
	jz	.choose2_exit

	align	4
.choose2_lp1:
	movq	mm0,[edx+eax]
	movq	mm1,[edx+eax+8]
	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0,mm5
	movd	ebx,mm0
	punpckhdq	mm0,mm0
	add	edi, [ecx+ebx*4]
	movd	ebx, mm0
	add	edi, [ecx+ebx*4]
	add	eax,16
	jnc	.choose2_lp1	;carry out of the negative offset = reached end
.choose2_exit:
	mov	ecx, edi
	pop	edi
	pop	ebx
	pop	eax	; table num.
	emms

	mov	edx, ecx
	and	ecx, 0xffff	; ecx = sum2
	shr	edx, 16	; edx = sum1

	cmp	edx, ecx
	jle	.choose2_s1
	mov	edx, ecx
	inc	eax		; sum2 wins: base + 1
.choose2_s1:
	mov	ecx, [esp+16]	; *s
	add	[ecx], edx
	pop	ebp
	ret
|
||
|
||
end
|