mirror of
https://github.com/cookiengineer/audacity
synced 2025-10-18 16:41:14 +02:00
Start on built-in LAME
Using LAME 3.10 Windows project files substantially changed from original, and included into audacity solution.
This commit is contained in:
447
lib-src/libmp3lame/i386/choose_table.nas
Normal file
447
lib-src/libmp3lame/i386/choose_table.nas
Normal file
@@ -0,0 +1,447 @@
|
||||
; new count bit routine
|
||||
; part of this code originates from
|
||||
; new GOGO-no-coda (1999, 2000)
|
||||
; Copyright (C) 1999 shigeo
|
||||
; modified by Keiichi SAKAI
|
||||
|
||||
%include "nasm.h"
|
||||
|
||||
globaldef choose_table_MMX
|
||||
globaldef MMX_masking
|
||||
|
||||
externdef largetbl
|
||||
externdef t1l
|
||||
externdef table23
|
||||
externdef table56
|
||||
|
||||
segment_data
|
||||
; Packed 16-bit constants for the MMX paths in this file.
; Each one is a single 8-byte qword (four words).
	align	16
D14_14_14_14:	dd	0x000E000E, 0x000E000E	; 14 in every word: pcmpgtw threshold for "value > 14"
D15_15_15_15:	dd	0xfff0fff0, 0xfff0fff0	; 0xfff0 (not 15) in every word: paddusw bias, see "min(ix, 15)+0xfff0"
mul_add:	dd	0x00010010, 0x00010010	; pmaddwd weights {16, 1, 16, 1}: x*16 + y
mul_add23:	dd	0x00010003, 0x00010003	; pmaddwd weights { 3, 1,  3, 1}: x*3 + y
mul_add56:	dd	0x00010004, 0x00010004	; pmaddwd weights { 4, 1,  4, 1}: x*4 + y
|
||||
; tableDEF -- 256 entries of 8 bytes, indexed by x*16 + y (see mul_add).
; Passed to from3 by table_MMX.L_case_8_15 (base table number 13).
; Entry layout as consumed by from3 after summing entries with paddd:
;   first  dword, upper 16 bits : total compared for base+1
;   first  dword, lower 16 bits : total compared for base+2
;   second dword                : total compared for base+0
; The label carries a colon so NASM does not report it as an orphan label.
tableDEF:
	dd	0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
	dd	0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
	dd	0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
	dd	0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
	dd	0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
	dd	0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
	dd	0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
	dd	0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
	dd	0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
	dd	0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd	0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
	dd	0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
	dd	0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
	dd	0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
	dd	0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
	dd	0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
	dd	0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
	dd	0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
	dd	0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
	dd	0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
	dd	0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
	dd	0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
	dd	0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
	dd	0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
	dd	0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
	dd	0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
	dd	0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
	dd	0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
	dd	0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
	dd	0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
	dd	0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
	dd	0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
	dd	0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
	dd	0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
	dd	0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
	dd	0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
	dd	0x000c000f,0x12
|
||||
|
||||
; tableABC -- 128 entries of 8 bytes, laid out in rows of 16 entries and
; indexed by x*16 + y (see mul_add).  Two users share each row:
;   table_MMX.L_case_67 starts at tableABC       (columns 0..7, column 8 is zero)
;   table_MMX.L_case_45 starts at tableABC + 9*8 (columns 9..14, column 15 is zero)
; Entry layout as consumed by from3 after summing entries with paddd:
;   first  dword, upper 16 bits : total compared for base+1
;   first  dword, lower 16 bits : total compared for base+2
;   second dword                : total compared for base+0
; The label carries a colon so NASM does not report it as an orphan label.
tableABC:
	dd	0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
	dd	0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
	dd	0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
	dd	0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
	dd	0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
	dd	0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
	dd	0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
	dd	0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
	dd	0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
	dd	0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
	dd	0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
	dd	0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
	dd	0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
	dd	0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
	dd	0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
	dd	0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
	dd	0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
	dd	0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
	dd	0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
	dd	0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
	dd	0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
	dd	0x0,0x00000000, 0x0,0x00000000
|
||||
|
||||
; linbits32 -- 13 entries of 8 bytes, indexed by bsr(max - 15) (0..12) in
; the .with_ESC path of choose_table_MMX.  Each dword repeats one 16-bit
; multiplier in both words; pmaddwd multiplies them with the per-word
; counts of values above 14.  The first dword's product ends up in the
; lower 16 bits ("sum2") and the second dword's product in the upper
; 16 bits ("sum") of the packed total.
; The label carries a colon so NASM does not report it as an orphan label.
linbits32:
	dd	0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
	dd	0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
	dd	0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
	dd	0x000d000d,0xd000d
|
||||
|
||||
|
||||
; choose_table_H -- 13 words, indexed by bsr(max - 15) (0..12) in the
; .with_ESC path of choose_table_MMX.  Each word holds two candidate
; return values: the low byte is returned when "sum" <= "sum2",
; otherwise the high byte is returned.
; The label carries a colon so NASM does not report it as an orphan label.
choose_table_H:
	dw	0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
	dw	0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
|
||||
|
||||
; Dispatch table for choose_table_MMX, indexed by the largest value found
; in the input (0..15).  Each dword is the distance of a handler from
; choose_table_MMX; the dispatcher adds the address of choose_table_MMX
; back before jumping.
choose_jump_table_L:
	dd	table_MMX.L_case_0 - choose_table_MMX			; 0
	dd	table_MMX.L_case_1 - choose_table_MMX			; 1
	dd	table_MMX.L_case_2 - choose_table_MMX			; 2
	dd	table_MMX.L_case_3 - choose_table_MMX			; 3
	times 2 dd	table_MMX.L_case_45 - choose_table_MMX		; 4, 5
	times 2 dd	table_MMX.L_case_67 - choose_table_MMX		; 6, 7
	times 8 dd	table_MMX.L_case_8_15 - choose_table_MMX	; 8..15
|
||||
|
||||
segment_code
|
||||
;
|
||||
; use MMX
|
||||
;
|
||||
|
||||
PIC_OFFSETTABLE
|
||||
|
||||
;-----------------------------------------------------------------------
; int choose_table(int *ix, int *end, int *s)
;
; Finds the largest value in the dword array [ix, end) with MMX and
; dispatches on it:
;   max <= 15        jump through choose_jump_table_L[max]
;   max <= 8191+15   handled below (.with_ESC): returns one of the two
;                    table numbers stored in choose_table_H and adds the
;                    smaller of two packed totals to *s
;   otherwise        stores max in *s and returns -1 (.with_ESC1)
;
; Arguments are read from the stack; after "push ebp" they are at
; [esp+8] = ix, [esp+12] = end, [esp+16] = s.
; ebp becomes the base for PIC_EBP_REL() data references (get_pc.bp,
; PIC_BASE and PIC_EBP_REL are defined outside this file).
; edx holds `end` until the jump-table dispatch, so the handlers reached
; through the table can rely on it.
;
; The loops index the array with a negative byte offset (ix - end) that
; counts up to zero.  When the number of 8-byte pairs is odd, one pair is
; handled up front so the main loops can consume 16 bytes per iteration.
; NOTE(review): assumes a non-empty range holding a whole number of
; pairs; the code does not check this.
;
; MMX instructions and movd do not modify EFLAGS, which is why several
; conditional jumps below test flags set a few instructions earlier.
;-----------------------------------------------------------------------
	align	16
choose_table_MMX:
	push	ebp
	call	get_pc.bp
	add	ebp, PIC_BASE()

	mov	ecx, [esp+8]		; ecx = begin
	mov	edx, [esp+12]		; edx = end
	sub	ecx, edx		; ecx = begin-end (should be negative)
	test	ecx, 8			; odd number of pairs?
	pxor	mm0, mm0		; mm0 = [0:0], first running maximum
	movq	mm1, [edx+ecx]		; mm1 = first pair, second running maximum
	jz	.lp			; flags are still those of "test ecx,8"

	add	ecx, 8			; the first pair is already in mm1
	jz	.exit

	align	4
.lp:
	; per word: max(a, b) = a + saturating_sub(b, a)
	movq	mm4, [edx+ecx]
	movq	mm5, [edx+ecx+8]
	add	ecx, 16
	psubusw	mm4, mm0		; strictly speaking this should be a dword operation,
	psubusw	mm5, mm1		; but there is no such instruction :-p
	paddw	mm0, mm4		; however, the values handled here are at most 8191+15, so it is not a problem
	paddw	mm1, mm5
	jnz	.lp			; ZF comes from "add ecx,16"
.exit:
	psubusw	mm1, mm0		; this, too, should strictly be a dword operation
	paddw	mm0, mm1		; mm0 = max(mm0, mm1)

	movq	mm4, mm0
	punpckhdq	mm4, mm4	; copy the high dword into the low dword
	psubusw	mm4, mm0		; this, too, should strictly be a dword operation
	paddw	mm0, mm4		; low dword = max of both dwords
	movd	eax, mm0		; eax = largest value

	cmp	eax, 15
	ja	.with_ESC
	lea	ecx, [PIC_EBP_REL(choose_table_MMX)]
	add	ecx, [PIC_EBP_REL(choose_jump_table_L+eax*4)]	; table entries are offsets from choose_table_MMX
	jmp	ecx

.with_ESC1:				; max > 8191+15
	emms
	mov	ecx, [esp+16]		; *s
	mov	[ecx], eax		; *s = max
	or	eax, -1			; return -1
	pop	ebp
	ret

.with_ESC:
	cmp	eax, 8191+15
	ja	.with_ESC1

	sub	eax, 15
	push	ebx
	push	esi
	bsr	eax, eax		; eax = index of the highest set bit of max-15 (0..12)
%assign _P 4*2				; bytes pushed since the prologue (ebx, esi)
	movq	mm5, [PIC_EBP_REL(D15_15_15_15)]
	movq	mm6, [PIC_EBP_REL(D14_14_14_14)]
	movq	mm3, [PIC_EBP_REL(mul_add)]

	mov	ecx, [esp+_P+8]		; = ix
;	mov	edx, [esp+_P+12]	; = end (edx still holds it)
	sub	ecx, edx

	xor	esi, esi		; sum = 0
	test	ecx, 8
	pxor	mm7, mm7		; linbits_sum: number of values that exceed 14
	jz	.H_dual_lp1

	; odd number of pairs: handle the first pair alone
	movq	mm0, [edx+ecx]
	add	ecx, 8			; ZF is consumed by "jz .H_dual_exit" below
	packssdw	mm0, mm7
	movq	mm2, mm0
	paddusw	mm0, mm5		; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6		; greater than 14?
	psubw	mm7, mm2		; if greater than 14, linbits_sum++
	pmaddwd	mm0, mm3		; {0, 0, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	; 0xfff0 acts as -16 in pmaddwd, so the index is short by 16*16+16
	mov	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]

	jz	.H_dual_exit

	align	4
.H_dual_lp1:
	movq	mm0, [edx+ecx]
	movq	mm1, [edx+ecx+8]
	packssdw	mm0, mm1
	movq	mm2, mm0
	paddusw	mm0, mm5		; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6		; greater than 14?
	pmaddwd	mm0, mm3		; {y, x, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	punpckhdq	mm0, mm0
	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
	movd	ebx, mm0
	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
	add	ecx, 16
	psubw	mm7, mm2		; if greater than 14, linbits_sum++
	jnz	.H_dual_lp1		; ZF comes from "add ecx,16"

.H_dual_exit:
	pmov	mm1, mm7		; pmov is not defined in this file (presumably a nasm.h macro -- confirm)
	punpckhdq	mm7, mm7
	paddd	mm7, mm1		; low dword = low-half counts + high-half counts
	punpckldq	mm7, mm7	; replicate that dword into both halves

	pmaddwd	mm7, [PIC_EBP_REL(linbits32+eax*8)]	; linbits
	; eax holds only the 0..12 bsr result here, so a zero-extending load
	; gives the same value as a 16-bit "mov ax" without a partial write.
	movzx	eax, word [PIC_EBP_REL(choose_table_H+eax*2)]

	movd	ecx, mm7
	punpckhdq	mm7, mm7
	movd	edx, mm7
	emms
	shl	edx, 16
	add	ecx, edx		; pack both linbits totals as upper:lower 16 bits

	add	ecx, esi		; add the packed totals read from largetbl

	pop	esi
	pop	ebx

	mov	edx, ecx
	and	ecx, 0xffff		; ecx = sum2
	shr	edx, 16			; edx = sum

	cmp	edx, ecx
	jle	.chooseE_s1
	mov	edx, ecx		; sum2 is smaller:
	shr	eax, 8			;   select the high byte of the choose_table_H word
.chooseE_s1:
	mov	ecx, [esp+16]		; *s
	and	eax, 0xff		; return value = selected byte
	add	[ecx], edx		; *s += smaller total
	pop	ebp
	ret
|
||||
|
||||
; Jump-table target for index 0: leave MMX state and return.
; eax still holds the dispatch index (0 on this path); *s is not written.
table_MMX.L_case_0:
	emms
	pop	ebp			; pushed at the top of choose_table_MMX
	ret
|
||||
|
||||
; Jump-table target for index 1.
; For every pair (x, y) in [ix, end): *s += t1l[2*x + y].  Returns 1.
; In: edx = end (left there by choose_table_MMX), ebp = base for PIC_EBP_REL.
table_MMX.L_case_1:
	emms				; no MMX registers are used on this path
	mov	ecx, [esp+8]		; ecx = ix
	mov	eax, [esp+16]		; eax = s
	sub	ecx, edx		; ecx = ix - end, negative byte offset
	push	ebx
.next_pair:
	mov	ebx, [edx+ecx]		; x
	add	ebx, ebx		; 2*x
	add	ebx, [edx+ecx+4]	; 2*x + y
	movzx	ebx, byte [PIC_EBP_REL(ebx+t1l)]
	add	[eax], ebx		; *s += t1l[2*x + y]
	add	ecx, 8			; next pair; ZF is set once the offset reaches 0
	jnz	.next_pair
	pop	ebx
	mov	eax, 1			; return 1
	pop	ebp
	ret
|
||||
|
||||
; Jump-table target for indices 4 and 5: base table number 7, using the
; entries that start 9 slots (8 bytes each) into tableABC.
table_MMX.L_case_45:
	lea	ecx, [PIC_EBP_REL(tableABC+9*8)]	; ecx = table handed to from3
	push	dword 7				; popped by from3 and added to the result
	jmp	from3
|
||||
|
||||
; Jump-table target for indices 6 and 7: base table number 10, using
; tableABC from its first entry.
table_MMX.L_case_67:
	lea	ecx, [PIC_EBP_REL(tableABC)]	; ecx = table handed to from3
	push	dword 10			; popped by from3 and added to the result
	jmp	from3
|
||||
|
||||
; Jump-table target for indices 8..15: base table number 13, tableDEF.
table_MMX.L_case_8_15:
	push	dword 13
	lea	ecx, [PIC_EBP_REL(tableDEF)]
	; falls through into from3

;-----------------------------------------------------------------------
; from3 -- shared tail of L_case_45, L_case_67 and L_case_8_15.
; In:   ecx   = table of 8-byte entries, indexed by x*16 + y
;       [esp] = base table number pushed by the caller
;       edx   = end (left there by choose_table_MMX)
;       ebp   = base for PIC_EBP_REL
; Sums the table entries for every pair (x, y) in [ix, end) and splits
; the 8-byte total into three candidates:
;       second dword ("sum"), upper 16 bits of the first dword ("sum1"),
;       lower 16 bits of the first dword ("sum2").
; Out:  eax = base + 0, 1 or 2 for the smallest of sum / sum1 / sum2
;             (an earlier candidate wins ties)
;       *s += that smallest candidate
; The local labels carry colons so NASM does not report orphan labels.
;-----------------------------------------------------------------------
from3:
	mov	eax, [esp+12]		; eax = *begin
;	mov	edx, [esp+16]		; edx = *end (still held from choose_table_MMX)

	push	ebx
	sub	eax, edx		; eax = begin - end, negative byte offset

	movq	mm5, [PIC_EBP_REL(mul_add)]
	pxor	mm2, mm2		; mm2 = sum

	test	eax, 8
	jz	.choose3_lp1
; odd length
	movq	mm0, [edx+eax]		; mm0 = ix[0] | ix[1]
	add	eax, 8			; ZF is consumed by "jz .choose3_exit" below
	packssdw	mm0, mm2

	pmaddwd	mm0, mm5		; x*16 + y
	movd	ebx, mm0

	movq	mm2, [ecx+ebx*8]

	jz	.choose3_exit

	align	4
.choose3_lp1:
	movq	mm0, [edx+eax]
	movq	mm1, [edx+eax+8]
	add	eax, 16			; ZF is consumed by "jnz" at the end of the loop
	packssdw	mm0, mm1	; mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0, mm5
	movd	ebx, mm0
	punpckhdq	mm0, mm0
	paddd	mm2, [ecx+ebx*8]
	movd	ebx, mm0
	paddd	mm2, [ecx+ebx*8]
	jnz	.choose3_lp1		; MMX ops and movd leave EFLAGS untouched
.choose3_exit:
;	xor	eax, eax		; not needed: the offset in eax has reached 0
	movd	ebx, mm2
	punpckhdq	mm2, mm2
	mov	ecx, ebx
	and	ecx, 0xffff		; ecx = sum2
	shr	ebx, 16			; ebx = sum1
	movd	edx, mm2		; edx = sum

	cmp	edx, ebx
	jle	.choose3_s1
	mov	edx, ebx		; sum1 is smaller
	inc	eax			; eax = 1
.choose3_s1:
	emms
	pop	ebx
	cmp	edx, ecx
	jle	.choose3_s2
	mov	edx, ecx		; sum2 is smallest
	mov	eax, 2
.choose3_s2:
	pop	ecx			; base table number
	add	eax, ecx
	mov	ecx, [esp+16]		; *s
	add	[ecx], edx
	pop	ebp
	ret
|
||||
|
||||
; Jump-table target for index 2: base table number 2, table23, pairs
; combined as x*3 + y (mul_add23).
table_MMX.L_case_2:
	pmov	mm5, [PIC_EBP_REL(mul_add23)]	; pmaddwd weights for from2
	lea	ecx, [PIC_EBP_REL(table23)]	; ecx = table handed to from2
	push	dword 2				; popped by from2 as the table number
	jmp	from2
|
||||
; Jump-table target for index 3: base table number 5, table56, pairs
; combined as x*4 + y (mul_add56).
table_MMX.L_case_3:
	push	dword 5
	lea	ecx, [PIC_EBP_REL(table56)]
	pmov	mm5, [PIC_EBP_REL(mul_add56)]
	; falls through into from2

;-----------------------------------------------------------------------
; from2 -- shared tail of L_case_2 and L_case_3.
; In:   ecx   = table of 4-byte entries
;       mm5   = pmaddwd weights that turn a pair (x, y) into the index
;       [esp] = base table number pushed by the caller
;       edx   = end (left there by choose_table_MMX)
; Sums the table entries for every pair in [ix, end); the upper 16 bits
; of the total are "sum1", the lower 16 bits are "sum2".
; Out:  eax = base table number, +1 when sum2 < sum1
;       *s += min(sum1, sum2)
; The local labels carry colons so NASM does not report orphan labels.
;-----------------------------------------------------------------------
from2:
	mov	eax, [esp+12]		; eax = *begin
;	mov	edx, [esp+16]		; edx = *end (still held from choose_table_MMX)
	push	ebx
	push	edi

	sub	eax, edx		; eax = begin - end, negative byte offset
	xor	edi, edi		; edi = packed running total
	test	eax, 8
	jz	.choose2_lp1
; odd length
	movq	mm0, [edx+eax]		; mm0 = ix[0] | ix[1]
	pxor	mm2, mm2		; zero source for the upper half of the pack
	packssdw	mm0, mm2

	pmaddwd	mm0, mm5
	movd	ebx, mm0

	mov	edi, [ecx+ebx*4]

	add	eax, 8
	jz	.choose2_exit

	align	4
.choose2_lp1:
	movq	mm0, [edx+eax]
	movq	mm1, [edx+eax+8]
	packssdw	mm0, mm1	; mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0, mm5
	movd	ebx, mm0
	punpckhdq	mm0, mm0
	add	edi, [ecx+ebx*4]
	movd	ebx, mm0
	add	edi, [ecx+ebx*4]
	add	eax, 16
	jnc	.choose2_lp1		; the negative offset carries out when it reaches 0
.choose2_exit:
	mov	ecx, edi
	pop	edi
	pop	ebx
	pop	eax			; table num.
	emms

	mov	edx, ecx
	and	ecx, 0xffff		; ecx = sum2
	shr	edx, 16			; edx = sum1

	cmp	edx, ecx
	jle	.choose2_s1
	mov	edx, ecx		; sum2 is smaller
	inc	eax			; select the next table number
.choose2_s1:
	mov	ecx, [esp+16]		; *s
	add	[ecx], edx
	pop	ebp
	ret
|
||||
|
||||
end
|
Reference in New Issue
Block a user