1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-12-12 15:46:25 +01:00

Move libmp3lame into the lame directory

This is where it would be in the original library structure
and makes the CMakeLists.txt a tad cleaner.
This commit is contained in:
Leland Lucius
2020-05-26 02:05:43 -05:00
parent d738d35a7b
commit 51507ecdf8
63 changed files with 23 additions and 1365 deletions

View File

@@ -0,0 +1,73 @@
## $Id: Makefile.am,v 1.28 2013/06/12 09:16:29 rbrito Exp $
# Basic automake setup for this directory.
AUTOMAKE_OPTIONS = foreign
DEFS = @DEFS@ @CONFIG_DEFS@

# Command used by the .nas.lo rule to write the libtool stub file; it can be
# overridden from the environment or the command line.
ECHO ?= echo

# Assembler sources that are built into liblameasmroutines.la.
nasm_sources = choose_table.nas cpu_feat.nas fft3dn.nas fftsse.nas
# The assembler convenience library is only declared when configure enabled
# the HAVE_NASM conditional.
if HAVE_NASM
noinst_LTLIBRARIES = liblameasmroutines.la
liblameasmroutines_la_SOURCES = $(nasm_sources)
# One libtool object per assembler source.  The substitution reference needs
# the "from=to" form; without the '=' the whole text is taken as the name of
# an (undefined) variable and the dependency list silently becomes empty.
liblameasmroutines_la_DEPENDENCIES = $(nasm_sources:.nas=.lo)
# Object list spelled out by hand; it must stay in step with nasm_sources.
am_liblameasmroutines_la_OBJECTS = \
	choose_table$U.lo \
	cpu_feat$U.lo \
	fft3dn$U.lo \
	fftsse$U.lo
endif
# Header that the assembler sources pull in with %include.
noinst_HEADERS = nasm.h

INCLUDES = @INCLUDES@ -I$(top_srcdir)/libmp3lame/@CPUTYPE@

# Extra suffixes used by the hand-written assembler rules in this file.
SUFFIXES = .nas .lo

EXTRA_liblameasmroutines_la_SOURCES = $(nasm_sources)

# Listing files written through the "-l" option of the assembler rules
# (one for the .o rule and one for the .lo rule of every source).
CLEANFILES = \
	choose_table.o.lst choose_table.lo.lst \
	cpu_feat.o.lst cpu_feat.lo.lst \
	fft3dn.o.lst fft3dn.lo.lst \
	fftsse.o.lst fftsse.lo.lst

# Assembler files that are distributed but not part of nasm_sources.
EXTRA_DIST = fft.nas fftfpu.nas ffttbl.nas scalar.nas

# Assembler program and its flags: output format plus the include path of
# this CPU-specific source directory.
NASM = @NASM@
NASMFLAGS=@NASM_FORMAT@ -i $(top_srcdir)/libmp3lame/@CPUTYPE@/
# Every assembler source includes nasm.h, so its objects must be rebuilt when
# that header changes.  The dependency is declared on the concrete object
# names because a suffix rule cannot carry prerequisites of its own: make
# either ignores them or stops treating the rule as a suffix rule at all.
# ($< is also meaningless inside a prerequisite list.)
$(nasm_sources:.nas=.o) $(nasm_sources:.nas=.lo): nasm.h

# Assemble one source into a plain object, writing a listing next to it.
.nas.o:
	$(NASM) $(NASMFLAGS) $< -o $@ -l $@.lst

# Assemble one source and hand-write the matching libtool object description.
# The same object file is recorded as both the PIC and the non-PIC object.
# The assembler runs first so that a failed assembly does not leave behind a
# fresh-looking .lo file that make would consider up to date.
.nas.lo:
	$(NASM) $(NASMFLAGS) $< -o $*.o -l $@.lst
	$(ECHO) '# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06)' >$@
	$(ECHO) "pic_object='$*.o'" >>$@
	$(ECHO) "non_pic_object='$*.o'" >>$@
# Command templates for compiling directly and through libtool.
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --mode=compile \
	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)

# Link step: the C compiler driver is used as the linker, run through libtool.
CCLD = $(CC)
LINK = $(LIBTOOL) --mode=link $(CCLD) \
	$(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@

# Disabled rules that would regenerate the libtool script.
#$(OBJECTS): libtool
#libtool: $(LIBTOOL_DEPS)
# $(SHELL) $(top_builddir)/config.status --recheck

View File

@@ -0,0 +1,633 @@
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
# Source files are looked up in the configured source directory.
VPATH = @srcdir@
# Shell compound command for use inside recipes.  It is false when MAKELEVEL
# is empty; otherwise it is true when MAKE_HOST is set, or when both
# MAKE_VERSION and CURDIR are set; in every other case it is false.
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
# Shell fragment for use inside recipes.  The caller sets the shell variable
# target_option to a single character; the fragment ends with a true status
# when that character is found among the short options make was run with.
#  - A target_option that is not exactly one character is reported on stderr
#    and the shell exits with status 1.
#  - The flag words come from MFLAGS when am__is_gnu_make succeeds, otherwise
#    from MAKEFLAGS with backslash-escaped whitespace removed.
#  - Words containing '=' or starting with '--' are skipped.
#  - For -I, -O and -l the option letter and anything after it are cut off
#    before matching; when the letter ends the word, the following word (its
#    argument) is skipped as well.  The word after -d, -E, -D, -m, -J or -T
#    is skipped too.
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
# The same test specialised for the 'n' and 'k' options, each run in a
# subshell so the helper variables do not leak into the calling recipe.
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
# Package-specific subdirectories of the standard installation directories.
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
# "cd" prefixed with an explicit CDPATH assignment.
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
# Variants of the install script for data files, programs and scripts.
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
# Install/uninstall phase markers; each one expands to the shell no-op ':'.
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
# Path of this directory relative to the top of the package.
subdir = libmp3lame/i386
# Inputs whose modification triggers the regeneration rules further down.
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
$(top_srcdir)/configure.in
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
# Files added to DISTFILES in addition to the sources.
DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
$(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
# Both lists are empty here; the distclean-generic recipe consumes them.
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
# Nothing extra is added to the convenience library.
liblameasmroutines_la_LIBADD =
am__liblameasmroutines_la_SOURCES_DIST = choose_table.nas cpu_feat.nas \
fft3dn.nas fftsse.nas
am__objects_1 = choose_table.lo cpu_feat.lo fft3dn.lo fftsse.lo
# The object list comes from am_liblameasmroutines_la_OBJECTS, which this file
# assigns only on lines carrying the @HAVE_NASM_TRUE@ prefix.
liblameasmroutines_la_OBJECTS = $(am_liblameasmroutines_la_OBJECTS)
# Verbosity selectors: each AM_V_* variable expands to the am__v_*_<n>
# variable chosen by @AM_V@, falling back to @AM_DEFAULT_V@ when that is empty.
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
@HAVE_NASM_TRUE@am_liblameasmroutines_la_rpath =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
# Include path for the current directory and the top of the build tree.
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
# Sources used for building/tagging, and sources placed in the distribution.
SOURCES = $(liblameasmroutines_la_SOURCES) \
$(EXTRA_liblameasmroutines_la_SOURCES)
DIST_SOURCES = $(am__liblameasmroutines_la_SOURCES_DIST) \
$(EXTRA_liblameasmroutines_la_SOURCES)
# Shell test: false when AM_UPDATE_INFO_DIR is n, no or NO; otherwise true
# exactly when "install-info --version" can be executed.
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
HEADERS = $(noinst_HEADERS)
# Files handed to the ID/tags/ctags/cscope rules.
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
# The fragment leaves its result in the shell variable "unique"; names that
# are not files in the current directory are prefixed with $(srcdir)/.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in
# Everything the distdir rule copies into the distribution directory.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
# Configuration values.  Every right-hand side is an @NAME@ placeholder; the
# "Makefile" rule in this file runs config.status to produce the final
# Makefile from this template, which is presumably where the placeholders are
# replaced by the values configure determined.
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CONFIG_DEFS = @CONFIG_DEFS@
CONFIG_MATH_LIB = @CONFIG_MATH_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPUCCODE = @CPUCCODE@
CPUTYPE = @CPUTYPE@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@ @CONFIG_DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
FRONTEND_CFLAGS = @FRONTEND_CFLAGS@
FRONTEND_LDADD = @FRONTEND_LDADD@
FRONTEND_LDFLAGS = @FRONTEND_LDFLAGS@
GREP = @GREP@
GTK_CFLAGS = @GTK_CFLAGS@
GTK_CONFIG = @GTK_CONFIG@
GTK_LIBS = @GTK_LIBS@
INCLUDES = @INCLUDES@ -I$(top_srcdir)/libmp3lame/@CPUTYPE@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDADD = @LDADD@
LDFLAGS = @LDFLAGS@
LIBICONV = @LIBICONV@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBTOOL_DEPS = @LIBTOOL_DEPS@
LIB_MAJOR_VERSION = @LIB_MAJOR_VERSION@
LIB_MINOR_VERSION = @LIB_MINOR_VERSION@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBICONV = @LTLIBICONV@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAINT = @MAINT@
MAKEDEP = @MAKEDEP@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NASM = @NASM@
NASM_FORMAT = @NASM_FORMAT@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
RANLIB = @RANLIB@
RM_F = @RM_F@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
SNDFILE_CFLAGS = @SNDFILE_CFLAGS@
SNDFILE_LIBS = @SNDFILE_LIBS@
STRIP = @STRIP@
VERSION = @VERSION@
WITH_FRONTEND = @WITH_FRONTEND@
WITH_MP3RTP = @WITH_MP3RTP@
WITH_MP3X = @WITH_MP3X@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign

# Assembler sources that are built into liblameasmroutines.la.
nasm_sources = choose_table.nas cpu_feat.nas fft3dn.nas fftsse.nas
# Declarations that are only active when the HAVE_NASM conditional is true
# (every line carries the @HAVE_NASM_TRUE@ prefix).
@HAVE_NASM_TRUE@noinst_LTLIBRARIES = liblameasmroutines.la
@HAVE_NASM_TRUE@liblameasmroutines_la_SOURCES = $(nasm_sources)
# One libtool object per assembler source.  The substitution reference needs
# the "from=to" form; without the '=' the whole text is taken as the name of
# an (undefined) variable and the dependency list silently becomes empty.
@HAVE_NASM_TRUE@liblameasmroutines_la_DEPENDENCIES = $(nasm_sources:.nas=.lo)
@HAVE_NASM_TRUE@am_liblameasmroutines_la_OBJECTS = \
@HAVE_NASM_TRUE@	choose_table$U.lo \
@HAVE_NASM_TRUE@	cpu_feat$U.lo \
@HAVE_NASM_TRUE@	fft3dn$U.lo \
@HAVE_NASM_TRUE@	fftsse$U.lo
# Header that the assembler sources pull in with %include.
noinst_HEADERS = nasm.h

# Extra suffixes used by the hand-written assembler rules in this file.
SUFFIXES = .nas .lo

EXTRA_liblameasmroutines_la_SOURCES = $(nasm_sources)

# Listing files written through the "-l" option of the assembler rules
# (one for the .o rule and one for the .lo rule of every source).
CLEANFILES = \
	choose_table.o.lst choose_table.lo.lst \
	cpu_feat.o.lst cpu_feat.lo.lst \
	fft3dn.o.lst fft3dn.lo.lst \
	fftsse.o.lst fftsse.lo.lst

# Assembler files that are distributed but not part of nasm_sources.
EXTRA_DIST = fft.nas fftfpu.nas ffttbl.nas scalar.nas

# Assembler flags: output format plus the include path of this directory.
NASMFLAGS = @NASM_FORMAT@ -i $(top_srcdir)/libmp3lame/@CPUTYPE@/

# Command templates for compiling directly and through libtool.
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --mode=compile \
	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)

# Link step: the C compiler driver is used as the linker, run through libtool.
CCLD = $(CC)
LINK = $(LIBTOOL) --mode=link $(CCLD) \
	$(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
# Default goal.
all: all-am
# Clear the built-in suffix list, then register only the suffixes used here.
.SUFFIXES:
.SUFFIXES: .nas .lo .o
# Regenerate Makefile.in.  If one of the changed prerequisites is listed in
# am__configure_deps, the top-level am--refresh target is run instead and the
# recipe stops there; otherwise automake is run for this directory only.
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign libmp3lame/i386/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign libmp3lame/i386/Makefile
# Regenerate the Makefile: a changed config.status triggers the top-level
# am--refresh, anything else re-runs config.status for this one file.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
# The remaining configure-related files are refreshed from the top directory.
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
# Rule without prerequisites or recipe for the aclocal inputs.
$(am__aclocal_m4_deps):
# Remove the non-installed libtool libraries, plus a "so_locations" file in
# each directory that holds one of them.
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
# Link the assembler objects into the convenience library via $(LINK).
# NOTE(review): AM_V_CCLD is not assigned anywhere in the visible part of this
# file, so it presumably expands to nothing here - confirm.
liblameasmroutines.la: $(liblameasmroutines_la_OBJECTS) $(liblameasmroutines_la_DEPENDENCIES) $(EXTRA_liblameasmroutines_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(am_liblameasmroutines_la_rpath) $(liblameasmroutines_la_OBJECTS) $(liblameasmroutines_la_LIBADD) $(LIBS)
# Cleanup of object files, *.tab.c files, libtool objects and libtool's
# working directories; the leading '-' makes make ignore failures.
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# Build an ID database with mkid from the de-duplicated tagged files.
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
# Run etags over the de-duplicated tagged files; nothing is run when
# ETAGS_ARGS, the positional parameters and the file list are all empty.
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
CTAGS: ctags
# Run ctags over the same file list unless there is nothing to index.
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
# Run gtags from the top source directory, passing the absolute path of the
# top build directory as its argument.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
# Append the path of every tagged file to cscope.files in the top build
# directory: $(subdir)/<file> for files found in the current directory,
# otherwise a path below the source directory.
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
# Remove the tag databases.
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
# Copy every entry of DISTFILES into $(distdir).
#  - Leading $(srcdir)/ is stripped from the names and leading $(top_srcdir)/
#    is replaced by $(top_builddir)/.
#  - Needed subdirectories of $(distdir) are created first.
#  - Each entry is taken from the current directory when it exists there,
#    otherwise from $(srcdir).
#  - Directories are copied recursively (with the $(srcdir) copy laid down
#    first when both exist) and made accessible to the owner; plain files are
#    copied only when not already present in $(distdir).
#  - Any failed copy aborts the recipe.
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
# Standard entry points.  Most of them only forward to a "-am" target, and
# many of those have no recipe in this directory.
check-am: all-am
check: check-am
# Building this directory means: an up-to-date Makefile, the libtool
# libraries and the headers.
all-am: Makefile $(LTLIBRARIES) $(HEADERS)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
# Re-run "install" with INSTALL_PROGRAM and install_sh_PROGRAM replaced by
# $(INSTALL_STRIP_PROGRAM).  When STRIP is non-empty it is additionally passed
# on through INSTALL_PROGRAM_ENV as STRIPPROG.
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
# Remove the files listed in CLEANFILES, if any.
clean-generic:
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
# Remove the configure-generated files; the VPATH list is only removed when
# the build does not take place in the source directory.
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
# Cleaning levels, each one building on the previous.
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
distclean: distclean-am
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
# Documentation and installation hooks; all of them are empty here.
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
# Targets whose recipes invoke $(MAKE) again.
.MAKE: install-am install-strip
# Targets that are commands rather than files.
# NOTE(review): distclean-libtool is listed but no rule with that name is
# visible in this file.
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Command used by the .nas.lo rule to write the libtool stub file; it can be
# overridden from the environment or the command line.
ECHO ?= echo

# Every assembler source includes nasm.h, so its objects must be rebuilt when
# that header changes.  The dependency is declared on the concrete object
# names because a suffix rule cannot carry prerequisites of its own: make
# either ignores them or stops treating the rule as a suffix rule at all.
# ($< is also meaningless inside a prerequisite list.)
$(nasm_sources:.nas=.o) $(nasm_sources:.nas=.lo): nasm.h

# Assemble one source into a plain object, writing a listing next to it.
.nas.o:
	$(NASM) $(NASMFLAGS) $< -o $@ -l $@.lst

# Assemble one source and hand-write the matching libtool object description.
# The same object file is recorded as both the PIC and the non-PIC object.
# The assembler runs first so that a failed assembly does not leave behind a
# fresh-looking .lo file that make would consider up to date.
.nas.lo:
	$(NASM) $(NASMFLAGS) $< -o $*.o -l $@.lst
	$(ECHO) '# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06)' >$@
	$(ECHO) "pic_object='$*.o'" >>$@
	$(ECHO) "non_pic_object='$*.o'" >>$@
#$(OBJECTS): libtool
#libtool: $(LIBTOOL_DEPS)
# $(SHELL) $(top_builddir)/config.status --recheck
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,447 @@
; new count bit routine
; parts of this code originate from
; new GOGO-no-coda (1999, 2000)
; Copyright (C) 1999 shigeo
; modified by Keiichi SAKAI
%include "nasm.h"
; Exported symbols.
; NOTE(review): no MMX_masking label is defined in this file - confirm that
; exporting it is still intended.
globaldef choose_table_MMX
globaldef MMX_masking
; Lookup tables that are defined outside this file.
externdef largetbl
externdef t1l
externdef table23
externdef table56
segment_data
align 16
; Four words of 14: compared against with pcmpgtw to flag values above 14.
D14_14_14_14 dd 0x000E000E, 0x000E000E
; Four words of 0xfff0: added with unsigned saturation (paddusw).
D15_15_15_15 dd 0xfff0fff0, 0xfff0fff0
; Word pairs for pmaddwd; each dword turns a packed (x, y) pair into one
; index: x*16+y, x*3+y and x*4+y respectively.
mul_add dd 0x00010010, 0x00010010
mul_add23 dd 0x00010003, 0x00010003
mul_add56 dd 0x00010004, 0x00010004
; Table used by table_MMX.L_case_8_15 (base table number 13).
; 256 entries of two dwords each, indexed by x*16+y.  The code at from3 sums
; whole entries and then reads the totals as three separate counters: the low
; and the high 16 bits of the first dword, and the second dword.
tableDEF
dd 0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
dd 0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
dd 0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
dd 0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
dd 0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
dd 0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
dd 0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
dd 0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
dd 0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
dd 0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
dd 0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
dd 0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
dd 0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
dd 0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
dd 0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
dd 0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
dd 0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
dd 0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
dd 0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
dd 0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
dd 0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
dd 0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
dd 0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
dd 0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
dd 0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
dd 0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
dd 0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
dd 0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
dd 0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
dd 0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
dd 0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
dd 0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
dd 0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
dd 0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
dd 0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
dd 0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
dd 0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
dd 0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
dd 0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
dd 0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
dd 0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
dd 0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
dd 0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
dd 0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
dd 0x000c000f,0x12
; Table shared by two cases, with the same two-dword entry layout as tableDEF
; and the same x*16+y indexing:
;  - table_MMX.L_case_67 uses it from its start (base table number 10);
;  - table_MMX.L_case_45 uses it from entry 9 onwards (tableABC+9*8, base
;    table number 7).
tableABC
dd 0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
dd 0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
dd 0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
dd 0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
dd 0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
dd 0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
dd 0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
dd 0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
dd 0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
dd 0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
dd 0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
dd 0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
dd 0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
dd 0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
dd 0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
dd 0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
dd 0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
dd 0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
dd 0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
dd 0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
dd 0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
dd 0x0,0x00000000, 0x0,0x00000000
; Per-class multipliers for the escape path: 13 entries of two dwords, read
; with pmaddwd at [linbits32+eax*8] where eax is the bsr result computed in
; .with_ESC.
linbits32
dd 0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
dd 0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
dd 0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
dd 0x000d000d,0xd000d
; Candidate table numbers for the escape path, two per word: the low byte is
; returned by default, the high byte when the other sum is smaller.
choose_table_H
dw 0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
dw 0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
; Jump table indexed by the largest value found (0..15).  Entries are stored
; as offsets from choose_table_MMX and the dispatch code adds that address
; back before jumping.
choose_jump_table_L:
dd table_MMX.L_case_0 - choose_table_MMX
dd table_MMX.L_case_1 - choose_table_MMX
dd table_MMX.L_case_2 - choose_table_MMX
dd table_MMX.L_case_3 - choose_table_MMX
dd table_MMX.L_case_45 - choose_table_MMX
dd table_MMX.L_case_45 - choose_table_MMX
dd table_MMX.L_case_67 - choose_table_MMX
dd table_MMX.L_case_67 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
dd table_MMX.L_case_8_15 - choose_table_MMX
segment_code
;
; use MMX
;
PIC_OFFSETTABLE
align 16
; int choose_table(int *ix, int *end, int *s)
;
; Finds the largest value in [ix, end), then dispatches on it:
;   0..15        -> jump through choose_jump_table_L
;   16..8191+15  -> .with_ESC
;   above that   -> .with_ESC1, which stores the value in *s and returns -1
; The value in eax on return is the function result.
choose_table_MMX:
push ebp
; ebp becomes the base for the PIC_EBP_REL() address computations below.
call get_pc.bp
add ebp, PIC_BASE()
mov ecx,[esp+8] ;ecx = begin
mov edx,[esp+12] ;edx = end
sub ecx,edx ;ecx = begin-end(should be minus)
; ecx is a negative byte offset that counts up to zero, so [edx+ecx] walks
; the array.  Bit 3 tells whether one 8-byte group is left over next to the
; 16-byte steps of the loop.
test ecx,8
pxor mm0,mm0 ;mm0=[0:0]
movq mm1,[edx+ecx]
; pxor/movq do not touch the flags: this still tests the result of "test".
jz .lp
add ecx,8
jz .exit
align 4
; Running maximum kept in mm0/mm1: subtracting with unsigned saturation and
; adding the difference back yields max(accumulator, new data) per word.
.lp:
movq mm4,[edx+ecx]
movq mm5,[edx+ecx+8]
add ecx,16
psubusw mm4,mm0 ; strictly this ought to be a dword operation, but
psubusw mm5,mm1 ; no such instruction exists :-p
paddw mm0,mm4 ; however, the values handled here are at most 8191+15, so this is not a problem
paddw mm1,mm5
; Loop until the offset set by "add ecx,16" above reaches zero.
jnz .lp
.exit:
; Fold mm1 into mm0, then the high dword into the low dword.
psubusw mm1,mm0 ; this too strictly ought to be a dword operation
paddw mm0,mm1
movq mm4,mm0
punpckhdq mm4,mm4
psubusw mm4,mm0 ; this too strictly ought to be a dword operation
paddw mm0,mm4
movd eax,mm0
cmp eax,15
ja .with_ESC
; Dispatch: the table holds offsets relative to choose_table_MMX.
lea ecx,[PIC_EBP_REL(choose_table_MMX)]
add ecx,[PIC_EBP_REL(choose_jump_table_L+eax*4)]
jmp ecx
; Value above 8191+15: store it in *s and return -1.
.with_ESC1:
emms
mov ecx, [esp+16] ; *s
mov [ecx], eax
or eax,-1
pop ebp
ret
; Escape path, entered with the largest value (above 15) in eax and "end"
; still in edx.  Two candidate sums are accumulated in the high and low
; halves of esi from largetbl, together with a count of values above 14 that
; is scaled by the linbits32 entry; the smaller total is added to *s and the
; matching table number from choose_table_H is returned.
.with_ESC:
cmp eax, 8191+15
ja .with_ESC1
sub eax,15
push ebx
push esi
; eax = bit index of the highest set bit of (max - 15); it selects the
; linbits32 / choose_table_H entry further down.
bsr eax, eax
; _P accounts for the two registers pushed above when addressing arguments.
%assign _P 4*2
movq mm5, [PIC_EBP_REL(D15_15_15_15)]
movq mm6, [PIC_EBP_REL(D14_14_14_14)]
movq mm3, [PIC_EBP_REL(mul_add)]
mov ecx, [esp+_P+8] ; = ix
; mov edx, [esp+_P+12] ; = end
sub ecx, edx
xor esi, esi ; sum = 0
test ecx, 8
pxor mm7, mm7 ; linbits_sum, the number of values that exceeded 14
jz .H_dual_lp1
; Left-over 8-byte group (one pair of values) handled before the main loop.
movq mm0, [edx+ecx]
add ecx,8
packssdw mm0,mm7
movq mm2, mm0
paddusw mm0, mm5 ; mm0 = min(ix, 15)+0xfff0
pcmpgtw mm2, mm6 ; greater than 14?
psubw mm7, mm2 ; when greater than 14: linbits_sum++;
pmaddwd mm0, mm3 ; {0, 0, y, x}*{1, 16, 1, 16}
movd ebx, mm0
mov esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
; The flags still come from "add ecx,8" above: none of the instructions in
; between modifies them.
jz .H_dual_exit
align 4
; Main loop: two pairs of values per iteration.
.H_dual_lp1:
movq mm0, [edx+ecx]
movq mm1, [edx+ecx+8]
packssdw mm0,mm1
movq mm2, mm0
paddusw mm0, mm5 ; mm0 = min(ix, 15)+0xfff0
pcmpgtw mm2, mm6 ; greater than 14?
pmaddwd mm0, mm3 ; {y, x, y, x}*{1, 16, 1, 16}
movd ebx, mm0
punpckhdq mm0,mm0
add esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
movd ebx, mm0
add esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
add ecx, 16
psubw mm7, mm2 ; when greater than 14: linbits_sum++;
; Loop until the offset set by "add ecx,16" above reaches zero.
jnz .H_dual_lp1
.H_dual_exit:
; Add the two dword halves of the counter, duplicate the result into both
; halves and multiply by the per-class factors.
pmov mm1,mm7
punpckhdq mm7,mm7
paddd mm7,mm1
punpckldq mm7,mm7
pmaddwd mm7, [PIC_EBP_REL(linbits32+eax*8)] ; linbits
; ax = the two candidate table numbers (low byte / high byte).
mov ax, [PIC_EBP_REL(choose_table_H+eax*2)]
movd ecx, mm7
punpckhdq mm7,mm7
movd edx,mm7
emms
; Merge the two scaled counts into one dword (high dword result shifted into
; the upper half) and add the table sums collected in esi.
shl edx, 16
add ecx, edx
add ecx, esi
pop esi
pop ebx
mov edx, ecx
and ecx, 0xffff ; ecx = sum2
shr edx, 16 ; edx = sum
cmp edx, ecx
jle .chooseE_s1
; sum2 is smaller: use it together with the table number in the high byte.
mov edx, ecx
shr eax, 8
.chooseE_s1:
mov ecx, [esp+16] ; *s
and eax, 0xff
add [ecx], edx
pop ebp
ret
; Largest value is 0: return with eax unchanged (it holds that value, 0).
table_MMX.L_case_0:
emms
pop ebp
ret
; Largest value is 1: add one byte from t1l per pair of values to *s and
; return 1.  edx still holds "end" from the function entry.
table_MMX.L_case_1:
emms
mov eax, [esp+16] ; *s
mov ecx, [esp+8] ; *ix
sub ecx, edx
push ebx
.lp:
; ebx = 2*first + second: the index into t1l for this pair.
mov ebx, [edx+ecx]
add ebx, ebx
add ebx, [edx+ecx+4]
movzx ebx, byte [PIC_EBP_REL(ebx+t1l)]
add [eax], ebx
add ecx, 8
jnz .lp
pop ebx
mov eax, 1
pop ebp
ret
; Cases with three candidate tables.  Each entry point pushes the number of
; the first candidate and loads the address of its cost table into ecx, then
; continues at from3.
table_MMX.L_case_45:
push dword 7
lea ecx, [PIC_EBP_REL(tableABC+9*8)]
jmp from3
table_MMX.L_case_67:
push dword 10
lea ecx, [PIC_EBP_REL(tableABC)]
jmp from3
table_MMX.L_case_8_15:
push dword 13
lea ecx, [PIC_EBP_REL(tableDEF)]
; Sum the 8-byte table entries selected by every pair of values (index
; x*16+y), pick the smallest of the three totals, add it to *s and return the
; base table number plus 0, 1 or 2.
; Argument offsets are 4 larger than at function entry because of the pushed
; table number.
from3:
mov eax,[esp+12] ;eax = *begin
; mov edx,[esp+16] ;edx = *end
push ebx
sub eax, edx
movq mm5,[PIC_EBP_REL(mul_add)]
pxor mm2,mm2 ;mm2 = sum
test eax, 8
jz .choose3_lp1
; odd length
movq mm0,[edx+eax] ;mm0 = ix[0] | ix[1]
add eax,8
packssdw mm0,mm2
pmaddwd mm0,mm5
movd ebx,mm0
movq mm2, [ecx+ebx*8]
; The flags still come from "add eax,8" above.
jz .choose3_exit
align 4
.choose3_lp1
movq mm0,[edx+eax]
movq mm1,[edx+eax+8]
add eax,16
packssdw mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
pmaddwd mm0,mm5
movd ebx,mm0
punpckhdq mm0,mm0
paddd mm2, [ecx+ebx*8]
movd ebx,mm0
paddd mm2, [ecx+ebx*8]
; Loop until the offset set by "add eax,16" above reaches zero.
jnz .choose3_lp1
.choose3_exit
; eax is zero here (the offset has run out), so it doubles as choice 0.
; xor eax,eax
movd ebx, mm2
punpckhdq mm2,mm2
mov ecx, ebx
and ecx, 0xffff ; ecx = sum2
shr ebx, 16 ; ebx = sum1
movd edx, mm2 ; edx = sum
cmp edx, ebx
jle .choose3_s1
; sum1 is smaller than sum: choice 1.
mov edx, ebx
inc eax
.choose3_s1:
emms
pop ebx
cmp edx, ecx
jle .choose3_s2
; sum2 is the smallest: choice 2.
mov edx, ecx
mov eax, 2
.choose3_s2:
; Pop the base table number pushed by the case entry and add the choice.
pop ecx
add eax, ecx
mov ecx, [esp+16] ; *s
add [ecx], edx
pop ebp
ret
; Cases with two candidate tables.  Each entry point pushes the number of the
; first candidate, loads the table address into ecx and the matching index
; multipliers into mm5, then continues at from2.
table_MMX.L_case_2:
push dword 2
lea ecx,[PIC_EBP_REL(table23)]
pmov mm5,[PIC_EBP_REL(mul_add23)]
jmp from2
table_MMX.L_case_3:
push dword 5
lea ecx,[PIC_EBP_REL(table56)]
pmov mm5,[PIC_EBP_REL(mul_add56)]
; Sum the 4-byte table entries selected by every pair of values in edi, pick
; the smaller of the two 16-bit totals, add it to *s and return the base
; table number plus 0 or 1.
; Argument offsets are 4 larger than at function entry because of the pushed
; table number.
from2:
mov eax,[esp+12] ;eax = *begin
; mov edx,[esp+16] ;edx = *end
push ebx
push edi
sub eax, edx
xor edi, edi
test eax, 8
jz .choose2_lp1
; odd length
movq mm0,[edx+eax] ;mm0 = ix[0] | ix[1]
pxor mm2,mm2 ;mm2 = sum
packssdw mm0,mm2
pmaddwd mm0,mm5
movd ebx,mm0
mov edi, [ecx+ebx*4]
add eax,8
jz .choose2_exit
align 4
.choose2_lp1
movq mm0,[edx+eax]
movq mm1,[edx+eax+8]
packssdw mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
pmaddwd mm0,mm5
movd ebx,mm0
punpckhdq mm0,mm0
add edi, [ecx+ebx*4]
movd ebx, mm0
add edi, [ecx+ebx*4]
add eax,16
; The negative offset produces a carry when it wraps around to zero, which
; ends the loop.
jnc .choose2_lp1
.choose2_exit
mov ecx, edi
pop edi
pop ebx
pop eax ; table num.
emms
mov edx, ecx
and ecx, 0xffff ; ecx = sum2
shr edx, 16 ; edx = sum1
cmp edx, ecx
jle .choose2_s1
; sum2 is smaller: use the second candidate table.
mov edx, ecx
inc eax
.choose2_s1:
mov ecx, [esp+16] ; *s
add [ecx], edx
pop ebp
ret
end

View File

@@ -0,0 +1,107 @@
;
;
; assembler routines to detect CPU-features
;
; MMX / 3DNow! / SSE / SSE2
;
; for the LAME project
; Frank Klemm, Robert Hegemann 2000-10-12
;
%include "nasm.h"
globaldef has_MMX_nasm
globaldef has_3DNow_nasm
globaldef has_SSE_nasm
globaldef has_SSE2_nasm
segment_code
testCPUID:
	; Probe for the CPUID instruction by trying to flip bit 21 of EFLAGS.
	;   out: ZF=1 -> the bit could not be changed (callers treat this as "no CPUID")
	;        ZF=0 -> the bit changed
	;   clobbers: eax, ecx
	pushfd
	pop	ecx			; ecx = EFLAGS as found
	mov	eax,ecx
	xor	eax,1 << 21		; same value with bit 21 inverted (0x200000)
	push	eax
	popfd				; try to install the modified flags
	pushfd
	pop	eax			; eax = EFLAGS as the CPU kept them
	cmp	eax,ecx			; equal => the bit is not writable
	ret
;---------------------------------------
;	int  has_MMX_nasm (void)
;	returns 1 if CPUID function 1 reports EDX bit 23 (MMX), otherwise 0
;---------------------------------------
has_MMX_nasm:
	pushad				; the matching popad is at return0 / return1
	call	testCPUID
	je	return0			; ZF set: no CPUID command, so no MMX
	xor	eax,eax
	inc	eax			; eax = 1
	cpuid
	test	edx,1 << 23		; 0x800000
	jnz	return1			; MMX support
	jmp	return0			; no MMX support
;---------------------------------------
;	int  has_SSE_nasm (void)
;	returns 1 if CPUID function 1 reports EDX bit 25 (SSE), otherwise 0
;---------------------------------------
has_SSE_nasm:
	pushad				; the matching popad is at return0 / return1
	call	testCPUID
	je	return0			; ZF set: no CPUID command, so no SSE
	xor	eax,eax
	inc	eax			; eax = 1
	cpuid
	test	edx,1 << 25		; 0x02000000
	jnz	return1			; SSE support
	jmp	return0			; no SSE support
;---------------------------------------
;	int  has_SSE2_nasm (void)
;	returns 1 if CPUID function 1 reports EDX bit 26 (SSE2), otherwise 0
;---------------------------------------
has_SSE2_nasm:
	pushad				; the matching popad is at return0 / return1
	call	testCPUID
	je	return0			; ZF set: no CPUID command, so no SSE2
	xor	eax,eax
	inc	eax			; eax = 1
	cpuid
	test	edx,1 << 26		; 0x04000000
	jnz	return1			; SSE2 support
	jmp	return0			; no SSE2 support
;---------------------------------------
;	int  has_3DNow_nasm (void)
;	returns 1 if CPUID function 0x80000001 reports EDX bit 31 (3DNow!),
;	otherwise 0
;
;	The labels return1 / return0 that follow are the common exits used by
;	every has_*_nasm routine in this file: each undoes the routine's pushad
;	and leaves the result in eax.
;---------------------------------------
has_3DNow_nasm:
	pushad
	call	testCPUID
	je	return0			; ZF set: no CPUID command, so no 3DNow!
	mov	eax,0x80000000
	cpuid				; eax = highest extended function number
	cmp	eax,0x80000000
	jna	return0			; nothing above 0x80000000, so no 3DNow!
	mov	eax,0x80000001
	cpuid
	test	edx,1 << 31		; 0x80000000
	jz	return0			; no 3DNow! support
					; 3DNow! support: fall through
return1:
	popad
	mov	eax,1
	ret
return0:
	popad
	xor	eax,eax
	ret
end

View File

@@ -0,0 +1,267 @@
; for new GOGO-no-coda (1999/09)
; Copyright (C) 1999 shigeo
; special thanks to Keiichi SAKAI, URURI
%include "nasm.h"
globaldef fht_3DN
globaldef fht
externdef costab_fft
externdef sintab_fft
externdef gray_index
segment_data
align 16
D_MSB1_0 dd 0 ,0x80000000
D_SQRT2 dd 1.414213562,1.414213562
t_s0 dd 0 ;[ t_c:t_s]
t_c0 dd 0
t_c1 dd 0 ;[-t_s:t_c]
t_s1 dd 0
D_s1c1 dd 0, 0
D_Mc1s1 dd 0, 0
D_s2c2 dd 0, 0
D_Mc2s2 dd 0, 0
D_0_1 dd 1.0, 0.0
S_05 DD 0.5
S_00005 DD 0.0005
fht dd 0 ;<3B>ؿ<EFBFBD><D8BF>ݥ<EFBFBD><DDA5><EFBFBD><EFBFBD><EFBFBD>
segment_code
;************************************************************************
; fht_3DN -- in-place transform of the float array fz, written with
;            MMX/3DNow! instructions (pfadd/pfsub/pfmul/pfacc, femms).
; by shigeo
; 99/08/16
; 23000clk (rest of the original note is mis-encoded in this copy)
; 18500clk bit reversal from gogo1 by URURI
;void fht(float *fz, int n);
;
; Arguments are read from the stack: fz at [esp+_P+4], n at [esp+_P+8],
; where _P tracks how many bytes this routine has pushed/reserved.
; ebx, esi, edi and ebp are saved and restored; eax, ecx, edx and mm0-mm7
; are overwritten.  16 bytes of stack hold the locals k, kx and fn.
; The per-pass rotation factors are written to the data-segment variables
; t_s0/t_c0/t_c1/t_s1, D_s1c1, D_Mc1s1, D_s2c2 and D_Mc2s2, so two
; simultaneous calls would share (and overwrite) that scratch state.
; Lane comments use [high dword : low dword].
align 16
fht_3DN:
push ebx
push esi
push edi
push ebp
; four registers pushed: [esp+_P] is now the return address
%assign _P 4*4
;NOTE: the original comment here is mis-encoded; it appears to say that the very first loop was moved outside of fht()
mov esi,[esp+_P+4] ;esi=fz (overwritten again inside .lp30)
mov ecx,[esp+_P+8] ;ecx=n (overwritten again inside .lp30)
;main loop (original comment mis-encoded)
movq mm7,[D_MSB1_0] ;mm7=[1<<31:0]
%assign LOCAL_STACK 16
sub esp,LOCAL_STACK
; keep _P pointing at the return address after reserving the locals
%assign _P (_P+LOCAL_STACK)
xor eax,eax
mov [esp],eax ;k=0
%define k dword [esp]
%define kx dword [esp+4]
%define fn dword [esp+8]
.lp30: ;k=0; do{
mov ecx,k
add ecx,2
mov k,ecx ;k += 2
mov eax,1
shl eax,cl ;eax=k1 = 1<<k
lea ebx,[eax+eax] ;ebx=k2 = k1*2
lea ecx,[eax+eax*2] ;ecx=k3 = k2 + k1 = k1*3
lea edx,[ebx+ebx] ;edx=k4 = k1*4
mov esi,eax
shr esi,1 ;esi=kx=k1>>1
mov kx,esi ;save kx (compared against i at the end of .lp32)
mov edi,[esp+_P+4] ;edi=fi=fz
lea ebp,[edi+esi*4] ;ebp=gi=fz+kx
mov esi,[esp+_P+8] ;esi=n
lea esi,[edi+esi*4] ;esi=fn=fz+n
movq mm6,[D_SQRT2] ;mm6=[sqrt2:sqrt2]
.lp31: ;fn=fz+n; do{ FLOAT g0,f0,f1,...
movd mm0,[edi] ;mm0=[0:fi[ 0]]
movd mm1,[edi+eax*4] ;mm1=[0:fi[k1]]
punpckldq mm0,mm0 ;mm0=[fi_0 :fi_0 ]
punpckldq mm1,mm1 ;mm1=[fi_k1:fi_k1]
movd mm2,[edi+ebx*4]
movd mm3,[edi+ecx*4]
punpckldq mm2,mm2 ;mm2=[fi_k2:fi_k2]
punpckldq mm3,mm3 ;mm3=[fi_k3:fi_k3]
pxor mm1,mm7 ;mm1=[-fi_k1:fi_k1]
pxor mm3,mm7 ;mm3=[-fi_k3:fi_k3]
pfadd mm0,mm1 ;mm0=[f1:f0]=[fi_0 -fi_k1 : fi_0 +fi_k1]
pfadd mm2,mm3 ;mm2=[f3:f2]=[fi_k2-fi_k3 : fi_k2+fi_k3]
movq mm3,mm0 ;mm3=[f1:f0]
pfadd mm0,mm2 ;mm0=[f1+f3:f0+f2]
movd [edi],mm0 ;fi[0]=f0+f2
psrlq mm0,32 ;mm0=[0:f1+f3]
pfsub mm3,mm2 ;mm3=[f1-f3:f0-f2]
movd [edi+eax*4],mm0 ;fi[k1]=f1+f3
movd [edi+ebx*4],mm3 ;fi[k2]=f0-f2
psrlq mm3,32 ;mm3=[0:f1-f3]
movd [edi+ecx*4],mm3 ;fi[k3]=f1-f3
movd mm0,[ebp] ;mm0=[0:gi_0]
movd mm1,[ebp+eax*4] ;mm1=[0:gi_k1]
punpckldq mm0,mm0 ;mm0=[gi_0 :gi_0 ]
punpckldq mm1,mm1 ;mm1=[gi_k1:gi_k1]
movd mm2,[ebp+ebx*4] ;mm2=[0:gi_k2]
pxor mm1,mm7 ;mm1=[-gi_k1:gi_k1]
punpckldq mm2,[ebp+ecx*4] ;mm2=[gi_k3:gi_k2]
pfadd mm0,mm1 ;mm0=[g1:g0]=[gi_0 -gi_k1:gi_0 +gi_k1]
pfmul mm2,mm6 ;mm2=[g3:g2]=sqrt2 * [gi_k3:gi_k2]
movq mm1,mm0 ;mm1=[g1:g0]
pfadd mm0,mm2 ;mm0=[g1+g3:g0+g2]
movd [ebp],mm0 ;gi[0]=g0+g2
psrlq mm0,32 ;mm0=[0:g1+g3]
pfsub mm1,mm2 ;mm1=[g1-g3:g0-g2]
movd [ebp+eax*4],mm0 ;gi[k1]=g1+g3
movd [ebp+ebx*4],mm1 ;gi[k2]=g0-g2
psrlq mm1,32 ;mm1=[0:g1-g3]
movd [ebp+ecx*4],mm1 ;gi[k3]=g1-g3
lea edi,[edi+edx*4] ;fi += k4
lea ebp,[ebp+edx*4] ;gi += k4
cmp edi,esi
jc near .lp31 ;}while(fi<fn);
; (the original remark at this point is mis-encoded in this copy)
mov fn,esi ;fn=fz+n
;the values below are still live from the code above and keep being used
;eax=k1,ebx=k2,ecx=k3,edx=k4
mov edi,k
lea ebp,[costab_fft+edi*4]
mov ebp,[ebp] ;ebp=t_c
mov [t_c0],ebp
mov [t_c1],ebp ;t_c
lea ebp,[sintab_fft+edi*4]
mov ebp,[ebp] ;ebp=t_s
mov [t_s0],ebp
xor ebp,0x80000000 ;flip the float sign bit
mov [t_s1],ebp ;-t_s
movq mm1,[D_0_1] ;mm1=[0:1]
movq [D_s1c1],mm1 ;D_s1c1=[s1:c1]=[0:1]
mov esi,1 ;esi=i=1
.lp32: ; for(i=1;i<kx;i++){
movq mm0,[D_s1c1] ;mm0=[s1:t]=[s1:c1]
movq mm2,mm0
pfmul mm0,[t_c1] ;mm0=[-s1*t_s: t*t_c]
pfmul mm2,[t_s0] ;mm2=[ s1*t_c: t*t_s]
pfacc mm0,mm2 ;mm0=[s1:c1]=[ s1*t_c+t*t_s:-s1*t_s+t*t_c]
movq mm2,mm0 ;mm2=[s1:c1]
movq [D_s1c1],mm0 ;save
movq mm6,mm2
punpckldq mm5,mm6 ;mm5=[c1:*]
punpckhdq mm6,mm5 ;mm6=[ c1:s1]
pxor mm6,mm7 ;mm6=[-c1:s1]
movq [D_Mc1s1],mm6 ;save
pfmul mm2,mm2 ;mm2=[s1*s1:c1*c1]
movq mm3,mm0 ;mm3=[s1:c1]
pxor mm2,mm7 ;mm2=[-s1*s1:c1*c1]
psrlq mm3,32 ;mm3=[ 0:s1]
pfacc mm2,mm2 ;mm2=[c2:c2]=[c1*c1-s1*s1:<]
pfmul mm0,mm3 ;mm0=[ 0:c1*s1]
pfadd mm0,mm0 ;mm0=[0:s2]=[ 0:2*c1*s1]
punpckldq mm2,mm0 ;mm2=[s2:c2]
movq [D_s2c2],mm2 ;save
punpckldq mm0,mm2 ;mm0=[c2:s2]
punpckhdq mm2,mm0 ;mm2=[c2:s2]
pxor mm2,mm7 ;mm2=[-c2:s2]
movq [D_Mc2s2],mm2 ;save
mov edi,[esp+_P+4] ;edi=fz
lea edi,[edi+esi*4] ;edi=fz+i
mov ebp,[esp+_P+4] ;ebp=fz
neg esi ;esi=-i
lea ebp,[ebp+eax*4] ;ebp=fz+k1
lea ebp,[ebp+esi*4] ;ebp=gi=fz+k1-i
neg esi ;esi=i
.lp33: ; do{ FLOAT a,b,g0,f0,f1,g1,f2,g2,f3,g3;
movd mm0,[edi+eax*4] ;mm0=[0:fi_k1]
punpckldq mm0,[ebp+eax*4] ;mm0=[gi_k1:fi_k1]
movq mm1,mm0
pfmul mm0,[D_s2c2] ;mm0=[ s2*gi_k1:c2*fi_k1]
pfmul mm1,[D_Mc2s2] ;mm1=[-c2*gi_k1:s2*fi_k1]
pfacc mm0,mm1 ;mm0=[b:a]
movd mm4,[edi] ;mm4=[0:fi_0]
movq mm3,mm0 ;mm3=[b:a]
punpckldq mm4,[ebp] ;mm4=[gi_0:fi_0]
pfadd mm3,mm4 ;mm3=[g0:f0]=[gi_0+b:fi_0+a]
pfsub mm4,mm0 ;mm4=[g1:f1]=[gi_0-b:fi_0-a]
movd mm0,[edi+ecx*4] ;mm0=[0:fi_k3]
punpckldq mm0,[ebp+ecx*4] ;mm0=[gi_k3:fi_k3]
movq mm1,mm0
pfmul mm0,[D_s2c2] ;mm0=[ s2*gi_k3:c2*fi_k3]
pfmul mm1,[D_Mc2s2] ;mm1=[-c2*gi_k3:s2*fi_k3]
pfacc mm0,mm1 ;mm0=[b:a]
movd mm5,[edi+ebx*4] ;mm5=[0:fi_k2]
movq mm6,mm0 ;mm6=[b:a]
punpckldq mm5,[ebp+ebx*4] ;mm5=[gi_k2:fi_k2]
pfadd mm6,mm5 ;mm6=[g2:f2]=[gi_k2+b:fi_k2+a]
pfsub mm5,mm0 ;mm5=[g3:f3]=[gi_k2-b:fi_k2-a]
punpckldq mm1,mm6 ;mm1=[f2:*]
movq mm0,[D_s1c1] ;mm0=[s1:c1]
punpckhdq mm1,mm5 ;mm1=[g3:f2]
pfmul mm0,mm1 ;mm0=[ s1*g3:c1*f2]
movq mm2,[D_Mc1s1] ;mm2=[-c1:s1]
pfmul mm2,mm1 ;mm2=[-c1*g3:s1*f2]
pfacc mm0,mm2 ;mm0=[b:a]
punpckldq mm1,mm3 ;mm1=[f0:*]
punpckhdq mm1,mm4 ;mm1=[g1:f0]
movq mm2,mm0 ;mm2=[b:a]
pfadd mm0,mm1 ;mm0=[g1+b:f0+a]
pfsubr mm2,mm1 ;mm2=[g1-b:f0-a]
movd [edi],mm0 ;fi[0]=f0+a
psrlq mm0,32 ;mm0=[0:g1+b]
movd [edi+ebx*4],mm2 ;fi[k2]=f0-a
psrlq mm2,32 ;mm2=[0:g1-b]
movd [ebp+eax*4],mm0 ;gi[k1]=g1+b
movd [ebp+ecx*4],mm2 ;gi[k3]=g1-b
psrlq mm6,32 ;mm6=[0:g2]
movq mm0,[D_s1c1] ;mm0=[s1:c1]
punpckldq mm5,mm6 ;mm5=[g2:f3]
pfmul mm0,mm5 ;mm0=[g2* s1:f3*c1]
pfmul mm5,[D_Mc1s1] ;mm5=[g2*-c1:f3*s1]
pfacc mm0,mm5 ;mm0=[-b:a]
psrlq mm3,32 ;mm3=[0:g0]
movq mm1,mm0 ;mm1=[-b:a]
punpckldq mm3,mm4 ;mm3=[f1:g0]
pfadd mm0,mm3 ;mm0=[f1-b:g0+a]
pfsubr mm1,mm3 ;mm1=[f1+b:g0-a]
movd [ebp],mm0 ;gi[0]=g0+a
psrlq mm0,32 ;mm0=[0:f1-b]
movd [ebp+ebx*4],mm1 ;gi[k2]=g0-a
psrlq mm1,32 ;mm1=[0:f1+b]
movd [edi+ecx*4],mm0 ;fi[k3]=f1-b
movd [edi+eax*4],mm1 ;fi[k1]=f1+b
lea edi,[edi+edx*4] ;fi += k4
lea ebp,[ebp+edx*4] ;gi += k4
cmp edi,fn
jc near .lp33 ;}while(fi<fn)
inc esi
cmp esi,kx
jnz near .lp32 ;}
cmp edx,[esp+_P+8] ;k4 vs n: the loop repeats until they are equal
jnz near .lp30 ;}while(k4<n)
.exit:
add esp,LOCAL_STACK ;release k/kx/fn
femms ;leave MMX/3DNow! state
pop ebp
pop edi
pop esi
pop ebx
ret

View File

@@ -0,0 +1,488 @@
; from a new GOGO-no-coda (1999/09)
; Copyright (C) 1999 shigeo
; special thanks to Keiichi SAKAI, URURI
; hacked and back-ported to LAME
; by Takehiro TOMINAGA Nov 2000
%include "nasm.h"
globaldef fht_3DN
segment_data
align 16
costab dd 0x80000000, 0
dd 1.414213562,1.414213562
dd 9.238795283293805e-01, 9.238795283293805e-01
dd 3.826834424611044e-01, 3.826834424611044e-01
dd 9.951847264044178e-01, 9.951847264044178e-01
dd 9.801714304836734e-02, 9.801714304836734e-02
dd 9.996988186794428e-01, 9.996988186794428e-01
dd 2.454122920569705e-02, 2.454122920569705e-02
dd 9.999811752815535e-01, 9.999811752815535e-01
dd 6.135884819898878e-03, 6.135884819898878e-03
D_1_0_0_0 dd 0.0 , 1.0
segment_code
PIC_OFFSETTABLE
;void fht_3DN(float *fz, int nn);
;
; In-place transform of fz using 3DNow! arithmetic (pfadd/pfsub/pfmul) and
; the costab table above, addressed relative to ebp (PIC_EBP_REL).
; proc/endproc, pushd/popd, pmov/pmovd, pupldq/puphdq, loopalign, r0-r5,
; get_pc.bp and PIC_BASE are not defined in this file -- presumably they
; come from nasm.h; verify there.
; Stack use: 20 bytes are reserved; [esp] and [esp+8] hold the two 8-byte
; c2/s2 factor pairs for the inner loop, [esp+16] holds fn.  fz and nn are
; read from [esp+40] and [esp+44] (this assumes pushd pushed 16 bytes).
; The end pointer is computed as fz + nn*8 bytes.
; NOTE(review): the "x | y" lane annotations below are inherited from the
; original source and do not use one consistent high/low order throughout;
; check them against the instructions before relying on them.
proc fht_3DN
pushd ebp, ebx, esi, edi
sub esp, 20
call get_pc.bp
add ebp, PIC_BASE()
mov r0, [esp+40] ;fi
mov r1, [esp+44] ;r1 = nn
lea r3, [PIC_EBP_REL(costab)] ;tri = costab
lea r4, [r0+r1*8] ;r4 = fn = &fz[n]
mov [esp+16], r4
mov r4, 8 ;kx = k1/2
pmov mm7, [r3] ;first costab entry: dwords 0x80000000, 0 (sign-bit mask)
loopalign 16
.do1
lea r3, [r3+16] ;tri += 2;
pmov mm6, [PIC_EBP_REL(costab+8)] ;second costab entry: 1.414213562 twice
lea r2, [r4+r4*2] ;k3*fsize/2
mov r5, 4 ;i = 1*fsize
loopalign 16
.do2:
lea r1, [r0+r4] ;gi = fi + kx
;f
; Note: mm2 takes the k3 element and mm3 the k2 element (the reverse of
; the "g" part below); the high-lane stores to fi[k1]/fi[k3] further down
; are exchanged to match.
pmov mm0, [r0] ;fi0
pmov mm1, [r0+r4*2] ;fi1
pmov mm2, [r0+r2*2] ;fi3
pmov mm3, [r0+r4*4] ;fi2
pupldq mm0, mm0 ;fi0 | fi0
pupldq mm1, mm1 ;fi1 | fi1
pupldq mm2, mm2 ;fi2 | fi2
pupldq mm3, mm3 ;fi3 | fi3
pxor mm1, mm7 ;fi1 | -fi1
pxor mm3, mm7 ;fi3 | -fi3
pfsub mm0, mm1 ;f1 | f0
pfsub mm2, mm3 ;f3 | f2
pmov mm4, mm0
pfadd mm0, mm2 ;f1+f3|f0+f2 = fi1 | fi0
pfsub mm4, mm2 ;f1-f3|f0-f2 = fi3 | fi2
pmovd [r0], mm0 ;fi[0]
puphdq mm0, mm0
pmovd [r0+r4*4], mm4 ;fi[k2]
puphdq mm4, mm4
pmovd [r0+r4*2], mm4 ;fi[k1]
pmovd [r0+r2*2], mm0 ;fi[k3]
lea r0, [r0+r4*8]
;g
pmov mm0, [r1] ;gi0
pmov mm1, [r1+r4*2] ;gi1
pmov mm2, [r1+r4*4] ;gi2
pmov mm3, [r1+r2*2] ;gi3
pupldq mm1, mm1
pupldq mm0, mm0 ;gi0 | gi0
pupldq mm2, mm3 ;gi3 | gi2
pxor mm1, mm7 ;gi1 | -gi1
pfsub mm0, mm1 ;gi0-gi1|gi0+gi1 = g1 | g0
pfmul mm2, mm6 ;gi3*SQRT2|gi2*SQRT2 = g3 | g2
pmov mm4, mm0
pfadd mm0, mm2 ;g1+g3|g0+g2 = gi1 | gi0
pfsub mm4, mm2 ;g1-g3|g0-g2 = gi3 | gi2
pmovd [r1], mm0 ;gi[0]
puphdq mm0, mm0
pmovd [r1+r4*4], mm4 ;gi[k2]
puphdq mm4, mm4
cmp r0, [esp + 16] ;fi vs fn; the stores below do not disturb the flags
pmovd [r1+r4*2], mm0 ;gi[k1]
pmovd [r1+r2*2], mm4 ;gi[k3]
jb near .do2
pmov mm6, [r3+r5] ; this is not aligned address!!
loopalign 16
.for:
;
; mm6 = c1 | s1
; mm7 = sign-bit mask loaded from the first costab entry (0x80000000, 0)
;
pmov mm1, mm6
mov r0, [esp+40] ; fz
puphdq mm1, mm1 ; c1 | c1
lea r1, [r0+r4*2]
pfadd mm1, mm1 ; c1+c1 | c1+c1
pfmul mm1, mm6 ; 2*c1*c1 | 2*c1*s1
pfsub mm1, [PIC_EBP_REL(D_1_0_0_0)] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
pmov mm0, mm1
pxor mm7, mm6 ; c1 | -s1
pupldq mm2, mm0
pupldq mm3, mm6 ; ** | c1
puphdq mm0, mm2 ; s2 | c2
puphdq mm6, mm3 ;-s1 | c1
pxor mm0, [PIC_EBP_REL(costab)] ; c2 | -s2
; mm0 = s2| c2
; mm1 = -c2| s2
; mm6 = c1| s1
; mm7 = s1|-c1 (we use the opposite sign. from GOGO here)
pmov [esp], mm0 ;spill: mm0/mm1 are reused inside .do3 and reloaded from here
pmov [esp+8], mm1
sub r1, r5 ;r1 = gi
add r0, r5 ;r0 = fi
loopalign 16
.do3:
pmov mm2, [r0+r4*2] ; fi[k1]
pmov mm4, [r1+r4*2] ; gi[k1]
pmov mm3, [r0+r2*2] ; fi[k3]
pmov mm5, [r1+r2*2] ; gi[k3]
pupldq mm2, mm2 ; fi1 | fi1
pupldq mm4, mm4 ; gi1 | gi1
pupldq mm3, mm3 ; fi3 | fi3
pupldq mm5, mm5 ; gi3 | gi3
pfmul mm2, mm0 ; s2 * fi1 | c2 * fi1
pfmul mm4, mm1 ;-c2 * gi1 | s2 * gi1
pfmul mm3, mm0 ; s2 * fi3 | c2 * fi3
pfmul mm5, mm1 ;-c2 * gi3 | s2 * gi3
pfadd mm2, mm4 ;b | a
pfadd mm3, mm5 ;d | c
pmov mm0, [r0]
pmov mm4, [r1]
pmov mm1, [r0+r4*4]
pmov mm5, [r1+r4*4]
pupldq mm0, mm4 ;gi0 | fi0
pupldq mm1, mm5 ;gi2 | fi2
pmov mm4, mm2
pmov mm5, mm3
pfadd mm2, mm0 ;g0 | f0
pfadd mm3, mm1 ;g2 | f2
pfsub mm0, mm4 ;g1 | f1
pfsub mm1, mm5 ;g3 | f3
pmov mm4, mm3
pmov mm5, mm1
pupldq mm4, mm4 ;f2 | f2
puphdq mm5, mm5 ;g3 | g3
puphdq mm3, mm3 ;g2 | g2
pupldq mm1, mm1 ;f3 | f3
pfmul mm4, mm6 ;f2 * c1 | f2 * s1
pfmul mm5, mm7 ;g3 * s1 | g3 *-c1
pfmul mm3, mm6 ;g2 * c1 | g2 * s1
pfmul mm1, mm7 ;f3 * s1 | f3 *-c1
pfadd mm4, mm5 ;a | b
pfsub mm3, mm1 ;d | c
pmov mm5, mm2
pmov mm1, mm0
pupldq mm2, mm2 ;f0 | f0
pupldq mm0, mm0 ;f1 | f1
puphdq mm1, mm2 ;f0 | g1
puphdq mm5, mm0 ;f1 | g0
pmov mm2, mm4
pmov mm0, mm3
pfadd mm4, mm1 ;fi0 | gi1
pfadd mm3, mm5 ;fi1 | gi0
pfsub mm1, mm2 ;fi2 | gi3
pfsub mm5, mm0 ;fi3 | gi2
pmovd [r1+r4*2], mm4 ;gi[k1]
puphdq mm4, mm4
pmovd [r1], mm3 ;gi[0]
puphdq mm3, mm3
pmovd [r1+r2*2], mm1 ;gi[k3]
puphdq mm1, mm1
pmovd [r1+r4*4], mm5 ;gi[k2]
puphdq mm5, mm5
pmovd [r0], mm4 ;fi[0]
pmovd [r0+r4*2], mm3 ;fi[k1]
pmovd [r0+r4*4], mm1 ;fi[k2]
pmovd [r0+r2*2], mm5 ;fi[k3]
lea r0, [r0+r4*8]
lea r1, [r1+r4*8]
cmp r0, [esp + 16] ;fi vs fn; the reloads below do not disturb the flags
pmov mm0, [esp]
pmov mm1, [esp+8]
jb near .do3
add r5, 4
; mm6 = c1| s1
; mm7 = s1|-c1 (we use the opposite sign. from GOGO here)
pfmul mm6, [r3] ; c1*a | s1*a
pfmul mm7, [r3+8] ; s1*b |-c1*b
cmp r5, r4 ;i vs kx; tested by the jb after the MMX instructions below
pfsub mm6, mm7 ; c1*a-s1*b | s1*a+c1*b
pupldq mm7,mm6
puphdq mm6,mm7
pmov mm7, [PIC_EBP_REL(costab)] ;restore the sign-bit mask
jb near .for
mov r0, [esp+40] ;fi
cmp r4, [esp+40+4] ;kx (before scaling) vs nn; lea below leaves the flags alone
lea r4, [r4*4] ;kx *= 4
jb near .do1
.exitttt
femms
add esp,20
popd ebp, ebx, esi, edi
endproc
;void fht_E3DN(float *fz, int nn);
;
; Same stack layout, loop structure and costab usage as fht_3DN above, but
; written with pfpnacc and pswapd in place of the unpack/xor sequences.
; proc/endproc, pushd/popd, pmov/pmovd, pupldq/puphdq, loopalign, r0-r5,
; get_pc.bp and PIC_BASE are not defined in this file -- presumably they
; come from nasm.h; verify there.
; Stack use: 20 bytes are reserved; [esp] and [esp+8] hold the two 8-byte
; c2/s2 factor pairs for the inner loop, [esp+16] holds fn.  fz and nn are
; read from [esp+40] and [esp+44] (this assumes pushd pushed 16 bytes).
; NOTE(review): the "x | y" lane annotations are inherited from the
; original source; check them against the instructions before relying on
; them.
proc fht_E3DN
pushd ebp, ebx, esi, edi
sub esp, 20
call get_pc.bp
add ebp, PIC_BASE()
mov r0, [esp+40] ;fi
mov r1, [esp+44] ;r1 = nn
lea r3, [PIC_EBP_REL(costab)] ;tri = costab
lea r4, [r0+r1*8] ;r4 = fn = &fz[n]
mov [esp+16], r4
mov r4, 8 ;kx = k1/2
pmov mm7, [r3] ;first costab entry: dwords 0x80000000, 0 (sign-bit mask)
loopalign 16
.do1
lea r3, [r3+16] ;tri += 2;
pmov mm6, [PIC_EBP_REL(costab+8)] ;second costab entry: 1.414213562 twice
lea r2, [r4+r4*2] ;k3*fsize/2
mov r5, 4 ;i = 1*fsize
loopalign 16
.do2:
lea r1, [r0+r4] ;gi = fi + kx
;f
pmov mm0, [r0] ; X | fi0
pmov mm1, [r0+r4*4] ; X | fi2
pupldq mm0, [r0+r4*2] ;fi1 | fi0
pupldq mm1, [r0+r2*2] ;fi3 | fi2
pfpnacc mm0, mm0 ;fi0+fi1 | fi0-fi1 = f0|f1
pfpnacc mm1, mm1 ;fi2+fi3 | fi2-fi3 = f2|f3
pmov mm2, mm0
pfadd mm0, mm1 ;f0+f2|f1+f3 = fi0 | fi1
pfsub mm2, mm1 ;f0-f2|f1-f3 = fi2 | fi3
pmovd [r0+r4*2], mm0 ;fi[k1]
pmovd [r0+r2*2], mm2 ;fi[k3]
puphdq mm0, mm0
puphdq mm2, mm2
pmovd [r0], mm0 ;fi[0]
pmovd [r0+r4*4], mm2 ;fi[k2]
lea r0, [r0+r4*8]
;g
pmov mm3, [r1] ; gi0
pmov mm4, [r1+r2*2] ; gi3
pupldq mm3, [r1+r4*2] ;gi1|gi0
pupldq mm4, [r1+r4*4] ;gi2|gi3
pfpnacc mm3, mm3 ;gi0+gi1 |gi0-gi1 = f0|f1
pfmul mm4, mm6 ;gi2*SQRT2|gi3*SQRT2 = f2|f3
pmov mm5, mm3
pfadd mm3, mm4 ;f0+f2|f1+f3
pfsub mm5, mm4 ;f0-f2|f1-f3
cmp r0, [esp + 16] ;fi vs fn; the stores/unpacks below do not disturb the flags
pmovd [r1+r4*2], mm3 ;gi[k1]
pmovd [r1+r2*2], mm5 ;gi[k3]
puphdq mm3, mm3
puphdq mm5, mm5
pmovd [r1], mm3 ;gi[0]
pmovd [r1+r4*4], mm5 ;gi[k2]
jb near .do2
pmov mm6, [r3+r5] ; this is not aligned address!!
loopalign 16
.for:
;
; mm6 = c1 | s1
; mm7 = sign-bit mask loaded from the first costab entry (0x80000000, 0)
;
pmov mm5, mm6
mov r0, [esp+40] ; fz
puphdq mm5, mm5 ; c1 | c1
lea r1, [r0+r4*2]
pfadd mm5, mm5 ; c1+c1 | c1+c1
pfmul mm5, mm6 ; 2*c1*c1 | 2*c1*s1
pfsub mm5, [PIC_EBP_REL(D_1_0_0_0)] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
pswapd mm4, mm5 ; s2 |-c2
pxor mm4, mm7 ; s2 | c2
pxor mm7, mm6 ; c1 |-s1
pswapd mm6, mm6 ; s1 | c1
; mm4 = s2| c2
; mm5 = -c2| s2
; mm6 = c1| s1
; mm7 = s1|-c1 (we use the opposite sign. from GOGO here)
pmov [esp], mm4 ;spill: mm4/mm5 are reused inside .do3 and reloaded from here
pmov [esp+8], mm5
sub r1, r5 ;r1 = gi
add r0, r5 ;r0 = fi
loopalign 16
.do3:
; r2*2 is the k3 offset and r4*2 the k1 offset (r2 = 3*r4, see .do1)
pmov mm0, [r0+r2*2] ; fi[k3]
pmov mm2, [r1+r2*2] ; gi[k3]
pmov mm1, [r0+r4*2] ; fi[k1]
pmov mm3, [r1+r4*2] ; gi[k1]
pupldq mm0, mm0
pupldq mm2, mm2
pupldq mm1, mm1
pupldq mm3, mm3
pfmul mm0, mm4
pfmul mm2, mm5
pfmul mm1, mm4
pfmul mm3, mm5
pfadd mm0, mm2 ;d | c
pfadd mm1, mm3 ;b | a
pmov mm2, [r0+r4*4] ;fi2
pupldq mm3, [r1+r4*4] ;gi2 | -
pmov mm4, [r0] ;fi0
pupldq mm5, [r1] ;gi0 | -
pupldq mm2, mm0 ;c | fi2
puphdq mm3, mm0 ;d | gi2
pupldq mm4, mm1 ;a | fi0
puphdq mm5, mm1 ;b | gi0
pfpnacc mm2, mm2 ;f2 | f3
pfpnacc mm3, mm3 ;g2 | g3
pfpnacc mm4, mm4 ;f0 | f1
pfpnacc mm5, mm5 ;g0 | g1
pmov mm0, mm2
pmov mm1, mm3
pupldq mm2, mm2 ;f3 | f3
pupldq mm3, mm3 ;g3 | g3
puphdq mm0, mm0 ;f2 | f2
puphdq mm1, mm1 ;g2 | g2
pswapd mm4, mm4 ;f1 | f0
pswapd mm5, mm5 ;g1 | g0
pfmul mm0, mm7 ;f2 * s1 | f2 *-c1
pfmul mm3, mm6 ;g3 * c1 | g3 * s1
pfmul mm1, mm6 ;g2 * c1 | g2 * s1
pfmul mm2, mm7 ;f3 * s1 | f3 *-c1
pfsub mm0, mm3 ; b |-a
pfsub mm1, mm2 ; d | c
pmov mm2, mm5
pmov mm3, mm4
pupldq mm4, mm0 ;-a | f0
pupldq mm5, mm1 ; c | g0
puphdq mm2, mm0 ; b | g1
puphdq mm3, mm1 ; d | f1
pfpnacc mm4, mm4 ;fi2 | fi0
pfpnacc mm5, mm5 ;gi0 | gi2
pfpnacc mm2, mm2 ;gi1 | gi3
pfpnacc mm3, mm3 ;fi1 | fi3
pmovd [r0], mm4 ;fi[0]
pmovd [r1+r4*4], mm5 ;gi[k2]
pmovd [r1+r2*2], mm2 ;gi[k3]
pmovd [r0+r2*2], mm3 ;fi[k3]
puphdq mm4, mm4
puphdq mm5, mm5
puphdq mm2, mm2
puphdq mm3, mm3
pmovd [r0+r4*4], mm4 ;fi[k2]
pmovd [r1], mm5 ;gi[0]
pmovd [r1+r4*2], mm2 ;gi[k1]
pmovd [r0+r4*2], mm3 ;fi[k1]
lea r0, [r0+r4*8]
lea r1, [r1+r4*8]
cmp r0, [esp + 16] ;fi vs fn; the reloads below do not disturb the flags
pmov mm4, [esp]
pmov mm5, [esp+8]
jb near .do3
add r5, 4
; mm6 = c1| s1
; mm7 = s1|-c1 (we use the opposite sign. from GOGO here)
pfmul mm6, [r3] ; c1*a | s1*a
pfmul mm7, [r3+8] ; s1*b |-c1*b
cmp r5, r4 ;i vs kx; tested by the jb after the MMX instructions below
pfsub mm6, mm7 ; c1*a-s1*b | s1*a+c1*b
pswapd mm6, mm6 ; ??? ; s1*a+c1*b | c1*a-s1*b
pmov mm7, [PIC_EBP_REL(costab)] ;restore the sign-bit mask
jb near .for
mov r0, [esp+40] ;fi
cmp r4, [esp+40+4] ;kx (before scaling) vs nn; lea below leaves the flags alone
lea r4, [r4*4] ;kx *= 4
jb near .do1
.exitttt
femms
add esp,20
popd ebp, ebx, esi, edi
endproc

View File

@@ -0,0 +1,619 @@
; back port from GOGO-no coda 2.24b by Takehiro TOMINAGA
; GOGO-no-coda
; Copyright (C) 1999 shigeo
; special thanks to URURI
%include "nasm.h"
externdef costab_fft
externdef sintab_fft
segment_data
align 32
D_1_41421 dd 1.41421356
D_1_0 dd 1.0
D_0_5 dd 0.5
D_0_25 dd 0.25
D_0_0005 dd 0.0005
D_0_0 dd 0.0
segment_code
;void fht(float *fz, int n);
;
; x87 implementation: every arithmetic step uses fld/fadd/fsub/fmul on
; single-precision memory operands, with results written back into fz.
; Arguments (fz, n) and the locals listed below are declared through the
; arg/local/alloc macros and accessed as [sp(...)]; those macros, as well
; as proc/endproc, pushd/popd and the r0-r6 register names, are not defined
; in this file -- presumably they come from nasm.h; verify there.
; Rotation factors for each pass are read from the external tables
; costab_fft and sintab_fft, indexed by k.
; NOTE(review): both inner loops terminate on "cmp r0, r6", but nothing in
; the visible body assigns r6.  Presumably it is meant to hold the end
; pointer of fz -- confirm how (or whether) it gets set before using this
; routine.
proc fht_FPU
%$fz arg 4
%$n arg 4
%$k local 4
%$f0 local 4
%$f1 local 4
%$f2 local 4
%$f3 local 4
%$g0 local 4
%$g1 local 4
%$g2 local 4
%$g3 local 4
%$s1 local 4
%$c1 local 4
%$s2 local 4
%$c2 local 4
%$t_s local 4
%$t_c local 4
alloc
pushd ebp, ebx, esi, edi
fht_FPU_1st_part:
fht_FPU_2nd_part:
fht_FPU_3rd_part:
.do_init:
mov r3, 16 ;k1*fsize = 4*fsize = k4
mov r4, 8 ;kx = k1/2
mov r2, 48 ;k3*fsize
mov dword [sp(%$k)], 2 ;k = 2
mov r0, [sp(%$fz)] ;fi
lea r1, [r0+8] ;gi = fi + kx
.do:
.do2:
;f
fld dword [r0]
fsub dword [r0+r3]
fld dword [r0]
fadd dword [r0+r3]
fld dword [r0+r3*2]
fsub dword [r0+r2]
fld dword [r0+r3*2]
fadd dword [r0+r2] ;f2 f3 f0 f1
fld st2 ;f0 f2 f3 f0 f1
fadd st0, st1
fstp dword [r0] ;fi[0]
fld st3 ;f1 f2 f3 f0 f1
fadd st0, st2
fstp dword [r0+r3] ;fi[k1]
fsubr st0, st2 ;f0-f2 f3 f0 f1
fstp dword [r0+r3*2] ;fi[k2]
fsubr st0, st2 ;f1-f3 f0 f1
fstp dword [r0+r2] ;fi[k3]
fcompp ;pop the two remaining entries (f0, f1)
;g
fld dword [r1]
fsub dword [r1+r3]
fld dword [r1]
fadd dword [r1+r3]
fld dword [D_1_41421]
fmul dword [r1+r2]
fld dword [D_1_41421]
fmul dword [r1+r3*2] ;g2 g3 g0 g1
fld st2 ;g0 g2 g3 g0 g1
fadd st0, st1
fstp dword [r1] ;gi[0]
fld st3 ;g1 g2 g3 g0 g1
fadd st0, st2
fstp dword [r1+r3] ;gi[k1]
fsubr st0, st2 ;g0-g2 g3 g0 g1
fstp dword [r1+r3*2] ;gi[k2]
fsubr st0, st2 ;g1-g3 g0 g1
fstp dword [r1+r2] ;gi[k3]
fcompp ;pop the two remaining entries (g0, g1)
lea r0, [r0+r3*4]
lea r1, [r1+r3*4]
cmp r0, r6 ;NOTE(review): r6 is not set in this routine's visible body
jb .do2
mov r0, [sp(%$k)]
fld dword [costab_fft +r0*4]
fstp dword [sp(%$t_c)]
fld dword [sintab_fft +r0*4]
fstp dword [sp(%$t_s)]
fld dword [D_1_0]
fstp dword [sp(%$c1)] ;c1 = 1.0
fld dword [D_0_0]
fstp dword [sp(%$s1)] ;s1 = 0.0
.for_init:
mov r5, 4 ;i = 1*fsize
.for:
fld dword [sp(%$c1)]
fmul dword [sp(%$t_c)]
fld dword [sp(%$s1)]
fmul dword [sp(%$t_s)]
fsubp st1, st0 ;c1
fld dword [sp(%$c1)]
fmul dword [sp(%$t_s)]
fld dword [sp(%$s1)]
fmul dword [sp(%$t_c)]
faddp st1, st0 ;s1 c1
fld st1
fmul st0, st0 ;c1c1 s1 c1
fld st1
fmul st0, st0 ;s1s1 c1c1 s1 c1
fsubp st1, st0 ;c2 s1 c1
fstp dword [sp(%$c2)] ;s1 c1
fld st1 ;c1 s1 c1
fmul st0, st1 ;c1s1 s1 c1
fadd st0, st0 ;s2 s1 c1
fstp dword [sp(%$s2)] ;s1 c1
fstp dword [sp(%$s1)] ;c1
fstp dword [sp(%$c1)] ;
mov r0, [sp(%$fz)]
add r0, r5 ;r0 = fi
mov r1, [sp(%$fz)]
add r1, r3
sub r1, r5 ;r1 = gi
.do3:
fld dword [sp(%$s2)]
fmul dword [r0+r3]
fld dword [sp(%$c2)]
fmul dword [r1+r3]
fsubp st1, st0 ;b = s2*fi[k1] - c2*gi[k1]
fld dword [sp(%$c2)]
fmul dword [r0+r3]
fld dword [sp(%$s2)]
fmul dword [r1+r3]
faddp st1, st0 ;a = c2*fi[k1] + s2*gi[k1] b
fld dword [r0]
fsub st0, st1 ;f1 a b
fstp dword [sp(%$f1)] ;a b
fadd dword [r0] ;f0 b
fstp dword [sp(%$f0)] ;b
fld dword [r1]
fsub st0, st1 ;g1 b
fstp dword [sp(%$g1)] ;b
fadd dword [r1] ;g0
fstp dword [sp(%$g0)] ;
fld dword [sp(%$s2)]
fmul dword [r0+r2]
fld dword [sp(%$c2)]
fmul dword [r1+r2]
fsubp st1, st0 ;b = s2*fi[k3] - c2*gi[k3]
fld dword [sp(%$c2)]
fmul dword [r0+r2]
fld dword [sp(%$s2)]
fmul dword [r1+r2]
faddp st1, st0 ;a = c2*fi[k3] + s2*gi[k3] b
fld dword [r0+r3*2]
fsub st0, st1 ;f3 a b
fstp dword [sp(%$f3)] ;a b
fadd dword [r0+r3*2] ;f2 b
fstp dword [sp(%$f2)] ;b
fld dword [r1+r3*2]
fsub st0, st1 ;g3 b
fstp dword [sp(%$g3)] ;b
fadd dword [r1+r3*2] ;g2
fstp dword [sp(%$g2)] ;
fld dword [sp(%$s1)]
fmul dword [sp(%$f2)]
fld dword [sp(%$c1)]
fmul dword [sp(%$g3)]
fsubp st1, st0 ;b = s1*f2 - c1*g3
fld dword [sp(%$c1)]
fmul dword [sp(%$f2)]
fld dword [sp(%$s1)]
fmul dword [sp(%$g3)]
faddp st1, st0 ;a = c1*f2 + s1*g3 b
fld dword [sp(%$f0)]
fsub st0, st1 ;fi[k2] a b
fstp dword [r0+r3*2]
fadd dword [sp(%$f0)] ;fi[0] b
fstp dword [r0]
fld dword [sp(%$g1)]
fsub st0, st1 ;gi[k3] b
fstp dword [r1+r2]
fadd dword [sp(%$g1)] ;gi[k1]
fstp dword [r1+r3]
fld dword [sp(%$c1)]
fmul dword [sp(%$g2)]
fld dword [sp(%$s1)]
fmul dword [sp(%$f3)]
fsubp st1, st0 ;b = c1*g2 - s1*f3
fld dword [sp(%$s1)]
fmul dword [sp(%$g2)]
fld dword [sp(%$c1)]
fmul dword [sp(%$f3)]
faddp st1, st0 ;a = s1*g2 + c1*f3 b
fld dword [sp(%$g0)]
fsub st0, st1 ;gi[k2] a b
fstp dword [r1+r3*2]
fadd dword [sp(%$g0)] ;gi[0] b
fstp dword [r1]
fld dword [sp(%$f1)]
fsub st0, st1 ;fi[k3] b
fstp dword [r0+r2]
fadd dword [sp(%$f1)] ;fi[k1]
fstp dword [r0+r3]
lea r0, [r0+r3*4]
lea r1, [r1+r3*4]
cmp r0, r6 ;NOTE(review): r6 is not set in this routine's visible body
jb near .do3
add r5, 4
cmp r5, r4
jb near .for
cmp r3, [sp(%$n)] ;compares the byte step in r3 with the n argument
jae .exit
add dword [sp(%$k)], 2 ;k += 2;
lea r3, [r3*4] ;k1 *= 4
lea r2, [r2*4] ;k3 *= 4
lea r4, [r4*4] ;kx *= 4
mov r0, [sp(%$fz)] ;fi
lea r1, [r0+r4] ;gi = fi + kx
jmp .do
.exit:
popd ebp, ebx, esi, edi
endproc
;*************************************************************
;void fht_FPU_FXCH(float *fz, int n);
;
; x87 implementation with the same arguments, locals, loop structure and
; external tables (costab_fft, sintab_fft) as fht_FPU above; here the
; independent loads/multiplies are issued first and fxch is used to bring
; the needed operand to the top of the FPU stack before each add/sub/store.
; The arg/local/alloc/sp macros, proc/endproc, pushd/popd and the r0-r6
; register names are not defined in this file -- presumably they come from
; nasm.h; verify there.
; NOTE(review): both inner loops terminate on "cmp r0, r6", but nothing in
; the visible body assigns r6.  Presumably it is meant to hold the end
; pointer of fz -- confirm how (or whether) it gets set before using this
; routine.
proc fht_FPU_FXCH
%$fz arg 4
%$n arg 4
%$k local 4
%$f0 local 4
%$f1 local 4
%$f2 local 4
%$f3 local 4
%$g0 local 4
%$g1 local 4
%$g2 local 4
%$g3 local 4
%$s1 local 4
%$c1 local 4
%$s2 local 4
%$c2 local 4
%$t_s local 4
%$t_c local 4
alloc
pushd ebp, ebx, esi, edi
fht_FPU_FXCH_1st_part:
fht_FPU_FXCH_2nd_part:
fht_FPU_FXCH_3rd_part:
.do_init:
mov r3, 16 ;k1*fsize = 4*fsize = k4
mov r4, 8 ;kx = k1/2
mov r2, 48 ;k3*fsize
mov dword [sp(%$k)], 2 ;k = 2
mov r0, [sp(%$fz)] ;fi
lea r1, [r0+8] ;gi = fi + kx
.do:
.do2:
;f
fld dword [r0]
fsub dword [r0+r3]
fld dword [r0]
fadd dword [r0+r3]
fld dword [r0+r3*2]
fsub dword [r0+r2]
fld dword [r0+r3*2]
fadd dword [r0+r2] ;f2 f3 f0 f1
fld st3
fld st3
fxch st5
fadd st0, st3
fxch st4
fadd st0, st2
fxch st3
fsubp st1, st0
fxch st1
fsubp st4, st0
fxch st2
fstp dword [r0+r3] ;fi[k1]
fstp dword [r0] ;fi[0]
fstp dword [r0+r2] ;fi[k3]
fstp dword [r0+r3*2] ;fi[k2]
;g
fld dword [r1]
fsub dword [r1+r3]
fld dword [r1]
fadd dword [r1+r3]
fld dword [D_1_41421]
fmul dword [r1+r2]
fld dword [D_1_41421]
fmul dword [r1+r3*2] ;g2 g3 g0 g1
fld st3
fld st3
fxch st5
fadd st0, st3
fxch st4
fadd st0, st2
fxch st3
fsubp st1, st0
fxch st1
fsubp st4, st0
fxch st2
fstp dword [r1+r3] ;gi[k1]
fstp dword [r1] ;gi[0]
fstp dword [r1+r2] ;gi[k3]
fstp dword [r1+r3*2] ;gi[k2]
lea r0, [r0+r3*4]
lea r1, [r1+r3*4]
cmp r0, r6 ;NOTE(review): r6 is not set in this routine's visible body
jb .do2
mov r0, [sp(%$k)]
fld dword [costab_fft +r0*4]
fld dword [sintab_fft +r0*4]
fld dword [D_1_0]
fld dword [D_0_0]
fxch st3
fstp dword [sp(%$t_c)]
fxch st1
fstp dword [sp(%$t_s)]
fstp dword [sp(%$c1)] ;c1 = 1.0
fstp dword [sp(%$s1)] ;s1 = 0.0
.for_init:
mov r5, 4 ;i = 1*fsize
.for:
fld dword [sp(%$c1)]
fmul dword [sp(%$t_c)]
fld dword [sp(%$s1)]
fmul dword [sp(%$t_s)]
fld dword [sp(%$c1)]
fmul dword [sp(%$t_s)]
fld dword [sp(%$s1)]
fmul dword [sp(%$t_c)]
fxch st2
fsubp st3, st0 ;c1
faddp st1, st0 ;s1 c1
fld st1
fxch st2
fmul st0, st0 ;c1c1 s1 c1
fld st1
fxch st2
fmul st0, st0 ;s1s1 c1c1 s1 c1
fxch st3
fst dword [sp(%$c1)] ;c1
fxch st2
fst dword [sp(%$s1)] ;s1 c1c1 c1 s1s1
fmulp st2, st0
fsubrp st2, st0
fadd st0, st0 ;s2 c2
fxch st1
fstp dword [sp(%$c2)]
fstp dword [sp(%$s2)]
mov r0, [sp(%$fz)]
mov r1, [sp(%$fz)]
add r0, r5 ;r0 = fi
add r1, r3
sub r1, r5 ;r1 = gi
.do3:
fld dword [sp(%$s2)]
fmul dword [r0+r3]
fld dword [sp(%$c2)]
fmul dword [r1+r3]
fld dword [sp(%$c2)]
fmul dword [r0+r3]
fld dword [sp(%$s2)]
fmul dword [r1+r3]
fxch st2
fsubp st3, st0 ;b = s2*fi[k1] - c2*gi[k1]
faddp st1, st0 ;a = c2*fi[k1] + s2*gi[k1] b
fld dword [r1]
fsub st0, st2 ;g1 a b
fxch st2
fadd dword [r1] ;g0 a g1
fld dword [r0]
fsub st0, st2 ;f1 g0 a g1
fxch st2
fadd dword [r0] ;f0 g0 f1 g1
fxch st3
fstp dword [sp(%$g1)]
fstp dword [sp(%$g0)]
fstp dword [sp(%$f1)]
fstp dword [sp(%$f0)]
fld dword [sp(%$s2)]
fmul dword [r0+r2]
fld dword [sp(%$c2)]
fmul dword [r1+r2]
fld dword [sp(%$c2)]
fmul dword [r0+r2]
fld dword [sp(%$s2)]
fmul dword [r1+r2]
fxch st2
fsubp st3, st0 ;b = s2*fi[k3] - c2*gi[k3]
faddp st1, st0 ;a = c2*fi[k3] + s2*gi[k3] b
fld dword [r1+r3*2]
fsub st0, st2 ;g3 a b
fxch st2
fadd dword [r1+r3*2] ;g2 a g3
fld dword [r0+r3*2]
fsub st0, st2 ;f3 g2 a g3
fxch st2
fadd dword [r0+r3*2] ;f2 g2 f3 g3
fxch st3
fstp dword [sp(%$g3)]
fstp dword [sp(%$g2)]
fstp dword [sp(%$f3)]
fstp dword [sp(%$f2)]
fld dword [sp(%$s1)]
fmul dword [sp(%$f2)]
fld dword [sp(%$c1)]
fmul dword [sp(%$g3)]
fld dword [sp(%$c1)]
fmul dword [sp(%$f2)]
fld dword [sp(%$s1)]
fmul dword [sp(%$g3)]
fxch st2
fsubp st3, st0 ;b = s1*f2 - c1*g3
faddp st1, st0 ;a = c1*f2 + s1*g3 b
fld dword [sp(%$g1)]
fsub st0, st2 ;gi[k3] a b
fxch st2
fadd dword [sp(%$g1)] ;gi[k1] a gi[k3]
fld dword [sp(%$f0)]
fsub st0, st2 ;fi[k2] gi[k1] a gi[k3]
fxch st2
fadd dword [sp(%$f0)] ;fi[0] gi[k1] fi[k2] gi[k3]
fxch st3
fstp dword [r1+r2]
fstp dword [r1+r3]
fstp dword [r0+r3*2]
fstp dword [r0]
fld dword [sp(%$c1)]
fmul dword [sp(%$g2)]
fld dword [sp(%$s1)]
fmul dword [sp(%$f3)]
fld dword [sp(%$s1)]
fmul dword [sp(%$g2)]
fld dword [sp(%$c1)]
fmul dword [sp(%$f3)]
fxch st2
fsubp st3, st0 ;b = c1*g2 - s1*f3
faddp st1, st0 ;a = s1*g2 + c1*f3 b
fld dword [sp(%$f1)]
fsub st0, st2 ;fi[k3] a b
fxch st2
fadd dword [sp(%$f1)] ;fi[k1] a fi[k3]
fld dword [sp(%$g0)]
fsub st0, st2 ;gi[k2] fi[k1] a fi[k3]
fxch st2
fadd dword [sp(%$g0)] ;gi[0] fi[k1] gi[k2] fi[k3]
fxch st3
fstp dword [r0+r2]
fstp dword [r0+r3]
fstp dword [r1+r3*2]
fstp dword [r1]
lea r0, [r0+r3*4]
lea r1, [r1+r3*4]
cmp r0, r6 ;NOTE(review): r6 is not set in this routine's visible body
jb near .do3
add r5, 4
cmp r5, r4
jb near .for
cmp r3, [sp(%$n)] ;compares the byte step in r3 with the n argument
jae .exit
add dword [sp(%$k)], 2 ;k += 2;
lea r3, [r3*4] ;k1 *= 4
lea r2, [r2*4] ;k3 *= 4
lea r4, [r4*4] ;kx *= 4
mov r0, [sp(%$fz)] ;fi
lea r1, [r0+r4] ;gi = fi + kx
jmp .do
.exit:
popd ebp, ebx, esi, edi
endproc
end

View File

@@ -0,0 +1,422 @@
; back port from GOGO-no coda 2.24b by Takehiro TOMINAGA
; GOGO-no-coda
; Copyright (C) 1999 shigeo
; special thanks to Keiichi SAKAI
%include "nasm.h"
; Export the routine (globaldef adds a leading underscore when _NAMING is set).
globaldef fht_SSE
segment_data
align 16
; Constant vectors used by fht_SSE. Lane names below follow the routine's own
; comment convention {lane3, lane2, lane1, lane0}; the dd list is lane0 first.
; Q_MMPP: sign-bit mask for lanes 3 and 2 (xorps with it negates those lanes).
Q_MMPP dd 0x0,0x0,0x80000000,0x80000000
; Q_MPMP: sign-bit mask for lanes 3 and 1.
Q_MPMP dd 0x0,0x80000000,0x0,0x80000000
; D_1100: {1.0, 1.0, 0.0, 0.0}; subtracted to turn 2*s*s into 2*s*s-1 in lanes 3 and 2.
D_1100 dd 0.0, 0.0, 1.0, 1.0
; Twiddle factors stored as (cos, sin) pairs, one pair per pass of the outer
; loop (ecx advances by 8 bytes per pass). The values correspond to the angles
; pi/8, pi/32, pi/128 and pi/512.
; This label is local to this file (it is not passed to globaldef here).
costab_fft:
dd 9.238795325112867e-01
dd 3.826834323650898e-01
dd 9.951847266721969e-01
dd 9.801714032956060e-02
dd 9.996988186962042e-01
dd 2.454122852291229e-02
dd 9.999811752836011e-01
dd 6.135884649154475e-03
; sqrt(2), used by the x87 part of the first inner loop.
S_SQRT2 dd 1.414213562
segment_code
; Emits the get_pc.bp helper (and, for ELF output, the GOT extern) from nasm.h.
PIC_OFFSETTABLE
;------------------------------------------------------------------------
; by K. SAKAI
; 99/08/18 PIII 23k[clk]
; 99/08/19 instruction reordering PIII 22k[clk] (note translated from mis-encoded Japanese)
; 99/08/20 bit reversal ported from the older GOGO code PIII 17k[clk] (translation approximate)
; 99/08/23 partial unroll PIII 14k[clk]
; 99/11/12 clean up
;
;void fht_SSE(float *fz, int n);
;
; In-place transform of the float array at fz (cdecl, arguments on the stack).
; The end pointer is computed as fn = fz + 8*n bytes and kept on the stack.
; ebx, esi, edi and ebp are saved and restored; eax, ecx, edx, the x87 stack
; and xmm0-xmm7 are used as scratch.
; ebp holds the PIC base while the routine runs; ecx walks the (cos, sin)
; pairs of costab_fft, 8 bytes per pass of the outer loop.
; All "fi < fn" loop tests compare addresses and therefore use the unsigned
; condition (jb); a signed test would end the loops early for a buffer that
; straddles address 0x80000000.
align 16
fht_SSE:
push ebx
push esi
push edi
push ebp
; Offset of the first argument from esp after the four pushes plus the return address.
%assign _P 4*5
; second loop (translated from mis-encoded Japanese)
mov eax,[esp+_P+0] ;eax=fz
mov ebp,[esp+_P+4] ;=n
shl ebp,3
add ebp,eax ; fn = fz + 8*n bytes; kept unchanged until the function returns
push ebp ; [esp] = fn from here on, so the arguments move to _P+4 / _P+8
call get_pc.bp
add ebp, PIC_BASE()
lea ecx,[PIC_EBP_REL(costab_fft)]
xor eax,eax
mov al,8 ; =k1=1*(sizeof float) // 4, 16, 64, 256,...
.lp2: ; do{
mov esi,[esp+_P+4] ; esi=fi=fz
lea edx,[eax+eax*2] ; edx = 3*eax, so [x+edx*2] addresses the k3 element
mov ebx, esi
; Only two-way parallelism is available in this part, so the x87 FPU is
; faster here (translated from mis-encoded Japanese; wording approximate).
loopalign 16
.lp20: ; do{
; f0 = fi[0 ] + fi[k1];
; f2 = fi[k2] + fi[k3];
; f1 = fi[0 ] - fi[k1];
; f3 = fi[k2] - fi[k3];
; fi[0 ] = f0 + f2;
; fi[k1] = f1 + f3;
; fi[k2] = f0 - f2;
; fi[k3] = f1 - f3;
lea edi,[ebx+eax] ; edi=gi=fi+ki/2
fld dword [ebx]
fadd dword [ebx+eax*2]
fld dword [ebx+eax*4]
fadd dword [ebx+edx*2]
fld dword [ebx]
fsub dword [ebx+eax*2]
fld dword [ebx+eax*4]
fsub dword [ebx+edx*2]
fld st1
fadd st0,st1
fstp dword [ebx+eax*2]
fsubp st1,st0
fstp dword [ebx+edx*2]
fld st1
fadd st0,st1
fstp dword [ebx]
fsubp st1,st0
fstp dword [ebx+eax*4]
lea ebx,[ebx + eax*8] ; = fi += (k1 * 4);
; g0 = gi[0 ] + gi[k1];
; g2 = SQRT2 * gi[k2];
; g1 = gi[0 ] - gi[k1];
; g3 = SQRT2 * gi[k3];
; gi[0 ] = g0 + g2;
; gi[k2] = g0 - g2;
; gi[k1] = g1 + g3;
; gi[k3] = g1 - g3;
fld dword [edi]
fadd dword [edi+eax*2]
fld dword [PIC_EBP_REL(S_SQRT2)]
fmul dword [edi+eax*4]
fld dword [edi]
fsub dword [edi+eax*2]
fld dword [PIC_EBP_REL(S_SQRT2)]
fmul dword [edi+edx*2]
fld st1
fadd st0,st1
fstp dword [edi+eax*2]
fsubp st1,st0
fstp dword [edi+edx*2]
fld st1
fadd st0,st1
fstp dword [edi]
fsubp st1,st0
fstp dword [edi+eax*4]
cmp ebx,[esp]
jb near .lp20 ; while (fi<fn); unsigned: this is an address comparison
; i = 1; //for (i=1;i<kx;i++){
; c1 = 1.0*t_c - 0.0*t_s;
; s1 = 0.0*t_c + 1.0*t_s;
movlps xmm6,[ecx] ; = { --, --, s1, c1}
movaps xmm7,xmm6
shufps xmm6,xmm6,R4(0,1,1,0) ; = {+c1, +s1, +s1, +c1} -> needed later
; c2 = c1*c1 - s1*s1 = 1 - (2*s1)*s1;
; s2 = c1*s1 + s1*c1 = 2*s1*c1;
shufps xmm7,xmm7,R4(1,0,0,1)
movss xmm5,xmm7 ; = { --, --, --, s1}
xorps xmm7,[PIC_EBP_REL(Q_MMPP)] ; = {-s1, -c1, +c1, +s1} -> needed later
addss xmm5,xmm5 ; = (--, --, --, 2*s1)
add esi,4 ; esi = fi = fz + i
shufps xmm5,xmm5,R4(0,0,0,0) ; = (2*s1, 2*s1, 2*s1, 2*s1)
mulps xmm5,xmm6 ; = (2*s1*c1, 2*s1*s1, 2*s1*s1, 2*s1*c1)
subps xmm5,[PIC_EBP_REL(D_1100)] ; = (--, 2*s1*s1-1, --, 2*s1*c1) = {-- -c2 -- s2}
movaps xmm4,xmm5
shufps xmm5,xmm5,R4(2,0,2,0) ; = {-c2, s2, -c2, s2} -> needed later
xorps xmm4,[PIC_EBP_REL(Q_MMPP)] ; = {--, c2, --, s2}
shufps xmm4,xmm4,R4(0,2,0,2) ; = {s2, c2, s2, c2} -> needed later
loopalign 16
.lp21: ; do{
; a = c2*fi[k1] + s2*gi[k1];
; b = s2*fi[k1] - c2*gi[k1];
; c = c2*fi[k3] + s2*gi[k3];
; d = s2*fi[k3] - c2*gi[k3];
; f0 = fi[0 ] + a;
; g0 = gi[0 ] + b;
; f2 = fi[k1 * 2] + c;
; g2 = gi[k1 * 2] + d;
; f1 = fi[0 ] - a;
; g1 = gi[0 ] - b;
; f3 = fi[k1 * 2] - c;
; g3 = gi[k1 * 2] - d;
lea edi,[esi + eax*2 - 8] ; edi = gi = fz +k1-i
movss xmm0,[esi + eax*2] ; = fi[k1]
movss xmm2,[esi + edx*2] ; = fi[k3]
shufps xmm0,xmm2,0x00 ; = {fi[k3], fi[k3], fi[k1], fi[k1]}
movss xmm1,[edi + eax*2] ; = gi[k1]
movss xmm3,[edi + edx*2] ; = gi[k3]
shufps xmm1,xmm3,0x00 ; = {gi[k3], gi[k3], gi[k1], gi[k1]}
movss xmm2,[esi] ; = fi[0]
mulps xmm0,xmm4 ; *= {+s2, +c2, +s2, +c2}
movss xmm3,[esi + eax*4] ; = fi[k2]
unpcklps xmm2,xmm3 ; = {--, --, fi[k2], fi[0]}
mulps xmm1,xmm5 ; *= {-c2, +s2, -c2, +s2}
movss xmm3,[edi + eax*4] ; = gi[k2]
addps xmm0,xmm1 ; = {d, c, b, a}
movss xmm1,[edi] ; = gi[0]
unpcklps xmm1,xmm3 ; = {--, --, gi[k2], gi[0]}
unpcklps xmm2,xmm1 ; = {gi[k2], fi[k2], gi[0], fi[0]}
movaps xmm1,xmm2
addps xmm1,xmm0 ; = {g2, f2, g0, f0}
subps xmm2,xmm0 ; = {g3, f3, g1, f1}
; a = c1*f2 + s1*g3;
; c = s1*g2 + c1*f3;
; b = s1*f2 - c1*g3;
; d = c1*g2 - s1*f3;
; fi[0 ] = f0 + a;
; gi[0 ] = g0 + c;
; gi[k1] = g1 + b;
; fi[k1] = f1 + d;
; fi[k1 * 2] = f0 - a;
; gi[k1 * 2] = g0 - c;
; gi[k3] = g1 - b;
; fi[k3] = f1 - d;
movaps xmm3,xmm1
movhlps xmm1,xmm1 ; = {g2, f2, g2, f2}
shufps xmm3,xmm2,0x14 ; = {f1, g1, g0, f0}
mulps xmm1,xmm6 ; *= {+c1, +s1, +s1, +c1}
shufps xmm2,xmm2,0xBB ; = {f3, g3, f3, g3}
mulps xmm2,xmm7 ; *= {-s1, -c1, +c1, +s1}
addps xmm1,xmm2 ; = {d, b, c, a}
movaps xmm2,xmm3
addps xmm3,xmm1 ; = {fi[k1], gi[k1], gi[0], fi[0]}
subps xmm2,xmm1 ; = {fi[k3], gi[k3], gi[k1*2], fi[k1*2]}
; Scatter the four lanes of each result vector with scalar stores.
movhlps xmm0,xmm3
movss [esi],xmm3
shufps xmm3,xmm3,0x55
movss [edi+eax*2],xmm0
shufps xmm0,xmm0,0x55
movss [edi],xmm3
movss [esi+eax*2],xmm0
movhlps xmm0,xmm2
movss [esi+eax*4],xmm2
shufps xmm2,xmm2,0x55
movss [edi+edx*2],xmm0
shufps xmm0,xmm0,0x55
movss [edi+eax*4],xmm2
movss [esi+edx*2],xmm0
lea esi,[esi + eax*8] ; fi += (k1 * 4);
cmp esi,[esp]
jb near .lp21 ; while (fi<fn); unsigned: this is an address comparison
; The unrolled do loop above is 43+4 instructions (translated note).
; The for loop that is not the innermost one is unrolled from i=2 onward
; (translated from mis-encoded Japanese; wording approximate).
; kx= 2, 8, 32, 128
; k4= 16, 64, 256, 1024
; 0, 6/2,30/2,126/2
xor ebx,ebx
mov bl, 4*2 ; = i = 4
cmp ebx,eax ; i < k1
jnl near .F22 ; index comparison, small non-negative values: signed test is fine
; for (i=2;i<kx;i+=2){
loopalign 16
.lp22:
; at here, xmm6 is {c3, s3, s3, c3}
; c1 = c3*t_c - s3*t_s;
; s1 = c3*t_s + s3*t_c;
movlps xmm0,[ecx]
shufps xmm0,xmm0,R4(1,1,0,0) ; = {t_s, t_s, t_c, t_c}
mulps xmm6,xmm0 ; = {c3*ts, s3*ts, s3*tc, c3*tc}
movhlps xmm4,xmm6 ; = {--, --, c3*ts, s3*ts}
xorps xmm4,[PIC_EBP_REL(Q_MPMP)] ; = {--, --, -c3*ts, s3*ts}
subps xmm6,xmm4 ; = {-,-, c3*ts+s3*tc, c3*tc-s3*ts}={-,-,s1,c1}
; c3 = c1*t_c - s1*t_s;
; s3 = s1*t_c + c1*t_s;
shufps xmm6,xmm6,0x14 ; = {c1, s1, s1, c1}
mulps xmm0,xmm6 ; = {ts*c1 ts*s1 tc*s1 tc*c1}
movhlps xmm3,xmm0
xorps xmm3,[PIC_EBP_REL(Q_MPMP)]
subps xmm0,xmm3 ; = {--, --, s3, c3}
; {s2 s4 c4 c2} = {2*s1*c1 2*s3*c3 1-2*s3*s3 1-2*s1*s1}
unpcklps xmm6,xmm0 ; xmm6 = {s3, s1, c3, c1}
movaps xmm7, xmm6
shufps xmm6,xmm6,R4(2,3,1,0) ; xmm6 = {s1, s3, c3, c1}
addps xmm7, xmm7 ; {s3*2, s1*2, --, --}
mov edi,[esp+_P+4] ; = fz
shufps xmm7, xmm7, R4(2,3,3,2) ; {s1*2, s3*2, s3*2, s1*2}
sub edi,ebx ; edi = fz - i/2
mulps xmm7, xmm6 ; {s1*s1*2, s3*s3*2, s3*c3*2, s1*c1*2}
lea esi,[edi + ebx*2] ; esi = fi = fz +i/2
subps xmm7, [PIC_EBP_REL(D_1100)] ; {-c2, -c4, s4, s2}
lea edi,[edi + eax*2-4] ; edi = gi = fz +k1-i/2
; fi = fz +i;
; gi = fz +k1-i;
; do{
.lp220:
; The unrolled do loop below is 51+4 instructions (translated note).
; a = c2*fi[k1 ] + s2*gi[k1 ];
; e = c4*fi[k1+1] + s4*gi[k1-1];
; f = s4*fi[k1+1] - c4*gi[k1-1];
; b = s2*fi[k1 ] - c2*gi[k1 ];
; c = c2*fi[k3 ] + s2*gi[k3 ];
; g = c4*fi[k3+1] + s4*gi[k3-1];
; h = s4*fi[k3+1] - c4*gi[k3-1];
; d = s2*fi[k3 ] - c2*gi[k3 ];
movaps xmm4,xmm7 ; = {-c2 -c4 s4 s2}
xorps xmm4,[PIC_EBP_REL(Q_MMPP)] ; = { c2 c4 s4 s2}
shufps xmm4,xmm4,0x1B ; = { s2 s4 c4 c2}
movlps xmm0,[esi+eax*2]
movlps xmm1,[edi+eax*2]
movlps xmm2,[esi+edx*2]
movlps xmm3,[edi+edx*2]
shufps xmm0,xmm0,0x14
shufps xmm1,xmm1,0x41
shufps xmm2,xmm2,0x14
shufps xmm3,xmm3,0x41
mulps xmm0,xmm4
mulps xmm1,xmm7
mulps xmm2,xmm4
mulps xmm3,xmm7
addps xmm0,xmm1 ; xmm0 = {b, f, e, a}
addps xmm2,xmm3 ; xmm2 = {d, h, g, c}
;17
; f0 = fi[0 ] + a;
; f4 = fi[0 +1] + e;
; g4 = gi[0 -1] + f;
; g0 = gi[0 ] + b;
; f1 = fi[0 ] - a;
; f5 = fi[0 +1] - e;
; g5 = gi[0 -1] - f;
; g1 = gi[0 ] - b;
; f2 = fi[k2 ] + c;
; f6 = fi[k2+1] + g;
; g6 = gi[k2-1] + h;
; g2 = gi[k2 ] + d;
; f3 = fi[k2 ] - c;
; f7 = fi[k2+1] - g;
; g7 = gi[k2-1] - h;
; g3 = gi[k2 ] - d;
movlps xmm1,[esi ]
movhps xmm1,[edi ]
movaps xmm4,xmm1
subps xmm1,xmm0 ; xmm1 = {g1, g5, f5, f1}
movlps xmm3,[esi+eax*4]
movhps xmm3,[edi+eax*4]
movaps xmm5,xmm3
subps xmm3,xmm2 ; xmm3 = {g3, g7, f7, f3}
addps xmm0,xmm4 ; xmm0 = {g0, g4, f4, f0}
addps xmm2,xmm5 ; xmm2 = {g2, g6, f6, f2}
;10
; (The trailing annotations on the "a" and "d" lines were unreadable in this
; copy of the source and are omitted.)
; a = c1*f2 + s1*g3;
; e = c3*f6 + s3*g7;
; g = s3*g6 + c3*f7;
; c = s1*g2 + c1*f3;
; d = c1*g2 - s1*f3;
; h = c3*g6 - s3*f7;
; f = s3*f6 - c3*g7;
; b = s1*f2 - c1*g3;
movaps xmm5,xmm6 ; xmm6 = {s1, s3, c3, c1}
shufps xmm5,xmm5,0x1B ; = {c1, c3, s3, s1}
movaps xmm4,xmm2
mulps xmm4,xmm6
shufps xmm2,xmm2,0x1B ; xmm2 = {f2, f6, g6, g2}
mulps xmm2,xmm6
mulps xmm5,xmm3
mulps xmm3,xmm6
shufps xmm3,xmm3,0x1B
addps xmm4,xmm3 ; = {c, g, e, a}
subps xmm2,xmm5 ; = {b, f, h, d}
;10
; fi[0 ] = f0 + a;
; fi[0 +1] = f4 + e;
; gi[0 -1] = g4 + g;
; gi[0 ] = g0 + c;
; fi[k2 ] = f0 - a;
; fi[k2+1] = f4 - e;
; gi[k2-1] = g4 - g;
; gi[k2 ] = g0 - c;
; fi[k1 ] = f1 + d;
; fi[k1+1] = f5 + h;
; gi[k1-1] = g5 + f;
; gi[k1 ] = g1 + b;
; fi[k3 ] = f1 - d;
; fi[k3+1] = f5 - h;
; gi[k3-1] = g5 - f;
; gi[k3 ] = g1 - b;
movaps xmm3,xmm0
subps xmm0,xmm4
movlps [esi+eax*4],xmm0
movhps [edi+eax*4],xmm0
addps xmm4,xmm3
movlps [esi ],xmm4
movhps [edi ],xmm4
movaps xmm5,xmm1
subps xmm1,xmm2
movlps [esi+edx*2],xmm1
movhps [edi+edx*2],xmm1
addps xmm2,xmm5
movlps [esi+eax*2],xmm2
movhps [edi+eax*2],xmm2
; 14
; gi += k4;
; fi += k4;
lea edi,[edi + eax*8] ; gi += (k1 * 4);
lea esi,[esi + eax*8] ; fi += (k1 * 4);
cmp esi,[esp]
jb near .lp220 ; while (fi<fn); unsigned: this is an address comparison
; } while (fi<fn);
add ebx,byte 2*4 ; i+= 4
cmp ebx,eax ; i < k1
; shufps does not modify EFLAGS, so the branch below still tests the cmp above.
shufps xmm6,xmm6,R4(1,2,2,1) ; (--,s3,c3,--) => {c3, s3, s3, c3}
jl near .lp22
; }
.F22:
shl eax,2
add ecx, byte 8 ; next (cos, sin) pair of costab_fft
cmp eax,[esp+_P+8] ; while ((k1 * 4)<n);
jle near .lp2
pop ebp ; discard the saved fn
pop ebp ; restore the caller's ebp
pop edi
pop esi
pop ebx
ret
end

View File

@@ -0,0 +1,78 @@
; for new GOGO-no-coda (1999/09)
; Copyright (C) 1999 shigeo
; special thanks to URURI, Keiichi SAKAI
; fft sin, cos and gray-code index tables
; 99/08/21
; 99/09/01 bit-reversal table change (author name and details unreadable in this copy; likely a size reduction)
; 99/09/14 gray-code index table change (details unreadable in this copy; likely the switch to byte entries)
%include "nasm.h"
; Export the three lookup tables defined below.
globaldef costab_fft
globaldef sintab_fft
globaldef gray_index
segment_data
align 16
; 16 single-precision cosines. Entry i matches cos(pi / 2^(i+1)):
; 0, 0.7071..., 0.9238..., and so on towards 1.
costab_fft:
dd 0.000000000000
dd 0.707106781187
dd 0.923879532511
dd 0.980785280403
dd 0.995184726672
dd 0.998795456205
dd 0.999698818696
dd 0.999924701839
dd 0.999981175283
dd 0.999995293810
dd 0.999998823452
dd 0.999999705863
dd 0.999999926466
dd 0.999999981616
dd 0.999999995404
dd 0.999999998851
; 16 single-precision sines for the same angles: entry i matches sin(pi / 2^(i+1)).
sintab_fft:
dd 1.000000000000
dd 0.707106781187
dd 0.382683432365
dd 0.195090322016
dd 0.098017140330
dd 0.049067674327
dd 0.024541228523
dd 0.012271538286
dd 0.006135884649
dd 0.003067956763
dd 0.001533980186
dd 0.000766990319
dd 0.000383495188
dd 0.000191747597
dd 0.000095873799
dd 0.000047936900
align 16
; 256 byte entries. Entry i equals 2 + (number of trailing zero bits of i+1),
; giving the 2,3,2,4,2,3,2,5,... pattern below and ending in 10 at i = 255.
; NOTE(review): the consumer of this table is not visible here — presumably it
; indexes the sin/cos tables while stepping through gray-code order; confirm.
gray_index:
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 8
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 9
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 8
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6
db 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 10
segment_code
end

View File

@@ -0,0 +1,268 @@
; Copyright (C) 1999 URURI
; Macros for nasm
; 1999/08/21 created
; 1999/10/10 several additions
; 1999/10/27 a.out support
; 1999/11/07 workaround for a NASM bug affecting pushf, popf
; 1999/12/02 for BCC ( Thanks to Miquel )
; for Windows Visual C++ -> define WIN32
; Borland or cygwin -> WIN32 and COFF
; for FreeBSD 2.x -> AOUT
; for TownsOS -> __tos__
; otherwise -> none
; Naming and segment conventions
; Everything that includes this header is assembled as 32-bit code.
BITS 32
; Declare a non-executable-stack note section.
; NOTE(review): this line is emitted for every output format, while the
; similar declaration further down is guarded by an elf check — confirm that
; non-ELF targets accept these section attributes.
section .note.GNU-stack noalloc noexec nowrite progbits
; Select the segment_code / segment_data / segment_bss definitions according
; to the assembler or platform symbol defined on the command line.
%ifdef YASM
; YASM: same segments without the class= attribute.
%define segment_code segment .text align=16 use32
%define segment_data segment .data align=16 use32
%define segment_bss segment .bss align=16 use32
%elifdef WIN32
%define segment_code segment .text align=16 class=CODE use32
%define segment_data segment .data align=16 class=DATA use32
; With __BORLANDC__ defined, the bss macro places data in .data instead of .bss.
%ifdef __BORLANDC__
%define segment_bss segment .data align=16 class=DATA use32
%else
%define segment_bss segment .bss align=16 class=DATA use32
%endif
%elifdef AOUT
; a.out: symbols get a leading underscore and segments take no attributes.
%define _NAMING
%define segment_code segment .text
%define segment_data segment .data
%define segment_bss segment .bss
%else
; Default case; for elf output the stack note section is declared again here.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack progbits noalloc noexec nowrite align=1
%endif
%define segment_code segment .text align=16 class=CODE use32
%define segment_data segment .data align=16 class=DATA use32
%define segment_bss segment .bss align=16 class=DATA use32
%endif
; WIN32 also uses underscore-prefixed symbol names (see globaldef / externdef / proc).
%ifdef WIN32
%define _NAMING
%endif
; TownsOS: declare the segment groups.
%ifdef __tos__
group CGROUP text
group DGROUP data
%endif
; Single-precision float: operand-size keyword, size in bytes, and size of a elements.
; (All names in this group are case-insensitive because they use %idefine.)
%idefine float dword
%idefine fsize 4
%idefine fsizen(a) (fsize*(a))
; Word (2 bytes) and doubleword (4 bytes): element size and size of a elements.
%idefine wsize 2
%idefine wsizen(a) (wsize*(a))
%idefine dwsize 4
%idefine dwsizen(a) (dwsize*(a))
;REG
; Numbered aliases for the eight 32-bit general-purpose registers.
; Note that r6 is ebp and r7 is esp.
%define r0 eax
%define r1 ebx
%define r2 ecx
%define r3 edx
%define r4 esi
%define r5 edi
%define r6 ebp
%define r7 esp
;MMX,3DNow!,SSE
; Short mnemonics for common MMX moves and unpack instructions.
%define pmov movq
%define pmovd movd
%define pupldq punpckldq
%define puphdq punpckhdq
%define puplwd punpcklwd
%define puphwd punpckhwd
; Short names for the SSE registers.
%define xm0 xmm0
%define xm1 xmm1
%define xm2 xmm2
%define xm3 xmm3
%define xm4 xmm4
%define xm5 xmm5
%define xm6 xmm6
%define xm7 xmm7
; Base-4 helper for shuffle immediates: packs four 2-bit selectors into one
; byte, with a in the top two bits and d in the bottom two.
; The arguments are not parenthesized in the expansion, so pass simple values.
%define R4(a,b,c,d) (a*64+b*16+c*4+d)
; Simple C-like helper macros.
; globaldef name: declare name as global. When _NAMING is defined, name is
; first redefined as _name, so the declaration and every later use of name in
; the file refer to the underscore-prefixed symbol.
%imacro globaldef 1
%ifdef _NAMING
%define %1 _%1
%endif
global %1
%endmacro
; externdef name: same as globaldef, but declares the symbol extern.
%imacro externdef 1
%ifdef _NAMING
%define %1 _%1
%endif
extern %1
%endmacro
; proc name: start a procedure.
; Opens a preprocessor context named "proc", exports the symbol (as _name when
; _NAMING is defined, otherwise as name), aligns to 32 bytes and defines both
; the name and _name labels at the entry point.
; Context-local counters set here:
;   %$STACK  - bytes currently pushed or allocated below the return address
;   %$STACKN - negative running total of bytes declared with 'local'
;   %$ARG    - offset of the next argument (starts at 4, past the return address)
%imacro proc 1
%push proc
%ifdef _NAMING
global _%1
%else
global %1
%endif
align 32
%1:
_%1:
%assign %$STACK 0
%assign %$STACKN 0
%assign %$ARG 4
%endmacro
; endproc: finish a procedure opened with proc.
; Reports an error if no proc context is open. Otherwise it releases whatever
; %$STACK still accounts for (add esp), reports an error if that amount differs
; from the total declared with 'local' (unbalanced pushd/popd or a missing
; alloc), emits ret and closes the context.
%imacro endproc 0
%ifnctx proc
%error expected 'proc' before 'endproc'.
%else
%if %$STACK > 0
add esp, %$STACK
%endif
%if %$STACK <> (-%$STACKN)
%error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd'
%endif
ret
%pop
%endif
%endmacro
; sp(a): address expression for an 'arg' or 'local' offset a. Adding the
; tracked %$STACK makes the result independent of how much has been pushed or
; allocated so far. Use it inside brackets, e.g. [sp(name)].
%idefine sp(a) esp+%$STACK+a
; label arg size: bind label to the current argument offset, then advance the
; offset by size bytes.
%imacro arg 1
%00 equ %$ARG
%assign %$ARG %$ARG+%1
%endmacro
; label local size: reserve size more bytes of locals and bind label to the
; resulting (negative) offset.
%imacro local 1
%assign %$STACKN %$STACKN-%1
%00 equ %$STACKN
%endmacro
; alloc: lower esp by the part of the declared locals that has not been
; reserved yet, and record that the whole local area is now on the stack.
%imacro alloc 0
sub esp, (-%$STACKN)-%$STACK
%assign %$STACK (-%$STACKN)
%endmacro
; pushd a, b, ...: push each operand in the order given, adding 4 to %$STACK
; per push so that sp() offsets stay valid.
%imacro pushd 1-*
%rep %0
push %1
%assign %$STACK %$STACK+4
%rotate 1
%endrep
%endmacro
; popd a, b, ...: pop the operands in reverse of the order given (the argument
; list is rotated backwards before each pop), subtracting 4 from %$STACK per
; pop. Passing the same list as the matching pushd restores every register.
%imacro popd 1-*
%rep %0
%rotate -1
pop %1
%assign %$STACK %$STACK-4
%endrep
%endmacro
; bug of NASM-0.98
; Replace the pushf / popf mnemonics with explicit bytes: the 66h prefix
; followed by opcode 9Ch (pushf) or 9Dh (popf).
; NOTE(review): in 32-bit code the 66h prefix selects the 16-bit form of these
; instructions — confirm that this is what callers expect.
%define pushf db 0x66, 0x9C
%define popf db 0x66, 0x9D
; geK(n): assemble-time "n >= K" test for 0 <= n <= 15, used by the loopalign
; macros. The result is 0xFFFFFFFF when n >= K and 0 otherwise, so it can be
; ANDed with a count or inverted with ~.
; The basic term ((n)/K)*0xFFFFFFFF masked to 32 bits is all ones only when the
; quotient is exactly 1. For K <= 8 the quotient can exceed 1, which clears low
; bits of the product; each of those definitions therefore ORs in a test for a
; larger K (ge9, ge5, ge3 or ge2) that is already all ones for such n.
%define ge16(n) ((((n) / 16)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge15(n) ((((n) / 15)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge14(n) ((((n) / 14)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge13(n) ((((n) / 13)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge12(n) ((((n) / 12)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge11(n) ((((n) / 11)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge10(n) ((((n) / 10)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge9(n) ((((n) / 9)*0xFFFFFFFF) & 0xFFFFFFFF)
%define ge8(n) (ge9(n) | ((((n) / 8)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge7(n) (ge9(n) | ((((n) / 7)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge6(n) (ge9(n) | ((((n) / 6)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge5(n) (ge9(n) | ((((n) / 5)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge4(n) (ge5(n) | ((((n) / 4)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge3(n) (ge5(n) | ((((n) / 3)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge2(n) (ge3(n) | ((((n) / 2)*0xFFFFFFFF) & 0xFFFFFFFF))
%define ge1(n) (ge2(n) | ((((n) / 1)*0xFFFFFFFF) & 0xFFFFFFFF))
; Macros that align the start of a loop to a 16-byte boundary.
; The optional argument (4..16, default 12) is a threshold: when the number of
; padding bytes needed is greater than or equal to it, no padding is emitted.
; Passing 16 therefore always aligns.
; The padding needed is ($$-%%here) & 15, i.e. the distance from the current
; position to the next multiple of 16 counted from the section start.
; loopalignK6 pads as follows:
;   1..3 bytes needed           -> that many nops
;   4..(threshold-1) needed     -> a short jump to %%skip followed by nops
;                                  filling the remaining bytes
%imacro loopalignK6 0-1 12
%%here:
times (($$-%%here) & 15 & ge1(($$-%%here) & 15) & ~ge4(($$-%%here) & 15)) nop
times (1 & ge4(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) jmp short %%skip
times (((($$-%%here) & 15)-2) & ge4(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) nop
%%skip:
%endmacro
; loopalignK7 [threshold]: same purpose and threshold rule as loopalignK6, but
; it emits exactly one pre-encoded filler of the required length.
; Each 'times' line below is active for a single padding size p (geP true,
; ge(P+1) false) and only when p is below the threshold:
;   p = 1     -> one nop
;   p = 2..8  -> a fixed byte sequence of length p (opcodes 8Bh / 8Dh with
;                operand bytes, in two cases followed by a 90h nop)
;   p = 9..15 -> 0EBh with displacement p-2 (a short jump over the rest),
;                followed by p-2 bytes of 90h
%imacro loopalignK7 0-1 12
%%here:
times (1 & ge1(($$-%%here) & 15) & ~ge2(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) nop
times (1 & ge2(($$-%%here) & 15) & ~ge3(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Bh,0C0h
times (1 & ge3(($$-%%here) & 15) & ~ge4(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Dh,004h,020h
times (1 & ge4(($$-%%here) & 15) & ~ge5(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Dh,044h,020h,000h
times (1 & ge5(($$-%%here) & 15) & ~ge6(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Dh,044h,020h,000h,090h
times (1 & ge6(($$-%%here) & 15) & ~ge7(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Dh,080h,0,0,0,0
times (1 & ge7(($$-%%here) & 15) & ~ge8(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Dh,004h,005h,0,0,0,0
times (1 & ge8(($$-%%here) & 15) & ~ge9(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 08Dh,004h,005h,0,0,0,0,90h
times (1 & ge9(($$-%%here) & 15) & ~ge10(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,007h,90h,90h,90h,90h,90h,90h,90h
times (1 & ge10(($$-%%here) & 15) & ~ge11(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,008h,90h,90h,90h,90h,90h,90h,90h,90h
times (1 & ge11(($$-%%here) & 15) & ~ge12(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,009h,90h,90h,90h,90h,90h,90h,90h,90h,90h
times (1 & ge12(($$-%%here) & 15) & ~ge13(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,00Ah,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h
times (1 & ge13(($$-%%here) & 15) & ~ge14(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,00Bh,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h
times (1 & ge14(($$-%%here) & 15) & ~ge15(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,00Ch,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h
times (1 & ge15(($$-%%here) & 15) & ~ge16(($$-%%here) & 15) & ~ge%1(($$-%%here) & 15)) DB 0EBh,00Dh,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h,90h
%%skip:
%endmacro
; loopalign [threshold]: the name used by the routines; forwards to the K7 variant.
%imacro loopalign 0-1 12
loopalignK7 %1
%endmacro
; PACK(x,y,z,w): same packing formula as R4 — four 2-bit fields in one byte,
; x in the top two bits. Arguments are not parenthesized in the expansion.
%define PACK(x,y,z,w) (x*64+y*16+z*4+w)
; Position-independent data access helpers.
; Usage, as seen in fht_SSE:
;   call get_pc.bp           ; ebp = return address of this call
;   add  ebp, PIC_BASE()     ; adjust ebp by the PIC base expression
;   ... [PIC_EBP_REL(sym)]   ; address data relative to ebp
; PIC_OFFSETTABLE must be expanded once in each file that uses them; it
; defines the get_pc.bp helper, which copies the dword at [esp] (the return
; address when reached by call) into ebp and returns.
%ifidn __OUTPUT_FORMAT__,elf
; elf output: ebp-relative addressing through the global offset table symbols.
%idefine PIC_BASE(A) _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
%idefine PIC_EBP_REL(A) ebp + A wrt ..gotoff
%macro PIC_OFFSETTABLE 0
extern _GLOBAL_OFFSET_TABLE_
get_pc.bp:
mov ebp, [esp]
retn
%endmacro
%else
; Other output formats: PIC_BASE is 0 and PIC_EBP_REL(A) is just A, so data is
; addressed absolutely; get_pc.bp is still defined so that callers assemble
; unchanged.
%define PIC_BASE(A) (0)
%define PIC_EBP_REL(A) (A)
%macro PIC_OFFSETTABLE 0
get_pc.bp:
mov ebp, [esp]
retn
%endmacro
%endif

File diff suppressed because it is too large Load Diff