mirror of
				https://github.com/cookiengineer/audacity
				synced 2025-11-03 15:43:50 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			193 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
% -*- mode: latex; TeX-master: "Vorbis_I_spec"; -*-
 | 
						|
%!TEX root = Vorbis_I_spec.tex
 | 
						|
% $Id$
 | 
						|
\section{Floor type 0 setup and decode} \label{vorbis:spec:floor0}
 | 
						|
 | 
						|
\subsection{Overview}
 | 
						|
 | 
						|
Vorbis floor type zero uses Line Spectral Pair (LSP, also
 | 
						|
known as Line Spectral Frequency or LSF) representation to encode a
 | 
						|
smooth spectral envelope curve as the frequency response of the LSP
 | 
						|
filter.  This representation is equivalent to a traditional all-pole
 | 
						|
infinite impulse response filter as would be used in linear predictive
 | 
						|
coding; LSP representation may be converted to LPC representation and
 | 
						|
vice-versa.
 | 
						|
 | 
						|
 | 
						|
 | 
						|
\subsection{Floor 0 format}
 | 
						|
 | 
						|
Floor zero configuration consists of six integer fields and a list of
 | 
						|
VQ codebooks for use in coding/decoding the LSP filter coefficient
 | 
						|
values used by each frame.
 | 
						|
 | 
						|
\subsubsection{header decode}
 | 
						|
 | 
						|
Configuration information for instances of floor zero decodes from the
 | 
						|
codec setup header (third packet).  Configuration decode proceeds as
 | 
						|
follows:
 | 
						|
 | 
						|
\begin{Verbatim}[commandchars=\\\{\}]
 | 
						|
  1) [floor0\_order] = read an unsigned integer of 8 bits
 | 
						|
  2) [floor0\_rate] = read an unsigned integer of 16 bits
 | 
						|
  3) [floor0\_bark\_map\_size] = read an unsigned integer of 16 bits
 | 
						|
  4) [floor0\_amplitude\_bits] = read an unsigned integer of six bits
 | 
						|
  5) [floor0\_amplitude\_offset] = read an unsigned integer of eight bits
 | 
						|
  6) [floor0\_number\_of\_books] = read an unsigned integer of four bits and add 1
 | 
						|
  7) array [floor0\_book\_list] = read a list of [floor0\_number\_of\_books] unsigned integers of eight bits each;
 | 
						|
\end{Verbatim}
 | 
						|
 | 
						|
An end-of-packet condition during any of these bitstream reads renders
 | 
						|
this stream undecodable.  In addition, any element of the array
 | 
						|
\varname{[floor0\_book\_list]} that is greater than the maximum codebook
 | 
						|
number for this bitstream is an error condition that also renders the
 | 
						|
stream undecodable.
 | 
						|
 | 
						|
 | 
						|
 | 
						|
\subsubsection{packet decode} \label{vorbis:spec:floor0-decode}
 | 
						|
 | 
						|
Extracting a floor0 curve from an audio packet consists of first
 | 
						|
decoding the curve amplitude and \varname{[floor0\_order]} LSP
 | 
						|
coefficient values from the bitstream, and then computing the floor
 | 
						|
curve, which is defined as the frequency response of the decoded LSP
 | 
						|
filter.
 | 
						|
 | 
						|
Packet decode proceeds as follows:
 | 
						|
\begin{Verbatim}[commandchars=\\\{\}]
 | 
						|
  1) [amplitude] = read an unsigned integer of [floor0\_amplitude\_bits] bits
 | 
						|
  2) if ( [amplitude] is greater than zero ) \{
 | 
						|
       3) [coefficients] is an empty, zero length vector
 | 
						|
       4) [booknumber] = read an unsigned integer of \link{vorbis:spec:ilog}{ilog}( [floor0\_number\_of\_books] ) bits
 | 
						|
       5) if ( [booknumber] is greater than the highest number decode codebook ) then packet is undecodable
 | 
						|
       6) [last] = zero;
 | 
						|
       7) vector [temp\_vector] = read vector from bitstream using codebook number [floor0\_book\_list] element [booknumber] in VQ context.
 | 
						|
       8) add the scalar value [last] to each scalar in vector [temp\_vector]
 | 
						|
       9) [last] = the value of the last scalar in vector [temp\_vector]
 | 
						|
      10) concatenate [temp\_vector] onto the end of the [coefficients] vector
 | 
						|
      11) if (length of vector [coefficients] is less than [floor0\_order]) continue at step 6
 | 
						|
 | 
						|
     \}
 | 
						|
 | 
						|
 12) done.
 | 
						|
 | 
						|
\end{Verbatim}
 | 
						|
 | 
						|
Take note of the following properties of decode:
 | 
						|
\begin{itemize}
 | 
						|
 \item An \varname{[amplitude]} value of zero must result in a return code that indicates this channel is unused in this frame (the output of the channel will be all-zeroes in synthesis).  Several later stages of decode don't occur for an unused channel.
 | 
						|
 \item An end-of-packet condition during decode should be considered a
 | 
						|
nominal occurrence; if end-of-packet is reached during any read
 | 
						|
operation above, floor decode is to return `unused' status as if the
 | 
						|
\varname{[amplitude]} value had read zero at the beginning of decode.
 | 
						|
 | 
						|
 \item The book number used for decode
 | 
						|
can, in fact, be stored in the bitstream in \link{vorbis:spec:ilog}{ilog}( \varname{[floor0\_number\_of\_books]} -
 | 
						|
1 ) bits.  Nevertheless, the above specification is correct and values
 | 
						|
greater than the maximum possible book value are reserved.
 | 
						|
 | 
						|
 \item The number of scalars read into the vector \varname{[coefficients]}
 | 
						|
may be greater than \varname{[floor0\_order]}, the number actually
 | 
						|
required for curve computation.  For example, if the VQ codebook used
 | 
						|
for the floor currently being decoded has a
 | 
						|
\varname{[codebook\_dimensions]} value of three and
 | 
						|
\varname{[floor0\_order]} is ten, the only way to fill all the needed
 | 
						|
scalars in \varname{[coefficients]} is to read a total of twelve
 | 
						|
scalars as four vectors of three scalars each.  This is not an error
 | 
						|
condition, and care must be taken not to allow a buffer overflow in
 | 
						|
decode. The extra values are not used and may be ignored or discarded.
 | 
						|
\end{itemize}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
\subsubsection{curve computation} \label{vorbis:spec:floor0-synth}
 | 
						|
 | 
						|
Given an \varname{[amplitude]} integer and \varname{[coefficients]}
 | 
						|
vector from packet decode as well as the \varname{[floor0\_order]},
 | 
						|
\varname{[floor0\_rate]}, \varname{[floor0\_bark\_map\_size]}, \varname{[floor0\_amplitude\_bits]} and
 | 
						|
\varname{[floor0\_amplitude\_offset]} values from floor setup, and an output
 | 
						|
vector size \varname{[n]} specified by the decode process, we compute a
 | 
						|
floor output vector.
 | 
						|
 | 
						|
If the value \varname{[amplitude]} is zero, the return value is a
 | 
						|
length \varname{[n]} vector with all-zero scalars.  Otherwise, begin by
 | 
						|
assuming the following definitions for the given vector to be
 | 
						|
synthesized:
 | 
						|
 | 
						|
   \begin{displaymath}
 | 
						|
     \mathrm{map}_i = \left\{
 | 
						|
       \begin{array}{ll}
 | 
						|
          \min (
 | 
						|
            \mathtt{floor0\texttt{\_}bark\texttt{\_}map\texttt{\_}size} - 1,
 | 
						|
            \mathit{foobar}
 | 
						|
          ) & \textrm{for } i \in [0,n-1] \\
 | 
						|
          -1 & \textrm{for } i = n
 | 
						|
        \end{array}
 | 
						|
      \right.
 | 
						|
    \end{displaymath}
 | 
						|
 | 
						|
    where
 | 
						|
 | 
						|
    \begin{displaymath}
 | 
						|
    \mathit{foobar} =
 | 
						|
      \left\lfloor
 | 
						|
        \mathrm{bark}\left(\frac{\mathtt{floor0\texttt{\_}rate} \cdot i}{2n}\right) \cdot \frac{\mathtt{floor0\texttt{\_}bark\texttt{\_}map\texttt{\_}size}} {\mathrm{bark}(.5 \cdot \mathtt{floor0\texttt{\_}rate})}
 | 
						|
      \right\rfloor
 | 
						|
    \end{displaymath}
 | 
						|
 | 
						|
    and
 | 
						|
 | 
						|
    \begin{displaymath}
 | 
						|
      \mathrm{bark}(x) = 13.1 \arctan (.00074x) + 2.24 \arctan (.0000000185x^2 + .0001x)
 | 
						|
    \end{displaymath}
 | 
						|
 | 
						|
The above is used to synthesize the LSP curve on a Bark-scale frequency
 | 
						|
axis, then map the result to a linear-scale frequency axis.
 | 
						|
Similarly, the below calculation synthesizes the output LSP curve \varname{[output]} on a log
 | 
						|
(dB) amplitude scale, mapping it to linear amplitude in the last step:
 | 
						|
 | 
						|
\begin{enumerate}
 | 
						|
 \item  \varname{[i]} = 0
 | 
						|
 \item  \varname{[$\omega$]} = $\pi$ * map element \varname{[i]} / \varname{[floor0\_bark\_map\_size]}
 | 
						|
 \item if ( \varname{[floor0\_order]} is odd ) \{
 | 
						|
  \begin{enumerate}
 | 
						|
   \item calculate \varname{[p]} and \varname{[q]} according to:
 | 
						|
           \begin{eqnarray*}
 | 
						|
             p & = & (1 - \cos^2\omega)\prod_{j=0}^{\frac{\mathtt{floor0\texttt{\_}order}-3}{2}} 4 (\cos([\mathtt{coefficients}]_{2j+1}) - \cos \omega)^2 \\
 | 
						|
             q & = & \frac{1}{4} \prod_{j=0}^{\frac{\mathtt{floor0\texttt{\_}order}-1}{2}} 4 (\cos([\mathtt{coefficients}]_{2j}) - \cos \omega)^2
 | 
						|
           \end{eqnarray*}
 | 
						|
 | 
						|
  \end{enumerate}
 | 
						|
  \} else \varname{[floor0\_order]} is even \{
 | 
						|
  \begin{enumerate}[resume]
 | 
						|
   \item calculate \varname{[p]} and \varname{[q]} according to:
 | 
						|
           \begin{eqnarray*}
 | 
						|
             p & = & \frac{(1 - \cos\omega)}{2} \prod_{j=0}^{\frac{\mathtt{floor0\texttt{\_}order}-2}{2}} 4 (\cos([\mathtt{coefficients}]_{2j+1}) - \cos \omega)^2 \\
 | 
						|
             q & = & \frac{(1 + \cos\omega)}{2} \prod_{j=0}^{\frac{\mathtt{floor0\texttt{\_}order}-2}{2}} 4 (\cos([\mathtt{coefficients}]_{2j}) - \cos \omega)^2
 | 
						|
           \end{eqnarray*}
 | 
						|
 | 
						|
  \end{enumerate}
 | 
						|
  \}
 | 
						|
 | 
						|
 \item calculate \varname{[linear\_floor\_value]} according to:
 | 
						|
         \begin{displaymath}
 | 
						|
           \exp \left( .11512925 \left(\frac{\mathtt{amplitude} \cdot \mathtt{floor0\texttt{\_}amplitude\texttt{\_}offset}}{(2^{\mathtt{floor0\texttt{\_}amplitude\texttt{\_}bits}}-1)\sqrt{p+q}}
 | 
						|
                  - \mathtt{floor0\texttt{\_}amplitude\texttt{\_}offset} \right) \right)
 | 
						|
         \end{displaymath}
 | 
						|
 | 
						|
 \item \varname{[iteration\_condition]} = map element \varname{[i]}
 | 
						|
 \item \varname{[output]} element \varname{[i]} = \varname{[linear\_floor\_value]}
 | 
						|
 \item increment \varname{[i]}
 | 
						|
 \item if ( map element \varname{[i]} is equal to \varname{[iteration\_condition]} ) continue at step 5
 | 
						|
 \item if ( \varname{[i]} is less than \varname{[n]} ) continue at step 2
 | 
						|
 \item done
 | 
						|
\end{enumerate}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 |