From 913c41e6e87372a22084647df85c56c20ac4d197 Mon Sep 17 00:00:00 2001
From: Leland Lucius <github@homerow.net>
Date: Sat, 1 Aug 2020 15:52:31 -0500
Subject: [PATCH] AUP3: Minor code cleanup and addition of comments

Probably over commented for some, but I want a good refresher
a year from now when I've completely forgotten how it works.
---
 src/ProjectFileIO.cpp | 168 +++++++++++++++++++++++++++++++-----------
 src/ProjectFileIO.h   |  11 ++-
 2 files changed, 133 insertions(+), 46 deletions(-)

diff --git a/src/ProjectFileIO.cpp b/src/ProjectFileIO.cpp
index df8a5b21c..307b1c4c3 100644
--- a/src/ProjectFileIO.cpp
+++ b/src/ProjectFileIO.cpp
@@ -2056,11 +2056,8 @@ void ProjectFileIO::Reset()
    SetFileName({});
 }
 
-wxLongLong ProjectFileIO::GetFreeDiskSpace()
+wxLongLong ProjectFileIO::GetFreeDiskSpace() const
 {
-   // make sure it's open and the path is defined
-   auto db = DB();
-
    wxLongLong freeSpace;
    if (wxGetDiskSpace(wxPathOnly(mFileName), NULL, &freeSpace))
    {
@@ -2156,11 +2153,18 @@ int64_t ProjectFileIO::GetTotalUsage()
    return GetDiskUsage(CurrConn().get(), 0);
 }
 
+//
+// Returns the amount of disk space used by the specified sample blockid or all
+// of the sample blocks if the blockid is 0.  It does this by using the raw SQLite
+// pages available from the "sqlite_dbpage" virtual table to traverse the SQLite
+// table b-tree described here:  https://www.sqlite.org/fileformat.html
+//
 int64_t ProjectFileIO::GetDiskUsage(DBConnection *conn, SampleBlockID blockid /* = 0 */)
 {
+   // Information we need to track our travels through the b-tree
    typedef struct
    {
-      SampleBlockID pgno;
+      int64_t pgno;
       int currentCell;
       int numCells;
       unsigned char data[65536];
@@ -2169,133 +2173,203 @@ int64_t ProjectFileIO::GetDiskUsage(DBConnection *conn, SampleBlockID blockid /*
 
    int64_t total = 0;
    int64_t found = 0;
-   int64_t next = 0;
+   int64_t right = 0;
    int rc;
 
    // Get the rootpage for the sampleblocks table.
-   sqlite3_stmt *stmt = conn->Prepare(DBConnection::GetRootPage,
-      "SELECT rootpage FROM sqlite_master WHERE tbl_name = 'sampleblocks';");
-   sqlite3_step(stmt);
-   int64_t rootpage = sqlite3_column_int64(stmt, 0);
+   sqlite3_stmt *stmt =
+      conn->Prepare(DBConnection::GetRootPage,
+                    "SELECT rootpage FROM sqlite_master WHERE tbl_name = 'sampleblocks';");
+   if (stmt == nullptr || sqlite3_step(stmt) != SQLITE_ROW)
+   {
+      return 0;
+   }
+
+   // And store it in our first stack frame
+   stack.push_back({sqlite3_column_int64(stmt, 0)});
+
+   // All done with the statement
    sqlite3_clear_bindings(stmt);
    sqlite3_reset(stmt);
 
    // Prepare/retrieve statement to read raw database page
    stmt = conn->Prepare(DBConnection::GetDBPage,
       "SELECT data FROM sqlite_dbpage WHERE pgno = ?1;");
+   if (stmt == nullptr)
+   {
+      return 0;
+   }
 
-   stack.push_back({rootpage, 0, 0});
+   // Traverse the b-tree until we've visited all of the leaf pages or until
+   // we find the one corresponding to the passed in sample blockid. Because we
+   // use an integer primary key for the sampleblocks table, the traversal will
+   // be in ascending blockid sequence.
    do
    {
-      int nd = (stack.size() - 1) * 2;
+      // Access the top stack frame
       page &pg = stack.back();
 
+      // Read the page from the sqlite_dbpage table if it hasn't yet been loaded
       if (pg.numCells == 0)
       {
+         // Bind the page number
          sqlite3_bind_int64(stmt, 1, pg.pgno);
 
-         rc = sqlite3_step(stmt);
-         if (rc != SQLITE_ROW)
+         // And retrieve the page
+         if (sqlite3_step(stmt) != SQLITE_ROW)
          {
-            return found;
+            return 0;
          }
 
+         // Copy the page content to the stack frame
          memcpy(&pg.data,
-                  sqlite3_column_blob(stmt, 0),
-                  sqlite3_column_bytes(stmt, 0));
+                sqlite3_column_blob(stmt, 0),
+                sqlite3_column_bytes(stmt, 0));
 
-         pg.currentCell = 0;
+         // And retrieve the total number of cells within it
          pg.numCells = get2(&pg.data[3]);
 
+         // Reset statement for next usage
          sqlite3_clear_bindings(stmt);
          sqlite3_reset(stmt);
       }
 
-      //wxLogDebug("%*.*spgno %lld currentCell %d numCells %d", nd, nd, "", pg.pgno, pg.currentCell, pg.numCells);
+      //wxLogDebug("%*.*spgno %lld currentCell %d numCells %d", (stack.size() - 1) * 2, (stack.size() - 1) * 2, "", pg.pgno, pg.currentCell, pg.numCells);
+
+      // Process an interior table b-tree page
       if (pg.data[0] == 0x05)
       {
+         // Process the next cell if we haven't examined all of them yet
          if (pg.currentCell < pg.numCells)
          {
-            next = get4(&pg.data[8]);
+            // Remember the right-most child page number (it may be an interior or a leaf page).
+            right = get4(&pg.data[8]);
 
-            bool cont = false;
+            // Iterate over the cells.
+            //
+            // If we're not looking for a specific blockid, then we always push the
+            // target page onto the stack and leave the loop after a single iteration.
+            //
+            // Otherwise, we match the blockid against the highest integer key contained
+            // within the cell and if the blockid falls within the cell, we stack the
+            // page and stop the iteration.
+            //
+            // In theory, we could do a binary search for a specific blockid here, but
+            // because our sample blocks are always large, we will get very few cells
+            // per page...usually 6 or less.
+            //
+            // In both cases, the stacked page can be either an internal or leaf page.
+            bool stacked = false;
             while (pg.currentCell < pg.numCells)
             {
-               int celloff = get2(&pg.data[12 + (pg.currentCell++ * 2)]);
+               // Get the offset to this cell using the offset in the cell pointer
+               // array.
+               //
+               // The cell pointer array starts immediately after the page header
+               // at offset 12 and the retrieved offset is from the beginning of
+               // the page.
+               int celloff = get2(&pg.data[12 + (pg.currentCell * 2)]);
 
+               // Bump to the next cell for the next iteration.
+               pg.currentCell++;
+
+               // Get the page number this cell describes
                int pagenum = get4(&pg.data[celloff]);
 
+               // And the highest integer key, which starts at offset 4 within the cell.
                int64_t intkey = 0;
                get_varint(&pg.data[celloff + 4], &intkey);
 
-               //wxLogDebug("%*.*sinternal - next %lld celloff %d pagenum %d intkey %lld", nd, nd, " ", next, celloff, pagenum, intkey);
+               //wxLogDebug("%*.*sinternal - right %lld celloff %d pagenum %d intkey %lld", (stack.size() - 1) * 2, (stack.size() - 1) * 2, " ", right, celloff, pagenum, intkey);
+
+               // Stack the described page if we're not looking for a specific blockid
+               // or if this page contains the given blockid.
                if (!blockid || blockid <= intkey)
                {
                   stack.push_back({pagenum, 0, 0});
-                  cont = true;
+                  stacked = true;
                   break;
                }
             }
 
-            if (cont)
+            // If we pushed a new page onto the stack, we need to jump back up
+            // to read the page
+            if (stacked)
             {
                continue;
             }
          }
 
-         if (next)
+         // We've exhausted all the cells within this page, so we stack the right-most
+         // child page.  Ensure we only process it once.
+         if (right)
          {
-            stack.push_back({next, 0, 0});
-            next = 0;
+            stack.push_back({right, 0, 0});
+            right = 0;
             continue;
          }
-
       }
+      // Process a leaf table b-tree page
       else if (pg.data[0] == 0x0d)
       {
+         // Iterate over the cells
+         //
+         // If we're not looking for a specific blockid, then just accumulate the
+         // payload sizes. We will be reading every leaf page in the sampleblocks
+         // table.
+         //
+         // Otherwise we break out when we find the matching blockid. In this case,
+         // we only ever look at 1 leaf page.
          bool stop = false;
          for (int i = 0; i < pg.numCells; i++)
          {
+            // Get the offset to this cell using the offset in the cell pointer
+            // array.
+            //
+            // The cell pointer array starts immediately after the page header
+            // at offset 8 and the retrieved offset is from the beginning of
+            // the page.
             int celloff = get2(&pg.data[8 + (i * 2)]);
 
+            // Get the total payload size in bytes of the described row.
             int64_t payload = 0;
             int digits = get_varint(&pg.data[celloff], &payload);
 
+            // Get the integer key for this row.
             int64_t intkey = 0;
             get_varint(&pg.data[celloff + digits], &intkey);
-            //wxLogDebug("%*.*sleaf - celloff %4d intkey %lld payload %lld", nd, nd, " ", celloff, intkey, payload);
 
-            if (blockid)
-            {
-               if (blockid == intkey)
-               {
-                  found = payload;
-                  break;
-               }
-            }
-            else
+            //wxLogDebug("%*.*sleaf - celloff %4d intkey %lld payload %lld", (stack.size() - 1) * 2, (stack.size() - 1) * 2, " ", celloff, intkey, payload);
+
+            // Add this payload size to the total if we're not looking for a specific
+            // blockid
+            if (!blockid)
             {
                total += payload;
             }
-         }
-
-         if (found)
-         {
-            break;
+            // Otherwise, return the payload size for a matching row
+            else if (blockid == intkey)
+            {
+               return payload;
+            }
          }
       }
 
+      // Done with the current branch, so pop back up to the previous one (if any)
       stack.pop_back();
    } while (!stack.empty());
 
-   return blockid ? found : total;
+   // Return the total used for all sample blocks
+   return total;
 }
 
+// Retrieves a 2-byte big-endian integer from the page data
 unsigned int ProjectFileIO::get2(const unsigned char *ptr)
 {
    return (ptr[0] << 8) | ptr[1];
 }
 
+// Retrieves a 4-byte big-endian integer from the page data
 unsigned int ProjectFileIO::get4(const unsigned char *ptr)
 {
    return ((unsigned int) ptr[0] << 24) |
@@ -2304,10 +2378,14 @@ unsigned int ProjectFileIO::get4(const unsigned char *ptr)
           ((unsigned int) ptr[3]);
 }
 
+// Retrieves a variable length integer from the page data. Returns the
+// number of bytes used to encode the integer and stores the decoded
+// value at the given location.
 int ProjectFileIO::get_varint(const unsigned char *ptr, int64_t *out)
 {
    int64_t val = 0;
    int i;
+
    for (i = 0; i < 8; ++i)
    {
       val = (val << 7) + (ptr[i] & 0x7f);
diff --git a/src/ProjectFileIO.h b/src/ProjectFileIO.h
index f474e6448..09fda7dac 100644
--- a/src/ProjectFileIO.h
+++ b/src/ProjectFileIO.h
@@ -89,11 +89,20 @@ public:
    bool SaveProject(const FilePath &fileName, const std::shared_ptr<TrackList> &lastSaved);
    bool SaveCopy(const FilePath& fileName);
 
-   wxLongLong GetFreeDiskSpace();
+   wxLongLong GetFreeDiskSpace() const;
 
+   // Returns the bytes used for the given sample block
    int64_t GetBlockUsage(SampleBlockID blockid);
+
+   // Returns the bytes used for all blocks owned by the given track list
    int64_t GetCurrentUsage(const std::shared_ptr<TrackList> &tracks);
+
+   // Return the bytes used by all sample blocks in the project file, whether
+   // they are attached to the active tracks or held by the Undo manager.
    int64_t GetTotalUsage();
+
+   // Return the bytes used for the given block, or for all blocks if blockid is 0,
+   // using the given database connection. This is the workhorse for the above 3 methods.
    static int64_t GetDiskUsage(DBConnection *conn, SampleBlockID blockid);
 
    const TranslatableString &GetLastError() const;