From 15e62510104bc0e2b9180b66e5845d985cac03cc Mon Sep 17 00:00:00 2001
From: Benji Dial <benji@benjidial.net>
Date: Wed, 10 Jan 2024 00:17:29 -0500
Subject: [PATCH] partial (largely untested) memory block device and tar file
 system support

---
 documentation/storage.txt            |   7 +
 include/mercury/kernel/bd/memory.hpp |  23 +++
 include/mercury/kernel/fs/tarfs.hpp  |  40 ++++
 include/mercury/kernel/paging.hpp    |   3 +
 include/mercury/kernel/storage.hpp   | 130 ++++++++++++
 include/mercury/kernel/terminal.hpp  |   3 +-
 include/mercury/kernel/utility.hpp   | 287 +++++++++++++++++++--------
 kernel/allocator.cpp                 | 160 +++++++++++++++
 kernel/bd/memory.cpp                 |  27 +++
 kernel/entry.cpp                     |  52 +++--
 kernel/fs/tarfs.cpp                  | 224 +++++++++++++++++++++
 kernel/paging.cpp                    |   7 +
 kernel/storage.cpp                   | 246 +++++++++++++++++++++++
 kernel/terminal.cpp                  |   7 +-
 makefile                             |   7 +-
 15 files changed, 1125 insertions(+), 98 deletions(-)
 create mode 100644 documentation/storage.txt
 create mode 100644 include/mercury/kernel/bd/memory.hpp
 create mode 100644 include/mercury/kernel/fs/tarfs.hpp
 create mode 100644 include/mercury/kernel/storage.hpp
 create mode 100644 kernel/allocator.cpp
 create mode 100644 kernel/bd/memory.cpp
 create mode 100644 kernel/fs/tarfs.cpp
 create mode 100644 kernel/storage.cpp

diff --git a/documentation/storage.txt b/documentation/storage.txt
new file mode 100644
index 0000000..2d92dc7
--- /dev/null
+++ b/documentation/storage.txt
@@ -0,0 +1,7 @@
+a block_device is a block device such as a disk or a partition that has been
+seen since the last boot. when a driver detects a device, it should check if
+a block_device with that id has already been created, and if so, reuse it.
+
+eventually, i would like to implement kernel-space exceptions and use those
+instead of having functions return an io_result, since it's a bit unwieldy
+to propogate those results as is.
diff --git a/include/mercury/kernel/bd/memory.hpp b/include/mercury/kernel/bd/memory.hpp
new file mode 100644
index 0000000..079abee
--- /dev/null
+++ b/include/mercury/kernel/bd/memory.hpp
@@ -0,0 +1,23 @@
+#ifndef MERCURY_KERNEL_BD_MEMORY_HPP
+#define MERCURY_KERNEL_BD_MEMORY_HPP
+
+#include <mercury/kernel/storage.hpp>
+
+namespace mercury::kernel::bd {
+
+  class memory : public storage::block_device {
+
+  private:
+    uint8_t *buffer;
+
+  public:
+    memory(void *buffer, size_t buffer_len);
+
+    storage::io_result read_blocks_no_cache(uint64_t start, uint64_t count, void *into) override;
+    storage::io_result write_blocks_no_cache(uint64_t start, uint64_t count, const void *from) override;
+
+  };
+
+}
+
+#endif
diff --git a/include/mercury/kernel/fs/tarfs.hpp b/include/mercury/kernel/fs/tarfs.hpp
new file mode 100644
index 0000000..41773ee
--- /dev/null
+++ b/include/mercury/kernel/fs/tarfs.hpp
@@ -0,0 +1,40 @@
+#ifndef MERCURY_KERNEL_FS_TARFS_HPP
+#define MERCURY_KERNEL_FS_TARFS_HPP
+
+#include <mercury/kernel/storage.hpp>
+
+namespace mercury::kernel::fs {
+
+  class tarfs_instance : public storage::file_system_instance {
+
+  private:
+    storage::block_device *bd;
+
+    storage::io_result next_node(storage::node_id_t &node);
+    //name_buf must be at least 255 chars long.
+    storage::io_result read_name(storage::node_id_t node, char *name_buf, size_t &name_len_out);
+    //len <= 12
+    storage::io_result read_num(uint64_t offset, size_t len, uint64_t &out);
+
+  public:
+    tarfs_instance(storage::block_device *bd);
+
+    storage::io_result get_root_node(storage::node_id_t &out) override;
+    storage::io_result get_first_child(
+      storage::node_id_t node, storage::node_id_t &out, storage::directory_iter_t &iter_out) override;
+    storage::io_result get_next_child(
+      storage::node_id_t node, storage::node_id_t &out, storage::directory_iter_t &iter) override;
+    storage::io_result get_child(
+      storage::node_id_t node, storage::node_id_t &out, const char *name, size_t name_len) override;
+    storage::io_result get_name_length(storage::node_id_t node, size_t &length_out) override;
+    storage::io_result get_name(storage::node_id_t node, char *buffer, size_t &length_out) override;
+    storage::io_result get_file_length(storage::node_id_t node, uint64_t &length_out) override;
+    storage::io_result get_file_type(storage::node_id_t node, storage::file_type &out) override;
+
+  };
+
+  storage::io_result tarfs_mounter(storage::block_device *bd, storage::file_system_instance *&fs_out);
+
+}
+
+#endif
diff --git a/include/mercury/kernel/paging.hpp b/include/mercury/kernel/paging.hpp
index 9627162..4ba5fdf 100644
--- a/include/mercury/kernel/paging.hpp
+++ b/include/mercury/kernel/paging.hpp
@@ -26,6 +26,9 @@ namespace mercury::kernel::paging {
   void map_kernel_page(
     uint64_t paddr, uint64_t vaddr, bool write, bool execute);
 
+  //maps writable and not executable
+  void *map_new_kernel_pages(uint64_t count);
+
   uint64_t get_used_vram_page_count();
   uint64_t get_free_pram_page_count();
 
diff --git a/include/mercury/kernel/storage.hpp b/include/mercury/kernel/storage.hpp
new file mode 100644
index 0000000..1374766
--- /dev/null
+++ b/include/mercury/kernel/storage.hpp
@@ -0,0 +1,130 @@
+#ifndef MERCURY_KERNEL_STORAGE_HPP
+#define MERCURY_KERNEL_STORAGE_HPP
+
+#include <mercury/kernel/utility.hpp>
+#include <cstdint>
+
+namespace mercury::kernel::storage {
+
+  typedef uint64_t node_id_t;
+  typedef uint64_t directory_iter_t;
+
+  enum file_type {
+    regular_file,
+    directory,
+    symlink
+  };
+
+  enum io_result {
+    success,
+    mount_point_already_used,
+    not_a_directory,
+    not_supported,
+    out_of_bounds,
+    device_error,
+    fs_corrupt,
+    not_found,
+    bad_path,
+  };
+
+  class file_system_instance {
+
+  public:
+    virtual ~file_system_instance() {}
+
+    virtual io_result get_root_node(node_id_t &out) = 0;
+
+    //get_first_child sets iter_out to a "directory iterator" representing the
+    //first child of this node. get_next_child gets the child after the child
+    //represented by the value of iter passed in, and changes iter to represent
+    //that next child. for an empty directory node, get_first_child returns
+    //io_result::out_of_bounds. when iter represents the last child of a node,
+    //get_next_child also returns io_result::out_of_bounds.
+    virtual io_result get_first_child(node_id_t node, node_id_t &out, directory_iter_t &iter_out) = 0;
+    virtual io_result get_next_child(node_id_t node, node_id_t &out, directory_iter_t &iter) = 0;
+    virtual io_result get_child(node_id_t node, node_id_t &out, const char *name, size_t name_len) = 0;
+
+    virtual io_result get_name_length(node_id_t node, size_t &length_out) = 0;
+    //buffer is assumed to be long enough - call get_name_length first.
+    virtual io_result get_name(node_id_t node, char *buffer, size_t &length_out) = 0;
+
+    virtual io_result get_file_length(node_id_t node, uint64_t &length_out) = 0;
+
+    virtual io_result get_file_type(node_id_t node, file_type &out) = 0;
+  };
+
+  class block_device {
+
+  private:
+    uint8_t *block_cache;
+    uint64_t block_cache_i;
+    bool block_cache_dirty;
+
+    io_result load_cache_block(uint64_t i);
+
+  protected:
+    //implemented in driver, bounds not checked
+    virtual io_result read_blocks_no_cache(uint64_t start, uint64_t count, void *into) = 0;
+    virtual io_result write_blocks_no_cache(uint64_t start, uint64_t count, const void *from) = 0;
+
+    //it is assumed that this is only called once, by the driver, after
+    //block_size and block_count have been set, and before read_bytes or
+    //write_bytes can be called (e.g. in the derived class's constructor)
+    void allocate_block_cache();
+
+  public:
+    //set by storage component
+    file_system_instance *mounted_as;
+    utility::uuid id;
+
+    //set by driver
+    uint64_t block_size;
+    uint64_t block_count;
+
+    virtual ~block_device() {
+      if (block_cache)
+        delete block_cache;
+    }
+
+    io_result read_bytes(uint64_t start, uint64_t count, void *into);
+    io_result write_bytes(uint64_t start, uint64_t count, const void *from);
+
+  };
+
+  typedef io_result (*file_system_mounter)(block_device *bd, file_system_instance *&fs_out);
+
+  extern utility::list<block_device *> *block_devices;
+
+  void init_storage();
+
+  //a canon path contains no . or empty directory names, and
+  //contains no .. except for at the start of a relative path.
+  struct canon_path {
+
+    bool absolute;
+    unsigned parent_count;
+    utility::vector<utility::string> segments;
+
+    void parent();
+    void rel(const canon_path &r);
+
+    utility::string to_string(bool trailing_slash) const;
+
+  };
+
+  void canonize_path(const char *str, size_t len, canon_path &out);
+
+  //path must be absolute.
+  io_result mount_device(block_device *bd, const canon_path &path, file_system_mounter mounter);
+
+  //path must be absolute. symlinks are always resolved on nodes other than the
+  //final one. they are resolved on the final node if and only if resolve final
+  //node is set to true. mount points are always traversed, including on the
+  //final node. this assumes that the path is under some mount point, and the
+  //same for any symlinks traversed (e.g. / is mounted).
+  io_result look_up_absolute_path(const canon_path &path, block_device *&bd_out,
+    node_id_t &node_out, bool resolve_final_node, canon_path &path_without_symlinks_out);
+
+}
+
+#endif
diff --git a/include/mercury/kernel/terminal.hpp b/include/mercury/kernel/terminal.hpp
index b9d9976..776ff64 100644
--- a/include/mercury/kernel/terminal.hpp
+++ b/include/mercury/kernel/terminal.hpp
@@ -1,6 +1,7 @@
 #ifndef MERCURY_KERNEL_TERMINAL_HPP
 #define MERCURY_KERNEL_TERMINAL_HPP
 
+#include <cstddef>
 #include <cstdint>
 
 namespace mercury::kernel::terminal {
@@ -20,7 +21,7 @@ namespace mercury::kernel::terminal {
   extern framebuffer::color fg_color;
 
   void put_char(char ch);
-
+  void put_string(const char *str, size_t len);
   void put_string_sz(const char *str);
 
   void put_int_decimal(uint64_t n, bool with_commas = true);
diff --git a/include/mercury/kernel/utility.hpp b/include/mercury/kernel/utility.hpp
index dbf0cf9..f12101a 100644
--- a/include/mercury/kernel/utility.hpp
+++ b/include/mercury/kernel/utility.hpp
@@ -20,101 +20,232 @@ namespace mercury::kernel::utility {
     uint8_t bytes[16];
   };
 
-  template <class value_t>
-  struct string_tree {
-
-    std::optional<value_t> value_here = {};
-    string_tree<value_t> *parent = 0;
-    string_tree<value_t> *subtrees[256] = {};
-
-    //if there is a node whose key has a prefix in common with str, then this
-    //returns the deepest such node, and sets leftover_out to point to the
-    //character of str after the common prefix with that returned node. if
-    //there is no such node, this returns the current node and sets
-    //leftover_out to a null pointer.
-    string_tree &max_common_prefix(
-        const char *str, size_t str_len, const char *&leftover_out) {
-
-      string_tree<value_t> *last_with_value = 0;
-      const char *leftover_at_last_with_value = 0;
-      string_tree<value_t> *on = this;
-      size_t len_so_far = 0;
-
-      while (true) {
-
-        if (on->value_here) {
-          last_with_value = on;
-          leftover_at_last_with_value = str + len_so_far;
-        }
-
-        if (len_so_far == str_len)
-          break;
-
-        on = on->subtrees[(uint8_t)str[len_so_far]];
-        if (!on)
-          break;
-
-        ++len_so_far;
-
-      }
-
-      if (last_with_value) {
-        leftover_out = leftover_at_last_with_value;
-        return *last_with_value;
-      }
-
-      leftover_out = 0;
-      return *this;
-
-    }
-
-    bool try_insert(const char *str, size_t str_len, value_t value) {
-
-      string_tree<value_t> *on = this;
-      for (size_t i = 0; i < str_len; ++i) {
-        string_tree<value_t> *&subtree = on->subtrees[(uint8_t)str[i]];
-        if (!subtree) {
-          subtree = new string_tree<value_t>();
-          subtree->parent = on;
-        }
-        on = subtree;
-      }
-
-      if (on->value_here)
-        return false;
-      on->value_here = value;
-      return true;
-
-    }
-
-  };
-
   //if c appears in str, this returns the index of the first time it appears.
   //otherwise, this returns len.
-  static inline size_t find(const char *str, size_t len, char c) {
-    for (size_t i = 0; i < len; ++i)
+  static inline unsigned find(const char *str, unsigned len, char c) {
+    for (unsigned i = 0; i < len; ++i)
       if (str[i] == c)
         return i;
     return len;
   }
 
   template <class value_t>
-  struct flist {
+  struct list {
 
     struct node {
       value_t value;
       node *next;
+      node *prev;
     };
 
     node *first;
+    node *last;
 
-    flist() : first(0) {}
+    list() : first(0), last(0) {}
 
-    void insert(value_t value) {
-      first = new node {
-        .value = value,
-        .next = first
-      };
+    ~list() {
+      if (first) {
+        for (node *n = first->next; n; n = n->next)
+          delete n->prev;
+        delete last;
+      }
+    }
+
+    void insert_end(const value_t &value) {
+      node *n = new node {};
+      n->value = value;
+      n->next = 0;
+      n->prev = last;
+      last = n;
+    }
+
+    void insert_end(value_t &&value) {
+      node *n = new node {};
+      n->value = value;
+      n->next = 0;
+      n->prev = last;
+      last = n;
+    }
+
+    void clear() {
+      for (node *n = first; n; n = n->next)
+        delete n;
+      first = 0;
+      last = 0;
+    }
+
+    //assumes there is a last.
+    void remove_last() {
+      node *new_last = last->prev;
+      if (new_last)
+        new_last->next = 0;
+      else
+        first = 0;
+      delete last;
+      last = new_last;
+    }
+
+  };
+
+  template <class value_t>
+  struct vector {
+
+    value_t *buffer;
+    unsigned buffer_len;
+    unsigned count;
+
+    vector(unsigned buffer_len = 16)
+      : buffer(new value_t[buffer_len]), buffer_len(buffer_len), count(0) {}
+
+    vector(const value_t *copy_from, unsigned len)
+      : buffer(new value_t[len]), buffer_len(len), count(len) {
+      for (unsigned i = 0; i < len; ++i)
+        buffer[i] = copy_from[i];
+    }
+
+    ~vector() {
+      if (buffer)
+        delete[] buffer;
+    }
+
+    vector<value_t> &operator =(const vector<value_t> &other) {
+      if (buffer)
+        delete[] buffer;
+      buffer = new value_t[other.buffer_len];
+      buffer_len = other.buffer_len;
+      for (unsigned i = 0; i < other.count; ++i)
+        buffer[i] = other.buffer[i];
+      return *this;
+    }
+
+    vector<value_t> &operator =(vector<value_t> &&other) {
+      if (buffer)
+        delete[] buffer;
+      buffer = other.buffer;
+      buffer_len = other.buffer_len;
+      count = other.count;
+      other.buffer = 0;
+      return *this;
+    }
+
+    bool operator ==(const vector<value_t> &other) {
+      if (other.count != count)
+        return false;
+      for (unsigned i = 0; i < count; ++i)
+        if (other.buffer[i] != buffer[i])
+          return false;
+      return true;
+    }
+
+    void verify_buffer_len(unsigned target_len) {
+      if (buffer_len >= target_len)
+        return;
+      unsigned new_buffer_len = buffer_len;
+      do
+        new_buffer_len *= 2;
+      while (new_buffer_len < target_len);
+      value_t *new_buffer = new value_t[new_buffer_len];
+      for (unsigned i = 0; i < count; ++i)
+        new_buffer[i] = std::move(buffer[i]);
+      delete[] buffer;
+      buffer = new_buffer;
+      buffer_len = new_buffer_len;
+    }
+
+    void add_end(value_t &&v) {
+      verify_buffer_len(count + 1);
+      buffer[count] = std::move(v);
+      ++count;
+    }
+
+    void add_end(const value_t &v) {
+      verify_buffer_len(count + 1);
+      buffer[count] = v;
+      ++count;
+    }
+
+  };
+
+  typedef vector<char> string;
+
+  template <class key_component_t, class value_t>
+  struct trie {
+
+    typedef vector<key_component_t> key_t;
+
+    struct child {
+      key_component_t component;
+      trie tr;
+    };
+
+    //really this should be a hashmap or something
+    vector<child> children;
+
+    trie *try_get_child(const key_component_t &component) const {
+      for (unsigned i = 0; i < children.count; ++i)
+        if (children.buffer[i].component == component)
+          return &children.buffer[i].tr;
+      return 0;
+    }
+
+    std::optional<value_t> value_here;
+
+    trie() : children(0) {}
+
+    //prefix length is in key components, with the root node having a prefix
+    //length of 0. if no prefix of key has a value, then this just returns the
+    //root node and sets prefix length to 0.
+    const trie &longest_prefix_with_value(
+      const key_t &key, unsigned &prefix_length_out
+    ) const {
+
+      const trie *on = this;
+      unsigned with_i = 0;
+
+      const trie *longest_found = this;
+      prefix_length_out = 0;
+
+      while (true) {
+        if (with_i == key.count)
+          return *longest_found;
+        on = on->try_get_child(key.buffer[with_i]);
+        if (!on)
+          return *longest_found;
+        ++with_i;
+        if (on->value_here) {
+          longest_found = on;
+          prefix_length_out = with_i;
+        }
+      }
+
+    }
+
+    bool has_key(const key_t &key) {
+      const trie *on = this;
+      for (unsigned i = 0; i < key.count; ++i) {
+        on = on->try_get_child(key.buffer[i]);
+        if (!on)
+          return false;
+      }
+      return on->value_here.has_value();
+    }
+
+    //returns false if this key is already in use.
+    bool try_insert(const key_t &key, value_t value) {
+      trie *on = this;
+      for (unsigned i = 0; i < key.count; ++i) {
+        on = on->try_get_child(key.buffer[i]);
+        if (!on) {
+          child ch;
+          ch.component = key.buffer[i];
+          on->children.add_end(std::move(ch));
+          on = &on->children.buffer[on->children.count - 1].tr;
+        }
+      }
+      if (on->value_here)
+        return false;
+      *on->value_here = value;
+      return true;
     }
 
   };
diff --git a/kernel/allocator.cpp b/kernel/allocator.cpp
new file mode 100644
index 0000000..b04078c
--- /dev/null
+++ b/kernel/allocator.cpp
@@ -0,0 +1,160 @@
+#include <mercury/kernel/paging.hpp>
+#include <cstddef>
+
+namespace mercury::kernel::allocator {
+
+  struct free_entry {
+    uint64_t start;
+    uint64_t len;//0 for unused
+  };
+
+  struct free_page {
+    free_page *next;
+    free_entry entries[255];
+  };
+
+  free_page *first_page;
+
+  static_assert(sizeof(free_page) == 4088);
+
+  free_entry *get_entry(uint64_t start, uint64_t len) {
+    for (free_page *fp = first_page; fp; fp = fp->next)
+      for (int i = 0; i < 255; ++i)
+        if (fp->entries[i].start == start && fp->entries[i].len == len)
+          return fp->entries + i;
+    return 0;
+  }
+
+  void add_entry(uint64_t start, uint64_t len) {
+    for (free_page *fp = first_page; fp; fp = fp->next)
+      for (int i = 0; i < 255; ++i)
+        if (fp->entries[i].len == 0) {
+          fp->entries[i].start = start;
+          fp->entries[i].len = len;
+          return;
+        }
+    free_page *new_page = (free_page *)paging::map_new_kernel_pages(1);
+    new_page->next = first_page;
+    first_page = new_page;
+    for (int i = 2; i < 255; ++i)
+      new_page->entries[i].len = 0;
+    new_page->entries[0].start = (uint64_t)new_page + 4088;
+    new_page->entries[0].len = 8;
+    new_page->entries[1].start = start;
+    new_page->entries[1].len = len;
+  }
+
+  //len is power of 2, start is len-aligned
+  void free_block(uint64_t start, uint64_t len) {
+    free_entry *buddy = get_entry(start ^ len, len);
+    if (buddy) {
+      buddy->start = start & ~len;
+      buddy->len = len * 2;
+    }
+    else
+      add_entry(start, len);
+  }
+
+  void free_region(uint64_t start, uint64_t len) {
+    uint64_t block_size = 1;
+    while (block_size <= len) {
+      if (start & block_size) {
+        free_block(start, block_size);
+        start += block_size;
+        len -= block_size;
+      }
+      block_size *= 2;
+    }
+    while (len) {
+      block_size /= 2;
+      if (block_size <= len) {
+        free_block(start, block_size);
+        start += block_size;
+        len -= block_size;
+      }
+    }
+    //testing
+    if (len != 0)
+      while (1)
+        ;
+  }
+
+  uint64_t take_region(uint64_t len) {
+
+    uint64_t min_size = 1;
+    while (min_size < len)
+      min_size *= 2;
+
+    free_entry *entry = 0;
+
+    for (free_page *fp = first_page; fp; fp = fp->next)
+      for (int i = 0; i < 255; ++i)
+        if (fp->entries[i].len >= min_size) {
+          if (fp->entries[i].len == min_size) {
+            entry = fp->entries + i;
+            goto loop_done;
+          }
+          if (entry == 0 || fp->entries[i].len < entry->len)
+            entry = fp->entries + i;
+        }
+
+  loop_done:
+    if (entry != 0) {
+      uint64_t start = entry->start;
+      uint64_t block_len = entry->len;
+      entry->len = 0;
+      if (block_len != len)
+        free_region(start + len, block_len - len);
+      return start;
+    }
+
+    uint64_t pages = (len - 1) / 4096 + 1;
+    uint64_t start = (uint64_t)paging::map_new_kernel_pages(pages);
+    if (pages * 4096 != len)
+      free_region(start + len, pages * 4096 - len);
+    return start;
+
+  }
+
+}
+
+using namespace mercury::kernel::allocator;
+
+void *_new(size_t len) {
+  if (len == 0)
+    return 0;
+  uint64_t vaddr = take_region(len + sizeof(size_t));
+  *(size_t *)vaddr = len;
+  return (void *)(vaddr + sizeof(size_t));
+}
+
+void _delete(void *ptr) {
+  if ((uint64_t)ptr == 0)
+    return;
+  uint64_t vaddr = (uint64_t)ptr - sizeof(size_t);
+  free_region(vaddr, *(size_t *)vaddr + sizeof(size_t));
+}
+
+void *operator new(size_t len) {
+  return _new(len);
+}
+
+void operator delete(void *ptr, size_t) {
+  return _delete(ptr);
+}
+
+void operator delete(void *ptr) {
+  return _delete(ptr);
+}
+
+void *operator new[](size_t len) {
+  return _new(len);
+}
+
+void operator delete[](void *ptr, size_t) {
+  return _delete(ptr);
+}
+
+void operator delete[](void *ptr) {
+  return _delete(ptr);
+}
diff --git a/kernel/bd/memory.cpp b/kernel/bd/memory.cpp
new file mode 100644
index 0000000..1015e40
--- /dev/null
+++ b/kernel/bd/memory.cpp
@@ -0,0 +1,27 @@
+#include <mercury/kernel/bd/memory.hpp>
+
+namespace mercury::kernel::bd {
+
+  memory::memory(void *buffer, size_t buffer_len) : buffer((uint8_t *)buffer) {
+    block_size = 1;
+    block_count = buffer_len;
+    //block cache will never be used, since the block size is 1.
+  }
+
+  storage::io_result memory::read_blocks_no_cache(
+    uint64_t start, uint64_t count, void *into
+  ) {
+    for (uint64_t i = 0; i < count; ++i)
+      ((uint8_t *)into)[i] = buffer[start + i];
+    return storage::io_result::success;
+  }
+
+  storage::io_result memory::write_blocks_no_cache(
+    uint64_t start, uint64_t count, const void *into
+  ) {
+    for (uint64_t i = 0; i < count; ++i)
+      buffer[start + i] = ((uint8_t *)into)[i];
+    return storage::io_result::success;
+  }
+
+}
diff --git a/kernel/entry.cpp b/kernel/entry.cpp
index c8f74c8..fb85ad2 100644
--- a/kernel/entry.cpp
+++ b/kernel/entry.cpp
@@ -1,4 +1,6 @@
 #include <mercury/kernel/framebuffer.hpp>
+#include <mercury/kernel/bd/memory.hpp>
+#include <mercury/kernel/fs/tarfs.hpp>
 #include <mercury/kernel/terminal.hpp>
 #include <mercury/kernel/limine.hpp>
 #include <mercury/kernel/paging.hpp>
@@ -177,9 +179,28 @@ extern "C" [[noreturn]] void entry() {
 
 }
 
+[[noreturn]] static void halt() {
+  while (1)
+    ;
+}
+
 [[noreturn]] static void with_kernel_p4() {
 
   terminal::init_terminal();
+  storage::init_storage();
+
+  storage::block_device *initfs_bd = new bd::memory(initfs, initfs_len);
+  storage::block_devices->insert_end(initfs_bd);
+
+  storage::canon_path root;
+  storage::canonize_path("/", 1, root);
+
+  if (storage::mount_device(initfs_bd, root, &fs::tarfs_mounter) !=
+      storage::io_result::success) {
+    terminal::put_string_sz("failed to mount initfs.");
+    halt();
+  }
+
   terminal::put_string_sz("kernel initialization complete.\n");
 
   int used_vram_kib = paging::get_used_vram_page_count() * 4;
@@ -190,23 +211,24 @@ extern "C" [[noreturn]] void entry() {
   terminal::put_int_decimal(free_pram_kib);
   terminal::put_string_sz(" kiB physical memory free.\n");
 
-  terminal::put_string_sz("initfs first sector:");
-  for (int y = 0; y < 8; ++y) {
-    terminal::put_string_sz("\n  ");
-    for (int x = 0; x < 64; ++x)
-      terminal::put_char((char)initfs[y * 64 + x]);
+  storage::canon_path test_path;
+  storage::canonize_path("/test.txt", 9, test_path);
+
+  storage::block_device *test_bd;
+  storage::node_id_t test_node_id;
+  storage::canon_path test_path_without_symlinks;
+
+  if (storage::look_up_absolute_path(
+        test_path, test_bd, test_node_id, true, test_path_without_symlinks) !=
+      storage::io_result::success) {
+    terminal::put_string_sz("failed to look up /test.txt in vfs.");
+    halt();
   }
 
-  terminal::put_string_sz("\ninitfs second sector:");
-  for (int y = 0; y < 8; ++y) {
-    terminal::put_string_sz("\n  ");
-    for (int x = 0; x < 64; ++x)
-      terminal::put_char((char)initfs[512 + y * 64 + x]);
-  }
+  terminal::put_string_sz("/test.txt has node id ");
+  terminal::put_int_decimal(test_node_id);
+  terminal::put_string_sz(" in its file system.");
 
-  while (1)
-    ;
-
-  //TODO
+  halt();
 
 }
diff --git a/kernel/fs/tarfs.cpp b/kernel/fs/tarfs.cpp
new file mode 100644
index 0000000..706280e
--- /dev/null
+++ b/kernel/fs/tarfs.cpp
@@ -0,0 +1,224 @@
+#include <mercury/kernel/fs/tarfs.hpp>
+
+//in fs::tarfs_instance, storage::node_id_t refers to the number
+//of bytes into the block device that the info sector is located
+
+namespace mercury::kernel::fs {
+
+  storage::io_result tarfs_mounter(
+    storage::block_device *bd, storage::file_system_instance *&fs_out
+  ) {
+    fs_out = new tarfs_instance(bd);
+    return storage::io_result::success;
+  }
+
+  tarfs_instance::tarfs_instance(storage::block_device *bd) : bd(bd) {}
+
+  storage::io_result tarfs_instance::next_node(storage::node_id_t &node) {
+    uint64_t file_length;
+    storage::io_result result = read_num(node + 124, 12, file_length);
+    if (result != storage::io_result::success)
+      return result;
+    node += ((file_length - 1) / 512 + 2) * 512;
+    return storage::io_result::success;
+  }
+
+  storage::io_result tarfs_instance::read_name(
+    storage::node_id_t node, char *name_buf, size_t &name_len_out
+  ) {
+    name_len_out = 0;
+    storage::io_result result = bd->read_bytes(node + 345, 155, name_buf);
+    if (result != storage::io_result::success)
+      return result;
+    while (name_buf[name_len_out] && name_len_out < 155)
+      ++name_len_out;
+    result = bd->read_bytes(node, 100, name_buf + name_len_out);
+    if (result != storage::io_result::success)
+      return result;
+    size_t new_limit = name_len_out + 100;
+    while (name_buf[name_len_out] && name_len_out < new_limit)
+      ++name_len_out;
+    return storage::io_result::success;
+  }
+
+  storage::io_result tarfs_instance::read_num(
+    uint64_t offset, size_t len, uint64_t &out
+  ) {
+
+    //len <= 12
+    char buffer[12];
+    storage::io_result result = bd->read_bytes(offset, len, buffer);
+    out = 0;
+
+    for (size_t i = 0; i < len; ++i) {
+      if (!buffer[i])
+        return i == 0 ? storage::io_result::fs_corrupt
+                      : storage::io_result::success;
+      if (buffer[i] < '0' || buffer[i] > '7')
+        return storage::io_result::fs_corrupt;
+      out = out * 8 + buffer[i] - '0';
+    }
+
+    return storage::io_result::success;
+
+  }
+
+#define RETURN_MAYBE_NOT_FOUND(expr) \
+  { \
+    storage::io_result _result = expr; \
+    if (_result == storage::io_result::out_of_bounds) \
+      return storage::io_result::not_found; \
+    if (_result != storage::io_result::success) \
+      return _result; \
+  }
+
+  storage::io_result tarfs_instance::get_root_node(storage::node_id_t &out) {
+    out = 0;
+    while (true) {
+      char name_buf[255];
+      size_t name_len;
+      RETURN_MAYBE_NOT_FOUND(read_name(out, name_buf, name_len))
+      if (name_len == 2 && name_buf[0] == '.' && name_buf[1] == '/')
+        return storage::io_result::success;
+      RETURN_MAYBE_NOT_FOUND(next_node(out))
+    }
+  }
+
+  storage::io_result tarfs_instance::get_first_child(storage::node_id_t node,
+    storage::node_id_t &out, storage::directory_iter_t &iter_out
+  ) {
+
+    out = 0;
+    while (true) {
+
+      char name_buf[255];
+      size_t name_len;
+      RETURN_MAYBE_NOT_FOUND(read_name(out, name_buf, name_len))
+
+      //TODO
+      while (1)
+        ;
+
+      RETURN_MAYBE_NOT_FOUND(next_node(out))
+
+    }
+
+  }
+
+  storage::io_result tarfs_instance::get_next_child(storage::node_id_t node,
+    storage::node_id_t &out, storage::directory_iter_t &iter
+  ) {
+
+    out = iter;
+    while (true) {
+
+      char name_buf[255];
+      size_t name_len;
+      RETURN_MAYBE_NOT_FOUND(read_name(out, name_buf, name_len))
+
+      //TODO
+      //NOTE: before return, do iter = out.
+      while (1)
+        ;
+
+
+      RETURN_MAYBE_NOT_FOUND(next_node(out))
+
+    }
+
+  }
+
+  storage::io_result tarfs_instance::get_child(storage::node_id_t node,
+    storage::node_id_t &out, const char *name, size_t name_len
+  ) {
+
+    char full_name[255];
+    size_t full_name_len;
+    RETURN_MAYBE_NOT_FOUND(read_name(out, full_name, full_name_len))
+
+    if (full_name_len + name_len > 255)
+      return storage::io_result::not_supported;
+
+    for (size_t i = 0; i < name_len; ++i)
+      full_name[full_name_len + i] = name[i];
+    full_name_len += name_len;
+
+    out = 0;
+    while (true) {
+
+      char cand_name[255];
+      size_t cand_name_len;
+      RETURN_MAYBE_NOT_FOUND(read_name(out, cand_name, cand_name_len))
+
+      if (cand_name_len != full_name_len)
+        goto next_iter;
+      for (size_t i = 0; i < full_name_len; ++i)
+        if (cand_name[i] != full_name[i])
+          goto next_iter;
+
+      return storage::io_result::success;
+
+    next_iter:
+      RETURN_MAYBE_NOT_FOUND(next_node(out))
+
+    }
+
+  }
+
+  storage::io_result tarfs_instance::get_name_length(
+    storage::node_id_t node, size_t &length_out
+  ) {
+
+    //TODO
+    while (1)
+      ;
+
+  }
+
+  storage::io_result tarfs_instance::get_name(
+    storage::node_id_t node, char *buffer, size_t &length_out
+  ) {
+
+    //TODO
+    while (1)
+      ;
+
+  }
+
+  storage::io_result tarfs_instance::get_file_length(
+    storage::node_id_t node, uint64_t &length_out
+  ) {
+
+    //TODO
+    while (1)
+      ;
+
+  }
+
+  storage::io_result tarfs_instance::get_file_type(
+    storage::node_id_t node, storage::file_type &out
+  ) {
+
+    uint64_t ft;
+    storage::io_result result = read_num(node + 156, 1, ft);
+    if (result != storage::io_result::success)
+      return result;
+
+    switch (ft) {
+    case 0:
+      out = storage::file_type::regular_file;
+      return storage::io_result::success;
+    case 2:
+      out = storage::file_type::symlink;
+      return storage::io_result::success;
+    case 5:
+      out = storage::file_type::directory;
+      return storage::io_result::success;
+    default:
+      return storage::io_result::not_supported;
+    }
+
+  }
+
+
+}
diff --git a/kernel/paging.cpp b/kernel/paging.cpp
index 3bd27d0..8c27abc 100644
--- a/kernel/paging.cpp
+++ b/kernel/paging.cpp
@@ -96,6 +96,13 @@ namespace mercury::kernel::paging {
     return 0;
   }
 
+  void *map_new_kernel_pages(uint64_t count) {
+    uint64_t vaddr = find_unmapped_vram_region(count);
+    for (uint64_t i = 0; i < count; ++i)
+      map_kernel_page(take_pram_page(), vaddr + i * 4096, true, false);
+    return (void *)vaddr;
+  }
+
   uint64_t get_used_vram_page_count() {
     uint64_t count = 0;
     for (uint64_t i = 0; i < kernel_vram_pages; ++i)
diff --git a/kernel/storage.cpp b/kernel/storage.cpp
new file mode 100644
index 0000000..ff86896
--- /dev/null
+++ b/kernel/storage.cpp
@@ -0,0 +1,246 @@
+#include <mercury/kernel/storage.hpp>
+
+#define RETURN_IF_NOT_SUCCESS(expr) \
+  { \
+    io_result _result = expr; \
+    if (_result != io_result::success) \
+      return _result; \
+  }
+
+namespace mercury::kernel::storage {
+
+  io_result block_device::load_cache_block(uint64_t i) {
+
+    if (block_cache_i == i)
+      return io_result::success;
+
+    if (block_cache_dirty) {
+      RETURN_IF_NOT_SUCCESS(
+        write_blocks_no_cache(block_cache_i, 1, block_cache))
+      block_cache_dirty = false;
+    }
+
+    io_result result = read_blocks_no_cache(i, 1, block_cache);
+
+    if (result != io_result::success) {
+      block_cache_i = block_count;
+      return result;
+    }
+
+    block_cache_i = i;
+    return io_result::success;
+
+  }
+
+  io_result block_device::read_bytes(
+    uint64_t start, uint64_t count, void *into
+  ) {
+
+    if (start + count > block_size * block_count)
+      return io_result::out_of_bounds;
+
+    uint8_t *into_u8 = (uint8_t *)into;
+
+    if (start % block_size != 0) {
+      uint64_t prefix_len = block_size - start % block_size;
+      RETURN_IF_NOT_SUCCESS(load_cache_block(start / block_size))
+      for (uint64_t i = 0; i < prefix_len; ++i)
+        into_u8[i] = block_cache[start % block_size + i];
+      into_u8 += prefix_len;
+      start += prefix_len;
+      count -= prefix_len;
+    }
+
+    uint64_t postfix_start = ((start + count) / block_size) * block_size;
+
+    if (postfix_start != start) {
+      RETURN_IF_NOT_SUCCESS(read_blocks_no_cache(
+        start / block_size, (postfix_start - start) / block_size, into_u8))
+      count -= postfix_start - start;
+      into_u8 += postfix_start - start;
+      start = postfix_start;
+    }
+
+    if (count != 0) {
+      RETURN_IF_NOT_SUCCESS(load_cache_block(start / block_size))
+      for (uint64_t i = 0; i < count; ++i)
+        into_u8[i] = block_cache[i];
+    }
+
+    return io_result::success;
+
+  }
+
+  utility::list<block_device *> *block_devices;
+
+  static utility::trie<utility::string, block_device *> *mounted_devices;
+
+  void init_storage() {
+    block_devices = new utility::list<block_device *>();
+    mounted_devices = new utility::trie<utility::string, block_device *>();
+  }
+
+  void canon_path::parent() {
+    if (segments.count != 0)
+      --segments.count;
+    else if (!absolute)
+      ++parent_count;
+  }
+
+  void canon_path::rel(const canon_path &r) {
+    if (r.absolute) {
+      segments.count = 0;
+      absolute = true;
+      parent_count = 0;
+    }
+    for (unsigned i = 0; i < r.parent_count; ++i)
+      parent();
+    for (unsigned i = 0; i < r.segments.count; ++i)
+      segments.add_end(r.segments.buffer[i]);
+  }
+
+  void canonize_path(const char *str, size_t len, canon_path &out) {
+
+    out.absolute = false;
+    out.parent_count = 0;
+    out.segments.count = 0;
+
+    if (len == 0)
+      return;
+
+    if (len == 1 && str[0] == '/') {
+      out.absolute = true;
+      return;
+    }
+
+    if (str[0] == '/') {
+      out.absolute = true;
+      ++str;
+      --len;
+    }
+
+    while (len != 0) {
+
+      size_t segment_len = utility::find(str, len, '/');
+      size_t to_skip = segment_len == len ? len : len + 1;
+
+      if (segment_len == 0)
+        ;
+
+      else if (segment_len == 1 && str[0] == '.')
+        ;
+
+      else if (segment_len == 2 && str[0] == '.' && str[1] == '.')
+        out.parent();
+
+      else {
+        utility::string segment(str, segment_len);
+        out.segments.add_end(std::move(segment));
+      }
+
+      str += to_skip;
+      len -= to_skip;
+
+    }
+
+  }
+
+  io_result mount_device(
+    block_device *bd, const canon_path &path, file_system_mounter mounter
+  ) {
+
+    if (!path.absolute)
+      return io_result::bad_path;
+
+    if (mounted_devices->has_key(path.segments))
+      return io_result::mount_point_already_used;
+
+    file_system_instance *fs;
+    RETURN_IF_NOT_SUCCESS(mounter(bd, fs));
+    bd->mounted_as = fs;
+    mounted_devices->try_insert(path.segments, bd);
+    return io_result::success;
+
+  }
+
+  static io_result symlink_contents(
+    block_device *bd, node_id_t node, canon_path &out
+  ) {
+    //TODO
+    while (1)
+      ;
+  }
+
+  static io_result resolve_symlinks(
+    block_device *&bd, node_id_t &node, canon_path &path
+  ) {
+
+    file_type ft;
+    RETURN_IF_NOT_SUCCESS(bd->mounted_as->get_file_type(node, ft))
+
+    if (ft != file_type::symlink)
+      return io_result::success;
+
+    canon_path contents;
+    RETURN_IF_NOT_SUCCESS(symlink_contents(bd, node, contents))
+    path.parent();
+    path.rel(contents);
+
+    return look_up_absolute_path(path, bd, node, true, path);
+
+  }
+
+  io_result look_up_absolute_path(const canon_path &path,
+    block_device *&bd_out, node_id_t &node_out, bool resolve_final_node,
+    canon_path &path_without_symlinks_out
+  ) {
+
+    if (!path.absolute)
+      return io_result::bad_path;
+
+    unsigned prefix_length;
+    bd_out =
+      *mounted_devices->longest_prefix_with_value(path.segments, prefix_length)
+        .value_here;
+    RETURN_IF_NOT_SUCCESS(bd_out->mounted_as->get_root_node(node_out))
+
+    path_without_symlinks_out.absolute = true;
+    path_without_symlinks_out.parent_count = 0;
+    path_without_symlinks_out.segments.count = 0;
+
+    for (unsigned i = 0; i < prefix_length; ++i)
+      path_without_symlinks_out.segments.add_end(path.segments.buffer[i]);
+
+    for (unsigned i = prefix_length; i < path.segments.count - 1; ++i) {
+
+      path_without_symlinks_out.segments.add_end(path.segments.buffer[i]);
+
+      RETURN_IF_NOT_SUCCESS(bd_out->mounted_as->get_child(node_out, node_out,
+        path.segments.buffer[i].buffer, path.segments.buffer[i].count))
+      RETURN_IF_NOT_SUCCESS(resolve_symlinks(
+        bd_out, node_out, path_without_symlinks_out))
+
+      file_type ft;
+      RETURN_IF_NOT_SUCCESS(bd_out->mounted_as->get_file_type(node_out, ft))
+
+      if (ft != file_type::directory)
+        return io_result::not_a_directory;
+
+    }
+
+    const utility::string &last_segment =
+      path.segments.buffer[path.segments.count - 1];
+    path_without_symlinks_out.segments.add_end(last_segment);
+
+    RETURN_IF_NOT_SUCCESS(bd_out->mounted_as->get_child(
+      node_out, node_out, last_segment.buffer, last_segment.count))
+
+    if (resolve_final_node)
+      RETURN_IF_NOT_SUCCESS(resolve_symlinks(
+        bd_out, node_out, path_without_symlinks_out))
+
+    return io_result::success;
+
+  }
+
+}
diff --git a/kernel/terminal.cpp b/kernel/terminal.cpp
index f017cad..7a878ee 100644
--- a/kernel/terminal.cpp
+++ b/kernel/terminal.cpp
@@ -74,8 +74,13 @@ namespace mercury::kernel::terminal {
     }
   }
 
+  void put_string(const char *str, size_t len) {
+    for (size_t i = 0; i < len; ++i)
+      put_char(str[i]);
+  }
+
   void put_string_sz(const char *str) {
-    for (int i = 0; str[i]; ++i)
+    for (size_t i = 0; str[i]; ++i)
       put_char(str[i]);
   }
 
diff --git a/makefile b/makefile
index 9497d2c..0af5829 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,6 @@
-GPP_ARGS = -Wall -Wextra -O3 -ggdb -I include -ffreestanding -mno-sse
-KERNEL_OBJECTS = entry.cpp framebuffer.cpp paging.asm paging.cpp terminal.cpp utility.cpp
+GPP_ARGS = -Wall -Wextra -O3 -ggdb -I include -ffreestanding -fno-exceptions -fno-rtti -mno-sse
+KERNEL_OBJECTS = allocator.cpp entry.cpp framebuffer.cpp paging.asm \
+  paging.cpp storage.cpp terminal.cpp utility.cpp bd/memory.cpp fs/tarfs.cpp
 
 all: out/disk.iso
 
@@ -37,7 +38,7 @@ obj/kernel.elf: ${KERNEL_OBJECTS:%=obj/kernel/%.o}
 obj/initfs.tgz:
 	@mkdir -p obj/initfs
 	echo test > obj/initfs/test.txt
-	cd obj/initfs && tar czf ../initfs.tgz *
+	tar czf obj/initfs.tgz -C obj/initfs .
 
 out/disk.iso: obj/kernel.elf obj/initfs.tgz limine
 	mkdir -p obj/iso out