]> code.delx.au - refind/commitdiff
Ext4fs support.
author srs5694 <srs5694@users.sourceforge.net>
Sun, 16 Dec 2012 03:19:18 +0000 (22:19 -0500)
committer srs5694 <srs5694@users.sourceforge.net>
Sun, 16 Dec 2012 03:19:18 +0000 (22:19 -0500)
filesystems/fsw_efi_edk2_base.h [new file with mode: 0644]
filesystems/fsw_ext4.c [new file with mode: 0644]
filesystems/fsw_ext4.h [new file with mode: 0644]
filesystems/fsw_ext4_disk.h [new file with mode: 0644]

diff --git a/filesystems/fsw_efi_edk2_base.h b/filesystems/fsw_efi_edk2_base.h
new file mode 100644 (file)
index 0000000..27c6d58
--- /dev/null
@@ -0,0 +1,99 @@
+/**
+ * \file fsw_efi_edk2_base.h
+ * Base definitions for the EDK EFI Toolkit environment.
+ */
+/*
+ * Copyright (c) 2012 Stefan Agner
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the
+ *    distribution.
+ *
+ *  * Neither the name of Christoph Pfisterer nor the names of the
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Copyright (C) 2010 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef _FSW_EFI_EDK2_BASE_H_
+#define _FSW_EFI_EDK2_BASE_H_
+/*
+ * Common declarations for EDK<->EDK2 compatibility.
+ */
+# include <Base.h>
+# include <Uefi.h>
+# include <Library/DebugLib.h>
+# include <Library/BaseLib.h>
+# include <Protocol/DriverBinding.h>
+# include <Library/BaseMemoryLib.h>
+# include <Library/UefiRuntimeServicesTableLib.h>
+# include <Library/UefiDriverEntryPoint.h>
+# include <Library/UefiBootServicesTableLib.h>
+# include <Library/MemoryAllocationLib.h>
+# include <Library/DevicePathLib.h>
+# include <Protocol/DevicePathFromText.h>
+# include <Protocol/DevicePathToText.h>
+# include <Protocol/DebugPort.h>
+# include <Protocol/DebugSupport.h>
+# include <Library/PrintLib.h>
+# include <Library/UefiLib.h>
+# include <Protocol/SimpleFileSystem.h>
+# include <Protocol/BlockIo.h>
+# include <Protocol/DiskIo.h>
+# include <Guid/FileSystemInfo.h>
+# include <Guid/FileInfo.h>
+# include <Guid/FileSystemVolumeLabelInfo.h>
+# include <Protocol/ComponentName.h>
+
+// Short alias for gBS.
+# define BS gBS
+// Both helpers token-paste their argument into the identifier gEfi<x>Guid.
+# define PROTO_NAME(x) gEfi ## x ## Guid
+# define GUID_NAME(x) gEfi ## x ## Guid
+
+// Name aliases; presumably these map EDK-style identifiers used by the shared
+// driver code onto their EDK2 spellings - confirm against the EDK2 headers.
+# define EFI_FILE_HANDLE_REVISION EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_REVISION
+# define SIZE_OF_EFI_FILE_SYSTEM_VOLUME_LABEL_INFO  SIZE_OF_EFI_FILE_SYSTEM_VOLUME_LABEL
+# define EFI_FILE_SYSTEM_VOLUME_LABEL_INFO EFI_FILE_SYSTEM_VOLUME_LABEL
+# define EFI_SIGNATURE_32(a, b, c, d) SIGNATURE_32(a, b, c, d)
+// Accepts a three-argument call and forwards only the first two arguments to
+// DivU64x32; the third argument is discarded. The macro name is not
+// re-expanded inside its own replacement, so this is not recursive.
+# define DivU64x32(x,y,z) DivU64x32((x),(y))
+
+
+// INTN CompareGuidEdk1(
+//   IN EFI_GUID     *Guid1,
+//   IN EFI_GUID     *Guid2
+//                      );
+
+//#define CompareGuid(x, y) CompareGuidEdk1((x),(y))
+//#define FSW_DEBUG_LEVEL 3
+
+// Defined elsewhere. Judging by the name, this presumably tests an ISO-8859-1
+// string against a UTF-16 string of 'len' characters for equality - confirm
+// against the definition.
+int fsw_streq_ISO88591_UTF16(void *s1data, void *s2data, int len);
+
+#endif
diff --git a/filesystems/fsw_ext4.c b/filesystems/fsw_ext4.c
new file mode 100644 (file)
index 0000000..c12d95f
--- /dev/null
@@ -0,0 +1,679 @@
+/**
+ * \file fsw_ext4.c
+ * ext4 file system driver code.
+ */
+
+/*-
+ * Copyright (c) 2012 Stefan Agner
+ * Portions Copyright (c) 2006 Christoph Pfisterer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include "fsw_ext4.h"
+
+
+// functions
+//
+// Forward declarations of the file-local driver functions. All of them except
+// fsw_ext4_get_by_blkaddr, fsw_ext4_get_by_extent and fsw_ext4_read_dentry are
+// referenced by the dispatch table below; those three are internal helpers.
+
+// volume level
+static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol);
+static void         fsw_ext4_volume_free(struct fsw_ext4_volume *vol);
+static fsw_status_t fsw_ext4_volume_stat(struct fsw_ext4_volume *vol, struct fsw_volume_stat *sb);
+
+// dnode level, including the logical-to-physical block mapping
+static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno);
+static void         fsw_ext4_dnode_free(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno);
+static fsw_status_t fsw_ext4_dnode_stat(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_dnode_stat *sb);
+static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_extent *extent);
+static fsw_status_t fsw_ext4_get_by_blkaddr(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_extent *extent);
+static fsw_status_t fsw_ext4_get_by_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_extent *extent);
+
+// directory access
+static fsw_status_t fsw_ext4_dir_lookup(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_string *lookup_name, struct fsw_ext4_dnode **child_dno);
+static fsw_status_t fsw_ext4_dir_read(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                      struct fsw_shandle *shand, struct fsw_ext4_dnode **child_dno);
+static fsw_status_t fsw_ext4_read_dentry(struct fsw_shandle *shand, struct ext4_dir_entry *entry);
+
+// symbolic links
+static fsw_status_t fsw_ext4_readlink(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                      struct fsw_string *link);
+
+//
+// Dispatch Table
+//
+// Positional initializer: the order of the entries must match the member order
+// of struct fsw_fstype_table, which is declared outside this file.
+//
+
+struct fsw_fstype_table   FSW_FSTYPE_TABLE_NAME(ext4) = {
+    // presumably the file system type name as a 4-character ISO-8859-1 string - confirm member order
+    { FSW_STRING_TYPE_ISO88591, 4, 4, "ext4" },
+    // sizes of the driver-specific volume and dnode structures
+    sizeof(struct fsw_ext4_volume),
+    sizeof(struct fsw_ext4_dnode),
+
+    // driver entry points
+    fsw_ext4_volume_mount,
+    fsw_ext4_volume_free,
+    fsw_ext4_volume_stat,
+    fsw_ext4_dnode_fill,
+    fsw_ext4_dnode_free,
+    fsw_ext4_dnode_stat,
+    fsw_ext4_get_extent,
+    fsw_ext4_dir_lookup,
+    fsw_ext4_dir_read,
+    fsw_ext4_readlink,
+};
+
+/**
+ * Mount an ext4 volume. Reads and validates the superblock, records the inode
+ * table location of every block group and constructs the root directory dnode.
+ *
+ * @param vol  Volume to mount. On success vol->sb, vol->inotab_bno, vol->ind_bcnt,
+ *             vol->dind_bcnt, vol->inode_size, vol->g.label and vol->g.root are set.
+ * @return FSW_SUCCESS on success; FSW_UNSUPPORTED for a wrong magic number,
+ *         unknown revision, unsupported incompat feature or block size;
+ *         FSW_VOLUME_CORRUPTED for superblock geometry that cannot be used;
+ *         otherwise the status of the failing core helper.
+ *
+ * Nothing allocated here is released on the error paths; fsw_ext4_volume_free
+ * is documented to be called by the core after an unsuccessful mount.
+ */
+
+static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol)
+{
+    fsw_status_t    status;
+    void            *buffer;
+    fsw_u32         blocksize;
+    fsw_u32         groupcnt, groupno, gdesc_per_block, gdesc_bno, gdesc_index;
+    struct ext4_group_desc *gdesc;
+    int             i;
+    struct fsw_string s;
+
+    // allocate memory to keep the superblock around
+    status = fsw_alloc(sizeof(struct ext4_super_block), &vol->sb);
+    if (status)
+        return status;
+
+    // read the superblock into its buffer
+    fsw_set_blocksize(vol, EXT4_SUPERBLOCK_BLOCKSIZE, EXT4_SUPERBLOCK_BLOCKSIZE);
+    status = fsw_block_get(vol, EXT4_SUPERBLOCK_BLOCKNO, 0, &buffer);
+    if (status)
+        return status;
+    fsw_memcpy(vol->sb, buffer, sizeof(struct ext4_super_block));
+    fsw_block_release(vol, EXT4_SUPERBLOCK_BLOCKNO, buffer);
+
+    // check the superblock
+    if (vol->sb->s_magic != EXT4_SUPER_MAGIC)
+        return FSW_UNSUPPORTED;
+    if (vol->sb->s_rev_level != EXT4_GOOD_OLD_REV &&
+        vol->sb->s_rev_level != EXT4_DYNAMIC_REV)
+        return FSW_UNSUPPORTED;
+
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: Incompat flag %x\n"), vol->sb->s_feature_incompat));
+
+    // refuse volumes that use incompat features this driver does not implement
+    if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV &&
+        (vol->sb->s_feature_incompat & ~(EXT4_FEATURE_INCOMPAT_FILETYPE | EXT4_FEATURE_INCOMPAT_RECOVER |
+                                         EXT4_FEATURE_INCOMPAT_EXTENTS | EXT4_FEATURE_INCOMPAT_FLEX_BG)))
+        return FSW_UNSUPPORTED;
+
+
+    if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV &&
+        (vol->sb->s_feature_incompat & EXT4_FEATURE_INCOMPAT_RECOVER))
+    {
+        FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: This ext3 file system needs recovery\n")));
+        // Print(L"Ext4 WARNING: This file system needs recovery, trying to use it anyway.\n");
+    }
+
+    blocksize = EXT4_BLOCK_SIZE(vol->sb);
+    if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE)
+        return FSW_UNSUPPORTED;
+
+    // These superblock fields are used as divisors below and in
+    // fsw_ext4_dnode_fill; reject values that would divide by zero or make the
+    // group count computation underflow.
+    if (vol->sb->s_blocks_per_group == 0 || vol->sb->s_inodes_per_group == 0)
+        return FSW_VOLUME_CORRUPTED;
+    if (vol->sb->s_blocks_count_lo <= vol->sb->s_first_data_block)
+        return FSW_VOLUME_CORRUPTED;
+
+    // set real blocksize
+    fsw_set_blocksize(vol, blocksize, blocksize);
+
+    // get other info from superblock
+    vol->ind_bcnt = EXT4_ADDR_PER_BLOCK(vol->sb);
+    vol->dind_bcnt = vol->ind_bcnt * vol->ind_bcnt;
+
+    // s_inode_size is only defined for dynamic-revision superblocks; revision 0
+    // volumes always use 128-byte inodes (EXT2_GOOD_OLD_INODE_SIZE in Linux).
+    if (vol->sb->s_rev_level == EXT4_GOOD_OLD_REV)
+        vol->inode_size = 128;
+    else
+        vol->inode_size = vol->sb->s_inode_size;
+    // fsw_ext4_dnode_fill divides the block size by the inode size
+    if (vol->inode_size == 0 || vol->inode_size > blocksize)
+        return FSW_VOLUME_CORRUPTED;
+
+    // the volume label is at most 16 bytes and only NUL-terminated when shorter
+    for (i = 0; i < 16; i++)
+        if (vol->sb->s_volume_name[i] == 0)
+            break;
+    s.type = FSW_STRING_TYPE_ISO88591;
+    s.size = s.len = i;
+    s.data = vol->sb->s_volume_name;
+    status = fsw_strdup_coerce(&vol->g.label, vol->g.host_string_type, &s);
+    if (status)
+        return status;
+
+    // size of group descriptor depends on feature....
+    // Revision 0 superblocks carry no meaningful feature flags, so they always
+    // get the default size as well.
+    if (vol->sb->s_rev_level == EXT4_GOOD_OLD_REV ||
+        !(vol->sb->s_feature_incompat & EXT4_FEATURE_INCOMPAT_64BIT)) {
+        // Default minimal group descriptor size...
+        vol->sb->s_desc_size = EXT4_MIN_DESC_SIZE;
+    }
+    if (vol->sb->s_desc_size < EXT4_MIN_DESC_SIZE)
+        return FSW_VOLUME_CORRUPTED;
+
+    // Calculate group descriptor count the way the kernel does it...
+    groupcnt = (vol->sb->s_blocks_count_lo - vol->sb->s_first_data_block +
+                vol->sb->s_blocks_per_group - 1) / vol->sb->s_blocks_per_group;
+    // Descriptors in one block... s_desc_size needs to be set!
+    gdesc_per_block = EXT4_DESC_PER_BLOCK(vol->sb);
+    if (gdesc_per_block == 0)
+        return FSW_VOLUME_CORRUPTED;
+
+    // Read the group descriptors to get inode table offsets
+    status = fsw_alloc(sizeof(fsw_u32) * groupcnt, &vol->inotab_bno);
+    if (status)
+        return status;
+    for (groupno = 0; groupno < groupcnt; groupno++) {
+        // the descriptor table starts in the block after the first data block
+        gdesc_bno = (vol->sb->s_first_data_block + 1) + groupno / gdesc_per_block;
+        gdesc_index = groupno % gdesc_per_block;
+        status = fsw_block_get(vol, gdesc_bno, 1, (void **)&buffer);
+        if (status)
+            return status;
+        // byte-wise pointer arithmetic; arithmetic on void * is a GNU extension
+        gdesc = (struct ext4_group_desc *)((fsw_u8 *)buffer + gdesc_index * vol->sb->s_desc_size);
+        vol->inotab_bno[groupno] = gdesc->bg_inode_table_lo;
+        fsw_block_release(vol, gdesc_bno, buffer);
+    }
+
+    // setup the root dnode
+    status = fsw_dnode_create_root(vol, EXT4_ROOT_INO, &vol->g.root);
+    if (status)
+        return status;
+
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: success, blocksize %d\n"), blocksize));
+
+    return FSW_SUCCESS;
+}
+
+/**
+ * Release the ext4-specific parts of the volume structure. Called by the core
+ * after an unmount or after an unsuccessful mount, so either pointer may still
+ * be unset when this runs.
+ */
+
+static void fsw_ext4_volume_free(struct fsw_ext4_volume *vol)
+{
+    // copy of the superblock kept by fsw_ext4_volume_mount
+    if (vol->sb) {
+        fsw_free(vol->sb);
+    }
+
+    // per-group inode table block numbers
+    if (vol->inotab_bno) {
+        fsw_free(vol->inotab_bno);
+    }
+}
+
+/**
+ * Get in-depth information on a volume: total and free size in bytes, derived
+ * from the low 32 bits of the superblock block counters.
+ */
+
+static fsw_status_t fsw_ext4_volume_stat(struct fsw_ext4_volume *vol, struct fsw_volume_stat *sb)
+{
+    // widen once so both products are computed in 64 bits
+    fsw_u64 bytes_per_block = vol->g.log_blocksize;
+
+    sb->total_bytes = bytes_per_block * vol->sb->s_blocks_count_lo;
+    sb->free_bytes  = bytes_per_block * vol->sb->s_free_blocks_count_lo;
+
+    return FSW_SUCCESS;
+}
+
+/**
+ * Load the on-disk inode for a dnode. The core calls this whenever it needs
+ * dnode fields that are not filled in at creation time; for ext4 the inode is
+ * only fetched here, so the size and type fields are invalid before this call.
+ *
+ * @return FSW_SUCCESS if the inode is (or already was) loaded, otherwise the
+ *         status returned by fsw_block_get or fsw_memdup.
+ */
+
+static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno)
+{
+    fsw_status_t    status;
+    fsw_u32         inodes_per_block, group, slot, table_bno, slot_in_block;
+    fsw_u8          *block;
+
+    // a previous call already loaded the inode
+    if (dno->raw)
+        return FSW_SUCCESS;
+
+    // locate the inode: which group, and which slot of that group's inode table
+    // (the "- 1" turns the inode number into a zero-based index)
+    group = (dno->g.dnode_id - 1) / vol->sb->s_inodes_per_group;
+    slot  = (dno->g.dnode_id - 1) % vol->sb->s_inodes_per_group;
+
+    // ...and which block of the table holds that slot
+    inodes_per_block = vol->g.phys_blocksize / vol->inode_size;
+    table_bno     = vol->inotab_bno[group] + slot / inodes_per_block;
+    slot_in_block = slot % inodes_per_block;
+
+    status = fsw_block_get(vol, table_bno, 2, (void **)&block);
+    if (status)
+        return status;
+
+    // take a private copy of the inode, then give the block back
+    status = fsw_memdup((void **)&dno->raw, block + slot_in_block * vol->inode_size, vol->inode_size);
+    fsw_block_release(vol, table_bno, block);
+    if (status)
+        return status;
+
+    // only the low 32 bits of the size are used here
+    dno->g.size = dno->raw->i_size_lo; // TODO: check docs for 64-bit sized files
+
+    // translate the POSIX file type bits into the generic dnode type
+    if (S_ISREG(dno->raw->i_mode)) {
+        dno->g.type = FSW_DNODE_TYPE_FILE;
+    } else if (S_ISDIR(dno->raw->i_mode)) {
+        dno->g.type = FSW_DNODE_TYPE_DIR;
+    } else if (S_ISLNK(dno->raw->i_mode)) {
+        dno->g.type = FSW_DNODE_TYPE_SYMLINK;
+    } else {
+        dno->g.type = FSW_DNODE_TYPE_SPECIAL;
+    }
+
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: inode flags %x\n"), dno->raw->i_flags));
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: i_mode %x\n"), dno->raw->i_mode));
+    return FSW_SUCCESS;
+}
+
+/**
+ * Release the ext4-specific part of a dnode. Called by the core when a dnode
+ * structure is deallocated.
+ */
+
+static void fsw_ext4_dnode_free(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno)
+{
+    // the raw inode copy only exists once fsw_ext4_dnode_fill has succeeded
+    if (dno->raw) {
+        fsw_free(dno->raw);
+    }
+}
+
+/**
+ * Get in-depth information on a dnode. The core makes sure that fsw_ext4_dnode_fill
+ * has been called on the dnode before this function is called. Note that some
+ * data is not directly stored into the structure, but passed to a host-specific
+ * callback that converts it to the host-specific format.
+ *
+ * @param vol  Volume the dnode belongs to (not used here).
+ * @param dno  Dnode whose raw inode supplies the values.
+ * @param sb   Receives used_bytes directly; times and mode go through its callbacks.
+ * @return Always FSW_SUCCESS.
+ */
+
+static fsw_status_t fsw_ext4_dnode_stat(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_dnode_stat *sb)
+{
+    // i_blocks_lo counts 512-byte units, not file system blocks (the ">> 9" in
+    // fsw_ext4_readlink relies on the same unit). Widen before multiplying so
+    // the product cannot wrap at 32 bits.
+    // NOTE(review): inodes flagged as "huge file" count in file system blocks
+    // instead; that flag is not evaluated here.
+    sb->used_bytes = (fsw_u64)dno->raw->i_blocks_lo * 512;
+    sb->store_time_posix(sb, FSW_DNODE_STAT_CTIME, dno->raw->i_ctime);
+    sb->store_time_posix(sb, FSW_DNODE_STAT_ATIME, dno->raw->i_atime);
+    sb->store_time_posix(sb, FSW_DNODE_STAT_MTIME, dno->raw->i_mtime);
+    sb->store_attr_posix(sb, dno->raw->i_mode);
+
+    return FSW_SUCCESS;
+}
+
+/**
+ * Retrieve file data mapping information. The core calls this when
+ * fsw_shandle_read needs to know where on disk a piece of the file's data
+ * lives, after making sure fsw_ext4_dnode_fill has been called on the dnode.
+ * The task is to translate the requested logical block number into a physical
+ * disk block number.
+ *
+ * ext4 usually uses extents to store those block numbers. Because ext4 is
+ * backward compatible, an inode may instead still use the old direct/indirect
+ * addressing scheme; an inode flag tells the two apart.
+ */
+
+static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_extent *extent)
+{
+    // Preconditions: the caller has checked that the requested logical block
+    //  lies within the file's size, and fsw_ext4_dnode_fill has completed
+    //  successfully for this dnode.
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_extent: inode %d, block %d\n"), dno->g.dnode_id, extent->log_start));
+
+    // defaults that the mapping helpers refine
+    extent->type = FSW_EXTENT_TYPE_PHYSBLOCK;
+    extent->log_count = 1;
+
+    // EXT4_INODE_EXTENTS is a bit number, hence the shift
+    if ((dno->raw->i_flags & (1 << EXT4_INODE_EXTENTS)) == 0) {
+        FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_extent: inode %d uses direct/indirect block addressing\n"),
+            dno->g.dnode_id));
+        return fsw_ext4_get_by_blkaddr(vol, dno, extent);
+    }
+
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_extent: inode %d uses extents\n"), dno->g.dnode_id));
+    return fsw_ext4_get_by_extent(vol, dno, extent);
+}
+
+/**
+ * Map a logical block of an extent-mapped inode to a physical block by walking
+ * the ext4 extent tree. The root node lives in the inode's i_block area; index
+ * nodes point to further nodes stored in regular disk blocks, and leaf nodes
+ * (depth 0) hold the actual extents.
+ *
+ * @param vol     Volume the inode lives on.
+ * @param dno     Dnode with a loaded raw inode.
+ * @param extent  In: log_start is the requested logical block. Out: on a hit,
+ *                phys_start and log_count describe the rest of the covering
+ *                extent; if no extent covers the block (a hole), type is set
+ *                to FSW_EXTENT_TYPE_SPARSE, as fsw_ext4_get_by_blkaddr does
+ *                for unmapped blocks.
+ * @return FSW_SUCCESS, FSW_VOLUME_CORRUPTED for an invalid tree node, or the
+ *         status of a failed fsw_block_get.
+ *
+ * NOTE(review): only the low 32 bits of block numbers (ee_start_lo, ei_leaf_lo)
+ * are used, and extent lengths are taken at face value - unwritten extents are
+ * not treated specially.
+ */
+static fsw_status_t fsw_ext4_get_by_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_extent *extent)
+{
+    fsw_status_t  status;
+    fsw_u32       bno, node_bno, child_bno, buf_size, max_entries;
+    int           ext_cnt, node_is_block, child_found, expected_depth;
+    fsw_u8        *buffer;
+
+    struct ext4_extent_header  *ext4_extent_header;
+    struct ext4_extent_idx     *ext4_extent_idx;
+    struct ext4_extent         *ext4_extent;
+
+    // Logical block requested by core...
+    bno = extent->log_start;
+
+    // First buffer is the i_block field from inode...
+    buffer = (fsw_u8 *)dno->raw->i_block;
+    buf_size = (fsw_u32)sizeof(dno->raw->i_block);
+    node_is_block = 0;      // the root node is not a cached block, never release it
+    node_bno = 0;
+    child_bno = 0;
+    expected_depth = -1;    // the root may have any depth
+
+    while (1) {
+        ext4_extent_header = (struct ext4_extent_header *)buffer;
+        FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: extent header with %d entries\n"),
+                      ext4_extent_header->eh_entries));
+        if (ext4_extent_header->eh_magic != EXT4_EXT_MAGIC) {
+            status = FSW_VOLUME_CORRUPTED;
+            break;
+        }
+
+        // Every level must be exactly one shallower than its parent; this also
+        // guarantees that the walk terminates on a damaged tree.
+        if (expected_depth >= 0 && ext4_extent_header->eh_depth != expected_depth) {
+            status = FSW_VOLUME_CORRUPTED;
+            break;
+        }
+
+        // Never trust eh_entries beyond what physically fits into this node.
+        if (ext4_extent_header->eh_depth == 0)
+            max_entries = (fsw_u32)((buf_size - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent));
+        else
+            max_entries = (fsw_u32)((buf_size - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent_idx));
+        if (ext4_extent_header->eh_entries > max_entries) {
+            status = FSW_VOLUME_CORRUPTED;
+            break;
+        }
+
+        if (ext4_extent_header->eh_depth == 0) {
+            // Leaf node, the extents follow the header directly.
+            ext4_extent = (struct ext4_extent *)(buffer + sizeof(struct ext4_extent_header));
+
+            // Unless an extent below covers the block, it is a hole.
+            extent->type = FSW_EXTENT_TYPE_SPARSE;
+            status = FSW_SUCCESS;
+
+            for (ext_cnt = 0; ext_cnt < ext4_extent_header->eh_entries; ext_cnt++, ext4_extent++) {
+                FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: extent node cover %d...\n"), ext4_extent->ee_block));
+
+                // Is the requested block in this extent?
+                if (bno >= ext4_extent->ee_block && bno < ext4_extent->ee_block + ext4_extent->ee_len) {
+                    extent->type = FSW_EXTENT_TYPE_PHYSBLOCK;
+                    extent->phys_start = ext4_extent->ee_start_lo + (bno - ext4_extent->ee_block);
+                    extent->log_count = ext4_extent->ee_len - (bno - ext4_extent->ee_block);
+                    break;
+                }
+            }
+            break;
+        }
+
+        // Index node: entries are sorted by ei_block, so the child to follow is
+        // the LAST entry that starts at or before the requested block.
+        FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: index extents, depth %d\n"),
+                  ext4_extent_header->eh_depth));
+        ext4_extent_idx = (struct ext4_extent_idx *)(buffer + sizeof(struct ext4_extent_header));
+        child_found = 0;
+        for (ext_cnt = 0; ext_cnt < ext4_extent_header->eh_entries; ext_cnt++, ext4_extent_idx++) {
+            FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: index node covers block %d...\n"),
+                      ext4_extent_idx->ei_block));
+            if (bno < ext4_extent_idx->ei_block)
+                break;
+            child_bno = ext4_extent_idx->ei_leaf_lo;
+            child_found = 1;
+        }
+        if (!child_found) {
+            // The block lies before everything this subtree maps: a hole.
+            extent->type = FSW_EXTENT_TYPE_SPARSE;
+            status = FSW_SUCCESS;
+            break;
+        }
+
+        // Read what is needed from the header before its buffer is given back.
+        expected_depth = ext4_extent_header->eh_depth - 1;
+
+        // Follow extent tree...
+        if (node_is_block) {
+            fsw_block_release(vol, node_bno, buffer);
+            node_is_block = 0;
+        }
+        status = fsw_block_get(vol, child_bno, 1, (void **)&buffer);
+        if (status)
+            return status;
+        node_bno = child_bno;
+        node_is_block = 1;
+        buf_size = vol->g.phys_blocksize;
+    }
+
+    if (node_is_block)
+        fsw_block_release(vol, node_bno, buffer);
+    return status;
+}
+
+/**
+ * The ext2/ext3 file system does not use extents, but stores a list of block numbers
+ * using the usual direct, indirect, double-indirect, triple-indirect scheme. To
+ * optimize access, this function checks if the following file blocks are mapped
+ * to consecutive disk blocks and returns a combined extent if possible.
+ *
+ * @param vol     Volume the inode lives on.
+ * @param dno     Dnode with a loaded raw inode.
+ * @param extent  In: log_start is the requested logical block and log_count the
+ *                initial run length set by the caller. Out: phys_start and a
+ *                possibly enlarged log_count, or type FSW_EXTENT_TYPE_SPARSE
+ *                when a block pointer on the path is zero.
+ * @return FSW_SUCCESS, or the status of a failed fsw_block_get.
+ */
+static fsw_status_t fsw_ext4_get_by_blkaddr(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_extent *extent)
+{
+    fsw_status_t    status;
+    fsw_u32         bno, release_bno, buf_bcnt, file_bcnt;
+    int             path[5], i;
+    fsw_u32         *buffer;
+    bno = extent->log_start;
+
+    // Build the list of indices to follow, terminated by -1: path[0] indexes
+    // the inode's i_block array, later entries index the indirect blocks.
+
+    // try direct block pointers in the inode
+    if (bno < EXT4_NDIR_BLOCKS) {
+        path[0] = bno;
+        path[1] = -1;
+    } else {
+        bno -= EXT4_NDIR_BLOCKS;
+
+        // try indirect block
+        if (bno < vol->ind_bcnt) {
+            path[0] = EXT4_IND_BLOCK;
+            path[1] = bno;
+            path[2] = -1;
+        } else {
+            bno -= vol->ind_bcnt;
+
+            // try double-indirect block
+            if (bno < vol->dind_bcnt) {
+                path[0] = EXT4_DIND_BLOCK;
+                path[1] = bno / vol->ind_bcnt;
+                path[2] = bno % vol->ind_bcnt;
+                path[3] = -1;
+            } else {
+                bno -= vol->dind_bcnt;
+
+                // use the triple-indirect block
+                path[0] = EXT4_TIND_BLOCK;
+                path[1] = bno / vol->dind_bcnt;
+                path[2] = (bno / vol->ind_bcnt) % vol->ind_bcnt;
+                path[3] = bno % vol->ind_bcnt;
+                path[4] = -1;
+            }
+        }
+    }
+
+    // follow the indirection path
+    buffer = dno->raw->i_block;
+    buf_bcnt = EXT4_NDIR_BLOCKS;
+    release_bno = 0;        // 0 = buffer is the inode itself, nothing to release
+    for (i = 0; ; i++) {
+        bno = buffer[path[i]];
+        if (bno == 0) {
+            // a zero pointer anywhere on the path means the block is a hole
+            extent->type = FSW_EXTENT_TYPE_SPARSE;
+            if (release_bno)
+                fsw_block_release(vol, release_bno, buffer);
+            return FSW_SUCCESS;
+        }
+        if (path[i+1] < 0)
+            break;
+
+        if (release_bno)
+            fsw_block_release(vol, release_bno, buffer);
+        status = fsw_block_get(vol, bno, 1, (void **)&buffer);
+        if (status)
+            return status;
+        release_bno = bno;
+        buf_bcnt = vol->ind_bcnt;
+    }
+    extent->phys_start = bno;
+
+    // Number of blocks in the file, rounded up. fsw_ext4_dnode_fill sets
+    // g.size from the 32-bit i_size_lo, so 32-bit arithmetic is sufficient and
+    // avoids a 64-bit division; a truncated size could only shorten the run.
+    file_bcnt = (fsw_u32)dno->g.size / vol->g.log_blocksize;
+    if ((fsw_u32)dno->g.size % vol->g.log_blocksize)
+        file_bcnt++;
+
+    // check if the following blocks can be aggregated into one extent
+    while (path[i]           + extent->log_count < buf_bcnt &&    // indirect block has more block pointers
+           extent->log_start + extent->log_count < file_bcnt) {   // file has more blocks
+        if (buffer[path[i] + extent->log_count] == buffer[path[i] + extent->log_count - 1] + 1)
+            extent->log_count++;
+        else
+            break;
+    }
+
+    if (release_bno)
+        fsw_block_release(vol, release_bno, buffer);
+    return FSW_SUCCESS;
+}
+
+/**
+ * Look up a directory's child dnode by name. The directory is scanned entry by
+ * entry; for the first entry whose name equals lookup_name a dnode is created
+ * and returned through child_dno_out. The core makes sure that
+ * fsw_ext4_dnode_fill has been called and that the dnode is a directory.
+ *
+ * @return The status of fsw_dnode_create on a match, FSW_NOT_FOUND when the
+ *         end of the directory is reached, or the error of a failed helper.
+ */
+
+static fsw_status_t fsw_ext4_dir_lookup(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                        struct fsw_string *lookup_name, struct fsw_ext4_dnode **child_dno_out)
+{
+    fsw_status_t    status;
+    struct fsw_shandle dir_handle;
+    struct ext4_dir_entry entry;
+    struct fsw_string entry_name;
+
+    // Preconditions: The caller has checked that dno is a directory node.
+
+    // setup handle to read the directory
+    status = fsw_shandle_open(dno, &dir_handle);
+    if (status)
+        return status;
+
+    entry_name.type = FSW_STRING_TYPE_ISO88591;
+
+    // walk the directory until the name matches or the entries run out
+    for (;;) {
+        status = fsw_ext4_read_dentry(&dir_handle, &entry);
+        if (status)
+            break;
+
+        // an inode number of 0 marks the end of the directory
+        if (entry.inode == 0) {
+            status = FSW_NOT_FOUND;
+            break;
+        }
+
+        // wrap the raw name bytes for the comparison
+        entry_name.len = entry_name.size = entry.name_len;
+        entry_name.data = entry.name;
+        if (fsw_streq(lookup_name, &entry_name)) {
+            // found it: setup a dnode for the child item
+            status = fsw_dnode_create(dno, entry.inode, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
+            break;
+        }
+    }
+
+    fsw_shandle_close(&dir_handle);
+    return status;
+}
+
+/**
+ * Get the next directory entry when reading a directory. Called during
+ * directory iteration; a dnode is created for the next entry that is neither
+ * "." nor ".." and returned through child_dno_out. The core makes sure that
+ * fsw_ext4_dnode_fill has been called and that the dnode is a directory. The
+ * caller's shandle records the position in the directory between calls.
+ *
+ * @return The status of fsw_dnode_create, FSW_NOT_FOUND at the end of the
+ *         directory, or the error returned by fsw_ext4_read_dentry.
+ */
+
+static fsw_status_t fsw_ext4_dir_read(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                      struct fsw_shandle *shand, struct fsw_ext4_dnode **child_dno_out)
+{
+    fsw_status_t    status;
+    struct ext4_dir_entry entry;
+    struct fsw_string entry_name;
+    int             is_dot, is_dotdot;
+
+    // Preconditions: The caller has checked that dno is a directory node. The caller
+    //  has opened a storage handle to the directory's storage and keeps it around between
+    //  calls.
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dir_read: started reading dir\n")));
+
+    // fetch entries until one is neither "." nor ".."
+    do {
+        status = fsw_ext4_read_dentry(shand, &entry);
+        if (status)
+            return status;
+        if (entry.inode == 0)   // end of directory
+            return FSW_NOT_FOUND;
+
+        is_dot    = (entry.name_len == 1 && entry.name[0] == '.');
+        is_dotdot = (entry.name_len == 2 && entry.name[0] == '.' && entry.name[1] == '.');
+    } while (is_dot || is_dotdot);
+
+    // describe the raw name bytes as a string
+    entry_name.type = FSW_STRING_TYPE_ISO88591;
+    entry_name.len = entry_name.size = entry.name_len;
+    entry_name.data = entry.name;
+
+    // setup a dnode for the child item
+    return fsw_dnode_create(dno, entry.inode, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
+}
+
+/**
+ * Read a directory entry from the directory's raw data. This internal helper
+ * reads the next used raw ext4 directory entry into memory, skipping unused
+ * ones. The shandle's position is left pointing at the following entry.
+ *
+ * @return FSW_SUCCESS with entry->inode != 0 for a valid entry, FSW_SUCCESS
+ *         with entry->inode == 0 at the end of the directory,
+ *         FSW_VOLUME_CORRUPTED for inconsistent lengths, or the error of a
+ *         failed fsw_shandle_read.
+ */
+
+static fsw_status_t fsw_ext4_read_dentry(struct fsw_shandle *shand, struct ext4_dir_entry *entry)
+{
+    // size of the fixed part of an entry that precedes the name
+    enum { header_len = 8 };
+
+    fsw_status_t    status;
+    fsw_u32         got;
+
+    for (;;) {
+        // read dir_entry header (fixed length)
+        got = header_len;
+        status = fsw_shandle_read(shand, &got, entry);
+        if (status)
+            return status;
+
+        // a short read or a zero record length both mean there is nothing more
+        if (got < header_len || entry->rec_len == 0) {
+            entry->inode = 0;
+            return FSW_SUCCESS;
+        }
+
+        // a record can never be shorter than its own header
+        if (entry->rec_len < header_len)
+            return FSW_VOLUME_CORRUPTED;
+
+        if (entry->inode == 0) {
+            // valid, but unused entry: step over the rest of the record
+            shand->pos += entry->rec_len - header_len;
+            continue;
+        }
+
+        // used entry: the name has to fit inside the record
+        if (entry->rec_len < header_len + entry->name_len)
+            return FSW_VOLUME_CORRUPTED;
+        break;
+    }
+
+    // read file name (variable length)
+    got = entry->name_len;
+    status = fsw_shandle_read(shand, &got, entry->name);
+    if (status)
+        return status;
+    if (got < entry->name_len)
+        return FSW_VOLUME_CORRUPTED;
+
+    // skip any remaining padding
+    shand->pos += entry->rec_len - (header_len + entry->name_len);
+
+    return FSW_SUCCESS;
+}
+
+/**
+ * Get the target path of a symbolic link. This function is called when a symbolic
+ * link needs to be resolved. The core makes sure that the fsw_ext4_dnode_fill has been
+ * called on the dnode and that it really is a symlink.
+ *
+ * For ext4, the target path can be stored inline in the inode structure (in the space
+ * otherwise occupied by the block pointers) or in the inode's data. There is no flag
+ * indicating this, only the number of blocks entry (i_blocks) can be used as an
+ * indication. The check used here comes from the Linux kernel.
+ *
+ * @param vol          Volume the symlink lives on.
+ * @param dno          Symlink dnode with a loaded raw inode.
+ * @param link_target  Receives the target path.
+ * @return FSW_VOLUME_CORRUPTED if the stored length exceeds FSW_PATH_MAX or,
+ *         for an inline target, the size of the i_block area; otherwise the
+ *         status of fsw_strdup_coerce or fsw_dnode_readlink_data.
+ */
+
+static fsw_status_t fsw_ext4_readlink(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
+                                      struct fsw_string *link_target)
+{
+    fsw_status_t    status;
+    int             ea_blocks;
+    struct fsw_string s;
+
+    if (dno->g.size > FSW_PATH_MAX)
+        return FSW_VOLUME_CORRUPTED;
+
+    /* Linux kernels ext4_inode_is_fast_symlink... */
+    // An extended attribute block, if present, is counted in i_blocks (in
+    // 512-byte units) and must not be mistaken for symlink data.
+    ea_blocks = dno->raw->i_file_acl_lo ? (vol->g.log_blocksize >> 9) : 0;
+
+    if (dno->raw->i_blocks_lo - ea_blocks == 0) {
+        // "fast" symlink, path is stored inside the inode
+
+        // The path is copied straight out of i_block, so a length beyond that
+        // area would read past the inline storage of a damaged inode.
+        if (dno->g.size > sizeof(dno->raw->i_block))
+            return FSW_VOLUME_CORRUPTED;
+
+        s.type = FSW_STRING_TYPE_ISO88591;
+        s.size = s.len = (int)dno->g.size;
+        s.data = dno->raw->i_block;
+        status = fsw_strdup_coerce(link_target, vol->g.host_string_type, &s);
+    } else {
+        // "slow" symlink, path is stored in normal inode data
+        status = fsw_dnode_readlink_data(dno, link_target);
+    }
+
+    return status;
+}
+
+// EOF
diff --git a/filesystems/fsw_ext4.h b/filesystems/fsw_ext4.h
new file mode 100644 (file)
index 0000000..df7ad35
--- /dev/null
@@ -0,0 +1,66 @@
+/**
+ * \file fsw_ext4.h
+ * ext4 file system driver header.
+ */
+
+/*-
+ * Copyright (c) 2012 Stefan Agner
+ * Portions Copyright (c) 2006 Christoph Pfisterer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef _FSW_EXT4_H_
+#define _FSW_EXT4_H_
+
+#define VOLSTRUCTNAME fsw_ext4_volume
+#define DNODESTRUCTNAME fsw_ext4_dnode
+#include "fsw_core.h"
+
+#include "fsw_ext4_disk.h"
+
+
+//! Block size (in bytes) to be used when reading the ext4 superblock.
+#define EXT4_SUPERBLOCK_BLOCKSIZE  1024
+//! Block number where the (master copy of the) ext4 superblock resides,
+//! counted in blocks of EXT4_SUPERBLOCK_BLOCKSIZE bytes.
+//! NOTE(review): presumably zero-based, i.e. byte offset 1024 -- confirm against the mount code.
+#define EXT4_SUPERBLOCK_BLOCKNO       1
+
+
+/**
+ * ext4: Volume structure with ext4-specific data.
+ *
+ * Starts with the generic volume structure, followed by the fields that only the
+ * ext4 driver uses.
+ */
+
+struct fsw_ext4_volume {
+    struct fsw_volume g;            //!< Generic volume structure
+    
+    struct ext4_super_block *sb;    //!< Full raw ext4 superblock structure
+    fsw_u32     *inotab_bno;        //!< Block numbers of the inode tables
+    fsw_u32     ind_bcnt;           //!< Number of blocks addressable through an indirect block
+    fsw_u32     dind_bcnt;          //!< Number of blocks addressable through a double-indirect block
+    fsw_u32     inode_size;         //!< Size of inode structure in bytes
+};
+
+/**
+ * ext4: Dnode structure with ext4-specific data.
+ *
+ * Starts with the generic dnode structure, followed by a pointer to the raw
+ * on-disk inode (struct ext4_inode).
+ */
+
+struct fsw_ext4_dnode {
+    struct fsw_dnode g;             //!< Generic dnode structure
+    
+    struct ext4_inode *raw;         //!< Full raw inode structure
+};
+
+
+#endif
diff --git a/filesystems/fsw_ext4_disk.h b/filesystems/fsw_ext4_disk.h
new file mode 100644 (file)
index 0000000..f268903
--- /dev/null
@@ -0,0 +1,499 @@
+/**
+ * \file fsw_ext4_disk.h
+ * ext4 file system on-disk structures.
+ */
+
+/*-
+ * Copyright (c) 2012 Stefan Agner
+ * Portions Copyright (c) 2006 Christoph Pfisterer
+ * Portions Copyright (c) 1991-2012 by various Linux kernel contributors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef _FSW_EXT4_DISK_H_
+#define _FSW_EXT4_DISK_H_
+
+// types
+//
+// Linux-kernel-style fixed-width type names, mapped onto the fsw_* integer types so
+// that the structure definitions below can keep the field types used in the kernel
+// header they were taken from.
+
+typedef fsw_s8  __s8;
+typedef fsw_u8  __u8;
+typedef fsw_s16 __s16;
+typedef fsw_u16 __u16;
+typedef fsw_s32 __s32;
+typedef fsw_u32 __u32;
+typedef fsw_s64 __s64;
+typedef fsw_u64 __u64;
+
+// The __leNN names are plain aliases of the unsigned types of the same width.
+// They mark fields as little-endian (kernel naming convention), but the typedefs
+// themselves perform no byte swapping.
+typedef __u16   __le16;
+typedef __u32   __le32;
+typedef __u64   __le64;
+
+//
+// from Linux kernel, fs/ext4/ext4.h
+//
+
+/*
+ * Special inode numbers
+ *
+ * Fixed inode numbers with a predefined meaning; the individual meanings are
+ * given next to each constant.
+ */
+#define        EXT4_BAD_INO             1      /* Bad blocks inode */
+#define EXT4_ROOT_INO           2      /* Root inode */
+#define EXT4_USR_QUOTA_INO      3      /* User quota inode */
+#define EXT4_GRP_QUOTA_INO      4      /* Group quota inode */
+#define EXT4_BOOT_LOADER_INO    5      /* Boot loader inode */
+#define EXT4_UNDEL_DIR_INO      6      /* Undelete directory inode */
+#define EXT4_RESIZE_INO                 7      /* Reserved group descriptors inode */
+#define EXT4_JOURNAL_INO        8      /* Journal inode */
+
+/*
+ * The second extended file system magic number
+ * (wording inherited from the ext2 header; here it is exposed as EXT4_SUPER_MAGIC).
+ * NOTE(review): presumably compared against ext4_super_block.s_magic -- confirm in the mount code.
+ */
+#define EXT4_SUPER_MAGIC        0xEF53
+
+/*
+ * Macro-instructions used to manage several block sizes
+ *
+ * In all macros below, `s` is a pointer to a struct ext4_super_block.
+ *
+ *  EXT4_BLOCK_SIZE(s)       block size in bytes: EXT4_MIN_BLOCK_SIZE << s_log_block_size
+ *  EXT4_ADDR_PER_BLOCK(s)   number of 32-bit block numbers that fit into one block
+ *  EXT4_BLOCK_SIZE_BITS(s)  log2 of the block size: s_log_block_size + 10
+ *  EXT4_INODE_SIZE(s)       EXT4_GOOD_OLD_INODE_SIZE for EXT4_GOOD_OLD_REV volumes,
+ *                           s_inode_size from the superblock otherwise
+ *
+ * NOTE(review): s_log_block_size is used as a shift count without any range check
+ * here; callers presumably validate the result against EXT4_MAX_BLOCK_SIZE -- confirm.
+ */
+#define EXT4_MIN_BLOCK_SIZE             1024
+#define EXT4_MAX_BLOCK_SIZE             4096
+#define EXT4_MIN_BLOCK_LOG_SIZE           10
+#define EXT4_BLOCK_SIZE(s)              (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+#define EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
+#define EXT4_BLOCK_SIZE_BITS(s)         ((s)->s_log_block_size + 10)
+#define EXT4_INODE_SIZE(s)      (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
+                                 EXT4_GOOD_OLD_INODE_SIZE : \
+                                 (s)->s_inode_size)
+
+/*
+ * Structure of a blocks group descriptor
+ *
+ * The fields up to and including bg_checksum add up to 32 bytes
+ * (EXT4_MIN_DESC_SIZE); the complete structure adds up to 64 bytes
+ * (EXT4_MIN_DESC_SIZE_64BIT). The *_hi fields carry the upper bits of the
+ * corresponding *_lo fields.
+ * NOTE(review): the *_hi half is presumably only present on disk when the
+ * descriptor size is at least 64 bytes -- confirm before reading it.
+ */
+struct ext4_group_desc
+{
+       __le32  bg_block_bitmap_lo;     /* Blocks bitmap block */
+       __le32  bg_inode_bitmap_lo;     /* Inodes bitmap block */
+       __le32  bg_inode_table_lo;      /* Inodes table block */
+       __le16  bg_free_blocks_count_lo;/* Free blocks count */
+       __le16  bg_free_inodes_count_lo;/* Free inodes count */
+       __le16  bg_used_dirs_count_lo;  /* Directories count */
+       __le16  bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
+       __le32  bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
+       __le16  bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
+       __le16  bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
+       __le16  bg_itable_unused_lo;    /* Unused inodes count */
+       __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
+       __le32  bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
+       __le32  bg_inode_bitmap_hi;     /* Inodes bitmap block MSB */
+       __le32  bg_inode_table_hi;      /* Inodes table block MSB */
+       __le16  bg_free_blocks_count_hi;/* Free blocks count MSB */
+       __le16  bg_free_inodes_count_hi;/* Free inodes count MSB */
+       __le16  bg_used_dirs_count_hi;  /* Directories count MSB */
+       __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
+       __le32  bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
+       __le16  bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
+       __le16  bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
+       __u32   bg_reserved;
+};
+
+
+/*
+ * Macro-instructions used to manage group descriptors
+ *
+ * In the function-like macros, `s` is a pointer to a struct ext4_super_block.
+ * EXT4_DESC_SIZE(s) returns s_desc_size exactly as stored in the superblock, and
+ * EXT4_DESC_PER_BLOCK(s) divides the block size by that value.
+ *
+ * NOTE(review): if s_desc_size is 0, EXT4_DESC_PER_BLOCK(s) divides by zero.
+ * Callers presumably validate s_desc_size or fall back to EXT4_MIN_DESC_SIZE
+ * themselves -- confirm.
+ */
+#define EXT4_MIN_DESC_SIZE              32
+#define EXT4_MIN_DESC_SIZE_64BIT        64
+#define EXT4_MAX_DESC_SIZE              EXT4_MIN_BLOCK_SIZE
+#define EXT4_DESC_SIZE(s)              ((s)->s_desc_size)
+#define EXT4_BLOCKS_PER_GROUP(s)       ((s)->s_blocks_per_group)
+#define EXT4_DESC_PER_BLOCK(s)         (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
+#define EXT4_INODES_PER_GROUP(s)       ((s)->s_inodes_per_group)
+
+/*
+ * Constants relative to the data blocks
+ *
+ * These describe the ext4_inode.i_block array, which has EXT4_N_BLOCKS (15)
+ * entries: EXT4_NDIR_BLOCKS (12) leading entries, followed by the entries at
+ * index EXT4_IND_BLOCK (12), EXT4_DIND_BLOCK (13) and EXT4_TIND_BLOCK (14).
+ * Going by the names, these are the direct, indirect, double-indirect and
+ * triple-indirect block pointers.
+ */
+#define EXT4_NDIR_BLOCKS                12
+#define EXT4_IND_BLOCK                  EXT4_NDIR_BLOCKS
+#define EXT4_DIND_BLOCK                 (EXT4_IND_BLOCK + 1)
+#define EXT4_TIND_BLOCK                 (EXT4_DIND_BLOCK + 1)
+#define EXT4_N_BLOCKS                   (EXT4_TIND_BLOCK + 1)
+
+/*
+ * Inode flags
+ *
+ * Bit masks; each constant has exactly one bit set.
+ * NOTE(review): presumably these apply to ext4_inode.i_flags ("File flags") -- confirm.
+ */
+#define EXT4_SECRM_FL                   0x00000001 /* Secure deletion */
+#define EXT4_UNRM_FL                    0x00000002 /* Undelete */
+#define EXT4_COMPR_FL                   0x00000004 /* Compress file */
+#define EXT4_SYNC_FL                    0x00000008 /* Synchronous updates */
+#define EXT4_IMMUTABLE_FL               0x00000010 /* Immutable file */
+#define EXT4_APPEND_FL                  0x00000020 /* writes to file may only append */
+#define EXT4_NODUMP_FL                  0x00000040 /* do not dump file */
+#define EXT4_NOATIME_FL                 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define EXT4_DIRTY_FL                   0x00000100
+#define EXT4_COMPRBLK_FL                0x00000200 /* One or more compressed clusters */
+#define EXT4_NOCOMP_FL                  0x00000400 /* Don't compress */
+#define EXT4_ECOMPR_FL                  0x00000800 /* Compression error */
+/* End compression flags --- maybe not all used */
+#define EXT4_INDEX_FL                   0x00001000 /* hash-indexed directory */
+#define EXT4_IMAGIC_FL                  0x00002000 /* AFS directory */
+#define EXT4_JOURNAL_DATA_FL            0x00004000 /* Reserved for ext3 */
+#define EXT4_NOTAIL_FL                  0x00008000 /* file tail should not be merged */
+#define EXT4_DIRSYNC_FL                 0x00010000 /* dirsync behaviour (directories only) */
+#define EXT4_TOPDIR_FL                  0x00020000 /* Top of directory hierarchies*/
+#define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
+#define EXT4_EXTENTS_FL                 0x00080000 /* Inode uses extents */
+#define EXT4_EA_INODE_FL                0x00200000 /* Inode used for large EA */
+#define EXT4_EOFBLOCKS_FL               0x00400000 /* Blocks allocated beyond EOF */
+#define EXT4_RESERVED_FL                0x80000000 /* reserved for ext4 lib */
+
+/* Combined masks of the flags above. */
+#define EXT4_FL_USER_VISIBLE           0x004BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE                0x004B80FF /* User modifiable flags */
+
+
+/*
+ * Structure of an inode on the disk
+ *
+ * The fields up to and including osd2 add up to 128 bytes
+ * (EXT4_GOOD_OLD_INODE_SIZE). i_extra_isize and the fields after it lie beyond
+ * that.
+ * NOTE(review): the fields after osd2 are presumably only valid when the
+ * volume's inode size is larger than 128 bytes -- confirm before reading them.
+ *
+ * i_block holds EXT4_N_BLOCKS 32-bit entries (60 bytes). Besides block pointers
+ * it can also hold an inline symlink target or the root of an extent tree; the
+ * comments in this file on fsw_ext4_readlink and on the extent structures cover
+ * those uses.
+ */
+struct ext4_inode {
+       __le16  i_mode;         /* File mode */
+       __le16  i_uid;          /* Low 16 bits of Owner Uid */
+       __le32  i_size_lo;      /* Size in bytes */
+       __le32  i_atime;        /* Access time */
+       __le32  i_ctime;        /* Inode Change time */
+       __le32  i_mtime;        /* Modification time */
+       __le32  i_dtime;        /* Deletion Time */
+       __le16  i_gid;          /* Low 16 bits of Group Id */
+       __le16  i_links_count;  /* Links count */
+       __le32  i_blocks_lo;    /* Blocks count */
+       __le32  i_flags;        /* File flags */
+       union {
+               struct {
+                       __le32  l_i_version;
+               } linux1;
+               struct {
+                       __u32  h_i_translator;
+               } hurd1;
+               struct {
+                       __u32  m_i_reserved1;
+               } masix1;
+       } osd1;                         /* OS dependent 1 */
+       __le32  i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
+       __le32  i_generation;   /* File version (for NFS) */
+       __le32  i_file_acl_lo;  /* File ACL */
+       __le32  i_size_high;
+       __le32  i_obso_faddr;   /* Obsoleted fragment address */
+       union {
+               struct {
+                       __le16  l_i_blocks_high; /* were l_i_reserved1 */
+                       __le16  l_i_file_acl_high;
+                       __le16  l_i_uid_high;   /* these 2 fields */
+                       __le16  l_i_gid_high;   /* were reserved2[0] */
+                       __le16  l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
+                       __le16  l_i_reserved;
+               } linux2;
+               struct {
+                       __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
+                       __u16   h_i_mode_high;
+                       __u16   h_i_uid_high;
+                       __u16   h_i_gid_high;
+                       __u32   h_i_author;
+               } hurd2;
+               struct {
+                       __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
+                       __le16  m_i_file_acl_high;
+                       __u32   m_i_reserved2[2];
+               } masix2;
+       } osd2;                         /* OS dependent 2 */
+       __le16  i_extra_isize;
+       __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
+       __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
+       __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
+       __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
+       __le32  i_crtime;       /* File Creation time */
+       __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
+       __le32  i_version_hi;   /* high 32 bits for 64-bit version */
+};
+
+
+/*
+ * Inode flags used for atomic set/get
+ *
+ * Bit numbers (not masks) of the EXT4_*_FL inode flag masks defined in this
+ * file: each value is the position of the single bit set in the corresponding
+ * mask, e.g. EXT4_INODE_EXTENTS (19) matches EXT4_EXTENTS_FL (0x00080000).
+ * Bit 20 has no entry. Note the spelling difference between
+ * EXT4_INODE_NOCOMPR and EXT4_NOCOMP_FL.
+ */
+enum {
+       EXT4_INODE_SECRM        = 0,    /* Secure deletion */
+       EXT4_INODE_UNRM         = 1,    /* Undelete */
+       EXT4_INODE_COMPR        = 2,    /* Compress file */
+       EXT4_INODE_SYNC         = 3,    /* Synchronous updates */
+       EXT4_INODE_IMMUTABLE    = 4,    /* Immutable file */
+       EXT4_INODE_APPEND       = 5,    /* writes to file may only append */
+       EXT4_INODE_NODUMP       = 6,    /* do not dump file */
+       EXT4_INODE_NOATIME      = 7,    /* do not update atime */
+/* Reserved for compression usage... */
+       EXT4_INODE_DIRTY        = 8,
+       EXT4_INODE_COMPRBLK     = 9,    /* One or more compressed clusters */
+       EXT4_INODE_NOCOMPR      = 10,   /* Don't compress */
+       EXT4_INODE_ECOMPR       = 11,   /* Compression error */
+/* End compression flags --- maybe not all used */
+       EXT4_INODE_INDEX        = 12,   /* hash-indexed directory */
+       EXT4_INODE_IMAGIC       = 13,   /* AFS directory */
+       EXT4_INODE_JOURNAL_DATA = 14,   /* file data should be journaled */
+       EXT4_INODE_NOTAIL       = 15,   /* file tail should not be merged */
+       EXT4_INODE_DIRSYNC      = 16,   /* dirsync behaviour (directories only) */
+       EXT4_INODE_TOPDIR       = 17,   /* Top of directory hierarchies*/
+       EXT4_INODE_HUGE_FILE    = 18,   /* Set to each huge file */
+       EXT4_INODE_EXTENTS      = 19,   /* Inode uses extents */
+       EXT4_INODE_EA_INODE     = 21,   /* Inode used for large EA */
+       EXT4_INODE_EOFBLOCKS    = 22,   /* Blocks allocated beyond EOF */
+       EXT4_INODE_RESERVED     = 31,   /* reserved for ext4 lib */
+};
+
+/*
+ * Structure of the super block
+ *
+ * The numbers in the leading comments (00, 10, 20, ...) are hexadecimal byte
+ * offsets of the field they precede. The structure is padded by s_reserved
+ * up to the trailing s_checksum field.
+ *
+ * EXT4_S_ERR_START and EXT4_S_ERR_END, defined inside the structure body,
+ * are the byte offsets of s_error_count and s_mount_opts, i.e. they delimit the
+ * error-tracking fields.
+ */
+struct ext4_super_block {
+/*00*/ __le32  s_inodes_count;         /* Inodes count */
+       __le32  s_blocks_count_lo;      /* Blocks count */
+       __le32  s_r_blocks_count_lo;    /* Reserved blocks count */
+       __le32  s_free_blocks_count_lo; /* Free blocks count */
+/*10*/ __le32  s_free_inodes_count;    /* Free inodes count */
+       __le32  s_first_data_block;     /* First Data Block */
+       __le32  s_log_block_size;       /* Block size */
+       __le32  s_log_cluster_size;     /* Allocation cluster size */
+/*20*/ __le32  s_blocks_per_group;     /* # Blocks per group */
+       __le32  s_clusters_per_group;   /* # Clusters per group */
+       __le32  s_inodes_per_group;     /* # Inodes per group */
+       __le32  s_mtime;                /* Mount time */
+/*30*/ __le32  s_wtime;                /* Write time */
+       __le16  s_mnt_count;            /* Mount count */
+       __le16  s_max_mnt_count;        /* Maximal mount count */
+       __le16  s_magic;                /* Magic signature */
+       __le16  s_state;                /* File system state */
+       __le16  s_errors;               /* Behaviour when detecting errors */
+       __le16  s_minor_rev_level;      /* minor revision level */
+/*40*/ __le32  s_lastcheck;            /* time of last check */
+       __le32  s_checkinterval;        /* max. time between checks */
+       __le32  s_creator_os;           /* OS */
+       __le32  s_rev_level;            /* Revision level */
+/*50*/ __le16  s_def_resuid;           /* Default uid for reserved blocks */
+       __le16  s_def_resgid;           /* Default gid for reserved blocks */
+       /*
+        * These fields are for EXT4_DYNAMIC_REV superblocks only.
+        *
+        * Note: the difference between the compatible feature set and
+        * the incompatible feature set is that if there is a bit set
+        * in the incompatible feature set that the kernel doesn't
+        * know about, it should refuse to mount the filesystem.
+        *
+        * e2fsck's requirements are more strict; if it doesn't know
+        * about a feature in either the compatible or incompatible
+        * feature set, it must abort and not try to meddle with
+        * things it doesn't understand...
+        */
+       __le32  s_first_ino;            /* First non-reserved inode */
+       __le16  s_inode_size;           /* size of inode structure */
+       __le16  s_block_group_nr;       /* block group # of this superblock */
+       __le32  s_feature_compat;       /* compatible feature set */
+/*60*/ __le32  s_feature_incompat;     /* incompatible feature set */
+       __le32  s_feature_ro_compat;    /* readonly-compatible feature set */
+/*68*/ __u8    s_uuid[16];             /* 128-bit uuid for volume */
+/*78*/ char    s_volume_name[16];      /* volume name */
+/*88*/ char    s_last_mounted[64];     /* directory where last mounted */
+/*C8*/ __le32  s_algorithm_usage_bitmap; /* For compression */
+       /*
+        * Performance hints.  Directory preallocation should only
+        * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+        */
+       __u8    s_prealloc_blocks;      /* Nr of blocks to try to preallocate*/
+       __u8    s_prealloc_dir_blocks;  /* Nr to preallocate for dirs */
+       __le16  s_reserved_gdt_blocks;  /* Per group desc for online growth */
+       /*
+        * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
+        */
+/*D0*/ __u8    s_journal_uuid[16];     /* uuid of journal superblock */
+/*E0*/ __le32  s_journal_inum;         /* inode number of journal file */
+       __le32  s_journal_dev;          /* device number of journal file */
+       __le32  s_last_orphan;          /* start of list of inodes to delete */
+       __le32  s_hash_seed[4];         /* HTREE hash seed */
+       __u8    s_def_hash_version;     /* Default hash version to use */
+       __u8    s_jnl_backup_type;
+       __le16  s_desc_size;            /* size of group descriptor */
+/*100*/        __le32  s_default_mount_opts;
+       __le32  s_first_meta_bg;        /* First metablock block group */
+       __le32  s_mkfs_time;            /* When the filesystem was created */
+       __le32  s_jnl_blocks[17];       /* Backup of the journal inode */
+       /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */
+/*150*/        __le32  s_blocks_count_hi;      /* Blocks count */
+       __le32  s_r_blocks_count_hi;    /* Reserved blocks count */
+       __le32  s_free_blocks_count_hi; /* Free blocks count */
+       __le16  s_min_extra_isize;      /* All inodes have at least # bytes */
+       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
+       __le32  s_flags;                /* Miscellaneous flags */
+       __le16  s_raid_stride;          /* RAID stride */
+       __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
+       __le64  s_mmp_block;            /* Block for multi-mount protection */
+       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+       __u8    s_checksum_type;        /* metadata checksum algorithm used */
+       __le16  s_reserved_pad;
+       __le64  s_kbytes_written;       /* nr of lifetime kilobytes written */
+       __le32  s_snapshot_inum;        /* Inode number of active snapshot */
+       __le32  s_snapshot_id;          /* sequential ID of active snapshot */
+       __le64  s_snapshot_r_blocks_count; /* reserved blocks for active
+                                             snapshot's future use */
+       __le32  s_snapshot_list;        /* inode number of the head of the
+                                          on-disk snapshot list */
+#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
+       __le32  s_error_count;          /* number of fs errors */
+       __le32  s_first_error_time;     /* first time an error happened */
+       __le32  s_first_error_ino;      /* inode involved in first error */
+       __le64  s_first_error_block;    /* block involved of first error */
+       __u8    s_first_error_func[32]; /* function where the error happened */
+       __le32  s_first_error_line;     /* line number where error happened */
+       __le32  s_last_error_time;      /* most recent time of an error */
+       __le32  s_last_error_ino;       /* inode involved in last error */
+       __le32  s_last_error_line;      /* line number where error happened */
+       __le64  s_last_error_block;     /* block involved of last error */
+       __u8    s_last_error_func[32];  /* function where the error happened */
+#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
+       __u8    s_mount_opts[64];
+       __le32  s_usr_quota_inum;       /* inode for tracking user quota */
+       __le32  s_grp_quota_inum;       /* inode for tracking group quota */
+       __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
+       __le32  s_reserved[108];        /* Padding to the end of the block */
+       __le32  s_checksum;             /* crc32c(superblock) */
+};
+
+/*
+ * Revision levels
+ *
+ * Values of ext4_super_block.s_rev_level. EXT4_INODE_SIZE() uses
+ * EXT4_GOOD_OLD_INODE_SIZE (128 bytes) for EXT4_GOOD_OLD_REV volumes and the
+ * superblock's s_inode_size for everything else.
+ */
+#define EXT4_GOOD_OLD_REV       0       /* The good old (original) format */
+#define EXT4_DYNAMIC_REV        1       /* V2 format w/ dynamic inode sizes */
+
+#define EXT4_CURRENT_REV        EXT4_GOOD_OLD_REV
+#define EXT4_MAX_SUPP_REV       EXT4_DYNAMIC_REV
+
+#define EXT4_GOOD_OLD_INODE_SIZE 128
+
+/*
+ * Feature set definitions (only the ones we need for read support)
+ *
+ * Bit masks for the "incompatible feature set" (ext4_super_block.s_feature_incompat).
+ * EXT4_FEATURE_INCOMPAT_SUPP is the union of the incompatible features this
+ * driver declares as supported.
+ * NOTE(review): presumably the mount code rejects volumes that have any
+ * s_feature_incompat bit set outside EXT4_FEATURE_INCOMPAT_SUPP -- confirm.
+ */
+
+#define EXT4_FEATURE_INCOMPAT_COMPRESSION      0x0001
+#define EXT4_FEATURE_INCOMPAT_FILETYPE         0x0002
+#define EXT4_FEATURE_INCOMPAT_RECOVER          0x0004 /* Needs recovery */
+#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV      0x0008 /* Journal device */
+#define EXT4_FEATURE_INCOMPAT_META_BG          0x0010
+#define EXT4_FEATURE_INCOMPAT_EXTENTS          0x0040 /* extents support */
+#define EXT4_FEATURE_INCOMPAT_64BIT            0x0080
+#define EXT4_FEATURE_INCOMPAT_MMP               0x0100
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG          0x0200
+#define EXT4_FEATURE_INCOMPAT_EA_INODE         0x0400 /* EA in inode */
+#define EXT4_FEATURE_INCOMPAT_DIRDATA          0x1000 /* data in dirent */
+#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
+#define EXT4_FEATURE_INCOMPAT_LARGEDIR         0x4000 /* >2GB or 3-lvl htree */
+#define EXT4_FEATURE_INCOMPAT_INLINEDATA       0x8000 /* data in inode */
+
+#define EXT4_FEATURE_INCOMPAT_SUPP     (EXT4_FEATURE_INCOMPAT_FILETYPE| \
+                                        EXT4_FEATURE_INCOMPAT_RECOVER| \
+                                        EXT4_FEATURE_INCOMPAT_META_BG| \
+                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
+                                        EXT4_FEATURE_INCOMPAT_64BIT| \
+                                        EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+                                        EXT4_FEATURE_INCOMPAT_MMP)
+
+/*
+ * Structure of a directory entry
+ *
+ * An 8-byte fixed header (inode, rec_len, name_len, file_type) followed by the
+ * name. The name array is declared with the maximum length EXT4_NAME_LEN;
+ * name_len gives the length of the name actually stored, and rec_len the
+ * length of the whole entry.
+ */
+#define EXT4_NAME_LEN 255
+
+struct ext4_dir_entry {
+    __le32  inode;                  /* Inode number */
+    __le16  rec_len;                /* Directory entry length */
+    __u8    name_len;               /* Name length */
+    __u8    file_type;
+    char    name[EXT4_NAME_LEN];    /* File name */
+};
+// NOTE: The original Linux kernel header defines ext4_dir_entry with the original
+//  layout and ext4_dir_entry_2 with the revised layout. We simply use the revised one
+//  (the one with separate 8-bit name_len and file_type fields) under the name
+//  ext4_dir_entry.
+
+/*
+ * Ext4 directory file types.  Only the low 3 bits are used.  The
+ * other bits are reserved for now.
+ *
+ * The enumerators are numbered implicitly from 0 (EXT4_FT_UNKNOWN) to
+ * 7 (EXT4_FT_SYMLINK); EXT4_FT_MAX (8) is the number of defined types.
+ * NOTE(review): presumably these are the values of ext4_dir_entry.file_type -- confirm.
+ */
+enum {
+    EXT4_FT_UNKNOWN,
+    EXT4_FT_REG_FILE,
+    EXT4_FT_DIR,
+    EXT4_FT_CHRDEV,
+    EXT4_FT_BLKDEV,
+    EXT4_FT_FIFO,
+    EXT4_FT_SOCK,
+    EXT4_FT_SYMLINK,
+    EXT4_FT_MAX
+};
+
+/*
+ * ext4_inode has i_block array (60 bytes total).
+ * The first 12 bytes store ext4_extent_header;
+ * the remainder stores an array of ext4_extent.
+ * For non-inode extent blocks, ext4_extent_tail
+ * follows the array.
+ */
+
+/*
+ * This is the extent tail on-disk structure.
+ * All other extent structures are 12 bytes long.  It turns out that
+ * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
+ * covers all valid ext4 block sizes.  Therefore, this tail structure can be
+ * crammed into the end of the block without having to rebalance the tree.
+ *
+ * The tail itself is a single 4-byte checksum field.
+ */
+struct ext4_extent_tail {
+       __le32  et_checksum;    /* crc32c(uuid+inum+extent_block) */
+};
+
+/*
+ * This is the extent on-disk structure.
+ * It's used at the bottom of the tree.
+ *
+ * 12 bytes: the first logical block covered, the number of blocks covered, and
+ * the physical start block split into a 16-bit high part (ee_start_hi) and a
+ * 32-bit low part (ee_start_lo).
+ */
+struct ext4_extent {
+       __le32  ee_block;       /* first logical block extent covers */
+       __le16  ee_len;         /* number of blocks covered by extent */
+       __le16  ee_start_hi;    /* high 16 bits of physical block */
+       __le32  ee_start_lo;    /* low 32 bits of physical block */
+};
+
+/*
+ * This is index on-disk structure.
+ * It's used at all the levels except the bottom.
+ *
+ * 12 bytes: the first logical block covered by the subtree, and the physical
+ * block of the next-level node split into a 32-bit low part (ei_leaf_lo) and a
+ * 16-bit high part (ei_leaf_hi).
+ */
+struct ext4_extent_idx {
+       __le32  ei_block;       /* index covers logical blocks from 'block' */
+       __le32  ei_leaf_lo;     /* pointer to the physical block of the next *
+                                * level. leaf or next index could be there */
+       __le16  ei_leaf_hi;     /* high 16 bits of physical block */
+       __u16   ei_unused;
+};
+
+
+/*
+ * Each block (leaves and indexes), even inode-stored has header.
+ *
+ * 12 bytes. eh_entries is the number of valid entries following the header and
+ * eh_max the capacity; eh_depth tells whether further tree levels lie below
+ * this node.
+ * NOTE(review): presumably eh_magic must equal EXT4_EXT_MAGIC and eh_depth == 0
+ * marks a leaf holding ext4_extent entries -- confirm against the extent lookup code.
+ */
+struct ext4_extent_header {
+       __le16  eh_magic;       /* probably will support different formats */
+       __le16  eh_entries;     /* number of valid entries */
+       __le16  eh_max;         /* capacity of store in entries */
+       __le16  eh_depth;       /* has tree real underlying blocks? */
+       __le32  eh_generation;  /* generation of the tree */
+};
+
+#define EXT4_EXT_MAGIC         (0xf30a)
+
+
+#endif