From e562783225622cbbcd3c527c6582ae5432eaf185 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 15 Dec 2012 03:31:28 +0100 Subject: [PATCH] Initial extent support, only leafs are supported by now --- filesystems/fsw_ext4.c | 146 ++++++++++++++++++++++++++++------- filesystems/fsw_ext4_disk.h | 92 ++++++++++++++++++++++ filesystems/test/fsw_posix.c | 2 +- 3 files changed, 213 insertions(+), 27 deletions(-) diff --git a/filesystems/fsw_ext4.c b/filesystems/fsw_ext4.c index 8d145e2..d5c4d95 100644 --- a/filesystems/fsw_ext4.c +++ b/filesystems/fsw_ext4.c @@ -37,6 +37,10 @@ static fsw_status_t fsw_ext4_dnode_stat(struct fsw_ext4_volume *vol, struct fsw_ struct fsw_dnode_stat *sb); static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, struct fsw_extent *extent); +static fsw_status_t fsw_ext4_get_by_blkaddr(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, + struct fsw_extent *extent); +static fsw_status_t fsw_ext4_get_by_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, + struct fsw_extent *extent); static fsw_status_t fsw_ext4_dir_lookup(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, struct fsw_string *lookup_name, struct fsw_ext4_dnode **child_dno); @@ -106,25 +110,29 @@ static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol) FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: Incompat flag %x\n"), vol->sb->s_feature_incompat)); if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV && - (vol->sb->s_feature_incompat & ~(EXT4_FEATURE_INCOMPAT_FILETYPE | EXT4_FEATURE_INCOMPAT_RECOVER))) + (vol->sb->s_feature_incompat & ~(EXT4_FEATURE_INCOMPAT_FILETYPE | EXT4_FEATURE_INCOMPAT_RECOVER | + EXT4_FEATURE_INCOMPAT_EXTENTS))) return FSW_UNSUPPORTED; - if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV && - (vol->sb->s_feature_incompat & EXT4_FEATURE_INCOMPAT_RECOVER)) - { - FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: This ext3 file system needs recovery\n"))); - // Print(L"Ext4 WARNING: This file 
system needs recovery, trying to use it anyway.\n"); - } + if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV && + (vol->sb->s_feature_incompat & EXT4_FEATURE_INCOMPAT_RECOVER)) + { + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: This ext3 file system needs recovery\n"))); + // Print(L"Ext4 WARNING: This file system needs recovery, trying to use it anyway.\n"); + } + + blocksize = EXT4_BLOCK_SIZE(vol->sb); + if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) + return FSW_UNSUPPORTED; // set real blocksize - blocksize = EXT4_BLOCK_SIZE(vol->sb); fsw_set_blocksize(vol, blocksize, blocksize); // get other info from superblock vol->ind_bcnt = EXT4_ADDR_PER_BLOCK(vol->sb); vol->dind_bcnt = vol->ind_bcnt * vol->ind_bcnt; - vol->inode_size = EXT4_INODE_SIZE(vol->sb); + vol->inode_size = vol->sb->s_inode_size;//EXT4_INODE_SIZE(vol->sb); for (i = 0; i < 16; i++) if (vol->sb->s_volume_name[i] == 0) @@ -136,10 +144,19 @@ static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol) if (status) return status; - // read the group descriptors to get inode table offsets - groupcnt = ((vol->sb->s_inodes_count - 2) / vol->sb->s_inodes_per_group) + 1; - gdesc_per_block = (vol->g.phys_blocksize / sizeof(struct ext4_group_desc)); + // size of group descriptor depends on feature.... + if (!(vol->sb->s_feature_incompat & EXT4_FEATURE_INCOMPAT_64BIT)) { + // Default minimal group descriptor size... + vol->sb->s_desc_size = EXT4_MIN_DESC_SIZE; + } + // Calculate group descriptor count the way the kernel does it... + groupcnt = (vol->sb->s_blocks_count_lo - vol->sb->s_first_data_block + + vol->sb->s_blocks_per_group - 1) / vol->sb->s_blocks_per_group; + // Descriptors in one block... s_desc_size needs to be set! 
+ gdesc_per_block = EXT4_DESC_PER_BLOCK(vol->sb); + + // Read the group descriptors to get inode table offsets status = fsw_alloc(sizeof(fsw_u32) * groupcnt, &vol->inotab_bno); if (status) return status; @@ -150,7 +167,7 @@ static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol) status = fsw_block_get(vol, gdesc_bno, 1, (void **)&buffer); if (status) return status; - gdesc = ((struct ext4_group_desc *)(buffer)) + gdesc_index; + gdesc = (struct ext4_group_desc *)(buffer + gdesc_index * vol->sb->s_desc_size); vol->inotab_bno[groupno] = gdesc->bg_inode_table_lo; fsw_block_release(vol, gdesc_bno, buffer); } @@ -207,7 +224,6 @@ static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ if (dno->raw) return FSW_SUCCESS; - FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: inode %d\n"), dno->g.dnode_id)); // read the inode block groupno = (dno->g.dnode_id - 1) / vol->sb->s_inodes_per_group; @@ -216,6 +232,7 @@ static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ ino_in_group / (vol->g.phys_blocksize / vol->inode_size); ino_index = ino_in_group % (vol->g.phys_blocksize / vol->inode_size); status = fsw_block_get(vol, ino_bno, 2, (void **)&buffer); + if (status) return status; @@ -237,6 +254,8 @@ static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ else dno->g.type = FSW_DNODE_TYPE_SPECIAL; + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: inode flags %x\n"), dno->raw->i_flags)); + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: i_mode %x\n"), dno->raw->i_mode)); return FSW_SUCCESS; } @@ -278,27 +297,101 @@ static fsw_status_t fsw_ext4_dnode_stat(struct fsw_ext4_volume *vol, struct fsw_ * on the dnode before. Our task here is to get the physical disk block number for * the requested logical block number. * - * TODO... - * The ext2 file system does not use extents, but stores a list of block numbers - * using the usual direct, indirect, double-indirect, triple-indirect scheme. 
To - * optimize access, this function checks if the following file blocks are mapped - * to consecutive disk blocks and returns a combined extent if possible. + * The ext4 file system usually uses extents to store those disk block numbers. + * However, since ext4 is backward compatible, depending on inode flags the old direct + * and indirect addressing scheme can still be in place... */ static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, struct fsw_extent *extent) { - fsw_status_t status; - fsw_u32 bno, release_bno, buf_bcnt, file_bcnt; - fsw_u32 *buffer; - int path[5], i; - // Preconditions: The caller has checked that the requested logical block // is within the file's size. The dnode has complete information, i.e. // fsw_ext4_dnode_read_info was called successfully on it. - + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_extent: inode flags %x\n"), dno->raw->i_flags)); extent->type = FSW_EXTENT_TYPE_PHYSBLOCK; extent->log_count = 1; + + if(dno->raw->i_flags & 1 << EXT4_INODE_EXTENTS) + { + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_extent: inode %d uses extents\n"), dno->g.dnode_id)); + return fsw_ext4_get_by_extent(vol, dno, extent); + } + else + { + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_extent: inode %d uses direct/indirect block addressing\n"), + dno->g.dnode_id)); + return fsw_ext4_get_by_blkaddr(vol, dno, extent); + } +} + +/** + * New ext4 extents... + */ +static fsw_status_t fsw_ext4_get_by_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, + struct fsw_extent *extent) +{ + fsw_status_t status; + fsw_u32 bno, release_bno, buf_bcnt, buf_offset, file_bcnt; + int i; + fsw_u8 *buffer; + struct ext4_extent_header *ext4_extent_header; + struct ext4_extent *ext4_extent; + + // Logical block requested by core... + bno = extent->log_start; + + // First buffer is the i_block field from the inode... 
+ buffer = dno->raw->i_block; + buf_bcnt = EXT4_NDIR_BLOCKS; + buf_offset = 0; + + ext4_extent_header = (struct ext4_extent_header *)buffer + buf_offset; + buf_offset += sizeof(struct ext4_extent_header); + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: extent header magic %x\n"), + ext4_extent_header->eh_magic)); + if(ext4_extent_header->eh_magic != EXT4_EXT_MAGIC) + return FSW_VOLUME_CORRUPTED; + + if(ext4_extent_header->eh_depth == 0) + { + // Leaf node, the header is followed by the actual extents + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: leaf extent with %d extents\n"), + ext4_extent_header->eh_entries)); + for(i = 0;i < ext4_extent_header->eh_entries;i++) + { + ext4_extent = (struct ext4_extent *)(buffer + buf_offset); + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: extent with %d len\n"), ext4_extent->ee_len)); + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: extent with %d start_hi\n"), ext4_extent->ee_start_hi)); + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_get_by_extent: extent with %d start_lo\n"), ext4_extent->ee_start_lo)); + if(bno >= ext4_extent->ee_block && bno < ext4_extent->ee_block + ext4_extent->ee_len) + { + extent->phys_start = ext4_extent->ee_start_lo; + } + buf_offset += sizeof(struct ext4_extent); + } + } + else + { + return FSW_NOT_FOUND; + } + + return FSW_SUCCESS; +} + +/** + * The ext2/ext3 file system does not use extents, but stores a list of block numbers + * using the usual direct, indirect, double-indirect, triple-indirect scheme. To + * optimize access, this function checks if the following file blocks are mapped + * to consecutive disk blocks and returns a combined extent if possible. 
+ */ +static fsw_status_t fsw_ext4_get_by_blkaddr(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno, + struct fsw_extent *extent) +{ + fsw_status_t status; + fsw_u32 bno, release_bno, buf_bcnt, file_bcnt; + int path[5], i; + fsw_u32 *buffer; bno = extent->log_start; // try direct block pointers in the inode @@ -334,7 +427,7 @@ static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ } } } - + // follow the indirection path buffer = dno->raw->i_block; buf_bcnt = EXT4_NDIR_BLOCKS; @@ -448,6 +541,7 @@ static fsw_status_t fsw_ext4_dir_read(struct fsw_ext4_volume *vol, struct fsw_ex // Preconditions: The caller has checked that dno is a directory node. The caller // has opened a storage handle to the directory's storage and keeps it around between // calls. + FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dir_read: started reading dir\n"))); while (1) { // read next entry diff --git a/filesystems/fsw_ext4_disk.h b/filesystems/fsw_ext4_disk.h index f12ce0c..f268903 100644 --- a/filesystems/fsw_ext4_disk.h +++ b/filesystems/fsw_ext4_disk.h @@ -109,6 +109,9 @@ struct ext4_group_desc /* * Macro-instructions used to manage group descriptors */ +#define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MIN_DESC_SIZE_64BIT 64 +#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE #define EXT4_DESC_SIZE(s) ((s)->s_desc_size) #define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) #define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) @@ -219,6 +222,38 @@ struct ext4_inode { __le32 i_version_hi; /* high 32 bits for 64-bit version */ }; + +/* + * Inode flags used for atomic set/get + */ +enum { + EXT4_INODE_SECRM = 0, /* Secure deletion */ + EXT4_INODE_UNRM = 1, /* Undelete */ + EXT4_INODE_COMPR = 2, /* Compress file */ + EXT4_INODE_SYNC = 3, /* Synchronous updates */ + EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ + EXT4_INODE_APPEND = 5, /* writes to file may only append */ + EXT4_INODE_NODUMP = 6, /* do not dump file */ + EXT4_INODE_NOATIME = 7, /* do not 
update atime */ +/* Reserved for compression usage... */ + EXT4_INODE_DIRTY = 8, + EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ + EXT4_INODE_NOCOMPR = 10, /* Don't compress */ + EXT4_INODE_ECOMPR = 11, /* Compression error */ +/* End compression flags --- maybe not all used */ + EXT4_INODE_INDEX = 12, /* hash-indexed directory */ + EXT4_INODE_IMAGIC = 13, /* AFS directory */ + EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ + EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ + EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ + EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ + EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ + EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ + EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ + EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ + EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ +}; + /* * Structure of the super block */ @@ -334,6 +369,7 @@ struct ext4_super_block { __le32 s_reserved[108]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; + /* * Revision levels */ @@ -403,5 +439,61 @@ enum { EXT4_FT_MAX }; +/* + * ext4_inode has i_block array (60 bytes total). + * The first 12 bytes store ext4_extent_header; + * the remainder stores an array of ext4_extent. + * For non-inode extent blocks, ext4_extent_tail + * follows the array. + */ + +/* + * This is the extent tail on-disk structure. + * All other extent structures are 12 bytes long. It turns out that + * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which + * covers all valid ext4 block sizes. Therefore, this tail structure can be + * crammed into the end of the block without having to rebalance the tree. + */ +struct ext4_extent_tail { + __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ +}; + +/* + * This is the extent on-disk structure. + * It's used at the bottom of the tree. 
+ */ +struct ext4_extent { + __le32 ee_block; /* first logical block extent covers */ + __le16 ee_len; /* number of blocks covered by extent */ + __le16 ee_start_hi; /* high 16 bits of physical block */ + __le32 ee_start_lo; /* low 32 bits of physical block */ +}; + +/* + * This is index on-disk structure. + * It's used at all the levels except the bottom. + */ +struct ext4_extent_idx { + __le32 ei_block; /* index covers logical blocks from 'block' */ + __le32 ei_leaf_lo; /* pointer to the physical block of the next * + * level. leaf or next index could be there */ + __le16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + + +/* + * Each block (leaves and indexes), even inode-stored has header. + */ +struct ext4_extent_header { + __le16 eh_magic; /* probably will support different formats */ + __le16 eh_entries; /* number of valid entries */ + __le16 eh_max; /* capacity of store in entries */ + __le16 eh_depth; /* has tree real underlying blocks? */ + __le32 eh_generation; /* generation of the tree */ +}; + +#define EXT4_EXT_MAGIC (0xf30a) + #endif diff --git a/filesystems/test/fsw_posix.c b/filesystems/test/fsw_posix.c index 6234f8c..eee8c5f 100644 --- a/filesystems/test/fsw_posix.c +++ b/filesystems/test/fsw_posix.c @@ -40,7 +40,7 @@ #ifndef FSTYPE /** The file system type name to use. */ -#define FSTYPE ext4 +#define FSTYPE ext2 #endif -- 2.39.2