]> code.delx.au - refind/blob - filesystems/fsw_ext4.c
Initial commit of new driver for Ext4. Updated the on disk layout structures
[refind] / filesystems / fsw_ext4.c
1 /**
2 * \file fsw_ext4.c
3 * ext4 file system driver code.
4 */
5
6 /*-
7 * Copyright (c) 2012 Stefan Agner
8 * Portions Copyright (c) 2006 Christoph Pfisterer
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 */
24
25 #include "fsw_ext4.h"
26
27
28 // functions
29
// Volume-level operations: mount, release of driver-private volume data, statistics.
static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol);
static void fsw_ext4_volume_free(struct fsw_ext4_volume *vol);
static fsw_status_t fsw_ext4_volume_stat(struct fsw_ext4_volume *vol, struct fsw_volume_stat *sb);

// Dnode-level operations: lazy inode loading, release, statistics, block mapping.
static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno);
static void fsw_ext4_dnode_free(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno);
static fsw_status_t fsw_ext4_dnode_stat(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
                                        struct fsw_dnode_stat *sb);
static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
                                        struct fsw_extent *extent);

// Directory operations: lookup by name, iteration, and the raw entry reader both rely on.
static fsw_status_t fsw_ext4_dir_lookup(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
                                        struct fsw_string *lookup_name, struct fsw_ext4_dnode **child_dno);
static fsw_status_t fsw_ext4_dir_read(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
                                      struct fsw_shandle *shand, struct fsw_ext4_dnode **child_dno);
static fsw_status_t fsw_ext4_read_dentry(struct fsw_shandle *shand, struct ext4_dir_entry *entry);

// Symbolic link resolution.
static fsw_status_t fsw_ext4_readlink(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
                                      struct fsw_string *link);
49
//
// Dispatch Table
//
// Hands the ext4 driver's name, private structure sizes and entry points to
// the core. The initializer is positional, so the order of the entries below
// must match the member order of struct fsw_fstype_table (declared outside
// this file).
//

struct fsw_fstype_table FSW_FSTYPE_TABLE_NAME(ext4) = {
    // file system type name "ext4" as an ISO 8859-1 string descriptor (4 characters)
    { FSW_STRING_TYPE_ISO88591, 4, 4, "ext4" },
    // sizes of the driver-specific volume and dnode structures
    sizeof(struct fsw_ext4_volume),
    sizeof(struct fsw_ext4_dnode),

    // driver entry points
    fsw_ext4_volume_mount,
    fsw_ext4_volume_free,
    fsw_ext4_volume_stat,
    fsw_ext4_dnode_fill,
    fsw_ext4_dnode_free,
    fsw_ext4_dnode_stat,
    fsw_ext4_get_extent,
    fsw_ext4_dir_lookup,
    fsw_ext4_dir_read,
    fsw_ext4_readlink,
};
70
71 /**
72 * Mount an ext4 volume. Reads the superblock and constructs the
73 * root directory dnode.
74 */
75
76 static fsw_status_t fsw_ext4_volume_mount(struct fsw_ext4_volume *vol)
77 {
78 fsw_status_t status;
79 void *buffer;
80 fsw_u32 blocksize;
81 fsw_u32 groupcnt, groupno, gdesc_per_block, gdesc_bno, gdesc_index;
82 struct ext4_group_desc *gdesc;
83 int i;
84 struct fsw_string s;
85
86 // allocate memory to keep the superblock around
87 status = fsw_alloc(sizeof(struct ext4_super_block), &vol->sb);
88 if (status)
89 return status;
90
91 // read the superblock into its buffer
92 fsw_set_blocksize(vol, EXT4_SUPERBLOCK_BLOCKSIZE, EXT4_SUPERBLOCK_BLOCKSIZE);
93 status = fsw_block_get(vol, EXT4_SUPERBLOCK_BLOCKNO, 0, &buffer);
94 if (status)
95 return status;
96 fsw_memcpy(vol->sb, buffer, sizeof(struct ext4_super_block));
97 fsw_block_release(vol, EXT4_SUPERBLOCK_BLOCKNO, buffer);
98
99 // check the superblock
100 if (vol->sb->s_magic != EXT4_SUPER_MAGIC)
101 return FSW_UNSUPPORTED;
102 if (vol->sb->s_rev_level != EXT4_GOOD_OLD_REV &&
103 vol->sb->s_rev_level != EXT4_DYNAMIC_REV)
104 return FSW_UNSUPPORTED;
105
106 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: Incompat flag %x\n"), vol->sb->s_feature_incompat));
107
108 if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV &&
109 (vol->sb->s_feature_incompat & ~(EXT4_FEATURE_INCOMPAT_FILETYPE | EXT4_FEATURE_INCOMPAT_RECOVER)))
110 return FSW_UNSUPPORTED;
111
112
113 if (vol->sb->s_rev_level == EXT4_DYNAMIC_REV &&
114 (vol->sb->s_feature_incompat & EXT4_FEATURE_INCOMPAT_RECOVER))
115 {
116 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: This ext3 file system needs recovery\n")));
117 // Print(L"Ext4 WARNING: This file system needs recovery, trying to use it anyway.\n");
118 }
119
120 // set real blocksize
121 blocksize = EXT4_BLOCK_SIZE(vol->sb);
122 fsw_set_blocksize(vol, blocksize, blocksize);
123
124 // get other info from superblock
125 vol->ind_bcnt = EXT4_ADDR_PER_BLOCK(vol->sb);
126 vol->dind_bcnt = vol->ind_bcnt * vol->ind_bcnt;
127 vol->inode_size = EXT4_INODE_SIZE(vol->sb);
128
129 for (i = 0; i < 16; i++)
130 if (vol->sb->s_volume_name[i] == 0)
131 break;
132 s.type = FSW_STRING_TYPE_ISO88591;
133 s.size = s.len = i;
134 s.data = vol->sb->s_volume_name;
135 status = fsw_strdup_coerce(&vol->g.label, vol->g.host_string_type, &s);
136 if (status)
137 return status;
138
139 // read the group descriptors to get inode table offsets
140 groupcnt = ((vol->sb->s_inodes_count - 2) / vol->sb->s_inodes_per_group) + 1;
141 gdesc_per_block = (vol->g.phys_blocksize / sizeof(struct ext4_group_desc));
142
143 status = fsw_alloc(sizeof(fsw_u32) * groupcnt, &vol->inotab_bno);
144 if (status)
145 return status;
146 for (groupno = 0; groupno < groupcnt; groupno++) {
147 // get the block group descriptor
148 gdesc_bno = (vol->sb->s_first_data_block + 1) + groupno / gdesc_per_block;
149 gdesc_index = groupno % gdesc_per_block;
150 status = fsw_block_get(vol, gdesc_bno, 1, (void **)&buffer);
151 if (status)
152 return status;
153 gdesc = ((struct ext4_group_desc *)(buffer)) + gdesc_index;
154 vol->inotab_bno[groupno] = gdesc->bg_inode_table_lo;
155 fsw_block_release(vol, gdesc_bno, buffer);
156 }
157
158 // setup the root dnode
159 status = fsw_dnode_create_root(vol, EXT4_ROOT_INO, &vol->g.root);
160 if (status)
161 return status;
162
163 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: success, blocksize %d\n"), blocksize));
164
165 return FSW_SUCCESS;
166 }
167
168 /**
169 * Free the volume data structure. Called by the core after an unmount or after
170 * an unsuccessful mount to release the memory used by the file system type specific
171 * part of the volume structure.
172 */
173
174 static void fsw_ext4_volume_free(struct fsw_ext4_volume *vol)
175 {
176 if (vol->sb)
177 fsw_free(vol->sb);
178 if (vol->inotab_bno)
179 fsw_free(vol->inotab_bno);
180 }
181
182 /**
183 * Get in-depth information on a volume.
184 */
185
186 static fsw_status_t fsw_ext4_volume_stat(struct fsw_ext4_volume *vol, struct fsw_volume_stat *sb)
187 {
188 sb->total_bytes = (fsw_u64)vol->sb->s_blocks_count_lo * vol->g.log_blocksize;
189 sb->free_bytes = (fsw_u64)vol->sb->s_free_blocks_count_lo * vol->g.log_blocksize;
190 return FSW_SUCCESS;
191 }
192
193 /**
194 * Get full information on a dnode from disk. This function is called by the core
195 * whenever it needs to access fields in the dnode structure that may not
196 * be filled immediately upon creation of the dnode. In the case of ext4, we
197 * delay fetching of the inode structure until dnode_fill is called. The size and
198 * type fields are invalid until this function has been called.
199 */
200
201 static fsw_status_t fsw_ext4_dnode_fill(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno)
202 {
203 fsw_status_t status;
204 fsw_u32 groupno, ino_in_group, ino_bno, ino_index;
205 fsw_u8 *buffer;
206
207 if (dno->raw)
208 return FSW_SUCCESS;
209
210 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: inode %d\n"), dno->g.dnode_id));
211
212 // read the inode block
213 groupno = (dno->g.dnode_id - 1) / vol->sb->s_inodes_per_group;
214 ino_in_group = (dno->g.dnode_id - 1) % vol->sb->s_inodes_per_group;
215 ino_bno = vol->inotab_bno[groupno] +
216 ino_in_group / (vol->g.phys_blocksize / vol->inode_size);
217 ino_index = ino_in_group % (vol->g.phys_blocksize / vol->inode_size);
218 status = fsw_block_get(vol, ino_bno, 2, (void **)&buffer);
219 if (status)
220 return status;
221
222 // keep our inode around
223 status = fsw_memdup((void **)&dno->raw, buffer + ino_index * vol->inode_size, vol->inode_size);
224 fsw_block_release(vol, ino_bno, buffer);
225 if (status)
226 return status;
227
228 // get info from the inode
229 dno->g.size = dno->raw->i_size_lo; // TODO: check docs for 64-bit sized files
230
231 if (S_ISREG(dno->raw->i_mode))
232 dno->g.type = FSW_DNODE_TYPE_FILE;
233 else if (S_ISDIR(dno->raw->i_mode))
234 dno->g.type = FSW_DNODE_TYPE_DIR;
235 else if (S_ISLNK(dno->raw->i_mode))
236 dno->g.type = FSW_DNODE_TYPE_SYMLINK;
237 else
238 dno->g.type = FSW_DNODE_TYPE_SPECIAL;
239
240 return FSW_SUCCESS;
241 }
242
243 /**
244 * Free the dnode data structure. Called by the core when deallocating a dnode
245 * structure to release the memory used by the file system type specific part
246 * of the dnode structure.
247 */
248
249 static void fsw_ext4_dnode_free(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno)
250 {
251 if (dno->raw)
252 fsw_free(dno->raw);
253 }
254
255 /**
256 * Get in-depth information on a dnode. The core makes sure that fsw_ext4_dnode_fill
257 * has been called on the dnode before this function is called. Note that some
258 * data is not directly stored into the structure, but passed to a host-specific
259 * callback that converts it to the host-specific format.
260 */
261
262 static fsw_status_t fsw_ext4_dnode_stat(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
263 struct fsw_dnode_stat *sb)
264 {
265 sb->used_bytes = dno->raw->i_blocks_lo * EXT4_BLOCK_SIZE(vol->sb); // very, very strange...
266 sb->store_time_posix(sb, FSW_DNODE_STAT_CTIME, dno->raw->i_ctime);
267 sb->store_time_posix(sb, FSW_DNODE_STAT_ATIME, dno->raw->i_atime);
268 sb->store_time_posix(sb, FSW_DNODE_STAT_MTIME, dno->raw->i_mtime);
269 sb->store_attr_posix(sb, dno->raw->i_mode);
270
271 return FSW_SUCCESS;
272 }
273
274 /**
275 * Retrieve file data mapping information. This function is called by the core when
276 * fsw_shandle_read needs to know where on the disk the required piece of the file's
277 * data can be found. The core makes sure that fsw_ext4_dnode_fill has been called
278 * on the dnode before. Our task here is to get the physical disk block number for
279 * the requested logical block number.
280 *
281 * TODO...
282 * The ext2 file system does not use extents, but stores a list of block numbers
283 * using the usual direct, indirect, double-indirect, triple-indirect scheme. To
284 * optimize access, this function checks if the following file blocks are mapped
285 * to consecutive disk blocks and returns a combined extent if possible.
286 */
287
288 static fsw_status_t fsw_ext4_get_extent(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
289 struct fsw_extent *extent)
290 {
291 fsw_status_t status;
292 fsw_u32 bno, release_bno, buf_bcnt, file_bcnt;
293 fsw_u32 *buffer;
294 int path[5], i;
295
296 // Preconditions: The caller has checked that the requested logical block
297 // is within the file's size. The dnode has complete information, i.e.
298 // fsw_ext4_dnode_read_info was called successfully on it.
299
300 extent->type = FSW_EXTENT_TYPE_PHYSBLOCK;
301 extent->log_count = 1;
302 bno = extent->log_start;
303
304 // try direct block pointers in the inode
305 if (bno < EXT4_NDIR_BLOCKS) {
306 path[0] = bno;
307 path[1] = -1;
308 } else {
309 bno -= EXT4_NDIR_BLOCKS;
310
311 // try indirect block
312 if (bno < vol->ind_bcnt) {
313 path[0] = EXT4_IND_BLOCK;
314 path[1] = bno;
315 path[2] = -1;
316 } else {
317 bno -= vol->ind_bcnt;
318
319 // try double-indirect block
320 if (bno < vol->dind_bcnt) {
321 path[0] = EXT4_DIND_BLOCK;
322 path[1] = bno / vol->ind_bcnt;
323 path[2] = bno % vol->ind_bcnt;
324 path[3] = -1;
325 } else {
326 bno -= vol->dind_bcnt;
327
328 // use the triple-indirect block
329 path[0] = EXT4_TIND_BLOCK;
330 path[1] = bno / vol->dind_bcnt;
331 path[2] = (bno / vol->ind_bcnt) % vol->ind_bcnt;
332 path[3] = bno % vol->ind_bcnt;
333 path[4] = -1;
334 }
335 }
336 }
337
338 // follow the indirection path
339 buffer = dno->raw->i_block;
340 buf_bcnt = EXT4_NDIR_BLOCKS;
341 release_bno = 0;
342 for (i = 0; ; i++) {
343 bno = buffer[path[i]];
344 if (bno == 0) {
345 extent->type = FSW_EXTENT_TYPE_SPARSE;
346 if (release_bno)
347 fsw_block_release(vol, release_bno, buffer);
348 return FSW_SUCCESS;
349 }
350 if (path[i+1] < 0)
351 break;
352
353 if (release_bno)
354 fsw_block_release(vol, release_bno, buffer);
355 status = fsw_block_get(vol, bno, 1, (void **)&buffer);
356 if (status)
357 return status;
358 release_bno = bno;
359 buf_bcnt = vol->ind_bcnt;
360 }
361 extent->phys_start = bno;
362
363 // check if the following blocks can be aggregated into one extent
364 file_bcnt = (fsw_u32)((dno->g.size + vol->g.log_blocksize - 1) & (vol->g.log_blocksize - 1));
365 while (path[i] + extent->log_count < buf_bcnt && // indirect block has more block pointers
366 extent->log_start + extent->log_count < file_bcnt) { // file has more blocks
367 if (buffer[path[i] + extent->log_count] == buffer[path[i] + extent->log_count - 1] + 1)
368 extent->log_count++;
369 else
370 break;
371 }
372
373 if (release_bno)
374 fsw_block_release(vol, release_bno, buffer);
375 return FSW_SUCCESS;
376 }
377
378 /**
379 * Lookup a directory's child dnode by name. This function is called on a directory
380 * to retrieve the directory entry with the given name. A dnode is constructed for
381 * this entry and returned. The core makes sure that fsw_ext4_dnode_fill has been called
382 * and the dnode is actually a directory.
383 */
384
385 static fsw_status_t fsw_ext4_dir_lookup(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
386 struct fsw_string *lookup_name, struct fsw_ext4_dnode **child_dno_out)
387 {
388 fsw_status_t status;
389 struct fsw_shandle shand;
390 fsw_u32 child_ino;
391 struct ext4_dir_entry entry;
392 struct fsw_string entry_name;
393
394 // Preconditions: The caller has checked that dno is a directory node.
395
396 entry_name.type = FSW_STRING_TYPE_ISO88591;
397
398 // setup handle to read the directory
399 status = fsw_shandle_open(dno, &shand);
400 if (status)
401 return status;
402
403 // scan the directory for the file
404 child_ino = 0;
405 while (child_ino == 0) {
406 // read next entry
407 status = fsw_ext4_read_dentry(&shand, &entry);
408 if (status)
409 goto errorexit;
410 if (entry.inode == 0) {
411 // end of directory reached
412 status = FSW_NOT_FOUND;
413 goto errorexit;
414 }
415
416 // compare name
417 entry_name.len = entry_name.size = entry.name_len;
418 entry_name.data = entry.name;
419 if (fsw_streq(lookup_name, &entry_name)) {
420 child_ino = entry.inode;
421 break;
422 }
423 }
424
425 // setup a dnode for the child item
426 status = fsw_dnode_create(dno, child_ino, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
427
428 errorexit:
429 fsw_shandle_close(&shand);
430 return status;
431 }
432
433 /**
434 * Get the next directory entry when reading a directory. This function is called during
435 * directory iteration to retrieve the next directory entry. A dnode is constructed for
436 * the entry and returned. The core makes sure that fsw_ext4_dnode_fill has been called
437 * and the dnode is actually a directory. The shandle provided by the caller is used to
438 * record the position in the directory between calls.
439 */
440
441 static fsw_status_t fsw_ext4_dir_read(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
442 struct fsw_shandle *shand, struct fsw_ext4_dnode **child_dno_out)
443 {
444 fsw_status_t status;
445 struct ext4_dir_entry entry;
446 struct fsw_string entry_name;
447
448 // Preconditions: The caller has checked that dno is a directory node. The caller
449 // has opened a storage handle to the directory's storage and keeps it around between
450 // calls.
451
452 while (1) {
453 // read next entry
454 status = fsw_ext4_read_dentry(shand, &entry);
455 if (status)
456 return status;
457 if (entry.inode == 0) // end of directory
458 return FSW_NOT_FOUND;
459
460 // skip . and ..
461 if ((entry.name_len == 1 && entry.name[0] == '.') ||
462 (entry.name_len == 2 && entry.name[0] == '.' && entry.name[1] == '.'))
463 continue;
464 break;
465 }
466
467 // setup name
468 entry_name.type = FSW_STRING_TYPE_ISO88591;
469 entry_name.len = entry_name.size = entry.name_len;
470 entry_name.data = entry.name;
471
472 // setup a dnode for the child item
473 status = fsw_dnode_create(dno, entry.inode, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
474
475 return status;
476 }
477
478 /**
479 * Read a directory entry from the directory's raw data. This internal function is used
480 * to read a raw ext2 directory entry into memory. The shandle's position pointer is adjusted
481 * to point to the next entry.
482 */
483
484 static fsw_status_t fsw_ext4_read_dentry(struct fsw_shandle *shand, struct ext4_dir_entry *entry)
485 {
486 fsw_status_t status;
487 fsw_u32 buffer_size;
488
489 while (1) {
490 // read dir_entry header (fixed length)
491 buffer_size = 8;
492 status = fsw_shandle_read(shand, &buffer_size, entry);
493 if (status)
494 return status;
495
496 if (buffer_size < 8 || entry->rec_len == 0) {
497 // end of directory reached
498 entry->inode = 0;
499 return FSW_SUCCESS;
500 }
501 if (entry->rec_len < 8)
502 return FSW_VOLUME_CORRUPTED;
503 if (entry->inode != 0) {
504 // this entry is used
505 if (entry->rec_len < 8 + entry->name_len)
506 return FSW_VOLUME_CORRUPTED;
507 break;
508 }
509
510 // valid, but unused entry, skip it
511 shand->pos += entry->rec_len - 8;
512 }
513
514 // read file name (variable length)
515 buffer_size = entry->name_len;
516 status = fsw_shandle_read(shand, &buffer_size, entry->name);
517 if (status)
518 return status;
519 if (buffer_size < entry->name_len)
520 return FSW_VOLUME_CORRUPTED;
521
522 // skip any remaining padding
523 shand->pos += entry->rec_len - (8 + entry->name_len);
524
525 return FSW_SUCCESS;
526 }
527
528 /**
529 * Get the target path of a symbolic link. This function is called when a symbolic
530 * link needs to be resolved. The core makes sure that the fsw_ext4_dnode_fill has been
531 * called on the dnode and that it really is a symlink.
532 *
533 * For ext4, the target path can be stored inline in the inode structure (in the space
534 * otherwise occupied by the block pointers) or in the inode's data. There is no flag
535 * indicating this, only the number of blocks entry (i_blocks) can be used as an
536 * indication. The check used here comes from the Linux kernel.
537 */
538
539 static fsw_status_t fsw_ext4_readlink(struct fsw_ext4_volume *vol, struct fsw_ext4_dnode *dno,
540 struct fsw_string *link_target)
541 {
542 fsw_status_t status;
543 int ea_blocks;
544 struct fsw_string s;
545
546 if (dno->g.size > FSW_PATH_MAX)
547 return FSW_VOLUME_CORRUPTED;
548
549 /* Linux kernels ext4_inode_is_fast_symlink... */
550 ea_blocks = dno->raw->i_file_acl_lo ? (vol->g.log_blocksize >> 9) : 0;
551
552 if (dno->raw->i_blocks_lo - ea_blocks == 0) {
553 // "fast" symlink, path is stored inside the inode
554 s.type = FSW_STRING_TYPE_ISO88591;
555 s.size = s.len = (int)dno->g.size;
556 s.data = dno->raw->i_block;
557 status = fsw_strdup_coerce(link_target, vol->g.host_string_type, &s);
558 } else {
559 // "slow" symlink, path is stored in normal inode data
560 status = fsw_dnode_readlink_data(dno, link_target);
561 }
562
563 return status;
564 }
565
566 // EOF