From df8e96f39103adf5a13332d784040a2c62667243 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 25 Apr 2007 03:23:42 +0100 Subject: [JFFS2] Improve read_inode memory usage, v2. We originally used to read every node and allocate a jffs2_tmp_dnode_info structure for each, before processing them in (reverse) version order and discarding the ones which are obsoleted by later nodes. With huge logfiles, this behaviour caused memory problems. For example, a file involved in OLPC trac #1292 has 1822391 nodes, and would cause the XO machine to run out of memory during the first stage of read_inode(). Instead of just inserting nodes into a tree in version order as we find them, we now put them into a tree in order of their offset within the file, which allows us to immediately discard nodes which are completely obsoleted. We don't use a full tree with 'fragments' pointing to the real data structure, as we do in the normal fragtree. We sort only on the start address, and add an 'overlapped' flag to the tmp_dnode_info to indicate that the node in question is (partially) overlapped by another. When the scan is complete, we start at the end of the file, adding each node to a real fragtree as before. Where the node is non-overlapped, we just add it (it doesn't matter that it's not the latest version; there is no overlap). When the node at the end of the tree _is_ overlapped, we sort it and all its overlapping nodes into version order and then add them to the fragtree in that order. This 'early discard' reduces the peak allocation of tmp_dnode_info structures from 1.8M to a mere 62872 (3.5%) in the degenerate case referenced above. This version of the patch also correctly remembers the highest node version# seen for an inode when it's scanned. 
Signed-off-by: David Woodhouse --- fs/jffs2/nodelist.c | 460 ---------------------------------------------------- 1 file changed, 460 deletions(-) (limited to 'fs/jffs2/nodelist.c') diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 5a6b4d64206..fecffbc6355 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -397,466 +397,6 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in return 0; } -/* - * Check the data CRC of the node. - * - * Returns: 0 if the data CRC is correct; - * 1 - if incorrect; - * error code if an error occured. - */ -static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) -{ - struct jffs2_raw_node_ref *ref = tn->fn->raw; - int err = 0, pointed = 0; - struct jffs2_eraseblock *jeb; - unsigned char *buffer; - uint32_t crc, ofs, len; - size_t retlen; - - BUG_ON(tn->csize == 0); - - if (!jffs2_is_writebuffered(c)) - goto adj_acc; - - /* Calculate how many bytes were already checked */ - ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode); - len = ofs % c->wbuf_pagesize; - if (likely(len)) - len = c->wbuf_pagesize - len; - - if (len >= tn->csize) { - dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n", - ref_offset(ref), tn->csize, ofs); - goto adj_acc; - } - - ofs += len; - len = tn->csize - len; - - dbg_readinode("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n", - ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len); - -#ifndef __ECOS - /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), - * adding and jffs2_flash_read_end() interface. 
*/ - if (c->mtd->point) { - err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); - if (!err && retlen < tn->csize) { - JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); - c->mtd->unpoint(c->mtd, buffer, ofs, len); - } else if (err) - JFFS2_WARNING("MTD point failed: error code %d.\n", err); - else - pointed = 1; /* succefully pointed to device */ - } -#endif - - if (!pointed) { - buffer = kmalloc(len, GFP_KERNEL); - if (unlikely(!buffer)) - return -ENOMEM; - - /* TODO: this is very frequent pattern, make it a separate - * routine */ - err = jffs2_flash_read(c, ofs, len, &retlen, buffer); - if (err) { - JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err); - goto free_out; - } - - if (retlen != len) { - JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len); - err = -EIO; - goto free_out; - } - } - - /* Continue calculating CRC */ - crc = crc32(tn->partial_crc, buffer, len); - if(!pointed) - kfree(buffer); -#ifndef __ECOS - else - c->mtd->unpoint(c->mtd, buffer, ofs, len); -#endif - - if (crc != tn->data_crc) { - JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n", - ofs, tn->data_crc, crc); - return 1; - } - -adj_acc: - jeb = &c->blocks[ref->flash_offset / c->sector_size]; - len = ref_totlen(c, jeb, ref); - - /* - * Mark the node as having been checked and fix the - * accounting accordingly. - */ - spin_lock(&c->erase_completion_lock); - jeb->used_size += len; - jeb->unchecked_size -= len; - c->used_size += len; - c->unchecked_size -= len; - spin_unlock(&c->erase_completion_lock); - - return 0; - -free_out: - if(!pointed) - kfree(buffer); -#ifndef __ECOS - else - c->mtd->unpoint(c->mtd, buffer, ofs, len); -#endif - return err; -} - -/* - * Helper function for jffs2_add_older_frag_to_fragtree(). - * - * Checks the node if we are in the checking stage. 
- */ -static int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn) -{ - int ret; - - BUG_ON(ref_obsolete(tn->fn->raw)); - - /* We only check the data CRC of unchecked nodes */ - if (ref_flags(tn->fn->raw) != REF_UNCHECKED) - return 0; - - dbg_fragtree2("check node %#04x-%#04x, phys offs %#08x.\n", - tn->fn->ofs, tn->fn->ofs + tn->fn->size, ref_offset(tn->fn->raw)); - - ret = check_node_data(c, tn); - if (unlikely(ret < 0)) { - JFFS2_ERROR("check_node_data() returned error: %d.\n", - ret); - } else if (unlikely(ret > 0)) { - dbg_fragtree2("CRC error, mark it obsolete.\n"); - jffs2_mark_node_obsolete(c, tn->fn->raw); - } - - return ret; -} - -/* - * Helper function for jffs2_add_older_frag_to_fragtree(). - * - * Called when the new fragment that is being inserted - * splits a hole fragment. - */ -static int split_hole(struct jffs2_sb_info *c, struct rb_root *root, - struct jffs2_node_frag *newfrag, struct jffs2_node_frag *hole) -{ - dbg_fragtree2("fragment %#04x-%#04x splits the hole %#04x-%#04x\n", - newfrag->ofs, newfrag->ofs + newfrag->size, hole->ofs, hole->ofs + hole->size); - - if (hole->ofs == newfrag->ofs) { - /* - * Well, the new fragment actually starts at the same offset as - * the hole. - */ - if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) { - /* - * We replace the overlapped left part of the hole by - * the new node. - */ - - dbg_fragtree2("insert fragment %#04x-%#04x and cut the left part of the hole\n", - newfrag->ofs, newfrag->ofs + newfrag->size); - rb_replace_node(&hole->rb, &newfrag->rb, root); - - hole->ofs += newfrag->size; - hole->size -= newfrag->size; - - /* - * We know that 'hole' should be the right hand - * fragment. - */ - jffs2_fragtree_insert(hole, newfrag); - rb_insert_color(&hole->rb, root); - } else { - /* - * Ah, the new fragment is of the same size as the hole. - * Relace the hole by it. 
- */ - dbg_fragtree2("insert fragment %#04x-%#04x and overwrite hole\n", - newfrag->ofs, newfrag->ofs + newfrag->size); - rb_replace_node(&hole->rb, &newfrag->rb, root); - jffs2_free_node_frag(hole); - } - } else { - /* The new fragment lefts some hole space at the left */ - - struct jffs2_node_frag * newfrag2 = NULL; - - if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) { - /* The new frag also lefts some space at the right */ - newfrag2 = new_fragment(NULL, newfrag->ofs + - newfrag->size, hole->ofs + hole->size - - newfrag->ofs - newfrag->size); - if (unlikely(!newfrag2)) { - jffs2_free_node_frag(newfrag); - return -ENOMEM; - } - } - - hole->size = newfrag->ofs - hole->ofs; - dbg_fragtree2("left the hole %#04x-%#04x at the left and inserd fragment %#04x-%#04x\n", - hole->ofs, hole->ofs + hole->size, newfrag->ofs, newfrag->ofs + newfrag->size); - - jffs2_fragtree_insert(newfrag, hole); - rb_insert_color(&newfrag->rb, root); - - if (newfrag2) { - dbg_fragtree2("left the hole %#04x-%#04x at the right\n", - newfrag2->ofs, newfrag2->ofs + newfrag2->size); - jffs2_fragtree_insert(newfrag2, newfrag); - rb_insert_color(&newfrag2->rb, root); - } - } - - return 0; -} - -/* - * This function is used when we build inode. It expects the nodes are passed - * in the decreasing version order. The whole point of this is to improve the - * inodes checking on NAND: we check the nodes' data CRC only when they are not - * obsoleted. Previously, add_frag_to_fragtree() function was used and - * nodes were passed to it in the increasing version ordes and CRCs of all - * nodes were checked. - * - * Note: tn->fn->size shouldn't be zero. 
- * - * Returns 0 if the node was inserted - * 1 if it wasn't inserted (since it is obsolete) - * < 0 an if error occured - */ -int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, - struct jffs2_tmp_dnode_info *tn) -{ - struct jffs2_node_frag *this, *newfrag; - uint32_t lastend; - struct jffs2_full_dnode *fn = tn->fn; - struct rb_root *root = &f->fragtree; - uint32_t fn_size = fn->size, fn_ofs = fn->ofs; - int err, checked = 0; - int ref_flag; - - dbg_fragtree("insert fragment %#04x-%#04x, ver %u\n", fn_ofs, fn_ofs + fn_size, tn->version); - - /* Skip all the nodes which are completed before this one starts */ - this = jffs2_lookup_node_frag(root, fn_ofs); - if (this) - dbg_fragtree2("'this' found %#04x-%#04x (%s)\n", this->ofs, this->ofs + this->size, this->node ? "data" : "hole"); - - if (this) - lastend = this->ofs + this->size; - else - lastend = 0; - - /* Detect the preliminary type of node */ - if (fn->size >= PAGE_CACHE_SIZE) - ref_flag = REF_PRISTINE; - else - ref_flag = REF_NORMAL; - - /* See if we ran off the end of the root */ - if (lastend <= fn_ofs) { - /* We did */ - - /* - * We are going to insert the new node into the - * fragment tree, so check it. - */ - err = check_node(c, f, tn); - if (err != 0) - return err; - - fn->frags = 1; - - newfrag = new_fragment(fn, fn_ofs, fn_size); - if (unlikely(!newfrag)) - return -ENOMEM; - - err = no_overlapping_node(c, root, newfrag, this, lastend); - if (unlikely(err != 0)) { - jffs2_free_node_frag(newfrag); - return err; - } - - goto out_ok; - } - - fn->frags = 0; - - while (1) { - /* - * Here we have: - * fn_ofs < this->ofs + this->size && fn_ofs >= this->ofs. - * - * Remember, 'this' has higher version, any non-hole node - * which is already in the fragtree is newer then the newly - * inserted. - */ - if (!this->node) { - /* - * 'this' is the hole fragment, so at least the - * beginning of the new fragment is valid. 
- */ - - /* - * We are going to insert the new node into the - * fragment tree, so check it. - */ - if (!checked) { - err = check_node(c, f, tn); - if (unlikely(err != 0)) - return err; - checked = 1; - } - - if (this->ofs + this->size >= fn_ofs + fn_size) { - /* We split the hole on two parts */ - - fn->frags += 1; - newfrag = new_fragment(fn, fn_ofs, fn_size); - if (unlikely(!newfrag)) - return -ENOMEM; - - err = split_hole(c, root, newfrag, this); - if (unlikely(err)) - return err; - goto out_ok; - } - - /* - * The beginning of the new fragment is valid since it - * overlaps the hole node. - */ - - ref_flag = REF_NORMAL; - - fn->frags += 1; - newfrag = new_fragment(fn, fn_ofs, - this->ofs + this->size - fn_ofs); - if (unlikely(!newfrag)) - return -ENOMEM; - - if (fn_ofs == this->ofs) { - /* - * The new node starts at the same offset as - * the hole and supersieds the hole. - */ - dbg_fragtree2("add the new fragment instead of hole %#04x-%#04x, refcnt %d\n", - fn_ofs, fn_ofs + this->ofs + this->size - fn_ofs, fn->frags); - - rb_replace_node(&this->rb, &newfrag->rb, root); - jffs2_free_node_frag(this); - } else { - /* - * The hole becomes shorter as its right part - * is supersieded by the new fragment. - */ - dbg_fragtree2("reduce size of hole %#04x-%#04x to %#04x-%#04x\n", - this->ofs, this->ofs + this->size, this->ofs, this->ofs + this->size - newfrag->size); - - dbg_fragtree2("add new fragment %#04x-%#04x, refcnt %d\n", fn_ofs, - fn_ofs + this->ofs + this->size - fn_ofs, fn->frags); - - this->size -= newfrag->size; - jffs2_fragtree_insert(newfrag, this); - rb_insert_color(&newfrag->rb, root); - } - - fn_ofs += newfrag->size; - fn_size -= newfrag->size; - this = rb_entry(rb_next(&newfrag->rb), - struct jffs2_node_frag, rb); - - dbg_fragtree2("switch to the next 'this' fragment: %#04x-%#04x %s\n", - this->ofs, this->ofs + this->size, this->node ? 
"(data)" : "(hole)"); - } - - /* - * 'This' node is not the hole so it obsoletes the new fragment - * either fully or partially. - */ - if (this->ofs + this->size >= fn_ofs + fn_size) { - /* The new node is obsolete, drop it */ - if (fn->frags == 0) { - dbg_fragtree2("%#04x-%#04x is obsolete, mark it obsolete\n", fn_ofs, fn_ofs + fn_size); - ref_flag = REF_OBSOLETE; - } - goto out_ok; - } else { - struct jffs2_node_frag *new_this; - - /* 'This' node obsoletes the beginning of the new node */ - dbg_fragtree2("the beginning %#04x-%#04x is obsolete\n", fn_ofs, this->ofs + this->size); - - ref_flag = REF_NORMAL; - - fn_size -= this->ofs + this->size - fn_ofs; - fn_ofs = this->ofs + this->size; - dbg_fragtree2("now considering %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size); - - new_this = rb_entry(rb_next(&this->rb), struct jffs2_node_frag, rb); - if (!new_this) { - /* - * There is no next fragment. Add the rest of - * the new node as the right-hand child. - */ - if (!checked) { - err = check_node(c, f, tn); - if (unlikely(err != 0)) - return err; - checked = 1; - } - - fn->frags += 1; - newfrag = new_fragment(fn, fn_ofs, fn_size); - if (unlikely(!newfrag)) - return -ENOMEM; - - dbg_fragtree2("there are no more fragments, insert %#04x-%#04x\n", - newfrag->ofs, newfrag->ofs + newfrag->size); - rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right); - rb_insert_color(&newfrag->rb, root); - goto out_ok; - } else { - this = new_this; - dbg_fragtree2("switch to the next 'this' fragment: %#04x-%#04x %s\n", - this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)"); - } - } - } - -out_ok: - BUG_ON(fn->size < PAGE_CACHE_SIZE && ref_flag == REF_PRISTINE); - - if (ref_flag == REF_OBSOLETE) { - dbg_fragtree2("the node is obsolete now\n"); - /* jffs2_mark_node_obsolete() will adjust space accounting */ - jffs2_mark_node_obsolete(c, fn->raw); - return 1; - } - - dbg_fragtree2("the node is \"%s\" now\n", ref_flag == REF_NORMAL ? 
"REF_NORMAL" : "REF_PRISTINE"); - - /* Space accounting was adjusted at check_node_data() */ - spin_lock(&c->erase_completion_lock); - fn->raw->flash_offset = ref_offset(fn->raw) | ref_flag; - spin_unlock(&c->erase_completion_lock); - - return 0; -} - void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state) { spin_lock(&c->inocache_lock); -- cgit v1.2.3 From c00c310eac04a28d2143368ae988716792ed53ce Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 25 Apr 2007 14:16:47 +0100 Subject: [JFFS2] Tidy up licensing/copyright boilerplate. In particular, remove the bit in the LICENCE file about contacting Red Hat for alternative arrangements. Their errant IS department broke that arrangement a long time ago -- the policy of collecting copyright assignments from contributors came to an end when the plug was pulled on the servers hosting the project, without notice or reason. We do still dual-license it for use with eCos, with the GPL+exception licence approved by the FSF as being GPL-compatible. It's just that nobody has the right to license it differently. Signed-off-by: David Woodhouse --- fs/jffs2/nodelist.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/jffs2/nodelist.c') diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index fecffbc6355..ac2a4c422e3 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -1,14 +1,12 @@ /* * JFFS2 -- Journalling Flash File System, Version 2. * - * Copyright (C) 2001-2003 Red Hat, Inc. + * Copyright © 2001-2007 Red Hat, Inc. * * Created by David Woodhouse * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: nodelist.c,v 1.115 2005/11/07 11:14:40 gleixner Exp $ - * */ #include -- cgit v1.2.3 From 61c4b23770d1b0cef7c06a23378ab544eb0c64b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 25 Apr 2007 17:04:23 +0100 Subject: [JFFS2] Handle inodes with only a single metadata node with non-zero isize This should never happen unless there's corruption on the medium and the actual data nodes go missing. But the failure mode (an oops when we assume the fragtree isn't empty and go looking for its last node) isn't useful. Signed-off-by: David Woodhouse --- fs/jffs2/nodelist.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs/jffs2/nodelist.c') diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index ac2a4c422e3..4bf86088b3a 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -52,7 +52,7 @@ void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new *prev = new; } -void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint32_t size) +uint32_t jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint32_t size) { struct jffs2_node_frag *frag = jffs2_lookup_node_frag(list, size); @@ -74,18 +74,24 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint } if (size == 0) - return; + return 0; - /* - * If the last fragment starts at the RAM page boundary, it is - * REF_PRISTINE irrespective of its size. - */ frag = frag_last(list); + + /* Sanity check for truncation to longer than we started with... */ + if (!frag) + return 0; + if (frag->ofs + frag->size < size) + return frag->ofs + frag->size; + + /* If the last fragment starts at the RAM page boundary, it is + * REF_PRISTINE irrespective of its size. 
*/ if (frag->node && (frag->ofs & (PAGE_CACHE_SIZE - 1)) == 0) { dbg_fragtree2("marking the last fragment 0x%08x-0x%08x REF_PRISTINE.\n", frag->ofs, frag->ofs + frag->size); frag->node->raw->flash_offset = ref_offset(frag->node->raw) | REF_PRISTINE; } + return size; } static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, -- cgit v1.2.3