Commit yaboot 1.3.0

[yaboot.git] / second / fs_reiserfs.c
diff --git a/second/fs_reiserfs.c b/second/fs_reiserfs.c

new file mode 100644 (file)

index 0000000..f28680f
--- /dev/null
+++ b/second/fs_reiserfs.c
@@ -0,0 +1,1025 @@
+/* ReiserFS filesystem
+   
+   Copyright (C) 2001 Jeffrey Mahoney (jeffm@suse.com)
+
+   Adapted from GRUB
+
+   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+#include "types.h"
+#include "ctype.h"
+#include "string.h"
+#include "stdlib.h"
+#include "fs.h"
+#include "reiserfs/reiserfs.h"
+
+
+/* Exported in struct fs_t */
+static int reiserfs_open( struct boot_file_t *file, const char *dev_name,
+    struct partition_t *part, const char *file_name );
+static int reiserfs_read( struct boot_file_t *file, unsigned int size,
+
+    void *buffer );
+static int reiserfs_seek( struct boot_file_t *file, unsigned int newpos );
+static int reiserfs_close( struct boot_file_t *file );
+
+struct fs_t reiserfs_filesystem = {
+    name:"reiserfs",
+    open:reiserfs_open,
+    read:reiserfs_read,
+    seek:reiserfs_seek,
+    close:reiserfs_close
+};
+
+static int reiserfs_read_super( void );
+static int reiserfs_open_file( char *dirname );
+static int reiserfs_read_data( char *buf, __u32 len );
+
+
+static struct reiserfs_state reiserfs;
+static struct reiserfs_state *INFO = &reiserfs;
+
+/* Adapted from GRUB: */
+static char FSYS_BUF[FSYSREISER_CACHE_SIZE];
+int errnum;
+
+
+static int
+reiserfs_open( struct boot_file_t *file, const char *dev_name,
+               struct partition_t *part, const char *file_name )
+{
+    static char buffer[1024];
+
+    DEBUG_ENTER;
+    DEBUG_OPEN;
+
+    memset( INFO, 0, sizeof(struct reiserfs_state) );
+    INFO->file = file;
+
+    if (part)
+    {
+        DEBUG_F( "Determining offset for partition %d\n", part->part_number );
+        INFO->partition_offset = ((__u64)(part->part_start)) * ((__u64)part->blocksize);
+        DEBUG_F( "%Lu = %lu * %hu\n", INFO->partition_offset,
+                                     part->part_start,
+                                     part->blocksize );
+    }
+    else
+        INFO->partition_offset = 0;
+
+    sprintf( buffer, "%s:%d", dev_name, /*part ? part->part_number :*/ 0 );
+    file->of_device = prom_open( buffer );
+    DEBUG_F( "Trying to open dev_name=%s; filename=%s; partition offset=%Lu\n",
+             buffer, file_name, INFO->partition_offset );
+
+    if ( file->of_device == PROM_INVALID_HANDLE || file->of_device == NULL )
+    {
+       DEBUG_F( "Can't open device %s\n", file->of_device );
+        DEBUG_LEAVE(FILE_ERR_NOTFOUND);
+       return FILE_ERR_NOTFOUND;
+    }
+
+    DEBUG_F( "%s was successfully opened\n" );
+
+    if ( reiserfs_read_super() != 1 )
+    {
+       DEBUG_F( "Couldn't open ReiserFS @ %s/%Lu\n", buffer, INFO->partition_offset );
+       prom_close( file->of_device );
+        DEBUG_LEAVE(FILE_ERR_NOTFOUND);
+       return FILE_ERR_NOTFOUND;
+    }
+
+    DEBUG_F( "Attempting to open %s\n", file_name );
+    strcpy(buffer, file_name); /* reiserfs_open_file modifies argument */
+    if( reiserfs_open_file(buffer) == 0 )
+    {
+        DEBUG_F( "reiserfs_open_file failed. errnum = %d\n", errnum );
+        prom_close( file->of_device );
+        DEBUG_LEAVE(FILE_ERR_NOTFOUND);
+        return FILE_ERR_NOTFOUND;
+    }
+
+    DEBUG_F( "Successfully opened %s\n", file_name );
+
+    DEBUG_LEAVE(FILE_ERR_OK);
+    return FILE_ERR_OK;
+}
+
+static int
+reiserfs_read( struct boot_file_t *file, unsigned int size, void *buffer )
+{
+    return reiserfs_read_data( buffer, size );
+}
+
+static int
+reiserfs_seek( struct boot_file_t *file, unsigned int newpos )
+{
+    file->pos = newpos;
+    return FILE_ERR_OK;
+}
+
+static int
+reiserfs_close( struct boot_file_t *file )
+{
+    if( file->of_device )
+    {
+        prom_close(file->of_device);
+        file->of_device = 0;
+    }
+    return FILE_ERR_OK;
+}
+
+
+static __inline__ __u32
+log2( __u32 word )
+{
+    int i = 0;
+    while( word && (word & (1 << ++i)) == 0 );
+    return i;
+}
+
+static __inline__ int
+is_power_of_two( unsigned long word )
+{
+    return ( word & -word ) == word;
+}
+
+static int
+read_disk_block( struct boot_file_t *file, __u32 block, __u32 start,
+                 __u32 length, void *buf )
+{
+    __u16 fs_blocksize = INFO->blocksize == 0 ? REISERFS_OLD_BLOCKSIZE
+                                              : INFO->blocksize;
+    unsigned long long pos = block * fs_blocksize;
+    pos += INFO->partition_offset + start;
+    DEBUG_F( "Reading %lu bytes, starting at block %lu, disk offset %Lu\n",
+             length, block, pos );
+    prom_lseek( file->of_device, pos );
+    return prom_read( file->of_device, buf, length );
+}
+
+
+static int
+journal_read( __u32 block, __u32 len, char *buffer )
+{
+    return read_disk_block( INFO->file,
+                      (INFO->journal_block + block), 0,
+                       len, buffer );
+}
+
+/* Read a block from ReiserFS file system, taking the journal into
+ * account.  If the block nr is in the journal, the block from the
+ * journal taken.  
+ */
+static int
+block_read( __u32 blockNr, __u32 start, __u32 len, char *buffer )
+{
+    __u32 transactions = INFO->journal_transactions;
+    __u32 desc_block = INFO->journal_first_desc;
+    __u32 journal_mask = INFO->journal_block_count - 1;
+    __u32 translatedNr = blockNr;
+    __u32 *journal_table = JOURNAL_START;
+
+//    DEBUG_F( "block_read( %u, %u, %u, ..)\n", blockNr, start, len );
+
+    while ( transactions-- > 0 )
+    {
+       int i = 0;
+       int j_len;
+
+       if ( *journal_table != 0xffffffff )
+       {
+           /* Search for the blockNr in cached journal */
+           j_len = le32_to_cpu(*journal_table++);
+           while ( i++ < j_len )
+           {
+               if ( le32_to_cpu(*journal_table++) == blockNr )
+               {
+                   journal_table += j_len - i;
+                   goto found;
+               }
+           }
+       }
+       else
+       {
+           /* This is the end of cached journal marker.  The remaining
+            * transactions are still on disk. */
+           struct reiserfs_journal_desc desc;
+           struct reiserfs_journal_commit commit;
+
+           if ( !journal_read( desc_block, sizeof(desc), (char *) &desc ) )
+               return 0;
+
+           j_len = le32_to_cpu(desc.j_len);
+           while ( i < j_len && i < JOURNAL_TRANS_HALF )
+               if ( le32_to_cpu(desc.j_realblock[i++]) == blockNr )
+                   goto found;
+
+           if ( j_len >= JOURNAL_TRANS_HALF )
+           {
+               int commit_block = ( desc_block + 1 + j_len ) & journal_mask;
+
+               if ( !journal_read( commit_block,
+                       sizeof(commit), (char *) &commit ) )
+                   return 0;
+
+               while ( i < j_len )
+                   if ( le32_to_cpu(commit.j_realblock[i++ - JOURNAL_TRANS_HALF]) == blockNr )
+                       goto found;
+           }
+       }
+       goto not_found;
+
+      found:
+       translatedNr =
+           INFO->journal_block + ( ( desc_block + i ) & journal_mask );
+
+       DEBUG_F( "block_read: block %u is mapped to journal block %u.\n",
+           blockNr, translatedNr - INFO->journal_block );
+
+       /* We must continue the search, as this block may be overwritten in 
+        * later transactions. */
+      not_found:
+       desc_block = (desc_block + 2 + j_len) & journal_mask;
+    }
+
+    return read_disk_block( INFO->file, translatedNr, start, len, buffer );
+}
+
+/* Init the journal data structure.  We try to cache as much as
+ * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
+ * we can still read the rest from the disk on demand.
+ *
+ * The first number of valid transactions and the descriptor block of the
+ * first valid transaction are held in INFO.  The transactions are all 
+ * adjacent, but we must take care of the journal wrap around. 
+ */
+static int
+journal_init( void )
+{
+    struct reiserfs_journal_header header;
+    struct reiserfs_journal_desc desc;
+    struct reiserfs_journal_commit commit;
+    __u32 block_count = INFO->journal_block_count;
+    __u32 desc_block;
+    __u32 commit_block;
+    __u32 next_trans_id;
+    __u32 *journal_table = JOURNAL_START;
+
+    journal_read( block_count, sizeof ( header ), ( char * ) &header );
+    desc_block = le32_to_cpu(header.j_first_unflushed_offset);
+    if ( desc_block >= block_count )
+       return 0;
+
+    INFO->journal_transactions = 0;
+    INFO->journal_first_desc = desc_block;
+    next_trans_id = le32_to_cpu(header.j_last_flush_trans_id) + 1;
+
+    DEBUG_F( "journal_init: last flushed %u\n", le32_to_cpu(header.j_last_flush_trans_id) );
+
+    while ( 1 )
+    {
+       journal_read( desc_block, sizeof(desc), (char *) &desc );
+       if ( strcmp( JOURNAL_DESC_MAGIC, desc.j_magic ) != 0
+           || desc.j_trans_id != next_trans_id
+           || desc.j_mount_id != header.j_mount_id )
+           /* no more valid transactions */
+           break;
+
+       commit_block = ( desc_block + le32_to_cpu(desc.j_len) + 1 ) & ( block_count - 1 );
+       journal_read( commit_block, sizeof(commit), (char *) &commit );
+       if ( desc.j_trans_id != commit.j_trans_id
+           || desc.j_len != commit.j_len )
+           /* no more valid transactions */
+           break;
+
+
+       DEBUG_F( "Found valid transaction %u/%u at %u.\n",
+           le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
+            desc_block );
+
+
+       next_trans_id++;
+       if ( journal_table < JOURNAL_END )
+       {
+           if ( ( journal_table + 1 + le32_to_cpu(desc.j_len) ) >= JOURNAL_END )
+           {
+               /* The table is almost full; mark the end of the cached * *
+                * journal. */
+               *journal_table = 0xffffffff;
+               journal_table = JOURNAL_END;
+           }
+           else
+           {
+               int i;
+
+               /* Cache the length and the realblock numbers in the table. * 
+                * The block number of descriptor can easily be computed. *
+                * and need not to be stored here. */
+               *journal_table++ = desc.j_len;
+               for ( i = 0; i < le32_to_cpu(desc.j_len) && i < JOURNAL_TRANS_HALF; i++ )
+               {
+                   *journal_table++ = desc.j_realblock[i];
+
+                   DEBUG_F( "block %u is in journal %u.\n",
+                       le32_to_cpu(desc.j_realblock[i]), desc_block );
+
+               }
+               for ( ; i < le32_to_cpu(desc.j_len); i++ )
+               {
+                   *journal_table++ =
+                       commit.j_realblock[i - JOURNAL_TRANS_HALF];
+
+                   DEBUG_F( "block %u is in journal %u.\n",
+                       le32_to_cpu(commit.j_realblock[i - JOURNAL_TRANS_HALF]),
+                       desc_block );
+
+               }
+           }
+       }
+       desc_block = (commit_block + 1) & (block_count - 1);
+    }
+
+    DEBUG_F( "Transaction %u/%u at %u isn't valid.\n",
+       le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
+        desc_block );
+
+
+    INFO->journal_transactions
+       = next_trans_id - le32_to_cpu(header.j_last_flush_trans_id) - 1;
+    return errnum == 0;
+}
+
+/* check filesystem types and read superblock into memory buffer */
+static int
+reiserfs_read_super( void )
+{
+    struct reiserfs_super_block super;
+    __u64 superblock = REISERFS_SUPERBLOCK_BLOCK;
+
+    read_disk_block( INFO->file, superblock, 0, sizeof(super), &super );
+
+    DEBUG_F( "Found super->magic %s\n", super.s_magic );
+
+    if( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
+        strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
+    {
+       /* Try old super block position */
+       superblock = REISERFS_OLD_SUPERBLOCK_BLOCK;
+       read_disk_block( INFO->file, superblock, 0, sizeof (super),  &super );
+
+       if ( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
+             strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
+       {
+           /* pre journaling super block - untested */
+           if ( strcmp( REISERFS_SUPER_MAGIC_STRING,
+                    (char *) ((__u32) &super + 20 ) ) != 0 )
+               return 0;
+
+           super.s_blocksize = cpu_to_le16(REISERFS_OLD_BLOCKSIZE);
+           super.s_journal_block = 0;
+           super.s_version = 0;
+       }
+    }
+
+    DEBUG_F( "ReiserFS superblock data:\n" );
+    DEBUG_F( "Block count: %lu\n", le32_to_cpu(super.s_block_count) )
+    DEBUG_F( "Free blocks: %lu\n", le32_to_cpu(super.s_free_blocks) );
+    DEBUG_F( "Journal block: %lu\n", le32_to_cpu(super.s_journal_block) );
+    DEBUG_F( "Journal size (in blocks): %lu\n",
+                                    le32_to_cpu(super.s_orig_journal_size) );
+    DEBUG_F( "Root block: %lu\n\n", le32_to_cpu(super.s_root_block) );
+
+
+    INFO->version = le16_to_cpu(super.s_version);
+    INFO->blocksize = le16_to_cpu(super.s_blocksize);
+    INFO->blocksize_shift = log2( INFO->blocksize );
+
+    INFO->journal_block = le32_to_cpu(super.s_journal_block);
+    INFO->journal_block_count = le32_to_cpu(super.s_orig_journal_size);
+
+    INFO->cached_slots = (FSYSREISER_CACHE_SIZE >> INFO->blocksize_shift) - 1;
+
+    /* At this point, we've found a valid superblock. If we run into problems
+     * mounting the FS, the user should probably know. */
+
+    /* A few sanity checks ... */
+    if ( INFO->version > REISERFS_MAX_SUPPORTED_VERSION )
+    {
+        prom_printf( "ReiserFS: Unsupported version field: %u\n",
+                     INFO->version );
+       return 0;
+    }
+
+    if ( INFO->blocksize < FSYSREISER_MIN_BLOCKSIZE
+       || INFO->blocksize > FSYSREISER_MAX_BLOCKSIZE )
+    {
+        prom_printf( "ReiserFS: Unsupported block size: %u\n",
+                     INFO->blocksize );
+       return 0;
+    }
+
+    /* Setup the journal.. */
+    if ( INFO->journal_block != 0 )
+    {
+       if ( !is_power_of_two( INFO->journal_block_count ) )
+        {
+            prom_printf( "ReiserFS: Unsupported journal size, "
+                         "not a power of 2: %lu\n",
+                         INFO->journal_block_count );
+           return 0;
+        }
+
+       journal_init();
+       /* Read in super block again, maybe it is in the journal */
+       block_read( superblock, 0, sizeof (struct reiserfs_super_block),
+                    (char *) &super );
+    }
+
+    /* Read in the root block */
+    if ( !block_read( le32_to_cpu(super.s_root_block), 0,
+            INFO->blocksize, ROOT ) )
+    {
+        prom_printf( "ReiserFS: Failed to read in root block\n" );
+       return 0;
+    }
+
+    /* The root node is always the "deepest", so we can
+       determine the hieght of the tree using it. */
+    INFO->tree_depth = blkh_level(BLOCKHEAD(ROOT));
+
+
+    DEBUG_F( "root read_in: block=%u, depth=%u\n",
+       le32_to_cpu(super.s_root_block), INFO->tree_depth );
+
+    if ( INFO->tree_depth >= REISERFS_MAX_TREE_HEIGHT )
+    {
+        prom_printf( "ReiserFS: Unsupported tree depth (too deep): %u\n",
+                 INFO->tree_depth );
+       return 0;
+    }
+
+    if ( INFO->tree_depth == BLKH_LEVEL_LEAF )
+    {
+       /* There is only one node in the whole filesystem, which is
+          simultanously leaf and root */
+       memcpy( LEAF, ROOT, INFO->blocksize );
+    }
+    return 1;
+}
+
+/***************** TREE ACCESSING METHODS *****************************/
+
+/* I assume you are familiar with the ReiserFS tree, if not go to
+ * http://devlinux.com/projects/reiserfs/
+ *
+ * My tree node cache is organized as following
+ *   0   ROOT node
+ *   1   LEAF node  (if the ROOT is also a LEAF it is copied here
+ *   2-n other nodes on current path from bottom to top.  
+ *       if there is not enough space in the cache, the top most are
+ *       omitted.
+ *
+ * I have only two methods to find a key in the tree:
+ *   search_stat(dir_id, objectid) searches for the stat entry (always
+ *       the first entry) of an object.
+ *   next_key() gets the next key in tree order.
+ *
+ * This means, that I can only sequential reads of files are
+ * efficient, but this really doesn't hurt for grub.  
+ */
+
+/* Read in the node at the current path and depth into the node cache.
+ * You must set INFO->blocks[depth] before.
+ */
+static char *
+read_tree_node( __u32 blockNr, __u16 depth )
+{
+    char *cache = CACHE(depth);
+    int num_cached = INFO->cached_slots;
+
+    if ( depth < num_cached )
+    {
+       /* This is the cached part of the path.
+           Check if same block is needed. */
+       if ( blockNr == INFO->blocks[depth] )
+           return cache;
+    }
+    else
+       cache = CACHE(num_cached);
+
+    DEBUG_F( "  next read_in: block=%u (depth=%u)\n", blockNr, depth );
+
+    if ( !block_read( blockNr, 0, INFO->blocksize, cache ) )
+    {
+        DEBUG_F( "block_read failed\n" );
+       return 0;
+    }
+
+    DEBUG_F( "FOUND: blk_level=%u, blk_nr_item=%u, blk_free_space=%u\n",
+            blkh_level(BLOCKHEAD(cache)),
+            blkh_nr_item(BLOCKHEAD(cache)),
+            le16_to_cpu(BLOCKHEAD(cache)->blk_free_space) );
+
+    /* Make sure it has the right node level */
+    if ( blkh_level(BLOCKHEAD(cache)) != depth )
+    {
+        DEBUG_F( "depth = %u != %u\n", blkh_level(BLOCKHEAD(cache)), depth );
+        DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
+       errnum = FILE_ERR_BAD_FSYS;
+       return 0;
+    }
+
+    INFO->blocks[depth] = blockNr;
+    return cache;
+}
+
+/* Get the next key, i.e. the key following the last retrieved key in
+ * tree order.  INFO->current_ih and 
+ * INFO->current_info are adapted accordingly.  */
+static int
+next_key( void )
+{
+    __u16 depth;
+    struct item_head *ih = INFO->current_ih + 1;
+    char *cache;
+
+
+    DEBUG_F( "next_key:\n  old ih: key %u:%u:%u:%u version:%u\n",
+       le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
+       le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
+       le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
+       le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
+       ih_version(INFO->current_ih) );
+
+
+    if ( ih == &ITEMHEAD[blkh_nr_item(BLOCKHEAD( LEAF ))] )
+    {
+       depth = BLKH_LEVEL_LEAF;
+       /* The last item, was the last in the leaf node. * Read in the next
+        * * block */
+       do
+       {
+           if ( depth == INFO->tree_depth )
+           {
+               /* There are no more keys at all. * Return a dummy item with
+                * * MAX_KEY */
+               ih =
+                   ( struct item_head * )
+                   &BLOCKHEAD( LEAF )->blk_right_delim_key;
+               goto found;
+           }
+           depth++;
+
+           DEBUG_F( "  depth=%u, i=%u\n", depth, INFO->next_key_nr[depth] );
+
+       }
+       while ( INFO->next_key_nr[depth] == 0 );
+
+       if ( depth == INFO->tree_depth )
+           cache = ROOT;
+       else if ( depth <= INFO->cached_slots )
+           cache = CACHE( depth );
+       else
+       {
+           cache = read_tree_node( INFO->blocks[depth], --depth );
+           if ( !cache )
+               return 0;
+       }
+
+       do
+       {
+           __u16 nr_item = blkh_nr_item(BLOCKHEAD( cache ));
+           int key_nr = INFO->next_key_nr[depth]++;
+
+
+           DEBUG_F( "  depth=%u, i=%u/%u\n", depth, key_nr, nr_item );
+
+           if ( key_nr == nr_item )
+               /* This is the last item in this block, set the next_key_nr * 
+                * to 0 */
+               INFO->next_key_nr[depth] = 0;
+
+           cache =
+               read_tree_node( dc_block_number( &(DC( cache )[key_nr])),
+               --depth );
+           if ( !cache )
+               return 0;
+       }
+       while ( depth > BLKH_LEVEL_LEAF );
+
+       ih = ITEMHEAD;
+    }
+  found:
+    INFO->current_ih = ih;
+    INFO->current_item = &LEAF[ih_location(ih)];
+
+    DEBUG_F( "  new ih: key %u:%u:%u:%u version:%u\n",
+       le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
+       le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
+       le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
+       le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
+       ih_version(INFO->current_ih) );
+
+    return 1;
+}
+
+/* preconditions: reiserfs_read_super already executed, therefore 
+ *   INFO block is valid
+ * returns: 0 if error (errnum is set), 
+ *   nonzero iff we were able to find the key successfully.
+ * postconditions: on a nonzero return, the current_ih and 
+ *   current_item fields describe the key that equals the
+ *   searched key.  INFO->next_key contains the next key after
+ *   the searched key.
+ * side effects: messes around with the cache.
+ */
+static int
+search_stat( __u32 dir_id, __u32 objectid )
+{
+    char *cache;
+    int depth;
+    int nr_item;
+    int i;
+    struct item_head *ih;
+
+
+    DEBUG_F( "search_stat:\n  key %u:%u:0:0\n", le32_to_cpu(dir_id),
+            le32_to_cpu(objectid) );
+
+
+    depth = INFO->tree_depth;
+    cache = ROOT;
+
+    DEBUG_F( "depth = %d\n", depth );
+    while ( depth > BLKH_LEVEL_LEAF )
+    {
+       struct key *key;
+
+       nr_item = blkh_nr_item(BLOCKHEAD( cache ));
+
+       key = KEY( cache );
+
+       for ( i = 0; i < nr_item; i++ )
+       {
+          if (le32_to_cpu(key->k_dir_id) > le32_to_cpu(dir_id)
+              || (key->k_dir_id == dir_id
+                  && (le32_to_cpu(key->k_objectid) > le32_to_cpu(objectid)
+                      || (key->k_objectid == objectid
+                          && (key->u.k_offset_v1.k_offset
+                              | key->u.k_offset_v1.k_uniqueness) > 0))))
+               break;
+           key++;
+       }
+
+
+       DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
+
+       INFO->next_key_nr[depth] = ( i == nr_item ) ? 0 : i + 1;
+       cache = read_tree_node( dc_block_number(&(DC(cache)[i])), --depth );
+       if ( !cache )
+           return 0;
+    }
+
+    /* cache == LEAF */
+    nr_item = blkh_nr_item(BLOCKHEAD(LEAF));
+    ih = ITEMHEAD;
+    DEBUG_F( "nr_item = %d\n", nr_item );
+    for ( i = 0; i < nr_item; i++ )
+    {
+       if ( ih->ih_key.k_dir_id == dir_id
+           && ih->ih_key.k_objectid == objectid
+           && ih->ih_key.u.k_offset_v1.k_offset == 0
+           && ih->ih_key.u.k_offset_v1.k_uniqueness == 0 )
+       {
+
+           DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
+
+           INFO->current_ih = ih;
+           INFO->current_item = &LEAF[ih_location(ih)];
+
+           return 1;
+       }
+
+       ih++;
+    }
+
+    DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
+    errnum = FILE_ERR_BAD_FSYS;
+    return 0;
+}
+
+static int
+reiserfs_read_data( char *buf, __u32 len )
+{
+    __u32 blocksize;
+    __u32 offset;
+    __u32 to_read;
+    char *prev_buf = buf;
+
+
+    DEBUG_F( "reiserfs_read_data: INFO->file->pos=%Lu len=%u, offset=%Lu\n",
+       INFO->file->pos, len, (__u64) IH_KEY_OFFSET(INFO->current_ih) - 1 );
+
+
+    if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
+       || IH_KEY_OFFSET( INFO->current_ih ) > INFO->file->pos + 1 )
+    {
+       search_stat( INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid );
+       goto get_next_key;
+    }
+
+    while ( errnum == 0 )
+    {
+       if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid )
+           break;
+
+       offset = INFO->file->pos - IH_KEY_OFFSET( INFO->current_ih ) + 1;
+       blocksize = ih_item_len(INFO->current_ih);
+
+
+       DEBUG_F( "  loop: INFO->file->pos=%Lu len=%u, offset=%u blocksize=%u\n",
+           INFO->file->pos, len, offset, blocksize );
+
+
+       if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_DIRECT )
+           && offset < blocksize )
+       {
+           to_read = blocksize - offset;
+           if ( to_read > len )
+               to_read = len;
+
+            memcpy( buf, INFO->current_item + offset, to_read );
+           goto update_buf_len;
+       }
+       else if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_INDIRECT ) )
+       {
+           blocksize = ( blocksize >> 2 ) << INFO->blocksize_shift;
+
+           while ( offset < blocksize )
+           {
+               __u32 blocknr = le32_to_cpu(((__u32 *)
+                    INFO->current_item)[offset >> INFO->blocksize_shift]);
+
+               int blk_offset = offset & (INFO->blocksize - 1);
+
+               to_read = INFO->blocksize - blk_offset;
+               if ( to_read > len )
+                   to_read = len;
+
+               /* Journal is only for meta data.
+                   Data blocks can be read directly without using block_read */
+               read_disk_block( INFO->file, blocknr, blk_offset, to_read,
+                                 buf );
+
+             update_buf_len:
+               len -= to_read;
+               buf += to_read;
+               offset += to_read;
+               INFO->file->pos += to_read;
+               if ( len == 0 )
+                   goto done;
+           }
+       }
+      get_next_key:
+       next_key();
+    }
+  done:
+    return (errnum != 0) ? 0 : buf - prev_buf;
+}
+
+
+/* preconditions: reiserfs_read_super already executed, therefore 
+ *   INFO block is valid
+ * returns: 0 if error, nonzero iff we were able to find the file successfully
+ * postconditions: on a nonzero return, INFO->fileinfo contains the info
+ *   of the file we were trying to look up, filepos is 0 and filemax is 
+ *   the size of the file.
+ */
+static int
+reiserfs_open_file( char *dirname )
+{
+    struct reiserfs_de_head *de_head;
+    char *rest, ch;
+    __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
+
+    char linkbuf[PATH_MAX];    /* buffer for following symbolic links */
+    int link_count = 0;
+    int mode;
+
+    dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
+    objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
+
+    while ( 1 )
+    {
+
+       DEBUG_F( "dirname=%s\n", dirname );
+
+       /* Search for the stat info first. */
+       if ( !search_stat( dir_id, objectid ) )
+           return 0;
+
+
+       DEBUG_F( "sd_mode=0%o sd_size=%u\n",
+           sd_mode((struct stat_data *) INFO->current_item ),
+           sd_size(INFO->current_ih, INFO->current_item ));
+
+
+        mode = sd_mode((struct stat_data *)INFO->current_item);
+
+       /* If we've got a symbolic link, then chase it. */
+       if ( S_ISLNK( mode ) )
+       {
+           int len = 0;
+
+           if ( ++link_count > MAX_LINK_COUNT )
+           {
+               errnum = FILE_ERR_SYMLINK_LOOP;
+               return 0;
+           }
+
+           /* Get the symlink size. */
+            INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
+
+           /* Find out how long our remaining name is. */
+           while ( dirname[len] && !isspace( dirname[len] ) )
+               len++;
+
+           if ( INFO->file->len + len > sizeof ( linkbuf ) - 1 )
+           {
+               errnum = FILE_ERR_LENGTH;
+               return 0;
+           }
+
+           /* Copy the remaining name to the end of the symlink data. Note * 
+            * that DIRNAME and LINKBUF may overlap! */
+           memmove( linkbuf + INFO->file->len, dirname, len + 1 );
+
+           INFO->fileinfo.k_dir_id = dir_id;
+           INFO->fileinfo.k_objectid = objectid;
+           INFO->file->pos = 0;
+           if ( !next_key()
+               || reiserfs_read_data( linkbuf, INFO->file->len ) != INFO->file->len )
+               return 0;
+
+
+           DEBUG_F( "symlink=%s\n", linkbuf );
+
+
+           dirname = linkbuf;
+           if ( *dirname == '/' )
+           {
+               /* It's an absolute link, so look it up in root. */
+               dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
+               objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
+           }
+           else
+           {
+               /* Relative, so look it up in our parent directory. */
+               dir_id = parent_dir_id;
+               objectid = parent_objectid;
+           }
+
+           /* Now lookup the new name. */
+           continue;
+       }
+
+       /* if we have a real file (and we're not just printing *
+        * possibilities), then this is where we want to exit */
+
+       if ( !*dirname || isspace( *dirname ) )
+       {
+           if ( !S_ISREG( mode ) )
+           {
+               errnum = FILE_ERR_BAD_TYPE;
+               return 0;
+           }
+
+           INFO->file->pos = 0;
+            INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
+
+           INFO->fileinfo.k_dir_id = dir_id;
+           INFO->fileinfo.k_objectid = objectid;
+           return next_key();
+       }
+
+       /* continue with the file/directory name interpretation */
+       while ( *dirname == '/' )
+           dirname++;
+       if ( !S_ISDIR( mode ) )
+       {
+           errnum = FILE_ERR_BAD_TYPE;
+           return 0;
+       }
+       for ( rest = dirname; ( ch = *rest ) && !isspace( ch ) && ch != '/';
+           rest++ ) ;
+       *rest = 0;
+
+       while ( 1 )
+       {
+           char *name_end;
+           int num_entries;
+
+           if ( !next_key() )
+               return 0;
+
+           if ( INFO->current_ih->ih_key.k_objectid != objectid )
+               break;
+
+           name_end = INFO->current_item + ih_item_len(INFO->current_ih);
+           de_head = ( struct reiserfs_de_head * ) INFO->current_item;
+           num_entries = ih_entry_count(INFO->current_ih);
+           while ( num_entries > 0 )
+           {
+               char *filename = INFO->current_item + deh_location(de_head);
+               char tmp = *name_end;
+
+                if( deh_state(de_head) & (1 << DEH_Visible))
+               {
+                   int cmp;
+
+                   /* Directory names in ReiserFS are not null * terminated. 
+                    * We write a temporary 0 behind it. * NOTE: that this
+                    * may overwrite the first block in * the tree cache.
+                    * That doesn't hurt as long as we * don't call next_key
+                    * () in between. */
+                   *name_end = 0;
+                   cmp = strcmp( dirname, filename );
+                   *name_end = tmp;
+                   if ( cmp == 0 )
+                       goto found;
+               }
+               /* The beginning of this name marks the end of the next name. 
+                */
+               name_end = filename;
+               de_head++;
+               num_entries--;
+           }
+       }
+
+       errnum = FILE_ERR_NOTFOUND;
+       *rest = ch;
+       return 0;
+
+      found:
+       *rest = ch;
+       dirname = rest;
+
+       parent_dir_id = dir_id;
+       parent_objectid = objectid;
+       dir_id = de_head->deh_dir_id; /* LE */
+       objectid = de_head->deh_objectid; /* LE */
+    }
+}
+
+
+
+#ifndef __LITTLE_ENDIAN
+typedef union {
+    struct offset_v2 offset_v2;
+    __u64 linear;
+} offset_v2_esafe_overlay;
+
+inline __u16
+offset_v2_k_type( struct offset_v2 *v2 )
+{
+    offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
+    tmp.linear = le64_to_cpu( tmp.linear );
+    return tmp.offset_v2.k_type;
+}
+ 
+inline loff_t
+offset_v2_k_offset( struct offset_v2 *v2 )
+{
+    offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
+    tmp.linear = le64_to_cpu( tmp.linear );
+    return tmp.offset_v2.k_offset;
+}
+#endif
+
+inline int
+uniqueness2type (__u32 uniqueness)
+{
+    switch (uniqueness) {
+    case V1_SD_UNIQUENESS: return TYPE_STAT_DATA;
+    case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT;
+    case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT;
+    case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY;
+    }
+    return TYPE_ANY;
+}