second/fs_reiserfs.c

   1 /*
   2  *  fs_reiserfs.c - an implementation for the Reiser filesystem
   3  *
   4  *  Copyright (C) 2001 Jeffrey Mahoney (jeffm@suse.com)
   5  *
   6  *  Adapted from Grub
   7  *
   8  *  Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   9  *
  10  *  This program is free software; you can redistribute it and/or modify
  11  *  it under the terms of the GNU General Public License as published by
  12  *  the Free Software Foundation; either version 2 of the License, or
  13  *  (at your option) any later version.
  14  *
  15  *  This program is distributed in the hope that it will be useful,
  16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  *  GNU General Public License for more details.
  19  *
  20  *  You should have received a copy of the GNU General Public License
  21  *  along with this program; if not, write to the Free Software
  22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  23  */
  24
  25 #include "types.h"
  26 #include "ctype.h"
  27 #include "string.h"
  28 #include "stdlib.h"
  29 #include "fs.h"
  30 #include "errors.h"
  31 #include "debug.h"
  32 #include "reiserfs/reiserfs.h"
  33
  34 /* Exported in struct fs_t */
  35 static int reiserfs_open( struct boot_file_t *file, const char *dev_name,
  36                           struct partition_t *part, const char *file_name );
  37 static int reiserfs_read( struct boot_file_t *file, unsigned int size,
  38
  39                           void *buffer );
  40 static int reiserfs_seek( struct boot_file_t *file, unsigned int newpos );
  41 static int reiserfs_close( struct boot_file_t *file );
  42
  43 struct fs_t reiserfs_filesystem = {
  44      name:"reiserfs",
  45      open:reiserfs_open,
  46      read:reiserfs_read,
  47      seek:reiserfs_seek,
  48      close:reiserfs_close
  49 };
  50
  51 static int reiserfs_read_super( void );
  52 static int reiserfs_open_file( char *dirname );
  53 static int reiserfs_read_data( char *buf, __u32 len );
  54
  55
  56 static struct reiserfs_state reiserfs;
  57 static struct reiserfs_state *INFO = &reiserfs;
  58
  59 /* Adapted from GRUB: */
  60 static char FSYS_BUF[FSYSREISER_CACHE_SIZE];
  61 int errnum;
  62
  63
  64 static int
  65 reiserfs_open( struct boot_file_t *file, const char *dev_name,
  66                struct partition_t *part, const char *file_name )
  67 {
  68      static char buffer[1024];
  69
  70      DEBUG_ENTER;
  71      DEBUG_OPEN;
  72
  73      memset( INFO, 0, sizeof(struct reiserfs_state) );
  74      INFO->file = file;
  75
  76      if (part)
  77      {
  78           DEBUG_F( "Determining offset for partition %d\n", part->part_number );
  79           INFO->partition_offset = ((uint64_t)part->part_start) * part->blocksize;
  80           DEBUG_F( "%Lu = %lu * %hu\n", INFO->partition_offset,
  81                    part->part_start,
  82                    part->blocksize );
  83      }
  84      else
  85           INFO->partition_offset = 0;
  86
  87      sprintf( buffer, "%s:%d", dev_name, 0 ); /* 0 is full disk in OF */
  88      file->of_device = prom_open( buffer );
  89      DEBUG_F( "Trying to open dev_name=%s; filename=%s; partition offset=%Lu\n",
  90               buffer, file_name, INFO->partition_offset );
  91
  92      if ( file->of_device == PROM_INVALID_HANDLE || file->of_device == NULL )
  93      {
  94           DEBUG_F( "Can't open device %p\n", file->of_device );
  95           DEBUG_LEAVE(FILE_ERR_BADDEV);
  96           return FILE_ERR_BADDEV;
  97      }
  98
  99      DEBUG_F("%p was successfully opened\n", file->of_device);
 100
 101      if ( reiserfs_read_super() != 1 )
 102      {
 103           DEBUG_F( "Couldn't open ReiserFS @ %s/%Lu\n", buffer, INFO->partition_offset );
 104           prom_close( file->of_device );
 105           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 106           return FILE_ERR_BAD_FSYS;
 107      }
 108
 109      DEBUG_F( "Attempting to open %s\n", file_name );
 110      strcpy(buffer, file_name); /* reiserfs_open_file modifies argument */
 111      if (reiserfs_open_file(buffer) == 0)
 112      {
 113           DEBUG_F( "reiserfs_open_file failed. errnum = %d\n", errnum );
 114           prom_close( file->of_device );
 115           DEBUG_LEAVE_F(errnum);
 116           return errnum;
 117      }
 118
 119      DEBUG_F( "Successfully opened %s\n", file_name );
 120
 121      DEBUG_LEAVE(FILE_ERR_OK);
 122      DEBUG_SLEEP;
 123      return FILE_ERR_OK;
 124 }
 125
 126 static int
 127 reiserfs_read( struct boot_file_t *file, unsigned int size, void *buffer )
 128 {
 129      return reiserfs_read_data( buffer, size );
 130 }
 131
 132 static int
 133 reiserfs_seek( struct boot_file_t *file, unsigned int newpos )
 134 {
 135      file->pos = newpos;
 136      return FILE_ERR_OK;
 137 }
 138
 139 static int
 140 reiserfs_close( struct boot_file_t *file )
 141 {
 142      if( file->of_device )
 143      {
 144           prom_close(file->of_device);
 145           file->of_device = 0;
 146           DEBUG_F("reiserfs_close called\n");
 147      }
 148      return FILE_ERR_OK;
 149 }
 150
 151
 152 static __inline__ __u32
 153 log2( __u32 word )
 154 {
 155      int i = 0;
 156      while( word && (word & (1 << ++i)) == 0 );
 157      return i;
 158 }
 159
 160 static __inline__ int
 161 is_power_of_two( unsigned long word )
 162 {
 163      return ( word & -word ) == word;
 164 }
 165
 166 static int
 167 read_disk_block( struct boot_file_t *file, __u32 block, __u32 start,
 168                  __u32 length, void *buf )
 169 {
 170      __u16 fs_blocksize = INFO->blocksize == 0 ? REISERFS_OLD_BLOCKSIZE
 171           : INFO->blocksize;
 172      unsigned long long pos = (unsigned long long)block * (unsigned long long)fs_blocksize;
 173      pos += (unsigned long long)INFO->partition_offset + (unsigned long long)start;
 174      DEBUG_F( "Reading %u bytes, starting at block %u, disk offset %Lu\n",
 175               length, block, pos );
 176      if (!prom_lseek( file->of_device, pos )) {
 177           DEBUG_F("prom_lseek failed\n");
 178           return 0;
 179      }
 180      return prom_read( file->of_device, buf, length );
 181 }
 182
 183
 184 static int
 185 journal_read( __u32 block, __u32 len, char *buffer )
 186 {
 187      return read_disk_block( INFO->file,
 188                              (INFO->journal_block + block), 0,
 189                              len, buffer );
 190 }
 191
 192 /* Read a block from ReiserFS file system, taking the journal into
 193  * account.  If the block nr is in the journal, the block from the
 194  * journal taken.
 195  */
 196 static int
 197 block_read( __u32 blockNr, __u32 start, __u32 len, char *buffer )
 198 {
 199      __u32 transactions = INFO->journal_transactions;
 200      __u32 desc_block = INFO->journal_first_desc;
 201      __u32 journal_mask = INFO->journal_block_count - 1;
 202      __u32 translatedNr = blockNr;
 203      __u32 *journal_table = JOURNAL_START;
 204
 205 //    DEBUG_F( "block_read( %u, %u, %u, ..)\n", blockNr, start, len );
 206
 207      while ( transactions-- > 0 )
 208      {
 209           int i = 0;
 210           int j_len;
 211
 212           if ( *journal_table != 0xffffffff )
 213           {
 214                /* Search for the blockNr in cached journal */
 215                j_len = le32_to_cpu(*journal_table++);
 216                while ( i++ < j_len )
 217                {
 218                     if ( le32_to_cpu(*journal_table++) == blockNr )
 219                     {
 220                          journal_table += j_len - i;
 221                          goto found;
 222                     }
 223                }
 224           }
 225           else
 226           {
 227                /* This is the end of cached journal marker.  The remaining
 228                 * transactions are still on disk. */
 229                struct reiserfs_journal_desc desc;
 230                struct reiserfs_journal_commit commit;
 231
 232                if ( !journal_read( desc_block, sizeof(desc), (char *) &desc ) )
 233                     return 0;
 234
 235                j_len = le32_to_cpu(desc.j_len);
 236                while ( i < j_len && i < JOURNAL_TRANS_HALF )
 237                     if ( le32_to_cpu(desc.j_realblock[i++]) == blockNr )
 238                          goto found;
 239
 240                if ( j_len >= JOURNAL_TRANS_HALF )
 241                {
 242                     int commit_block = ( desc_block + 1 + j_len ) & journal_mask;
 243
 244                     if ( !journal_read( commit_block,
 245                                         sizeof(commit), (char *) &commit ) )
 246                          return 0;
 247
 248                     while ( i < j_len )
 249                          if ( le32_to_cpu(commit.j_realblock[i++ - JOURNAL_TRANS_HALF]) == blockNr )
 250                               goto found;
 251                }
 252           }
 253           goto not_found;
 254
 255      found:
 256           translatedNr =
 257                INFO->journal_block + ( ( desc_block + i ) & journal_mask );
 258
 259           DEBUG_F( "block_read: block %u is mapped to journal block %u.\n",
 260                    blockNr, translatedNr - INFO->journal_block );
 261
 262           /* We must continue the search, as this block may be overwritten in
 263            * later transactions. */
 264      not_found:
 265           desc_block = (desc_block + 2 + j_len) & journal_mask;
 266      }
 267
 268      return read_disk_block( INFO->file, translatedNr, start, len, buffer );
 269 }
 270
 271 /* Init the journal data structure.  We try to cache as much as
 272  * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
 273  * we can still read the rest from the disk on demand.
 274  *
 275  * The first number of valid transactions and the descriptor block of the
 276  * first valid transaction are held in INFO.  The transactions are all
 277  * adjacent, but we must take care of the journal wrap around.
 278  */
 279 static int
 280 journal_init( void )
 281 {
 282      struct reiserfs_journal_header header;
 283      struct reiserfs_journal_desc desc;
 284      struct reiserfs_journal_commit commit;
 285      __u32 block_count = INFO->journal_block_count;
 286      __u32 desc_block;
 287      __u32 commit_block;
 288      __u32 next_trans_id;
 289      __u32 *journal_table = JOURNAL_START;
 290
 291      journal_read( block_count, sizeof ( header ), ( char * ) &header );
 292      desc_block = le32_to_cpu(header.j_first_unflushed_offset);
 293      if ( desc_block >= block_count )
 294           return 0;
 295
 296      INFO->journal_transactions = 0;
 297      INFO->journal_first_desc = desc_block;
 298      next_trans_id = le32_to_cpu(header.j_last_flush_trans_id) + 1;
 299
 300      DEBUG_F( "journal_init: last flushed %u\n", le32_to_cpu(header.j_last_flush_trans_id) );
 301
 302      while ( 1 )
 303      {
 304           journal_read( desc_block, sizeof(desc), (char *) &desc );
 305           if ( strcmp( JOURNAL_DESC_MAGIC, desc.j_magic ) != 0
 306                || desc.j_trans_id != next_trans_id
 307                || desc.j_mount_id != header.j_mount_id )
 308                /* no more valid transactions */
 309                break;
 310
 311           commit_block = ( desc_block + le32_to_cpu(desc.j_len) + 1 ) & ( block_count - 1 );
 312           journal_read( commit_block, sizeof(commit), (char *) &commit );
 313           if ( desc.j_trans_id != commit.j_trans_id
 314                || desc.j_len != commit.j_len )
 315                /* no more valid transactions */
 316                break;
 317
 318
 319           DEBUG_F( "Found valid transaction %u/%u at %u.\n",
 320                    le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
 321                    desc_block );
 322
 323
 324           next_trans_id++;
 325           if ( journal_table < JOURNAL_END )
 326           {
 327                if ( ( journal_table + 1 + le32_to_cpu(desc.j_len) ) >= JOURNAL_END )
 328                {
 329                     /* The table is almost full; mark the end of the cached * *
 330                      * journal. */
 331                     *journal_table = 0xffffffff;
 332                     journal_table = JOURNAL_END;
 333                }
 334                else
 335                {
 336                     int i;
 337
 338                     /* Cache the length and the realblock numbers in the table. *
 339                      * The block number of descriptor can easily be computed. *
 340                      * and need not to be stored here. */
 341                     *journal_table++ = desc.j_len;
 342                     for ( i = 0; i < le32_to_cpu(desc.j_len) && i < JOURNAL_TRANS_HALF; i++ )
 343                     {
 344                          *journal_table++ = desc.j_realblock[i];
 345
 346                          DEBUG_F( "block %u is in journal %u.\n",
 347                                   le32_to_cpu(desc.j_realblock[i]), desc_block );
 348
 349                     }
 350                     for ( ; i < le32_to_cpu(desc.j_len); i++ )
 351                     {
 352                          *journal_table++ =
 353                               commit.j_realblock[i - JOURNAL_TRANS_HALF];
 354
 355                          DEBUG_F( "block %u is in journal %u.\n",
 356                                   le32_to_cpu(commit.j_realblock[i - JOURNAL_TRANS_HALF]),
 357                                   desc_block );
 358
 359                     }
 360                }
 361           }
 362           desc_block = (commit_block + 1) & (block_count - 1);
 363      }
 364
 365      DEBUG_F( "Transaction %u/%u at %u isn't valid.\n",
 366               le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
 367               desc_block );
 368
 369
 370      INFO->journal_transactions
 371           = next_trans_id - le32_to_cpu(header.j_last_flush_trans_id) - 1;
 372      return (errnum == 0);
 373 }
 374
 375 /* check filesystem types and read superblock into memory buffer */
 376 static int
 377 reiserfs_read_super( void )
 378 {
 379      struct reiserfs_super_block super;
 380      __u64 superblock = REISERFS_SUPERBLOCK_BLOCK;
 381
 382      if (read_disk_block(INFO->file, superblock, 0, sizeof(super), &super) != sizeof(super)) {
 383           DEBUG_F("read_disk_block failed!\n");
 384           return 0;
 385      }
 386
 387      DEBUG_F( "Found super->magic: \"%s\"\n", super.s_magic );
 388
 389      if( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
 390          strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
 391      {
 392           /* Try old super block position */
 393           superblock = REISERFS_OLD_SUPERBLOCK_BLOCK;
 394
 395           if (read_disk_block( INFO->file, superblock, 0, sizeof (super),  &super ) != sizeof(super)) {
 396                DEBUG_F("read_disk_block failed!\n");
 397                return 0;
 398           }
 399
 400           if ( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
 401                strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
 402           {
 403                /* pre journaling super block - untested */
 404                if ( strcmp( REISERFS_SUPER_MAGIC_STRING,
 405                             (char *) ((__u32) &super + 20 ) ) != 0 )
 406                     return 0;
 407
 408                super.s_blocksize = cpu_to_le16(REISERFS_OLD_BLOCKSIZE);
 409                super.s_journal_block = 0;
 410                super.s_version = 0;
 411           }
 412      }
 413
 414      DEBUG_F( "ReiserFS superblock data:\n" );
 415      DEBUG_F( "Block count: %u\n", le32_to_cpu(super.s_block_count) )
 416           DEBUG_F( "Free blocks: %u\n", le32_to_cpu(super.s_free_blocks) );
 417      DEBUG_F( "Journal block: %u\n", le32_to_cpu(super.s_journal_block) );
 418      DEBUG_F( "Journal size (in blocks): %u\n",
 419               le32_to_cpu(super.s_orig_journal_size) );
 420      DEBUG_F( "Root block: %u\n\n", le32_to_cpu(super.s_root_block) );
 421
 422
 423      INFO->version = le16_to_cpu(super.s_version);
 424      INFO->blocksize = le16_to_cpu(super.s_blocksize);
 425      INFO->blocksize_shift = log2( INFO->blocksize );
 426
 427      INFO->journal_block = le32_to_cpu(super.s_journal_block);
 428      INFO->journal_block_count = le32_to_cpu(super.s_orig_journal_size);
 429
 430      INFO->cached_slots = (FSYSREISER_CACHE_SIZE >> INFO->blocksize_shift) - 1;
 431
 432      /* At this point, we've found a valid superblock. If we run into problems
 433       * mounting the FS, the user should probably know. */
 434
 435      /* A few sanity checks ... */
 436      if ( INFO->version > REISERFS_MAX_SUPPORTED_VERSION )
 437      {
 438           prom_printf( "ReiserFS: Unsupported version field: %u\n",
 439                        INFO->version );
 440           return 0;
 441      }
 442
 443      if ( INFO->blocksize < FSYSREISER_MIN_BLOCKSIZE
 444           || INFO->blocksize > FSYSREISER_MAX_BLOCKSIZE )
 445      {
 446           prom_printf( "ReiserFS: Unsupported block size: %u\n",
 447                        INFO->blocksize );
 448           return 0;
 449      }
 450
 451      /* Setup the journal.. */
 452      if ( INFO->journal_block != 0 )
 453      {
 454           if ( !is_power_of_two( INFO->journal_block_count ) )
 455           {
 456                prom_printf( "ReiserFS: Unsupported journal size, "
 457                             "not a power of 2: %u\n",
 458                             INFO->journal_block_count );
 459                return 0;
 460           }
 461
 462           journal_init();
 463           /* Read in super block again, maybe it is in the journal */
 464           block_read( superblock, 0, sizeof (struct reiserfs_super_block),
 465                       (char *) &super );
 466      }
 467
 468      /* Read in the root block */
 469      if ( !block_read( le32_to_cpu(super.s_root_block), 0,
 470                        INFO->blocksize, ROOT ) )
 471      {
 472           prom_printf( "ReiserFS: Failed to read in root block\n" );
 473           return 0;
 474      }
 475
 476      /* The root node is always the "deepest", so we can
 477         determine the hieght of the tree using it. */
 478      INFO->tree_depth = blkh_level(BLOCKHEAD(ROOT));
 479
 480
 481      DEBUG_F( "root read_in: block=%u, depth=%u\n",
 482               le32_to_cpu(super.s_root_block), INFO->tree_depth );
 483
 484      if ( INFO->tree_depth >= REISERFS_MAX_TREE_HEIGHT )
 485      {
 486           prom_printf( "ReiserFS: Unsupported tree depth (too deep): %u\n",
 487                        INFO->tree_depth );
 488           return 0;
 489      }
 490
 491      if ( INFO->tree_depth == BLKH_LEVEL_LEAF )
 492      {
 493           /* There is only one node in the whole filesystem, which is
 494              simultanously leaf and root */
 495           memcpy( LEAF, ROOT, INFO->blocksize );
 496      }
 497      return 1;
 498 }
 499
 500 /***************** TREE ACCESSING METHODS *****************************/
 501
 502 /* I assume you are familiar with the ReiserFS tree, if not go to
 503  * http://devlinux.com/projects/reiserfs/
 504  *
 505  * My tree node cache is organized as following
 506  *   0   ROOT node
  507  *   1   LEAF node  (if the ROOT is also a LEAF it is copied here)
 508  *   2-n other nodes on current path from bottom to top.
 509  *       if there is not enough space in the cache, the top most are
 510  *       omitted.
 511  *
 512  * I have only two methods to find a key in the tree:
 513  *   search_stat(dir_id, objectid) searches for the stat entry (always
 514  *       the first entry) of an object.
 515  *   next_key() gets the next key in tree order.
 516  *
  517  * This means that only sequential reads of files are
  518  * efficient, but this really doesn't hurt for grub.
 519  */
 520
 521 /* Read in the node at the current path and depth into the node cache.
 522  * You must set INFO->blocks[depth] before.
 523  */
 524 static char *
 525 read_tree_node( __u32 blockNr, __u16 depth )
 526 {
 527      char *cache = CACHE(depth);
 528      int num_cached = INFO->cached_slots;
 529      errnum = 0;
 530
 531      if ( depth < num_cached )
 532      {
 533           /* This is the cached part of the path.
 534              Check if same block is needed. */
 535           if ( blockNr == INFO->blocks[depth] )
 536                return cache;
 537      }
 538      else
 539           cache = CACHE(num_cached);
 540
 541      DEBUG_F( "  next read_in: block=%u (depth=%u)\n", blockNr, depth );
 542
 543      if ( !block_read( blockNr, 0, INFO->blocksize, cache ) )
 544      {
 545           DEBUG_F( "block_read failed\n" );
 546           return 0;
 547      }
 548
 549      DEBUG_F( "FOUND: blk_level=%u, blk_nr_item=%u, blk_free_space=%u\n",
 550               blkh_level(BLOCKHEAD(cache)),
 551               blkh_nr_item(BLOCKHEAD(cache)),
 552               le16_to_cpu(BLOCKHEAD(cache)->blk_free_space) );
 553
 554      /* Make sure it has the right node level */
 555      if ( blkh_level(BLOCKHEAD(cache)) != depth )
 556      {
 557           DEBUG_F( "depth = %u != %u\n", blkh_level(BLOCKHEAD(cache)), depth );
 558           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 559           errnum = FILE_ERR_BAD_FSYS;
 560           return 0;
 561      }
 562
 563      INFO->blocks[depth] = blockNr;
 564      return cache;
 565 }
 566
 567 /* Get the next key, i.e. the key following the last retrieved key in
 568  * tree order.  INFO->current_ih and
 569  * INFO->current_info are adapted accordingly.  */
 570 static int
 571 next_key( void )
 572 {
 573      __u16 depth;
 574      struct item_head *ih = INFO->current_ih + 1;
 575      char *cache;
 576
 577
 578      DEBUG_F( "next_key:\n  old ih: key %u:%u:%u:%u version:%u\n",
 579               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
 580               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
 581               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
 582               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
 583               ih_version(INFO->current_ih) );
 584
 585
 586      if ( ih == &ITEMHEAD[blkh_nr_item(BLOCKHEAD( LEAF ))] )
 587      {
 588           depth = BLKH_LEVEL_LEAF;
 589           /* The last item, was the last in the leaf node. * Read in the next
 590            * * block */
 591           do
 592           {
 593                if ( depth == INFO->tree_depth )
 594                {
 595                     /* There are no more keys at all. * Return a dummy item with
 596                      * * MAX_KEY */
 597                     ih =
 598                          ( struct item_head * )
 599                          &BLOCKHEAD( LEAF )->blk_right_delim_key;
 600                     goto found;
 601                }
 602                depth++;
 603
 604                DEBUG_F( "  depth=%u, i=%u\n", depth, INFO->next_key_nr[depth] );
 605
 606           }
 607           while ( INFO->next_key_nr[depth] == 0 );
 608
 609           if ( depth == INFO->tree_depth )
 610                cache = ROOT;
 611           else if ( depth <= INFO->cached_slots )
 612                cache = CACHE( depth );
 613           else
 614           {
 615                cache = read_tree_node( INFO->blocks[depth], --depth );
 616                if ( !cache )
 617                     return 0;
 618           }
 619
 620           do
 621           {
 622                __u16 nr_item = blkh_nr_item(BLOCKHEAD( cache ));
 623                int key_nr = INFO->next_key_nr[depth]++;
 624
 625
 626                DEBUG_F( "  depth=%u, i=%u/%u\n", depth, key_nr, nr_item );
 627
 628                if ( key_nr == nr_item )
 629                     /* This is the last item in this block, set the next_key_nr *
 630                      * to 0 */
 631                     INFO->next_key_nr[depth] = 0;
 632
 633                cache =
 634                     read_tree_node( dc_block_number( &(DC( cache )[key_nr])),
 635                                     --depth );
 636                if ( !cache )
 637                     return 0;
 638           }
 639           while ( depth > BLKH_LEVEL_LEAF );
 640
 641           ih = ITEMHEAD;
 642      }
 643 found:
 644      INFO->current_ih = ih;
 645      INFO->current_item = &LEAF[ih_location(ih)];
 646
 647      DEBUG_F( "  new ih: key %u:%u:%u:%u version:%u\n",
 648               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
 649               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
 650               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
 651               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
 652               ih_version(INFO->current_ih) );
 653
 654      return 1;
 655 }
 656
 657 /* preconditions: reiserfs_read_super already executed, therefore
 658  *   INFO block is valid
 659  * returns: 0 if error (errnum is set),
 660  *   nonzero iff we were able to find the key successfully.
 661  * postconditions: on a nonzero return, the current_ih and
 662  *   current_item fields describe the key that equals the
 663  *   searched key.  INFO->next_key contains the next key after
 664  *   the searched key.
 665  * side effects: messes around with the cache.
 666  */
 667 static int
 668 search_stat( __u32 dir_id, __u32 objectid )
 669 {
 670      char *cache;
 671      int depth;
 672      int nr_item;
 673      int i;
 674      struct item_head *ih;
 675      errnum = 0;
 676
 677      DEBUG_F( "search_stat:\n  key %u:%u:0:0\n", le32_to_cpu(dir_id),
 678               le32_to_cpu(objectid) );
 679
 680
 681      depth = INFO->tree_depth;
 682      cache = ROOT;
 683
 684      DEBUG_F( "depth = %d\n", depth );
 685      while ( depth > BLKH_LEVEL_LEAF )
 686      {
 687           struct key *key;
 688
 689           nr_item = blkh_nr_item(BLOCKHEAD( cache ));
 690
 691           key = KEY( cache );
 692
 693           for ( i = 0; i < nr_item; i++ )
 694           {
 695                if (le32_to_cpu(key->k_dir_id) > le32_to_cpu(dir_id)
 696                    || (key->k_dir_id == dir_id
 697                        && (le32_to_cpu(key->k_objectid) > le32_to_cpu(objectid)
 698                            || (key->k_objectid == objectid
 699                                && (key->u.k_offset_v1.k_offset
 700                                    | key->u.k_offset_v1.k_uniqueness) > 0))))
 701                     break;
 702                key++;
 703           }
 704
 705
 706           DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
 707
 708           INFO->next_key_nr[depth] = ( i == nr_item ) ? 0 : i + 1;
 709           cache = read_tree_node( dc_block_number(&(DC(cache)[i])), --depth );
 710           if ( !cache )
 711                return 0;
 712      }
 713
 714      /* cache == LEAF */
 715      nr_item = blkh_nr_item(BLOCKHEAD(LEAF));
 716      ih = ITEMHEAD;
 717      DEBUG_F( "nr_item = %d\n", nr_item );
 718      for ( i = 0; i < nr_item; i++ )
 719      {
 720           if ( ih->ih_key.k_dir_id == dir_id
 721                && ih->ih_key.k_objectid == objectid
 722                && ih->ih_key.u.k_offset_v1.k_offset == 0
 723                && ih->ih_key.u.k_offset_v1.k_uniqueness == 0 )
 724           {
 725
 726                DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
 727
 728                INFO->current_ih = ih;
 729                INFO->current_item = &LEAF[ih_location(ih)];
 730
 731                return 1;
 732           }
 733
 734           ih++;
 735      }
 736
 737      DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 738      errnum = FILE_ERR_BAD_FSYS;
 739      return 0;
 740 }
 741
 742 static int
 743 reiserfs_read_data( char *buf, __u32 len )
 744 {
 745      __u32 blocksize;
 746      __u32 offset;
 747      __u32 to_read;
 748      char *prev_buf = buf;
 749      errnum = 0;
 750
 751      DEBUG_F( "reiserfs_read_data: INFO->file->pos=%Lu len=%u, offset=%Lu\n",
 752               INFO->file->pos, len, (__u64) IH_KEY_OFFSET(INFO->current_ih) - 1 );
 753
 754
 755      if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
 756           || IH_KEY_OFFSET( INFO->current_ih ) > INFO->file->pos + 1 )
 757      {
 758           search_stat( INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid );
 759           goto get_next_key;
 760      }
 761
 762      while ( errnum == 0 )
 763      {
 764           if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid )
 765                break;
 766
 767           offset = INFO->file->pos - IH_KEY_OFFSET( INFO->current_ih ) + 1;
 768           blocksize = ih_item_len(INFO->current_ih);
 769
 770
 771           DEBUG_F( "  loop: INFO->file->pos=%Lu len=%u, offset=%u blocksize=%u\n",
 772                    INFO->file->pos, len, offset, blocksize );
 773
 774
 775           if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_DIRECT )
 776                && offset < blocksize )
 777           {
 778                to_read = blocksize - offset;
 779                if ( to_read > len )
 780                     to_read = len;
 781
 782                memcpy( buf, INFO->current_item + offset, to_read );
 783                goto update_buf_len;
 784           }
 785           else if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_INDIRECT ) )
 786           {
 787                blocksize = ( blocksize >> 2 ) << INFO->blocksize_shift;
 788
 789                while ( offset < blocksize )
 790                {
 791                     __u32 blocknr = le32_to_cpu(((__u32 *)
 792                                                  INFO->current_item)[offset >> INFO->blocksize_shift]);
 793
 794                     int blk_offset = offset & (INFO->blocksize - 1);
 795
 796                     to_read = INFO->blocksize - blk_offset;
 797                     if ( to_read > len )
 798                          to_read = len;
 799
 800                     /* Journal is only for meta data.
 801                        Data blocks can be read directly without using block_read */
 802                     read_disk_block( INFO->file, blocknr, blk_offset, to_read,
 803                                      buf );
 804
 805                update_buf_len:
 806                     len -= to_read;
 807                     buf += to_read;
 808                     offset += to_read;
 809                     INFO->file->pos += to_read;
 810                     if ( len == 0 )
 811                          goto done;
 812                }
 813           }
 814      get_next_key:
 815           next_key();
 816      }
 817 done:
 818      return (errnum != 0) ? 0 : buf - prev_buf;
 819 }
 820
 821
 822 /* preconditions: reiserfs_read_super already executed, therefore
 823  *   INFO block is valid
 824  * returns: 0 if error, nonzero iff we were able to find the file successfully
 825  * postconditions: on a nonzero return, INFO->fileinfo contains the info
 826  *   of the file we were trying to look up, filepos is 0 and filemax is
 827  *   the size of the file.
 828  */
 829 static int
 830 reiserfs_open_file( char *dirname )
 831 {
 832      struct reiserfs_de_head *de_head;
 833      char *rest, ch;
 834      __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
 835
 836      char linkbuf[PATH_MAX];    /* buffer for following symbolic links */
 837      int link_count = 0;
 838      int mode;
 839      errnum = 0;
 840
 841      dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
 842      objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
 843
 844      while ( 1 )
 845      {
 846
 847           DEBUG_F( "dirname=%s\n", dirname );
 848
 849           /* Search for the stat info first. */
 850           if ( !search_stat( dir_id, objectid ) )
 851                return 0;
 852
 853
 854           DEBUG_F( "sd_mode=0%o sd_size=%Lu\n",
 855                    sd_mode((struct stat_data *) INFO->current_item ),
 856                    sd_size(INFO->current_ih, INFO->current_item ));
 857
 858
 859           mode = sd_mode((struct stat_data *)INFO->current_item);
 860
 861           /* If we've got a symbolic link, then chase it. */
 862           if ( S_ISLNK( mode ) )
 863           {
 864                int len = 0;
 865
 866                DEBUG_F("link count = %d\n", link_count);
 867                DEBUG_SLEEP;
 868                if ( ++link_count > MAX_LINK_COUNT )
 869                {
 870                     DEBUG_F("Symlink loop\n");
 871                     errnum = FILE_ERR_SYMLINK_LOOP;
 872                     return 0;
 873                }
 874
 875                /* Get the symlink size. */
 876                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
 877
 878                /* Find out how long our remaining name is. */
 879                while ( dirname[len] && !isspace( dirname[len] ) )
 880                     len++;
 881
 882                if ( INFO->file->len + len > sizeof ( linkbuf ) - 1 )
 883                {
 884                     errnum = FILE_ERR_LENGTH;
 885                     return 0;
 886                }
 887
 888                /* Copy the remaining name to the end of the symlink data. Note *
 889                 * that DIRNAME and LINKBUF may overlap! */
 890                memmove( linkbuf + INFO->file->len, dirname, len + 1 );
 891
 892                INFO->fileinfo.k_dir_id = dir_id;
 893                INFO->fileinfo.k_objectid = objectid;
 894                INFO->file->pos = 0;
 895                if ( !next_key()
 896                     || reiserfs_read_data( linkbuf, INFO->file->len ) != INFO->file->len ) {
 897                     DEBUG_F("reiserfs_open_file - if !next_key || reiserfs_read_data\n");
 898                     DEBUG_SLEEP;
 899                     errnum = FILE_IOERR;
 900                     return 0;
 901                }
 902
 903
 904                DEBUG_F( "symlink=%s\n", linkbuf );
 905                DEBUG_SLEEP;
 906
 907                dirname = linkbuf;
 908                if ( *dirname == '/' )
 909                {
 910                     /* It's an absolute link, so look it up in root. */
 911                     dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
 912                     objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
 913                }
 914                else
 915                {
 916                     /* Relative, so look it up in our parent directory. */
 917                     dir_id = parent_dir_id;
 918                     objectid = parent_objectid;
 919                }
 920
 921                /* Now lookup the new name. */
 922                continue;
 923           }
 924
 925           /* if we have a real file (and we're not just printing *
 926            * possibilities), then this is where we want to exit */
 927
 928           if ( !*dirname || isspace( *dirname ) )
 929           {
 930                if ( !S_ISREG( mode ) )
 931                {
 932                     errnum = FILE_ERR_BAD_TYPE;
 933                     return 0;
 934                }
 935
 936                INFO->file->pos = 0;
 937                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
 938
 939                INFO->fileinfo.k_dir_id = dir_id;
 940                INFO->fileinfo.k_objectid = objectid;
 941                return next_key();
 942           }
 943
 944           /* continue with the file/directory name interpretation */
 945           while ( *dirname == '/' )
 946                dirname++;
 947           if ( !S_ISDIR( mode ) )
 948           {
 949                errnum = FILE_ERR_NOTDIR;
 950                return 0;
 951           }
 952           for ( rest = dirname; ( ch = *rest ) && !isspace( ch ) && ch != '/';
 953                 rest++ ) ;
 954           *rest = 0;
 955
 956           while ( 1 )
 957           {
 958                char *name_end;
 959                int num_entries;
 960
 961                if ( !next_key() )
 962                     return 0;
 963
 964                if ( INFO->current_ih->ih_key.k_objectid != objectid )
 965                     break;
 966
 967                name_end = INFO->current_item + ih_item_len(INFO->current_ih);
 968                de_head = ( struct reiserfs_de_head * ) INFO->current_item;
 969                num_entries = ih_entry_count(INFO->current_ih);
 970                while ( num_entries > 0 )
 971                {
 972                     char *filename = INFO->current_item + deh_location(de_head);
 973                     char tmp = *name_end;
 974
 975                     if( deh_state(de_head) & (1 << DEH_Visible))
 976                     {
 977                          int cmp;
 978
 979                          /* Directory names in ReiserFS are not null * terminated.
 980                           * We write a temporary 0 behind it. * NOTE: that this
 981                           * may overwrite the first block in * the tree cache.
 982                           * That doesn't hurt as long as we * don't call next_key
 983                           * () in between. */
 984                          *name_end = 0;
 985                          cmp = strcmp( dirname, filename );
 986                          *name_end = tmp;
 987                          if ( cmp == 0 )
 988                               goto found;
 989                     }
 990                     /* The beginning of this name marks the end of the next name.
 991                      */
 992                     name_end = filename;
 993                     de_head++;
 994                     num_entries--;
 995                }
 996           }
 997
 998           errnum = FILE_ERR_NOTFOUND;
 999           *rest = ch;
1000           return 0;
1001
1002      found:
1003           *rest = ch;
1004           dirname = rest;
1005
1006           parent_dir_id = dir_id;
1007           parent_objectid = objectid;
1008           dir_id = de_head->deh_dir_id; /* LE */
1009           objectid = de_head->deh_objectid; /* LE */
1010      }
1011 }
1012
1013
1014
1015 #ifndef __LITTLE_ENDIAN
1016 typedef union {
1017      struct offset_v2 offset_v2;
1018      __u64 linear;
1019 } offset_v2_esafe_overlay;
1020
1021 inline __u16
1022 offset_v2_k_type( struct offset_v2 *v2 )
1023 {
1024      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1025      tmp.linear = le64_to_cpu( tmp.linear );
1026      return tmp.offset_v2.k_type;
1027 }
1028
1029 inline loff_t
1030 offset_v2_k_offset( struct offset_v2 *v2 )
1031 {
1032      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1033      tmp.linear = le64_to_cpu( tmp.linear );
1034      return tmp.offset_v2.k_offset;
1035 }
1036 #endif
1037
1038 inline int
1039 uniqueness2type (__u32 uniqueness)
1040 {
1041      switch (uniqueness) {
1042      case V1_SD_UNIQUENESS: return TYPE_STAT_DATA;
1043      case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT;
1044      case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT;
1045      case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY;
1046      }
1047      return TYPE_ANY;
1048 }
1049
1050 /*
1051  * Local variables:
1052  * c-file-style: "k&r"
1053  * c-basic-offset: 5
1054  * End:
1055  */