second/fs_reiserfs.c

   1 /*
   2  *  fs_reiserfs.c - an implementation for the Reiser filesystem
   3  *
   4  *  Copyright (C) 2001 Jeffrey Mahoney (jeffm@suse.com)
   5  *
   6  *  Adapted from Grub
   7  *
   8  *  Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   9  *
  10  *  This program is free software; you can redistribute it and/or modify
  11  *  it under the terms of the GNU General Public License as published by
  12  *  the Free Software Foundation; either version 2 of the License, or
  13  *  (at your option) any later version.
  14  *
  15  *  This program is distributed in the hope that it will be useful,
  16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  *  GNU General Public License for more details.
  19  *
  20  *  You should have received a copy of the GNU General Public License
  21  *  along with this program; if not, write to the Free Software
  22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  23  */
  24
  25 #include "types.h"
  26 #include "ctype.h"
  27 #include "string.h"
  28 #include "stdlib.h"
  29 #include "fs.h"
  30 #include "errors.h"
  31 #include "debug.h"
  32 #include "bootinfo.h"
  33 #include "reiserfs/reiserfs.h"
  34
  35 /* Exported in struct fs_t */
  36 static int reiserfs_open( struct boot_file_t *file, const char *dev_name,
  37                           struct partition_t *part, const char *file_name );
  38 static int reiserfs_read( struct boot_file_t *file, unsigned int size,
  39
  40                           void *buffer );
  41 static int reiserfs_seek( struct boot_file_t *file, unsigned int newpos );
  42 static int reiserfs_close( struct boot_file_t *file );
  43
  44 struct fs_t reiserfs_filesystem = {
  45      name:"reiserfs",
  46      open:reiserfs_open,
  47      read:reiserfs_read,
  48      seek:reiserfs_seek,
  49      close:reiserfs_close
  50 };
  51
  52 static int reiserfs_read_super( void );
  53 static int reiserfs_open_file( char *dirname );
  54 static int reiserfs_read_data( char *buf, __u32 len );
  55
  56
  57 static struct reiserfs_state reiserfs;
  58 static struct reiserfs_state *INFO = &reiserfs;
  59
  60 /* Adapted from GRUB: */
  61 static char FSYS_BUF[FSYSREISER_CACHE_SIZE];
  62 int errnum;
  63
  64
  65 static int
  66 reiserfs_open( struct boot_file_t *file, const char *dev_name,
  67                struct partition_t *part, const char *file_name )
  68 {
  69      static char buffer[1024];
  70
  71      DEBUG_ENTER;
  72      DEBUG_OPEN;
  73
  74      memset( INFO, 0, sizeof(struct reiserfs_state) );
  75      INFO->file = file;
  76
  77      if (part)
  78      {
  79           DEBUG_F( "Determining offset for partition %d\n", part->part_number );
  80           INFO->partition_offset = ((uint64_t)part->part_start) * part->blocksize;
  81           DEBUG_F( "%Lu = %lu * %hu\n", INFO->partition_offset,
  82                    part->part_start,
  83                    part->blocksize );
  84      }
  85      else
  86           INFO->partition_offset = 0;
  87
  88      strncpy(buffer, dev_name, 1020);
  89      if (_machine != _MACH_bplan)
  90           strcat(buffer, ":0");  /* 0 is full disk in (non-buggy) OF */
  91
  92      file->of_device = prom_open( buffer );
  93      DEBUG_F( "Trying to open dev_name=%s; filename=%s; partition offset=%Lu\n",
  94               buffer, file_name, INFO->partition_offset );
  95
  96      if ( file->of_device == PROM_INVALID_HANDLE || file->of_device == NULL )
  97      {
  98           DEBUG_F( "Can't open device %p\n", file->of_device );
  99           DEBUG_LEAVE(FILE_ERR_BADDEV);
 100           return FILE_ERR_BADDEV;
 101      }
 102
 103      DEBUG_F("%p was successfully opened\n", file->of_device);
 104
 105      if ( reiserfs_read_super() != 1 )
 106      {
 107           DEBUG_F( "Couldn't open ReiserFS @ %s/%Lu\n", buffer, INFO->partition_offset );
 108           prom_close( file->of_device );
 109           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 110           return FILE_ERR_BAD_FSYS;
 111      }
 112
 113      DEBUG_F( "Attempting to open %s\n", file_name );
 114      strcpy(buffer, file_name); /* reiserfs_open_file modifies argument */
 115      if (reiserfs_open_file(buffer) == 0)
 116      {
 117           DEBUG_F( "reiserfs_open_file failed. errnum = %d\n", errnum );
 118           prom_close( file->of_device );
 119           DEBUG_LEAVE_F(errnum);
 120           return errnum;
 121      }
 122
 123      DEBUG_F( "Successfully opened %s\n", file_name );
 124
 125      DEBUG_LEAVE(FILE_ERR_OK);
 126      DEBUG_SLEEP;
 127      return FILE_ERR_OK;
 128 }
 129
 130 static int
 131 reiserfs_read( struct boot_file_t *file, unsigned int size, void *buffer )
 132 {
 133      return reiserfs_read_data( buffer, size );
 134 }
 135
 136 static int
 137 reiserfs_seek( struct boot_file_t *file, unsigned int newpos )
 138 {
 139      file->pos = newpos;
 140      return FILE_ERR_OK;
 141 }
 142
 143 static int
 144 reiserfs_close( struct boot_file_t *file )
 145 {
 146      if( file->of_device )
 147      {
 148           prom_close(file->of_device);
 149           file->of_device = 0;
 150           DEBUG_F("reiserfs_close called\n");
 151      }
 152      return FILE_ERR_OK;
 153 }
 154
 155
 156 static __inline__ __u32
 157 log2( __u32 word )
 158 {
 159      int i = 0;
 160      while( word && (word & (1 << ++i)) == 0 );
 161      return i;
 162 }
 163
 164 static __inline__ int
 165 is_power_of_two( unsigned long word )
 166 {
 167      return ( word & -word ) == word;
 168 }
 169
 170 static int
 171 read_disk_block( struct boot_file_t *file, __u32 block, __u32 start,
 172                  __u32 length, void *buf )
 173 {
 174      __u16 fs_blocksize = INFO->blocksize == 0 ? REISERFS_OLD_BLOCKSIZE
 175           : INFO->blocksize;
 176      unsigned long long pos = (unsigned long long)block * (unsigned long long)fs_blocksize;
 177      pos += (unsigned long long)INFO->partition_offset + (unsigned long long)start;
 178      DEBUG_F( "Reading %u bytes, starting at block %u, disk offset %Lu\n",
 179               length, block, pos );
 180      if (!prom_lseek( file->of_device, pos )) {
 181           DEBUG_F("prom_lseek failed\n");
 182           return 0;
 183      }
 184      return prom_read( file->of_device, buf, length );
 185 }
 186
 187
 188 static int
 189 journal_read( __u32 block, __u32 len, char *buffer )
 190 {
 191      return read_disk_block( INFO->file,
 192                              (INFO->journal_block + block), 0,
 193                              len, buffer );
 194 }
 195
 196 /* Read a block from the ReiserFS file system, taking the journal into
 197  * account.  If the block number is found in the journal, the copy held
 198  * in the journal is read instead.
      *
      * blockNr is the block to read; start, len and buffer are handed on
      * unchanged to read_disk_block().  The function walks
      * INFO->journal_transactions transactions, beginning at descriptor
      * block INFO->journal_first_desc.  The table at JOURNAL_START is
      * consulted first; once its 0xffffffff end marker is reached, the
      * descriptor and commit blocks are read from disk instead.
      *
      * Returns the result of read_disk_block() for the (possibly remapped)
      * block, or 0 if a journal descriptor or commit block could not be
      * read.
 199  */
 200 static int
 201 block_read( __u32 blockNr, __u32 start, __u32 len, char *buffer )
 202 {
 203      __u32 transactions = INFO->journal_transactions;
 204      __u32 desc_block = INFO->journal_first_desc;
          /* Wrap-around mask; reiserfs_read_super() only sets up the journal
           * when the block count passes is_power_of_two(). */
 205      __u32 journal_mask = INFO->journal_block_count - 1;
          /* Stays equal to blockNr unless a transaction remaps the block. */
 206      __u32 translatedNr = blockNr;
 207      __u32 *journal_table = JOURNAL_START;
 208
 209 //    DEBUG_F( "block_read( %u, %u, %u, ..)\n", blockNr, start, len );
 210
 211      while ( transactions-- > 0 )
 212      {
               /* After a match, i is the 1-based position of the entry
                * within the current transaction. */
 213           int i = 0;
 214           int j_len;
 215
 216           if ( *journal_table != 0xffffffff )
 217           {
 218                /* Search for the blockNr in cached journal */
 219                j_len = le32_to_cpu(*journal_table++);
 220                while ( i++ < j_len )
 221                {
 222                     if ( le32_to_cpu(*journal_table++) == blockNr )
 223                     {
                              /* Skip the remaining entries of this
                               * transaction so that journal_table points at
                               * the next transaction's length word. */
 224                          journal_table += j_len - i;
 225                          goto found;
 226                     }
 227                }
 228           }
 229           else
 230           {
 231                /* This is the end of cached journal marker.  The remaining
 232                 * transactions are still on disk. */
 233                struct reiserfs_journal_desc desc;
 234                struct reiserfs_journal_commit commit;
 235
 236                if ( !journal_read( desc_block, sizeof(desc), (char *) &desc ) )
 237                     return 0;
 238
 239                j_len = le32_to_cpu(desc.j_len);
                    /* The first JOURNAL_TRANS_HALF block numbers are taken
                     * from the descriptor block. */
 240                while ( i < j_len && i < JOURNAL_TRANS_HALF )
 241                     if ( le32_to_cpu(desc.j_realblock[i++]) == blockNr )
 242                          goto found;
 243
 244                if ( j_len >= JOURNAL_TRANS_HALF )
 245                {
                         /* Any further block numbers are taken from the
                          * commit block, which follows the j_len data blocks. */
 246                     int commit_block = ( desc_block + 1 + j_len ) & journal_mask;
 247
 248                     if ( !journal_read( commit_block,
 249                                         sizeof(commit), (char *) &commit ) )
 250                          return 0;
 251
 252                     while ( i < j_len )
 253                          if ( le32_to_cpu(commit.j_realblock[i++ - JOURNAL_TRANS_HALF]) == blockNr )
 254                               goto found;
 255                }
 256           }
 257           goto not_found;
 258
 259      found:
               /* desc_block + i is the journal block that holds the logged
                * copy: descriptor first, then the data blocks in order. */
 260           translatedNr =
 261                INFO->journal_block + ( ( desc_block + i ) & journal_mask );
 262
 263           DEBUG_F( "block_read: block %u is mapped to journal block %u.\n",
 264                    blockNr, translatedNr - INFO->journal_block );
 265
 266           /* We must continue the search, as this block may be overwritten in
 267            * later transactions. */
 268      not_found:
               /* Step over the descriptor, the j_len data blocks and the
                * commit block to reach the next transaction. */
 269           desc_block = (desc_block + 2 + j_len) & journal_mask;
 270      }
 271
 272      return read_disk_block( INFO->file, translatedNr, start, len, buffer );
 273 }
 274
 275 /* Init the journal data structure.  We try to cache as much as
 276  * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
 277  * we can still read the rest from the disk on demand.
 278  *
 279  * The first number of valid transactions and the descriptor block of the
 280  * first valid transaction are held in INFO.  The transactions are all
 281  * adjacent, but we must take care of the journal wrap around.
 282  */
 283 static int
 284 journal_init( void )
 285 {
 286      struct reiserfs_journal_header header;
 287      struct reiserfs_journal_desc desc;
 288      struct reiserfs_journal_commit commit;
 289      __u32 block_count = INFO->journal_block_count;
 290      __u32 desc_block;
 291      __u32 commit_block;
 292      __u32 next_trans_id;
 293      __u32 *journal_table = JOURNAL_START;
 294
 295      journal_read( block_count, sizeof ( header ), ( char * ) &header );
 296      desc_block = le32_to_cpu(header.j_first_unflushed_offset);
 297      if ( desc_block >= block_count )
 298           return 0;
 299
 300      INFO->journal_transactions = 0;
 301      INFO->journal_first_desc = desc_block;
 302      next_trans_id = le32_to_cpu(header.j_last_flush_trans_id) + 1;
 303
 304      DEBUG_F( "journal_init: last flushed %u\n", le32_to_cpu(header.j_last_flush_trans_id) );
 305
 306      while ( 1 )
 307      {
 308           journal_read( desc_block, sizeof(desc), (char *) &desc );
 309           if ( strcmp( JOURNAL_DESC_MAGIC, desc.j_magic ) != 0
 310                || desc.j_trans_id != next_trans_id
 311                || desc.j_mount_id != header.j_mount_id )
 312                /* no more valid transactions */
 313                break;
 314
 315           commit_block = ( desc_block + le32_to_cpu(desc.j_len) + 1 ) & ( block_count - 1 );
 316           journal_read( commit_block, sizeof(commit), (char *) &commit );
 317           if ( desc.j_trans_id != commit.j_trans_id
 318                || desc.j_len != commit.j_len )
 319                /* no more valid transactions */
 320                break;
 321
 322
 323           DEBUG_F( "Found valid transaction %u/%u at %u.\n",
 324                    le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
 325                    desc_block );
 326
 327
 328           next_trans_id++;
 329           if ( journal_table < JOURNAL_END )
 330           {
 331                if ( ( journal_table + 1 + le32_to_cpu(desc.j_len) ) >= JOURNAL_END )
 332                {
 333                     /* The table is almost full; mark the end of the cached * *
 334                      * journal. */
 335                     *journal_table = 0xffffffff;
 336                     journal_table = JOURNAL_END;
 337                }
 338                else
 339                {
 340                     int i;
 341
 342                     /* Cache the length and the realblock numbers in the table. *
 343                      * The block number of descriptor can easily be computed. *
 344                      * and need not to be stored here. */
 345                     *journal_table++ = desc.j_len;
 346                     for ( i = 0; i < le32_to_cpu(desc.j_len) && i < JOURNAL_TRANS_HALF; i++ )
 347                     {
 348                          *journal_table++ = desc.j_realblock[i];
 349
 350                          DEBUG_F( "block %u is in journal %u.\n",
 351                                   le32_to_cpu(desc.j_realblock[i]), desc_block );
 352
 353                     }
 354                     for ( ; i < le32_to_cpu(desc.j_len); i++ )
 355                     {
 356                          *journal_table++ =
 357                               commit.j_realblock[i - JOURNAL_TRANS_HALF];
 358
 359                          DEBUG_F( "block %u is in journal %u.\n",
 360                                   le32_to_cpu(commit.j_realblock[i - JOURNAL_TRANS_HALF]),
 361                                   desc_block );
 362
 363                     }
 364                }
 365           }
 366           desc_block = (commit_block + 1) & (block_count - 1);
 367      }
 368
 369      DEBUG_F( "Transaction %u/%u at %u isn't valid.\n",
 370               le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
 371               desc_block );
 372
 373
 374      INFO->journal_transactions
 375           = next_trans_id - le32_to_cpu(header.j_last_flush_trans_id) - 1;
 376      return (errnum == 0);
 377 }
 378
 379 /* check filesystem types and read superblock into memory buffer */
 380 static int
 381 reiserfs_read_super( void )
 382 {
 383      struct reiserfs_super_block super;
 384      __u64 superblock = REISERFS_SUPERBLOCK_BLOCK;
 385
 386      if (read_disk_block(INFO->file, superblock, 0, sizeof(super), &super) != sizeof(super)) {
 387           DEBUG_F("read_disk_block failed!\n");
 388           return 0;
 389      }
 390
 391      DEBUG_F( "Found super->magic: \"%s\"\n", super.s_magic );
 392
 393      if( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
 394          strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
 395      {
 396           /* Try old super block position */
 397           superblock = REISERFS_OLD_SUPERBLOCK_BLOCK;
 398
 399           if (read_disk_block( INFO->file, superblock, 0, sizeof (super),  &super ) != sizeof(super)) {
 400                DEBUG_F("read_disk_block failed!\n");
 401                return 0;
 402           }
 403
 404           if ( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
 405                strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
 406           {
 407                /* pre journaling super block - untested */
 408                if ( strcmp( REISERFS_SUPER_MAGIC_STRING,
 409                             (char *) ((__u32) &super + 20 ) ) != 0 )
 410                     return 0;
 411
 412                super.s_blocksize = cpu_to_le16(REISERFS_OLD_BLOCKSIZE);
 413                super.s_journal_block = 0;
 414                super.s_version = 0;
 415           }
 416      }
 417
 418      DEBUG_F( "ReiserFS superblock data:\n" );
 419      DEBUG_F( "Block count: %u\n", le32_to_cpu(super.s_block_count) )
 420           DEBUG_F( "Free blocks: %u\n", le32_to_cpu(super.s_free_blocks) );
 421      DEBUG_F( "Journal block: %u\n", le32_to_cpu(super.s_journal_block) );
 422      DEBUG_F( "Journal size (in blocks): %u\n",
 423               le32_to_cpu(super.s_orig_journal_size) );
 424      DEBUG_F( "Root block: %u\n\n", le32_to_cpu(super.s_root_block) );
 425
 426
 427      INFO->version = le16_to_cpu(super.s_version);
 428      INFO->blocksize = le16_to_cpu(super.s_blocksize);
 429      INFO->blocksize_shift = log2( INFO->blocksize );
 430
 431      INFO->journal_block = le32_to_cpu(super.s_journal_block);
 432      INFO->journal_block_count = le32_to_cpu(super.s_orig_journal_size);
 433
 434      INFO->cached_slots = (FSYSREISER_CACHE_SIZE >> INFO->blocksize_shift) - 1;
 435
 436      /* At this point, we've found a valid superblock. If we run into problems
 437       * mounting the FS, the user should probably know. */
 438
 439      /* A few sanity checks ... */
 440      if ( INFO->version > REISERFS_MAX_SUPPORTED_VERSION )
 441      {
 442           prom_printf( "ReiserFS: Unsupported version field: %u\n",
 443                        INFO->version );
 444           return 0;
 445      }
 446
 447      if ( INFO->blocksize < FSYSREISER_MIN_BLOCKSIZE
 448           || INFO->blocksize > FSYSREISER_MAX_BLOCKSIZE )
 449      {
 450           prom_printf( "ReiserFS: Unsupported block size: %u\n",
 451                        INFO->blocksize );
 452           return 0;
 453      }
 454
 455      /* Setup the journal.. */
 456      if ( INFO->journal_block != 0 )
 457      {
 458           if ( !is_power_of_two( INFO->journal_block_count ) )
 459           {
 460                prom_printf( "ReiserFS: Unsupported journal size, "
 461                             "not a power of 2: %u\n",
 462                             INFO->journal_block_count );
 463                return 0;
 464           }
 465
 466           journal_init();
 467           /* Read in super block again, maybe it is in the journal */
 468           block_read( superblock, 0, sizeof (struct reiserfs_super_block),
 469                       (char *) &super );
 470      }
 471
 472      /* Read in the root block */
 473      if ( !block_read( le32_to_cpu(super.s_root_block), 0,
 474                        INFO->blocksize, ROOT ) )
 475      {
 476           prom_printf( "ReiserFS: Failed to read in root block\n" );
 477           return 0;
 478      }
 479
 480      /* The root node is always the "deepest", so we can
 481         determine the hieght of the tree using it. */
 482      INFO->tree_depth = blkh_level(BLOCKHEAD(ROOT));
 483
 484
 485      DEBUG_F( "root read_in: block=%u, depth=%u\n",
 486               le32_to_cpu(super.s_root_block), INFO->tree_depth );
 487
 488      if ( INFO->tree_depth >= REISERFS_MAX_TREE_HEIGHT )
 489      {
 490           prom_printf( "ReiserFS: Unsupported tree depth (too deep): %u\n",
 491                        INFO->tree_depth );
 492           return 0;
 493      }
 494
 495      if ( INFO->tree_depth == BLKH_LEVEL_LEAF )
 496      {
 497           /* There is only one node in the whole filesystem, which is
 498              simultanously leaf and root */
 499           memcpy( LEAF, ROOT, INFO->blocksize );
 500      }
 501      return 1;
 502 }
 503
 504 /***************** TREE ACCESSING METHODS *****************************/
 505
 506 /* I assume you are familiar with the ReiserFS tree, if not go to
 507  * http://devlinux.com/projects/reiserfs/
 508  *
 509  * My tree node cache is organized as following
 510  *   0   ROOT node
 511  *   1   LEAF node  (if the ROOT is also a LEAF it is copied here)
 512  *   2-n other nodes on current path from bottom to top.
 513  *       if there is not enough space in the cache, the top most are
 514  *       omitted.
 515  *
 516  * I have only two methods to find a key in the tree:
 517  *   search_stat(dir_id, objectid) searches for the stat entry (always
 518  *       the first entry) of an object.
 519  *   next_key() gets the next key in tree order.
 520  *
 521  * This means that only sequential reads of files are
 522  * efficient, but this really doesn't hurt for grub.
 523  */
 524
 525 /* Read in the node at the current path and depth into the node cache.
 526  * You must set INFO->blocks[depth] before.
 527  */
 528 static char *
 529 read_tree_node( __u32 blockNr, __u16 depth )
 530 {
 531      char *cache = CACHE(depth);
 532      int num_cached = INFO->cached_slots;
 533      errnum = 0;
 534
 535      if ( depth < num_cached )
 536      {
 537           /* This is the cached part of the path.
 538              Check if same block is needed. */
 539           if ( blockNr == INFO->blocks[depth] )
 540                return cache;
 541      }
 542      else
 543           cache = CACHE(num_cached);
 544
 545      DEBUG_F( "  next read_in: block=%u (depth=%u)\n", blockNr, depth );
 546
 547      if ( !block_read( blockNr, 0, INFO->blocksize, cache ) )
 548      {
 549           DEBUG_F( "block_read failed\n" );
 550           return 0;
 551      }
 552
 553      DEBUG_F( "FOUND: blk_level=%u, blk_nr_item=%u, blk_free_space=%u\n",
 554               blkh_level(BLOCKHEAD(cache)),
 555               blkh_nr_item(BLOCKHEAD(cache)),
 556               le16_to_cpu(BLOCKHEAD(cache)->blk_free_space) );
 557
 558      /* Make sure it has the right node level */
 559      if ( blkh_level(BLOCKHEAD(cache)) != depth )
 560      {
 561           DEBUG_F( "depth = %u != %u\n", blkh_level(BLOCKHEAD(cache)), depth );
 562           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 563           errnum = FILE_ERR_BAD_FSYS;
 564           return 0;
 565      }
 566
 567      INFO->blocks[depth] = blockNr;
 568      return cache;
 569 }
 570
 571 /* Get the next key, i.e. the key following the last retrieved key in
 572  * tree order.  INFO->current_ih and
 573  * INFO->current_info are adapted accordingly.  */
 574 static int
 575 next_key( void )
 576 {
 577      __u16 depth;
 578      struct item_head *ih = INFO->current_ih + 1;
 579      char *cache;
 580
 581
 582      DEBUG_F( "next_key:\n  old ih: key %u:%u:%u:%u version:%u\n",
 583               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
 584               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
 585               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
 586               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
 587               ih_version(INFO->current_ih) );
 588
 589
 590      if ( ih == &ITEMHEAD[blkh_nr_item(BLOCKHEAD( LEAF ))] )
 591      {
 592           depth = BLKH_LEVEL_LEAF;
 593           /* The last item, was the last in the leaf node. * Read in the next
 594            * * block */
 595           do
 596           {
 597                if ( depth == INFO->tree_depth )
 598                {
 599                     /* There are no more keys at all. * Return a dummy item with
 600                      * * MAX_KEY */
 601                     ih =
 602                          ( struct item_head * )
 603                          &BLOCKHEAD( LEAF )->blk_right_delim_key;
 604                     goto found;
 605                }
 606                depth++;
 607
 608                DEBUG_F( "  depth=%u, i=%u\n", depth, INFO->next_key_nr[depth] );
 609
 610           }
 611           while ( INFO->next_key_nr[depth] == 0 );
 612
 613           if ( depth == INFO->tree_depth )
 614                cache = ROOT;
 615           else if ( depth <= INFO->cached_slots )
 616                cache = CACHE( depth );
 617           else
 618           {
 619                cache = read_tree_node( INFO->blocks[depth], --depth );
 620                if ( !cache )
 621                     return 0;
 622           }
 623
 624           do
 625           {
 626                __u16 nr_item = blkh_nr_item(BLOCKHEAD( cache ));
 627                int key_nr = INFO->next_key_nr[depth]++;
 628
 629
 630                DEBUG_F( "  depth=%u, i=%u/%u\n", depth, key_nr, nr_item );
 631
 632                if ( key_nr == nr_item )
 633                     /* This is the last item in this block, set the next_key_nr *
 634                      * to 0 */
 635                     INFO->next_key_nr[depth] = 0;
 636
 637                cache =
 638                     read_tree_node( dc_block_number( &(DC( cache )[key_nr])),
 639                                     --depth );
 640                if ( !cache )
 641                     return 0;
 642           }
 643           while ( depth > BLKH_LEVEL_LEAF );
 644
 645           ih = ITEMHEAD;
 646      }
 647 found:
 648      INFO->current_ih = ih;
 649      INFO->current_item = &LEAF[ih_location(ih)];
 650
 651      DEBUG_F( "  new ih: key %u:%u:%u:%u version:%u\n",
 652               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
 653               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
 654               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
 655               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
 656               ih_version(INFO->current_ih) );
 657
 658      return 1;
 659 }
 660
 661 /* preconditions: reiserfs_read_super already executed, therefore
 662  *   INFO block is valid
 663  * returns: 0 if error (errnum is set),
 664  *   nonzero iff we were able to find the key successfully.
 665  * postconditions: on a nonzero return, the current_ih and
 666  *   current_item fields describe the key that equals the
 667  *   searched key.  INFO->next_key contains the next key after
 668  *   the searched key.
 669  * side effects: messes around with the cache.
 670  */
 671 static int
 672 search_stat( __u32 dir_id, __u32 objectid )
 673 {
 674      char *cache;
 675      int depth;
 676      int nr_item;
 677      int i;
 678      struct item_head *ih;
 679      errnum = 0;
 680
 681      DEBUG_F( "search_stat:\n  key %u:%u:0:0\n", le32_to_cpu(dir_id),
 682               le32_to_cpu(objectid) );
 683
 684
 685      depth = INFO->tree_depth;
 686      cache = ROOT;
 687
 688      DEBUG_F( "depth = %d\n", depth );
 689      while ( depth > BLKH_LEVEL_LEAF )
 690      {
 691           struct key *key;
 692
 693           nr_item = blkh_nr_item(BLOCKHEAD( cache ));
 694
 695           key = KEY( cache );
 696
 697           for ( i = 0; i < nr_item; i++ )
 698           {
 699                if (le32_to_cpu(key->k_dir_id) > le32_to_cpu(dir_id)
 700                    || (key->k_dir_id == dir_id
 701                        && (le32_to_cpu(key->k_objectid) > le32_to_cpu(objectid)
 702                            || (key->k_objectid == objectid
 703                                && (key->u.k_offset_v1.k_offset
 704                                    | key->u.k_offset_v1.k_uniqueness) > 0))))
 705                     break;
 706                key++;
 707           }
 708
 709
 710           DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
 711
 712           INFO->next_key_nr[depth] = ( i == nr_item ) ? 0 : i + 1;
 713           cache = read_tree_node( dc_block_number(&(DC(cache)[i])), --depth );
 714           if ( !cache )
 715                return 0;
 716      }
 717
 718      /* cache == LEAF */
 719      nr_item = blkh_nr_item(BLOCKHEAD(LEAF));
 720      ih = ITEMHEAD;
 721      DEBUG_F( "nr_item = %d\n", nr_item );
 722      for ( i = 0; i < nr_item; i++ )
 723      {
 724           if ( ih->ih_key.k_dir_id == dir_id
 725                && ih->ih_key.k_objectid == objectid
 726                && ih->ih_key.u.k_offset_v1.k_offset == 0
 727                && ih->ih_key.u.k_offset_v1.k_uniqueness == 0 )
 728           {
 729
 730                DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
 731
 732                INFO->current_ih = ih;
 733                INFO->current_item = &LEAF[ih_location(ih)];
 734
 735                return 1;
 736           }
 737
 738           ih++;
 739      }
 740
 741      DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 742      errnum = FILE_ERR_BAD_FSYS;
 743      return 0;
 744 }
 745
     /* Copy up to LEN bytes of the file described by INFO->fileinfo into
      * BUF, starting at INFO->file->pos, and advance INFO->file->pos by
      * the number of bytes copied.
      *
      * Direct items are copied from the cached leaf; for indirect items
      * the referenced data blocks are read with read_disk_block().
      *
      * Returns the number of bytes copied, or 0 if errnum is nonzero at
      * the end.  The results of search_stat(), next_key() and
      * read_disk_block() are not checked here.
      */
 746 static int
 747 reiserfs_read_data( char *buf, __u32 len )
 748 {
          /* Length of the current item in bytes; for an indirect item it is
           * replaced by the number of file bytes the item covers. */
 749      __u32 blocksize;
          /* Position of INFO->file->pos relative to the start of the item. */
 750      __u32 offset;
 751      __u32 to_read;
 752      char *prev_buf = buf;
 753      errnum = 0;
 754
 755      DEBUG_F( "reiserfs_read_data: INFO->file->pos=%Lu len=%u, offset=%Lu\n",
 756               INFO->file->pos, len, (__u64) IH_KEY_OFFSET(INFO->current_ih) - 1 );
 757
 758
          /* If the current item belongs to another object, or starts beyond
           * the requested position, restart from the object's stat item and
           * walk forward from there. */
 759      if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
 760           || IH_KEY_OFFSET( INFO->current_ih ) > INFO->file->pos + 1 )
 761      {
 762           search_stat( INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid );
               /* Jumps into the loop body below, skipping its first part. */
 763           goto get_next_key;
 764      }
 765
 766      while ( errnum == 0 )
 767      {
               /* An item of a different object ends the walk. */
 768           if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid )
 769                break;
 770
               /* The +1 compensates for item key offsets being one larger
                * than the file position they start at. */
 771           offset = INFO->file->pos - IH_KEY_OFFSET( INFO->current_ih ) + 1;
 772           blocksize = ih_item_len(INFO->current_ih);
 773
 774
 775           DEBUG_F( "  loop: INFO->file->pos=%Lu len=%u, offset=%u blocksize=%u\n",
 776                    INFO->file->pos, len, offset, blocksize );
 777
 778
 779           if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_DIRECT )
 780                && offset < blocksize )
 781           {
                    /* Direct item: the data is part of the item itself. */
 782                to_read = blocksize - offset;
 783                if ( to_read > len )
 784                     to_read = len;
 785
 786                memcpy( buf, INFO->current_item + offset, to_read );
                    /* Shares the bookkeeping code inside the indirect loop. */
 787                goto update_buf_len;
 788           }
 789           else if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_INDIRECT ) )
 790           {
                    /* Indirect item: an array of 32-bit block numbers.  Turn
                     * the item length into the number of bytes covered. */
 791                blocksize = ( blocksize >> 2 ) << INFO->blocksize_shift;
 792
 793                while ( offset < blocksize )
 794                {
 795                     __u32 blocknr = le32_to_cpu(((__u32 *)
 796                                                  INFO->current_item)[offset >> INFO->blocksize_shift]);
 797
 798                     int blk_offset = offset & (INFO->blocksize - 1);
 799
                         /* Read at most up to the end of this data block. */
 800                     to_read = INFO->blocksize - blk_offset;
 801                     if ( to_read > len )
 802                          to_read = len;
 803
 804                     /* Journal is only for meta data.
 805                        Data blocks can be read directly without using block_read */
 806                     read_disk_block( INFO->file, blocknr, blk_offset, to_read,
 807                                      buf );
 808
 809                update_buf_len:
                         /* Also reached from the direct-item branch above. */
 810                     len -= to_read;
 811                     buf += to_read;
 812                     offset += to_read;
 813                     INFO->file->pos += to_read;
 814                     if ( len == 0 )
 815                          goto done;
 816                }
 817           }
 818      get_next_key:
 819           next_key();
 820      }
 821 done:
 822      return (errnum != 0) ? 0 : buf - prev_buf;
 823 }
 824
 825
 826 /* preconditions: reiserfs_read_super already executed, therefore
 827  *   INFO block is valid
 828  * returns: 0 if error, nonzero iff we were able to find the file successfully
 829  * postconditions: on a nonzero return, INFO->fileinfo contains the info
 830  *   of the file we were trying to look up, filepos is 0 and filemax is
 831  *   the size of the file.
 832  */
 833 static int
 834 reiserfs_open_file( char *dirname )
 835 {
 836      struct reiserfs_de_head *de_head;
 837      char *rest, ch;
 838      __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
 839
 840      char linkbuf[PATH_MAX];    /* buffer for following symbolic links */
 841      int link_count = 0;
 842      int mode;
 843      errnum = 0;
 844
 845      dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
 846      objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
 847
 848      while ( 1 )
 849      {
 850
 851           DEBUG_F( "dirname=%s\n", dirname );
 852
 853           /* Search for the stat info first. */
 854           if ( !search_stat( dir_id, objectid ) )
 855                return 0;
 856
 857
 858           DEBUG_F( "sd_mode=0%o sd_size=%Lu\n",
 859                    sd_mode((struct stat_data *) INFO->current_item ),
 860                    sd_size(INFO->current_ih, INFO->current_item ));
 861
 862
 863           mode = sd_mode((struct stat_data *)INFO->current_item);
 864
 865           /* If we've got a symbolic link, then chase it. */
 866           if ( S_ISLNK( mode ) )
 867           {
 868                int len = 0;
 869
 870                DEBUG_F("link count = %d\n", link_count);
 871                DEBUG_SLEEP;
 872                if ( ++link_count > MAX_LINK_COUNT )
 873                {
 874                     DEBUG_F("Symlink loop\n");
 875                     errnum = FILE_ERR_SYMLINK_LOOP;
 876                     return 0;
 877                }
 878
 879                /* Get the symlink size. */
 880                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
 881
 882                /* Find out how long our remaining name is. */
 883                while ( dirname[len] && !isspace( dirname[len] ) )
 884                     len++;
 885
 886                if ( INFO->file->len + len > sizeof ( linkbuf ) - 1 )
 887                {
 888                     errnum = FILE_ERR_LENGTH;
 889                     return 0;
 890                }
 891
 892                /* Copy the remaining name to the end of the symlink data. Note *
 893                 * that DIRNAME and LINKBUF may overlap! */
 894                memmove( linkbuf + INFO->file->len, dirname, len + 1 );
 895
 896                INFO->fileinfo.k_dir_id = dir_id;
 897                INFO->fileinfo.k_objectid = objectid;
 898                INFO->file->pos = 0;
 899                if ( !next_key()
 900                     || reiserfs_read_data( linkbuf, INFO->file->len ) != INFO->file->len ) {
 901                     DEBUG_F("reiserfs_open_file - if !next_key || reiserfs_read_data\n");
 902                     DEBUG_SLEEP;
 903                     errnum = FILE_IOERR;
 904                     return 0;
 905                }
 906
 907
 908                DEBUG_F( "symlink=%s\n", linkbuf );
 909                DEBUG_SLEEP;
 910
 911                dirname = linkbuf;
 912                if ( *dirname == '/' )
 913                {
 914                     /* It's an absolute link, so look it up in root. */
 915                     dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
 916                     objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
 917                }
 918                else
 919                {
 920                     /* Relative, so look it up in our parent directory. */
 921                     dir_id = parent_dir_id;
 922                     objectid = parent_objectid;
 923                }
 924
 925                /* Now lookup the new name. */
 926                continue;
 927           }
 928
 929           /* if we have a real file (and we're not just printing *
 930            * possibilities), then this is where we want to exit */
 931
 932           if ( !*dirname || isspace( *dirname ) )
 933           {
 934                if ( !S_ISREG( mode ) )
 935                {
 936                     errnum = FILE_ERR_BAD_TYPE;
 937                     return 0;
 938                }
 939
 940                INFO->file->pos = 0;
 941                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
 942
 943                INFO->fileinfo.k_dir_id = dir_id;
 944                INFO->fileinfo.k_objectid = objectid;
 945                return next_key();
 946           }
 947
 948           /* continue with the file/directory name interpretation */
 949           while ( *dirname == '/' )
 950                dirname++;
 951           if ( !S_ISDIR( mode ) )
 952           {
 953                errnum = FILE_ERR_NOTDIR;
 954                return 0;
 955           }
 956           for ( rest = dirname; ( ch = *rest ) && !isspace( ch ) && ch != '/';
 957                 rest++ ) ;
 958           *rest = 0;
 959
 960           while ( 1 )
 961           {
 962                char *name_end;
 963                int num_entries;
 964
 965                if ( !next_key() )
 966                     return 0;
 967
 968                if ( INFO->current_ih->ih_key.k_objectid != objectid )
 969                     break;
 970
 971                name_end = INFO->current_item + ih_item_len(INFO->current_ih);
 972                de_head = ( struct reiserfs_de_head * ) INFO->current_item;
 973                num_entries = ih_entry_count(INFO->current_ih);
 974                while ( num_entries > 0 )
 975                {
 976                     char *filename = INFO->current_item + deh_location(de_head);
 977                     char tmp = *name_end;
 978
 979                     if( deh_state(de_head) & (1 << DEH_Visible))
 980                     {
 981                          int cmp;
 982
 983                          /* Directory names in ReiserFS are not null * terminated.
 984                           * We write a temporary 0 behind it. * NOTE: that this
 985                           * may overwrite the first block in * the tree cache.
 986                           * That doesn't hurt as long as we * don't call next_key
 987                           * () in between. */
 988                          *name_end = 0;
 989                          cmp = strcmp( dirname, filename );
 990                          *name_end = tmp;
 991                          if ( cmp == 0 )
 992                               goto found;
 993                     }
 994                     /* The beginning of this name marks the end of the next name.
 995                      */
 996                     name_end = filename;
 997                     de_head++;
 998                     num_entries--;
 999                }
1000           }
1001
1002           errnum = FILE_ERR_NOTFOUND;
1003           *rest = ch;
1004           return 0;
1005
1006      found:
1007           *rest = ch;
1008           dirname = rest;
1009
1010           parent_dir_id = dir_id;
1011           parent_objectid = objectid;
1012           dir_id = de_head->deh_dir_id; /* LE */
1013           objectid = de_head->deh_objectid; /* LE */
1014      }
1015 }
1016
1017
1018
1019 #ifndef __LITTLE_ENDIAN
1020 typedef union {
1021      struct offset_v2 offset_v2;
1022      __u64 linear;
1023 } offset_v2_esafe_overlay;
1024
1025 inline __u16
1026 offset_v2_k_type( struct offset_v2 *v2 )
1027 {
1028      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1029      tmp.linear = le64_to_cpu( tmp.linear );
1030      return tmp.offset_v2.k_type;
1031 }
1032
1033 inline loff_t
1034 offset_v2_k_offset( struct offset_v2 *v2 )
1035 {
1036      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1037      tmp.linear = le64_to_cpu( tmp.linear );
1038      return tmp.offset_v2.k_offset;
1039 }
1040 #endif
1041
1042 inline int
1043 uniqueness2type (__u32 uniqueness)
1044 {
1045      switch (uniqueness) {
1046      case V1_SD_UNIQUENESS: return TYPE_STAT_DATA;
1047      case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT;
1048      case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT;
1049      case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY;
1050      }
1051      return TYPE_ANY;
1052 }
1053
1054 /*
1055  * Local variables:
1056  * c-file-style: "k&r"
1057  * c-basic-offset: 5
1058  * End:
1059  */