second/fs_reiserfs.c

   1 /* ReiserFS filesystem
   2
   3    Copyright (C) 2001 Jeffrey Mahoney (jeffm@suse.com)
   4
   5    Adapted from GRUB
   6
   7    Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #include "types.h"
  25 #include "ctype.h"
  26 #include "string.h"
  27 #include "stdlib.h"
  28 #include "fs.h"
  29 #include "errors.h"
  30 #include "debug.h"
  31 #include "reiserfs/reiserfs.h"
  32
  33 /* Exported in struct fs_t */
  34 static int reiserfs_open( struct boot_file_t *file, const char *dev_name,
  35                           struct partition_t *part, const char *file_name );
  36 static int reiserfs_read( struct boot_file_t *file, unsigned int size,
  37
  38                           void *buffer );
  39 static int reiserfs_seek( struct boot_file_t *file, unsigned int newpos );
  40 static int reiserfs_close( struct boot_file_t *file );
  41
  42 struct fs_t reiserfs_filesystem = {
  43      name:"reiserfs",
  44      open:reiserfs_open,
  45      read:reiserfs_read,
  46      seek:reiserfs_seek,
  47      close:reiserfs_close
  48 };
  49
  50 static int reiserfs_read_super( void );
  51 static int reiserfs_open_file( char *dirname );
  52 static int reiserfs_read_data( char *buf, __u32 len );
  53
  54
  55 static struct reiserfs_state reiserfs;
  56 static struct reiserfs_state *INFO = &reiserfs;
  57
  58 /* Adapted from GRUB: */
  59 static char FSYS_BUF[FSYSREISER_CACHE_SIZE];
  60 int errnum;
  61
  62
  63 static int
  64 reiserfs_open( struct boot_file_t *file, const char *dev_name,
  65                struct partition_t *part, const char *file_name )
  66 {
  67      static char buffer[1024];
  68
  69      DEBUG_ENTER;
  70      DEBUG_OPEN;
  71
  72      memset( INFO, 0, sizeof(struct reiserfs_state) );
  73      INFO->file = file;
  74
  75      if (part)
  76      {
  77           DEBUG_F( "Determining offset for partition %d\n", part->part_number );
  78           INFO->partition_offset = ((uint64_t)part->part_start) * part->blocksize;
  79           DEBUG_F( "%Lu = %lu * %hu\n", INFO->partition_offset,
  80                    part->part_start,
  81                    part->blocksize );
  82      }
  83      else
  84           INFO->partition_offset = 0;
  85
  86      sprintf( buffer, "%s:%d", dev_name, 0 ); /* 0 is full disk in OF */
  87      file->of_device = prom_open( buffer );
  88      DEBUG_F( "Trying to open dev_name=%s; filename=%s; partition offset=%Lu\n",
  89               buffer, file_name, INFO->partition_offset );
  90
  91      if ( file->of_device == PROM_INVALID_HANDLE || file->of_device == NULL )
  92      {
  93           DEBUG_F( "Can't open device %p\n", file->of_device );
  94           DEBUG_LEAVE(FILE_ERR_BADDEV);
  95           return FILE_ERR_BADDEV;
  96      }
  97
  98      DEBUG_F("%p was successfully opened\n", file->of_device);
  99
 100      if ( reiserfs_read_super() != 1 )
 101      {
 102           DEBUG_F( "Couldn't open ReiserFS @ %s/%Lu\n", buffer, INFO->partition_offset );
 103           prom_close( file->of_device );
 104           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 105           return FILE_ERR_BAD_FSYS;
 106      }
 107
 108      DEBUG_F( "Attempting to open %s\n", file_name );
 109      strcpy(buffer, file_name); /* reiserfs_open_file modifies argument */
 110      if (reiserfs_open_file(buffer) == 0)
 111      {
 112           DEBUG_F( "reiserfs_open_file failed. errnum = %d\n", errnum );
 113           prom_close( file->of_device );
 114           DEBUG_LEAVE_F(errnum);
 115           return errnum;
 116      }
 117
 118      DEBUG_F( "Successfully opened %s\n", file_name );
 119
 120      DEBUG_LEAVE(FILE_ERR_OK);
 121      DEBUG_SLEEP;
 122      return FILE_ERR_OK;
 123 }
 124
 125 static int
 126 reiserfs_read( struct boot_file_t *file, unsigned int size, void *buffer )
 127 {
 128      return reiserfs_read_data( buffer, size );
 129 }
 130
 131 static int
 132 reiserfs_seek( struct boot_file_t *file, unsigned int newpos )
 133 {
 134      file->pos = newpos;
 135      return FILE_ERR_OK;
 136 }
 137
 138 static int
 139 reiserfs_close( struct boot_file_t *file )
 140 {
 141      if( file->of_device )
 142      {
 143           prom_close(file->of_device);
 144           file->of_device = 0;
 145           DEBUG_F("reiserfs_close called\n");
 146      }
 147      return FILE_ERR_OK;
 148 }
 149
 150
 151 static __inline__ __u32
 152 log2( __u32 word )
 153 {
 154      int i = 0;
 155      while( word && (word & (1 << ++i)) == 0 );
 156      return i;
 157 }
 158
 159 static __inline__ int
 160 is_power_of_two( unsigned long word )
 161 {
 162      return ( word & -word ) == word;
 163 }
 164
 165 static int
 166 read_disk_block( struct boot_file_t *file, __u32 block, __u32 start,
 167                  __u32 length, void *buf )
 168 {
 169      __u16 fs_blocksize = INFO->blocksize == 0 ? REISERFS_OLD_BLOCKSIZE
 170           : INFO->blocksize;
 171      unsigned long long pos = (unsigned long long)block * (unsigned long long)fs_blocksize;
 172      pos += (unsigned long long)INFO->partition_offset + (unsigned long long)start;
 173      DEBUG_F( "Reading %u bytes, starting at block %u, disk offset %Lu\n",
 174               length, block, pos );
 175      if (!prom_lseek( file->of_device, pos )) {
 176           DEBUG_F("prom_lseek failed\n");
 177           return 0;
 178      }
 179      return prom_read( file->of_device, buf, length );
 180 }
 181
 182
 183 static int
 184 journal_read( __u32 block, __u32 len, char *buffer )
 185 {
 186      return read_disk_block( INFO->file,
 187                              (INFO->journal_block + block), 0,
 188                              len, buffer );
 189 }
 190
 191 /* Read a block from ReiserFS file system, taking the journal into
 192  * account.  If the block nr is in the journal, the block from the
 193  * journal taken.
 194  */
 195 static int
 196 block_read( __u32 blockNr, __u32 start, __u32 len, char *buffer )
 197 {
 198      __u32 transactions = INFO->journal_transactions;
 199      __u32 desc_block = INFO->journal_first_desc;
 200      __u32 journal_mask = INFO->journal_block_count - 1;
 201      __u32 translatedNr = blockNr;
 202      __u32 *journal_table = JOURNAL_START;
 203
 204 //    DEBUG_F( "block_read( %u, %u, %u, ..)\n", blockNr, start, len );
 205
 206      while ( transactions-- > 0 )
 207      {
 208           int i = 0;
 209           int j_len;
 210
 211           if ( *journal_table != 0xffffffff )
 212           {
 213                /* Search for the blockNr in cached journal */
 214                j_len = le32_to_cpu(*journal_table++);
 215                while ( i++ < j_len )
 216                {
 217                     if ( le32_to_cpu(*journal_table++) == blockNr )
 218                     {
 219                          journal_table += j_len - i;
 220                          goto found;
 221                     }
 222                }
 223           }
 224           else
 225           {
 226                /* This is the end of cached journal marker.  The remaining
 227                 * transactions are still on disk. */
 228                struct reiserfs_journal_desc desc;
 229                struct reiserfs_journal_commit commit;
 230
 231                if ( !journal_read( desc_block, sizeof(desc), (char *) &desc ) )
 232                     return 0;
 233
 234                j_len = le32_to_cpu(desc.j_len);
 235                while ( i < j_len && i < JOURNAL_TRANS_HALF )
 236                     if ( le32_to_cpu(desc.j_realblock[i++]) == blockNr )
 237                          goto found;
 238
 239                if ( j_len >= JOURNAL_TRANS_HALF )
 240                {
 241                     int commit_block = ( desc_block + 1 + j_len ) & journal_mask;
 242
 243                     if ( !journal_read( commit_block,
 244                                         sizeof(commit), (char *) &commit ) )
 245                          return 0;
 246
 247                     while ( i < j_len )
 248                          if ( le32_to_cpu(commit.j_realblock[i++ - JOURNAL_TRANS_HALF]) == blockNr )
 249                               goto found;
 250                }
 251           }
 252           goto not_found;
 253
 254      found:
 255           translatedNr =
 256                INFO->journal_block + ( ( desc_block + i ) & journal_mask );
 257
 258           DEBUG_F( "block_read: block %u is mapped to journal block %u.\n",
 259                    blockNr, translatedNr - INFO->journal_block );
 260
 261           /* We must continue the search, as this block may be overwritten in
 262            * later transactions. */
 263      not_found:
 264           desc_block = (desc_block + 2 + j_len) & journal_mask;
 265      }
 266
 267      return read_disk_block( INFO->file, translatedNr, start, len, buffer );
 268 }
 269
 270 /* Init the journal data structure.  We try to cache as much as
 271  * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
 272  * we can still read the rest from the disk on demand.
 273  *
 274  * The first number of valid transactions and the descriptor block of the
 275  * first valid transaction are held in INFO.  The transactions are all
 276  * adjacent, but we must take care of the journal wrap around.
 277  */
 278 static int
 279 journal_init( void )
 280 {
 281      struct reiserfs_journal_header header;
 282      struct reiserfs_journal_desc desc;
 283      struct reiserfs_journal_commit commit;
 284      __u32 block_count = INFO->journal_block_count;
 285      __u32 desc_block;
 286      __u32 commit_block;
 287      __u32 next_trans_id;
 288      __u32 *journal_table = JOURNAL_START;
 289
 290      journal_read( block_count, sizeof ( header ), ( char * ) &header );
 291      desc_block = le32_to_cpu(header.j_first_unflushed_offset);
 292      if ( desc_block >= block_count )
 293           return 0;
 294
 295      INFO->journal_transactions = 0;
 296      INFO->journal_first_desc = desc_block;
 297      next_trans_id = le32_to_cpu(header.j_last_flush_trans_id) + 1;
 298
 299      DEBUG_F( "journal_init: last flushed %u\n", le32_to_cpu(header.j_last_flush_trans_id) );
 300
 301      while ( 1 )
 302      {
 303           journal_read( desc_block, sizeof(desc), (char *) &desc );
 304           if ( strcmp( JOURNAL_DESC_MAGIC, desc.j_magic ) != 0
 305                || desc.j_trans_id != next_trans_id
 306                || desc.j_mount_id != header.j_mount_id )
 307                /* no more valid transactions */
 308                break;
 309
 310           commit_block = ( desc_block + le32_to_cpu(desc.j_len) + 1 ) & ( block_count - 1 );
 311           journal_read( commit_block, sizeof(commit), (char *) &commit );
 312           if ( desc.j_trans_id != commit.j_trans_id
 313                || desc.j_len != commit.j_len )
 314                /* no more valid transactions */
 315                break;
 316
 317
 318           DEBUG_F( "Found valid transaction %u/%u at %u.\n",
 319                    le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
 320                    desc_block );
 321
 322
 323           next_trans_id++;
 324           if ( journal_table < JOURNAL_END )
 325           {
 326                if ( ( journal_table + 1 + le32_to_cpu(desc.j_len) ) >= JOURNAL_END )
 327                {
 328                     /* The table is almost full; mark the end of the cached * *
 329                      * journal. */
 330                     *journal_table = 0xffffffff;
 331                     journal_table = JOURNAL_END;
 332                }
 333                else
 334                {
 335                     int i;
 336
 337                     /* Cache the length and the realblock numbers in the table. *
 338                      * The block number of descriptor can easily be computed. *
 339                      * and need not to be stored here. */
 340                     *journal_table++ = desc.j_len;
 341                     for ( i = 0; i < le32_to_cpu(desc.j_len) && i < JOURNAL_TRANS_HALF; i++ )
 342                     {
 343                          *journal_table++ = desc.j_realblock[i];
 344
 345                          DEBUG_F( "block %u is in journal %u.\n",
 346                                   le32_to_cpu(desc.j_realblock[i]), desc_block );
 347
 348                     }
 349                     for ( ; i < le32_to_cpu(desc.j_len); i++ )
 350                     {
 351                          *journal_table++ =
 352                               commit.j_realblock[i - JOURNAL_TRANS_HALF];
 353
 354                          DEBUG_F( "block %u is in journal %u.\n",
 355                                   le32_to_cpu(commit.j_realblock[i - JOURNAL_TRANS_HALF]),
 356                                   desc_block );
 357
 358                     }
 359                }
 360           }
 361           desc_block = (commit_block + 1) & (block_count - 1);
 362      }
 363
 364      DEBUG_F( "Transaction %u/%u at %u isn't valid.\n",
 365               le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
 366               desc_block );
 367
 368
 369      INFO->journal_transactions
 370           = next_trans_id - le32_to_cpu(header.j_last_flush_trans_id) - 1;
 371      return (errnum == 0);
 372 }
 373
 374 /* check filesystem types and read superblock into memory buffer */
 375 static int
 376 reiserfs_read_super( void )
 377 {
 378      struct reiserfs_super_block super;
 379      __u64 superblock = REISERFS_SUPERBLOCK_BLOCK;
 380
 381      if (read_disk_block(INFO->file, superblock, 0, sizeof(super), &super) != sizeof(super)) {
 382           DEBUG_F("read_disk_block failed!\n");
 383           return 0;
 384      }
 385
 386      DEBUG_F( "Found super->magic: \"%s\"\n", super.s_magic );
 387
 388      if( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
 389          strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
 390      {
 391           /* Try old super block position */
 392           superblock = REISERFS_OLD_SUPERBLOCK_BLOCK;
 393
 394           if (read_disk_block( INFO->file, superblock, 0, sizeof (super),  &super ) != sizeof(super)) {
 395                DEBUG_F("read_disk_block failed!\n");
 396                return 0;
 397           }
 398
 399           if ( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
 400                strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
 401           {
 402                /* pre journaling super block - untested */
 403                if ( strcmp( REISERFS_SUPER_MAGIC_STRING,
 404                             (char *) ((__u32) &super + 20 ) ) != 0 )
 405                     return 0;
 406
 407                super.s_blocksize = cpu_to_le16(REISERFS_OLD_BLOCKSIZE);
 408                super.s_journal_block = 0;
 409                super.s_version = 0;
 410           }
 411      }
 412
 413      DEBUG_F( "ReiserFS superblock data:\n" );
 414      DEBUG_F( "Block count: %u\n", le32_to_cpu(super.s_block_count) )
 415           DEBUG_F( "Free blocks: %u\n", le32_to_cpu(super.s_free_blocks) );
 416      DEBUG_F( "Journal block: %u\n", le32_to_cpu(super.s_journal_block) );
 417      DEBUG_F( "Journal size (in blocks): %u\n",
 418               le32_to_cpu(super.s_orig_journal_size) );
 419      DEBUG_F( "Root block: %u\n\n", le32_to_cpu(super.s_root_block) );
 420
 421
 422      INFO->version = le16_to_cpu(super.s_version);
 423      INFO->blocksize = le16_to_cpu(super.s_blocksize);
 424      INFO->blocksize_shift = log2( INFO->blocksize );
 425
 426      INFO->journal_block = le32_to_cpu(super.s_journal_block);
 427      INFO->journal_block_count = le32_to_cpu(super.s_orig_journal_size);
 428
 429      INFO->cached_slots = (FSYSREISER_CACHE_SIZE >> INFO->blocksize_shift) - 1;
 430
 431      /* At this point, we've found a valid superblock. If we run into problems
 432       * mounting the FS, the user should probably know. */
 433
 434      /* A few sanity checks ... */
 435      if ( INFO->version > REISERFS_MAX_SUPPORTED_VERSION )
 436      {
 437           prom_printf( "ReiserFS: Unsupported version field: %u\n",
 438                        INFO->version );
 439           return 0;
 440      }
 441
 442      if ( INFO->blocksize < FSYSREISER_MIN_BLOCKSIZE
 443           || INFO->blocksize > FSYSREISER_MAX_BLOCKSIZE )
 444      {
 445           prom_printf( "ReiserFS: Unsupported block size: %u\n",
 446                        INFO->blocksize );
 447           return 0;
 448      }
 449
 450      /* Setup the journal.. */
 451      if ( INFO->journal_block != 0 )
 452      {
 453           if ( !is_power_of_two( INFO->journal_block_count ) )
 454           {
 455                prom_printf( "ReiserFS: Unsupported journal size, "
 456                             "not a power of 2: %u\n",
 457                             INFO->journal_block_count );
 458                return 0;
 459           }
 460
 461           journal_init();
 462           /* Read in super block again, maybe it is in the journal */
 463           block_read( superblock, 0, sizeof (struct reiserfs_super_block),
 464                       (char *) &super );
 465      }
 466
 467      /* Read in the root block */
 468      if ( !block_read( le32_to_cpu(super.s_root_block), 0,
 469                        INFO->blocksize, ROOT ) )
 470      {
 471           prom_printf( "ReiserFS: Failed to read in root block\n" );
 472           return 0;
 473      }
 474
 475      /* The root node is always the "deepest", so we can
 476         determine the hieght of the tree using it. */
 477      INFO->tree_depth = blkh_level(BLOCKHEAD(ROOT));
 478
 479
 480      DEBUG_F( "root read_in: block=%u, depth=%u\n",
 481               le32_to_cpu(super.s_root_block), INFO->tree_depth );
 482
 483      if ( INFO->tree_depth >= REISERFS_MAX_TREE_HEIGHT )
 484      {
 485           prom_printf( "ReiserFS: Unsupported tree depth (too deep): %u\n",
 486                        INFO->tree_depth );
 487           return 0;
 488      }
 489
 490      if ( INFO->tree_depth == BLKH_LEVEL_LEAF )
 491      {
 492           /* There is only one node in the whole filesystem, which is
 493              simultanously leaf and root */
 494           memcpy( LEAF, ROOT, INFO->blocksize );
 495      }
 496      return 1;
 497 }
 498
 499 /***************** TREE ACCESSING METHODS *****************************/
 500
 501 /* I assume you are familiar with the ReiserFS tree, if not go to
 502  * http://devlinux.com/projects/reiserfs/
 503  *
 504  * My tree node cache is organized as following
 505  *   0   ROOT node
 506  *   1   LEAF node  (if the ROOT is also a LEAF it is copied here
 507  *   2-n other nodes on current path from bottom to top.
 508  *       if there is not enough space in the cache, the top most are
 509  *       omitted.
 510  *
 511  * I have only two methods to find a key in the tree:
 512  *   search_stat(dir_id, objectid) searches for the stat entry (always
 513  *       the first entry) of an object.
 514  *   next_key() gets the next key in tree order.
 515  *
 516  * This means, that I can only sequential reads of files are
 517  * efficient, but this really doesn't hurt for grub.
 518  */
 519
 520 /* Read in the node at the current path and depth into the node cache.
 521  * You must set INFO->blocks[depth] before.
 522  */
 523 static char *
 524 read_tree_node( __u32 blockNr, __u16 depth )
 525 {
 526      char *cache = CACHE(depth);
 527      int num_cached = INFO->cached_slots;
 528      errnum = 0;
 529
 530      if ( depth < num_cached )
 531      {
 532           /* This is the cached part of the path.
 533              Check if same block is needed. */
 534           if ( blockNr == INFO->blocks[depth] )
 535                return cache;
 536      }
 537      else
 538           cache = CACHE(num_cached);
 539
 540      DEBUG_F( "  next read_in: block=%u (depth=%u)\n", blockNr, depth );
 541
 542      if ( !block_read( blockNr, 0, INFO->blocksize, cache ) )
 543      {
 544           DEBUG_F( "block_read failed\n" );
 545           return 0;
 546      }
 547
 548      DEBUG_F( "FOUND: blk_level=%u, blk_nr_item=%u, blk_free_space=%u\n",
 549               blkh_level(BLOCKHEAD(cache)),
 550               blkh_nr_item(BLOCKHEAD(cache)),
 551               le16_to_cpu(BLOCKHEAD(cache)->blk_free_space) );
 552
 553      /* Make sure it has the right node level */
 554      if ( blkh_level(BLOCKHEAD(cache)) != depth )
 555      {
 556           DEBUG_F( "depth = %u != %u\n", blkh_level(BLOCKHEAD(cache)), depth );
 557           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 558           errnum = FILE_ERR_BAD_FSYS;
 559           return 0;
 560      }
 561
 562      INFO->blocks[depth] = blockNr;
 563      return cache;
 564 }
 565
 566 /* Get the next key, i.e. the key following the last retrieved key in
 567  * tree order.  INFO->current_ih and
 568  * INFO->current_info are adapted accordingly.  */
 569 static int
 570 next_key( void )
 571 {
 572      __u16 depth;
 573      struct item_head *ih = INFO->current_ih + 1;
 574      char *cache;
 575
 576
 577      DEBUG_F( "next_key:\n  old ih: key %u:%u:%u:%u version:%u\n",
 578               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
 579               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
 580               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
 581               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
 582               ih_version(INFO->current_ih) );
 583
 584
 585      if ( ih == &ITEMHEAD[blkh_nr_item(BLOCKHEAD( LEAF ))] )
 586      {
 587           depth = BLKH_LEVEL_LEAF;
 588           /* The last item, was the last in the leaf node. * Read in the next
 589            * * block */
 590           do
 591           {
 592                if ( depth == INFO->tree_depth )
 593                {
 594                     /* There are no more keys at all. * Return a dummy item with
 595                      * * MAX_KEY */
 596                     ih =
 597                          ( struct item_head * )
 598                          &BLOCKHEAD( LEAF )->blk_right_delim_key;
 599                     goto found;
 600                }
 601                depth++;
 602
 603                DEBUG_F( "  depth=%u, i=%u\n", depth, INFO->next_key_nr[depth] );
 604
 605           }
 606           while ( INFO->next_key_nr[depth] == 0 );
 607
 608           if ( depth == INFO->tree_depth )
 609                cache = ROOT;
 610           else if ( depth <= INFO->cached_slots )
 611                cache = CACHE( depth );
 612           else
 613           {
 614                cache = read_tree_node( INFO->blocks[depth], --depth );
 615                if ( !cache )
 616                     return 0;
 617           }
 618
 619           do
 620           {
 621                __u16 nr_item = blkh_nr_item(BLOCKHEAD( cache ));
 622                int key_nr = INFO->next_key_nr[depth]++;
 623
 624
 625                DEBUG_F( "  depth=%u, i=%u/%u\n", depth, key_nr, nr_item );
 626
 627                if ( key_nr == nr_item )
 628                     /* This is the last item in this block, set the next_key_nr *
 629                      * to 0 */
 630                     INFO->next_key_nr[depth] = 0;
 631
 632                cache =
 633                     read_tree_node( dc_block_number( &(DC( cache )[key_nr])),
 634                                     --depth );
 635                if ( !cache )
 636                     return 0;
 637           }
 638           while ( depth > BLKH_LEVEL_LEAF );
 639
 640           ih = ITEMHEAD;
 641      }
 642 found:
 643      INFO->current_ih = ih;
 644      INFO->current_item = &LEAF[ih_location(ih)];
 645
 646      DEBUG_F( "  new ih: key %u:%u:%u:%u version:%u\n",
 647               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
 648               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
 649               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
 650               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
 651               ih_version(INFO->current_ih) );
 652
 653      return 1;
 654 }
 655
 656 /* preconditions: reiserfs_read_super already executed, therefore
 657  *   INFO block is valid
 658  * returns: 0 if error (errnum is set),
 659  *   nonzero iff we were able to find the key successfully.
 660  * postconditions: on a nonzero return, the current_ih and
 661  *   current_item fields describe the key that equals the
 662  *   searched key.  INFO->next_key contains the next key after
 663  *   the searched key.
 664  * side effects: messes around with the cache.
 665  */
 666 static int
 667 search_stat( __u32 dir_id, __u32 objectid )
 668 {
 669      char *cache;
 670      int depth;
 671      int nr_item;
 672      int i;
 673      struct item_head *ih;
 674      errnum = 0;
 675
 676      DEBUG_F( "search_stat:\n  key %u:%u:0:0\n", le32_to_cpu(dir_id),
 677               le32_to_cpu(objectid) );
 678
 679
 680      depth = INFO->tree_depth;
 681      cache = ROOT;
 682
 683      DEBUG_F( "depth = %d\n", depth );
 684      while ( depth > BLKH_LEVEL_LEAF )
 685      {
 686           struct key *key;
 687
 688           nr_item = blkh_nr_item(BLOCKHEAD( cache ));
 689
 690           key = KEY( cache );
 691
 692           for ( i = 0; i < nr_item; i++ )
 693           {
 694                if (le32_to_cpu(key->k_dir_id) > le32_to_cpu(dir_id)
 695                    || (key->k_dir_id == dir_id
 696                        && (le32_to_cpu(key->k_objectid) > le32_to_cpu(objectid)
 697                            || (key->k_objectid == objectid
 698                                && (key->u.k_offset_v1.k_offset
 699                                    | key->u.k_offset_v1.k_uniqueness) > 0))))
 700                     break;
 701                key++;
 702           }
 703
 704
 705           DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
 706
 707           INFO->next_key_nr[depth] = ( i == nr_item ) ? 0 : i + 1;
 708           cache = read_tree_node( dc_block_number(&(DC(cache)[i])), --depth );
 709           if ( !cache )
 710                return 0;
 711      }
 712
 713      /* cache == LEAF */
 714      nr_item = blkh_nr_item(BLOCKHEAD(LEAF));
 715      ih = ITEMHEAD;
 716      DEBUG_F( "nr_item = %d\n", nr_item );
 717      for ( i = 0; i < nr_item; i++ )
 718      {
 719           if ( ih->ih_key.k_dir_id == dir_id
 720                && ih->ih_key.k_objectid == objectid
 721                && ih->ih_key.u.k_offset_v1.k_offset == 0
 722                && ih->ih_key.u.k_offset_v1.k_uniqueness == 0 )
 723           {
 724
 725                DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
 726
 727                INFO->current_ih = ih;
 728                INFO->current_item = &LEAF[ih_location(ih)];
 729
 730                return 1;
 731           }
 732
 733           ih++;
 734      }
 735
 736      DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
 737      errnum = FILE_ERR_BAD_FSYS;
 738      return 0;
 739 }
 740
 741 static int
 742 reiserfs_read_data( char *buf, __u32 len )
 743 {
 744      __u32 blocksize;
 745      __u32 offset;
 746      __u32 to_read;
 747      char *prev_buf = buf;
 748      errnum = 0;
 749
 750      DEBUG_F( "reiserfs_read_data: INFO->file->pos=%Lu len=%u, offset=%Lu\n",
 751               INFO->file->pos, len, (__u64) IH_KEY_OFFSET(INFO->current_ih) - 1 );
 752
 753
 754      if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
 755           || IH_KEY_OFFSET( INFO->current_ih ) > INFO->file->pos + 1 )
 756      {
 757           search_stat( INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid );
 758           goto get_next_key;
 759      }
 760
 761      while ( errnum == 0 )
 762      {
 763           if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid )
 764                break;
 765
 766           offset = INFO->file->pos - IH_KEY_OFFSET( INFO->current_ih ) + 1;
 767           blocksize = ih_item_len(INFO->current_ih);
 768
 769
 770           DEBUG_F( "  loop: INFO->file->pos=%Lu len=%u, offset=%u blocksize=%u\n",
 771                    INFO->file->pos, len, offset, blocksize );
 772
 773
 774           if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_DIRECT )
 775                && offset < blocksize )
 776           {
 777                to_read = blocksize - offset;
 778                if ( to_read > len )
 779                     to_read = len;
 780
 781                memcpy( buf, INFO->current_item + offset, to_read );
 782                goto update_buf_len;
 783           }
 784           else if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_INDIRECT ) )
 785           {
 786                blocksize = ( blocksize >> 2 ) << INFO->blocksize_shift;
 787
 788                while ( offset < blocksize )
 789                {
 790                     __u32 blocknr = le32_to_cpu(((__u32 *)
 791                                                  INFO->current_item)[offset >> INFO->blocksize_shift]);
 792
 793                     int blk_offset = offset & (INFO->blocksize - 1);
 794
 795                     to_read = INFO->blocksize - blk_offset;
 796                     if ( to_read > len )
 797                          to_read = len;
 798
 799                     /* Journal is only for meta data.
 800                        Data blocks can be read directly without using block_read */
 801                     read_disk_block( INFO->file, blocknr, blk_offset, to_read,
 802                                      buf );
 803
 804                update_buf_len:
 805                     len -= to_read;
 806                     buf += to_read;
 807                     offset += to_read;
 808                     INFO->file->pos += to_read;
 809                     if ( len == 0 )
 810                          goto done;
 811                }
 812           }
 813      get_next_key:
 814           next_key();
 815      }
 816 done:
 817      return (errnum != 0) ? 0 : buf - prev_buf;
 818 }
 819
 820
 821 /* preconditions: reiserfs_read_super already executed, therefore
 822  *   INFO block is valid
 823  * returns: 0 if error, nonzero iff we were able to find the file successfully
 824  * postconditions: on a nonzero return, INFO->fileinfo contains the info
 825  *   of the file we were trying to look up, filepos is 0 and filemax is
 826  *   the size of the file.
 827  */
 828 static int
 829 reiserfs_open_file( char *dirname )
 830 {
 831      struct reiserfs_de_head *de_head;
 832      char *rest, ch;
 833      __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
 834
 835      char linkbuf[PATH_MAX];    /* buffer for following symbolic links */
 836      int link_count = 0;
 837      int mode;
 838      errnum = 0;
 839
 840      dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
 841      objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
 842
 843      while ( 1 )
 844      {
 845
 846           DEBUG_F( "dirname=%s\n", dirname );
 847
 848           /* Search for the stat info first. */
 849           if ( !search_stat( dir_id, objectid ) )
 850                return 0;
 851
 852
 853           DEBUG_F( "sd_mode=0%o sd_size=%Lu\n",
 854                    sd_mode((struct stat_data *) INFO->current_item ),
 855                    sd_size(INFO->current_ih, INFO->current_item ));
 856
 857
 858           mode = sd_mode((struct stat_data *)INFO->current_item);
 859
 860           /* If we've got a symbolic link, then chase it. */
 861           if ( S_ISLNK( mode ) )
 862           {
 863                int len = 0;
 864
 865                DEBUG_F("link count = %d\n", link_count);
 866                DEBUG_SLEEP;
 867                if ( ++link_count > MAX_LINK_COUNT )
 868                {
 869                     DEBUG_F("Symlink loop\n");
 870                     errnum = FILE_ERR_SYMLINK_LOOP;
 871                     return 0;
 872                }
 873
 874                /* Get the symlink size. */
 875                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
 876
 877                /* Find out how long our remaining name is. */
 878                while ( dirname[len] && !isspace( dirname[len] ) )
 879                     len++;
 880
 881                if ( INFO->file->len + len > sizeof ( linkbuf ) - 1 )
 882                {
 883                     errnum = FILE_ERR_LENGTH;
 884                     return 0;
 885                }
 886
 887                /* Copy the remaining name to the end of the symlink data. Note *
 888                 * that DIRNAME and LINKBUF may overlap! */
 889                memmove( linkbuf + INFO->file->len, dirname, len + 1 );
 890
 891                INFO->fileinfo.k_dir_id = dir_id;
 892                INFO->fileinfo.k_objectid = objectid;
 893                INFO->file->pos = 0;
 894                if ( !next_key()
 895                     || reiserfs_read_data( linkbuf, INFO->file->len ) != INFO->file->len ) {
 896                     DEBUG_F("reiserfs_open_file - if !next_key || reiserfs_read_data\n");
 897                     DEBUG_SLEEP;
 898                     errnum = FILE_IOERR;
 899                     return 0;
 900                }
 901
 902
 903                DEBUG_F( "symlink=%s\n", linkbuf );
 904                DEBUG_SLEEP;
 905
 906                dirname = linkbuf;
 907                if ( *dirname == '/' )
 908                {
 909                     /* It's an absolute link, so look it up in root. */
 910                     dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
 911                     objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
 912                }
 913                else
 914                {
 915                     /* Relative, so look it up in our parent directory. */
 916                     dir_id = parent_dir_id;
 917                     objectid = parent_objectid;
 918                }
 919
 920                /* Now lookup the new name. */
 921                continue;
 922           }
 923
 924           /* if we have a real file (and we're not just printing *
 925            * possibilities), then this is where we want to exit */
 926
 927           if ( !*dirname || isspace( *dirname ) )
 928           {
 929                if ( !S_ISREG( mode ) )
 930                {
 931                     errnum = FILE_ERR_BAD_TYPE;
 932                     return 0;
 933                }
 934
 935                INFO->file->pos = 0;
 936                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
 937
 938                INFO->fileinfo.k_dir_id = dir_id;
 939                INFO->fileinfo.k_objectid = objectid;
 940                return next_key();
 941           }
 942
 943           /* continue with the file/directory name interpretation */
 944           while ( *dirname == '/' )
 945                dirname++;
 946           if ( !S_ISDIR( mode ) )
 947           {
 948                errnum = FILE_ERR_NOTDIR;
 949                return 0;
 950           }
 951           for ( rest = dirname; ( ch = *rest ) && !isspace( ch ) && ch != '/';
 952                 rest++ ) ;
 953           *rest = 0;
 954
 955           while ( 1 )
 956           {
 957                char *name_end;
 958                int num_entries;
 959
 960                if ( !next_key() )
 961                     return 0;
 962
 963                if ( INFO->current_ih->ih_key.k_objectid != objectid )
 964                     break;
 965
 966                name_end = INFO->current_item + ih_item_len(INFO->current_ih);
 967                de_head = ( struct reiserfs_de_head * ) INFO->current_item;
 968                num_entries = ih_entry_count(INFO->current_ih);
 969                while ( num_entries > 0 )
 970                {
 971                     char *filename = INFO->current_item + deh_location(de_head);
 972                     char tmp = *name_end;
 973
 974                     if( deh_state(de_head) & (1 << DEH_Visible))
 975                     {
 976                          int cmp;
 977
 978                          /* Directory names in ReiserFS are not null * terminated.
 979                           * We write a temporary 0 behind it. * NOTE: that this
 980                           * may overwrite the first block in * the tree cache.
 981                           * That doesn't hurt as long as we * don't call next_key
 982                           * () in between. */
 983                          *name_end = 0;
 984                          cmp = strcmp( dirname, filename );
 985                          *name_end = tmp;
 986                          if ( cmp == 0 )
 987                               goto found;
 988                     }
 989                     /* The beginning of this name marks the end of the next name.
 990                      */
 991                     name_end = filename;
 992                     de_head++;
 993                     num_entries--;
 994                }
 995           }
 996
 997           errnum = FILE_ERR_NOTFOUND;
 998           *rest = ch;
 999           return 0;
1000
1001      found:
1002           *rest = ch;
1003           dirname = rest;
1004
1005           parent_dir_id = dir_id;
1006           parent_objectid = objectid;
1007           dir_id = de_head->deh_dir_id; /* LE */
1008           objectid = de_head->deh_objectid; /* LE */
1009      }
1010 }
1011
1012
1013
1014 #ifndef __LITTLE_ENDIAN
1015 typedef union {
1016      struct offset_v2 offset_v2;
1017      __u64 linear;
1018 } offset_v2_esafe_overlay;
1019
1020 inline __u16
1021 offset_v2_k_type( struct offset_v2 *v2 )
1022 {
1023      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1024      tmp.linear = le64_to_cpu( tmp.linear );
1025      return tmp.offset_v2.k_type;
1026 }
1027
1028 inline loff_t
1029 offset_v2_k_offset( struct offset_v2 *v2 )
1030 {
1031      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1032      tmp.linear = le64_to_cpu( tmp.linear );
1033      return tmp.offset_v2.k_offset;
1034 }
1035 #endif
1036
1037 inline int
1038 uniqueness2type (__u32 uniqueness)
1039 {
1040      switch (uniqueness) {
1041      case V1_SD_UNIQUENESS: return TYPE_STAT_DATA;
1042      case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT;
1043      case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT;
1044      case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY;
1045      }
1046      return TYPE_ANY;
1047 }
1048
1049 /*
1050  * Local variables:
1051  * c-file-style: "k&r"
1052  * c-basic-offset: 5
1053  * End:
1054  */