]> git.ozlabs.org Git - yaboot.git/blob - second/fs_reiserfs.c
Allocate tftp temporary buffer from top of address space
[yaboot.git] / second / fs_reiserfs.c
1 /*
2  *  fs_reiserfs.c - an implementation for the Reiser filesystem
3  *
4  *  Copyright (C) 2001 Jeffrey Mahoney (jeffm@suse.com)
5  *
6  *  Adapted from Grub
7  *
8  *  Copyright (C) 2000, 2001 Free Software Foundation, Inc.
9  *
10  *  This program is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License as published by
12  *  the Free Software Foundation; either version 2 of the License, or
13  *  (at your option) any later version.
14  *
15  *  This program is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU General Public License for more details.
19  *
20  *  You should have received a copy of the GNU General Public License
21  *  along with this program; if not, write to the Free Software
22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
23  */
24
25 #include "types.h"
26 #include "ctype.h"
27 #include "string.h"
28 #include "stdlib.h"
29 #include "fs.h"
30 #include "errors.h"
31 #include "debug.h"
32 #include "bootinfo.h"
33 #include "reiserfs/reiserfs.h"
34
35 /* Exported in struct fs_t */
36 static int reiserfs_open( struct boot_file_t *file, struct partition_t *part,
37                           struct boot_fspec_t *fspec);
38 static int reiserfs_read( struct boot_file_t *file, unsigned int size,
39
40                           void *buffer );
41 static int reiserfs_seek( struct boot_file_t *file, unsigned int newpos );
42 static int reiserfs_close( struct boot_file_t *file );
43
44 struct fs_t reiserfs_filesystem = {
45      name:"reiserfs",
46      open:reiserfs_open,
47      read:reiserfs_read,
48      seek:reiserfs_seek,
49      close:reiserfs_close
50 };
51
52 static int reiserfs_read_super( void );
53 static int reiserfs_open_file( char *dirname );
54 static int reiserfs_read_data( char *buf, __u32 len );
55
56
57 static struct reiserfs_state reiserfs;
58 static struct reiserfs_state *INFO = &reiserfs;
59
60 /* Adapted from GRUB: */
61 static char FSYS_BUF[FSYSREISER_CACHE_SIZE];
62 int errnum;
63
64
65 static int
66 reiserfs_open( struct boot_file_t *file, struct partition_t *part,
67                 struct boot_fspec_t *fspec)
68 {
69      static char buffer[1024];
70      char *dev_name = fspec->dev;
71      char *file_name = fspec->file;
72
73      DEBUG_ENTER;
74      DEBUG_OPEN;
75
76      memset( INFO, 0, sizeof(struct reiserfs_state) );
77      INFO->file = file;
78
79      if (fspec->part)
80      {
81           DEBUG_F( "Determining offset for partition %d\n", part->part_number );
82           INFO->partition_offset = ((uint64_t)part->part_start) * part->blocksize;
83           DEBUG_F( "%Lu = %lu * %hu\n", INFO->partition_offset,
84                    part->part_start,
85                    part->blocksize );
86      }
87      else
88           INFO->partition_offset = 0;
89
90      strncpy(buffer, dev_name, 1020);
91      if (_machine != _MACH_bplan)
92           strcat(buffer, ":0");  /* 0 is full disk in (non-buggy) OF */
93
94      file->of_device = prom_open( buffer );
95      DEBUG_F( "Trying to open dev_name=%s; filename=%s; partition offset=%Lu\n",
96               buffer, file_name, INFO->partition_offset );
97
98      if ( file->of_device == PROM_INVALID_HANDLE || file->of_device == NULL )
99      {
100           DEBUG_F( "Can't open device %p\n", file->of_device );
101           DEBUG_LEAVE(FILE_ERR_BADDEV);
102           return FILE_ERR_BADDEV;
103      }
104
105      DEBUG_F("%p was successfully opened\n", file->of_device);
106
107      if ( reiserfs_read_super() != 1 )
108      {
109           DEBUG_F( "Couldn't open ReiserFS @ %s/%Lu\n", buffer, INFO->partition_offset );
110           prom_close( file->of_device );
111           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
112           return FILE_ERR_BAD_FSYS;
113      }
114
115      DEBUG_F( "Attempting to open %s\n", file_name );
116      strcpy(buffer, file_name); /* reiserfs_open_file modifies argument */
117      if (reiserfs_open_file(buffer) == 0)
118      {
119           DEBUG_F( "reiserfs_open_file failed. errnum = %d\n", errnum );
120           prom_close( file->of_device );
121           DEBUG_LEAVE_F(errnum);
122           return errnum;
123      }
124
125      DEBUG_F( "Successfully opened %s\n", file_name );
126
127      DEBUG_LEAVE(FILE_ERR_OK);
128      DEBUG_SLEEP;
129      return FILE_ERR_OK;
130 }
131
/* fs_t read hook: copy up to SIZE bytes from the current file position
 * into BUFFER.  FILE itself is not consulted; the work is delegated to
 * reiserfs_read_data(), whose result is returned unchanged. */
static int
reiserfs_read( struct boot_file_t *file, unsigned int size, void *buffer )
{
     char *dest = buffer;
     int nread = reiserfs_read_data( dest, size );

     return nread;
}
137
138 static int
139 reiserfs_seek( struct boot_file_t *file, unsigned int newpos )
140 {
141      file->pos = newpos;
142      return FILE_ERR_OK;
143 }
144
145 static int
146 reiserfs_close( struct boot_file_t *file )
147 {
148      if( file->of_device )
149      {
150           prom_close(file->of_device);
151           file->of_device = 0;
152           DEBUG_F("reiserfs_close called\n");
153      }
154      return FILE_ERR_OK;
155 }
156
157
/* Return the index of the lowest set bit of WORD, which is the base-2
 * logarithm when WORD is a power of two.  Returns 0 for WORD == 0.
 *
 * The scan starts at bit 0 and works on an unsigned value, so log2(1)
 * yields 0 instead of shifting past the width of the type. */
static __inline__ __u32
log2( __u32 word )
{
     __u32 shift = 0;

     if ( word == 0 )
          return 0;

     while ( ( word & 1 ) == 0 )
     {
          word >>= 1;
          shift++;
     }
     return shift;
}
165
/* Return 1 when WORD has at most one bit set (so 0 is accepted as well),
 * 0 otherwise. */
static __inline__ int
is_power_of_two( unsigned long word )
{
     /* Clearing the lowest set bit leaves nothing behind exactly when
      * there was no more than one bit to begin with. */
     unsigned long without_lowest_bit = word & ( word - 1 );

     return without_lowest_bit == 0;
}
171
172 static int
173 read_disk_block( struct boot_file_t *file, __u32 block, __u32 start,
174                  __u32 length, void *buf )
175 {
176      __u16 fs_blocksize = INFO->blocksize == 0 ? REISERFS_OLD_BLOCKSIZE
177           : INFO->blocksize;
178      unsigned long long pos = (unsigned long long)block * (unsigned long long)fs_blocksize;
179      pos += (unsigned long long)INFO->partition_offset + (unsigned long long)start;
180      DEBUG_F( "Reading %u bytes, starting at block %u, disk offset %Lu\n",
181               length, block, pos );
182      if (!prom_lseek( file->of_device, pos )) {
183           DEBUG_F("prom_lseek failed\n");
184           return 0;
185      }
186      return prom_read( file->of_device, buf, length );
187 }
188
189
190 static int
191 journal_read( __u32 block, __u32 len, char *buffer )
192 {
193      return read_disk_block( INFO->file,
194                              (INFO->journal_block + block), 0,
195                              len, buffer );
196 }
197
198 /* Read a block from ReiserFS file system, taking the journal into
199  * account.  If the block nr is in the journal, the block from the
200  * journal taken.
201  */
202 static int
203 block_read( __u32 blockNr, __u32 start, __u32 len, char *buffer )
204 {
205      __u32 transactions = INFO->journal_transactions;
206      __u32 desc_block = INFO->journal_first_desc;
207      __u32 journal_mask = INFO->journal_block_count - 1;
208      __u32 translatedNr = blockNr;
209      __u32 *journal_table = JOURNAL_START;
210
211 //    DEBUG_F( "block_read( %u, %u, %u, ..)\n", blockNr, start, len );
212
213      while ( transactions-- > 0 )
214      {
215           int i = 0;
216           int j_len = 0;
217
218           if ( *journal_table != 0xffffffff )
219           {
220                /* Search for the blockNr in cached journal */
221                j_len = le32_to_cpu(*journal_table++);
222                while ( i++ < j_len )
223                {
224                     if ( le32_to_cpu(*journal_table++) == blockNr )
225                     {
226                          journal_table += j_len - i;
227                          goto found;
228                     }
229                }
230           }
231           else
232           {
233                /* This is the end of cached journal marker.  The remaining
234                 * transactions are still on disk. */
235                struct reiserfs_journal_desc desc;
236                struct reiserfs_journal_commit commit;
237
238                if ( !journal_read( desc_block, sizeof(desc), (char *) &desc ) )
239                     return 0;
240
241                j_len = le32_to_cpu(desc.j_len);
242                while ( i < j_len && i < JOURNAL_TRANS_HALF )
243                     if ( le32_to_cpu(desc.j_realblock[i++]) == blockNr )
244                          goto found;
245
246                if ( j_len >= JOURNAL_TRANS_HALF )
247                {
248                     int commit_block = ( desc_block + 1 + j_len ) & journal_mask;
249
250                     if ( !journal_read( commit_block,
251                                         sizeof(commit), (char *) &commit ) )
252                          return 0;
253
254                     while ( i < j_len )
255                          if ( le32_to_cpu(commit.j_realblock[i++ - JOURNAL_TRANS_HALF]) == blockNr )
256                               goto found;
257                }
258           }
259           goto not_found;
260
261      found:
262           translatedNr =
263                INFO->journal_block + ( ( desc_block + i ) & journal_mask );
264
265           DEBUG_F( "block_read: block %u is mapped to journal block %u.\n",
266                    blockNr, translatedNr - INFO->journal_block );
267
268           /* We must continue the search, as this block may be overwritten in
269            * later transactions. */
270      not_found:
271           desc_block = (desc_block + 2 + j_len) & journal_mask;
272      }
273
274      return read_disk_block( INFO->file, translatedNr, start, len, buffer );
275 }
276
277 /* Init the journal data structure.  We try to cache as much as
278  * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
279  * we can still read the rest from the disk on demand.
280  *
281  * The first number of valid transactions and the descriptor block of the
282  * first valid transaction are held in INFO.  The transactions are all
283  * adjacent, but we must take care of the journal wrap around.
284  */
285 static int
286 journal_init( void )
287 {
288      struct reiserfs_journal_header header;
289      struct reiserfs_journal_desc desc;
290      struct reiserfs_journal_commit commit;
291      __u32 block_count = INFO->journal_block_count;
292      __u32 desc_block;
293      __u32 commit_block;
294      __u32 next_trans_id;
295      __u32 *journal_table = JOURNAL_START;
296
297      journal_read( block_count, sizeof ( header ), ( char * ) &header );
298      desc_block = le32_to_cpu(header.j_first_unflushed_offset);
299      if ( desc_block >= block_count )
300           return 0;
301
302      INFO->journal_transactions = 0;
303      INFO->journal_first_desc = desc_block;
304      next_trans_id = le32_to_cpu(header.j_last_flush_trans_id) + 1;
305
306      DEBUG_F( "journal_init: last flushed %u\n", le32_to_cpu(header.j_last_flush_trans_id) );
307
308      while ( 1 )
309      {
310           journal_read( desc_block, sizeof(desc), (char *) &desc );
311           if ( strcmp( JOURNAL_DESC_MAGIC, desc.j_magic ) != 0
312                || desc.j_trans_id != next_trans_id
313                || desc.j_mount_id != header.j_mount_id )
314                /* no more valid transactions */
315                break;
316
317           commit_block = ( desc_block + le32_to_cpu(desc.j_len) + 1 ) & ( block_count - 1 );
318           journal_read( commit_block, sizeof(commit), (char *) &commit );
319           if ( desc.j_trans_id != commit.j_trans_id
320                || desc.j_len != commit.j_len )
321                /* no more valid transactions */
322                break;
323
324
325           DEBUG_F( "Found valid transaction %u/%u at %u.\n",
326                    le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
327                    desc_block );
328
329
330           next_trans_id++;
331           if ( journal_table < JOURNAL_END )
332           {
333                if ( ( journal_table + 1 + le32_to_cpu(desc.j_len) ) >= JOURNAL_END )
334                {
335                     /* The table is almost full; mark the end of the cached * *
336                      * journal. */
337                     *journal_table = 0xffffffff;
338                     journal_table = JOURNAL_END;
339                }
340                else
341                {
342                     int i;
343
344                     /* Cache the length and the realblock numbers in the table. *
345                      * The block number of descriptor can easily be computed. *
346                      * and need not to be stored here. */
347                     *journal_table++ = desc.j_len;
348                     for ( i = 0; i < le32_to_cpu(desc.j_len) && i < JOURNAL_TRANS_HALF; i++ )
349                     {
350                          *journal_table++ = desc.j_realblock[i];
351
352                          DEBUG_F( "block %u is in journal %u.\n",
353                                   le32_to_cpu(desc.j_realblock[i]), desc_block );
354
355                     }
356                     for ( ; i < le32_to_cpu(desc.j_len); i++ )
357                     {
358                          *journal_table++ =
359                               commit.j_realblock[i - JOURNAL_TRANS_HALF];
360
361                          DEBUG_F( "block %u is in journal %u.\n",
362                                   le32_to_cpu(commit.j_realblock[i - JOURNAL_TRANS_HALF]),
363                                   desc_block );
364
365                     }
366                }
367           }
368           desc_block = (commit_block + 1) & (block_count - 1);
369      }
370
371      DEBUG_F( "Transaction %u/%u at %u isn't valid.\n",
372               le32_to_cpu(desc.j_trans_id), le32_to_cpu(desc.j_mount_id),
373               desc_block );
374
375
376      INFO->journal_transactions
377           = next_trans_id - le32_to_cpu(header.j_last_flush_trans_id) - 1;
378      return (errnum == 0);
379 }
380
381 /* check filesystem types and read superblock into memory buffer */
382 static int
383 reiserfs_read_super( void )
384 {
385      struct reiserfs_super_block super;
386      __u64 superblock = REISERFS_SUPERBLOCK_BLOCK;
387
388      if (read_disk_block(INFO->file, superblock, 0, sizeof(super), &super) != sizeof(super)) {
389           DEBUG_F("read_disk_block failed!\n");
390           return 0;
391      }
392
393      DEBUG_F( "Found super->magic: \"%s\"\n", super.s_magic );
394
395      if( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
396          strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
397      {
398           /* Try old super block position */
399           superblock = REISERFS_OLD_SUPERBLOCK_BLOCK;
400
401           if (read_disk_block( INFO->file, superblock, 0, sizeof (super),  &super ) != sizeof(super)) {
402                DEBUG_F("read_disk_block failed!\n");
403                return 0;
404           }
405
406           if ( strcmp( REISER2FS_SUPER_MAGIC_STRING, super.s_magic ) != 0 &&
407                strcmp( REISERFS_SUPER_MAGIC_STRING, super.s_magic ) != 0 )
408           {
409                /* pre journaling super block - untested */
410                if ( strcmp( REISERFS_SUPER_MAGIC_STRING,
411                             (char *) ((__u32) &super + 20 ) ) != 0 )
412                     return 0;
413
414                super.s_blocksize = cpu_to_le16(REISERFS_OLD_BLOCKSIZE);
415                super.s_journal_block = 0;
416                super.s_version = 0;
417           }
418      }
419
420      DEBUG_F( "ReiserFS superblock data:\n" );
421      DEBUG_F( "Block count: %u\n", le32_to_cpu(super.s_block_count) )
422           DEBUG_F( "Free blocks: %u\n", le32_to_cpu(super.s_free_blocks) );
423      DEBUG_F( "Journal block: %u\n", le32_to_cpu(super.s_journal_block) );
424      DEBUG_F( "Journal size (in blocks): %u\n",
425               le32_to_cpu(super.s_orig_journal_size) );
426      DEBUG_F( "Root block: %u\n\n", le32_to_cpu(super.s_root_block) );
427
428
429      INFO->version = le16_to_cpu(super.s_version);
430      INFO->blocksize = le16_to_cpu(super.s_blocksize);
431      INFO->blocksize_shift = log2( INFO->blocksize );
432
433      INFO->journal_block = le32_to_cpu(super.s_journal_block);
434      INFO->journal_block_count = le32_to_cpu(super.s_orig_journal_size);
435
436      INFO->cached_slots = (FSYSREISER_CACHE_SIZE >> INFO->blocksize_shift) - 1;
437
438      /* At this point, we've found a valid superblock. If we run into problems
439       * mounting the FS, the user should probably know. */
440
441      /* A few sanity checks ... */
442      if ( INFO->version > REISERFS_MAX_SUPPORTED_VERSION )
443      {
444           prom_printf( "ReiserFS: Unsupported version field: %u\n",
445                        INFO->version );
446           return 0;
447      }
448
449      if ( INFO->blocksize < FSYSREISER_MIN_BLOCKSIZE
450           || INFO->blocksize > FSYSREISER_MAX_BLOCKSIZE )
451      {
452           prom_printf( "ReiserFS: Unsupported block size: %u\n",
453                        INFO->blocksize );
454           return 0;
455      }
456
457      /* Setup the journal.. */
458      if ( INFO->journal_block != 0 )
459      {
460           if ( !is_power_of_two( INFO->journal_block_count ) )
461           {
462                prom_printf( "ReiserFS: Unsupported journal size, "
463                             "not a power of 2: %u\n",
464                             INFO->journal_block_count );
465                return 0;
466           }
467
468           journal_init();
469           /* Read in super block again, maybe it is in the journal */
470           block_read( superblock, 0, sizeof (struct reiserfs_super_block),
471                       (char *) &super );
472      }
473
474      /* Read in the root block */
475      if ( !block_read( le32_to_cpu(super.s_root_block), 0,
476                        INFO->blocksize, ROOT ) )
477      {
478           prom_printf( "ReiserFS: Failed to read in root block\n" );
479           return 0;
480      }
481
482      /* The root node is always the "deepest", so we can
483         determine the hieght of the tree using it. */
484      INFO->tree_depth = blkh_level(BLOCKHEAD(ROOT));
485
486
487      DEBUG_F( "root read_in: block=%u, depth=%u\n",
488               le32_to_cpu(super.s_root_block), INFO->tree_depth );
489
490      if ( INFO->tree_depth >= REISERFS_MAX_TREE_HEIGHT )
491      {
492           prom_printf( "ReiserFS: Unsupported tree depth (too deep): %u\n",
493                        INFO->tree_depth );
494           return 0;
495      }
496
497      if ( INFO->tree_depth == BLKH_LEVEL_LEAF )
498      {
499           /* There is only one node in the whole filesystem, which is
500              simultanously leaf and root */
501           memcpy( LEAF, ROOT, INFO->blocksize );
502      }
503      return 1;
504 }
505
506 /***************** TREE ACCESSING METHODS *****************************/
507
508 /* I assume you are familiar with the ReiserFS tree, if not go to
509  * http://devlinux.com/projects/reiserfs/
510  *
511  * My tree node cache is organized as following
512  *   0   ROOT node
513  *   1   LEAF node  (if the ROOT is also a LEAF it is copied here)
514  *   2-n other nodes on current path from bottom to top.
515  *       if there is not enough space in the cache, the top most are
516  *       omitted.
517  *
518  * I have only two methods to find a key in the tree:
519  *   search_stat(dir_id, objectid) searches for the stat entry (always
520  *       the first entry) of an object.
521  *   next_key() gets the next key in tree order.
522  *
523  * This means that only sequential reads of files are
524  * efficient, but this really doesn't hurt for grub.
525  */
526
527 /* Read in the node at the current path and depth into the node cache.
528  * You must set INFO->blocks[depth] before.
529  */
530 static char *
531 read_tree_node( __u32 blockNr, __u16 depth )
532 {
533      char *cache = CACHE(depth);
534      int num_cached = INFO->cached_slots;
535      errnum = 0;
536
537      if ( depth < num_cached )
538      {
539           /* This is the cached part of the path.
540              Check if same block is needed. */
541           if ( blockNr == INFO->blocks[depth] )
542                return cache;
543      }
544      else
545           cache = CACHE(num_cached);
546
547      DEBUG_F( "  next read_in: block=%u (depth=%u)\n", blockNr, depth );
548
549      if ( !block_read( blockNr, 0, INFO->blocksize, cache ) )
550      {
551           DEBUG_F( "block_read failed\n" );
552           return 0;
553      }
554
555      DEBUG_F( "FOUND: blk_level=%u, blk_nr_item=%u, blk_free_space=%u\n",
556               blkh_level(BLOCKHEAD(cache)),
557               blkh_nr_item(BLOCKHEAD(cache)),
558               le16_to_cpu(BLOCKHEAD(cache)->blk_free_space) );
559
560      /* Make sure it has the right node level */
561      if ( blkh_level(BLOCKHEAD(cache)) != depth )
562      {
563           DEBUG_F( "depth = %u != %u\n", blkh_level(BLOCKHEAD(cache)), depth );
564           DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
565           errnum = FILE_ERR_BAD_FSYS;
566           return 0;
567      }
568
569      INFO->blocks[depth] = blockNr;
570      return cache;
571 }
572
573 /* Get the next key, i.e. the key following the last retrieved key in
574  * tree order.  INFO->current_ih and
575  * INFO->current_info are adapted accordingly.  */
576 static int
577 next_key( void )
578 {
579      __u16 depth;
580      struct item_head *ih = INFO->current_ih + 1;
581      char *cache;
582
583
584      DEBUG_F( "next_key:\n  old ih: key %u:%u:%u:%u version:%u\n",
585               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
586               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
587               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
588               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
589               ih_version(INFO->current_ih) );
590
591
592      if ( ih == &ITEMHEAD[blkh_nr_item(BLOCKHEAD( LEAF ))] )
593      {
594           depth = BLKH_LEVEL_LEAF;
595           /* The last item, was the last in the leaf node. * Read in the next
596            * * block */
597           do
598           {
599                if ( depth == INFO->tree_depth )
600                {
601                     /* There are no more keys at all. * Return a dummy item with
602                      * * MAX_KEY */
603                     ih =
604                          ( struct item_head * )
605                          &BLOCKHEAD( LEAF )->blk_right_delim_key;
606                     goto found;
607                }
608                depth++;
609
610                DEBUG_F( "  depth=%u, i=%u\n", depth, INFO->next_key_nr[depth] );
611
612           }
613           while ( INFO->next_key_nr[depth] == 0 );
614
615           if ( depth == INFO->tree_depth )
616                cache = ROOT;
617           else if ( depth <= INFO->cached_slots )
618                cache = CACHE( depth );
619           else
620           {
621                cache = read_tree_node( INFO->blocks[depth], --depth );
622                if ( !cache )
623                     return 0;
624           }
625
626           do
627           {
628                __u16 nr_item = blkh_nr_item(BLOCKHEAD( cache ));
629                int key_nr = INFO->next_key_nr[depth]++;
630
631
632                DEBUG_F( "  depth=%u, i=%u/%u\n", depth, key_nr, nr_item );
633
634                if ( key_nr == nr_item )
635                     /* This is the last item in this block, set the next_key_nr *
636                      * to 0 */
637                     INFO->next_key_nr[depth] = 0;
638
639                cache =
640                     read_tree_node( dc_block_number( &(DC( cache )[key_nr])),
641                                     --depth );
642                if ( !cache )
643                     return 0;
644           }
645           while ( depth > BLKH_LEVEL_LEAF );
646
647           ih = ITEMHEAD;
648      }
649 found:
650      INFO->current_ih = ih;
651      INFO->current_item = &LEAF[ih_location(ih)];
652
653      DEBUG_F( "  new ih: key %u:%u:%u:%u version:%u\n",
654               le32_to_cpu(INFO->current_ih->ih_key.k_dir_id),
655               le32_to_cpu(INFO->current_ih->ih_key.k_objectid),
656               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_offset),
657               le32_to_cpu(INFO->current_ih->ih_key.u.k_offset_v1.k_uniqueness),
658               ih_version(INFO->current_ih) );
659
660      return 1;
661 }
662
663 /* preconditions: reiserfs_read_super already executed, therefore
664  *   INFO block is valid
665  * returns: 0 if error (errnum is set),
666  *   nonzero iff we were able to find the key successfully.
667  * postconditions: on a nonzero return, the current_ih and
668  *   current_item fields describe the key that equals the
669  *   searched key.  INFO->next_key contains the next key after
670  *   the searched key.
671  * side effects: messes around with the cache.
672  */
673 static int
674 search_stat( __u32 dir_id, __u32 objectid )
675 {
676      char *cache;
677      int depth;
678      int nr_item;
679      int i;
680      struct item_head *ih;
681      errnum = 0;
682
683      DEBUG_F( "search_stat:\n  key %u:%u:0:0\n", le32_to_cpu(dir_id),
684               le32_to_cpu(objectid) );
685
686
687      depth = INFO->tree_depth;
688      cache = ROOT;
689
690      DEBUG_F( "depth = %d\n", depth );
691      while ( depth > BLKH_LEVEL_LEAF )
692      {
693           struct key *key;
694
695           nr_item = blkh_nr_item(BLOCKHEAD( cache ));
696
697           key = KEY( cache );
698
699           for ( i = 0; i < nr_item; i++ )
700           {
701                if (le32_to_cpu(key->k_dir_id) > le32_to_cpu(dir_id)
702                    || (key->k_dir_id == dir_id
703                        && (le32_to_cpu(key->k_objectid) > le32_to_cpu(objectid)
704                            || (key->k_objectid == objectid
705                                && (key->u.k_offset_v1.k_offset
706                                    | key->u.k_offset_v1.k_uniqueness) > 0))))
707                     break;
708                key++;
709           }
710
711
712           DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
713
714           INFO->next_key_nr[depth] = ( i == nr_item ) ? 0 : i + 1;
715           cache = read_tree_node( dc_block_number(&(DC(cache)[i])), --depth );
716           if ( !cache )
717                return 0;
718      }
719
720      /* cache == LEAF */
721      nr_item = blkh_nr_item(BLOCKHEAD(LEAF));
722      ih = ITEMHEAD;
723      DEBUG_F( "nr_item = %d\n", nr_item );
724      for ( i = 0; i < nr_item; i++ )
725      {
726           if ( ih->ih_key.k_dir_id == dir_id
727                && ih->ih_key.k_objectid == objectid
728                && ih->ih_key.u.k_offset_v1.k_offset == 0
729                && ih->ih_key.u.k_offset_v1.k_uniqueness == 0 )
730           {
731
732                DEBUG_F( "  depth=%d, i=%d/%d\n", depth, i, nr_item );
733
734                INFO->current_ih = ih;
735                INFO->current_item = &LEAF[ih_location(ih)];
736
737                return 1;
738           }
739
740           ih++;
741      }
742
743      DEBUG_LEAVE(FILE_ERR_BAD_FSYS);
744      errnum = FILE_ERR_BAD_FSYS;
745      return 0;
746 }
747
748 static int
749 reiserfs_read_data( char *buf, __u32 len )
750 {
751      __u32 blocksize;
752      __u32 offset;
753      __u32 to_read;
754      char *prev_buf = buf;
755      errnum = 0;
756
757      DEBUG_F( "reiserfs_read_data: INFO->file->pos=%Lu len=%u, offset=%Lu\n",
758               INFO->file->pos, len, (__u64) IH_KEY_OFFSET(INFO->current_ih) - 1 );
759
760
761      if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
762           || IH_KEY_OFFSET( INFO->current_ih ) > INFO->file->pos + 1 )
763      {
764           search_stat( INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid );
765           goto get_next_key;
766      }
767
768      while ( errnum == 0 )
769      {
770           if ( INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid )
771                break;
772
773           offset = INFO->file->pos - IH_KEY_OFFSET( INFO->current_ih ) + 1;
774           blocksize = ih_item_len(INFO->current_ih);
775
776
777           DEBUG_F( "  loop: INFO->file->pos=%Lu len=%u, offset=%u blocksize=%u\n",
778                    INFO->file->pos, len, offset, blocksize );
779
780
781           if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_DIRECT )
782                && offset < blocksize )
783           {
784                to_read = blocksize - offset;
785                if ( to_read > len )
786                     to_read = len;
787
788                memcpy( buf, INFO->current_item + offset, to_read );
789                goto update_buf_len;
790           }
791           else if ( IH_KEY_ISTYPE( INFO->current_ih, TYPE_INDIRECT ) )
792           {
793                blocksize = ( blocksize >> 2 ) << INFO->blocksize_shift;
794
795                while ( offset < blocksize )
796                {
797                     __u32 blocknr = le32_to_cpu(((__u32 *)
798                                                  INFO->current_item)[offset >> INFO->blocksize_shift]);
799
800                     int blk_offset = offset & (INFO->blocksize - 1);
801
802                     to_read = INFO->blocksize - blk_offset;
803                     if ( to_read > len )
804                          to_read = len;
805
806                     /* Journal is only for meta data.
807                        Data blocks can be read directly without using block_read */
808                     read_disk_block( INFO->file, blocknr, blk_offset, to_read,
809                                      buf );
810
811                update_buf_len:
812                     len -= to_read;
813                     buf += to_read;
814                     offset += to_read;
815                     INFO->file->pos += to_read;
816                     if ( len == 0 )
817                          goto done;
818                }
819           }
820      get_next_key:
821           next_key();
822      }
823 done:
824      return (errnum != 0) ? 0 : buf - prev_buf;
825 }
826
827
828 /* preconditions: reiserfs_read_super already executed, therefore
829  *   INFO block is valid
830  * returns: 0 if error, nonzero iff we were able to find the file successfully
831  * postconditions: on a nonzero return, INFO->fileinfo contains the info
832  *   of the file we were trying to look up, filepos is 0 and filemax is
833  *   the size of the file.
834  */
835 static int
836 reiserfs_open_file( char *dirname )
837 {
838      struct reiserfs_de_head *de_head;
839      char *rest, ch;
840      __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
841
842      char linkbuf[PATH_MAX];    /* buffer for following symbolic links */
843      int link_count = 0;
844      int mode;
845      errnum = 0;
846
847      dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
848      objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
849
850      while ( 1 )
851      {
852
853           DEBUG_F( "dirname=%s\n", dirname );
854
855           /* Search for the stat info first. */
856           if ( !search_stat( dir_id, objectid ) )
857                return 0;
858
859
860           DEBUG_F( "sd_mode=0%o sd_size=%Lu\n",
861                    sd_mode((struct stat_data *) INFO->current_item ),
862                    sd_size(INFO->current_ih, INFO->current_item ));
863
864
865           mode = sd_mode((struct stat_data *)INFO->current_item);
866
867           /* If we've got a symbolic link, then chase it. */
868           if ( S_ISLNK( mode ) )
869           {
870                int len = 0;
871
872                DEBUG_F("link count = %d\n", link_count);
873                DEBUG_SLEEP;
874                if ( ++link_count > MAX_LINK_COUNT )
875                {
876                     DEBUG_F("Symlink loop\n");
877                     errnum = FILE_ERR_SYMLINK_LOOP;
878                     return 0;
879                }
880
881                /* Get the symlink size. */
882                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
883
884                /* Find out how long our remaining name is. */
885                while ( dirname[len] && !isspace( dirname[len] ) )
886                     len++;
887
888                if ( INFO->file->len + len > sizeof ( linkbuf ) - 1 )
889                {
890                     errnum = FILE_ERR_LENGTH;
891                     return 0;
892                }
893
894                /* Copy the remaining name to the end of the symlink data. Note *
895                 * that DIRNAME and LINKBUF may overlap! */
896                memmove( linkbuf + INFO->file->len, dirname, len + 1 );
897
898                INFO->fileinfo.k_dir_id = dir_id;
899                INFO->fileinfo.k_objectid = objectid;
900                INFO->file->pos = 0;
901                if ( !next_key()
902                     || reiserfs_read_data( linkbuf, INFO->file->len ) != INFO->file->len ) {
903                     DEBUG_F("reiserfs_open_file - if !next_key || reiserfs_read_data\n");
904                     DEBUG_SLEEP;
905                     errnum = FILE_IOERR;
906                     return 0;
907                }
908
909
910                DEBUG_F( "symlink=%s\n", linkbuf );
911                DEBUG_SLEEP;
912
913                dirname = linkbuf;
914                if ( *dirname == '/' )
915                {
916                     /* It's an absolute link, so look it up in root. */
917                     dir_id = cpu_to_le32(REISERFS_ROOT_PARENT_OBJECTID);
918                     objectid = cpu_to_le32(REISERFS_ROOT_OBJECTID);
919                }
920                else
921                {
922                     /* Relative, so look it up in our parent directory. */
923                     dir_id = parent_dir_id;
924                     objectid = parent_objectid;
925                }
926
927                /* Now lookup the new name. */
928                continue;
929           }
930
931           /* if we have a real file (and we're not just printing *
932            * possibilities), then this is where we want to exit */
933
934           if ( !*dirname || isspace( *dirname ) )
935           {
936                if ( !S_ISREG( mode ) )
937                {
938                     errnum = FILE_ERR_BAD_TYPE;
939                     return 0;
940                }
941
942                INFO->file->pos = 0;
943                INFO->file->len = sd_size(INFO->current_ih, INFO->current_item);
944
945                INFO->fileinfo.k_dir_id = dir_id;
946                INFO->fileinfo.k_objectid = objectid;
947                return next_key();
948           }
949
950           /* continue with the file/directory name interpretation */
951           while ( *dirname == '/' )
952                dirname++;
953           if ( !S_ISDIR( mode ) )
954           {
955                errnum = FILE_ERR_NOTDIR;
956                return 0;
957           }
958           for ( rest = dirname; ( ch = *rest ) && !isspace( ch ) && ch != '/';
959                 rest++ ) ;
960           *rest = 0;
961
962           while ( 1 )
963           {
964                char *name_end;
965                int num_entries;
966
967                if ( !next_key() )
968                     return 0;
969
970                if ( INFO->current_ih->ih_key.k_objectid != objectid )
971                     break;
972
973                name_end = INFO->current_item + ih_item_len(INFO->current_ih);
974                de_head = ( struct reiserfs_de_head * ) INFO->current_item;
975                num_entries = ih_entry_count(INFO->current_ih);
976                while ( num_entries > 0 )
977                {
978                     char *filename = INFO->current_item + deh_location(de_head);
979                     char tmp = *name_end;
980
981                     if( deh_state(de_head) & (1 << DEH_Visible))
982                     {
983                          int cmp;
984
985                          /* Directory names in ReiserFS are not null * terminated.
986                           * We write a temporary 0 behind it. * NOTE: that this
987                           * may overwrite the first block in * the tree cache.
988                           * That doesn't hurt as long as we * don't call next_key
989                           * () in between. */
990                          *name_end = 0;
991                          cmp = strcmp( dirname, filename );
992                          *name_end = tmp;
993                          if ( cmp == 0 )
994                               goto found;
995                     }
996                     /* The beginning of this name marks the end of the next name.
997                      */
998                     name_end = filename;
999                     de_head++;
1000                     num_entries--;
1001                }
1002           }
1003
1004           errnum = FILE_ERR_NOTFOUND;
1005           *rest = ch;
1006           return 0;
1007
1008      found:
1009           *rest = ch;
1010           dirname = rest;
1011
1012           parent_dir_id = dir_id;
1013           parent_objectid = objectid;
1014           dir_id = de_head->deh_dir_id; /* LE */
1015           objectid = de_head->deh_objectid; /* LE */
1016      }
1017 }
1018
1019
1020
1021 #ifndef __LITTLE_ENDIAN
1022 typedef union {
1023      struct offset_v2 offset_v2;
1024      __u64 linear;
1025 } offset_v2_esafe_overlay;
1026
1027 inline __u16
1028 offset_v2_k_type( struct offset_v2 *v2 )
1029 {
1030      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1031      tmp.linear = le64_to_cpu( tmp.linear );
1032      return tmp.offset_v2.k_type;
1033 }
1034
1035 inline loff_t
1036 offset_v2_k_offset( struct offset_v2 *v2 )
1037 {
1038      offset_v2_esafe_overlay tmp = *(offset_v2_esafe_overlay *)v2;
1039      tmp.linear = le64_to_cpu( tmp.linear );
1040      return tmp.offset_v2.k_offset;
1041 }
1042 #endif
1043
1044 inline int
1045 uniqueness2type (__u32 uniqueness)
1046 {
1047      switch (uniqueness) {
1048      case V1_SD_UNIQUENESS: return TYPE_STAT_DATA;
1049      case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT;
1050      case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT;
1051      case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY;
1052      }
1053      return TYPE_ANY;
1054 }
1055
1056 /*
1057  * Local variables:
1058  * c-file-style: "k&r"
1059  * c-basic-offset: 5
1060  * End:
1061  */