stree.c 72.6 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
/*
 *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/*
 *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
 *  Programm System Institute
 *  Pereslavl-Zalessky Russia
 */

/*
 *  This file contains functions dealing with S+tree
 *
 * B_IS_IN_TREE
 * copy_short_key
 * copy_item_head
 * comp_short_keys
 * comp_keys
 * comp_cpu_keys
 * comp_short_le_keys
 * comp_short_cpu_keys
 * cpu_key2cpu_key
 * le_key2cpu_key
 * comp_le_keys
 * bin_search
 * get_lkey
 * get_rkey
 * key_in_buffer
 * decrement_bcount
 * decrement_counters_in_path
 * reiserfs_check_path
 * pathrelse_and_restore
 * pathrelse
 * search_by_key_reada
 * search_by_key
 * search_for_position_by_key
 * comp_items
 * prepare_for_direct_item
 * prepare_for_direntry_item
 * prepare_for_delete_or_cut
 * calc_deleted_bytes_number
 * init_tb_struct
 * padd_item
 * reiserfs_delete_item
 * reiserfs_delete_solid_item
 * reiserfs_delete_object
 * maybe_indirect_to_direct
 * indirect_to_direct_roll_back
 * reiserfs_cut_from_item
 * truncate_directory
 * reiserfs_do_truncate
 * reiserfs_paste_into_item
 * reiserfs_insert_item
 */

Linus Torvalds's avatar
Linus Torvalds committed
56
#include <linux/config.h>
57
#include <linux/time.h>
Linus Torvalds's avatar
Linus Torvalds committed
58 59 60 61
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/reiserfs_fs.h>
#include <linux/smp_lock.h>
62
#include <linux/buffer_head.h>
63
#include <linux/quotaops.h>
Linus Torvalds's avatar
Linus Torvalds committed
64 65

/* Does the buffer contain a disk block which is in the tree. */
Linus Torvalds's avatar
Linus Torvalds committed
66
inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh)
Linus Torvalds's avatar
Linus Torvalds committed
67 68
{

Linus Torvalds's avatar
Linus Torvalds committed
69 70
  RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT,
	  "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
Linus Torvalds's avatar
Linus Torvalds committed
71 72 73 74

  return ( B_LEVEL (p_s_bh) != FREE_LEVEL );
}

Linus Torvalds's avatar
Linus Torvalds committed
75
inline void copy_short_key (void * to, const void * from)
Linus Torvalds's avatar
Linus Torvalds committed
76 77 78 79 80 81 82
{
    memcpy (to, from, SHORT_KEY_SIZE);
}

//
// to gets item head in le form
//
Linus Torvalds's avatar
Linus Torvalds committed
83 84
inline void copy_item_head(struct item_head * p_v_to, 
			   const struct item_head * p_v_from)
Linus Torvalds's avatar
Linus Torvalds committed
85 86 87 88 89 90 91 92 93 94 95
{
  memcpy (p_v_to, p_v_from, IH_SIZE);
}


/* k1 is pointer to on-disk structure which is stored in little-endian
   form. k2 is pointer to cpu variable. For key of items of the same
   object this returns 0.
   Returns: -1 if key1 < key2 
   0 if key1 == key2
   1 if key1 > key2 */
Linus Torvalds's avatar
Linus Torvalds committed
96 97
inline int  comp_short_keys (const struct key * le_key, 
			     const struct cpu_key * cpu_key)
Linus Torvalds's avatar
Linus Torvalds committed
98 99 100 101 102
{
  __u32 * p_s_le_u32, * p_s_cpu_u32;
  int n_key_length = REISERFS_SHORT_KEY_LEN;

  p_s_le_u32 = (__u32 *)le_key;
103
  p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key;
Linus Torvalds's avatar
Linus Torvalds committed
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
  for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) {
    if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 )
      return -1;
    if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 )
      return 1;
  }

  return 0;
}


/* k1 is pointer to on-disk structure which is stored in little-endian
   form. k2 is pointer to cpu variable.
   Compare keys using all 4 key fields.
   Returns: -1 if key1 < key2 0
   if key1 = key2 1 if key1 > key2 */
Linus Torvalds's avatar
Linus Torvalds committed
120
inline int  comp_keys (const struct key * le_key, const struct cpu_key * cpu_key)
Linus Torvalds's avatar
Linus Torvalds committed
121 122 123 124 125 126
{
  int retval;

  retval = comp_short_keys (le_key, cpu_key);
  if (retval)
      return retval;
127
  if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key))
Linus Torvalds's avatar
Linus Torvalds committed
128
      return -1;
129
  if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key))
Linus Torvalds's avatar
Linus Torvalds committed
130 131 132 133 134 135
      return 1;

  if (cpu_key->key_length == 3)
      return 0;

  /* this part is needed only when tail conversion is in progress */
136
  if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key))
Linus Torvalds's avatar
Linus Torvalds committed
137 138
    return -1;

139
  if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key))
Linus Torvalds's avatar
Linus Torvalds committed
140 141 142 143 144 145 146 147 148
    return 1;

  return 0;
}


//
// FIXME: not used yet
//
Linus Torvalds's avatar
Linus Torvalds committed
149 150
inline int comp_cpu_keys (const struct cpu_key * key1, 
			  const struct cpu_key * key2)
Linus Torvalds's avatar
Linus Torvalds committed
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
{
    if (key1->on_disk_key.k_dir_id < key2->on_disk_key.k_dir_id)
	return -1;
    if (key1->on_disk_key.k_dir_id > key2->on_disk_key.k_dir_id)
	return 1;

    if (key1->on_disk_key.k_objectid < key2->on_disk_key.k_objectid)
	return -1;
    if (key1->on_disk_key.k_objectid > key2->on_disk_key.k_objectid)
	return 1;

    if (cpu_key_k_offset (key1) < cpu_key_k_offset (key2))
	return -1;
    if (cpu_key_k_offset (key1) > cpu_key_k_offset (key2))
	return 1;

167
    reiserfs_warning (NULL, "comp_cpu_keys: type are compared for %K and %K",
Linus Torvalds's avatar
Linus Torvalds committed
168 169 170 171 172 173 174 175 176
		      key1, key2);

    if (cpu_key_k_type (key1) < cpu_key_k_type (key2))
	return -1;
    if (cpu_key_k_type (key1) > cpu_key_k_type (key2))
	return 1;
    return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
177
inline int comp_short_le_keys (const struct key * key1, const struct key * key2)
Linus Torvalds's avatar
Linus Torvalds committed
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
{
  __u32 * p_s_1_u32, * p_s_2_u32;
  int n_key_length = REISERFS_SHORT_KEY_LEN;

  p_s_1_u32 = (__u32 *)key1;
  p_s_2_u32 = (__u32 *)key2;
  for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) {
    if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) )
      return -1;
    if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) )
      return 1;
  }
  return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
193 194
inline int comp_short_cpu_keys (const struct cpu_key * key1, 
				const struct cpu_key * key2)
Linus Torvalds's avatar
Linus Torvalds committed
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
{
  __u32 * p_s_1_u32, * p_s_2_u32;
  int n_key_length = REISERFS_SHORT_KEY_LEN;

  p_s_1_u32 = (__u32 *)key1;
  p_s_2_u32 = (__u32 *)key2;

  for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) {
    if ( *p_s_1_u32 < *p_s_2_u32 )
      return -1;
    if ( *p_s_1_u32 > *p_s_2_u32 )
      return 1;
  }
  return 0;
}



Linus Torvalds's avatar
Linus Torvalds committed
213
inline void cpu_key2cpu_key (struct cpu_key * to, const struct cpu_key * from)
Linus Torvalds's avatar
Linus Torvalds committed
214 215 216 217 218
{
    memcpy (to, from, sizeof (struct cpu_key));
}


Linus Torvalds's avatar
Linus Torvalds committed
219
inline void le_key2cpu_key (struct cpu_key * to, const struct key * from)
Linus Torvalds's avatar
Linus Torvalds committed
220 221 222 223 224 225
{
    to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id);
    to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid);
    
    // find out version of the key
    to->version = le_key_version (from);
Linus Torvalds's avatar
Linus Torvalds committed
226
    if (to->version == KEY_FORMAT_3_5) {
Linus Torvalds's avatar
Linus Torvalds committed
227 228 229
	to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset);
	to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness);
    } else {
Linus Torvalds's avatar
Linus Torvalds committed
230 231
	to->on_disk_key.u.k_offset_v2.k_offset = offset_v2_k_offset(&from->u.k_offset_v2);
	to->on_disk_key.u.k_offset_v2.k_type = offset_v2_k_type(&from->u.k_offset_v2);
Linus Torvalds's avatar
Linus Torvalds committed
232 233 234 235 236 237 238
    } 
}



// this does not say which one is bigger, it only returns 1 if keys
// are not equal, 0 otherwise
Linus Torvalds's avatar
Linus Torvalds committed
239
inline int comp_le_keys (const struct key * k1, const struct key * k2)
Linus Torvalds's avatar
Linus Torvalds committed
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
{
    return memcmp (k1, k2, sizeof (struct key));
}

/**************************************************************************
 *  Binary search toolkit function                                        *
 *  Search for an item in the array by the item key                       *
 *  Returns:    1 if found,  0 if not found;                              *
 *        *p_n_pos = number of the searched element if found, else the    *
 *        number of the first element that is larger than p_v_key.        *
 **************************************************************************/
/* For those not familiar with binary search: n_lbound is the leftmost item that it
 could be, n_rbound the rightmost item that it could be.  We examine the item
 halfway between n_lbound and n_rbound, and that tells us either that we can increase
 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
 there are no possible items, and we have not found it. With each examination we
 cut the number of possible items it could be by one more than half rounded down,
 or we find it. */
inline	int bin_search (
Linus Torvalds's avatar
Linus Torvalds committed
259 260
              const void * p_v_key, /* Key to search for.                   */
	      const void * p_v_base,/* First item in the array.             */
Linus Torvalds's avatar
Linus Torvalds committed
261 262 263 264 265 266 267 268 269 270 271
	      int       p_n_num,    /* Number of items in the array.        */
	      int       p_n_width,  /* Item size in the array.
				       searched. Lest the reader be
				       confused, note that this is crafted
				       as a general function, and when it
				       is applied specifically to the array
				       of item headers in a node, p_n_width
				       is actually the item header size not
				       the item size.                      */
	      int     * p_n_pos     /* Number of the searched for element. */
            ) {
Linus Torvalds's avatar
Linus Torvalds committed
272 273 274 275 276 277 278 279 280 281 282 283 284
    int   n_rbound, n_lbound, n_j;

   for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 )
     switch( COMP_KEYS((struct key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) )  {
     case -1: n_lbound = n_j + 1; continue;
     case  1: n_rbound = n_j - 1; continue;
     case  0: *p_n_pos = n_j;     return ITEM_FOUND; /* Key found in the array.  */
        }

    /* bin_search did not find given key, it returns position of key,
        that is minimal and greater than the given one. */
    *p_n_pos = n_lbound;
    return ITEM_NOT_FOUND;
Linus Torvalds's avatar
Linus Torvalds committed
285 286 287 288 289 290 291 292 293
}

#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance * cur_tb;
#endif



/* Minimal possible key. It is never in the tree. */
Linus Torvalds's avatar
Linus Torvalds committed
294
const struct key  MIN_KEY = {0, 0, {{0, 0},}};
Linus Torvalds's avatar
Linus Torvalds committed
295 296

/* Maximal possible key. It is never in the tree. */
Linus Torvalds's avatar
Linus Torvalds committed
297
const struct key  MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
Linus Torvalds's avatar
Linus Torvalds committed
298 299 300 301 302 303


/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
   of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
   the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
   case we return a special key, either MIN_KEY or MAX_KEY. */
Linus Torvalds's avatar
Linus Torvalds committed
304 305 306
inline	const struct  key * get_lkey  (
	                const struct path         * p_s_chk_path,
                        const struct super_block  * p_s_sb
Linus Torvalds's avatar
Linus Torvalds committed
307 308 309 310
                      ) {
  int                   n_position, n_path_offset = p_s_chk_path->path_length;
  struct buffer_head  * p_s_parent;
  
Linus Torvalds's avatar
Linus Torvalds committed
311
  RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 
312
	  "PAP-5010: invalid offset in the path");
Linus Torvalds's avatar
Linus Torvalds committed
313 314 315 316

  /* While not higher in path than first element. */
  while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {

Linus Torvalds's avatar
Linus Torvalds committed
317 318
    RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
	    "PAP-5020: parent is not uptodate");
Linus Torvalds's avatar
Linus Torvalds committed
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342

    /* Parent at the path is not in the tree now. */
    if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
      return &MAX_KEY;
    /* Check whether position in the parent is correct. */
    if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
       return &MAX_KEY;
    /* Check whether parent at the path really points to the child. */
    if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
	 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
      return &MAX_KEY;
    /* Return delimiting key if position in the parent is not equal to zero. */
    if ( n_position )
      return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
  }
  /* Return MIN_KEY if we are in the root of the buffer tree. */
  if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
       SB_ROOT_BLOCK (p_s_sb) )
    return &MIN_KEY;
  return  &MAX_KEY;
}


/* Get delimiting key of the buffer at the path and its right neighbor. */
Linus Torvalds's avatar
Linus Torvalds committed
343 344 345
inline	const struct  key * get_rkey  (
	                const struct path         * p_s_chk_path,
                        const struct super_block  * p_s_sb
Linus Torvalds's avatar
Linus Torvalds committed
346 347 348 349 350
                      ) {
  int                   n_position,
    			n_path_offset = p_s_chk_path->path_length;
  struct buffer_head  * p_s_parent;

Linus Torvalds's avatar
Linus Torvalds committed
351
  RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
352
	  "PAP-5030: invalid offset in the path");
Linus Torvalds's avatar
Linus Torvalds committed
353 354 355

  while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {

Linus Torvalds's avatar
Linus Torvalds committed
356 357
    RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
	    "PAP-5040: parent is not uptodate");
Linus Torvalds's avatar
Linus Torvalds committed
358 359 360 361

    /* Parent at the path is not in the tree now. */
    if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
      return &MIN_KEY;
Linus Torvalds's avatar
Linus Torvalds committed
362
    /* Check whether position in the parent is correct. */
Linus Torvalds's avatar
Linus Torvalds committed
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
    if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
      return &MIN_KEY;
    /* Check whether parent at the path really points to the child. */
    if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
                                        PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
      return &MIN_KEY;
    /* Return delimiting key if position in the parent is not the last one. */
    if ( n_position != B_NR_ITEMS(p_s_parent) )
      return B_N_PDELIM_KEY(p_s_parent, n_position);
  }
  /* Return MAX_KEY if we are in the root of the buffer tree. */
  if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
       SB_ROOT_BLOCK (p_s_sb) )
    return &MAX_KEY;
  return  &MIN_KEY;
}


/* Check whether a key is contained in the tree rooted from a buffer at a path. */
/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
   the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
   buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
   this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
static  inline  int key_in_buffer (
                      struct path         * p_s_chk_path, /* Path which should be checked.  */
Linus Torvalds's avatar
Linus Torvalds committed
388
                      const struct cpu_key      * p_s_key,      /* Key which should be checked.   */
Linus Torvalds's avatar
Linus Torvalds committed
389 390 391
                      struct super_block  * p_s_sb        /* Super block pointer.           */
		      ) {

Linus Torvalds's avatar
Linus Torvalds committed
392 393
  RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
	  p_s_chk_path->path_length > MAX_HEIGHT,
394
	  "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
Linus Torvalds's avatar
Linus Torvalds committed
395
	  p_s_key, p_s_chk_path->path_length);
396
  RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
Linus Torvalds's avatar
Linus Torvalds committed
397
	  "PAP-5060: device must not be NODEV");
Linus Torvalds's avatar
Linus Torvalds committed
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414

  if ( COMP_KEYS(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
    /* left delimiting key is bigger, that the key we look for */
    return 0;
  //  if ( COMP_KEYS(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
  if ( COMP_KEYS(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
    /* p_s_key must be less than right delimitiing key */
    return 0;
  return 1;
}


inline void decrement_bcount(
              struct buffer_head  * p_s_bh
            ) { 
  if ( p_s_bh ) {
    if ( atomic_read (&(p_s_bh->b_count)) ) {
Linus Torvalds's avatar
Linus Torvalds committed
415
      put_bh(p_s_bh) ;
Linus Torvalds's avatar
Linus Torvalds committed
416 417 418 419 420 421 422 423 424 425 426 427 428
      return;
    }
    reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh);
  }
}


/* Decrement b_count field of the all buffers in the path. */
void decrement_counters_in_path (
              struct path * p_s_search_path
            ) {
  int n_path_offset = p_s_search_path->path_length;

Linus Torvalds's avatar
Linus Torvalds committed
429 430
  RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
	  n_path_offset > EXTENDED_MAX_HEIGHT - 1,
431
	  "PAP-5080: invalid path offset of %d", n_path_offset);
Linus Torvalds's avatar
Linus Torvalds committed
432 433 434 435 436 437 438 439 440 441 442 443

  while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
    struct buffer_head * bh;

    bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
    decrement_bcount (bh);
  }
  p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}


int reiserfs_check_path(struct path *p) {
Linus Torvalds's avatar
Linus Torvalds committed
444 445
  RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
	  "path not properly relsed") ;
Linus Torvalds's avatar
Linus Torvalds committed
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
  return 0 ;
}


/* Release all buffers in the path. Restore dirty bits clean
** when preparing the buffer for the log
**
** only called from fix_nodes()
*/
void  pathrelse_and_restore (
	struct super_block *s, 
        struct path * p_s_search_path
      ) {
  int n_path_offset = p_s_search_path->path_length;

Linus Torvalds's avatar
Linus Torvalds committed
461
  RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 
462
	  "clm-4000: invalid path offset");
Linus Torvalds's avatar
Linus Torvalds committed
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
  
  while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )  {
    reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, 
                                     n_path_offset));
    brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
  }
  p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}

/* Release all buffers in the path. */
void  pathrelse (
        struct path * p_s_search_path
      ) {
  int n_path_offset = p_s_search_path->path_length;

Linus Torvalds's avatar
Linus Torvalds committed
478
  RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
479
	  "PAP-5090: invalid path offset");
Linus Torvalds's avatar
Linus Torvalds committed
480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
  
  while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )  
    brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));

  p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}



static int is_leaf (char * buf, int blocksize, struct buffer_head * bh)
{
    struct block_head * blkh;
    struct item_head * ih;
    int used_space;
    int prev_location;
    int i;
    int nr;

    blkh = (struct block_head *)buf;
Linus Torvalds's avatar
Linus Torvalds committed
499
    if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
500
	reiserfs_warning (NULL, "is_leaf: this should be caught earlier");
Linus Torvalds's avatar
Linus Torvalds committed
501 502 503
	return 0;
    }

Linus Torvalds's avatar
Linus Torvalds committed
504
    nr = blkh_nr_item(blkh);
Linus Torvalds's avatar
Linus Torvalds committed
505 506
    if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
	/* item number is too big or too small */
507
	reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh);
Linus Torvalds's avatar
Linus Torvalds committed
508 509 510 511
	return 0;
    }
    ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
    used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih));
Linus Torvalds's avatar
Linus Torvalds committed
512
    if (used_space != blocksize - blkh_free_space(blkh)) {
Linus Torvalds's avatar
Linus Torvalds committed
513
	/* free space does not match to calculated amount of use space */
514
	reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh);
Linus Torvalds's avatar
Linus Torvalds committed
515 516 517 518 519 520 521 522 523 524
	return 0;
    }

    // FIXME: it is_leaf will hit performance too much - we may have
    // return 1 here

    /* check tables of item heads */
    ih = (struct item_head *)(buf + BLKH_SIZE);
    prev_location = blocksize;
    for (i = 0; i < nr; i ++, ih ++) {
525
	if ( le_ih_k_type(ih) == TYPE_ANY) {
526
	    reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih);
527 528
	    return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
529
	if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) {
530
	    reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih);
Linus Torvalds's avatar
Linus Torvalds committed
531 532 533
	    return 0;
	}
	if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) {
534
	    reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih);
Linus Torvalds's avatar
Linus Torvalds committed
535 536 537
	    return 0;
	}
	if (prev_location - ih_location (ih) != ih_item_len (ih)) {
538
	    reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih);
Linus Torvalds's avatar
Linus Torvalds committed
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
	    return 0;
	}
	prev_location = ih_location (ih);
    }

    // one may imagine much more checks
    return 1;
}


/* returns 1 if buf looks like an internal node, 0 otherwise */
static int is_internal (char * buf, int blocksize, struct buffer_head * bh)
{
    struct block_head * blkh;
    int nr;
    int used_space;

    blkh = (struct block_head *)buf;
Linus Torvalds's avatar
Linus Torvalds committed
557 558
    nr = blkh_level(blkh);
    if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
Linus Torvalds's avatar
Linus Torvalds committed
559
	/* this level is not possible for internal nodes */
560
	reiserfs_warning (NULL, "is_internal: this should be caught earlier");
Linus Torvalds's avatar
Linus Torvalds committed
561 562 563
	return 0;
    }
    
Linus Torvalds's avatar
Linus Torvalds committed
564
    nr = blkh_nr_item(blkh);
Linus Torvalds's avatar
Linus Torvalds committed
565 566
    if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
	/* for internal which is not root we might check min number of keys */
567
	reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh);
Linus Torvalds's avatar
Linus Torvalds committed
568 569 570 571
	return 0;
    }

    used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
Linus Torvalds's avatar
Linus Torvalds committed
572
    if (used_space != blocksize - blkh_free_space(blkh)) {
573
	reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh);
Linus Torvalds's avatar
Linus Torvalds committed
574 575 576 577 578 579 580 581 582 583 584 585 586
	return 0;
    }

    // one may imagine much more checks
    return 1;
}


// make sure that bh contains formatted node of reiserfs tree of
// 'level'-th level
static int is_tree_node (struct buffer_head * bh, int level)
{
    if (B_LEVEL (bh) != level) {
587
	reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d",
Linus Torvalds's avatar
Linus Torvalds committed
588 589 590 591 592 593 594 595 596 597 598
		B_LEVEL (bh), level);
	return 0;
    }
    if (level == DISK_LEAF_NODE_LEVEL)
	return is_leaf (bh->b_data, bh->b_size, bh);

    return is_internal (bh->b_data, bh->b_size, bh);
}



599
#define SEARCH_BY_KEY_READA 16
Linus Torvalds's avatar
Linus Torvalds committed
600 601

/* The function is NOT SCHEDULE-SAFE! */
602 603 604
static void search_by_key_reada (struct super_block * s,
                                 struct buffer_head **bh,
				 unsigned long *b, int num)
Linus Torvalds's avatar
Linus Torvalds committed
605
{
606
    int i,j;
Linus Torvalds's avatar
Linus Torvalds committed
607
  
608 609 610 611 612 613 614 615 616 617 618
    for (i = 0 ; i < num ; i++) {
	bh[i] = sb_getblk (s, b[i]);
    }
    for (j = 0 ; j < i ; j++) {
	/*
	 * note, this needs attention if we are getting rid of the BKL
	 * you have to make sure the prepared bit isn't set on this buffer
	 */
	if (!buffer_uptodate(bh[j]))
	    ll_rw_block(READA, 1, bh + j);
    	brelse(bh[j]);
Linus Torvalds's avatar
Linus Torvalds committed
619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645
    }
}

/**************************************************************************
 * Algorithm   SearchByKey                                                *
 *             look for item in the Disk S+Tree by its key                *
 * Input:  p_s_sb   -  super block                                        *
 *         p_s_key  - pointer to the key to search                        *
 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
 *         p_s_search_path - path from the root to the needed leaf        *
 **************************************************************************/

/* This function fills up the path from the root to the leaf as it
   descends the tree looking for the key.  It uses reiserfs_bread to
   try to find buffers in the cache given their block number.  If it
   does not find them in the cache it reads them from disk.  For each
   node search_by_key finds using reiserfs_bread it then uses
   bin_search to look through that node.  bin_search will find the
   position of the block_number of the next node if it is looking
   through an internal node.  If it is looking through a leaf node
   bin_search will find the position of the item which has key either
   equal to given key, or which is the maximal key less than the given
   key.  search_by_key returns a path that must be checked for the
   correctness of the top of the path but need not be checked for the
   correctness of the bottom of the path */
/* The function is NOT SCHEDULE-SAFE! */
int search_by_key (struct super_block * p_s_sb,
Linus Torvalds's avatar
Linus Torvalds committed
646
		   const struct cpu_key * p_s_key, /* Key to search. */
Linus Torvalds's avatar
Linus Torvalds committed
647 648 649 650 651 652 653 654 655
		   struct path * p_s_search_path, /* This structure was
						     allocated and initialized
						     by the calling
						     function. It is filled up
						     by this function.  */
		   int n_stop_level /* How far down the tree to search. To
                                       stop at leaf level - set to
                                       DISK_LEAF_NODE_LEVEL */
    ) {
656 657
    int  n_block_number;
    int  expected_level;
Linus Torvalds's avatar
Linus Torvalds committed
658 659 660 661 662
    struct buffer_head  *       p_s_bh;
    struct path_element *       p_s_last_element;
    int				n_node_level, n_retval;
    int 			right_neighbor_of_leaf_node;
    int				fs_gen;
663 664 665
    struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
    unsigned long      reada_blocks[SEARCH_BY_KEY_READA];
    int reada_count = 0;
Linus Torvalds's avatar
Linus Torvalds committed
666 667 668 669

#ifdef CONFIG_REISERFS_CHECK
    int n_repeat_counter = 0;
#endif
Linus Torvalds's avatar
Linus Torvalds committed
670 671 672
    
    PROC_INFO_INC( p_s_sb, search_by_key );
    
Linus Torvalds's avatar
Linus Torvalds committed
673 674 675 676 677 678 679 680 681 682 683
    /* As we add each node to a path we increase its count.  This means that
       we must be careful to release all nodes in a path before we either
       discard the path struct or re-use the path struct, as we do here. */

    decrement_counters_in_path(p_s_search_path);

    right_neighbor_of_leaf_node = 0;

    /* With each iteration of this loop we search through the items in the
       current node, and calculate the next current node(next path element)
       for the next iteration of this loop.. */
684
    n_block_number = SB_ROOT_BLOCK (p_s_sb);
685
    expected_level = -1;
Linus Torvalds's avatar
Linus Torvalds committed
686 687 688 689
    while ( 1 ) {

#ifdef CONFIG_REISERFS_CHECK
	if ( !(++n_repeat_counter % 50000) )
690
	    reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
Linus Torvalds's avatar
Linus Torvalds committed
691
			      "there were %d iterations of while loop "
692
			      "looking for key %K",
Linus Torvalds's avatar
Linus Torvalds committed
693 694 695 696 697 698 699 700 701
			      current->comm, n_repeat_counter, p_s_key);
#endif

	/* prep path to have another element added to it. */
	p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length);
	fs_gen = get_generation (p_s_sb);

	/* Read the next tree node, and set the last element in the path to
           have a pointer to it. */
702 703 704 705 706 707 708 709 710 711 712 713
	if ((p_s_bh = p_s_last_element->pe_buffer =
	     sb_getblk(p_s_sb, n_block_number)) ) {
	    if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
		search_by_key_reada (p_s_sb, reada_bh,
		                     reada_blocks, reada_count);
	    }
	    ll_rw_block(READ, 1, &p_s_bh);
	    wait_on_buffer(p_s_bh);
	    if (!buffer_uptodate(p_s_bh))
	        goto io_error;
	} else {
io_error:
Linus Torvalds's avatar
Linus Torvalds committed
714 715 716 717
	    p_s_search_path->path_length --;
	    pathrelse(p_s_search_path);
	    return IO_ERROR;
	}
718
	reada_count = 0;
719 720 721
	if (expected_level == -1)
		expected_level = SB_TREE_HEIGHT (p_s_sb);
	expected_level --;
Linus Torvalds's avatar
Linus Torvalds committed
722

Linus Torvalds's avatar
Linus Torvalds committed
723
	/* It is possible that schedule occurred. We must check whether the key
Linus Torvalds's avatar
Linus Torvalds committed
724 725 726
	   to search is still in the tree rooted from the current buffer. If
	   not then repeat search from the root. */
	if ( fs_changed (fs_gen, p_s_sb) && 
727 728 729
	    (!B_IS_IN_TREE (p_s_bh) ||
	     B_LEVEL(p_s_bh) != expected_level ||
	     !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
730
	    PROC_INFO_INC( p_s_sb, search_by_key_fs_changed );
731
	    PROC_INFO_INC( p_s_sb, search_by_key_restarted );
Linus Torvalds's avatar
Linus Torvalds committed
732
	    PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] );
Linus Torvalds's avatar
Linus Torvalds committed
733 734 735
	    decrement_counters_in_path(p_s_search_path);
	    
	    /* Get the root block number so that we can repeat the search
736
	       starting from the root. */
Linus Torvalds's avatar
Linus Torvalds committed
737
	    n_block_number = SB_ROOT_BLOCK (p_s_sb);
738
	    expected_level = -1;
Linus Torvalds's avatar
Linus Torvalds committed
739 740 741 742 743 744
	    right_neighbor_of_leaf_node = 0;
	    
	    /* repeat search from the root */
	    continue;
	}

Linus Torvalds's avatar
Linus Torvalds committed
745 746 747 748 749
        /* only check that the key is in the buffer if p_s_key is not
           equal to the MAX_KEY. Latter case is only possible in
           "finish_unfinished()" processing during mount. */
        RFALSE( COMP_KEYS( &MAX_KEY, p_s_key ) && 
                ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
Linus Torvalds's avatar
Linus Torvalds committed
750
		"PAP-5130: key is not in the buffer");
Linus Torvalds's avatar
Linus Torvalds committed
751 752 753 754 755 756 757 758 759 760
#ifdef CONFIG_REISERFS_CHECK
	if ( cur_tb ) {
	    print_cur_tb ("5140");
	    reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!");
	}
#endif

	// make sure, that the node contents look like a node of
	// certain level
	if (!is_tree_node (p_s_bh, expected_level)) {
761 762
	    reiserfs_warning (p_s_sb, "vs-5150: search_by_key: "
			      "invalid format found in block %ld. Fsck?",
Linus Torvalds's avatar
Linus Torvalds committed
763
			      p_s_bh->b_blocknr);
Linus Torvalds's avatar
Linus Torvalds committed
764 765 766 767 768 769 770
	    pathrelse (p_s_search_path);
	    return IO_ERROR;
	}
	
	/* ok, we have acquired next formatted node in the tree */
	n_node_level = B_LEVEL (p_s_bh);

Linus Torvalds's avatar
Linus Torvalds committed
771 772
	PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );

Linus Torvalds's avatar
Linus Torvalds committed
773
	RFALSE( n_node_level < n_stop_level,
Linus Torvalds's avatar
Linus Torvalds committed
774
		"vs-5152: tree level (%d) is less than stop level (%d)",
Linus Torvalds's avatar
Linus Torvalds committed
775
		n_node_level, n_stop_level);
Linus Torvalds's avatar
Linus Torvalds committed
776

Linus Torvalds's avatar
Linus Torvalds committed
777 778 779 780
	n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
                B_NR_ITEMS(p_s_bh),
                ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
                &(p_s_last_element->pe_position));
Linus Torvalds's avatar
Linus Torvalds committed
781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798
	if (n_node_level == n_stop_level) {
	    return n_retval;
	}

	/* we are not in the stop level */
	if (n_retval == ITEM_FOUND)
	    /* item has been found, so we choose the pointer which is to the right of the found one */
	    p_s_last_element->pe_position++;

	/* if item was not found we choose the position which is to
	   the left of the found item. This requires no code,
	   bin_search did it already.*/

	/* So we have chosen a position in the current node which is
	   an internal node.  Now we calculate child block number by
	   position in the node. */
	n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);

799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
	/* if we are going to read leaf nodes, try for read ahead as well */
	if ((p_s_search_path->reada & PATH_READA) &&
	    n_node_level == DISK_LEAF_NODE_LEVEL + 1)
	{
	    int pos = p_s_last_element->pe_position;
	    int limit = B_NR_ITEMS(p_s_bh);
	    struct key *le_key;

	    if (p_s_search_path->reada & PATH_READA_BACK)
		limit = 0;
	    while(reada_count < SEARCH_BY_KEY_READA) {
		if (pos == limit)
		    break;
	        reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
		if (p_s_search_path->reada & PATH_READA_BACK)
		    pos--;
		else
		    pos++;

		/*
		 * check to make sure we're in the same object
		 */
		le_key = B_N_PDELIM_KEY(p_s_bh, pos);
		if (le32_to_cpu(le_key->k_objectid) !=
		    p_s_key->on_disk_key.k_objectid)
		{
		    break;
		}
	    }
        }
Linus Torvalds's avatar
Linus Torvalds committed
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849
    }
}


/* Form the path to an item and position in this item which contains
   file byte defined by p_s_key. If there is no such item
   corresponding to the key, we point the path to the item with
   maximal key less than p_s_key, and *p_n_pos_in_item is set to one
   past the last entry/byte in the item.  If searching for entry in a
   directory item, and it is not found, *p_n_pos_in_item is set to one
   entry more than the entry with maximal key which is less than the
   sought key.

   Note that if there is no entry in this same node which is one more,
   then we point to an imaginary entry.  for direct items, the
   position is in units of bytes, for indirect items the position is
   in units of blocknr entries, for directory items the position is in
   units of directory entries.  */

/* The function is NOT SCHEDULE-SAFE! */
int search_for_position_by_key (struct super_block  * p_s_sb,         /* Pointer to the super block.          */
Linus Torvalds's avatar
Linus Torvalds committed
850
				const struct cpu_key  * p_cpu_key,      /* Key to search (cpu variable)         */
Linus Torvalds's avatar
Linus Torvalds committed
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
				struct path         * p_s_search_path /* Filled up by this function.          */
    ) {
    struct item_head    * p_le_ih; /* pointer to on-disk structure */
    int                   n_blk_size;
    loff_t item_offset, offset;
    struct reiserfs_dir_entry de;
    int retval;

    /* If searching for directory entry. */
    if ( is_direntry_cpu_key (p_cpu_key) )
	return  search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de);

    /* If not searching for directory entry. */
    
    /* If item is found. */
    retval = search_item (p_s_sb, p_cpu_key, p_s_search_path);
    if (retval == IO_ERROR)
	return retval;
    if ( retval == ITEM_FOUND )  {

Linus Torvalds's avatar
Linus Torvalds committed
871 872 873 874
	RFALSE( ! ih_item_len(
                B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
			       PATH_LAST_POSITION(p_s_search_path))),
	        "PAP-5165: item length equals zero");
Linus Torvalds's avatar
Linus Torvalds committed
875 876 877 878 879

	pos_in_item(p_s_search_path) = 0;
	return POSITION_FOUND;
    }

Linus Torvalds's avatar
Linus Torvalds committed
880
    RFALSE( ! PATH_LAST_POSITION(p_s_search_path),
Linus Torvalds's avatar
Linus Torvalds committed
881
	    "PAP-5170: position equals zero");
Linus Torvalds's avatar
Linus Torvalds committed
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908

    /* Item is not found. Set path to the previous item. */
    p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path));
    n_blk_size = p_s_sb->s_blocksize;

    if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) {
	return FILE_NOT_FOUND;
    }

    // FIXME: quite ugly this far

    item_offset = le_ih_k_offset (p_le_ih);
    offset = cpu_key_k_offset (p_cpu_key);

    /* Needed byte is contained in the item pointed to by the path.*/
    if (item_offset <= offset &&
	item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) {
	pos_in_item (p_s_search_path) = offset - item_offset;
	if ( is_indirect_le_ih(p_le_ih) ) {
	    pos_in_item (p_s_search_path) /= n_blk_size;
	}
	return POSITION_FOUND;
    }

    /* Needed byte is not contained in the item pointed to by the
     path. Set pos_in_item out of the item. */
    if ( is_indirect_le_ih (p_le_ih) )
Linus Torvalds's avatar
Linus Torvalds committed
909
	pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
Linus Torvalds's avatar
Linus Torvalds committed
910
    else
Linus Torvalds's avatar
Linus Torvalds committed
911
        pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );
Linus Torvalds's avatar
Linus Torvalds committed
912 913 914 915 916 917
  
    return POSITION_NOT_FOUND;
}


/* Compare given item and item pointed to by the path. */
Linus Torvalds's avatar
Linus Torvalds committed
918
int comp_items (const struct item_head * stored_ih, const struct path * p_s_path)
Linus Torvalds's avatar
Linus Torvalds committed
919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958
{
    struct buffer_head  * p_s_bh;
    struct item_head    * ih;

    /* Last buffer at the path is not in the tree. */
    if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) )
	return 1;

    /* Last path position is invalid. */
    if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) )
	return 1;

    /* we need only to know, whether it is the same item */
    ih = get_ih (p_s_path);
    return memcmp (stored_ih, ih, IH_SIZE);
}


/* unformatted nodes are not logged anymore, ever.  This is safe
** now
*/
#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)

// block can not be forgotten as it is in I/O or held by someone
#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))



// prepare for delete or cut of direct item
static inline int prepare_for_direct_item (struct path * path,
					   struct item_head * le_ih,
					   struct inode * inode,
					   loff_t new_file_length,
					   int * cut_size)
{
    loff_t round_len;


    if ( new_file_length == max_reiserfs_offset (inode) ) {
	/* item has to be deleted */
Linus Torvalds's avatar
Linus Torvalds committed
959
	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
Linus Torvalds's avatar
Linus Torvalds committed
960 961 962 963
	return M_DELETE;
    }
	
    // new file gets truncated
Linus Torvalds's avatar
Linus Torvalds committed
964
    if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) {
Linus Torvalds's avatar
Linus Torvalds committed
965 966 967 968
	// 
	round_len = ROUND_UP (new_file_length); 
	/* this was n_new_file_length < le_ih ... */
	if ( round_len < le_ih_k_offset (le_ih) )  {
Linus Torvalds's avatar
Linus Torvalds committed
969
	    *cut_size = -(IH_SIZE + ih_item_len(le_ih));
Linus Torvalds's avatar
Linus Torvalds committed
970 971 972 973
	    return M_DELETE; /* Delete this item. */
	}
	/* Calculate first position and size for cutting from item. */
	pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1);
Linus Torvalds's avatar
Linus Torvalds committed
974
	*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
Linus Torvalds's avatar
Linus Torvalds committed
975 976 977 978 979 980 981 982
	
	return M_CUT; /* Cut from this item. */
    }


    // old file: items may have any length

    if ( new_file_length < le_ih_k_offset (le_ih) )  {
Linus Torvalds's avatar
Linus Torvalds committed
983
	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
Linus Torvalds's avatar
Linus Torvalds committed
984 985 986
	return M_DELETE; /* Delete this item. */
    }
    /* Calculate first position and size for cutting from item. */
Linus Torvalds's avatar
Linus Torvalds committed
987
    *cut_size = -(ih_item_len(le_ih) -
Linus Torvalds's avatar
Linus Torvalds committed
988 989 990 991 992 993 994 995 996 997 998 999 1000
		      (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih)));
    return M_CUT; /* Cut from this item. */
}


static inline int prepare_for_direntry_item (struct path * path,
					     struct item_head * le_ih,
					     struct inode * inode,
					     loff_t new_file_length,
					     int * cut_size)
{
    if (le_ih_k_offset (le_ih) == DOT_OFFSET && 
	new_file_length == max_reiserfs_offset (inode)) {
Linus Torvalds's avatar
Linus Torvalds committed
1001
	RFALSE( ih_entry_count (le_ih) != 2,
Linus Torvalds's avatar
Linus Torvalds committed
1002 1003
	        "PAP-5220: incorrect empty directory item (%h)", le_ih);
	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
Linus Torvalds's avatar
Linus Torvalds committed
1004 1005 1006 1007 1008 1009
	return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
    }
    
    if ( ih_entry_count (le_ih) == 1 )  {
	/* Delete the directory item such as there is one record only
	   in this item*/
Linus Torvalds's avatar
Linus Torvalds committed
1010
	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
Linus Torvalds's avatar
Linus Torvalds committed
1011 1012 1013 1014
	return M_DELETE;
    }
    
    /* Cut one record from the directory item. */
Linus Torvalds's avatar
Linus Torvalds committed
1015
    *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path)));
Linus Torvalds's avatar
Linus Torvalds committed
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
    return M_CUT; 
}


/*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
    If the path points to an indirect item, remove some number of its unformatted nodes.
    In case of file truncate calculate whether this item must be deleted/truncated or last
    unformatted node of this item will be converted to a direct item.
    This function returns a determination of what balance mode the calling function should employ. */
static char  prepare_for_delete_or_cut(
				       struct reiserfs_transaction_handle *th, 
				       struct inode * inode,
				       struct path         * p_s_path,
Linus Torvalds's avatar
Linus Torvalds committed
1029
				       const struct cpu_key      * p_s_item_key,
Linus Torvalds's avatar
Linus Torvalds committed
1030 1031 1032 1033 1034 1035 1036 1037 1038
				       int                 * p_n_removed,      /* Number of unformatted nodes which were removed
										  from end of the file. */
				       int                 * p_n_cut_size,
				       unsigned long long    n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
    ) {
    struct super_block  * p_s_sb = inode->i_sb;
    struct item_head    * p_le_ih = PATH_PITEM_HEAD(p_s_path);
    struct buffer_head  * p_s_bh = PATH_PLAST_BUFFER(p_s_path);

1039 1040
    BUG_ON (!th->t_trans_id);

Linus Torvalds's avatar
Linus Torvalds committed
1041 1042 1043
    /* Stat_data item. */
    if ( is_statdata_le_ih (p_le_ih) ) {

Linus Torvalds's avatar
Linus Torvalds committed
1044 1045
	RFALSE( n_new_file_length != max_reiserfs_offset (inode),
		"PAP-5210: mode must be M_DELETE");
Linus Torvalds's avatar
Linus Torvalds committed
1046

Linus Torvalds's avatar
Linus Torvalds committed
1047
	*p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
Linus Torvalds's avatar
Linus Torvalds committed
1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
	return M_DELETE;
    }


    /* Directory item. */
    if ( is_direntry_le_ih (p_le_ih) )
	return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);

    /* Direct item. */
    if ( is_direct_le_ih (p_le_ih) )
	return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);


    /* Case of an indirect item. */
    {
	int                   n_unfm_number,    /* Number of the item unformatted nodes. */
	    n_counter,
	    n_blk_size;
	__u32               * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
	__u32 tmp;
	struct item_head      s_ih;           /* Item header. */
	char                  c_mode;           /* Returned mode of the balance. */
	int need_research;


	n_blk_size = p_s_sb->s_blocksize;

	/* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
	do  {
	    need_research = 0;
            p_s_bh = PATH_PLAST_BUFFER(p_s_path);
	    /* Copy indirect item header to a temp variable. */
	    copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
	    /* Calculate number of unformatted nodes in this item. */
	    n_unfm_number = I_UNFM_NUM(&s_ih);

Linus Torvalds's avatar
Linus Torvalds committed
1084 1085
	    RFALSE( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number ||
		    pos_in_item (p_s_path) + 1 !=  n_unfm_number,
1086
		    "PAP-5240: invalid item %h "
Linus Torvalds's avatar
Linus Torvalds committed
1087 1088
		    "n_unfm_number = %d *p_n_pos_in_item = %d", 
		    &s_ih, n_unfm_number, pos_in_item (p_s_path));
Linus Torvalds's avatar
Linus Torvalds committed
1089 1090 1091 1092

	    /* Calculate balance mode and position in the item to remove unformatted nodes. */
	    if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */
		pos_in_item (p_s_path) = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1093
		*p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
Linus Torvalds's avatar
Linus Torvalds committed
1094 1095 1096 1097 1098
		c_mode = M_DELETE;
	    }
	    else  { /* Case of truncate. */
		if ( n_new_file_length < le_ih_k_offset (&s_ih) )  {
		    pos_in_item (p_s_path) = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1099
		    *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
Linus Torvalds's avatar
Linus Torvalds committed
1100 1101 1102 1103 1104 1105
		    c_mode = M_DELETE; /* Delete this item. */
		}
		else  {
		    /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
		    pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits;

Linus Torvalds's avatar
Linus Torvalds committed
1106
		    RFALSE( pos_in_item (p_s_path) > n_unfm_number,
1107
			    "PAP-5250: invalid position in the item");
Linus Torvalds's avatar
Linus Torvalds committed
1108 1109 1110 1111 1112 1113 1114 1115

		    /* Either convert last unformatted node of indirect item to direct item or increase
		       its free space.  */
		    if ( pos_in_item (p_s_path) == n_unfm_number )  {
			*p_n_cut_size = 0; /* Nothing to cut. */
			return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
		    }
		    /* Calculate size to cut. */
Linus Torvalds's avatar
Linus Torvalds committed
1116
		    *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE);
Linus Torvalds's avatar
Linus Torvalds committed
1117 1118 1119 1120 1121

		    c_mode = M_CUT;     /* Cut from this indirect item. */
		}
	    }

Linus Torvalds's avatar
Linus Torvalds committed
1122
	    RFALSE( n_unfm_number <= pos_in_item (p_s_path),
1123
		    "PAP-5260: invalid position in the indirect item");
Linus Torvalds's avatar
Linus Torvalds committed
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141

	    /* pointers to be cut */
	    n_unfm_number -= pos_in_item (p_s_path);
	    /* Set pointer to the last unformatted node pointer that is to be cut. */
	    p_n_unfm_pointer = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;


	    /* We go through the unformatted nodes pointers of the indirect
	       item and look for the unformatted nodes in the cache. If we
	       found some of them we free it, zero corresponding indirect item
	       entry and log buffer containing that indirect item. For this we
	       need to prepare last path element for logging. If some
	       unformatted node has b_count > 1 we must not free this
	       unformatted node since it is in use. */
	    reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
	    // note: path could be changed, first line in for loop takes care
	    // of it

Linus Torvalds's avatar
Linus Torvalds committed
1142 1143
	    for (n_counter = *p_n_removed;
		 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
Linus Torvalds's avatar
Linus Torvalds committed
1144

1145
		cond_resched();
Linus Torvalds's avatar
Linus Torvalds committed
1146 1147 1148 1149
		if (item_moved (&s_ih, p_s_path)) {
		    need_research = 1 ;
		    break;
		}
Linus Torvalds's avatar
Linus Torvalds committed
1150 1151 1152
		RFALSE( p_n_unfm_pointer < (__u32 *)B_I_PITEM(p_s_bh, &s_ih) ||
			p_n_unfm_pointer > (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
			"vs-5265: pointer out of range");
Linus Torvalds's avatar
Linus Torvalds committed
1153

Linus Torvalds's avatar
Linus Torvalds committed
1154 1155
		/* Hole, nothing to remove. */
		if ( ! get_block_num(p_n_unfm_pointer,0) )  {
Linus Torvalds's avatar
Linus Torvalds committed
1156
			(*p_n_removed)++;
Linus Torvalds's avatar
Linus Torvalds committed
1157
			continue;
Linus Torvalds's avatar
Linus Torvalds committed
1158 1159
		}

Linus Torvalds's avatar
Linus Torvalds committed
1160
		(*p_n_removed)++;
Linus Torvalds's avatar
Linus Torvalds committed
1161

Linus Torvalds's avatar
Linus Torvalds committed
1162 1163
		tmp = get_block_num(p_n_unfm_pointer,0);
		put_block_num(p_n_unfm_pointer, 0, 0);
Linus Torvalds's avatar
Linus Torvalds committed
1164
		journal_mark_dirty (th, p_s_sb, p_s_bh);
1165
		reiserfs_free_block(th, inode, tmp, 1);
Linus Torvalds's avatar
Linus Torvalds committed
1166
		if ( item_moved (&s_ih, p_s_path) )  {
Linus Torvalds's avatar
Linus Torvalds committed
1167 1168 1169
			need_research = 1;
			break ;
		}
Linus Torvalds's avatar
Linus Torvalds committed
1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182
	    }

	    /* a trick.  If the buffer has been logged, this
	    ** will do nothing.  If we've broken the loop without
	    ** logging it, it will restore the buffer
	    **
	    */
	    reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);

	    /* This loop can be optimized. */
	} while ( (*p_n_removed < n_unfm_number || need_research) &&
		  search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND );

Linus Torvalds's avatar
Linus Torvalds committed
1183 1184 1185 1186
	RFALSE( *p_n_removed < n_unfm_number, 
		"PAP-5310: indirect item is not found");
	RFALSE( item_moved (&s_ih, p_s_path), 
		"after while, comp failed, retry") ;
Linus Torvalds's avatar
Linus Torvalds committed
1187 1188 1189 1190 1191 1192 1193

	if (c_mode == M_CUT)
	    pos_in_item (p_s_path) *= UNFM_P_SIZE;
	return c_mode;
    }
}

1194
/* Calculate number of bytes which will be deleted or cut during balance */
Linus Torvalds's avatar
Linus Torvalds committed
1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
int calc_deleted_bytes_number(
    struct  tree_balance  * p_s_tb,
    char                    c_mode
    ) {
    int                     n_del_size;
    struct  item_head     * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);

    if ( is_statdata_le_ih (p_le_ih) )
	return 0;

1205
    n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
Linus Torvalds's avatar
Linus Torvalds committed
1206 1207 1208 1209 1210
    if ( is_direntry_le_ih (p_le_ih) ) {
	// return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
	// we can't use EMPTY_DIR_SIZE, as old format dirs have a different
	// empty size.  ick. FIXME, is this right?
	//
1211
	return n_del_size ;
Linus Torvalds's avatar
Linus Torvalds committed
1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226
    }

    if ( is_indirect_le_ih (p_le_ih) )
	n_del_size = (n_del_size/UNFM_P_SIZE)*
	  (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih);
    return n_del_size;
}

static void init_tb_struct(
    struct reiserfs_transaction_handle *th,
    struct tree_balance * p_s_tb,
    struct super_block  * p_s_sb,
    struct path         * p_s_path,
    int                   n_size
    ) {
1227 1228 1229

    BUG_ON (!th->t_trans_id);

Linus Torvalds's avatar
Linus Torvalds committed
1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248
    memset (p_s_tb,'\0',sizeof(struct tree_balance));
    p_s_tb->transaction_handle = th ;
    p_s_tb->tb_sb = p_s_sb;
    p_s_tb->tb_path = p_s_path;
    PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
    PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
    p_s_tb->insert_size[0] = n_size;
}



void padd_item (char * item, int total_length, int length)
{
    int i;

    for (i = total_length; i > length; )
	item [--i] = 0;
}

1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275
#ifdef REISERQUOTA_DEBUG
char key2type(struct key *ih)
{
  if (is_direntry_le_key(2, ih))
    return 'd';
  if (is_direct_le_key(2, ih))
    return 'D';
  if (is_indirect_le_key(2, ih))
    return 'i';
  if (is_statdata_le_key(2, ih))
    return 's';
  return 'u';
}

char head2type(struct item_head *ih)
{
  if (is_direntry_le_ih(ih))
    return 'd';
  if (is_direct_le_ih(ih))
    return 'D';
  if (is_indirect_le_ih(ih))
    return 'i';
  if (is_statdata_le_ih(ih))
    return 's';
  return 'u';
}
#endif
Linus Torvalds's avatar
Linus Torvalds committed
1276 1277 1278 1279

/* Delete object item. */
int reiserfs_delete_item (struct reiserfs_transaction_handle *th, 
			  struct path * p_s_path, /* Path to the deleted item. */
Linus Torvalds's avatar
Linus Torvalds committed
1280
			  const struct cpu_key * p_s_item_key, /* Key to search for the deleted item.  */
1281
			  struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */
Linus Torvalds's avatar
Linus Torvalds committed
1282 1283 1284 1285 1286
			  struct buffer_head  * p_s_un_bh)    /* NULL or unformatted node pointer.    */
{
    struct super_block * p_s_sb = p_s_inode->i_sb;
    struct tree_balance   s_del_balance;
    struct item_head      s_ih;
1287 1288
    struct item_head      *q_ih;
    int			  quota_cut_bytes;
Linus Torvalds's avatar
Linus Torvalds committed
1289 1290 1291 1292 1293 1294 1295 1296 1297
    int                   n_ret_value,
	n_del_size,
	n_removed;

#ifdef CONFIG_REISERFS_CHECK
    char                  c_mode;
    int			n_iter = 0;
#endif

1298 1299
    BUG_ON (!th->t_trans_id);

Linus Torvalds's avatar
Linus Torvalds committed
1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310
    init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/);

    while ( 1 ) {
	n_removed = 0;

#ifdef CONFIG_REISERFS_CHECK
	n_iter++;
	c_mode =
#endif
	    prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode));

Linus Torvalds's avatar
Linus Torvalds committed
1311
	RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
Linus Torvalds's avatar
Linus Torvalds committed
1312 1313 1314 1315

	copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
	s_del_balance.insert_size[0] = n_del_size;

1316
	n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
1317 1318 1319
	if ( n_ret_value != REPEAT_SEARCH )
	    break;

1320 1321
	PROC_INFO_INC( p_s_sb, delete_item_restarted );

Linus Torvalds's avatar
Linus Torvalds committed
1322 1323 1324 1325 1326
	// file system changed, repeat search
	n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
	if (n_ret_value == IO_ERROR)
	    break;
	if (n_ret_value == FILE_NOT_FOUND) {
1327 1328
	    reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: "
			      "no items of the file %K found", p_s_item_key);
Linus Torvalds's avatar
Linus Torvalds committed
1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
	    break;
	}
    } /* while (1) */

    if ( n_ret_value != CARRY_ON ) {
	unfix_nodes(&s_del_balance);
	return 0;
    }

    // reiserfs_delete_item returns item length when success
    n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355
    q_ih = get_ih(p_s_path) ;
    quota_cut_bytes = ih_item_len(q_ih) ;

    /* hack so the quota code doesn't have to guess if the file
    ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
    ** We test the offset because the tail might have been
    ** split into multiple items, and we only want to decrement for
    ** the unfm node once
    */
    if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
        if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
            quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
        } else {
	    quota_cut_bytes = 0 ;
	}
    }
Linus Torvalds's avatar
Linus Torvalds committed
1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371

    if ( p_s_un_bh )  {
	int off;
        char *data ;

	/* We are in direct2indirect conversion, so move tail contents
           to the unformatted node */
	/* note, we do the copy before preparing the buffer because we
	** don't care about the contents of the unformatted node yet.
	** the only thing we really care about is the direct item's data
	** is in the unformatted node.
	**
	** Otherwise, we would have to call reiserfs_prepare_for_journal on
	** the unformatted node, which might schedule, meaning we'd have to
	** loop all the way back up to the start of the while loop.
	**
Linus Torvalds's avatar
Linus Torvalds committed
1372 1373
	** The unformatted node must be dirtied later on.  We can't be
	** sure here if the entire tail has been deleted yet.
Linus Torvalds's avatar
Linus Torvalds committed
1374 1375 1376
        **
        ** p_s_un_bh is from the page cache (all unformatted nodes are
        ** from the page cache) and might be a highmem page.  So, we
1377
        ** can't use p_s_un_bh->b_data.
Linus Torvalds's avatar
Linus Torvalds committed
1378 1379 1380
	** -clm
	*/

1381
        data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
Linus Torvalds's avatar
Linus Torvalds committed
1382 1383 1384
	off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
	memcpy(data + off,
	       B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
1385
	kunmap_atomic(data, KM_USER0);
Linus Torvalds's avatar
Linus Torvalds committed
1386 1387 1388 1389
    }
    /* Perform balancing after all resources have been collected at once. */ 
    do_balance(&s_del_balance, NULL, NULL, M_DELETE);

1390
#ifdef REISERQUOTA_DEBUG
1391
    reiserfs_debug (p_s_sb, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1392 1393 1394
#endif
    DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);

Linus Torvalds's avatar
Linus Torvalds committed
1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417
    /* Return deleted body length */
    return n_ret_value;
}


/* Summary Of Mechanisms For Handling Collisions Between Processes:

 deletion of the body of the object is performed by iput(), with the
 result that if multiple processes are operating on a file, the
 deletion of the body of the file is deferred until the last process
 that has an open inode performs its iput().

 writes and truncates are protected from collisions by use of
 semaphores.

 creates, linking, and mknod are protected from collisions with other
 processes by making the reiserfs_add_entry() the last step in the
 creation, and then rolling back all changes if there was a collision.
 - Hans
*/


/* this deletes item which never gets split */
Linus Torvalds's avatar
Linus Torvalds committed
1418
void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th,
1419
				 struct inode *inode,
Linus Torvalds's avatar
Linus Torvalds committed
1420
				 struct key * key)
Linus Torvalds's avatar
Linus Torvalds committed
1421 1422 1423
{
    struct tree_balance tb;
    INITIALIZE_PATH (path);
1424
    int item_len = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1425 1426 1427
    int tb_init = 0 ;
    struct cpu_key cpu_key;
    int retval;
1428
    int quota_cut_bytes = 0;
1429 1430

    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1431 1432 1433 1434 1435 1436
    
    le_key2cpu_key (&cpu_key, key);
    
    while (1) {
	retval = search_item (th->t_super, &cpu_key, &path);
	if (retval == IO_ERROR) {
1437 1438 1439 1440
	    reiserfs_warning (th->t_super,
			      "vs-5350: reiserfs_delete_solid_item: "
			      "i/o failure occurred trying to delete %K",
			      &cpu_key);
Linus Torvalds's avatar
Linus Torvalds committed
1441 1442 1443 1444
	    break;
	}
	if (retval != ITEM_FOUND) {
	    pathrelse (&path);
1445 1446 1447
	    // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
	    if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \
		 (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) )
1448
		reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key);
Linus Torvalds's avatar
Linus Torvalds committed
1449 1450 1451 1452
	    break;
	}
	if (!tb_init) {
	    tb_init = 1 ;
Linus Torvalds's avatar
Linus Torvalds committed
1453
	    item_len = ih_item_len( PATH_PITEM_HEAD(&path) );
Linus Torvalds's avatar
Linus Torvalds committed
1454 1455
	    init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len));
	}
1456
	quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ;
Linus Torvalds's avatar
Linus Torvalds committed
1457

1458
	retval = fix_nodes (M_DELETE, &tb, NULL, NULL);
1459 1460
	if (retval == REPEAT_SEARCH) {
	    PROC_INFO_INC( th -> t_super, delete_solid_item_restarted );
Linus Torvalds's avatar
Linus Torvalds committed
1461
	    continue;
1462
	}
Linus Torvalds's avatar
Linus Torvalds committed
1463 1464

	if (retval == CARRY_ON) {
1465
	    do_balance (&tb, NULL, NULL, M_DELETE);
1466 1467
	    if (inode) {	/* Should we count quota for item? (we don't count quotas for save-links) */
#ifdef REISERQUOTA_DEBUG
1468
		reiserfs_debug (th->t_super, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key));
1469 1470 1471
#endif
		DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes);
	    }
Linus Torvalds's avatar
Linus Torvalds committed
1472 1473 1474 1475
	    break;
	}

	// IO_ERROR, NO_DISK_SPACE, etc
1476 1477
	reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: "
			  "could not delete %K due to fix_nodes failure", &cpu_key);
Linus Torvalds's avatar
Linus Torvalds committed
1478 1479 1480 1481 1482 1483 1484 1485
	unfix_nodes (&tb);
	break;
    }

    reiserfs_check_path(&path) ;
}


1486
int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode)
Linus Torvalds's avatar
Linus Torvalds committed
1487
{
1488
    int err;
Linus Torvalds's avatar
Linus Torvalds committed
1489
    inode->i_size = 0;
1490
    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1491 1492

    /* for directory this deletes item containing "." and ".." */
1493 1494 1495
    err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/);
    if (err)
        return err;
Linus Torvalds's avatar
Linus Torvalds committed
1496
    
Linus Torvalds's avatar
Linus Torvalds committed
1497 1498 1499 1500 1501 1502
#if defined( USE_INODE_GENERATION_COUNTER )
    if( !old_format_only ( th -> t_super ) )
      {
       __u32 *inode_generation;
       
       inode_generation = 
1503
         &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation;
Linus Torvalds's avatar
Linus Torvalds committed
1504 1505 1506
       *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 );
      }
/* USE_INODE_GENERATION_COUNTER */
Linus Torvalds's avatar
Linus Torvalds committed
1507
#endif
1508
    reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1509 1510

    return err;
Linus Torvalds's avatar
Linus Torvalds committed
1511 1512
}

1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547
static void
unmap_buffers(struct page *page, loff_t pos) {
    struct buffer_head *bh ;
    struct buffer_head *head ;
    struct buffer_head *next ;
    unsigned long tail_index ;
    unsigned long cur_index ;

    if (page) {
	if (page_has_buffers(page)) {
	    tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
	    cur_index = 0 ;
	    head = page_buffers(page) ;
	    bh = head ;
	    do {
		next = bh->b_this_page ;

		/* we want to unmap the buffers that contain the tail, and
		** all the buffers after it (since the tail must be at the
		** end of the file).  We don't want to unmap file data
		** before the tail, since it might be dirty and waiting to
		** reach disk
		*/
		cur_index += bh->b_size ;
		if (cur_index > tail_index) {
		    reiserfs_unmap_buffer(bh) ;
		}
		bh = next ;
	    } while (bh != head) ;
	    if ( PAGE_SIZE == bh->b_size ) {
		clear_page_dirty(page);
	    }
	}
    }
}
Linus Torvalds's avatar
Linus Torvalds committed
1548 1549 1550 1551 1552

static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th, 
			      struct inode * p_s_inode,
			      struct page *page, 
			      struct path         * p_s_path,
Linus Torvalds's avatar
Linus Torvalds committed
1553
			      const struct cpu_key      * p_s_item_key,
Linus Torvalds's avatar
Linus Torvalds committed
1554 1555 1556 1557 1558 1559
			      loff_t         n_new_file_size,
			      char                * p_c_mode
			      ) {
    struct super_block * p_s_sb = p_s_inode->i_sb;
    int n_block_size = p_s_sb->s_blocksize;
    int cut_bytes;
1560
    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1561 1562 1563 1564 1565 1566 1567 1568 1569 1570

    if (n_new_file_size != p_s_inode->i_size)
	BUG ();

    /* the page being sent in could be NULL if there was an i/o error
    ** reading in the last block.  The user will hit problems trying to
    ** read the file, but for now we just skip the indirect2direct
    */
    if (atomic_read(&p_s_inode->i_count) > 1 || 
        !tail_has_to_be_packed (p_s_inode) || 
Linus Torvalds's avatar
Linus Torvalds committed
1571
	!page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
Linus Torvalds's avatar
Linus Torvalds committed
1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592
	// leave tail in an unformatted node	
	*p_c_mode = M_SKIP_BALANCING;
	cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1));
	pathrelse(p_s_path);
	return cut_bytes;
    }
    /* Permorm the conversion to a direct_item. */
    /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/
    return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);
}


/* we did indirect_to_direct conversion. And we have inserted direct
   item successesfully, but there were no disk space to cut unfm
   pointer being converted. Therefore we have to delete inserted
   direct item(s) */
static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path)
{
    struct cpu_key tail_key;
    int tail_len;
    int removed;
1593
    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1594 1595 1596 1597 1598 1599 1600 1601 1602

    make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!!
    tail_key.key_length = 4;

    tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
    while (tail_len) {
	/* look for the last byte of the tail */
	if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND)
	    reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item");
Linus Torvalds's avatar
Linus Torvalds committed
1603 1604
	RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1,
	        "vs-5616: appended bytes found");
Linus Torvalds's avatar
Linus Torvalds committed
1605 1606
	PATH_LAST_POSITION (path) --;
	
1607
	removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/);
Linus Torvalds's avatar
Linus Torvalds committed
1608
	RFALSE( removed <= 0 || removed > tail_len,
Linus Torvalds's avatar
Linus Torvalds committed
1609 1610
	        "vs-5617: there was tail %d bytes, removed item length %d bytes",
                tail_len, removed);
Linus Torvalds's avatar
Linus Torvalds committed
1611 1612 1613
	tail_len -= removed;
	set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed);
    }
1614
    reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
Linus Torvalds's avatar
Linus Torvalds committed
1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
    //mark_file_without_tail (inode);
    mark_inode_dirty (inode);
}


/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, 
			    struct path * p_s_path,
			    struct cpu_key * p_s_item_key,
			    struct inode * p_s_inode,
			    struct page *page, 
			    loff_t n_new_file_size)
{
    struct super_block * p_s_sb = p_s_inode->i_sb;
    /* Every function which is going to call do_balance must first
       create a tree_balance structure.  Then it must fill up this
       structure by using the init_tb_struct and fix_nodes functions.
       After that we can make tree balancing. */
    struct tree_balance s_cut_balance;
1634
    struct item_head *p_le_ih;
Linus Torvalds's avatar
Linus Torvalds committed
1635 1636 1637 1638 1639 1640
    int n_cut_size = 0,        /* Amount to be cut. */
	n_ret_value = CARRY_ON,
	n_removed = 0,     /* Number of the removed unformatted nodes. */
	n_is_inode_locked = 0;
    char                c_mode;            /* Mode of the balance. */
    int retval2 = -1;
1641
    int quota_cut_bytes;
1642
    loff_t tail_pos = 0;
1643 1644

    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1645 1646 1647 1648 1649
    
    init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size);


    /* Repeat this loop until we either cut the item without needing
1650
       to balance, or we fix_nodes without schedule occurring */
Linus Torvalds's avatar
Linus Torvalds committed
1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661
    while ( 1 ) {
	/* Determine the balance mode, position of the first byte to
	   be cut, and size to be cut.  In case of the indirect item
	   free unformatted nodes which are pointed to by the cut
	   pointers. */
      
	c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, 
					   &n_cut_size, n_new_file_size);
	if ( c_mode == M_CONVERT )  {
	    /* convert last unformatted node to direct item or leave
               tail in the unformatted node */
Linus Torvalds's avatar
Linus Torvalds committed
1662
	    RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice");
Linus Torvalds's avatar
Linus Torvalds committed
1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683

	    n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key,
						    n_new_file_size, &c_mode);
	    if ( c_mode == M_SKIP_BALANCING )
		/* tail has been left in the unformatted node */
		return n_ret_value;

	    n_is_inode_locked = 1;
	  
	    /* removing of last unformatted node will change value we
               have to return to truncate. Save it */
	    retval2 = n_ret_value;
	    /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/
	  
	    /* So, we have performed the first part of the conversion:
	       inserting the new direct item.  Now we are removing the
	       last unformatted node pointer. Set key to search for
	       it. */
      	    set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT);
	    p_s_item_key->key_length = 4;
	    n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1));
1684
	    tail_pos = n_new_file_size;
Linus Torvalds's avatar
Linus Torvalds committed
1685 1686 1687
	    set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1);
	    if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){
		print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1);
1688
		reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key);
Linus Torvalds's avatar
Linus Torvalds committed
1689 1690 1691 1692 1693 1694 1695 1696 1697 1698
	    }
	    continue;
	}
	if (n_cut_size == 0) {
	    pathrelse (p_s_path);
	    return 0;
	}

	s_cut_balance.insert_size[0] = n_cut_size;
	
1699
	n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
1700 1701 1702
      	if ( n_ret_value != REPEAT_SEARCH )
	    break;
	
1703 1704
	PROC_INFO_INC( p_s_sb, cut_from_item_restarted );

Linus Torvalds's avatar
Linus Torvalds committed
1705 1706 1707 1708
	n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
	if (n_ret_value == POSITION_FOUND)
	    continue;

1709
	reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key);
Linus Torvalds's avatar
Linus Torvalds committed
1710
	unfix_nodes (&s_cut_balance);
Linus Torvalds's avatar
Linus Torvalds committed
1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721
	return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
    } /* while */
  
    // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
    if ( n_ret_value != CARRY_ON ) {
	if ( n_is_inode_locked ) {
	    // FIXME: this seems to be not needed: we are always able
	    // to cut item
	    indirect_to_direct_roll_back (th, p_s_inode, p_s_path);
	}
	if (n_ret_value == NO_DISK_SPACE)
1722
	    reiserfs_warning (p_s_sb, "NO_DISK_SPACE");
Linus Torvalds's avatar
Linus Torvalds committed
1723 1724 1725 1726 1727 1728
	unfix_nodes (&s_cut_balance);
	return -EIO;
    }

    /* go ahead and perform balancing */
    
1729
    RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
Linus Torvalds's avatar
Linus Torvalds committed
1730 1731

    /* Calculate number of bytes that need to be cut from the item. */
1732
    quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0];
Linus Torvalds's avatar
Linus Torvalds committed
1733 1734 1735 1736
    if (retval2 == -1)
	n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
    else
	n_ret_value = retval2;
1737 1738 1739 1740 1741 1742 1743 1744 1745


    /* For direct items, we only change the quota when deleting the last
    ** item.
    */
    p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
    if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
        if (c_mode == M_DELETE &&
	   (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) {
Linus Torvalds's avatar
Linus Torvalds committed
1746
	    // FIXME: this is to keep 3.5 happy
Linus Torvalds's avatar
Linus Torvalds committed
1747
	    REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1748 1749 1750
	    quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ;
        } else {
	    quota_cut_bytes = 0 ;
Linus Torvalds's avatar
Linus Torvalds committed
1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762
	}
    }
#ifdef CONFIG_REISERFS_CHECK
    if (n_is_inode_locked) {
	struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
	/* we are going to complete indirect2direct conversion. Make
           sure, that we exactly remove last unformatted node pointer
           of the item */
	if (!is_indirect_le_ih (le_ih))
	    reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: "
			    "item must be indirect %h", le_ih);

Linus Torvalds's avatar
Linus Torvalds committed
1763
	if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
Linus Torvalds's avatar
Linus Torvalds committed
1764
	    reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: "
Linus Torvalds's avatar
Linus Torvalds committed
1765
			    "completing indirect2direct conversion indirect item %h "
Linus Torvalds's avatar
Linus Torvalds committed
1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779
			    "being deleted must be of 4 byte long", le_ih);

	if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
	    reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: "
			    "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
			    le_ih, s_cut_balance.insert_size[0]);
	}
	/* it would be useful to make sure, that right neighboring
           item is direct item of this file */
    }
#endif
    
    do_balance(&s_cut_balance, NULL, NULL, c_mode);
    if ( n_is_inode_locked ) {
1780 1781
	/* we've done an indirect->direct conversion.  when the data block
	** was freed, it was removed from the list of blocks that must
1782 1783
	** be flushed before the transaction commits, make sure to
	** unmap and invalidate it
Linus Torvalds's avatar
Linus Torvalds committed
1784
	*/
1785
	unmap_buffers(page, tail_pos);
Linus Torvalds's avatar
Linus Torvalds committed
1786
	REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ;
Linus Torvalds's avatar
Linus Torvalds committed
1787
    }
1788
#ifdef REISERQUOTA_DEBUG
1789
    reiserfs_debug (p_s_inode->i_sb, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?');
1790 1791
#endif
    DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
Linus Torvalds's avatar
Linus Torvalds committed
1792 1793 1794 1795 1796
    return n_ret_value;
}

static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode)
{
1797
    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1798
    if (inode->i_nlink)
1799 1800
	reiserfs_warning (inode->i_sb,
			  "vs-5655: truncate_directory: link count != 0");
Linus Torvalds's avatar
Linus Torvalds committed
1801

Linus Torvalds's avatar
Linus Torvalds committed
1802 1803
    set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET);
    set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY);
1804 1805
    reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
    reiserfs_update_sd(th, inode) ;
Linus Torvalds's avatar
Linus Torvalds committed
1806 1807
    set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET);
    set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA);    
Linus Torvalds's avatar
Linus Torvalds committed
1808 1809 1810 1811 1812 1813 1814
}




/* Truncate file to the new size. Note, this must be called with a transaction
   already started */
1815
int reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
Linus Torvalds's avatar
Linus Torvalds committed
1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830
			   struct  inode * p_s_inode, /* ->i_size contains new
                                                         size */
			   struct page *page, /* up to date for last block */
			   int update_timestamps  /* when it is called by
						     file_release to convert
						     the tail - no timestamps
						     should be updated */
    ) {
    INITIALIZE_PATH (s_search_path);       /* Path to the current object item. */
    struct item_head    * p_le_ih;         /* Pointer to an item header. */
    struct cpu_key      s_item_key;     /* Key to search for a previous file item. */
    loff_t         n_file_size,    /* Old file size. */
	n_new_file_size;/* New file size. */
    int                   n_deleted;      /* Number of deleted or truncated bytes. */
    int retval;
1831
    int err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1832

1833
    BUG_ON (!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
1834
    if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
1835
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1836 1837 1838 1839

    if (S_ISDIR(p_s_inode->i_mode)) {
	// deletion of directory - no need to update timestamps
	truncate_directory (th, p_s_inode);
1840
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1841 1842 1843 1844 1845 1846 1847 1848 1849 1850
    }

    /* Get new file size. */
    n_new_file_size = p_s_inode->i_size;

    // FIXME: note, that key type is unimportant here
    make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3);

    retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path);
    if (retval == IO_ERROR) {
1851 1852
	reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: "
			  "i/o failure occurred trying to truncate %K", &s_item_key);
1853 1854
        err = -EIO;
        goto out;
Linus Torvalds's avatar
Linus Torvalds committed
1855 1856
    }
    if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1857 1858
	reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: "
			  "wrong result %d of search for %K", retval, &s_item_key);
1859 1860 1861

        err = -EIO;
        goto out;
Linus Torvalds's avatar
Linus Torvalds committed
1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
    }

    s_search_path.pos_in_item --;

    /* Get real file size (total length of all file items) */
    p_le_ih = PATH_PITEM_HEAD(&s_search_path);
    if ( is_statdata_le_ih (p_le_ih) )
	n_file_size = 0;
    else {
	loff_t offset = le_ih_k_offset (p_le_ih);
	int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize);

	/* this may mismatch with real file size: if last direct item
           had no padding zeros and last unformatted node had no free
           space, this file would have this file size */
	n_file_size = offset + bytes - 1;
    }
1879 1880 1881 1882 1883 1884
    /*
     * are we doing a full truncate or delete, if so
     * kick in the reada code
     */
    if (n_new_file_size == 0)
        s_search_path.reada = PATH_READA | PATH_READA_BACK;
Linus Torvalds's avatar
Linus Torvalds committed
1885 1886

    if ( n_file_size == 0 || n_file_size < n_new_file_size ) {
1887
	goto update_and_out ;
Linus Torvalds's avatar
Linus Torvalds committed
1888
    }
Linus Torvalds's avatar
Linus Torvalds committed
1889

Linus Torvalds's avatar
Linus Torvalds committed
1890 1891 1892 1893 1894 1895 1896
    /* Update key to search for the last file item. */
    set_cpu_key_k_offset (&s_item_key, n_file_size);

    do  {
	/* Cut or delete file item. */
	n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode,  page, n_new_file_size);
	if (n_deleted < 0) {
1897
	    reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
Linus Torvalds's avatar
Linus Torvalds committed
1898
	    reiserfs_check_path(&s_search_path) ;
1899
	    return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1900 1901
	}

Linus Torvalds's avatar
Linus Torvalds committed
1902
	RFALSE( n_deleted > n_file_size,
Oleg Drokin's avatar
Oleg Drokin committed
1903
		"PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
Linus Torvalds's avatar
Linus Torvalds committed
1904
		n_deleted, n_file_size, &s_item_key);
Linus Torvalds's avatar
Linus Torvalds committed
1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928

	/* Change key to search the last file item. */
	n_file_size -= n_deleted;

	set_cpu_key_k_offset (&s_item_key, n_file_size);

	/* While there are bytes to truncate and previous file item is presented in the tree. */

	/*
	** This loop could take a really long time, and could log 
	** many more blocks than a transaction can hold.  So, we do a polite
	** journal end here, and if the transaction needs ending, we make
	** sure the file is consistent before ending the current trans
	** and starting a new one
	*/
        if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
	  int orig_len_alloc = th->t_blocks_allocated ;
	  decrement_counters_in_path(&s_search_path) ;

	  if (update_timestamps) {
	      p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME;
	  } 
	  reiserfs_update_sd(th, p_s_inode) ;

1929 1930 1931 1932 1933 1934 1935
	  err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ;
	  if (err)
	    goto out;
	  err = journal_begin (th, p_s_inode->i_sb,
                               JOURNAL_PER_BALANCE_CNT * 6);
	  if (err)
	    goto out;
Linus Torvalds's avatar
Linus Torvalds committed
1936
	  reiserfs_update_inode_transaction(p_s_inode) ;
Linus Torvalds's avatar
Linus Torvalds committed
1937 1938 1939 1940
	}
    } while ( n_file_size > ROUND_UP (n_new_file_size) &&
	      search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND )  ;

Linus Torvalds's avatar
Linus Torvalds committed
1941
    RFALSE( n_file_size > ROUND_UP (n_new_file_size),
1942
	    "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
Linus Torvalds's avatar
Linus Torvalds committed
1943
	    n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
Linus Torvalds's avatar
Linus Torvalds committed
1944

1945
update_and_out:
Linus Torvalds's avatar
Linus Torvalds committed
1946 1947 1948 1949 1950 1951
    if (update_timestamps) {
	// this is truncate, not file closing
	p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME;
    }
    reiserfs_update_sd (th, p_s_inode);

1952
out:
Linus Torvalds's avatar
Linus Torvalds committed
1953
    pathrelse(&s_search_path) ;
1954
    return err;
Linus Torvalds's avatar
Linus Torvalds committed
1955 1956 1957 1958 1959
}


#ifdef CONFIG_REISERFS_CHECK
// this makes sure, that we __append__, not overwrite or add holes
Linus Torvalds's avatar
Linus Torvalds committed
1960 1961
static void check_research_for_paste (struct path * path, 
				      const struct cpu_key * p_s_key)
Linus Torvalds's avatar
Linus Torvalds committed
1962 1963 1964 1965
{
    struct item_head * found_ih = get_ih (path);
    
    if (is_direct_le_ih (found_ih)) {
Linus Torvalds's avatar
Linus Torvalds committed
1966
	if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) !=
Linus Torvalds's avatar
Linus Torvalds committed
1967
	    cpu_key_k_offset (p_s_key) ||
Linus Torvalds's avatar
Linus Torvalds committed
1968
	    op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path))
1969
	    reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: "
Linus Torvalds's avatar
Linus Torvalds committed
1970 1971 1972 1973
			    "found direct item %h or position (%d) does not match to key %K",
			    found_ih, pos_in_item (path), p_s_key);
    }
    if (is_indirect_le_ih (found_ih)) {
Linus Torvalds's avatar
Linus Torvalds committed
1974
	if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) || 
Linus Torvalds's avatar
Linus Torvalds committed
1975 1976
	    I_UNFM_NUM (found_ih) != pos_in_item (path) ||
	    get_ih_free_space (found_ih) != 0)
1977
	    reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: "
Linus Torvalds's avatar
Linus Torvalds committed
1978 1979 1980 1981 1982 1983 1984 1985 1986 1987
			    "found indirect item (%h) or position (%d) does not match to key (%K)",
			    found_ih, pos_in_item (path), p_s_key);
    }
}
#endif /* config reiserfs check */


/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, 
			      struct path         * p_s_search_path,	/* Path to the pasted item.          */
Linus Torvalds's avatar
Linus Torvalds committed
1988
			      const struct cpu_key      * p_s_key,        	/* Key to search for the needed item.*/
1989
			      struct inode	  * inode,		/* Inode item belongs to */
Linus Torvalds's avatar
Linus Torvalds committed
1990 1991 1992 1993 1994
			      const char          * p_c_body,       	/* Pointer to the bytes to paste.    */
			      int                   n_pasted_size)  	/* Size of pasted bytes.             */
{
    struct tree_balance s_paste_balance;
    int                 retval;
1995 1996
    int			fs_gen;

1997 1998
    BUG_ON (!th->t_trans_id);

1999
    fs_gen = get_generation(inode->i_sb) ;
Linus Torvalds's avatar
Linus Torvalds committed
2000

2001
#ifdef REISERQUOTA_DEBUG
2002
    reiserfs_debug (inode->i_sb, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
2003 2004 2005 2006 2007 2008
#endif

    if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
	pathrelse(p_s_search_path);
	return -EDQUOT;
    }
Linus Torvalds's avatar
Linus Torvalds committed
2009
    init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size);
2010 2011 2012
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    s_paste_balance.key = p_s_key->on_disk_key;
#endif
2013 2014 2015 2016 2017 2018 2019 2020 2021

    /* DQUOT_* can schedule, must check before the fix_nodes */
    if (fs_changed(fs_gen, inode->i_sb)) {
	goto search_again;
    }

    while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
REPEAT_SEARCH ) {
search_again:
Linus Torvalds's avatar
Linus Torvalds committed
2022
	/* file system changed while we were in the fix_nodes */
2023
	PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
Linus Torvalds's avatar
Linus Torvalds committed
2024
	retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
Linus Torvalds's avatar
Linus Torvalds committed
2025 2026 2027 2028
	if (retval == IO_ERROR) {
	    retval = -EIO ;
	    goto error_out ;
	}
Linus Torvalds's avatar
Linus Torvalds committed
2029
	if (retval == POSITION_FOUND) {
2030
	    reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key);
Linus Torvalds's avatar
Linus Torvalds committed
2031 2032
	    retval = -EEXIST ;
	    goto error_out ;
Linus Torvalds's avatar
Linus Torvalds committed
2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045
	}
	
#ifdef CONFIG_REISERFS_CHECK
	check_research_for_paste (p_s_search_path, p_s_key);
#endif
    }

    /* Perform balancing after all resources are collected by fix_nodes, and
       accessing them will not risk triggering schedule. */
    if ( retval == CARRY_ON ) {
	do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE);
	return 0;
    }
Linus Torvalds's avatar
Linus Torvalds committed
2046 2047 2048
    retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
error_out:
    /* this also releases the path */
Linus Torvalds's avatar
Linus Torvalds committed
2049
    unfix_nodes(&s_paste_balance);
2050
#ifdef REISERQUOTA_DEBUG
2051
    reiserfs_debug (inode->i_sb, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
2052 2053
#endif
    DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
Linus Torvalds's avatar
Linus Torvalds committed
2054
    return retval ;
Linus Torvalds's avatar
Linus Torvalds committed
2055 2056 2057 2058 2059 2060
}


/* Insert new item into the buffer at the path. */
int reiserfs_insert_item(struct reiserfs_transaction_handle *th, 
			 struct path         * 	p_s_path,         /* Path to the inserteded item.         */
Linus Torvalds's avatar
Linus Torvalds committed
2061
			 const struct cpu_key      * key,
Linus Torvalds's avatar
Linus Torvalds committed
2062
			 struct item_head    * 	p_s_ih,           /* Pointer to the item header to insert.*/
2063
			 struct inode        * inode,
Linus Torvalds's avatar
Linus Torvalds committed
2064 2065 2066 2067
			 const char          * 	p_c_body)         /* Pointer to the bytes to insert.      */
{
    struct tree_balance s_ins_balance;
    int                 retval;
2068 2069 2070
    int fs_gen = 0 ;
    int quota_bytes = 0 ;

2071 2072
    BUG_ON (!th->t_trans_id);

2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083
    if (inode) {      /* Do we count quotas for item? */
	fs_gen = get_generation(inode->i_sb);
	quota_bytes = ih_item_len(p_s_ih);

	/* hack so the quota code doesn't have to guess if the file has
	 ** a tail, links are always tails, so there's no guessing needed
	 */
	if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
	    quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
	}
#ifdef REISERQUOTA_DEBUG
2084
	reiserfs_debug (inode->i_sb, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2085 2086 2087 2088 2089 2090 2091 2092
#endif
	/* We can't dirty inode here. It would be immediately written but
	 * appropriate stat item isn't inserted yet... */
	if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
	    pathrelse(p_s_path);
	    return -EDQUOT;
	}
    }
Linus Torvalds's avatar
Linus Torvalds committed
2093
    init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih));
2094 2095 2096
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    s_ins_balance.key = key->on_disk_key;
#endif
2097 2098 2099 2100
    /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
    if (inode && fs_changed(fs_gen, inode->i_sb)) {
	goto search_again;
    }
Linus Torvalds's avatar
Linus Torvalds committed
2101 2102

    while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
2103
search_again:
Linus Torvalds's avatar
Linus Torvalds committed
2104
	/* file system changed while we were in the fix_nodes */
2105
	PROC_INFO_INC( th -> t_super, insert_item_restarted );
Linus Torvalds's avatar
Linus Torvalds committed
2106
	retval = search_item (th->t_super, key, p_s_path);
Linus Torvalds's avatar
Linus Torvalds committed
2107 2108 2109 2110
	if (retval == IO_ERROR) {
	    retval = -EIO;
	    goto error_out ;
	}
Linus Torvalds's avatar
Linus Torvalds committed
2111
	if (retval == ITEM_FOUND) {
2112 2113
	    reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: "
			      "key %K already exists in the tree", key);
Linus Torvalds's avatar
Linus Torvalds committed
2114 2115
	    retval = -EEXIST ;
	    goto error_out; 
Linus Torvalds's avatar
Linus Torvalds committed
2116 2117 2118 2119 2120 2121 2122 2123 2124
	}
    }

    /* make balancing after all resources will be collected at a time */ 
    if ( retval == CARRY_ON ) {
	do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
	return 0;
    }

Linus Torvalds's avatar
Linus Torvalds committed
2125 2126 2127
    retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
error_out:
    /* also releases the path */
Linus Torvalds's avatar
Linus Torvalds committed
2128
    unfix_nodes(&s_ins_balance);
2129
#ifdef REISERQUOTA_DEBUG
2130
    reiserfs_debug (th->t_super, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2131 2132 2133
#endif
    if (inode)
	DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ;
Linus Torvalds's avatar
Linus Torvalds committed
2134
    return retval; 
Linus Torvalds's avatar
Linus Torvalds committed
2135 2136 2137 2138 2139
}