Commit d7dab39b authored by Eric Sandeen, committed by Jan Kara

ext3: return 32/64-bit dir name hash according to usage type

This is based on commit d1f5273e
ext4: return 32/64-bit dir name hash according to usage type
by Fan Yong <yong.fan@whamcloud.com>

Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
and telldir().  However, this causes problems if there are 32-bit hash
collisions, since the NFSv2 server can get stuck resending the same
entries from the directory repeatedly.

Allow ext3 to return a full 64-bit hash (both major and minor) for
telldir to decrease the chance of hash collisions.

This patch does implement a new ext3_dir_llseek op, because with 64-bit
hashes, nfs will attempt to seek to a hash "offset" which is much
larger than ext3's s_maxbytes.  So for dx dirs, we call
generic_file_llseek_size() with the appropriate max hash value as the
maximum seekable size.  Otherwise we just pass through to
generic_file_llseek().
Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Patch-updated-by: Eric Sandeen <sandeen@redhat.com>
(blame us if something is not correct)
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
parent a80b12c3
...@@ -21,30 +21,15 @@ ...@@ -21,30 +21,15 @@
* *
*/ */
#include <linux/compat.h>
#include "ext3.h" #include "ext3.h"
static unsigned char ext3_filetype_table[] = { static unsigned char ext3_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
}; };
static int ext3_readdir(struct file *, void *, filldir_t);
static int ext3_dx_readdir(struct file * filp, static int ext3_dx_readdir(struct file * filp,
void * dirent, filldir_t filldir); void * dirent, filldir_t filldir);
static int ext3_release_dir (struct inode * inode,
struct file * filp);
const struct file_operations ext3_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = ext3_readdir, /* we take BKL. needed?*/
.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext3_compat_ioctl,
#endif
.fsync = ext3_sync_file, /* BKL held */
.release = ext3_release_dir,
};
static unsigned char get_dtype(struct super_block *sb, int filetype) static unsigned char get_dtype(struct super_block *sb, int filetype)
{ {
...@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) ...@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext3_filetype_table[filetype]); return (ext3_filetype_table[filetype]);
} }
/*
 * Decide whether a directory inode should be handled as an htree-indexed
 * ("dx") directory.
 *
 * The filesystem must advertise the DIR_INDEX compat feature.  On top of
 * that, either the inode already carries EXT3_INDEX_FL, or the directory
 * occupies exactly one block (such a directory could potentially get
 * converted to use htree indexing).
 *
 * Returns 1 if it is a dx dir, 0 if not.
 */
static int is_dx_dir(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	/* Without the feature flag nothing on this filesystem is indexed. */
	if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
		return 0;

	/* Already marked as indexed. */
	if (EXT3_I(inode)->i_flags & EXT3_INDEX_FL)
		return 1;

	/* Single-block directory. */
	return (inode->i_size >> sb->s_blocksize_bits) == 1;
}
int ext3_check_dir_entry (const char * function, struct inode * dir, int ext3_check_dir_entry (const char * function, struct inode * dir,
struct ext3_dir_entry_2 * de, struct ext3_dir_entry_2 * de,
...@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, ...@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp,
unsigned long offset; unsigned long offset;
int i, stored; int i, stored;
struct ext3_dir_entry_2 *de; struct ext3_dir_entry_2 *de;
struct super_block *sb;
int err; int err;
struct inode *inode = filp->f_path.dentry->d_inode; struct inode *inode = filp->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
int ret = 0; int ret = 0;
int dir_has_error = 0; int dir_has_error = 0;
sb = inode->i_sb; if (is_dx_dir(inode)) {
if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
EXT3_FEATURE_COMPAT_DIR_INDEX) &&
((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
((inode->i_size >> sb->s_blocksize_bits) == 1))) {
err = ext3_dx_readdir(filp, dirent, filldir); err = ext3_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) { if (err != ERR_BAD_DX_DIR) {
ret = err; ret = err;
...@@ -227,22 +226,87 @@ static int ext3_readdir(struct file * filp, ...@@ -227,22 +226,87 @@ static int ext3_readdir(struct file * filp,
return ret; return ret;
} }
/*
 * Tell whether the current caller should be served 32-bit hash positions.
 *
 * With CONFIG_COMPAT the answer comes from is_compat_task(); without it
 * the answer is fixed at build time by the native word size.
 */
static inline int is_32bit_api(void)
{
#ifndef CONFIG_COMPAT
	return (BITS_PER_LONG == 32);
#else
	return is_compat_task();
#endif
}
/* /*
* These functions convert from the major/minor hash to an f_pos * These functions convert from the major/minor hash to an f_pos
* value. * value for dx directories
* *
* Currently we only use major hash numer. This is unfortunate, but * Upper layer (for example NFS) should specify FMODE_32BITHASH or
* on 32-bit machines, the same VFS interface is used for lseek and * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
* llseek, so if we use the 64 bit offset, then the 32-bit versions of * directly on both 32-bit and 64-bit nodes, under such case, neither
* lseek/telldir/seekdir will blow out spectacularly, and from within * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
* the ext2 low-level routine, we don't know if we're being called by
* a 64-bit version of the system call or the 32-bit version of the
* system call. Worse yet, NFSv2 only allows for a 32-bit readdir
* cookie. Sigh.
*/ */
#define hash2pos(major, minor) (major >> 1) static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) {
#define pos2min_hash(pos) (0) if ((filp->f_mode & FMODE_32BITHASH) ||
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
return major >> 1;
else
return ((__u64)(major >> 1) << 32) | (__u64)minor;
}
/*
 * Recover the major hash from a directory position.
 *
 * In 32-bit mode (FMODE_32BITHASH set, or neither mode flag set and
 * is_32bit_api() true) the position itself holds the major hash shifted
 * right by one.  Otherwise that value lives in the upper 32 bits of the
 * position.  Either way the result is shifted back left by one and
 * truncated to 32 bits.
 */
static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
{
	int narrow = (filp->f_mode & FMODE_32BITHASH) ||
		     (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api());

	if (!narrow)
		pos >>= 32;

	return (pos << 1) & 0xffffffff;
}
/*
 * Recover the minor hash from a directory position.
 *
 * In 32-bit mode (FMODE_32BITHASH set, or neither mode flag set and
 * is_32bit_api() true) the position carries no minor hash, so 0 is
 * returned.  Otherwise the minor hash is the low 32 bits of the position.
 */
static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
{
	int narrow = (filp->f_mode & FMODE_32BITHASH) ||
		     (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api());

	if (narrow)
		return 0;

	return pos & 0xffffffff;
}
/*
 * Return the end-of-file position used for dx directories.
 *
 * The 32-bit marker is returned when FMODE_32BITHASH is set, or when
 * neither mode flag is set and is_32bit_api() is true; the 64-bit marker
 * is returned in every other case.
 */
static inline loff_t ext3_get_htree_eof(struct file *filp)
{
	int wide = !((filp->f_mode & FMODE_32BITHASH) ||
		     (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()));

	if (wide)
		return EXT3_HTREE_EOF_64BIT;

	return EXT3_HTREE_EOF_32BIT;
}
/*
 * llseek for ext3 directories.
 *
 * For htree (dx) directories the "offset" is expressed in terms of the
 * filename hash value rather than a byte offset.  Such a hash may be a
 * 64-bit value well beyond s_maxbytes, so the seek is delegated to
 * generic_file_llseek_size() with the htree end-of-file value as the
 * maximum allowable offset.  Non-htree directories go straight to
 * generic_file_llseek().
 *
 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
 * will be invalid once the directory was converted into a dx directory.
 */
loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *dir = file->f_mapping->host;
	loff_t new_pos;

	if (likely(is_dx_dir(dir)))
		new_pos = generic_file_llseek_size(file, offset, origin,
						   ext3_get_htree_eof(file));
	else
		new_pos = generic_file_llseek(file, offset, origin);

	return new_pos;
}
/* /*
* This structure holds the nodes of the red-black tree used to store * This structure holds the nodes of the red-black tree used to store
...@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) ...@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root)
} }
static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
loff_t pos)
{ {
struct dir_private_info *p; struct dir_private_info *p;
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p) if (!p)
return NULL; return NULL;
p->curr_hash = pos2maj_hash(pos); p->curr_hash = pos2maj_hash(filp, pos);
p->curr_minor_hash = pos2min_hash(pos); p->curr_minor_hash = pos2min_hash(filp, pos);
return p; return p;
} }
...@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, ...@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent,
printk("call_filldir: called with null fname?!?\n"); printk("call_filldir: called with null fname?!?\n");
return 0; return 0;
} }
curr_pos = hash2pos(fname->hash, fname->minor_hash); curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
while (fname) { while (fname) {
error = filldir(dirent, fname->name, error = filldir(dirent, fname->name,
fname->name_len, curr_pos, fname->name_len, curr_pos,
...@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, ...@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp,
int ret; int ret;
if (!info) { if (!info) {
info = ext3_htree_create_dir_info(filp->f_pos); info = ext3_htree_create_dir_info(filp, filp->f_pos);
if (!info) if (!info)
return -ENOMEM; return -ENOMEM;
filp->private_data = info; filp->private_data = info;
} }
if (filp->f_pos == EXT3_HTREE_EOF) if (filp->f_pos == ext3_get_htree_eof(filp))
return 0; /* EOF */ return 0; /* EOF */
/* Some one has messed with f_pos; reset the world */ /* Some one has messed with f_pos; reset the world */
...@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp, ...@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp,
free_rb_tree_fname(&info->root); free_rb_tree_fname(&info->root);
info->curr_node = NULL; info->curr_node = NULL;
info->extra_fname = NULL; info->extra_fname = NULL;
info->curr_hash = pos2maj_hash(filp->f_pos); info->curr_hash = pos2maj_hash(filp, filp->f_pos);
info->curr_minor_hash = pos2min_hash(filp->f_pos); info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
} }
/* /*
...@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, ...@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp,
if (ret < 0) if (ret < 0)
return ret; return ret;
if (ret == 0) { if (ret == 0) {
filp->f_pos = EXT3_HTREE_EOF; filp->f_pos = ext3_get_htree_eof(filp);
break; break;
} }
info->curr_node = rb_first(&info->root); info->curr_node = rb_first(&info->root);
...@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, ...@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp,
info->curr_minor_hash = fname->minor_hash; info->curr_minor_hash = fname->minor_hash;
} else { } else {
if (info->next_hash == ~0) { if (info->next_hash == ~0) {
filp->f_pos = EXT3_HTREE_EOF; filp->f_pos = ext3_get_htree_eof(filp);
break; break;
} }
info->curr_hash = info->next_hash; info->curr_hash = info->next_hash;
...@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) ...@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
return 0; return 0;
} }
/*
 * File operations for ext3 directories.  Seeking is routed through
 * ext3_dir_llseek and directory iteration through ext3_readdir; the
 * compat ioctl handler is only wired up when CONFIG_COMPAT is enabled.
 */
const struct file_operations ext3_dir_operations = {
	.llseek		= ext3_dir_llseek,
	.read		= generic_read_dir,
	.readdir	= ext3_readdir,
	.unlocked_ioctl	= ext3_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext3_compat_ioctl,
#endif
	.fsync		= ext3_sync_file,
	.release	= ext3_release_dir,
};
...@@ -920,7 +920,11 @@ struct dx_hash_info ...@@ -920,7 +920,11 @@ struct dx_hash_info
u32 *seed; u32 *seed;
}; };
#define EXT3_HTREE_EOF 0x7fffffff
/* 32 and 64 bit signed EOF for dx directories */
#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
/* /*
* Control parameters used by ext3_htree_next_block * Control parameters used by ext3_htree_next_block
......
...@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) ...@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
return -1; return -1;
} }
hash = hash & ~1; hash = hash & ~1;
if (hash == (EXT3_HTREE_EOF << 1)) if (hash == (EXT3_HTREE_EOF_32BIT << 1))
hash = (EXT3_HTREE_EOF-1) << 1; hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
hinfo->hash = hash; hinfo->hash = hash;
hinfo->minor_hash = minor_hash; hinfo->minor_hash = minor_hash;
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment