Csaba, Anand, does this meet your requirements?
Other comments?
Thanks,
Miklos
---
From: John Muir <mu...@nortel.com>
Add notification messages that allow the filesystem to invalidate VFS
caches. This is a much requested feature in FUSE.
Two notifications are added:
1) inode invalidation
- invalidate cached attributes
- invalidate a range of pages in the page cache (this is optional)
2) dentry invalidation
- try to invalidate a subtree in the dentry cache
Care must be taken while accessing the 'struct super_block' for the
mount, as it can go away while an invalidation is in progress. To
prevent this, introduce an rw-semaphore that is taken for read during
the invalidation and taken for write in the ->kill_sb callback.
Cc: Csaba Henk <cs...@gluster.com>
Cc: Anand Avati <av...@zresearch.com>
Signed-off-by: Miklos Szeredi <msze...@suse.cz>
---
fs/fuse/dev.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++
fs/fuse/dir.c | 37 +++++++++++++++++++++++
fs/fuse/fuse_i.h | 24 +++++++++++++++
fs/fuse/inode.c | 59 +++++++++++++++++++++++++++++++++++--
include/linux/fuse.h | 16 ++++++++++
5 files changed, 214 insertions(+), 3 deletions(-)
Index: linux-2.6/fs/fuse/dev.c
===================================================================
--- linux-2.6.orig/fs/fuse/dev.c 2009-06-02 16:20:59.000000000 +0200
+++ linux-2.6/fs/fuse/dev.c 2009-06-02 16:21:23.000000000 +0200
@@ -849,6 +849,81 @@ err:
return err;
}
+/*
+ * Handle a FUSE_NOTIFY_INVAL_INODE message.
+ *
+ * The payload must be exactly one struct fuse_notify_inval_inode_out.
+ * The copy state is always finished before returning, whether or not the
+ * payload could be read.
+ *
+ * Returns -EINVAL on a size mismatch, the fuse_copy_one() error if the
+ * payload cannot be copied, -ENOENT if the super block is already gone,
+ * otherwise the result of fuse_reverse_inval_inode().
+ */
+static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
+				   struct fuse_copy_state *cs)
+{
+	struct fuse_notify_inval_inode_out outarg;
+	int err;
+
+	if (size != sizeof(outarg)) {
+		fuse_copy_finish(cs);
+		return -EINVAL;
+	}
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	fuse_copy_finish(cs);
+	if (err)
+		return err;
+
+	/* Hold killsb for read so fc->sb cannot be cleared underneath us. */
+	down_read(&fc->killsb);
+	if (fc->sb)
+		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
+					       outarg.off, outarg.len);
+	else
+		err = -ENOENT;
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+/*
+ * Handle a FUSE_NOTIFY_INVAL_ENTRY message.
+ *
+ * The payload is a struct fuse_notify_inval_entry_out followed by the
+ * entry name and one terminating byte, i.e. namelen + 1 further bytes.
+ *
+ * Returns -EINVAL if the payload size does not match that layout,
+ * -ENAMETOOLONG if namelen exceeds FUSE_NAME_MAX, the fuse_copy_one()
+ * error if the payload cannot be copied, -ENOENT if the super block is
+ * already gone, otherwise the result of fuse_reverse_inval_entry().
+ */
+static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
+				   struct fuse_copy_state *cs)
+{
+	struct fuse_notify_inval_entry_out outarg;
+	int err = -EINVAL;
+	/*
+	 * NOTE(review): this places FUSE_NAME_MAX + 1 bytes on the kernel
+	 * stack; consider a heap allocation if stack usage is a concern.
+	 */
+	char buf[FUSE_NAME_MAX+1];
+	struct qstr name;
+
+	if (size < sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+
+	err = -ENAMETOOLONG;
+	if (outarg.namelen > FUSE_NAME_MAX)
+		goto err;
+
+	/*
+	 * namelen comes from the daemon and must agree with the number of
+	 * bytes actually written; otherwise the name copy below would try
+	 * to consume data that is not part of this message.
+	 */
+	err = -EINVAL;
+	if (size != sizeof(outarg) + outarg.namelen + 1)
+		goto err;
+
+	name.name = buf;
+	name.len = outarg.namelen;
+	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+	/* Do not trust the daemon to have sent a terminating NUL. */
+	buf[outarg.namelen] = 0;
+	name.hash = full_name_hash(name.name, name.len);
+
+	/* Hold killsb for read so fc->sb cannot be cleared underneath us. */
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (!fc->sb)
+		goto err_unlock;
+
+	err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+
+err_unlock:
+	up_read(&fc->killsb);
+	return err;
+
+err:
+	fuse_copy_finish(cs);
+	return err;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
@@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn
case FUSE_NOTIFY_POLL:
return fuse_notify_poll(fc, size, cs);
+ case FUSE_NOTIFY_INVAL_INODE:
+ return fuse_notify_inval_inode(fc, size, cs);
+
+ case FUSE_NOTIFY_INVAL_ENTRY:
+ return fuse_notify_inval_entry(fc, size, cs);
+
default:
fuse_copy_finish(cs);
return -EINVAL;
Index: linux-2.6/fs/fuse/dir.c
===================================================================
--- linux-2.6.orig/fs/fuse/dir.c 2009-06-02 16:20:59.000000000 +0200
+++ linux-2.6/fs/fuse/dir.c 2009-06-02 16:21:23.000000000 +0200
@@ -845,6 +845,43 @@ int fuse_update_attributes(struct inode
return err;
}
+/*
+ * Invalidate the cached attributes of a directory and the dentry for one
+ * name inside it, on behalf of the filesystem daemon.
+ *
+ * @sb: super block to search for the parent inode
+ * @parent_nodeid: FUSE node id of the parent directory
+ * @name: name (with precomputed hash) of the entry to invalidate
+ *
+ * Returns 0 on success, -ENOENT if the parent inode, an alias dentry for
+ * it, or the named child dentry is not found, and -ENOTDIR if the parent
+ * is not a directory.
+ */
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+ struct qstr *name)
+{
+ int err = -ENOTDIR;
+ struct inode *parent;
+ struct dentry *dir;
+ struct dentry *entry;
+
+ /* Look the parent up by node id; released with iput() at the end. */
+ parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+ if (!parent)
+ return -ENOENT;
+
+ mutex_lock(&parent->i_mutex);
+ if (!S_ISDIR(parent->i_mode))
+ goto unlock;
+
+ err = -ENOENT;
+ /* Need a dentry for the directory in order to look up the child. */
+ dir = d_find_alias(parent);
+ if (!dir)
+ goto unlock;
+
+ entry = d_lookup(dir, name);
+ /* The directory dentry is only needed for the lookup itself. */
+ dput(dir);
+ if (!entry)
+ goto unlock;
+
+ fuse_invalidate_attr(parent);
+ fuse_invalidate_entry(entry);
+ dput(entry);
+ err = 0;
+
+ unlock:
+ mutex_unlock(&parent->i_mutex);
+ iput(parent);
+ return err;
+}
+
/*
* Calling into a user-controlled filesystem gives the filesystem
* daemon ptrace-like capabilities over the requester process. This
Index: linux-2.6/fs/fuse/fuse_i.h
===================================================================
--- linux-2.6.orig/fs/fuse/fuse_i.h 2009-06-02 16:20:59.000000000 +0200
+++ linux-2.6/fs/fuse/fuse_i.h 2009-06-02 16:21:23.000000000 +0200
@@ -481,6 +481,12 @@ struct fuse_conn {
/** Called on final put */
void (*release)(struct fuse_conn *);
+
+ /** Super block for this connection. */
+ struct super_block *sb;
+
+ /** Read/write semaphore to hold when accessing sb. */
+ struct rw_semaphore killsb;
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -509,6 +515,11 @@ extern const struct file_operations fuse
extern const struct dentry_operations fuse_dentry_operations;
/**
+ * Inode to nodeid comparison.
+ */
+int fuse_inode_eq(struct inode *inode, void *_nodeidp);
+
+/**
* Get a filled in inode
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
@@ -708,6 +719,19 @@ void fuse_release_nowrite(struct inode *
u64 fuse_get_attr_version(struct fuse_conn *fc);
+/**
+ * File-system tells the kernel to invalidate cache for the given node id.
+ *
+ * Cached attributes are always invalidated.  If offset is non-negative,
+ * cached pages starting at offset are invalidated as well; a len of zero
+ * or less means no upper bound on the range.
+ */
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+ loff_t offset, loff_t len);
+
+/**
+ * File-system tells the kernel to invalidate parent attributes and
+ * the dentry matching parent/name.
+ */
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+ struct qstr *name);
+
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir);
ssize_t fuse_direct_io(struct file *file, const char __user *buf,
Index: linux-2.6/fs/fuse/inode.c
===================================================================
--- linux-2.6.orig/fs/fuse/inode.c 2009-06-02 16:20:59.000000000 +0200
+++ linux-2.6/fs/fuse/inode.c 2009-06-02 16:21:23.000000000 +0200
@@ -207,7 +207,7 @@ static void fuse_init_inode(struct inode
BUG();
}
-static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
u64 nodeid = *(u64 *) _nodeidp;
if (get_node_id(inode) == nodeid)
@@ -258,6 +258,31 @@ struct inode *fuse_iget(struct super_blo
return inode;
}
+/*
+ * Invalidate cached state of the inode identified by @nodeid.
+ *
+ * Attributes are always invalidated.  Page cache is touched only when
+ * @offset is non-negative: the range starts at the page containing
+ * @offset and ends at the page containing the last requested byte, or
+ * has no upper bound when @len is zero or negative.
+ *
+ * Returns 0 on success or -ENOENT if no such inode is found.
+ */
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+			     loff_t offset, loff_t len)
+{
+	struct inode *inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
+
+	if (!inode)
+		return -ENOENT;
+
+	fuse_invalidate_attr(inode);
+
+	if (offset >= 0) {
+		pgoff_t first = offset >> PAGE_CACHE_SHIFT;
+		pgoff_t last = -1;
+
+		if (len > 0)
+			last = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+
+		invalidate_inode_pages2_range(inode->i_mapping, first, last);
+	}
+
+	iput(inode);
+	return 0;
+}
+
static void fuse_umount_begin(struct super_block *sb)
{
lock_kernel();
@@ -483,6 +508,7 @@ void fuse_conn_init(struct fuse_conn *fc
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
mutex_init(&fc->inst_mutex);
+ init_rwsem(&fc->killsb);
atomic_set(&fc->count, 1);
init_waitqueue_head(&fc->waitq);
init_waitqueue_head(&fc->blocked_waitq);
@@ -863,6 +889,7 @@ static int fuse_fill_super(struct super_
fuse_conn_init(fc);
fc->dev = sb->s_dev;
+ fc->sb = sb;
err = fuse_bdi_init(fc, sb);
if (err)
goto err_put_conn;
@@ -944,12 +971,25 @@ static int fuse_get_sb(struct file_syste
return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
}
+/*
+ * ->kill_sb for "fuse" mounts.
+ *
+ * Clears the connection's super block pointer under the write side of
+ * killsb, so that invalidation notifications (which take it for read)
+ * never see a super block that is being torn down, then hands over to
+ * kill_anon_super().
+ */
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+	struct fuse_conn *conn = get_fuse_conn_super(sb);
+
+	if (conn != NULL) {
+		down_write(&conn->killsb);
+		conn->sb = NULL;
+		up_write(&conn->killsb);
+	}
+
+	kill_anon_super(sb);
+}
+
static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE,
.get_sb = fuse_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = fuse_kill_sb_anon,
};
#ifdef CONFIG_BLOCK
@@ -961,11 +1001,24 @@ static int fuse_get_sb_blk(struct file_s
mnt);
}
+/*
+ * ->kill_sb for "fuseblk" mounts.
+ *
+ * Clears the connection's super block pointer under the write side of
+ * killsb, so that invalidation notifications (which take it for read)
+ * never see a super block that is being torn down, then hands over to
+ * kill_block_super().
+ */
+static void fuse_kill_sb_blk(struct super_block *sb)
+{
+	struct fuse_conn *conn = get_fuse_conn_super(sb);
+
+	if (conn != NULL) {
+		down_write(&conn->killsb);
+		conn->sb = NULL;
+		up_write(&conn->killsb);
+	}
+
+	kill_block_super(sb);
+}
+
static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
.get_sb = fuse_get_sb_blk,
- .kill_sb = kill_block_super,
+ .kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
Index: linux-2.6/include/linux/fuse.h
===================================================================
--- linux-2.6.orig/include/linux/fuse.h 2009-06-02 16:20:59.000000000 +0200
+++ linux-2.6/include/linux/fuse.h 2009-06-02 16:21:23.000000000 +0200
@@ -25,6 +25,8 @@
* - add IOCTL message
* - add unsolicited notification support
* - add POLL message and NOTIFY_POLL notification
+ * - add notification messages for invalidation of inodes and
+ * directory entries
*/
#ifndef _LINUX_FUSE_H
@@ -224,6 +226,8 @@ enum fuse_opcode {
enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
+ FUSE_NOTIFY_INVAL_INODE = 2,
+ FUSE_NOTIFY_INVAL_ENTRY = 3,
FUSE_NOTIFY_CODE_MAX,
};
@@ -508,4 +512,16 @@ struct fuse_dirent {
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+/* Payload of a FUSE_NOTIFY_INVAL_INODE notification. */
+struct fuse_notify_inval_inode_out {
+ /* Node id of the inode whose cache is to be invalidated. */
+ __u64 ino;
+ /* Start offset for page invalidation; negative skips page invalidation. */
+ __s64 off;
+ /* Length of the range; zero or less means no upper bound. */
+ __s64 len;
+};
+
+/*
+ * Payload header of a FUSE_NOTIFY_INVAL_ENTRY notification.  The kernel
+ * reads namelen + 1 bytes of name data immediately after this structure.
+ */
+struct fuse_notify_inval_entry_out {
+ /* Node id of the parent directory. */
+ __u64 parent;
+ /* Length of the name, not counting the trailing byte; at most FUSE_NAME_MAX. */
+ __u32 namelen;
+ /* Not read by the kernel handler in this patch. */
+ __u32 padding;
+};
+
#endif /* _LINUX_FUSE_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
> John, thanks for the patch. If no problems are found I'd like to add
> it to the 2.6.31 queue.
>
> Csaba, Anand, does this meet your requirements?
>
> Other comments?
I was wondering how the userland libfuse would be made aware of the presence of these two notification features?
Avati
Here's a patch against fuse CVS. It's also from John Muir.
I'm still thinking a bit about the API, how it would work in the
presence of stacked filesystems or multiple channels.
Thanks,
Miklos
Index: fuse/include/fuse_kernel.h
===================================================================
--- fuse.orig/include/fuse_kernel.h 2009-06-04 11:09:56.000000000 +0200
+++ fuse/include/fuse_kernel.h 2009-06-04 11:10:11.000000000 +0200
@@ -51,6 +51,8 @@
* - add IOCTL message
* - add unsolicited notification support
* - add POLL message and NOTIFY_POLL notification
+ * - add notification messages for invalidation of inodes and
+ * directory entries
*/
#ifndef _LINUX_FUSE_H
@@ -58,6 +60,7 @@
#include <sys/types.h>
#define __u64 uint64_t
+#define __s64 int64_t
#define __u32 uint32_t
#define __s32 int32_t
@@ -243,6 +246,8 @@ enum fuse_opcode {
enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
+ FUSE_NOTIFY_INVAL_INODE = 2,
+ FUSE_NOTIFY_INVAL_ENTRY = 3,
FUSE_NOTIFY_CODE_MAX,
};
@@ -506,4 +511,16 @@ struct fuse_dirent {
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+/* Payload of a FUSE_NOTIFY_INVAL_INODE notification. */
+struct fuse_notify_inval_inode_out {
+ /* Inode number whose cache is to be invalidated. */
+ __u64 ino;
+ /* Start offset for data invalidation; negative means attributes only. */
+ __s64 off;
+ /* Amount of cache to invalidate; 0 means all. */
+ __s64 len;
+};
+
+/*
+ * Payload header of a FUSE_NOTIFY_INVAL_ENTRY notification; the entry
+ * name and its terminating NUL byte are sent right after it.
+ */
+struct fuse_notify_inval_entry_out {
+ /* Inode number of the parent directory. */
+ __u64 parent;
+ /* strlen() of the entry name. */
+ __u32 namelen;
+ /* Set to 0 by the sender. */
+ __u32 padding;
+};
+
#endif /* _LINUX_FUSE_H */
Index: fuse/include/fuse_lowlevel.h
===================================================================
--- fuse.orig/include/fuse_lowlevel.h 2009-06-04 11:09:56.000000000 +0200
+++ fuse/include/fuse_lowlevel.h 2009-06-04 11:10:11.000000000 +0200
@@ -1136,6 +1136,32 @@ int fuse_reply_poll(fuse_req_t req, unsi
*/
int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph);
+/**
+ * Notify the kernel to invalidate its cache for an inode
+ *
+ * Cached attributes are always invalidated; cached data is invalidated
+ * only when off is non-negative.
+ *
+ * @param ch the channel through which to send the invalidation
+ * @param ino the inode number
+ * @param off the offset in the inode where to start invalidating
+ * or negative to invalidate attributes only
+ * @param len the amount of cache to invalidate, or 0 to invalidate
+ * everything from off onward
+ * @return zero for success, -errno for failure
+ */
+int fuse_lowlevel_notify_inval_inode(struct fuse_chan *ch, fuse_ino_t ino,
+ off_t off, off_t len);
+
+/**
+ * Notify the kernel to invalidate parent attributes and the dentry
+ * matching parent/name
+ *
+ * @param ch the channel through which to send the invalidation
+ * @param parent the inode number of the parent directory
+ * @param name file name; must be NUL-terminated, since namelen + 1
+ * bytes are sent
+ * @param namelen strlen() of file name
+ * @return zero for success, -errno for failure
+ */
+int fuse_lowlevel_notify_inval_entry(struct fuse_chan *ch, fuse_ino_t parent,
+ const char *name, size_t namelen);
+
/* ----------------------------------------------------------- *
* Utility functions *
* ----------------------------------------------------------- */
@@ -1340,6 +1366,14 @@ void fuse_session_reset(struct fuse_sess
int fuse_session_exited(struct fuse_session *se);
/**
+ * Get the user data provided to the session
+ *
+ * @param se the session
+ * @return the user data
+ */
+void *fuse_session_data(struct fuse_session *se);
+
+/**
* Enter a single threaded event loop
*
* @param se the session
Index: fuse/lib/fuse_lowlevel.c
===================================================================
--- fuse.orig/lib/fuse_lowlevel.c 2009-06-04 11:09:56.000000000 +0200
+++ fuse/lib/fuse_lowlevel.c 2009-06-04 11:10:11.000000000 +0200
@@ -1280,6 +1280,56 @@ int fuse_lowlevel_notify_poll(struct fus
}
}
+/*
+ * Send a FUSE_NOTIFY_INVAL_INODE notification over channel ch.
+ *
+ * Returns -EINVAL if ch is NULL, -ENODEV if the channel's session has no
+ * data attached, otherwise the result of send_notify_iov().
+ */
+int fuse_lowlevel_notify_inval_inode(struct fuse_chan *ch, fuse_ino_t ino,
+				     off_t off, off_t len)
+{
+	struct fuse_notify_inval_inode_out outarg;
+	struct iovec iov[2];
+	struct fuse_ll *f;
+
+	if (ch == NULL)
+		return -EINVAL;
+
+	f = fuse_session_data(fuse_chan_session(ch));
+	if (f == NULL)
+		return -ENODEV;
+
+	outarg.ino = ino;
+	outarg.off = off;
+	outarg.len = len;
+
+	/*
+	 * iov[0] is not set here; presumably send_notify_iov() fills in
+	 * the message header there -- confirm against its definition.
+	 */
+	iov[1].iov_base = &outarg;
+	iov[1].iov_len = sizeof(outarg);
+
+	return send_notify_iov(f, ch, FUSE_NOTIFY_INVAL_INODE, iov, 2);
+}
+
+/*
+ * Send a FUSE_NOTIFY_INVAL_ENTRY notification over channel ch.
+ *
+ * The name is sent together with its terminating NUL byte, so name must
+ * point to at least namelen + 1 readable bytes.
+ *
+ * Returns -EINVAL if ch is NULL, -ENODEV if the channel's session has no
+ * data attached, otherwise the result of send_notify_iov().
+ */
+int fuse_lowlevel_notify_inval_entry(struct fuse_chan *ch, fuse_ino_t parent,
+				     const char *name, size_t namelen)
+{
+	struct fuse_notify_inval_entry_out outarg;
+	struct iovec iov[3];
+	struct fuse_ll *f;
+
+	if (ch == NULL)
+		return -EINVAL;
+
+	f = fuse_session_data(fuse_chan_session(ch));
+	if (f == NULL)
+		return -ENODEV;
+
+	outarg.parent = parent;
+	outarg.namelen = namelen;
+	outarg.padding = 0;
+
+	/*
+	 * iov[0] is not set here; presumably send_notify_iov() fills in
+	 * the message header there -- confirm against its definition.
+	 */
+	iov[1].iov_base = &outarg;
+	iov[1].iov_len = sizeof(outarg);
+	iov[2].iov_base = (void *)name;
+	iov[2].iov_len = namelen + 1;
+
+	return send_notify_iov(f, ch, FUSE_NOTIFY_INVAL_ENTRY, iov, 3);
+}
+
void *fuse_req_userdata(fuse_req_t req)
{
return req->f->userdata;
Index: fuse/lib/fuse_session.c
===================================================================
--- fuse.orig/lib/fuse_session.c 2009-06-04 11:09:56.000000000 +0200
+++ fuse/lib/fuse_session.c 2009-06-04 11:10:11.000000000 +0200
@@ -121,6 +121,11 @@ int fuse_session_exited(struct fuse_sess
return se->exited;
}
+/* Return the data pointer stored in the session; se must not be NULL. */
+void *fuse_session_data(struct fuse_session *se)
+{
+ return se->data;
+}
+
static struct fuse_chan *fuse_chan_new_common(struct fuse_chan_ops *op, int fd,
size_t bufsize, void *data,
int compat)
Index: fuse/lib/fuse_versionscript
===================================================================
--- fuse.orig/lib/fuse_versionscript 2009-06-04 11:09:56.000000000 +0200
+++ fuse/lib/fuse_versionscript 2009-06-04 11:10:11.000000000 +0200
@@ -165,6 +165,8 @@ FUSE_2.8 {
fuse_reply_ioctl;
fuse_reply_ioctl_retry;
fuse_reply_poll;
+ fuse_lowlevel_notify_inval_inode;
+ fuse_lowlevel_notify_inval_entry;
local:
*;
In fact, what we were meditating about is the lack of the
FUSE_KERNEL_MINOR_VERSION bump. Without that, how do the two parties
negotiate if the rev inval messages are understood? One of John's
patches in his patchset had it bumped though...
Csaba
Yes, I removed that intentionally as hopefully this will go in 2.6.31
with the other 10 -> 11 API changes.
Thanks,
Miklos