Let's use generic_file_llseek() instead.
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Arnd Bergmann <ar...@arndb.de>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Andrew Morton <ak...@linux-foundation.org>
Cc: Ingo Molnar <mi...@elte.hu>
Cc: John Kacur <jka...@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezaw...@jp.fujitsu.com>
Cc: Al Viro <vi...@ZenIV.linux.org.uk>
---
fs/proc/vmcore.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 0872afa..f942ecb 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -162,6 +162,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
static const struct file_operations proc_vmcore_operations = {
.read = read_vmcore,
+ .lseek = generic_file_llseek,
};
static struct vmcore* __init get_new_element(void)
--
1.6.2.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
We don't use the BKL elsewhere, so use generic_file_llseek
so we can avoid default_llseek taking the BKL.
Signed-off-by: Arnd Bergmann <ar...@arndb.de>
[restore proc_fdinfo_file_operations as non-seekable]
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Andrew Morton <ak...@linux-foundation.org>
Cc: Ingo Molnar <mi...@elte.hu>
Cc: John Kacur <jka...@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezaw...@jp.fujitsu.com>
Cc: Al Viro <vi...@ZenIV.linux.org.uk>
---
fs/proc/base.c | 10 +++++++++-
1 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a731084..95d91cf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -728,6 +728,7 @@ out_no_task:
static const struct file_operations proc_info_file_operations = {
.read = proc_info_read,
+ .llseek = generic_file_llseek,
};
static int proc_single_show(struct seq_file *m, void *v)
@@ -985,6 +986,7 @@ out_no_task:
static const struct file_operations proc_environ_operations = {
.read = environ_read,
+ .llseek = generic_file_llseek,
};
static ssize_t oom_adjust_read(struct file *file, char __user *buf,
@@ -1058,6 +1060,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
static const struct file_operations proc_oom_adjust_operations = {
.read = oom_adjust_read,
.write = oom_adjust_write,
+ .llseek = generic_file_llseek,
};
#ifdef CONFIG_AUDITSYSCALL
@@ -1129,6 +1132,7 @@ out_free_page:
static const struct file_operations proc_loginuid_operations = {
.read = proc_loginuid_read,
.write = proc_loginuid_write,
+ .llseek = generic_file_llseek,
};
static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
@@ -1149,6 +1153,7 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
static const struct file_operations proc_sessionid_operations = {
.read = proc_sessionid_read,
+ .llseek = generic_file_llseek,
};
#endif
@@ -1200,6 +1205,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
static const struct file_operations proc_fault_inject_operations = {
.read = proc_fault_inject_read,
.write = proc_fault_inject_write,
+ .llseek = generic_file_llseek,
};
#endif
@@ -1941,7 +1947,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
}
static const struct file_operations proc_fdinfo_file_operations = {
- .open = nonseekable_open,
+ .open = nonseekable_open,
.read = proc_fdinfo_read,
};
@@ -2225,6 +2231,7 @@ out_no_task:
static const struct file_operations proc_pid_attr_operations = {
.read = proc_pid_attr_read,
.write = proc_pid_attr_write,
+ .llseek = generic_file_llseek,
};
static const struct pid_entry attr_dir_stuff[] = {
@@ -2345,6 +2352,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
static const struct file_operations proc_coredump_filter_operations = {
.read = proc_coredump_filter_read,
.write = proc_coredump_filter_write,
+ .llseek = generic_file_llseek,
};
#endif
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Arnd Bergmann <ar...@arndb.de>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Andrew Morton <ak...@linux-foundation.org>
Cc: Ingo Molnar <mi...@elte.hu>
Cc: John Kacur <jka...@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezaw...@jp.fujitsu.com>
Cc: Al Viro <vi...@ZenIV.linux.org.uk>
---
fs/proc/inode.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 445a02b..afcda85 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -231,9 +231,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
if (rv == -ENOIOCTLCMD)
rv = -EINVAL;
} else if (ioctl) {
- lock_kernel();
+ WARN_ONCE(1, "Procfs ioctl handlers must use unlocked_ioctl, "
+ "%pf will be called without the Bkl held\n", ioctl);
rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
- unlock_kernel();
}
pde_users_dec(pde);
Use generic_file_llseek explicitly.
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Arnd Bergmann <ar...@arndb.de>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Andrew Morton <ak...@linux-foundation.org>
Cc: Ingo Molnar <mi...@elte.hu>
Cc: John Kacur <jka...@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezaw...@jp.fujitsu.com>
Cc: Al Viro <vi...@ZenIV.linux.org.uk>
---
fs/proc/kmsg.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index cfe90a4..bd4b5a7 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -53,6 +53,7 @@ static const struct file_operations proc_kmsg_operations = {
.poll = kmsg_poll,
.open = kmsg_open,
.release = kmsg_release,
+ .llseek = generic_file_llseek,
};
static int __init proc_kmsg_init(void)
There is no warning for default default_llseek case.
This should be done same way as proc ioctls.
Then delete the branch.
Or go through formal feature-removal procedure.
I don't think we should. We have overriden the llseek for
the procfs users located in the proc core (just fs/proc)
but we haven't touched all of the external users, and since
there are hundreds of them, I guess a lot don't implement
llseek.
We would need to first override those that are visible upstream
and warn for the further ones after this step only.
I thought about it. I even started to write something in this
feature-removal file but realized that I can't remove the
.ioctl() callback from file operations. We still need to check
the user hasn't made the mistake of implementing it.
What I can plan as a feature removal, though, is to keep the warning
but don't actually call the ioctl.
I believe we can actually remove ioctl from file_operations. The patch I did
to convert all users to ".unlocked_ioctl = default_ioctl," should really catch
all cases, and I think we can enforce this by renaming fops->ioctl to locked_ioctl
or old_ioctl to make sure we didn't miss any, and then mandate that this one
is only used when unlocked_ioctl is set to default_ioctl.
I also remember going through procfs ioctl operations some time ago and finding
exactly three users, which I believe are the same ones that Frederic found.
Arnd
I just looked at the patch in question and noted that the changelog
is pretty high, but how could it be else.
Actually it's not that large, but highly spread:
Documentation/DocBook/kernel-hacking.tmpl | 2 +-
Documentation/filesystems/vfs.txt | 3 +-
arch/arm/kernel/etm.c | 1 +
arch/cris/arch-v10/drivers/ds1302.c | 3 ++
arch/cris/arch-v10/drivers/gpio.c | 2 +
arch/cris/arch-v10/drivers/i2c.c | 2 +
arch/cris/arch-v10/drivers/pcf8563.c | 3 ++
arch/cris/arch-v10/drivers/sync_serial.c | 4 ++-
arch/cris/arch-v32/drivers/cryptocop.c | 4 ++-
arch/cris/arch-v32/drivers/i2c.c | 2 +
arch/cris/arch-v32/drivers/mach-a3/gpio.c | 2 +
arch/cris/arch-v32/drivers/mach-fs/gpio.c | 2 +
arch/cris/arch-v32/drivers/pcf8563.c | 5 +++-
arch/cris/arch-v32/drivers/sync_serial.c | 4 ++-
arch/ia64/kernel/perfmon.c | 2 +
arch/ia64/sn/kernel/sn2/sn_hwperf.c | 2 +
arch/m68k/bvme6000/rtc.c | 2 +
arch/m68k/mvme16x/rtc.c | 2 +
arch/um/drivers/harddog_kern.c | 2 +
arch/um/drivers/hostaudio_kern.c | 3 ++
arch/um/drivers/mmapper_kern.c | 3 ++
drivers/block/DAC960.c | 3 +-
drivers/block/paride/pg.c | 2 +
drivers/block/paride/pt.c | 2 +
drivers/block/pktcdvd.c | 3 ++
drivers/char/apm-emulation.c | 2 +
drivers/char/applicom.c | 2 +
drivers/char/ds1302.c | 1 +
drivers/char/ds1620.c | 2 +
drivers/char/dtlk.c | 2 +
drivers/char/generic_nvram.c | 2 +
drivers/char/genrtc.c | 2 +
drivers/char/i8k.c | 2 +
drivers/char/ip2/ip2main.c | 1 +
drivers/char/ipmi/ipmi_devintf.c | 2 +
drivers/char/ipmi/ipmi_watchdog.c | 2 +
drivers/char/istallion.c | 1 +
drivers/char/lp.c | 1 +
drivers/char/mmtimer.c | 1 +
drivers/char/nwflash.c | 1 +
drivers/char/raw.c | 4 +++
drivers/char/rio/rio_linux.c | 1 +
drivers/char/stallion.c | 1 +
drivers/char/sx.c | 1 +
drivers/char/uv_mmtimer.c | 1 +
drivers/char/viotape.c | 1 +
drivers/firewire/core-cdev.c | 2 +
drivers/gpu/drm/i810/i810_dma.c | 2 +
drivers/gpu/drm/i830/i830_dma.c | 2 +
drivers/hid/usbhid/hiddev.c | 3 +-
drivers/hwmon/fschmd.c | 2 +
drivers/ide/ide-tape.c | 2 +
drivers/ieee1394/dv1394.c | 2 +
drivers/ieee1394/raw1394.c | 2 +
drivers/ieee1394/video1394.c | 4 ++-
drivers/infiniband/core/ucm.c | 2 +
drivers/infiniband/core/ucma.c | 2 +
drivers/infiniband/core/user_mad.c | 7 +++-
drivers/infiniband/core/uverbs_main.c | 10 +++++--
drivers/input/misc/hp_sdc_rtc.c | 2 +
drivers/input/misc/uinput.c | 1 +
drivers/isdn/capi/capi.c | 1 +
drivers/isdn/divert/divert_procfs.c | 2 +
drivers/isdn/i4l/isdn_common.c | 1 +
drivers/isdn/mISDN/timerdev.c | 3 ++
drivers/macintosh/ans-lcd.c | 2 +
drivers/macintosh/nvram.c | 2 +
drivers/macintosh/via-pmu.c | 2 +
drivers/media/dvb/bt8xx/dst_ca.c | 1 +
drivers/media/dvb/dvb-core/dmxdev.c | 3 ++
drivers/media/dvb/dvb-core/dvb_ca_en50221.c | 3 ++
drivers/media/dvb/dvb-core/dvb_frontend.c | 5 +++-
drivers/media/dvb/dvb-core/dvb_net.c | 3 ++
drivers/media/dvb/firewire/firedtv-ci.c | 3 ++
drivers/media/dvb/ttpci/av7110.c | 3 ++
drivers/media/dvb/ttpci/av7110_av.c | 5 +++
drivers/media/dvb/ttpci/av7110_ca.c | 3 ++
drivers/media/video/cpia.c | 2 +
drivers/media/video/v4l2-dev.c | 2 +
drivers/mtd/mtdchar.c | 1 +
drivers/mtd/ubi/cdev.c | 2 +
drivers/net/ppp_generic.c | 4 ++-
drivers/net/wireless/airo.c | 9 ++++++
drivers/net/wireless/ray_cs.c | 3 ++
drivers/rtc/rtc-m41t80.c | 1 +
drivers/s390/char/fs3270.c | 1 +
drivers/s390/char/tape_char.c | 2 +-
drivers/s390/cio/chsc_sch.c | 2 +
drivers/s390/crypto/zcrypt_api.c | 1 +
drivers/s390/scsi/zfcp_cfdc.c | 2 +
drivers/sbus/char/envctrl.c | 1 +
drivers/sbus/char/openprom.c | 1 +
drivers/scsi/3w-9xxx.c | 2 +
drivers/scsi/3w-sas.c | 2 +
drivers/scsi/3w-xxxx.c | 2 +
drivers/scsi/aacraid/linit.c | 1 +
drivers/scsi/dpt_i2o.c | 2 +
drivers/scsi/gdth.c | 2 +
drivers/scsi/megaraid.c | 2 +
drivers/scsi/megaraid/megaraid_mm.c | 2 +
drivers/scsi/megaraid/megaraid_sas.c | 1 +
drivers/scsi/mpt2sas/mpt2sas_ctl.c | 1 +
drivers/scsi/osd/osd_uld.c | 2 +
drivers/scsi/osst.c | 2 +
drivers/scsi/pmcraid.c | 2 +
drivers/scsi/sg.c | 2 +
drivers/scsi/st.c | 1 +
drivers/spi/spidev.c | 2 +
drivers/staging/b3dfg/b3dfg.c | 2 +
drivers/staging/comedi/comedi_fops.c | 2 +
drivers/staging/dream/pmem.c | 3 ++
drivers/staging/dream/qdsp5/audio_aac.c | 2 +
drivers/staging/dream/qdsp5/audio_mp3.c | 2 +
drivers/staging/poch/poch.c | 3 ++
drivers/staging/sep/sep_driver.c | 3 ++
drivers/staging/vme/devices/vme_user.c | 2 +
drivers/telephony/ixj.c | 1 +
drivers/usb/class/usblp.c | 2 +
drivers/usb/gadget/printer.c | 1 +
drivers/usb/misc/idmouse.c | 2 +
drivers/usb/misc/iowarrior.c | 1 +
drivers/usb/misc/rio500.c | 1 +
drivers/usb/misc/vstusb.c | 2 +
fs/autofs/root.c | 1 +
fs/autofs4/dev-ioctl.c | 2 +
fs/btrfs/super.c | 1 +
fs/coda/pioctl.c | 3 ++
fs/coda/psdev.c | 2 +
fs/ecryptfs/file.c | 2 +
fs/ecryptfs/miscdev.c | 2 +
fs/hfsplus/dir.c | 2 +
fs/hfsplus/inode.c | 2 +
fs/ioctl.c | 11 ++------
fs/ncpfs/dir.c | 2 +
fs/ncpfs/file.c | 1 +
fs/read_write.c | 34 ------------------------
fs/smbfs/dir.c | 2 +
fs/smbfs/file.c | 1 +
fs/udf/dir.c | 2 +
fs/udf/file.c | 1 +
include/linux/fs.h | 5 ---
include/linux/smp_lock.h | 5 +++
lib/kernel_lock.c | 37 +++++++++++++++++++++++++-
net/socket.c | 1 +
sound/core/control.c | 2 +
sound/core/oss/pcm_oss.c | 2 +
sound/core/pcm_native.c | 2 +
sound/core/seq/seq_clientmgr.c | 2 +
sound/oss/au1550_ac97.c | 30 +++++++++++----------
sound/oss/dmasound/dmasound_core.c | 2 +
sound/oss/msnd_pinnacle.c | 2 +
sound/oss/sh_dac_audio.c | 3 ++
sound/oss/soundcard.c | 1 +
sound/oss/swarm_cs4297a.c | 3 ++
sound/oss/vwsnd.c | 2 +
sound/soc/soc-core.c | 2 +
virt/kvm/kvm_main.c | 4 +++
157 files changed, 372 insertions(+), 80 deletions(-)
I wonder if we should actually just turn all these into unlocked_ioctl
directly. And then bring a warn on ioctl, and finally schedule the removal
of this callback.
What do you think?
You plan looks good but I fear this actually carries the problem forward
in that we won't be able to remove .ioctl after that.
I can handle that if you agree.
I don't think the warning helps all that much, at least not across an
entire release. We could leave it in for the merge window and fix all
users for -rc1, then submit a patch that kills everything that came
in during the merge window and remove it completely in -rc2.
Getting rid of ioctl completely is a lot of work though, covering the
entire lot of ~150 device drivers. I think the patch as is (or the
variant renaming .ioctl to .locked_ioctl) is far less work and has
less potential of introducing regressions.
> You plan looks good but I fear this actually carries the problem forward
> in that we won't be able to remove .ioctl after that.
>
> I can handle that if you agree.
I don't think we really need to get rid of it this soon in the obsolete
drivers, pushing down the BKL into an unlocked_ioctl function only slightly
shifts the problem around, since the driver still depends on the BKL then
and gets disabled if you build with CONFIG_BKL=n.
IMHO, a better use of your time would be to completely remove the BKL
along with the ioctl function from any of the drivers in this lists that
looks like it could be relevant to real users.
In the meantime, we can move the declaration of the .locked_ioctl callback
into an #ifdef CONFIG_BKL, to make sure nobody builds a driver with an
ioctl function that does not get called.
Another crazy idea I had was to simply turn the BKL into a regular mutex
as soon as we can show that all remaining users are of the non-recursive
kind and don't rely on the autorelease-on-sleep. Doing that would be
much easier without the pushdown into .unlocked_ioctl than it would be
with it.
Arnd
I just looked at all the users of lock_kernel remaining with my patch
series. For 90% of them, it is completely obvious that they don't rely
on nested locking, and they very much look like they don't need the
autorelease either, because the BKL was simply pushed down into the
open, ioctl and llseek functions.
There are a few file systems (udf, ncpfs, autofs, coda, ...) and some
network protocols (appletalk, ipx, irnet and x25) for which it is not
obviously, though still quite likely, the case.
So we could actually remove the BKL recursion code soon, or even turn
all of it into a regular mutex, at least as an experimental option.
The recursive users that I've removed in my series are the block, tty,
input and sound subsystems, as well as the init code.
Hmm, yeah you're right actually. Since we have this CONFIG_BKL thing
plus a future check to prevent from people implementing new ioctl
(checking ioctl without default_ioctl), it's actually better than
a big pushdown as it's less invasive.
> In the meantime, we can move the declaration of the .locked_ioctl callback
> into an #ifdef CONFIG_BKL, to make sure nobody builds a driver with an
> ioctl function that does not get called.
Ok, now how to get this all merged? A single monolithic patch is probably
not appropriate.
The simplest is to have a single branch with the default_ioctl implemented,
and then attributed to drivers in a set cut by subsystems/drivers. And
push the whole for the next -rc1.
The other solution is to push default_ioctl for this release and get
the driver changes to each concerned tree. That said, I suspect a good
part of them are unmaintained, hence the other solution looks better
to me.
Hmm?
There are some very subtle recursive cases in the tty code - hangup
triggered close and consoles being an absolute gem I've just had to debug
in my lock removal bits so far...
Alan
This is a solution that has been tried more than once already. But Linus
has told he wouldn't pull something that turns the bkl into a mutex or a
semaphore.
Plus it's quite hard to tell that it does or not auto-release somewhere
This is often something you can really spot on runtime or on small path
only.
The simple fact the bkl is not always a leaf lock makes it need the
auto-release, otherwise you experience very bad unexpected lock
dependencies.
Yes, I've seen some of them. What I meant above is that with
CONFIG_TTY_MUTEX=y, the TTY code no longer uses the BKL in a
nested way, and quite likely no either code does either.
The TTY code with my patch now has tty_lock() for all cases that
I concluded are never nested in another tty_lock, and tty_lock_nested()
for those I did not understand or that I know they are nested (the
latter type usually comes with a comment). The only difference
between the two is a WARN_ON(tty_locked()) in tty_lock, so we can
see where the analysis was wrong.
Arnd
A side note: A considerable portion of this particular commit in Arnd's
git actually does not deal with .ioctl->.unlocked_ioctl at all, but
purely with .llseek. Many(?) of these changes deal with .ioctl and
.llseek together. (Arnd also says so in the last paragraph of his
changelog.)
IOW there are less .ioctl implementations left than one could think from
a look at the diffstat.
--
Stefan Richter
-=====-==-=- -=-- ----=
http://arcgraph.de/sr/
Ok. Starting from the same observation of simplicity in the remaining code,
we should also be able to find a semi-automatic way of turning the BKL usage
in these drivers into a per-module mutex.
> Plus it's quite hard to tell that it does or not auto-release somewhere
> This is often something you can really spot on runtime or on small path
> only.
Well, the autorelease by itself is not needed anywhere. What is needed
is the consequence of autorelease avoiding AB-BA type deadlocks.
> The simple fact the bkl is not always a leaf lock makes it need the
> auto-release, otherwise you experience very bad unexpected lock
> dependencies.
I'm arguing that we can probably show the BKL to be the outermost
lock for the majority of the remaining drivers, which only get it
from their open(), ioctl() or llseek() functions, which are all
called without any locks held. If the BKL is a regular mutex, lockdep
should warn of the other cases.
Arnd
I don't care much about the unmaintained parts, we can always have a
tree collecting all the patches for those drivers and merge it in -rc1.
I'd say the nicest way would be to get Linus to merge the patch
below now, so we can start queuing stuff in maintainer trees on top
of it, and check against linux-next what is still missing and push
all of them from our branch.
Arnd
---
Subject: [PATCH] introduce CONFIG_BKL and default_ioctl
This is a preparation for the removal of the big kernel lock that
introduces new interfaces for device drivers still using it.
We can start marking those device drivers as 'depends on CONFIG_BKL'
now, and make that symbol optional later, when the point has come
at which we are able to build a kernel without the BKL.
Similarly, device drivers that currently make use of the implicit
BKL locking around the ioctl function can now get annotated by
changing
.ioctl = foo_ioctl,
to
.locked_ioctl = foo_ioctl,
.unlocked_ioctl = default_ioctl,
As soon as no driver remains using the old ioctl callback, it can
get removed.
Signed-off-by: Arnd Bergmann <ar...@arndb.de>
---
fs/ioctl.c | 22 ++++++++++++++++++++++
include/linux/fs.h | 3 +++
include/linux/smp_lock.h | 4 ++++
lib/Kconfig.debug | 10 ++++++++++
4 files changed, 39 insertions(+), 0 deletions(-)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c75110..52c2698 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -58,6 +58,28 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
return error;
}
+#ifdef CONFIG_BKL
+/*
+ * default_ioctl - call unlocked_ioctl with BKL held
+ *
+ * Setting only the the ioctl operation but not unlocked_ioctl will become
+ * invalid in the future, all drivers that are not converted to unlocked_ioctl
+ * should set .unlocked_ioctl = default_ioctl now.
+ */
+long default_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ int error = -ENOTTY;
+ if (filp->f_op->locked_ioctl) {
+ lock_kernel();
+ error = filp->f_op->locked_ioctl(filp->f_path.dentry->d_inode,
+ filp, cmd, arg);
+ unlock_kernel();
+ }
+ return error;
+}
+EXPORT_SYMBOL_GPL(default_ioctl);
+#endif
+
static int ioctl_fibmap(struct file *filp, int __user *p)
{
struct address_space *mapping = filp->f_mapping;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 10b8ded..93afdb4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1492,6 +1492,9 @@ struct file_operations {
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
+#ifdef CONFIG_BKL
+ int (*locked_ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
+#endif
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index 2ea1dd1..9cec605 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -62,4 +62,8 @@ static inline void cycle_kernel_lock(void)
#define kernel_locked() 1
#endif /* CONFIG_LOCK_KERNEL */
+
+loff_t default_llseek(struct file *file, loff_t offset, int origin);
+long default_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+
#endif /* __LINUX_SMPLOCK_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1fafb4b..a4852d6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -444,6 +444,16 @@ config DEBUG_MUTEXES
This feature allows mutex semantics violations to be detected and
reported.
+config BKL
+ def_bool y
+ help
+ This is the traditional lock that is used in old code instead
+ of proper locking. All drivers that use the BKL should depend
+ on this symbol.
+ This configuration option will become user-selectable in the
+ future, as soon as it is possible to build a kernel without
+ it.
+
config DEBUG_LOCK_ALLOC
bool "Lock debugging: detect incorrect freeing of live locks"
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
Given our recent discussions on the llseek topic, it's probably better to
revert most of the changes that purely deal with llseek. My current idea
is to use an explicit default_llseek only if one of the following is given:
- we convert ioctl to unlocked_ioctl in the same file_operations, or
- the module uses the big kernel lock explicitly elsewhere.
Even then, there may be a number of cases where we can show it not
to be necessary, e.g. when the driver does not care about f_pos.
Concurrent llseek is racy by nature, so in most drivers, using the
BKL in llseek does not gain anything over using i_mutex.
Arnd
> On Wednesday 31 March 2010 22:21:23 Arnd Bergmann wrote:
> > Another crazy idea I had was to simply turn the BKL into a regular
> mutex
> > as soon as we can show that all remaining users are of the
> non-recursive
> > kind and don't rely on the autorelease-on-sleep. Doing that would
> be
> > much easier without the pushdown into .unlocked_ioctl than it would
> be
> > with it.
>
> I just looked at all the users of lock_kernel remaining with my patch
> series. For 90% of them, it is completely obvious that they don't
> rely
> on nested locking, and they very much look like they don't need the
> autorelease either, because the BKL was simply pushed down into the
> open, ioctl and llseek functions.
>
> There are a few file systems (udf, ncpfs, autofs, coda, ...) and some
> network protocols (appletalk, ipx, irnet and x25) for which it is not
> obviously, though still quite likely, the case.
>
> So we could actually remove the BKL recursion code soon, or even turn
> all of it into a regular mutex, at least as an experimental option.
Well, that would be quite similar to what we do in real-time with the
"Big Kernel Semaphore". However, Linus didn't want that pushed into
mainstream. As an experimental tree it's fine, but we're really stuck
with removing the BKL one by one until it's gone.
Typo. default_ioctl - call locked_ioctl with BKL held
--
Stefan Richter
-=====-==-=- -=-- ---==
http://arcgraph.de/sr/
So you mean we should attribute explicit default_llseek to the evil
places instead of explicit generic_file_llseek in the safe ones?
That's not a bad idea as it would result in much less changes.
The problem happens the day you switch to generic_file_llseek() as the
new default llseek(), how do you prove that all remaining fops
that don't implement .llseek don't use the bkl? There will be
hundreds of them and saying "we've looked all of them and they don't
need it" will be a scary justification.
On the opposite, attributing explicit generic_file_llseek or
non_seekable_open on the safe places and default_llseek on
the dozens of others doubtful places is easier to get a
safe conclusion.
But yeah we should try, at least attributing explicit
default_llseek won't harm, quite the opposite.
Al, we would like to make this for 2.6.34, so that we can start
converting the drivers that use bkl'ed ioctl to this new scheme
in the relevant maintainers trees.
Can we get your ack?
Thanks.
Do you mind if I rename this to deprecated_ioctl()?
This "default" naming suggests a fallback everyone that don't
need tricky things should use.
Al, we would like to make this for 2.6.34, so that we can start
converting the drivers that use bkl'ed ioctl to this new scheme
in the relevant maintainers trees.
Can we get your ack?
Thanks.
--
Note that an lssek that actually does something is the wrong default,
even if we have it that way currently. If the default is changed it
should be changed to give the semantics that nonseekable_open()
gives us. Given that you guys are so motivated to do something in
this area it might be a good idea to do this in a few simple steps:
- make sure every file operation either has a ->llseek instead or
calls nonseekable_open from ->open
- remove nonseekable_open and all calls to it
- switch all users of no_llseek to not set a ->llsek after auditing
that there's no corner case where we want to allow pread/pwrite
but not lseek, which is rather unlikely
- walk through the instances now using default_llseek and chose
a better implementation for this particular instance. Often
this will be just removing the the lssek method as not allowing
seeks is the right thing to do for character drivers, even if it
is a behaviour change from the current version which usually
is the result of sloppy coding.
Sounds good. The idea was to keep the naming consistent with
default_llseek, but after the discussion on llseek, we will
probably do something else with it anyway.
Arnd
I still think it would be better to always set llseek if we do that,
even if nonseekable_open is already there. I can come up with scripts
that check that case, but checking that the open function always
calls nonseekable_open when it returns success is beyond my grep
skills ;-)
> - remove nonseekable_open and all calls to it
> - switch all users of no_llseek to not set a ->llsek after auditing
> that there's no corner case where we want to allow pread/pwrite
> but not lseek, which is rather unlikely
This parts seems fine.
> - walk through the instances now using default_llseek and chose
> a better implementation for this particular instance. Often
> this will be just removing the the lssek method as not allowing
> seeks is the right thing to do for character drivers, even if it
> is a behaviour change from the current version which usually
> is the result of sloppy coding.
This part is really hard. While in many cases, the driver maintainer
might know what user space is potentially opening some character
device, it's really hard to tell for outsiders whether the behaviour
should be no_llseek (then the default) or noop_llseek to work around
broken user space.
I think the rule set for the conversion needs to be one that can
be done purely based on the code. How about this:
For each file operation {
if (uses f_pos) {
if (same module uses BKL)
-> default_llseek
else
-> generic_file_llseek
} else {
if (driver maintained)
-> no_llseek (with maintainer ACK)
else
-> noop_llseek
}
}
Once that is done, we can turn the default into nonseekable
behavior and start removing instances of explicit no_llseek
and nonseekable_open.
Should we also rename default_llseek to deprecated_llseek in the
process, to go along with the approach for ioctl?
Arnd
Also even if llseek is useless for a module, turning it into
unseekable somehow changes the userspace ABI. I guess this
is harmless 99% of the time, but still. And maintainers tend
not to like that.
>
> I think the rule set for the conversion needs to be one that can
> be done purely based on the code. How about this:
>
> For each file operation {
> if (uses f_pos) {
> if (same module uses BKL)
> -> default_llseek
> else
> -> generic_file_llseek
> } else {
> if (driver maintained)
> -> no_llseek (with maintainer ACK)
> else
> -> noop_llseek
> }
> }
It is also hard to determine a given driver really doesn't use
the bkl. A sole lock_kernel() grep in its files is not sufficient.
But a manual second pass should do the trick.
>
> Once that is done, we can turn the default into nonseekable
> behavior and start removing instances of explicit no_llseek
> and nonseekable_open.
Sounds good.
> Should we also rename default_llseek to deprecated_llseek in the
> process, to go along with the approach for ioctl?
Yeah, preferably.
Thanks.
Why not? In my 2.6.33 based series, I have removed all implicit
uses of the BKL, so we can be sure that it doesn't use the BKL
unless the module is part of that series. The only two cases
I can think of are:
- ioctl callback, which we should do in the same change, like I
originally did. If a driver defines ->ioctl(), make it use
deprecated_ioctl() and default_llseek()/deprecated_llseek.
- Any of the file systems from Jan's series.
Arnd
Yes, it's not quite easily greppable. Making no seek allowed the
implicit default will fortunately allow us to get rid of that oddness.
> > - walk through the instances now using default_llseek and chose
> > a better implementation for this particular instance. Often
> > this will be just removing the the lssek method as not allowing
> > seeks is the right thing to do for character drivers, even if it
> > is a behaviour change from the current version which usually
> > is the result of sloppy coding.
>
> This part is really hard. While in many cases, the driver maintainer
> might know what user space is potentially opening some character
> device, it's really hard to tell for outsiders whether the behaviour
> should be no_llseek (then the default) or noop_llseek to work around
> broken user space.
That's why it's last on the list.
> I think the rule set for the conversion needs to be one that can
> be done purely based on the code. How about this:
>
> For each file operation {
> if (uses f_pos) {
> if (same module uses BKL)
> -> default_llseek
> else
> -> generic_file_llseek
> } else {
> if (driver maintained)
> -> no_llseek (with maintainer ACK)
> else
> -> noop_llseek
> }
> }
>
> Once that is done, we can turn the default into nonseekable
> behavior and start removing instances of explicit no_llseek
> and nonseekable_open.
That plan sounds good to me.
> Should we also rename default_llseek to deprecated_llseek in the
> process, to go along with the approach for ioctl?
I wouldn't bother. If you can actually work on your plan default_llseek
should be gone soon enough.
Ok looks like a good plan then.
Thanks.