#!/bin/sh -x
# this is part 189 of a 197-part archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file patch-2.4.10 continued
if test ! -r _shar_seq_.tmp; then
echo 'Please unpack part 1 first!'
exit 1
fi
(read Scheck
if test "$Scheck" != 189; then
echo "Please unpack part $Scheck next!"
exit 1
else
exit 0
fi
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping patch-2.4.10'
else
echo 'x - continuing with patch-2.4.10'
sed 's/^X//' << 'SHAR_EOF' >> 'patch-2.4.10' &&
X */
X
X /*
- * $Id: hci_core.h,v 1.1 2001/06/01 08:12:11 davem Exp $
+ * $Id: hci_core.h,v 1.11 2001/08/05 06:02:15 maxk Exp $
X */
X
-#ifndef __IF_HCI_CORE_H
-#define __IF_HCI_CORE_H
+#ifndef __HCI_CORE_H
+#define __HCI_CORE_H
X
-#include "hci.h"
+#include <net/bluetooth/hci.h>
X
X /* HCI upper protocols */
X #define HCI_MAX_PROTO 1
@@ -53,155 +53,132 @@
X struct inquiry_entry *list;
X };
X
-static __inline__ void inquiry_cache_init(struct inquiry_cache *cache)
+static inline void inquiry_cache_init(struct inquiry_cache *cache)
X {
X spin_lock_init(&cache->lock);
X cache->list = NULL;
X }
X
-static __inline__ void inquiry_cache_lock(struct inquiry_cache *cache)
+static inline void inquiry_cache_lock(struct inquiry_cache *cache)
X {
X spin_lock(&cache->lock);
X }
X
-static __inline__ void inquiry_cache_unlock(struct inquiry_cache *cache)
+static inline void inquiry_cache_unlock(struct inquiry_cache *cache)
X {
X spin_unlock(&cache->lock);
X }
X
-static __inline__ void inquiry_cache_lock_bh(struct inquiry_cache *cache)
+static inline void inquiry_cache_lock_bh(struct inquiry_cache *cache)
X {
X spin_lock_bh(&cache->lock);
X }
X
-static __inline__ void inquiry_cache_unlock_bh(struct inquiry_cache *cache)
+static inline void inquiry_cache_unlock_bh(struct inquiry_cache *cache)
X {
X spin_unlock_bh(&cache->lock);
X }
X
-static __inline__ long inquiry_cache_age(struct inquiry_cache *cache)
+static inline long inquiry_cache_age(struct inquiry_cache *cache)
X {
X return jiffies - cache->timestamp;
X }
X
-static __inline__ long inquiry_entry_age(struct inquiry_entry *e)
+static inline long inquiry_entry_age(struct inquiry_entry *e)
X {
X return jiffies - e->timestamp;
X }
X extern void inquiry_cache_flush(struct inquiry_cache *cache);
X
-/* ----- Connection hash ----- */
-#define HCI_MAX_CONN 10
+struct hci_dev;
+
+/* ----- HCI Connections ----- */
+struct hci_conn {
+ struct list_head list;
+ bdaddr_t dst;
+ __u16 handle;
+ __u8 type;
+ unsigned int sent;
+
+ struct hci_dev *hdev;
+ void *l2cap_data;
+ void *priv;
+
+ struct sk_buff_head data_q;
+};
X
-/* FIXME:
- * We assume that handle is a number - 0 ... HCI_MAX_CONN.
- */
X struct conn_hash {
- spinlock_t lock;
- unsigned int num;
- void *conn[HCI_MAX_CONN];
+ struct list_head list;
+ spinlock_t lock;
+ unsigned int num;
X };
X
-static __inline__ void conn_hash_init(struct conn_hash *h)
+static inline void conn_hash_init(struct conn_hash *h)
X {
- memset(h, 0, sizeof(struct conn_hash));
+ INIT_LIST_HEAD(&h->list);
X spin_lock_init(&h->lock);
+ h->num = 0;
X }
X
-static __inline__ void conn_hash_lock(struct conn_hash *h)
+static inline void conn_hash_lock(struct conn_hash *h)
X {
X spin_lock(&h->lock);
X }
X
-static __inline__ void conn_hash_unlock(struct conn_hash *h)
+static inline void conn_hash_unlock(struct conn_hash *h)
X {
X spin_unlock(&h->lock);
X }
X
-static __inline__ void *__conn_hash_add(struct conn_hash *h, __u16 handle, void *conn)
+static inline void __conn_hash_add(struct conn_hash *h, __u16 handle, struct hci_conn *c)
X {
- if (!h->conn[handle]) {
- h->conn[handle] = conn;
- h->num++;
- return conn;
- } else
- return NULL;
+ list_add(&c->list, &h->list);
+ h->num++;
X }
X
-static __inline__ void *conn_hash_add(struct conn_hash *h, __u16 handle, void *conn)
+static inline void conn_hash_add(struct conn_hash *h, __u16 handle, struct hci_conn *c)
X {
- if (handle >= HCI_MAX_CONN)
- return NULL;
-
X conn_hash_lock(h);
- conn = __conn_hash_add(h, handle, conn);
+ __conn_hash_add(h, handle, c);
X conn_hash_unlock(h);
-
- return conn;
X }
X
-static __inline__ void *__conn_hash_del(struct conn_hash *h, __u16 handle)
+static inline void __conn_hash_del(struct conn_hash *h, struct hci_conn *c)
X {
- void *conn = h->conn[handle];
-
- if (conn) {
- h->conn[handle] = NULL;
- h->num--;
- return conn;
- } else
- return NULL;
+ list_del(&c->list);
+ h->num--;
X }
X
-static __inline__ void *conn_hash_del(struct conn_hash *h, __u16 handle)
+static inline void conn_hash_del(struct conn_hash *h, struct hci_conn *c)
X {
- void *conn;
-
- if (handle >= HCI_MAX_CONN)
- return NULL;
X conn_hash_lock(h);
- conn = __conn_hash_del(h, handle);
+ __conn_hash_del(h, c);
X conn_hash_unlock(h);
-
- return conn;
X }
X
-static __inline__ void *__conn_hash_lookup(struct conn_hash *h, __u16 handle)
+static inline struct hci_conn *__conn_hash_lookup(struct conn_hash *h, __u16 handle)
X {
- return h->conn[handle];
+ register struct list_head *p;
+ register struct hci_conn *c;
+
+ list_for_each(p, &h->list) {
+ c = list_entry(p, struct hci_conn, list);
+ if (c->handle == handle)
+ return c;
+ }
+ return NULL;
X }
X
-static __inline__ void *conn_hash_lookup(struct conn_hash *h, __u16 handle)
+static inline struct hci_conn *conn_hash_lookup(struct conn_hash *h, __u16 handle)
X {
- void *conn;
-
- if (handle >= HCI_MAX_CONN)
- return NULL;
+ struct hci_conn *conn;
X
X conn_hash_lock(h);
X conn = __conn_hash_lookup(h, handle);
X conn_hash_unlock(h);
-
X return conn;
X }
X
-struct hci_dev;
-
-/* ----- HCI Connections ----- */
-struct hci_conn {
- bdaddr_t dst;
- __u16 handle;
-
- unsigned int acl_sent;
- unsigned int sco_sent;
-
- struct hci_dev *hdev;
- void *l2cap_data;
- void *priv;
-
- struct sk_buff_head acl_q;
- struct sk_buff_head sco_q;
-};
-
X /* ----- HCI Devices ----- */
X struct hci_dev {
X atomic_t refcnt;
@@ -211,6 +188,9 @@
X __u16 id;
X __u8 type;
X bdaddr_t bdaddr;
+ __u8 features[8];
+
+ __u16 pkt_type;
X
X atomic_t cmd_cnt;
X unsigned int acl_cnt;
@@ -232,7 +212,8 @@
X struct sk_buff_head rx_q;
X struct sk_buff_head raw_q;
X struct sk_buff_head cmd_q;
- struct sk_buff *cmd_sent;
+
+ struct sk_buff *sent_cmd;
X
X struct semaphore req_lock;
X wait_queue_head_t req_wait_q;
@@ -251,20 +232,17 @@
X int (*send)(struct sk_buff *skb);
X };
X
-static __inline__ void hci_dev_hold(struct hci_dev *hdev)
+static inline void hci_dev_hold(struct hci_dev *hdev)
X {
X atomic_inc(&hdev->refcnt);
X }
X
-static __inline__ void hci_dev_put(struct hci_dev *hdev)
+static inline void hci_dev_put(struct hci_dev *hdev)
X {
X atomic_dec(&hdev->refcnt);
X }
X
X extern struct hci_dev *hci_dev_get(int index);
-
-#define SENT_CMD_PARAM(X) (((X->cmd_sent->data) + HCI_COMMAND_HDR_SIZE))
-
X extern int hci_register_dev(struct hci_dev *hdev);
X extern int hci_unregister_dev(struct hci_dev *hdev);
X extern int hci_dev_open(__u16 dev);
@@ -275,6 +253,8 @@
X extern int hci_dev_list(unsigned long arg);
X extern int hci_dev_setscan(unsigned long arg);
X extern int hci_dev_setauth(unsigned long arg);
+extern int hci_dev_setptype(unsigned long arg);
+extern int hci_conn_list(unsigned long arg);
X extern int hci_inquiry(unsigned long arg);
X
X extern __u32 hci_dev_setmode(struct hci_dev *hdev, __u32 mode);
@@ -282,18 +262,21 @@
X
X extern int hci_recv_frame(struct sk_buff *skb);
X
+/* ----- LMP capabilities ----- */
+#define lmp_rswitch_capable(dev) (dev->features[0] & LMP_RSWITCH)
+
X /* ----- HCI tasks ----- */
-static __inline__ void hci_sched_cmd(struct hci_dev *hdev)
+static inline void hci_sched_cmd(struct hci_dev *hdev)
X {
X tasklet_schedule(&hdev->cmd_task);
X }
X
-static __inline__ void hci_sched_rx(struct hci_dev *hdev)
+static inline void hci_sched_rx(struct hci_dev *hdev)
X {
X tasklet_schedule(&hdev->rx_task);
X }
X
-static __inline__ void hci_sched_tx(struct hci_dev *hdev)
+static inline void hci_sched_tx(struct hci_dev *hdev)
X {
X tasklet_schedule(&hdev->tx_task);
X }
@@ -330,9 +313,9 @@
X /* HCI info for socket */
X #define hci_pi(sk) ((struct hci_pinfo *) &sk->protinfo)
X struct hci_pinfo {
- struct hci_dev *hdev;
- __u32 cmsg_flags;
- __u32 mask;
+ struct hci_dev *hdev;
+ struct hci_filter filter;
+ __u32 cmsg_mask;
X };
X
X /* ----- HCI requests ----- */
@@ -340,4 +323,4 @@
X #define HCI_REQ_PEND 1
X #define HCI_REQ_CANCELED 2
X
-#endif /* __IF_HCI_CORE_H */
+#endif /* __HCI_CORE_H */
diff -u --recursive --new-file v2.4.9/linux/include/net/bluetooth/hci_emu.h linux/include/net/bluetooth/hci_emu.h
--- v2.4.9/linux/include/net/bluetooth/hci_emu.h Tue Jul 3 17:08:22 2001
+++ linux/include/net/bluetooth/hci_emu.h Wed Dec 31 16:00:00 1969
@@ -1,52 +0,0 @@
-/*
- BlueZ - Bluetooth protocol stack for Linux
- Copyright (C) 2000-2001 Qualcomm Incorporated
-
- Written 2000,2001 by Maxim Krasnyansky <ma...@qualcomm.com>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 as
- published by the Free Software Foundation;
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
- IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
- CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
- ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
- COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
- SOFTWARE IS DISCLAIMED.
-*/
-
-/*
- * $Id: hci_emu.h,v 1.1 2001/06/01 08:12:11 davem Exp $
- */
-
-#ifndef __IF_HCI_EMU_H
-#define __IF_HCI_EMU_H
-
-#ifdef __KERNEL__
-
-struct hci_emu_struct {
- struct hci_dev hdev;
- __u32 flags;
- wait_queue_head_t read_wait;
- struct sk_buff_head readq;
- struct fasync_struct *fasync;
-};
-
-#endif /* __KERNEL__ */
-
-#define HCI_EMU_MINOR 250
-
-/* Max frame size */
-#define HCI_EMU_MAX_FRAME 4096
-
-/* HCI_EMU device flags */
-#define HCI_EMU_FASYNC 0x0010
-
-#endif /* __IF_HCI_EMU_H */
diff -u --recursive --new-file v2.4.9/linux/include/net/bluetooth/hci_uart.h linux/include/net/bluetooth/hci_uart.h
--- v2.4.9/linux/include/net/bluetooth/hci_uart.h Tue Jul 3 17:08:22 2001
+++ linux/include/net/bluetooth/hci_uart.h Fri Sep 7 09:28:38 2001
@@ -23,10 +23,12 @@
X */
X
X /*
- * $Id: hci_uart.h,v 1.1 2001/06/01 08:12:11 davem Exp $
+ * $Id: hci_uart.h,v 1.2 2001/06/02 01:40:08 maxk Exp $
X */
X
-#define HCI_MAX_READ 2048
+#ifndef N_HCI
+#define N_HCI 15
+#endif
X
X #ifdef __KERNEL__
X
diff -u --recursive --new-file v2.4.9/linux/include/net/bluetooth/hci_usb.h linux/include/net/bluetooth/hci_usb.h
--- v2.4.9/linux/include/net/bluetooth/hci_usb.h Tue Jul 3 17:08:22 2001
+++ linux/include/net/bluetooth/hci_usb.h Fri Sep 7 09:28:38 2001
@@ -23,12 +23,10 @@
X */
X
X /*
- * $Id: hci_usb.h,v 1.1 2001/06/01 08:12:11 davem Exp $
+ * $Id: hci_usb.h,v 1.3 2001/06/02 01:40:08 maxk Exp $
X */
X
X #ifdef __KERNEL__
-
-#define HCI_USB_MAX_READ 2048
X
X /* Class, SubClass, and Protocol codes that describe a Bluetooth device */
X #define HCI_DEV_CLASS 0xe0 /* Wireless class */
diff -u --recursive --new-file v2.4.9/linux/include/net/bluetooth/hci_vhci.h linux/include/net/bluetooth/hci_vhci.h
--- v2.4.9/linux/include/net/bluetooth/hci_vhci.h Wed Dec 31 16:00:00 1969
+++ linux/include/net/bluetooth/hci_vhci.h Fri Sep 7 09:28:38 2001
@@ -0,0 +1,50 @@
+/*
+ BlueZ - Bluetooth protocol stack for Linux
+ Copyright (C) 2000-2001 Qualcomm Incorporated
+
+ Written 2000,2001 by Maxim Krasnyansky <ma...@qualcomm.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2 as
+ published by the Free Software Foundation;
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+ CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+ ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+ COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
+ SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * $Id: hci_vhci.h,v 1.2 2001/08/01 01:02:20 maxk Exp $
+ */
+
+#ifndef __HCI_VHCI_H
+#define __HCI_VHCI_H
+
+#ifdef __KERNEL__
+
+struct hci_vhci_struct {
+ struct hci_dev hdev;
+ __u32 flags;
+ wait_queue_head_t read_wait;
+ struct sk_buff_head readq;
+ struct fasync_struct *fasync;
+};
+
+/* VHCI device flags */
+#define VHCI_FASYNC 0x0010
+
+#endif /* __KERNEL__ */
+
+#define VHCI_DEV "/dev/vhci"
+#define VHCI_MINOR 250
+
+#endif /* __HCI_VHCI_H */
diff -u --recursive --new-file v2.4.9/linux/include/net/bluetooth/l2cap.h linux/include/net/bluetooth/l2cap.h
--- v2.4.9/linux/include/net/bluetooth/l2cap.h Tue Jul 3 17:08:22 2001
+++ linux/include/net/bluetooth/l2cap.h Fri Sep 7 09:28:38 2001
@@ -23,7 +23,7 @@
X */
X
X /*
- * $Id: l2cap.h,v 1.1 2001/06/01 08:12:11 davem Exp $
+ * $Id: l2cap.h,v 1.5 2001/06/14 21:28:26 maxk Exp $
X */
X
X #ifndef __L2CAP_H
@@ -32,6 +32,14 @@
X #include <asm/types.h>
X #include <asm/byteorder.h>
X
+/* L2CAP defaults */
+#define L2CAP_DEFAULT_MTU 672
+#define L2CAP_DEFAULT_FLUSH_TO 0xFFFF
+
+#define L2CAP_CONN_TIMEOUT (HZ * 40)
+#define L2CAP_DISCONN_TIMEOUT (HZ * 2)
+#define L2CAP_CONN_IDLE_TIMEOUT (HZ * 60)
+
X /* L2CAP socket address */
X struct sockaddr_l2 {
X sa_family_t l2_family;
@@ -52,11 +60,10 @@
X __u32 delay_var;
X };
X
-/* L2CAP defaults */
-#define L2CAP_DEFAULT_MTU 672
-#define L2CAP_DEFAULT_FLUSH_TO 0xFFFF
-
-#define L2CAP_CONN_TIMEOUT (HZ * 40)
+#define L2CAP_CONNINFO 0x02
+struct l2cap_conninfo {
+ __u16 hci_handle;
+};
X
X /* L2CAP command codes */
X #define L2CAP_COMMAND_REJ 0x01
diff -u --recursive --new-file v2.4.9/linux/include/net/bluetooth/l2cap_core.h linux/include/net/bluetooth/l2cap_core.h
--- v2.4.9/linux/include/net/bluetooth/l2cap_core.h Tue Jul 3 17:08:22 2001
+++ linux/include/net/bluetooth/l2cap_core.h Fri Sep 7 09:28:38 2001
@@ -23,7 +23,7 @@
X */
X
X /*
- * $Id: l2cap_core.h,v 1.1 2001/06/01 08:12:11 davem Exp $
+ * $Id: l2cap_core.h,v 1.6 2001/08/03 04:19:49 maxk Exp $
X */
X
X #ifndef __L2CAP_CORE_H
@@ -41,12 +41,12 @@
X struct list_head conn_list;
X };
X
-static __inline__ void l2cap_iff_lock(struct l2cap_iff *iff)
+static inline void l2cap_iff_lock(struct l2cap_iff *iff)
X {
X spin_lock(&iff->lock);
X }
X
-static __inline__ void l2cap_iff_unlock(struct l2cap_iff *iff)
+static inline void l2cap_iff_unlock(struct l2cap_iff *iff)
X {
X spin_unlock(&iff->lock);
X }
@@ -78,14 +78,16 @@
X __u8 tx_ident;
X
X struct l2cap_chan_list chan_list;
+
+ struct timer_list timer;
X };
X
-static __inline__ void __l2cap_conn_link(struct l2cap_iff *iff, struct l2cap_conn *c)
+static inline void __l2cap_conn_link(struct l2cap_iff *iff, struct l2cap_conn *c)
X {
X list_add(&c->list, &iff->conn_list);
X }
X
-static __inline__ void __l2cap_conn_unlink(struct l2cap_iff *iff, struct l2cap_conn *c)
+static inline void __l2cap_conn_unlink(struct l2cap_iff *iff, struct l2cap_conn *c)
X {
X list_del(&c->list);
X }
@@ -126,9 +128,9 @@
X struct l2cap_accept_q accept_q;
X };
X
-#define CONF_INPUT 0x01
-#define CONF_OUTPUT 0x02
-#define CONF_DONE (CONF_INPUT | CONF_OUTPUT)
+#define CONF_REQ_SENT 0x01
+#define CONF_INPUT_DONE 0x02
+#define CONF_OUTPUT_DONE 0x04
X
X extern struct bluez_sock_list l2cap_sk_list;
X extern struct list_head l2cap_iff_list;
diff -u --recursive --new-file v2.4.9/linux/include/net/irda/irda-usb.h linux/include/net/irda/irda-usb.h
--- v2.4.9/linux/include/net/irda/irda-usb.h Tue May 1 16:05:00 2001
+++ linux/include/net/irda/irda-usb.h Thu Sep 13 16:26:52 2001
@@ -1,13 +1,14 @@
X /*****************************************************************************
X *
X * Filename: irda-usb.h
- * Version: 0.8
+ * Version: 0.9a
X * Description: IrDA-USB Driver
X * Status: Experimental
X * Author: Dag Brattli <d...@brattli.net>
X *
- * Copyright (C) 2001, Dag Brattli <d...@brattli.net>
- * Copyright (C) 2000, Roman Weissgaerber <wei...@vienna.at>
+ * Copyright (C) 2001, Roman Weissgaerber <wei...@vienna.at>
+ * Copyright (C) 2000, Dag Brattli <d...@brattli.net>
+ * Copyright (C) 2001, Jean Tourrilhes <j...@hpl.hp.com>
X *
X * This program is free software; you can redistribute it and/or modify
X * it under the terms of the GNU General Public License as published by
@@ -35,13 +36,44 @@
X #define IRDA_USB_MAX_MTU 2051
X #define IRDA_USB_SPEED_MTU 64 /* Weird, but work like this */
X
-/*
- * Maximum number of URB on the Rx and Tx path, a number larger than 1
- * is required for handling back-to-back (brickwalled) frames
- */
-#define IU_MAX_ACTIVE_RX_URBS 1
-#define IU_MAX_RX_URBS (IU_MAX_ACTIVE_RX_URBS + 1)
-#define IU_MAX_TX_URBS 1
+/* Maximum number of active URBs on the Rx path
+ * This is the number of buffers that we keep between the USB hardware
+ * and the IrDA stack.
+ *
+ * Note : the network layer also queues the packets between us and the
+ * IrDA stack, and is actually pretty fast and efficient in doing that.
+ * Therefore, we don't need to have a large number of URBs, and we can
+ * live perfectly happily with only one. We certainly don't need to keep the
+ * full IrTTP window around here...
+ * I repeat for those who have trouble understanding : 1 URB is plenty
+ * good enough to handle back-to-back (brickwalled) frames. I tried it,
+ * it works (it's the hardware that has trouble doing it).
+ *
+ * Having 2 URBs would allow the USB stack to process one URB while we take
+ * care of the other and then swap the URBs...
+ * On the other hand, increasing the number of URBs will have penalties
+ * in terms of latency and will interact with the link management in IrLAP...
+ * Jean II */
+#define IU_MAX_ACTIVE_RX_URBS 1 /* Don't touch !!! */
+
+/* When an Rx URB is passed back to us, we can't reuse it immediately,
+ * because it may still be referenced by the USB layer. Therefore we
+ * need to keep one extra URB in the Rx path.
+ * Jean II */
+#define IU_MAX_RX_URBS (IU_MAX_ACTIVE_RX_URBS + 1)
+
+/* Various ugly stuff to try to work around generic problems */
+/* The USB layer should send empty frames at the end of packets whose length
+ * is a multiple of the frame size. As it doesn't do it by default, we need
+ * to do it ourselves... See also the following option. */
+#undef IU_BUG_KICK_TX
+/* Use the USB_ZERO_PACKET flag instead of sending an empty frame (above).
+ * Works only with usb-uhci.o so far. Please fix uhci.c and usb-ohci.c */
+#define IU_USE_USB_ZERO_FLAG
+/* Send speed command in case of timeout, just for trying to get things sane */
+#define IU_BUG_KICK_TIMEOUT
+/* Show the USB class descriptor */
+#undef IU_DUMP_CLASS_DESC
X
X /* Inbound header */
X #define MEDIA_BUSY 0x80
@@ -56,15 +88,19 @@
X #define SPEED_1152000 0x08
X #define SPEED_4000000 0x09
X
-/* device_info flags in struct usb_device_id */
+/* Basic capabilities */
X #define IUC_DEFAULT 0x00 /* Basic device compliant with 1.0 spec */
+/* Main bugs */
X #define IUC_SPEED_BUG 0x01 /* Device doesn't set speed after the frame */
-#define IUC_SIR_ONLY 0x02 /* Device doesn't behave at FIR speeds */
-#define IUC_SMALL_PKT 0x04 /* Device doesn't behave with big Rx packets */
-#define IUC_NO_WINDOW 0x08 /* Device doesn't behave with big Rx window */
-#define IUC_MAX_WINDOW 0x10 /* Device underestimate the Rx window */
-#define IUC_MAX_XBOFS 0x20 /* Device need more xbofs than advertised */
+#define IUC_NO_WINDOW 0x02 /* Device doesn't behave with big Rx window */
+#define IUC_NO_TURN 0x04 /* Device doesn't do turnaround by itself */
+/* Not currently used */
+#define IUC_SIR_ONLY 0x08 /* Device doesn't behave at FIR speeds */
+#define IUC_SMALL_PKT 0x10 /* Device doesn't behave with big Rx packets */
+#define IUC_MAX_WINDOW 0x20 /* Device underestimate the Rx window */
+#define IUC_MAX_XBOFS 0x40 /* Device need more xbofs than advertised */
X
+/* USB class definitions */
X #define USB_IRDA_HEADER 0x01
X #define USB_CLASS_IRDA 0x02 /* USB_CLASS_APP_SPEC subclass */
X #define USB_DT_IRDA 0x21
@@ -89,23 +125,29 @@
X int netopen; /* Device is active for network */
X int present; /* Device is present on the bus */
X __u32 capability; /* Capability of the hardware */
- __u8 bulk_in_ep, bulk_out_ep; /* Endpoint assignments */
- __u16 bulk_out_mtu;
-
+ __u8 bulk_in_ep; /* Rx Endpoint assignments */
+ __u8 bulk_out_ep; /* Tx Endpoint assignments */
+ __u16 bulk_out_mtu; /* Max Tx packet size in bytes */
+ __u8 bulk_int_ep; /* Interrupt Endpoint assignments */
+
X wait_queue_head_t wait_q; /* for timeouts */
X
- struct urb rx_urb[IU_MAX_RX_URBS]; /* URBs used to receive data frames */
- struct urb *rx_idle_urb; /* Pointer to idle URB in Rx path */
+ struct urb rx_urb[IU_MAX_RX_URBS]; /* URBs used to receive data frames */
+ struct urb *idle_rx_urb; /* Pointer to idle URB in Rx path */
X struct urb tx_urb; /* URB used to send data frames */
X struct urb speed_urb; /* URB used to send speed commands */
+#ifdef IU_BUG_KICK_TX
+ struct urb empty_urb; /* URB used to send empty commands */
+#endif /* IU_BUG_KICK_TX */
X
X struct net_device *netdev; /* Yes! we are some kind of netdev. */
X struct net_device_stats stats;
X struct irlap_cb *irlap; /* The link layer we are binded to */
- struct qos_info qos;
+ struct qos_info qos;
X hashbin_t *tx_list; /* Queued transmit skb's */
+ char *speed_buff; /* Buffer for speed changes */
X
- struct timeval stamp;
+ struct timeval stamp;
X struct timeval now;
X
X spinlock_t lock; /* For serializing operations */
@@ -116,5 +158,4 @@
X __s32 new_speed; /* speed we need to set */
X __u32 flags; /* Interface flags */
X };
-
X
diff -u --recursive --new-file v2.4.9/linux/include/net/irda/irias_object.h linux/include/net/irda/irias_object.h
--- v2.4.9/linux/include/net/irda/irias_object.h Mon Dec 11 13:33:09 2000
+++ linux/include/net/irda/irias_object.h Thu Sep 13 16:26:52 2001
@@ -78,7 +78,7 @@
X struct ias_value *value; /* Attribute value */
X };
X
-char *strdup(char *str);
+char *strndup(char *str, int max);
X
X struct ias_object *irias_new_object(char *name, int id);
X void irias_insert_object(struct ias_object *obj);
diff -u --recursive --new-file v2.4.9/linux/include/net/irda/irlap.h linux/include/net/irda/irlap.h
--- v2.4.9/linux/include/net/irda/irlap.h Wed Jul 25 17:10:26 2001
+++ linux/include/net/irda/irlap.h Thu Sep 13 16:26:52 2001
@@ -66,7 +66,9 @@
X irda_queue_t q; /* Must be first */
X magic_t magic;
X
+ /* Device we are attached to */
X struct net_device *netdev;
+ char hw_name[2*IFNAMSIZ + 1];
X
X /* Connection state */
X volatile IRLAP_STATE state; /* Current state */
@@ -163,7 +165,8 @@
X int irlap_init(void);
X void irlap_cleanup(void);
X
-struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos);
+struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
+ char * hw_name);
X void irlap_close(struct irlap_cb *self);
X
X void irlap_connect_request(struct irlap_cb *self, __u32 daddr,
diff -u --recursive --new-file v2.4.9/linux/include/net/irda/irlmp.h linux/include/net/irda/irlmp.h
--- v2.4.9/linux/include/net/irda/irlmp.h Mon Dec 11 12:59:38 2000
+++ linux/include/net/irda/irlmp.h Thu Sep 13 16:26:52 2001
@@ -217,6 +217,7 @@
X void irlmp_discovery_confirm(hashbin_t *discovery_log);
X void irlmp_discovery_request(int nslots);
X struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask);
+void irlmp_do_expiry(void);
X void irlmp_do_discovery(int nslots);
X discovery_t *irlmp_get_discovery_response(void);
X void irlmp_discovery_expiry(discovery_t *expiry);
@@ -246,6 +247,7 @@
X extern int sysctl_discovery_timeout;
X extern int sysctl_discovery_slots;
X extern int sysctl_discovery;
+extern int sysctl_lap_keepalive_time; /* in ms, default is LM_IDLE_TIMEOUT */
X extern struct irlmp_cb *irlmp;
X
X static inline hashbin_t *irlmp_get_cachelog(void) { return irlmp->cachelog; }
diff -u --recursive --new-file v2.4.9/linux/include/net/sock.h linux/include/net/sock.h
--- v2.4.9/linux/include/net/sock.h Mon Aug 27 12:41:49 2001
+++ linux/include/net/sock.h Sun Sep 23 10:31:33 2001
@@ -1245,7 +1245,7 @@
X
X static inline int sock_rcvlowat(struct sock *sk, int waitall, int len)
X {
- return (waitall ? len : min(int, sk->rcvlowat, len)) ? : 1;
+ return (waitall ? len : min_t(int, sk->rcvlowat, len)) ? : 1;
X }
X
X /* Alas, with timeout socket operations are not restartable.
diff -u --recursive --new-file v2.4.9/linux/include/net/tcp.h linux/include/net/tcp.h
--- v2.4.9/linux/include/net/tcp.h Mon Aug 27 12:41:49 2001
+++ linux/include/net/tcp.h Sun Sep 23 10:31:58 2001
@@ -919,12 +919,12 @@
X static inline void tcp_initialize_rcv_mss(struct sock *sk)
X {
X struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- unsigned int hint = min(unsigned int, tp->advmss, tp->mss_cache);
+ unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
X
- hint = min(unsigned int, hint, tp->rcv_wnd/2);
+ hint = min_t(unsigned int, hint, tp->rcv_wnd/2);
X
- tp->ack.rcv_mss = max(unsigned int,
- min(unsigned int,
+ tp->ack.rcv_mss = max_t(unsigned int,
+ min_t(unsigned int,
X hint, TCP_MIN_RCVMSS),
X TCP_MIN_MSS);
X }
@@ -1077,7 +1077,7 @@
X */
X static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
X {
- return max(u32, tp->snd_cwnd >> 1, 2);
+ return max_t(u32, tp->snd_cwnd >> 1, 2);
X }
X
X /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
@@ -1089,7 +1089,7 @@
X if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
X return tp->snd_ssthresh;
X else
- return max(u32, tp->snd_ssthresh,
+ return max_t(u32, tp->snd_ssthresh,
X ((tp->snd_cwnd >> 1) +
X (tp->snd_cwnd >> 2)));
X }
@@ -1126,7 +1126,7 @@
X {
X tp->undo_marker = 0;
X tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
- tp->snd_cwnd = min(u32, tp->snd_cwnd,
+ tp->snd_cwnd = min_t(u32, tp->snd_cwnd,
X tcp_packets_in_flight(tp) + 1);
X tp->snd_cwnd_cnt = 0;
X tp->high_seq = tp->snd_nxt;
@@ -1493,7 +1493,7 @@
X /* If no clamp set the clamp to the max possible scaled window */
X if (*window_clamp == 0)
X (*window_clamp) = (65535 << 14);
- space = min(u32, *window_clamp, space);
+ space = min_t(u32, *window_clamp, space);
X
X /* Quantize space offering to a multiple of mss if possible. */
X if (space > mss)
@@ -1505,7 +1505,7 @@
X * our initial window offering to 32k. There should also
X * be a sysctl option to stop being nice.
X */
- (*rcv_wnd) = min(int, space, MAX_TCP_WINDOW);
+ (*rcv_wnd) = min_t(int, space, MAX_TCP_WINDOW);
X (*rcv_wscale) = 0;
X if (wscale_ok) {
X /* See RFC1323 for an explanation of the limit to 14 */
@@ -1514,7 +1514,7 @@
X (*rcv_wscale)++;
X }
X if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
- space - max(unsigned int, (space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
+ space - max_t(unsigned int, (space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
X (*rcv_wscale)--;
X }
X
@@ -1532,7 +1532,7 @@
X *rcv_wnd = init_cwnd*mss;
X }
X /* Set the clamp no higher than max representable value */
- (*window_clamp) = min(u32, 65535 << (*rcv_wscale), *window_clamp);
+ (*window_clamp) = min_t(u32, 65535 << (*rcv_wscale), *window_clamp);
X }
X
X static inline int tcp_win_from_space(int space)
@@ -1698,8 +1698,8 @@
X static inline void tcp_moderate_sndbuf(struct sock *sk)
X {
X if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) {
- sk->sndbuf = min(int, sk->sndbuf, sk->wmem_queued/2);
- sk->sndbuf = max(int, sk->sndbuf, SOCK_MIN_SNDBUF);
+ sk->sndbuf = min_t(int, sk->sndbuf, sk->wmem_queued/2);
+ sk->sndbuf = max_t(int, sk->sndbuf, SOCK_MIN_SNDBUF);
X }
X }
X
diff -u --recursive --new-file v2.4.9/linux/include/scsi/sg.h linux/include/scsi/sg.h
--- v2.4.9/linux/include/scsi/sg.h Wed Jul 25 17:10:26 2001
+++ linux/include/scsi/sg.h Fri Sep 7 09:28:37 2001
@@ -11,9 +11,13 @@
X Version 2 and 3 extensions to driver:
X * Copyright (C) 1998 - 2001 Douglas Gilbert
X
- Version: 3.1.19 (20010623)
+ Version: 3.1.20 (20010814)
X This version is for 2.4 series kernels.
X
+ Changes since 3.1.19 (20010623)
+ - add SG_GET_ACCESS_COUNT ioctl
+ - make open() increment and close() decrement access_count
+ - only register first 256 devices, reject subsequent devices
X Changes since 3.1.18 (20010505)
X - fix bug that caused long wait when large buffer requested
X - fix leak in error case of sg_new_read() [report: Eric Barton]
@@ -220,6 +224,9 @@
X /* How to treat EINTR during SG_IO ioctl(), only in SG 3.x series */
X #define SG_SET_KEEP_ORPHAN 0x2287 /* 1 -> hold for read(), 0 -> drop (def) */
X #define SG_GET_KEEP_ORPHAN 0x2288
+
+/* yields scsi midlevel's access_count for this SCSI device */
+#define SG_GET_ACCESS_COUNT 0x2289
X
X
X #define SG_SCATTER_SZ (8 * 4096) /* PAGE_SIZE not available to user */
diff -u --recursive --new-file v2.4.9/linux/include/video/fbcon.h linux/include/video/fbcon.h
--- v2.4.9/linux/include/video/fbcon.h Mon Nov 27 17:11:26 2000
+++ linux/include/video/fbcon.h Thu Sep 13 16:25:07 2001
@@ -206,7 +206,7 @@
X #define fb_writel sbus_writel
X #define fb_memset sbus_memset_io
X
-#elif defined(__i386__) || defined(__alpha__)
+#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__)
X
X #define fb_readb __raw_readb
X #define fb_readw __raw_readw
diff -u --recursive --new-file v2.4.9/linux/init/main.c linux/init/main.c
--- v2.4.9/linux/init/main.c Wed Jul 25 17:10:26 2001
+++ linux/init/main.c Thu Sep 20 21:02:01 2001
@@ -21,7 +21,6 @@
X #include <linux/utsname.h>
X #include <linux/ioport.h>
X #include <linux/init.h>
-#include <linux/raid/md.h>
X #include <linux/smp_lock.h>
X #include <linux/blk.h>
X #include <linux/hdreg.h>
@@ -483,7 +482,7 @@
X #ifdef CONFIG_X86_IO_APIC
X static void __init smp_init(void)
X {
- IO_APIC_init_uniprocessor();
+ APIC_init_uniprocessor();
X }
X #else
X #define smp_init() do { } while (0)
diff -u --recursive --new-file v2.4.9/linux/ipc/msg.c linux/ipc/msg.c
--- v2.4.9/linux/ipc/msg.c Mon Feb 19 10:18:18 2001
+++ linux/ipc/msg.c Fri Sep 14 14:17:00 2001
@@ -613,7 +613,7 @@
X wake_up_process(msr->r_tsk);
X } else {
X msr->r_msg = msg;
- msq->q_lspid = msr->r_tsk->pid;
+ msq->q_lrpid = msr->r_tsk->pid;
X msq->q_rtime = CURRENT_TIME;
X wake_up_process(msr->r_tsk);
X return 1;
@@ -683,6 +683,9 @@
X goto retry;
X }
X
+ msq->q_lspid = current->pid;
+ msq->q_stime = CURRENT_TIME;
+
X if(!pipelined_send(msq,msg)) {
X /* noone is waiting for this message, enqueue it */
X list_add_tail(&msg->m_list,&msq->q_messages);
@@ -694,8 +697,6 @@
X
X err = 0;
X msg = NULL;
- msq->q_lspid = current->pid;
- msq->q_stime = CURRENT_TIME;
X
X out_unlock_free:
X msg_unlock(msqid);
@@ -742,6 +743,10 @@
X if(msq==NULL)
X return -EINVAL;
X retry:
+ err = -EIDRM;
+ if (msg_checkid(msq,msqid))
+ goto out_unlock;
+
X err=-EACCES;
X if (ipcperms (&msq->q_perm, S_IRUGO))
X goto out_unlock;
diff -u --recursive --new-file v2.4.9/linux/ipc/shm.c linux/ipc/shm.c
--- v2.4.9/linux/ipc/shm.c Sat May 19 17:47:55 2001
+++ linux/ipc/shm.c Fri Sep 14 14:17:38 2001
@@ -606,6 +606,11 @@
X shp = shm_lock(shmid);
X if(shp == NULL)
X return -EINVAL;
+ err = shm_checkid(shp,shmid);
+ if (err) {
+ shm_unlock(shmid);
+ return err;
+ }
X if (ipcperms(&shp->shm_perm, acc_mode)) {
X shm_unlock(shmid);
X return -EACCES;
diff -u --recursive --new-file v2.4.9/linux/kernel/Makefile linux/kernel/Makefile
--- v2.4.9/linux/kernel/Makefile Fri Dec 29 14:07:24 2000
+++ linux/kernel/Makefile Sun Sep 16 21:22:40 2001
@@ -9,7 +9,7 @@
X
X O_TARGET := kernel.o
X
-export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o
+export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o
X
X obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
X module.o exit.o itimer.o info.o time.o softirq.o resource.o \
diff -u --recursive --new-file v2.4.9/linux/kernel/exec_domain.c linux/kernel/exec_domain.c
--- v2.4.9/linux/kernel/exec_domain.c Mon Jun 26 11:36:43 2000
+++ linux/kernel/exec_domain.c Fri Sep 21 11:22:38 2001
@@ -1,11 +1,30 @@
-#include <linux/mm.h>
-#include <linux/smp_lock.h>
+/*
+ * Handling of different ABIs (personalities).
+ *
+ * We group personalities into execution domains which have their
+ * own handlers for kernel entry points, signal mapping, etc...
+ *
+ * 2001-05-06 Complete rewrite, Christoph Hellwig (h...@caldera.de)
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
X #include <linux/module.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
X
-static asmlinkage void no_lcall7(int segment, struct pt_regs * regs);
X
+static void default_handler(int, struct pt_regs *);
X
-static unsigned long ident_map[32] = {
+static struct exec_domain *exec_domains = &default_exec_domain;
+static rwlock_t exec_domains_lock = RW_LOCK_UNLOCKED;
+
+
+static u_long ident_map[32] = {
X 0, 1, 2, 3, 4, 5, 6, 7,
X 8, 9, 10, 11, 12, 13, 14, 15,
X 16, 17, 18, 19, 20, 21, 22, 23,
@@ -13,151 +32,259 @@
X };
X
X struct exec_domain default_exec_domain = {
- "Linux", /* name */
- no_lcall7, /* lcall7 causes a seg fault. */
- 0, 0xff, /* All personalities. */
- ident_map, /* Identity map signals. */
- ident_map, /* - both ways. */
- NULL, /* No usage counter. */
- NULL /* Nothing after this in the list. */
+ "Linux", /* name */
+ default_handler, /* lcall7 causes a seg fault. */
+ 0, 0, /* PER_LINUX personality. */
+ ident_map, /* Identity map signals. */
+ ident_map, /* - both ways. */
X };
X
-static struct exec_domain *exec_domains = &default_exec_domain;
-static rwlock_t exec_domains_lock = RW_LOCK_UNLOCKED;
X
-static asmlinkage void no_lcall7(int segment, struct pt_regs * regs)
+static void
+default_handler(int segment, struct pt_regs *regp)
X {
- /*
- * This may have been a static linked SVr4 binary, so we would have the
- * personality set incorrectly. Check to see whether SVr4 is available,
- * and use it, otherwise give the user a SEGV.
- */
- set_personality(PER_SVR4);
+ u_long pers = 0;
X
- if (current->exec_domain && current->exec_domain->handler
- && current->exec_domain->handler != no_lcall7) {
- current->exec_domain->handler(segment, regs);
- return;
+ /*
+ * This may have been a static linked SVr4 binary, so we would
+ * have the personality set incorrectly. Or it might have been
+ * a Solaris/x86 binary. We can tell which because the former
+ * uses lcall7, while the latter used lcall 0x27.
+ * Try to find or load the appropriate personality, and fall back
+ * to just forcing a SEGV.
+ *
+ * XXX: this is IA32-specific and should be moved to the MD-tree.
+ */
+ switch (segment) {
+#ifdef __i386__
+ case 0x07:
+ pers = abi_defhandler_lcall7;
+ break;
+ case 0x27:
+ pers = PER_SOLARIS;
+ break;
+#endif
X }
+ set_personality(pers);
X
- send_sig(SIGSEGV, current, 1);
+ if (current->exec_domain->handler != default_handler)
+ current->exec_domain->handler(segment, regp);
+ else
+ send_sig(SIGSEGV, current, 1);
X }
X
-static struct exec_domain *lookup_exec_domain(unsigned long personality)
+static struct exec_domain *
+lookup_exec_domain(u_long personality)
X {
- unsigned long pers = personality & PER_MASK;
- struct exec_domain *it;
-
+ struct exec_domain * ep;
+ char buffer[30];
+ u_long pers = personality(personality);
+
X read_lock(&exec_domains_lock);
- for (it=exec_domains; it; it=it->next)
- if (pers >= it->pers_low && pers <= it->pers_high) {
- if (!try_inc_mod_count(it->module))
- continue;
- read_unlock(&exec_domains_lock);
- return it;
- }
+ for (ep = exec_domains; ep; ep = ep->next) {
+ if (pers >= ep->pers_low && pers <= ep->pers_high)
+ if (try_inc_mod_count(ep->module))
+ goto out;
+ }
+
+#ifdef CONFIG_KMOD
X read_unlock(&exec_domains_lock);
+ sprintf(buffer, "personality-%ld", pers);
+ request_module(buffer);
+ read_lock(&exec_domains_lock);
+
+ for (ep = exec_domains; ep; ep = ep->next) {
+ if (pers >= ep->pers_low && pers <= ep->pers_high)
+ if (try_inc_mod_count(ep->module))
+ goto out;
+ }
+#endif
X
- /* Should never get this far. */
- printk(KERN_ERR "No execution domain for personality 0x%02lx\n", pers);
- return NULL;
+ ep = &default_exec_domain;
+out:
+ read_unlock(&exec_domains_lock);
+ return (ep);
X }
X
-int register_exec_domain(struct exec_domain *it)
+int
+register_exec_domain(struct exec_domain *ep)
X {
- struct exec_domain *tmp;
+ struct exec_domain *tmp;
+ int err = -EBUSY;
X
- if (!it)
+ if (ep == NULL)
X return -EINVAL;
- if (it->next)
+
+ if (ep->next != NULL)
X return -EBUSY;
+
X write_lock(&exec_domains_lock);
- for (tmp=exec_domains; tmp; tmp=tmp->next)
- if (tmp == it) {
- write_unlock(&exec_domains_lock);
- return -EBUSY;
- }
- it->next = exec_domains;
- exec_domains = it;
+ for (tmp = exec_domains; tmp; tmp = tmp->next) {
+ if (tmp == ep)
+ goto out;
+ }
+
+ ep->next = exec_domains;
+ exec_domains = ep;
+ err = 0;
+
+out:
X write_unlock(&exec_domains_lock);
- return 0;
+ return (err);
X }
X
-int unregister_exec_domain(struct exec_domain *it)
+int
+unregister_exec_domain(struct exec_domain *ep)
X {
- struct exec_domain ** tmp;
+ struct exec_domain **epp;
X
- tmp = &exec_domains;
+ epp = &exec_domains;
X write_lock(&exec_domains_lock);
- while (*tmp) {
- if (it == *tmp) {
- *tmp = it->next;
- it->next = NULL;
- write_unlock(&exec_domains_lock);
- return 0;
- }
- tmp = &(*tmp)->next;
+ for (epp = &exec_domains; *epp; epp = &(*epp)->next) {
+ if (ep == *epp)
+ goto unregister;
X }
X write_unlock(&exec_domains_lock);
X return -EINVAL;
+
+unregister:
+ *epp = ep->next;
+ ep->next = NULL;
+ write_unlock(&exec_domains_lock);
+ return 0;
X }
X
-void __set_personality(unsigned long personality)
+int
+__set_personality(u_long personality)
X {
- struct exec_domain *it, *prev;
+ struct exec_domain *ep, *oep;
X
- it = lookup_exec_domain(personality);
- if (it == current->exec_domain) {
+ ep = lookup_exec_domain(personality);
+ if (ep == NULL)
+ return -EINVAL;
+ if (ep == current->exec_domain) {
X current->personality = personality;
- return;
+ return 0;
X }
- if (!it)
- return;
+
X if (atomic_read(¤t->fs->count) != 1) {
- struct fs_struct *new = copy_fs_struct(current->fs);
- struct fs_struct *old;
- if (!new) {
- put_exec_domain(it);
- return;
+ struct fs_struct *fsp, *ofsp;
+
+ fsp = copy_fs_struct(current->fs);
+ if (fsp == NULL) {
+ put_exec_domain(ep);
+ return -ENOMEM;
X }
+
X task_lock(current);
- old = current->fs;
- current->fs = new;
+ ofsp = current->fs;
+ current->fs = fsp;
X task_unlock(current);
- put_fs_struct(old);
+
+ put_fs_struct(ofsp);
X }
+
X /*
X * At that point we are guaranteed to be the sole owner of
X * current->fs.
X */
+
X current->personality = personality;
- prev = current->exec_domain;
- current->exec_domain = it;
+ oep = current->exec_domain;
+ current->exec_domain = ep;
X set_fs_altroot();
- put_exec_domain(prev);
-}
X
-asmlinkage long sys_personality(unsigned long personality)
-{
- int ret = current->personality;
- if (personality != 0xffffffff) {
- set_personality(personality);
- if (current->personality != personality)
- ret = -EINVAL;
- }
- return ret;
+ put_exec_domain(oep);
+
+ printk(KERN_DEBUG "[%s:%d]: set personality to %lx\n",
+ current->comm, current->pid, personality);
+ return 0;
X }
X
-int get_exec_domain_list(char * page)
+int
+get_exec_domain_list(char *page)
X {
- int len = 0;
- struct exec_domain * e;
+ struct exec_domain *ep;
+ int len = 0;
X
X read_lock(&exec_domains_lock);
- for (e=exec_domains; e && len < PAGE_SIZE - 80; e=e->next)
- len += sprintf(page+len, "%d-%d\t%-16s\t[%s]\n",
- e->pers_low, e->pers_high, e->name,
- e->module ? e->module->name : "kernel");
+ for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
+ len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
+ ep->pers_low, ep->pers_high, ep->name,
+ ep->module ? ep->module->name : "kernel");
X read_unlock(&exec_domains_lock);
- return len;
+ return (len);
X }
+
+asmlinkage long
+sys_personality(u_long personality)
+{
+ if (personality == 0xffffffff)
+ goto ret;
+ set_personality(personality);
+ if (current->personality != personality)
+ return -EINVAL;
+ret:
+ return (current->personality);
+}
+
+
+EXPORT_SYMBOL(register_exec_domain);
+EXPORT_SYMBOL(unregister_exec_domain);
+EXPORT_SYMBOL(__set_personality);
+
+/*
+ * We have to have all sysctl handling for the Linux-ABI
+ * in one place as the dynamic registration of sysctls is
+ * horribly crufty in Linux <= 2.4.
+ *
+ * I hope the new sysctl schemes discussed for future versions
+ * will obsolete this.
+ *
+ * --hch
+ */
+
+u_long abi_defhandler_coff = PER_SCOSVR3;
+u_long abi_defhandler_elf = PER_LINUX;
+u_long abi_defhandler_lcall7 = PER_SVR4;
+u_long abi_defhandler_libcso = PER_SVR4;
+u_int abi_traceflg;
+int abi_fake_utsname;
+
+static struct ctl_table abi_table[] = {
+ {ABI_DEFHANDLER_COFF, "defhandler_coff", &abi_defhandler_coff,
+ sizeof(int), 0644, NULL, &proc_doulongvec_minmax},
+ {ABI_DEFHANDLER_ELF, "defhandler_elf", &abi_defhandler_elf,
+ sizeof(int), 0644, NULL, &proc_doulongvec_minmax},
+ {ABI_DEFHANDLER_LCALL7, "defhandler_lcall7", &abi_defhandler_lcall7,
+ sizeof(int), 0644, NULL, &proc_doulongvec_minmax},
+ {ABI_DEFHANDLER_LIBCSO, "defhandler_libcso", &abi_defhandler_libcso,
+ sizeof(int), 0644, NULL, &proc_doulongvec_minmax},
+ {ABI_TRACE, "trace", &abi_traceflg,
+ sizeof(u_int), 0644, NULL, &proc_dointvec},
+ {ABI_FAKE_UTSNAME, "fake_utsname", &abi_fake_utsname,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {0}
+};
+
+static struct ctl_table abi_root_table[] = {
+ {CTL_ABI, "abi", NULL, 0, 0555, abi_table},
+ {0}
+};
+
+static int __init
+abi_register_sysctl(void)
+{
+ register_sysctl_table(abi_root_table, 1);
+ return 0;
+}
+
+__initcall(abi_register_sysctl);
+
+
+EXPORT_SYMBOL(abi_defhandler_coff);
+EXPORT_SYMBOL(abi_defhandler_elf);
+EXPORT_SYMBOL(abi_defhandler_lcall7);
+EXPORT_SYMBOL(abi_defhandler_libcso);
+EXPORT_SYMBOL(abi_traceflg);
+EXPORT_SYMBOL(abi_fake_utsname);
diff -u --recursive --new-file v2.4.9/linux/kernel/exit.c linux/kernel/exit.c
--- v2.4.9/linux/kernel/exit.c Mon Aug 27 12:41:49 2001
+++ linux/kernel/exit.c Mon Sep 10 13:04:33 2001
@@ -10,6 +10,7 @@
X #include <linux/smp_lock.h>
X #include <linux/module.h>
X #include <linux/completion.h>
+#include <linux/personality.h>
X #include <linux/tty.h>
X #ifdef CONFIG_BSD_PROCESS_ACCT
X #include <linux/acct.h>
diff -u --recursive --new-file v2.4.9/linux/kernel/fork.c linux/kernel/fork.c
--- v2.4.9/linux/kernel/fork.c Wed Jul 25 17:10:26 2001
+++ linux/kernel/fork.c Mon Sep 17 21:46:04 2001
@@ -8,7 +8,7 @@
X * 'fork.c' contains the help-routines for the 'fork' system call
X * (see also entry.S and others).
X * Fork is rather simple, once you get the hang of it, but the memory
- * management can be a bitch. See 'mm/memory.c': 'copy_page_tables()'
+ * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
X */
X
X #include <linux/config.h>
@@ -19,6 +19,7 @@
X #include <linux/module.h>
X #include <linux/vmalloc.h>
X #include <linux/completion.h>
+#include <linux/personality.h>
X
X #include <asm/pgtable.h>
X #include <asm/pgalloc.h>
@@ -39,8 +40,8 @@
X {
X unsigned long flags;
X
- wq_write_lock_irqsave(&q->lock, flags);
X wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ wq_write_lock_irqsave(&q->lock, flags);
X __add_wait_queue(q, wait);
X wq_write_unlock_irqrestore(&q->lock, flags);
X }
@@ -49,8 +50,8 @@
X {
X unsigned long flags;
X
- wq_write_lock_irqsave(&q->lock, flags);
X wait->flags |= WQ_FLAG_EXCLUSIVE;
+ wq_write_lock_irqsave(&q->lock, flags);
X __add_wait_queue_tail(q, wait);
X wq_write_unlock_irqrestore(&q->lock, flags);
X }
@@ -71,7 +72,7 @@
X * value: the thread structures can take up at most half
X * of memory.
X */
- max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;
+ max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
X
X init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
X init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
@@ -101,6 +102,7 @@
X for_each_task(p) {
X if(p->pid == last_pid ||
X p->pgrp == last_pid ||
+ p->tgid == last_pid ||
X p->session == last_pid) {
X if(++last_pid >= next_safe) {
X if(last_pid & 0xffff8000)
@@ -131,12 +133,24 @@
X flush_cache_mm(current->mm);
X mm->locked_vm = 0;
X mm->mmap = NULL;
- mm->mmap_avl = NULL;
X mm->mmap_cache = NULL;
X mm->map_count = 0;
+ mm->rss = 0;
X mm->cpu_vm_mask = 0;
X mm->swap_address = 0;
X pprev = &mm->mmap;
+
+ /*
+ * Add it to the mmlist after the parent.
+ * Doing it this way means that we can order the list,
+ * and fork() won't mess up the ordering significantly.
+ * Add it first so that swapoff can see any swap entries.
+ */
+ spin_lock(&mmlist_lock);
+ list_add(&mm->mmlist, ¤t->mm->mmlist);
+ mmlist_nr++;
+ spin_unlock(&mmlist_lock);
+
X for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
X struct file *file;
X
@@ -149,7 +163,6 @@
X *tmp = *mpnt;
X tmp->vm_flags &= ~VM_LOCKED;
X tmp->vm_mm = mm;
- mm->map_count++;
X tmp->vm_next = NULL;
X file = tmp->vm_file;
X if (file) {
@@ -168,24 +181,25 @@
X spin_unlock(&inode->i_mapping->i_shared_lock);
X }
X
- /* Copy the pages, but defer checking for errors */
- retval = copy_page_range(mm, current->mm, tmp);
- if (!retval && tmp->vm_ops && tmp->vm_ops->open)
- tmp->vm_ops->open(tmp);
-
X /*
- * Link in the new vma even if an error occurred,
- * so that exit_mmap() can clean up the mess.
+ * Link in the new vma and copy the page table entries:
+ * link in first so that swapoff can see swap entries.
X */
+ spin_lock(&mm->page_table_lock);
X *pprev = tmp;
X pprev = &tmp->vm_next;
+ mm->map_count++;
+ retval = copy_page_range(mm, current->mm, tmp);
+ spin_unlock(&mm->page_table_lock);
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
X
X if (retval)
X goto fail_nomem;
X }
X retval = 0;
- if (mm->map_count >= AVL_MIN_MAP_COUNT)
- build_mmap_avl(mm);
+ build_mmap_rb(mm);
X
X fail_nomem:
X flush_tlb_mm(current->mm);
@@ -246,6 +260,9 @@
X void mmput(struct mm_struct *mm)
X {
X if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
+ extern struct mm_struct *swap_mm;
+ if (swap_mm == mm)
+ swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
X list_del(&mm->mmlist);
X mmlist_nr--;
X spin_unlock(&mmlist_lock);
@@ -320,18 +337,6 @@
X retval = dup_mmap(mm);
X up_write(&oldmm->mmap_sem);
X
- /*
- * Add it to the mmlist after the parent.
- *
- * Doing it this way means that we can order
- * the list, and fork() won't mess up the
- * ordering significantly.
- */
- spin_lock(&mmlist_lock);
- list_add(&mm->mmlist, &oldmm->mmlist);
- mmlist_nr++;
- spin_unlock(&mmlist_lock);
-
X if (retval)
X goto free_pt;
X
@@ -643,6 +648,8 @@
X #endif
X p->lock_depth = -1; /* -1 = no lock */
X p->start_time = jiffies;
+
+ INIT_LIST_HEAD(&p->local_pages);
X
X retval = -ENOMEM;
X /* copy all the process information */
diff -u --recursive --new-file v2.4.9/linux/kernel/ksyms.c linux/kernel/ksyms.c
--- v2.4.9/linux/kernel/ksyms.c Mon Aug 27 12:41:49 2001
+++ linux/kernel/ksyms.c Mon Sep 17 15:28:32 2001
@@ -210,6 +210,7 @@
X EXPORT_SYMBOL(generic_file_read);
X EXPORT_SYMBOL(do_generic_file_read);
X EXPORT_SYMBOL(generic_file_write);
+EXPORT_SYMBOL(generic_direct_IO);
X EXPORT_SYMBOL(generic_file_mmap);
X EXPORT_SYMBOL(generic_ro_fops);
X EXPORT_SYMBOL(generic_buffer_fdatasync);
@@ -284,8 +285,6 @@
X EXPORT_SYMBOL(tty_std_termios);
X
X /* block device driver support */
-EXPORT_SYMBOL(block_read);
-EXPORT_SYMBOL(block_write);
X EXPORT_SYMBOL(blksize_size);
X EXPORT_SYMBOL(hardsect_size);
X EXPORT_SYMBOL(blk_size);
@@ -299,7 +298,6 @@
X EXPORT_SYMBOL(blkdev_get);
X EXPORT_SYMBOL(blkdev_put);
X EXPORT_SYMBOL(ioctl_by_bdev);
-EXPORT_SYMBOL(gendisk_head);
X EXPORT_SYMBOL(grok_partitions);
X EXPORT_SYMBOL(register_disk);
X EXPORT_SYMBOL(tq_disk);
@@ -307,7 +305,6 @@
X EXPORT_SYMBOL(refile_buffer);
X EXPORT_SYMBOL(max_sectors);
X EXPORT_SYMBOL(max_readahead);
-EXPORT_SYMBOL(file_moveto);
X
X /* tty routines */
X EXPORT_SYMBOL(tty_hangup);
@@ -317,8 +314,6 @@
X EXPORT_SYMBOL(tty_flip_buffer_push);
X EXPORT_SYMBOL(tty_get_baud_rate);
X EXPORT_SYMBOL(do_SAK);
-EXPORT_SYMBOL(console_print);
-EXPORT_SYMBOL(console_loglevel);
X
X /* filesystem registration */
X EXPORT_SYMBOL(register_filesystem);
@@ -336,11 +331,6 @@
X EXPORT_SYMBOL(remove_arg_zero);
X EXPORT_SYMBOL(set_binfmt);
X
-/* execution environment registration */
-EXPORT_SYMBOL(register_exec_domain);
-EXPORT_SYMBOL(unregister_exec_domain);
-EXPORT_SYMBOL(__set_personality);
-
X /* sysctl table registration */
X EXPORT_SYMBOL(register_sysctl_table);
X EXPORT_SYMBOL(unregister_sysctl_table);
@@ -434,6 +424,7 @@
X /* process management */
X EXPORT_SYMBOL(complete_and_exit);
X EXPORT_SYMBOL(__wake_up);
+EXPORT_SYMBOL(__wake_up_sync);
X EXPORT_SYMBOL(wake_up_process);
X EXPORT_SYMBOL(sleep_on);
X EXPORT_SYMBOL(sleep_on_timeout);
@@ -455,9 +446,12 @@
X
X /* misc */
X EXPORT_SYMBOL(panic);
-EXPORT_SYMBOL(printk);
X EXPORT_SYMBOL(sprintf);
+EXPORT_SYMBOL(snprintf);
+EXPORT_SYMBOL(sscanf);
X EXPORT_SYMBOL(vsprintf);
+EXPORT_SYMBOL(vsnprintf);
+EXPORT_SYMBOL(vsscanf);
X EXPORT_SYMBOL(kdevname);
X EXPORT_SYMBOL(bdevname);
X EXPORT_SYMBOL(cdevname);
@@ -475,6 +469,7 @@
X EXPORT_SYMBOL(get_random_bytes);
X EXPORT_SYMBOL(securebits);
X EXPORT_SYMBOL(cap_bset);
+EXPORT_SYMBOL(reparent_to_init);
X EXPORT_SYMBOL(daemonize);
X EXPORT_SYMBOL(csum_partial); /* for networking and md */
X
@@ -493,6 +488,7 @@
X EXPORT_SYMBOL(sys_tz);
X EXPORT_SYMBOL(file_fsync);
X EXPORT_SYMBOL(fsync_inode_buffers);
+EXPORT_SYMBOL(fsync_inode_data_buffers);
X EXPORT_SYMBOL(clear_inode);
X EXPORT_SYMBOL(nr_async_pages);
X EXPORT_SYMBOL(___strtok);
@@ -523,10 +519,6 @@
X
X /* binfmt_aout */
X EXPORT_SYMBOL(get_write_access);
-
-/* dynamic registering of consoles */
-EXPORT_SYMBOL(register_console);
-EXPORT_SYMBOL(unregister_console);
X
X /* time */
X EXPORT_SYMBOL(get_fast_time);
diff -u --recursive --new-file v2.4.9/linux/kernel/module.c linux/kernel/module.c
--- v2.4.9/linux/kernel/module.c Mon Aug 27 12:41:49 2001
+++ linux/kernel/module.c Thu Sep 13 16:33:03 2001
@@ -246,9 +246,7 @@
X {
X kernel_module.nsyms = __stop___ksymtab - __start___ksymtab;
X
-#ifdef __alpha__
- __asm__("stq $29,%0" : "=m"(kernel_module.gp));
-#endif
+ arch_init_modules(&kernel_module);
X }
X
X /*
@@ -440,12 +438,6 @@
X printk(KERN_ERR "init_module: mod->flags invalid.\n");
X goto err2;
X }
-#ifdef __alpha__
- if (!mod_bound(mod->gp - 0x8000, 0, mod)) {
- printk(KERN_ERR "init_module: mod->gp out of bounds.\n");
- goto err2;
- }
-#endif
X if (mod_member_present(mod, can_unload)
X && mod->can_unload && !mod_bound(mod->can_unload, 0, mod)) {
X printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n");
diff -u --recursive --new-file v2.4.9/linux/kernel/panic.c linux/kernel/panic.c
--- v2.4.9/linux/kernel/panic.c Mon Oct 16 12:58:51 2000
+++ linux/kernel/panic.c Sun Sep 16 21:22:40 2001
@@ -18,7 +18,6 @@
X #include <linux/interrupt.h>
X
X asmlinkage void sys_sync(void); /* it's really int */
-extern void unblank_console(void);
X
X int panic_timeout;
X
@@ -36,9 +35,8 @@
X * panic - halt the system
X * @fmt: The text string to print
X *
- * Display a message, then unblank the console and perform
- * cleanups. Functions in the panic notifier list are called
- * after the filesystem cache is flushed (when possible).
+ * Display a message, then perform cleanups. Functions in the panic
+ * notifier list are called after the filesystem cache is flushed (when possible).
X *
X * This function never returns.
X */
@@ -51,6 +49,7 @@
X unsigned long caller = (unsigned long) __builtin_return_address(0);
X #endif
X
+ bust_spinlocks(1);
X va_start(args, fmt);
X vsprintf(buf, fmt, args);
X va_end(args);
@@ -61,8 +60,7 @@
X printk(KERN_EMERG "In idle task - not syncing\n");
X else
X sys_sync();
-
- unblank_console();
+ bust_spinlocks(0);
X
X #ifdef CONFIG_SMP
X smp_send_stop();
diff -u --recursive --new-file v2.4.9/linux/kernel/printk.c linux/kernel/printk.c
--- v2.4.9/linux/kernel/printk.c Tue Feb 13 13:15:05 2001
+++ linux/kernel/printk.c Mon Sep 17 13:16:30 2001
@@ -12,6 +12,8 @@
X * Modified for sysctl support, 1/8/97, Chris Horn.
X * Fixed SMP synchronization, 08/08/99, Manfred Spraul
X * manf...@colorfullife.com
+ * Rewrote bits to get rid of console_lock
+ * 01Mar01 Andrew Morton <and...@uow.edu.au>
X */
X
X #include <linux/mm.h>
@@ -20,14 +22,14 @@
X #include <linux/smp_lock.h>
X #include <linux/console.h>
X #include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h> /* For in_interrupt() */
X
X #include <asm/uaccess.h>
X
-#define LOG_BUF_LEN (16384)
+#define LOG_BUF_LEN (16384) /* This must be a power of two */
X #define LOG_BUF_MASK (LOG_BUF_LEN-1)
X
-static char buf[1024];
-
X /* printk's without a loglevel use this.. */
X #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
X
@@ -35,7 +37,6 @@
X #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
X #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
X
-unsigned long log_size;
X DECLARE_WAIT_QUEUE_HEAD(log_wait);
X
X /* Keep together for sysctl support */
@@ -44,15 +45,41 @@
X int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
X int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
X
-spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+int oops_in_progress;
X
+/*
+ * console_sem protects the console_drivers list, and also
+ * provides serialisation for access to the entire console
+ * driver system.
+ */
+static DECLARE_MUTEX(console_sem);
X struct console *console_drivers;
+
+/*
+ * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
+ * It is also used in interesting ways to provide interlocking in
+ * release_console_sem().
+ */
+static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED;
+
X static char log_buf[LOG_BUF_LEN];
-static unsigned long log_start;
-static unsigned long logged_chars;
+#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
+
+/*
+ * The indices into log_buf are not constrained to LOG_BUF_LEN - they
+ * must be masked before subscripting
+ */
+static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */
+static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */
+static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */
+static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
+
X struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
X static int preferred_console = -1;
X
+/* Flag: console code may call schedule() */
+static int console_may_schedule;
+
X /*
X * Setup a list of consoles. Called from init/main.c
X */
@@ -120,6 +147,7 @@
X * 6 -- Disable printk's to console
X * 7 -- Enable printk's to console
X * 8 -- Set level of messages printed to console
+ * 9 -- Return number of unread characters in the log buffer
X */
X int do_syslog(int type, char * buf, int len)
X {
@@ -143,22 +171,21 @@
X error = verify_area(VERIFY_WRITE,buf,len);
X if (error)
X goto out;
- error = wait_event_interruptible(log_wait, log_size);
+ error = wait_event_interruptible(log_wait, (log_start - log_end));
X if (error)
X goto out;
X i = 0;
- spin_lock_irq(&console_lock);
- while (log_size && i < len) {
- c = log_buf[log_start & LOG_BUF_MASK];
+ spin_lock_irq(&logbuf_lock);
+ while ((log_start != log_end) && i < len) {
+ c = LOG_BUF(log_start);
X log_start++;
- log_size--;
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X __put_user(c,buf);
X buf++;
X i++;
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X }
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X error = i;
X break;
X case 4: /* Read/clear last kernel messages */
@@ -177,12 +204,12 @@
X count = len;
X if (count > LOG_BUF_LEN)
X count = LOG_BUF_LEN;
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X if (count > logged_chars)
X count = logged_chars;
X if (do_clear)
X logged_chars = 0;
- limit = log_start + log_size;
+ limit = log_end;
X /*
X * __put_user() could sleep, and while we sleep
X * printk() could overwrite the messages
@@ -191,14 +218,14 @@
X */
X for(i=0;i < count;i++) {
X j = limit-1-i;
- if (j+LOG_BUF_LEN < log_start+log_size)
+ if (j+LOG_BUF_LEN < log_end)
X break;
- c = log_buf[ j & LOG_BUF_MASK ];
- spin_unlock_irq(&console_lock);
+ c = LOG_BUF(j);
+ spin_unlock_irq(&logbuf_lock);
X __put_user(c,&buf[count-1-i]);
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X }
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X error = i;
X if(i != count) {
X int offset = count-error;
@@ -211,31 +238,36 @@
X
X break;
X case 5: /* Clear ring buffer */
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X logged_chars = 0;
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X break;
X case 6: /* Disable logging to console */
SHAR_EOF
true || echo 'restore of patch-2.4.10 failed'
fi
echo 'End of part 189'
echo 'File patch-2.4.10 is continued in part 190'
echo "190" > _shar_seq_.tmp
exit 0
#!/bin/sh -x
# this is part 190 of a 197 - part archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file patch-2.4.10 continued
if test ! -r _shar_seq_.tmp; then
echo 'Please unpack part 1 first!'
exit 1
fi
(read Scheck
if test "$Scheck" != 190; then
echo "Please unpack part $Scheck next!"
exit 1
else
exit 0
fi
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping patch-2.4.10'
else
echo 'x - continuing with patch-2.4.10'
sed 's/^X//' << 'SHAR_EOF' >> 'patch-2.4.10' &&
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X console_loglevel = minimum_console_loglevel;
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X break;
X case 7: /* Enable logging to console */
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X console_loglevel = default_console_loglevel;
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X break;
- case 8:
+ case 8: /* Set level of messages printed to console */
X error = -EINVAL;
X if (len < 1 || len > 8)
X goto out;
X if (len < minimum_console_loglevel)
X len = minimum_console_loglevel;
- spin_lock_irq(&console_lock);
+ spin_lock_irq(&logbuf_lock);
X console_loglevel = len;
- spin_unlock_irq(&console_lock);
+ spin_unlock_irq(&logbuf_lock);
X error = 0;
X break;
+ case 9: /* Number of chars in the log buffer */
+ spin_lock_irq(&logbuf_lock);
+ error = log_end - log_start;
+ spin_unlock_irq(&logbuf_lock);
+ break;
X default:
X error = -EINVAL;
X break;
@@ -251,98 +283,250 @@
X return do_syslog(type, buf, len);
X }
X
-asmlinkage int printk(const char *fmt, ...)
+/*
+ * Call the console drivers on a range of log_buf
+ */
+static void __call_console_drivers(unsigned long start, unsigned long end)
X {
- va_list args;
- int i;
- char *msg, *p, *buf_end;
- int line_feed;
- static signed char msg_level = -1;
- long flags;
+ struct console *con;
X
- spin_lock_irqsave(&console_lock, flags);
- va_start(args, fmt);
- i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
- buf_end = buf + 3 + i;
- va_end(args);
- for (p = buf + 3; p < buf_end; p++) {
- msg = p;
- if (msg_level < 0) {
- if (
- p[0] != '<' ||
- p[1] < '0' ||
- p[1] > '7' ||
- p[2] != '>'
- ) {
- p -= 3;
- p[0] = '<';
- p[1] = default_message_loglevel + '0';
- p[2] = '>';
- } else
- msg += 3;
- msg_level = p[1] - '0';
+ for (con = console_drivers; con; con = con->next) {
+ if ((con->flags & CON_ENABLED) && con->write)
+ con->write(con, &LOG_BUF(start), end - start);
+ }
+}
+
+/*
+ * Write out chars from start to end - 1 inclusive
+ */
+static void _call_console_drivers(unsigned long start, unsigned long end, int msg_log_level)
+{
+ if (msg_log_level < console_loglevel && console_drivers && start != end) {
+ if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
+ /* wrapped write */
+ __call_console_drivers(start & LOG_BUF_MASK, LOG_BUF_LEN);
+ __call_console_drivers(0, end & LOG_BUF_MASK);
+ } else {
+ __call_console_drivers(start, end);
X }
- line_feed = 0;
- for (; p < buf_end; p++) {
- log_buf[(log_start+log_size) & LOG_BUF_MASK] = *p;
- if (log_size < LOG_BUF_LEN)
- log_size++;
- else
- log_start++;
-
- logged_chars++;
- if (*p == '\n') {
- line_feed = 1;
+ }
+}
+
+/*
+ * Call the console drivers, asking them to write out
+ * log_buf[start] to log_buf[end - 1].
+ * The console_sem must be held.
+ */
+static void call_console_drivers(unsigned long start, unsigned long end)
+{
+ unsigned long cur_index, start_print;
+ static int msg_level = -1;
+
+ if (((long)(start - end)) > 0)
+ BUG();
+
+ cur_index = start;
+ start_print = start;
+ while (cur_index != end) {
+ if ( msg_level < 0 &&
+ ((end - cur_index) > 2) &&
+ LOG_BUF(cur_index + 0) == '<' &&
+ LOG_BUF(cur_index + 1) >= '0' &&
+ LOG_BUF(cur_index + 1) <= '7' &&
+ LOG_BUF(cur_index + 2) == '>')
+ {
+ msg_level = LOG_BUF(cur_index + 1) - '0';
+ cur_index += 3;
+ start_print = cur_index;
+ }
+ while (cur_index != end) {
+ char c = LOG_BUF(cur_index);
+ cur_index++;
+
+ if (c == '\n') {
+ if (msg_level < 0) {
+ /*
+ * printk() has already given us loglevel tags in
+ * the buffer. This code is here in case the
+ * log buffer has wrapped right round and scribbled
+ * on those tags
+ */
+ msg_level = default_message_loglevel;
+ }
+ _call_console_drivers(start_print, cur_index, msg_level);
+ msg_level = -1;
+ start_print = cur_index;
X break;
X }
X }
- if (msg_level < console_loglevel && console_drivers) {
- struct console *c = console_drivers;
- while(c) {
- if ((c->flags & CON_ENABLED) && c->write)
- c->write(c, msg, p - msg + line_feed);
- c = c->next;
+ }
+ _call_console_drivers(start_print, end, msg_level);
+}
+
+static void emit_log_char(char c)
+{
+ LOG_BUF(log_end) = c;
+ log_end++;
+ if (log_end - log_start > LOG_BUF_LEN)
+ log_start = log_end - LOG_BUF_LEN;
+ if (log_end - con_start > LOG_BUF_LEN)
+ con_start = log_end - LOG_BUF_LEN;
+ if (logged_chars < LOG_BUF_LEN)
+ logged_chars++;
+}
+
+/*
+ * This is printk. It can be called from any context. We want it to work.
+ *
+ * We try to grab the console_sem. If we succeed, it's easy - we log the output and
+ * call the console drivers. If we fail to get the semaphore we place the output
+ * into the log buffer and return. The current holder of the console_sem will
+ * notice the new output in release_console_sem() and will send it to the
+ * consoles before releasing the semaphore.
+ *
+ * One effect of this deferred printing is that code which calls printk() and
+ * then changes console_loglevel may break. This is because console_loglevel
+ * is inspected when the actual printing occurs.
+ */
+asmlinkage int printk(const char *fmt, ...)
+{
+ va_list args;
+ unsigned long flags;
+ int printed_len;
+ char *p;
+ static char printk_buf[1024];
+ static int log_level_unknown = 1;
+
+ if (oops_in_progress) {
+ /* If a crash is occurring, make sure we can't deadlock */
+ spin_lock_init(&logbuf_lock);
+ /* And make sure that we print immediately */
+ init_MUTEX(&console_sem);
+ }
+
+ /* This stops the holder of console_sem just where we want him */
+ spin_lock_irqsave(&logbuf_lock, flags);
+
+ /* Emit the output into the temporary buffer */
+ va_start(args, fmt);
+ printed_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+ va_end(args);
+
+ /*
+ * Copy the output into log_buf. If the caller didn't provide
+ * appropriate log level tags, we insert them here
+ */
+ for (p = printk_buf; *p; p++) {
+ if (log_level_unknown) {
+ if (p[0] != '<' || p[1] < '0' || p[1] > '7' || p[2] != '>') {
+ emit_log_char('<');
+ emit_log_char(default_message_loglevel + '0');
+ emit_log_char('>');
X }
+ log_level_unknown = 0;
X }
- if (line_feed)
- msg_level = -1;
+ emit_log_char(*p);
+ if (*p == '\n')
+ log_level_unknown = 1;
+ }
+
+ if (!down_trylock(&console_sem)) {
+ /*
+ * We own the drivers. We can drop the spinlock and let
+ * release_console_sem() print the text
+ */
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ console_may_schedule = 0;
+ release_console_sem();
+ } else {
+ /*
+ * Someone else owns the drivers. We drop the spinlock, which
+ * allows the semaphore holder to proceed and to call the
+ * console drivers with the output which we just produced.
+ */
+ spin_unlock_irqrestore(&logbuf_lock, flags);
X }
- spin_unlock_irqrestore(&console_lock, flags);
- wake_up_interruptible(&log_wait);
- return i;
+ return printed_len;
X }
+EXPORT_SYMBOL(printk);
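/*
 * Editor's sketch, not from the patch: the trylock-or-defer shape of the
 * printk() above, reduced to a skeleton.  append_to_buffer() and
 * flush_buffer_to_device() are stand-ins, not real kernel functions.
 */
static spinlock_t buf_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_MUTEX(output_sem);

void emit(const char *msg)
{
	unsigned long flags;

	spin_lock_irqsave(&buf_lock, flags);
	append_to_buffer(msg);			/* cheap, safe in any context */
	spin_unlock_irqrestore(&buf_lock, flags);

	if (!down_trylock(&output_sem)) {
		/* We won the semaphore: do the slow device output ourselves
		 * (release_console_sem() plays this role above). */
		flush_buffer_to_device();
		up(&output_sem);
	}
	/* Otherwise the current holder flushes our data before releasing. */
}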
X
-void console_print(const char *s)
+/**
+ * acquire_console_sem - lock the console system for exclusive use.
+ *
+ * Acquires a semaphore which guarantees that the caller has
+ * exclusive access to the console system and the console_drivers list.
+ *
+ * Can sleep, returns nothing.
+ */
+void acquire_console_sem(void)
+{
+ if (in_interrupt())
+ BUG();
+ down(&console_sem);
+ console_may_schedule = 1;
+}
+EXPORT_SYMBOL(acquire_console_sem);
+
+/**
+ * release_console_sem - unlock the console system
+ *
+ * Releases the semaphore which the caller holds on the console system
+ * and the console driver list.
+ *
+ * While the semaphore was held, console output may have been buffered
+ * by printk(). If this is the case, release_console_sem() emits
+ * the output prior to releasing the semaphore.
+ *
+ * If there is output waiting for klogd, we wake it up.
+ *
+ * release_console_sem() may be called from any context.
+ */
+void release_console_sem(void)
X {
- struct console *c;
X unsigned long flags;
- int len = strlen(s);
+ unsigned long _con_start, _log_end;
+ unsigned long must_wake_klogd = 0;
X
- spin_lock_irqsave(&console_lock, flags);
- c = console_drivers;
- while(c) {
- if ((c->flags & CON_ENABLED) && c->write)
- c->write(c, s, len);
- c = c->next;
+ for ( ; ; ) {
+ spin_lock_irqsave(&logbuf_lock, flags);
+ must_wake_klogd |= log_start - log_end;
+ if (con_start == log_end)
+ break; /* Nothing to print */
+ _con_start = con_start;
+ _log_end = log_end;
+ con_start = log_end; /* Flush */
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ call_console_drivers(_con_start, _log_end);
X }
- spin_unlock_irqrestore(&console_lock, flags);
+ console_may_schedule = 0;
+ up(&console_sem);
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ if (must_wake_klogd && !oops_in_progress)
+ wake_up_interruptible(&log_wait);
X }
X
-void unblank_console(void)
+/**
+ * console_conditional_schedule - yield the CPU if required
+ *
+ * If the console code is currently allowed to sleep, and
+ * if this CPU should yield to another task, do so here.
+ *
+ * Must be called while holding console_sem, i.e. between
+ * acquire_console_sem() and release_console_sem().
+ */
+void console_conditional_schedule(void)
X {
- struct console *c;
- unsigned long flags;
-
- spin_lock_irqsave(&console_lock, flags);
- c = console_drivers;
- while(c) {
- if ((c->flags & CON_ENABLED) && c->unblank)
- c->unblank();
- c = c->next;
+ if (console_may_schedule && current->need_resched) {
+ set_current_state(TASK_RUNNING);
+ schedule();
X }
- spin_unlock_irqrestore(&console_lock, flags);
X }
X
+void console_print(const char *s)
+{
+ printk(KERN_EMERG "%s", s);
+}
+EXPORT_SYMBOL(console_print);
+
X /*
X * The console driver calls this routine during kernel initialization
X * to register the console printing procedure with printk() and to
@@ -351,11 +535,7 @@
X */
X void register_console(struct console * console)
X {
- int i, j,len;
- int p;
- char buf[16];
- signed char msg_level = -1;
- char *q;
+ int i;
X unsigned long flags;
X
X /*
@@ -402,7 +582,7 @@
X * Put this console in the list - keep the
X * preferred driver at the head of the list.
X */
- spin_lock_irqsave(&console_lock, flags);
+ acquire_console_sem();
X if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
X console->next = console_drivers;
X console_drivers = console;
@@ -410,57 +590,28 @@
X console->next = console_drivers->next;
X console_drivers->next = console;
X }
- if ((console->flags & CON_PRINTBUFFER) == 0)
- goto done;
- /*
- * Print out buffered log messages.
- */
- p = log_start & LOG_BUF_MASK;
-
- for (i=0,j=0; i < log_size; i++) {
- buf[j++] = log_buf[p];
- p = (p+1) & LOG_BUF_MASK;
- if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1)
- continue;
- buf[j] = 0;
- q = buf;
- len = j;
- if (msg_level < 0) {
- if(buf[0] == '<' &&
- buf[1] >= '0' &&
- buf[1] <= '7' &&
- buf[2] == '>') {
- msg_level = buf[1] - '0';
- q = buf + 3;
- len -= 3;
- } else
- {
- msg_level = default_message_loglevel;
- }
- }
- if (msg_level < console_loglevel)
- console->write(console, q, len);
- if (buf[j-1] == '\n')
- msg_level = -1;
- j = 0;
+ if (console->flags & CON_PRINTBUFFER) {
+ /*
+		 * release_console_sem() will print out the buffered messages for us.
+ */
+ spin_lock_irqsave(&logbuf_lock, flags);
+ con_start = log_start;
+ spin_unlock_irqrestore(&logbuf_lock, flags);
X }
-done:
- spin_unlock_irqrestore(&console_lock, flags);
+ release_console_sem();
X }
-
+EXPORT_SYMBOL(register_console);
X
X int unregister_console(struct console * console)
X {
X struct console *a,*b;
- unsigned long flags;
X int res = 1;
X
- spin_lock_irqsave(&console_lock, flags);
+ acquire_console_sem();
X if (console_drivers == console) {
X console_drivers=console->next;
X res = 0;
- } else
- {
+ } else {
X for (a=console_drivers->next, b=console_drivers ;
X a; b=a, a=b->next) {
X if (a == console) {
@@ -479,13 +630,15 @@
X preferred_console = -1;
X
X
- spin_unlock_irqrestore(&console_lock, flags);
+ release_console_sem();
X return res;
X }
+EXPORT_SYMBOL(unregister_console);
X
-/*
- * Write a message to a certain tty, not just the console. This is used for
- * messages that need to be redirected to a specific tty.
+/**
+ * tty_write_message - write a message to a certain tty, not just the console.
+ *
+ * This is used for messages that need to be redirected to a specific tty.
X * We don't put it into the syslog queue right now maybe in the future if
X * really needed.
X */
diff -u --recursive --new-file v2.4.9/linux/kernel/ptrace.c linux/kernel/ptrace.c
--- v2.4.9/linux/kernel/ptrace.c Wed Jul 25 17:10:26 2001
+++ linux/kernel/ptrace.c Tue Sep 18 16:32:16 2001
@@ -42,6 +42,8 @@
X
X /* Go */
X task->ptrace |= PT_PTRACED;
+ if (capable(CAP_SYS_PTRACE))
+ task->ptrace |= PT_PTRACE_CAP;
X task_unlock(task);
X
X write_lock_irq(&tasklist_lock);
@@ -60,6 +62,27 @@
X return -EPERM;
X }
X
+int ptrace_detach(struct task_struct *child, unsigned int data)
+{
+ if ((unsigned long) data > _NSIG)
+ return -EIO;
+
+ /* Architecture-specific hardware disable .. */
+ ptrace_disable(child);
+
+ /* .. re-parent .. */
+ child->ptrace = 0;
+ child->exit_code = data;
+ write_lock_irq(&tasklist_lock);
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ write_unlock_irq(&tasklist_lock);
+
+ /* .. and wake it up. */
+ wake_up_process(child);
+ return 0;
+}
X
X /*
X * Access another process' address space, one page at a time.
diff -u --recursive --new-file v2.4.9/linux/kernel/sched.c linux/kernel/sched.c
--- v2.4.9/linux/kernel/sched.c Wed Jul 25 17:10:26 2001
+++ linux/kernel/sched.c Mon Sep 17 23:03:09 2001
@@ -23,9 +23,11 @@
X #include <linux/mm.h>
X #include <linux/init.h>
X #include <linux/smp_lock.h>
+#include <linux/nmi.h>
X #include <linux/interrupt.h>
X #include <linux/kernel_stat.h>
X #include <linux/completion.h>
+#include <linux/prefetch.h>
X
X #include <asm/uaccess.h>
X #include <asm/mmu_context.h>
@@ -107,6 +109,7 @@
X #define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
X
X struct kernel_stat kstat;
+extern struct task_struct *child_reaper;
X
X #ifdef CONFIG_SMP
X
@@ -245,7 +248,7 @@
X */
X oldest_idle = (cycles_t) -1;
X target_tsk = NULL;
- max_prio = 1;
+ max_prio = 0;
X
X for (i = 0; i < smp_num_cpus; i++) {
X cpu = cpu_logical_map(i);
@@ -291,7 +294,7 @@
X struct task_struct *tsk;
X
X tsk = cpu_curr(this_cpu);
- if (preemption_goodness(tsk, p, this_cpu) > 1)
+ if (preemption_goodness(tsk, p, this_cpu) > 0)
X tsk->need_resched = 1;
X #endif
X }
@@ -534,6 +537,9 @@
X struct list_head *tmp;
X int this_cpu, c;
X
+
+ spin_lock_prefetch(&runqueue_lock);
+
X if (!current->active_mm) BUG();
X need_resched_back:
X prev = current;
@@ -719,18 +725,16 @@
X static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
X int nr_exclusive, const int sync)
X {
- struct list_head *tmp, *head;
+ struct list_head *tmp;
X struct task_struct *p;
X
X CHECK_MAGIC_WQHEAD(q);
- head = &q->task_list;
- WQ_CHECK_LIST_HEAD(head);
- tmp = head->next;
- while (tmp != head) {
+ WQ_CHECK_LIST_HEAD(&q->task_list);
+
+ list_for_each(tmp,&q->task_list) {
X unsigned int state;
X wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
X
- tmp = tmp->next;
X CHECK_MAGIC(curr->__magic);
X p = curr->task;
X state = p->state;
@@ -1050,7 +1054,7 @@
X #if CONFIG_SMP
X int i;
X
- // Substract non-idle processes running on other CPUs.
+ // Subtract non-idle processes running on other CPUs.
X for (i = 0; i < smp_num_cpus; i++) {
X int cpu = cpu_logical_map(i);
X if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
@@ -1172,7 +1176,7 @@
X else
X printk(" (NOTLB)\n");
X
-#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_ARM)
+#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_ARM) || defined(CONFIG_ALPHA)
X /* This is very useful, but only works on ARM, x86 and sparc64 right now */
X {
X extern void show_trace_task(struct task_struct *tsk);
@@ -1210,9 +1214,68 @@
X printk(" task PC stack pid father child younger older\n");
X #endif
X read_lock(&tasklist_lock);
- for_each_task(p)
+ for_each_task(p) {
+ /*
+		 * reset the NMI timeout; listing all tasks on a slow
+		 * console might take a lot of time:
+ */
+ touch_nmi_watchdog();
X show_task(p);
+ }
X read_unlock(&tasklist_lock);
+}
+
+/**
+ * reparent_to_init() - Reparent the calling kernel thread to the init task.
+ *
+ * If a kernel thread is launched as a result of a system call, or if
+ * it ever exits, it should generally reparent itself to init so that
+ * it is correctly cleaned up on exit.
+ *
+ * Various task state, such as the scheduling policy and priority, may have
+ * been inherited from a user process, so we reset it to sane values here.
+ *
+ * NOTE that reparent_to_init() gives the caller full capabilities.
+ */
+void reparent_to_init(void)
+{
+ struct task_struct *this_task = current;
+
+ write_lock_irq(&tasklist_lock);
+
+ /* Reparent to init */
+ REMOVE_LINKS(this_task);
+ this_task->p_pptr = child_reaper;
+ this_task->p_opptr = child_reaper;
+ SET_LINKS(this_task);
+
+ /* Set the exit signal to SIGCHLD so we signal init on exit */
+ if (this_task->exit_signal != 0) {
+ printk(KERN_ERR "task `%s' exit_signal %d in "
+ __FUNCTION__ "\n",
+ this_task->comm, this_task->exit_signal);
+ }
+ this_task->exit_signal = SIGCHLD;
+
+ /* We also take the runqueue_lock while altering task fields
+ * which affect scheduling decisions */
+ spin_lock(&runqueue_lock);
+
+ this_task->ptrace = 0;
+ this_task->nice = DEF_NICE;
+ this_task->policy = SCHED_OTHER;
+ /* cpus_allowed? */
+ /* rt_priority? */
+ /* signals? */
+ this_task->cap_effective = CAP_INIT_EFF_SET;
+ this_task->cap_inheritable = CAP_INIT_INH_SET;
+ this_task->cap_permitted = CAP_FULL_SET;
+ this_task->keep_capabilities = 0;
+ memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
+ this_task->user = INIT_USER;
+
+ spin_unlock(&runqueue_lock);
+ write_unlock_irq(&tasklist_lock);
X }
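/*
 * Editor's note, illustrative only: a 2.4-style kernel thread started on
 * behalf of a user process detaches itself roughly like this before its
 * service loop ("my_worker" is hypothetical).
 */
static int my_worker(void *unused)
{
	daemonize();		/* drop user-space mm, files, tty */
	reparent_to_init();	/* become init's child, reset caps/nice/policy */
	strcpy(current->comm, "my_worker");

	for (;;) {
		/* ... wait for work, service it ... */
	}
	return 0;
}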
X
X /*
diff -u --recursive --new-file v2.4.9/linux/kernel/signal.c linux/kernel/signal.c
--- v2.4.9/linux/kernel/signal.c Wed Jan 3 20:45:26 2001
+++ linux/kernel/signal.c Mon Sep 17 16:40:01 2001
@@ -242,16 +242,16 @@
X #endif
X
X sig = next_signal(current, mask);
- if (current->notifier) {
- if (sigismember(current->notifier_mask, sig)) {
- if (!(current->notifier)(current->notifier_data)) {
- current->sigpending = 0;
- return 0;
+ if (sig) {
+ if (current->notifier) {
+ if (sigismember(current->notifier_mask, sig)) {
+ if (!(current->notifier)(current->notifier_data)) {
+ current->sigpending = 0;
+ return 0;
+ }
X }
X }
- }
X
- if (sig) {
X 		if (!collect_signal(sig, &current->pending, info))
X sig = 0;
X
@@ -467,11 +467,6 @@
X {
X t->sigpending = 1;
X
- if (t->state & TASK_INTERRUPTIBLE) {
- wake_up_process(t);
- return;
- }
-
X #ifdef CONFIG_SMP
X /*
X * If the task is running on a different CPU
@@ -488,6 +483,11 @@
X smp_send_reschedule(t->processor);
X spin_unlock(&runqueue_lock);
X #endif /* CONFIG_SMP */
+
+ if (t->state & TASK_INTERRUPTIBLE) {
+ wake_up_process(t);
+ return;
+ }
X }
X
X static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t)
@@ -544,8 +544,6 @@
X ret = deliver_signal(sig, info, t);
X out:
X spin_unlock_irqrestore(&t->sigmask_lock, flags);
- if ((t->state & TASK_INTERRUPTIBLE) && signal_pending(t))
- wake_up_process(t);
X out_nolock:
X #if DEBUG_SIG
X printk(" %d -> %d\n", signal_pending(t), ret);
diff -u --recursive --new-file v2.4.9/linux/kernel/softirq.c linux/kernel/softirq.c
--- v2.4.9/linux/kernel/softirq.c Sun Aug 12 13:28:01 2001
+++ linux/kernel/softirq.c Sat Sep 8 12:02:32 2001
@@ -108,6 +108,9 @@
X local_irq_restore(flags);
X }
X
+/*
+ * This function must run with irq disabled!
+ */
X inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
X {
X __cpu_raise_softirq(cpu, nr);
@@ -127,7 +130,11 @@
X
X void raise_softirq(unsigned int nr)
X {
+ long flags;
+
+ local_irq_save(flags);
X cpu_raise_softirq(smp_processor_id(), nr);
+ local_irq_restore(flags);
X }
X
X void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
@@ -195,8 +202,8 @@
X local_irq_disable();
X t->next = tasklet_vec[cpu].list;
X tasklet_vec[cpu].list = t;
- local_irq_enable();
X __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_enable();
X }
X }
X
@@ -229,8 +236,8 @@
X local_irq_disable();
X t->next = tasklet_hi_vec[cpu].list;
X tasklet_hi_vec[cpu].list = t;
- local_irq_enable();
X __cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_enable();
X }
X }
X
diff -u --recursive --new-file v2.4.9/linux/kernel/sys.c linux/kernel/sys.c
--- v2.4.9/linux/kernel/sys.c Sun Aug 12 13:28:01 2001
+++ linux/kernel/sys.c Tue Sep 18 14:10:43 2001
@@ -39,6 +39,7 @@
X */
X
X int C_A_D = 1;
+int cad_pid = 1;
X
X
X /*
@@ -350,7 +351,7 @@
X if (C_A_D)
X schedule_task(&cad_tq);
X else
- kill_proc(1, SIGINT, 1);
+ kill_proc(cad_pid, SIGINT, 1);
X }
X
X
@@ -1208,7 +1209,7 @@
X switch (option) {
X case PR_SET_PDEATHSIG:
X sig = arg2;
- if (sig > _NSIG) {
+ if (sig < 0 || sig > _NSIG) {
X error = -EINVAL;
X break;
X }
diff -u --recursive --new-file v2.4.9/linux/kernel/sysctl.c linux/kernel/sysctl.c
--- v2.4.9/linux/kernel/sysctl.c Sun Aug 12 13:28:01 2001
+++ linux/kernel/sysctl.c Tue Sep 18 14:10:43 2001
@@ -47,6 +47,8 @@
X extern int max_threads;
X extern int nr_queued_signals, max_queued_signals;
X extern int sysrq_enabled;
+extern int core_uses_pid;
+extern int cad_pid;
X
X /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
X static int maxolduid = 65535;
@@ -83,8 +85,8 @@
X extern int sysctl_userprocess_debug;
X #endif
X
-#ifdef __powerpc__
-extern unsigned long htab_reclaim_on, zero_paged_on, powersave_nap;
+#ifdef CONFIG_PPC32
+extern unsigned long zero_paged_on, powersave_nap;
X int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
X void *buffer, size_t *lenp);
X #endif
@@ -166,6 +168,8 @@
X 0644, NULL, &proc_doutsstring, &sysctl_string},
X {KERN_PANIC, "panic", &panic_timeout, sizeof(int),
X 0644, NULL, &proc_dointvec},
+ {KERN_CORE_USES_PID, "core_uses_pid", &core_uses_pid, sizeof(int),
+ 0644, NULL, &proc_dointvec},
X {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
X 0600, NULL, &proc_dointvec_bset},
X #ifdef CONFIG_BLK_DEV_INITRD
@@ -178,9 +182,7 @@
X {KERN_SPARC_STOP_A, "stop-a", &stop_a_enabled, sizeof (int),
X 0644, NULL, &proc_dointvec},
X #endif
-#ifdef __powerpc__
- {KERN_PPC_HTABRECLAIM, "htab-reclaim", &htab_reclaim_on, sizeof(int),
- 0644, NULL, &proc_dointvec},
+#ifdef CONFIG_PPC32
X {KERN_PPC_ZEROPAGED, "zero-paged", &zero_paged_on, sizeof(int),
X 0644, NULL, &proc_dointvec},
X {KERN_PPC_POWERSAVE_NAP, "powersave-nap", &powersave_nap, sizeof(int),
@@ -232,6 +234,8 @@
X {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
X 0644, NULL, &proc_dointvec},
X #endif
+ {KERN_CADPID, "cad_pid", &cad_pid, sizeof (int),
+ 0600, NULL, &proc_dointvec},
X {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int),
X 0644, NULL, &proc_dointvec},
X {KERN_RANDOM, "random", NULL, 0, 0555, random_table},
@@ -253,17 +257,11 @@
X };
X
X static ctl_table vm_table[] = {
- {VM_FREEPG, "freepages",
- &freepages, sizeof(freepages_t), 0444, NULL, &proc_dointvec},
X {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL,
X &proc_dointvec_minmax, &sysctl_intvec, NULL,
X &bdflush_min, &bdflush_max},
X {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
X sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
- {VM_BUFFERMEM, "buffermem",
- &buffer_mem, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec},
- {VM_PAGECACHE, "pagecache",
- &page_cache, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec},
X {VM_PAGERDAEMON, "kswapd",
X &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec},
X {VM_PGT_CACHE, "pagetable_cache",
diff -u --recursive --new-file v2.4.9/linux/lib/Makefile linux/lib/Makefile
--- v2.4.9/linux/lib/Makefile Wed Apr 25 13:31:03 2001
+++ linux/lib/Makefile Mon Sep 17 15:31:15 2001
@@ -8,9 +8,9 @@
X
X L_TARGET := lib.a
X
-export-objs := cmdline.o rwsem-spinlock.o rwsem.o
+export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o
X
-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o
+obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
X
X obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
X obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff -u --recursive --new-file v2.4.9/linux/lib/bust_spinlocks.c linux/lib/bust_spinlocks.c
--- v2.4.9/linux/lib/bust_spinlocks.c Wed Dec 31 16:00:00 1969
+++ linux/lib/bust_spinlocks.c Sun Sep 16 21:22:40 2001
@@ -0,0 +1,41 @@
+/*
+ * lib/bust_spinlocks.c
+ *
+ * Provides a minimal bust_spinlocks for architectures which don't have one of their own.
+ *
+ * bust_spinlocks() clears any spinlocks which would prevent oops, die(), BUG()
+ * and panic() information from reaching the user.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/tty.h>
+#include <linux/wait.h>
+#include <linux/vt_kern.h>
+
+extern spinlock_t timerlist_lock;
+
+void bust_spinlocks(int yes)
+{
+ spin_lock_init(&timerlist_lock);
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+ int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+ unblank_screen();
+#endif
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk() will give klogd
+ * and the blanked console a poke. Hold onto your hats...
+ */
+ console_loglevel = 15; /* NMI oopser may have shut the console up */
+ printk(" ");
+ console_loglevel = loglevel_save;
+ }
+}
+
+
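/*
 * Editor's sketch, hypothetical and not taken from any particular
 * architecture: the bracketing a die() routine is expected to put
 * around an oops report.
 */
void die(const char *str, struct pt_regs *regs, long err)
{
	bust_spinlocks(1);	/* oops_in_progress: printk() must not deadlock */
	printk(KERN_EMERG "%s: %04lx\n", str, err & 0xffff);
	show_regs(regs);
	bust_spinlocks(0);	/* unblank the screen, poke klogd */
	do_exit(SIGSEGV);
}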
diff -u --recursive --new-file v2.4.9/linux/lib/dec_and_lock.c linux/lib/dec_and_lock.c
--- v2.4.9/linux/lib/dec_and_lock.c Fri Jul 7 16:22:48 2000
+++ linux/lib/dec_and_lock.c Tue Aug 28 07:11:33 2001
@@ -1,3 +1,4 @@
+#include <linux/module.h>
X #include <linux/spinlock.h>
X #include <asm/atomic.h>
X
@@ -34,4 +35,6 @@
X spin_unlock(lock);
X return 0;
X }
+
+EXPORT_SYMBOL(atomic_dec_and_lock);
X #endif
diff -u --recursive --new-file v2.4.9/linux/lib/rbtree.c linux/lib/rbtree.c
--- v2.4.9/linux/lib/rbtree.c Wed Dec 31 16:00:00 1969
+++ linux/lib/rbtree.c Mon Sep 17 15:30:23 2001
@@ -0,0 +1,293 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <and...@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree.h>
+
+static void __rb_rotate_left(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * right = node->rb_right;
+
+ if ((node->rb_right = right->rb_left))
+ right->rb_left->rb_parent = node;
+ right->rb_left = node;
+
+ if ((right->rb_parent = node->rb_parent))
+ {
+ if (node == node->rb_parent->rb_left)
+ node->rb_parent->rb_left = right;
+ else
+ node->rb_parent->rb_right = right;
+ }
+ else
+ root->rb_node = right;
+ node->rb_parent = right;
+}
+
+static void __rb_rotate_right(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * left = node->rb_left;
+
+ if ((node->rb_left = left->rb_right))
+ left->rb_right->rb_parent = node;
+ left->rb_right = node;
+
+ if ((left->rb_parent = node->rb_parent))
+ {
+ if (node == node->rb_parent->rb_right)
+ node->rb_parent->rb_right = left;
+ else
+ node->rb_parent->rb_left = left;
+ }
+ else
+ root->rb_node = left;
+ node->rb_parent = left;
+}
+
+void rb_insert_color(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * parent, * gparent;
+
+ while ((parent = node->rb_parent) && parent->rb_color == RB_RED)
+ {
+ gparent = parent->rb_parent;
+
+ if (parent == gparent->rb_left)
+ {
+ {
+ register rb_node_t * uncle = gparent->rb_right;
+ if (uncle && uncle->rb_color == RB_RED)
+ {
+ uncle->rb_color = RB_BLACK;
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_right == node)
+ {
+ register rb_node_t * tmp;
+ __rb_rotate_left(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ __rb_rotate_right(gparent, root);
+ } else {
+ {
+ register rb_node_t * uncle = gparent->rb_left;
+ if (uncle && uncle->rb_color == RB_RED)
+ {
+ uncle->rb_color = RB_BLACK;
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_left == node)
+ {
+ register rb_node_t * tmp;
+ __rb_rotate_right(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ __rb_rotate_left(gparent, root);
+ }
+ }
+
+ root->rb_node->rb_color = RB_BLACK;
+}
+
+static void __rb_erase_color(rb_node_t * node, rb_node_t * parent,
+ rb_root_t * root)
+{
+ rb_node_t * other;
+
+ while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node)
+ {
+ if (parent->rb_left == node)
+ {
+ other = parent->rb_right;
+ if (other->rb_color == RB_RED)
+ {
+ other->rb_color = RB_BLACK;
+ parent->rb_color = RB_RED;
+ __rb_rotate_left(parent, root);
+ other = parent->rb_right;
+ }
+ if ((!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ && (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK))
+ {
+ other->rb_color = RB_RED;
+ node = parent;
+ parent = node->rb_parent;
+ }
+ else
+ {
+ if (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK)
+ {
+ register rb_node_t * o_left;
+ if ((o_left = other->rb_left))
+ o_left->rb_color = RB_BLACK;
+ other->rb_color = RB_RED;
+ __rb_rotate_right(other, root);
+ other = parent->rb_right;
+ }
+ other->rb_color = parent->rb_color;
+ parent->rb_color = RB_BLACK;
+ if (other->rb_right)
+ other->rb_right->rb_color = RB_BLACK;
+ __rb_rotate_left(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ else
+ {
+ other = parent->rb_left;
+ if (other->rb_color == RB_RED)
+ {
+ other->rb_color = RB_BLACK;
+ parent->rb_color = RB_RED;
+ __rb_rotate_right(parent, root);
+ other = parent->rb_left;
+ }
+ if ((!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ && (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK))
+ {
+ other->rb_color = RB_RED;
+ node = parent;
+ parent = node->rb_parent;
+ }
+ else
+ {
+ if (!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ {
+ register rb_node_t * o_right;
+ if ((o_right = other->rb_right))
+ o_right->rb_color = RB_BLACK;
+ other->rb_color = RB_RED;
+ __rb_rotate_left(other, root);
+ other = parent->rb_left;
+ }
+ other->rb_color = parent->rb_color;
+ parent->rb_color = RB_BLACK;
+ if (other->rb_left)
+ other->rb_left->rb_color = RB_BLACK;
+ __rb_rotate_right(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ }
+ if (node)
+ node->rb_color = RB_BLACK;
+}
+
+void rb_erase(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * child, * parent;
+ int color;
+
+ if (!node->rb_left)
+ child = node->rb_right;
+ else if (!node->rb_right)
+ child = node->rb_left;
+ else
+ {
+ rb_node_t * old = node, * left;
+
+ node = node->rb_right;
+ while ((left = node->rb_left))
+ node = left;
+ child = node->rb_right;
+ parent = node->rb_parent;
+ color = node->rb_color;
+
+ if (child)
+ child->rb_parent = parent;
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+
+ if (node->rb_parent == old)
+ parent = node;
+ node->rb_parent = old->rb_parent;
+ node->rb_color = old->rb_color;
+ node->rb_right = old->rb_right;
+ node->rb_left = old->rb_left;
+
+ if (old->rb_parent)
+ {
+ if (old->rb_parent->rb_left == old)
+ old->rb_parent->rb_left = node;
+ else
+ old->rb_parent->rb_right = node;
+ } else
+ root->rb_node = node;
+
+ old->rb_left->rb_parent = node;
+ if (old->rb_right)
+ old->rb_right->rb_parent = node;
+ goto color;
+ }
+
+ parent = node->rb_parent;
+ color = node->rb_color;
+
+ if (child)
+ child->rb_parent = parent;
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+
+ color:
+ if (color == RB_BLACK)
+ __rb_erase_color(child, parent, root);
+}
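/*
 * Editor's sketch, not part of the patch: how a caller drives these
 * primitives.  The library only rebalances; the caller does the ordered
 * descent and linkage itself.  "struct my_node" is hypothetical, and
 * keeping the rb_node_t first lets a plain cast recover the container.
 */
struct my_node {
	rb_node_t rb;		/* must remain the first member */
	unsigned long key;
};

static void my_insert(rb_root_t *root, struct my_node *new)
{
	rb_node_t **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct my_node *cur = (struct my_node *) *p;

		parent = *p;
		p = new->key < cur->key ? &(*p)->rb_left : &(*p)->rb_right;
	}
	/* Link the new node in red, then let the library fix the invariants */
	new->rb.rb_parent = parent;
	new->rb.rb_color = RB_RED;
	new->rb.rb_left = new->rb.rb_right = NULL;
	*p = &new->rb;
	rb_insert_color(&new->rb, root);
}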
diff -u --recursive --new-file v2.4.9/linux/lib/vsprintf.c linux/lib/vsprintf.c
--- v2.4.9/linux/lib/vsprintf.c Sun Aug 12 13:28:01 2001
+++ linux/lib/vsprintf.c Sun Sep 16 11:26:10 2001
@@ -308,6 +308,10 @@
X if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
X qualifier = *fmt;
X ++fmt;
+ if (qualifier == 'l' && *fmt == 'l') {
+ qualifier = 'L';
+ ++fmt;
+ }
X }
X
X /* default base */
@@ -502,3 +506,187 @@
X return i;
X }
X
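/*
 * Editor's note: with the "ll" hunk above, the integer-format parser maps
 * "ll" onto the existing 'L' (long long) qualifier, so both spellings
 * below now format a 64-bit value.  Illustrative only.
 */
static void show_sectors(char *buf, unsigned long long sectors)
{
	sprintf(buf, "%llu sectors (%Lu in the older spelling)",
		sectors, sectors);
}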
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @args: arguments
+ */
+int vsscanf(const char * buf, const char * fmt, va_list args)
+{
+ const char *str = buf;
+ char *next;
+ int num = 0;
+ int qualifier;
+ int base;
+ unsigned int field_width;
+ int is_sign = 0;
+
+ for (; *fmt; fmt++) {
+ /* skip any white space in format */
+ if (isspace(*fmt)) {
+ continue;
+ }
+
+ /* anything that is not a conversion must match exactly */
+ if (*fmt != '%') {
+ if (*fmt++ != *str++)
+ return num;
+ continue;
+ }
+ ++fmt;
+
+ /* skip this conversion.
+ * advance both strings to next white space
+ */
+ if (*fmt == '*') {
+ while (!isspace(*fmt))
+ fmt++;
+ while(!isspace(*str))
+ str++;
+ continue;
+ }
+
+ /* get field width */
+ field_width = 0xffffffffUL;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+
+ /* get conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z') {
+ qualifier = *fmt;
+ fmt++;
+ }
+ base = 10;
+ is_sign = 0;
+
+ switch(*fmt) {
+ case 'c':
+ {
+ char *s = (char *) va_arg(args,char*);
+ do {
+ *s++ = *str++;
+ } while(field_width-- > 0);
+ num++;
+ }
+ continue;
+ case 's':
+ {
+ char *s = (char *) va_arg(args, char *);
+ /* first, skip leading white space in buffer */
+ while (isspace(*str))
+ str++;
+
+ /* now copy until next white space */
+ while (!isspace(*str) && field_width--) {
+ *s++ = *str++;
+ }
+ *s = '\0';
+ num++;
+ }
+ continue;
+ case 'n':
+ /* return number of characters read so far */
+ {
+ int *i = (int *)va_arg(args,int*);
+ *i = str - buf;
+ }
+ continue;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ case 'd':
+ case 'i':
+ is_sign = 1;
+ case 'u':
+ break;
+ case '%':
+ /* looking for '%' in str */
+ if (*str++ != '%')
+ return num;
+ continue;
+ default:
+ /* invalid format; stop here */
+ return num;
+ }
+
+ /* have some sort of integer conversion.
+ * first, skip white space in buffer.
+ */
+ while (isspace(*str))
+ str++;
+
+ switch(qualifier) {
+ case 'h':
+ if (is_sign) {
+ short *s = (short *) va_arg(args,short *);
+ *s = (short) simple_strtol(str,&next,base);
+ } else {
+ unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
+ *s = (unsigned short) simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'l':
+ if (is_sign) {
+ long *l = (long *) va_arg(args,long *);
+ *l = simple_strtol(str,&next,base);
+ } else {
+ unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
+ *l = simple_strtoul(str,&next,base);
+ }
+ break;
+ case 'L':
+ if (is_sign) {
+ long long *l = (long long*) va_arg(args,long long *);
+ *l = simple_strtoll(str,&next,base);
+ } else {
+ unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
+ *l = simple_strtoull(str,&next,base);
+ }
+ break;
+ case 'Z':
+ {
+ size_t *s = (size_t*) va_arg(args,size_t*);
+ *s = (size_t) simple_strtoul(str,&next,base);
+ }
+ break;
+ default:
+ if (is_sign) {
+ int *i = (int *) va_arg(args, int*);
+ *i = (int) simple_strtol(str,&next,base);
+ } else {
+ unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
+ *i = (unsigned int) simple_strtoul(str,&next,base);
+ }
+ break;
+ }
+ num++;
+
+ if (!next)
+ break;
+ str = next;
+ }
+ return num;
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char * buf, const char * fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args,fmt);
+ i = vsscanf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
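/*
 * Editor's sketch: typical use of the new in-kernel sscanf().  The subset
 * implemented above covers %d/%i/%u/%o/%x (with h/l/L/Z qualifiers), %c,
 * %s, %n and literal matching -- no floating point.  "parse_version" is
 * hypothetical.
 */
static int parse_version(const char *ver, int *major, int *minor)
{
	if (sscanf(ver, "%d.%d", major, minor) != 2)
		return -EINVAL;
	return 0;
}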
diff -u --recursive --new-file v2.4.9/linux/mm/bootmem.c linux/mm/bootmem.c
--- v2.4.9/linux/mm/bootmem.c Tue Jul 3 17:08:22 2001
+++ linux/mm/bootmem.c Tue Sep 18 14:10:43 2001
@@ -344,7 +344,8 @@
X /*
X * Whoops, we cannot satisfy the allocation request.
X */
- BUG();
+ printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
+ panic("Out of memory");
X return NULL;
X }
X
diff -u --recursive --new-file v2.4.9/linux/mm/filemap.c linux/mm/filemap.c
--- v2.4.9/linux/mm/filemap.c Mon Aug 27 12:41:49 2001
+++ linux/mm/filemap.c Sat Sep 22 11:04:53 2001
@@ -22,6 +22,7 @@
X #include <linux/swapctl.h>
X #include <linux/init.h>
X #include <linux/mm.h>
+#include <linux/iobuf.h>
X
X #include <asm/pgalloc.h>
X #include <asm/uaccess.h>
@@ -45,12 +46,12 @@
X unsigned int page_hash_bits;
X struct page **page_hash_table;
X
-spinlock_t __cacheline_aligned pagecache_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t pagecache_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
X /*
X * NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
X * the pagemap_lru_lock held.
X */
-spinlock_t __cacheline_aligned pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
X
X #define CLUSTER_PAGES (1 << page_cluster)
X #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
@@ -200,7 +201,7 @@
X
X }
X
-static inline void truncate_complete_page(struct page *page)
+static void truncate_complete_page(struct page *page)
X {
X /* Leave it on the LRU if it gets converted into anonymous buffers */
X if (!page->buffers || block_flushpage(page, 0))
@@ -224,8 +225,10 @@
X {
X struct list_head *curr;
X struct page * page;
+ int unlocked = 0;
X
- curr = head->next;
+ restart:
+ curr = head->prev;
X while (curr != head) {
X unsigned long offset;
X
@@ -234,33 +237,46 @@
X
X /* Is one of the pages to truncate? */
X if ((offset >= start) || (*partial && (offset + 1) == start)) {
- list_del(head);
- list_add(head, curr);
- if (TryLockPage(page)) {
- page_cache_get(page);
- spin_unlock(&pagecache_lock);
- wait_on_page(page);
- goto out_restart;
- }
+ int failed;
+
X page_cache_get(page);
+ failed = TryLockPage(page);
+
+ list_del(head);
+ if (!failed)
+ /* Restart after this page */
+ list_add_tail(head, curr);
+ else
+ /* Restart on this page */
+ list_add(head, curr);
+
X spin_unlock(&pagecache_lock);
+ unlocked = 1;
X
- if (*partial && (offset + 1) == start) {
- truncate_partial_page(page, *partial);
- *partial = 0;
- } else
- truncate_complete_page(page);
+ if (!failed) {
+ if (*partial && (offset + 1) == start) {
+ truncate_partial_page(page, *partial);
+ *partial = 0;
+ } else
+ truncate_complete_page(page);
+
+ UnlockPage(page);
+ } else
+ wait_on_page(page);
X
- UnlockPage(page);
- goto out_restart;
+ page_cache_release(page);
+
+ if (current->need_resched) {
+ __set_current_state(TASK_RUNNING);
+ schedule();
+ }
+
+ spin_lock(&pagecache_lock);
+ goto restart;
X }
- curr = curr->next;
+ curr = curr->prev;
X }
- return 0;
-out_restart:
- page_cache_release(page);
- spin_lock(&pagecache_lock);
- return 1;
+ return unlocked;
X }
X
X
@@ -277,22 +293,118 @@
X {
X unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
X unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
- int complete;
+ int unlocked;
X
X spin_lock(&pagecache_lock);
X do {
- complete = 1;
- while (truncate_list_pages(&mapping->clean_pages, start, &partial))
- complete = 0;
- while (truncate_list_pages(&mapping->dirty_pages, start, &partial))
- complete = 0;
- while (truncate_list_pages(&mapping->locked_pages, start, &partial))
- complete = 0;
- } while (!complete);
+ unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial);
+ unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial);
+ unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial);
+ } while (unlocked);
X /* Traversed all three lists without dropping the lock */
X spin_unlock(&pagecache_lock);
X }
X
+static inline int invalidate_this_page2(struct page * page,
+ struct list_head * curr,
+ struct list_head * head)
+{
+ int unlocked = 1;
+
+ /*
+ * The page is locked and we hold the pagecache_lock as well
+ * so both page_count(page) and page->buffers stays constant here.
+ */
+ if (page_count(page) == 1 + !!page->buffers) {
+ /* Restart after this page */
+ list_del(head);
+ list_add_tail(head, curr);
+
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+ truncate_complete_page(page);
+ } else {
+ if (page->buffers) {
+ /* Restart after this page */
+ list_del(head);
+ list_add_tail(head, curr);
+
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+ block_invalidate_page(page);
+ } else
+ unlocked = 0;
+
+ ClearPageDirty(page);
+ ClearPageUptodate(page);
+ }
+
+ return unlocked;
+}
+
+static int FASTCALL(invalidate_list_pages2(struct list_head *));
+static int invalidate_list_pages2(struct list_head *head)
+{
+ struct list_head *curr;
+ struct page * page;
+ int unlocked = 0;
+
+ restart:
+ curr = head->prev;
+ while (curr != head) {
+ page = list_entry(curr, struct page, list);
+
+ if (!TryLockPage(page)) {
+ int __unlocked;
+
+ __unlocked = invalidate_this_page2(page, curr, head);
+ UnlockPage(page);
+ unlocked |= __unlocked;
+ if (!__unlocked) {
+ curr = curr->prev;
+ continue;
+ }
+ } else {
+ /* Restart on this page */
+ list_del(head);
+ list_add(head, curr);
+
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+ unlocked = 1;
+ wait_on_page(page);
+ }
+
+ page_cache_release(page);
+ if (current->need_resched) {
+ __set_current_state(TASK_RUNNING);
+ schedule();
+ }
+
+ spin_lock(&pagecache_lock);
+ goto restart;
+ }
+ return unlocked;
+}
+
+/**
+ * invalidate_inode_pages2 - Remove an inode's pages from the page cache;
+ * where a page cannot be freed because it is mapped, clear its dirty and
+ * uptodate bits instead.
+ * @mapping: the address_space which pages we want to invalidate
+ */
+void invalidate_inode_pages2(struct address_space * mapping)
+{
+ int unlocked;
+
+ spin_lock(&pagecache_lock);
+ do {
+ unlocked = invalidate_list_pages2(&mapping->clean_pages);
+ unlocked |= invalidate_list_pages2(&mapping->dirty_pages);
+ unlocked |= invalidate_list_pages2(&mapping->locked_pages);
+ } while (unlocked);
+ spin_unlock(&pagecache_lock);
+}
+
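/*
 * Editor's note, illustrative: a network filesystem that finds its cached
 * copy stale on revalidation could drop it like this; mapped pages survive
 * but lose their dirty/uptodate bits, forcing a re-read from the server.
 */
static void drop_stale_cache(struct inode *inode)
{
	invalidate_inode_pages2(inode->i_mapping);
}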
X static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
X {
X goto inside;
@@ -307,8 +419,6 @@
X if (page->index == offset)
X break;
X }
- /* Mark the page referenced, kswapd will find it later. */
- SetPageReferenced(page);
X
X not_found:
X return page;
@@ -487,9 +597,9 @@
X if (!PageLocked(page))
X BUG();
X
+ page->index = index;
X page_cache_get(page);
X spin_lock(&pagecache_lock);
- page->index = index;
X add_page_to_inode_queue(mapping, page);
X add_page_to_hash_queue(page, page_hash(mapping, index));
X lru_cache_add(page);
@@ -509,7 +619,7 @@
X if (PageLocked(page))
X BUG();
X
- flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_dirty) | (1 << PG_referenced) | (1 << PG_arch_1) | (1 << PG_checked));
+ flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_dirty | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked);
X page->flags = flags | (1 << PG_locked);
X page_cache_get(page);
X page->index = offset;
@@ -549,7 +659,8 @@
X * This adds the requested page to the page cache if it isn't already there,
X * and schedules an I/O to read in its contents from disk.
X */
-static inline int page_cache_read(struct file * file, unsigned long offset)
+static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
+static int page_cache_read(struct file * file, unsigned long offset)
X {
X struct inode *inode = file->f_dentry->d_inode;
X struct address_space *mapping = inode->i_mapping;
@@ -557,7 +668,7 @@
X struct page *page;
X
X spin_lock(&pagecache_lock);
- page = __find_page_nolock(mapping, offset, *hash);
+ page = __find_page_nolock(mapping, offset, *hash);
X spin_unlock(&pagecache_lock);
X if (page)
X return 0;
@@ -575,7 +686,7 @@
X * We arrive here in the unlikely event that someone
X * raced with us and added our page to the cache first.
X */
- page_cache_free(page);
+ page_cache_release(page);
X return 0;
X }
X
@@ -583,6 +694,8 @@
X * Read in an entire cluster at once. A cluster is usually a 64k-
X * aligned block that includes the page requested in "offset."
X */
+static int FASTCALL(read_cluster_nonblocking(struct file * file, unsigned long offset,
+ unsigned long filesize));
X static int read_cluster_nonblocking(struct file * file, unsigned long offset,
X unsigned long filesize)
X {
@@ -613,11 +726,10 @@
X
X add_wait_queue(&page->wait, &wait);
X do {
- sync_page(page);
X set_task_state(tsk, TASK_UNINTERRUPTIBLE);
X if (!PageLocked(page))
X break;
- run_task_queue(&tq_disk);
+ sync_page(page);
X schedule();
X } while (PageLocked(page));
X tsk->state = TASK_RUNNING;
@@ -635,12 +747,10 @@
X
X add_wait_queue_exclusive(&page->wait, &wait);
X for (;;) {
- sync_page(page);
X set_task_state(tsk, TASK_UNINTERRUPTIBLE);
X if (PageLocked(page)) {
- run_task_queue(&tq_disk);
+ sync_page(page);
X schedule();
- continue;
X }
X if (!TryLockPage(page))
X break;
@@ -682,34 +792,6 @@
X }
X
X /*
- * Find a swapcache page (and get a reference) or return NULL.
- * The SwapCache check is protected by the pagecache lock.
- */
-struct page * __find_get_swapcache_page(struct address_space *mapping,
- unsigned long offset, struct page **hash)
-{
- struct page *page;
-
- /*
- * We need the LRU lock to protect against page_launder().
- */
-
- spin_lock(&pagecache_lock);
- page = __find_page_nolock(mapping, offset, *hash);
- if (page) {
- spin_lock(&pagemap_lru_lock);
- if (PageSwapCache(page))
- page_cache_get(page);
- else
- page = NULL;
- spin_unlock(&pagemap_lru_lock);
- }
- spin_unlock(&pagecache_lock);
-
- return page;
-}
-
-/*
X * Same as the above, but lock the page too, verifying that
X * it's still valid once we own it.
X */
@@ -874,16 +956,42 @@
X return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)];
X }
X
+static inline unsigned long calc_end_index(struct inode * inode)
+{
+ unsigned long end_index;
+
+ if (!S_ISBLK(inode->i_mode))
+ end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ else
+ end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
+
+ return end_index;
+}
+
+static inline loff_t calc_rsize(struct inode * inode)
+{
+ loff_t rsize;
+
+ if (!S_ISBLK(inode->i_mode))
+ rsize = inode->i_size;
+ else
+ rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
+
+ return rsize;
+}
+
X static void generic_file_readahead(int reada_ok,
X struct file * filp, struct inode * inode,
X struct page * page)
X {
- unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ unsigned long end_index;
X unsigned long index = page->index;
X unsigned long max_ahead, ahead;
X unsigned long raend;
X int max_readahead = get_max_readahead(inode);
X
+ end_index = calc_end_index(inode);
+
X raend = filp->f_raend;
X max_ahead = 0;
X
@@ -976,17 +1084,24 @@
X return;
X }
X
-
-static inline void check_used_once (struct page *page)
+/*
+ * Mark a page as having seen activity.
+ *
+ * If it was already so marked, move it
+ * to the active queue and drop the referenced
+ * bit. Otherwise, just mark it for future
+ * action..
+ */
+void mark_page_accessed(struct page *page)
X {
- if (!PageActive(page)) {
- if (page->age)
- activate_page(page);
- else {
- page->age = PAGE_AGE_START;
- ClearPageReferenced(page);
- }
+ if (!PageActive(page) && PageReferenced(page)) {
+ activate_page(page);
+ ClearPageReferenced(page);
+ return;
X }
+
+ /* Mark the page referenced, AFTER checking for previous usage.. */
+ SetPageReferenced(page);
X }
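/*
 * Editor's note, illustrative: activation now takes two touches --
 *
 *	mark_page_accessed(page);	 first touch: sets PG_referenced
 *	mark_page_accessed(page);	 second touch: activate_page()
 *
 * so a single speculative reference no longer promotes a page to the
 * active list.
 */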
X
X /*
@@ -1054,12 +1169,13 @@
X struct page *page, **hash;
X unsigned long end_index, nr, ret;
X
- end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ end_index = calc_end_index(inode);
+
X if (index > end_index)
X break;
X nr = PAGE_CACHE_SIZE;
X if (index == end_index) {
- nr = inode->i_size & ~PAGE_CACHE_MASK;
+ nr = calc_rsize(inode) & ~PAGE_CACHE_MASK;
X if (nr <= offset)
X break;
X }
@@ -1105,7 +1221,7 @@
X index += offset >> PAGE_CACHE_SHIFT;
X offset &= ~PAGE_CACHE_MASK;
X
- check_used_once (page);
+ mark_page_accessed(page);
X page_cache_release(page);
X if (ret == nr && desc->count)
X continue;
@@ -1196,10 +1312,96 @@
X *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
X filp->f_reada = 1;
X if (cached_page)
- page_cache_free(cached_page);
+ page_cache_release(cached_page);
X UPDATE_ATIME(inode);
X }
X
+static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
+{
+ ssize_t retval;
+ int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
+ struct kiobuf * iobuf;
+ struct inode * inode = filp->f_dentry->d_inode;
+ struct address_space * mapping = inode->i_mapping;
+
+ new_iobuf = 0;
+ iobuf = filp->f_iobuf;
+ if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
+ /*
+ * A parallel read/write is using the preallocated iobuf
+ * so just run slow and allocate a new one.
+ */
+ retval = alloc_kiovec(1, &iobuf);
+ if (retval)
+ goto out;
+ new_iobuf = 1;
+ }
+
+ if (!S_ISBLK(inode->i_mode)) {
+ blocksize = inode->i_sb->s_blocksize;
+ blocksize_bits = inode->i_sb->s_blocksize_bits;
+ } else {
+ blocksize = BUFFERED_BLOCKSIZE;
+ blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
+ }
+ blocksize_mask = blocksize - 1;
+ chunk_size = KIO_MAX_ATOMIC_IO << 10;
+
+ retval = -EINVAL;
+ if ((offset & blocksize_mask) || (count & blocksize_mask))
+ goto out_free;
+ if (!mapping->a_ops->direct_IO)
+ goto out_free;
+
+ /*
+	 * Flush only the _data_ to disk; metadata must remain completely
+	 * asynchronous or performance will go to /dev/null.
+ */
+ filemap_fdatasync(mapping);
+ retval = fsync_inode_data_buffers(inode);
+ filemap_fdatawait(mapping);
+ if (retval < 0)
+ goto out_free;
+
+ progress = retval = 0;
+ while (count > 0) {
+ iosize = count;
+ if (iosize > chunk_size)
+ iosize = chunk_size;
+
+ retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
+ if (retval)
+ break;
+
+ retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
+
+ if (rw == READ && retval > 0)
+ mark_dirty_kiobuf(iobuf, retval);
+
+ if (retval >= 0) {
+ count -= retval;
+ buf += retval;
+ progress += retval;
+ }
+
+ unmap_kiobuf(iobuf);
+
+ if (retval != iosize)
+ break;
+ }
+
+ if (progress)
+ retval = progress;
+
+ out_free:
+ if (!new_iobuf)
+ clear_bit(0, &filp->f_iobuf_lock);
+ else
+ free_kiovec(1, &iobuf);
+ out:
+ return retval;
+}
+
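/*
 * Editor's sketch, userspace and illustrative: the alignment contract the
 * new O_DIRECT path enforces -- file offset and transfer length must be
 * multiples of the blocksize (BUFFERED_BLOCKSIZE for block devices).
 * Aligning the buffer itself is conventional; 4096 is an assumed blocksize.
 */
#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

ssize_t read_direct(const char *path, void *dst, size_t len, off_t off)
{
	void *buf = NULL;
	ssize_t n = -1;
	int fd = open(path, O_RDONLY | O_DIRECT);

	if (fd < 0)
		return -1;
	if (posix_memalign(&buf, 4096, len) == 0) {
		n = pread(fd, buf, len, off);	/* off and len both aligned */
		if (n > 0)
			memcpy(dst, buf, (size_t) n);
		free(buf);
	}
	close(fd);
	return n;
}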
X int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
X {
X char *kaddr;
@@ -1230,6 +1432,12 @@
X {
X ssize_t retval;
X
+ if ((ssize_t) count < 0)
+ return -EINVAL;
+
+ if (filp->f_flags & O_DIRECT)
+ goto o_direct;
+
X retval = -EFAULT;
X if (access_ok(VERIFY_WRITE, buf, count)) {
X retval = 0;
@@ -1248,7 +1456,28 @@
X retval = desc.error;
X }
X }
+ out:
X return retval;
+
+ o_direct:
+ {
+ loff_t pos = *ppos, size;
+ struct inode * inode = filp->f_dentry->d_inode;
+
+ retval = 0;
+ if (!count)
+ goto out; /* skip atime */
+ size = calc_rsize(inode);
+ if (pos < size) {
+ if (pos + count > size)
+ count = size - pos;
+ retval = generic_file_direct_IO(READ, filp, buf, count, pos);
+ if (retval > 0)
+ *ppos = pos + retval;
+ }
+ UPDATE_ATIME(filp->f_dentry->d_inode);
+ goto out;
+ }
X }
X
X static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
@@ -1433,6 +1662,7 @@
X struct address_space *mapping = inode->i_mapping;
X struct page *page, **hash, *old_page;
X unsigned long size, pgoff;
+ loff_t rsize;
X
X pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
X
@@ -1441,7 +1671,8 @@
X * An external ptracer can access pages that normally aren't
X * accessible..
X */
- size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ rsize = calc_rsize(inode);
+ size = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
X if ((pgoff >= size) && (area->vm_mm == current->mm))
X return NULL;
X
@@ -1473,6 +1704,7 @@
X * and possibly copy it over to another page..
X */
X old_page = page;
+ mark_page_accessed(page);
X if (no_share) {
X struct page *new_page = alloc_page(GFP_HIGHUSER);
X
@@ -1682,22 +1914,7 @@
X return error;
X }
X
-/*
- * Shared mappings need to be able to do the right thing at
- * close/unmap/sync. They will also use the private file as
- * backing-store for swapping..
- */
-static struct vm_operations_struct file_shared_mmap = {
- nopage: filemap_nopage,
-};
-
-/*
- * Private mappings just need to be able to load in the map.
- *
- * (This is actually used for shared mappings as well, if we
- * know they can't ever get write permissions..)
- */
-static struct vm_operations_struct file_private_mmap = {
+static struct vm_operations_struct generic_file_vm_ops = {
X nopage: filemap_nopage,
X };
X
@@ -1705,21 +1922,18 @@
X
X int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
X {
- struct vm_operations_struct * ops;
X struct inode *inode = file->f_dentry->d_inode;
X
- ops = &file_private_mmap;
X if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
X if (!inode->i_mapping->a_ops->writepage)
X return -EINVAL;
- ops = &file_shared_mmap;
X }
X if (!inode->i_sb || !S_ISREG(inode->i_mode))
X return -EACCES;
X if (!inode->i_mapping->a_ops->readpage)
X return -ENOEXEC;
X UPDATE_ATIME(inode);
- vma->vm_ops = ops;
+ vma->vm_ops = &generic_file_vm_ops;
X return 0;
X }
X
@@ -1826,6 +2040,7 @@
X unsigned long end, int behavior)
X {
X struct vm_area_struct * n;
+ struct mm_struct * mm = vma->vm_mm;
X
X n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
X if (!n)
@@ -1838,12 +2053,12 @@
X get_file(n->vm_file);
X if (n->vm_ops && n->vm_ops->open)
X n->vm_ops->open(n);
- lock_vma_mappings(vma);
- spin_lock(&vma->vm_mm->page_table_lock);
X vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
+ lock_vma_mappings(vma);
+ spin_lock(&mm->page_table_lock);
X vma->vm_start = end;
- __insert_vm_struct(current->mm, n);
- spin_unlock(&vma->vm_mm->page_table_lock);
+ __insert_vm_struct(mm, n);
+ spin_unlock(&mm->page_table_lock);
X unlock_vma_mappings(vma);
X return 0;
X }
@@ -1852,6 +2067,7 @@
X unsigned long start, int behavior)
X {
X struct vm_area_struct * n;
+ struct mm_struct * mm = vma->vm_mm;
X
X n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
X if (!n)
@@ -1866,10 +2082,10 @@
X if (n->vm_ops && n->vm_ops->open)
X n->vm_ops->open(n);
X lock_vma_mappings(vma);
- spin_lock(&vma->vm_mm->page_table_lock);
+ spin_lock(&mm->page_table_lock);
X vma->vm_end = start;
- __insert_vm_struct(current->mm, n);
- spin_unlock(&vma->vm_mm->page_table_lock);
+ __insert_vm_struct(mm, n);
+ spin_unlock(&mm->page_table_lock);
X unlock_vma_mappings(vma);
X return 0;
X }
@@ -1878,6 +2094,7 @@
X unsigned long start, unsigned long end, int behavior)
X {
X struct vm_area_struct * left, * right;
+ struct mm_struct * mm = vma->vm_mm;
X
X left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
X if (!left)
@@ -1901,16 +2118,16 @@
X vma->vm_ops->open(left);
X vma->vm_ops->open(right);
X }
- lock_vma_mappings(vma);
SHAR_EOF
true || echo 'restore of patch-2.4.10 failed'
fi
echo 'End of part 190'
echo 'File patch-2.4.10 is continued in part 191'
echo "191" > _shar_seq_.tmp
exit 0
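
# The filemap hunks in the part above repeatedly round a byte count up to a
# whole number of page-cache pages with the idiom
# (bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT, now applied to the new
# calc_rsize() result instead of inode->i_size. Below is a minimal
# stand-alone C sketch of just that idiom; the 4 KiB page size is an
# assumption for the example, and bytes_to_pages() is an invented helper
# name, not a kernel function.

#include <stdio.h>

/* Assumed values for the sketch: 4 KiB page-cache pages. */
#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

/* Round a byte length up to whole pages, as the patch does with
 * (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT. */
static unsigned long bytes_to_pages(unsigned long long bytes)
{
        return (unsigned long)((bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT);
}

int main(void)
{
        printf("%lu\n", bytes_to_pages(0));      /* 0 */
        printf("%lu\n", bytes_to_pages(1));      /* 1 */
        printf("%lu\n", bytes_to_pages(4096));   /* 1: exactly one page */
        printf("%lu\n", bytes_to_pages(4097));   /* 2: spills into a second page */
        return 0;
}

# Compiled with any C compiler it prints 0, 1, 1, 2, showing the round-up
# at each page boundary.
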
#!/bin/sh -x
# this is part 191 of a 197 - part archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file patch-2.4.10 continued
if test ! -r _shar_seq_.tmp; then
echo 'Please unpack part 1 first!'
exit 1
fi
(read Scheck
if test "$Scheck" != 191; then
echo "Please unpack part $Scheck next!"
exit 1
else
exit 0
fi
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping patch-2.4.10'
else
echo 'x - continuing with patch-2.4.10'
sed 's/^X//' << 'SHAR_EOF' >> 'patch-2.4.10' &&
- spin_lock(&vma->vm_mm->page_table_lock);
X vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
+ vma->vm_raend = 0;
+ lock_vma_mappings(vma);
+ spin_lock(&mm->page_table_lock);
X vma->vm_start = start;
X vma->vm_end = end;
X setup_read_behavior(vma, behavior);
- vma->vm_raend = 0;
- __insert_vm_struct(current->mm, left);
- __insert_vm_struct(current->mm, right);
- spin_unlock(&vma->vm_mm->page_table_lock);
+ __insert_vm_struct(mm, left);
+ __insert_vm_struct(mm, right);
+ spin_unlock(&mm->page_table_lock);
X unlock_vma_mappings(vma);
X return 0;
X }
@@ -1954,13 +2171,14 @@
X long error = -EBADF;
X struct file * file;
X unsigned long size, rlim_rss;
+ loff_t rsize;
X
X /* Doesn't work if there's no mapped file. */
X if (!vma->vm_file)
X return error;
X file = vma->vm_file;
- size = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ rsize = calc_rsize(file->f_dentry->d_inode);
+ size = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
X
X start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
X if (end > vma->vm_end)
@@ -2025,9 +2243,7 @@
X if (vma->vm_flags & VM_LOCKED)
X return -EINVAL;
X
- flush_cache_range(vma->vm_mm, start, end);
X zap_page_range(vma->vm_mm, start, end - start);
- flush_tlb_range(vma->vm_mm, start, end);
X return 0;
X }
X
@@ -2337,7 +2553,7 @@
X }
X }
X if (cached_page)
- page_cache_free(cached_page);
+ page_cache_release(cached_page);
X return page;
X }
X
@@ -2355,7 +2571,10 @@
X
X retry:
X page = __read_cache_page(mapping, index, filler, data);
- if (IS_ERR(page) || Page_Uptodate(page))
+ if (IS_ERR(page))
+ goto out;
+ mark_page_accessed(page);
+ if (Page_Uptodate(page))
X goto out;
X
X lock_page(page);
@@ -2406,7 +2625,7 @@
X struct page *cached_page = NULL;
X struct page *page = __grab_cache_page(mapping,index,&cached_page);
X if (cached_page)
- page_cache_free(cached_page);
+ page_cache_release(cached_page);
X return page;
X }
X
@@ -2441,7 +2660,7 @@
X * ok...@monad.swb.de
X */
X ssize_t
-generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
+generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
X {
X struct inode *inode = file->f_dentry->d_inode;
X struct address_space *mapping = inode->i_mapping;
@@ -2449,15 +2668,18 @@
X loff_t pos;
X struct page *page, *cached_page;
X unsigned long written;
- long status;
+ long status = 0;
X int err;
X unsigned bytes;
X
- cached_page = NULL;
+ if ((ssize_t) count < 0)
+ return -EINVAL;
X
X if (!access_ok(VERIFY_READ, buf, count))
X return -EFAULT;
-
+
+ cached_page = NULL;
+
X down(&inode->i_sem);
X
X pos = *ppos;
@@ -2473,7 +2695,8 @@
X
X written = 0;
X
- if (file->f_flags & O_APPEND)
+ /* FIXME: this is for backwards compatibility with 2.4 */
+ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
X pos = inode->i_size;
X
X /*
@@ -2516,31 +2739,51 @@
X * Linus frestrict idea will clean these up nicely..
X */
X
- if (pos >= inode->i_sb->s_maxbytes)
- {
- if (count || pos > inode->i_sb->s_maxbytes) {
- send_sig(SIGXFSZ, current, 0);
- err = -EFBIG;
+ if (!S_ISBLK(inode->i_mode)) {
+ if (pos >= inode->i_sb->s_maxbytes)
+ {
+ if (count || pos > inode->i_sb->s_maxbytes) {
+ send_sig(SIGXFSZ, current, 0);
+ err = -EFBIG;
+ goto out;
+ }
+ /* zero-length writes at ->s_maxbytes are OK */
+ }
+
+ if (pos + count > inode->i_sb->s_maxbytes)
+ count = inode->i_sb->s_maxbytes - pos;
+ } else {
+ if (is_read_only(inode->i_rdev)) {
+ err = -EPERM;
X goto out;
X }
- /* zero-length writes at ->s_maxbytes are OK */
- }
+ if (pos >= calc_rsize(inode)) {
+ if (count || pos > calc_rsize(inode)) {
+ /* FIXME: this is for backwards compatibility with 2.4 */
+ err = -ENOSPC;
+ goto out;
+ }
+ /* zero-length writes at blkdev end are OK */
+ }
X
- if (pos + count > inode->i_sb->s_maxbytes)
- count = inode->i_sb->s_maxbytes - pos;
+ if (pos + count > calc_rsize(inode))
+ count = calc_rsize(inode) - pos;
+ }
X
- if (count == 0) {
- err = 0;
+ err = 0;
+ if (count == 0)
X goto out;
- }
X
- status = 0;
X remove_suid(inode);
X inode->i_ctime = inode->i_mtime = CURRENT_TIME;
X mark_inode_dirty_sync(inode);
X
- while (count) {
+ if (file->f_flags & O_DIRECT)
+ goto o_direct;
+
+ do {
X unsigned long index, offset;
+ long page_fault;
X char *kaddr;
X
X /*
@@ -2574,15 +2817,15 @@
X PAGE_BUG(page);
X }
X
+ kaddr = kmap(page);
X status = mapping->a_ops->prepare_write(file, page, offset, offset+bytes);
X if (status)
X goto unlock;
- kaddr = page_address(page);
- status = __copy_from_user(kaddr+offset, buf, bytes);
+ page_fault = __copy_from_user(kaddr+offset, buf, bytes);
X flush_dcache_page(page);
- if (status)
- goto fail_write;
X status = mapping->a_ops->commit_write(file, page, offset, offset+bytes);
+ if (page_fault)
+ goto fail_write;
X if (!status)
X status = bytes;
X
@@ -2593,24 +2836,26 @@
X buf += status;
X }
X unlock:
+ kunmap(page);
X /* Mark it unlocked again and drop the page.. */
+ SetPageReferenced(page);
X UnlockPage(page);
- check_used_once(page);
X page_cache_release(page);
X
X if (status < 0)
X break;
- }
+ } while (count);
X *ppos = pos;
X
X if (cached_page)
- page_cache_free(cached_page);
+ page_cache_release(cached_page);
X
X /* For now, when the user asks for O_SYNC, we'll actually
X * provide O_DSYNC. */
X if ((status >= 0) && (file->f_flags & O_SYNC))
- status = generic_osync_inode(inode, 1); /* 1 means datasync */
+ status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
X
+out_status:
X err = written ? written : status;
X out:
X
@@ -2618,9 +2863,26 @@
X return err;
X fail_write:
X status = -EFAULT;
- ClearPageUptodate(page);
- kunmap(page);
X goto unlock;
+
+o_direct:
+ written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
+ if (written > 0) {
+ loff_t end = pos + written;
+ if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
+ inode->i_size = end;
+ mark_inode_dirty(inode);
+ }
+ *ppos = end;
+ invalidate_inode_pages2(mapping);
+ }
+ /*
+ * Sync the fs metadata but not the minor inode changes and
+ * of course not the data as we did direct DMA for the IO.
+ */
+ if (written >= 0 && file->f_flags & O_SYNC)
+ status = generic_osync_inode(inode, OSYNC_METADATA);
+ goto out_status;
X }
X
X void __init page_cache_init(unsigned long mempages)
diff -u --recursive --new-file v2.4.9/linux/mm/highmem.c linux/mm/highmem.c
--- v2.4.9/linux/mm/highmem.c Mon Aug 27 12:41:49 2001
+++ linux/mm/highmem.c Tue Sep 18 14:10:43 2001
@@ -212,9 +212,9 @@
X
X p_from = from->b_page;
X
- vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
+ vfrom = kmap_atomic(p_from, KM_USER0);
X memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
- kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
+ kunmap_atomic(vfrom, KM_USER0);
X }
X
X static inline void copy_to_high_bh_irq (struct buffer_head *to,
@@ -273,6 +273,13 @@
X
X static __init int init_emergency_pool(void)
X {
+ struct sysinfo i;
+ si_meminfo(&i);
+ si_swapinfo(&i);
+
+ if (!i.totalhigh)
+ return 0;
+
X spin_lock_irq(&emergency_lock);
X while (nr_emergency_pages < POOL_SIZE) {
X struct page * page = alloc_page(GFP_ATOMIC);
@@ -321,7 +328,7 @@
X struct page *page;
X
X repeat_alloc:
- page = alloc_page(GFP_NOIO);
+ page = alloc_page(GFP_NOHIGHIO);
X if (page)
X return page;
X /*
@@ -359,7 +366,7 @@
X struct buffer_head *bh;
X
X repeat_alloc:
- bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
+ bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO);
X if (bh)
X return bh;
X /*
diff -u --recursive --new-file v2.4.9/linux/mm/memory.c linux/mm/memory.c
--- v2.4.9/linux/mm/memory.c Mon Aug 13 16:16:41 2001
+++ linux/mm/memory.c Sat Sep 22 20:36:50 2001
@@ -47,6 +47,7 @@
X
X #include <asm/pgalloc.h>
X #include <asm/uaccess.h>
+#include <asm/tlb.h>
X
X unsigned long max_mapnr;
X unsigned long num_physpages;
@@ -70,6 +71,27 @@
X mem_map_t * mem_map;
X
X /*
+ * Called by TLB shootdown
+ */
+void __free_pte(pte_t pte)
+{
+ struct page *page = pte_page(pte);
+ if ((!VALID_PAGE(page)) || PageReserved(page))
+ return;
+ /*
+ * free_page() used to be able to clear swap cache
+ * entries. We may now have to do it manually.
+ */
+ if (page->mapping) {
+ if (pte_dirty(pte))
+ set_page_dirty(page);
+ }
+
+ free_page_and_swap_cache(page);
+}
+
+
+/*
X * Note: this doesn't free the actual pages themselves. That
X * has been handled earlier when unmapping all the memory regions.
X */
@@ -103,8 +125,10 @@
X }
X pmd = pmd_offset(dir, 0);
X pgd_clear(dir);
- for (j = 0; j < PTRS_PER_PMD ; j++)
+ for (j = 0; j < PTRS_PER_PMD ; j++) {
+ prefetchw(pmd+j+(PREFETCH_STRIDE/16));
X free_one_pmd(pmd+j);
+ }
X pmd_free(pmd);
X }
X
@@ -128,11 +152,13 @@
X {
X pgd_t * page_dir = mm->pgd;
X
+ spin_lock(&mm->page_table_lock);
X page_dir += first;
X do {
X free_one_pgd(page_dir);
X page_dir++;
X } while (--nr);
+ spin_unlock(&mm->page_table_lock);
X
X /* keep the page table cache within bounds */
X check_pgt_cache();
@@ -148,6 +174,9 @@
X *
X * 08Jan98 Merged into one routine from several inline routines to reduce
X * variable count and make things faster. -jj
+ *
+ * dst->page_table_lock is held on entry and exit,
+ * but may be dropped within pmd_alloc() and pte_alloc().
X */
X int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
X struct vm_area_struct *vma)
@@ -159,8 +188,7 @@
X
X src_pgd = pgd_offset(src, address)-1;
X dst_pgd = pgd_offset(dst, address)-1;
-
- spin_lock(&dst->page_table_lock);
+
X for (;;) {
X pmd_t * src_pmd, * dst_pmd;
X
@@ -234,6 +262,7 @@
X pte = pte_mkclean(pte);
X pte = pte_mkold(pte);
X get_page(ptepage);
+ dst->rss++;
X
X cont_copy_pte_range: set_pte(dst_pte, pte);
X cont_copy_pte_range_noset: address += PAGE_SIZE;
@@ -251,48 +280,27 @@
X out_unlock:
X spin_unlock(&src->page_table_lock);
X out:
- spin_unlock(&dst->page_table_lock);
X return 0;
-
X nomem:
- spin_unlock(&dst->page_table_lock);
X return -ENOMEM;
X }
X
X /*
X * Return indicates whether a page was freed so caller can adjust rss
X */
-static inline int free_pte(pte_t pte)
-{
- if (pte_present(pte)) {
- struct page *page = pte_page(pte);
- if ((!VALID_PAGE(page)) || PageReserved(page))
- return 0;
- /*
- * free_page() used to be able to clear swap cache
- * entries. We may now have to do it manually.
- */
- if (pte_dirty(pte) && page->mapping)
- set_page_dirty(page);
- free_page_and_swap_cache(page);
- return 1;
- }
- swap_free(pte_to_swp_entry(pte));
- return 0;
-}
-
X static inline void forget_pte(pte_t page)
X {
X if (!pte_none(page)) {
X printk("forget_pte: old mapping existed!\n");
- free_pte(page);
+ BUG();
X }
X }
X
-static inline int zap_pte_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size)
+static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
X {
- pte_t * pte;
- int freed;
+ unsigned long offset;
+ pte_t * ptep;
+ int freed = 0;
X
X if (pmd_none(*pmd))
X return 0;
@@ -301,27 +309,29 @@
X pmd_clear(pmd);
X return 0;
X }
- pte = pte_offset(pmd, address);
- address &= ~PMD_MASK;
- if (address + size > PMD_SIZE)
- size = PMD_SIZE - address;
- size >>= PAGE_SHIFT;
- freed = 0;
- for (;;) {
- pte_t page;
- if (!size)
- break;
- page = ptep_get_and_clear(pte);
- pte++;
- size--;
- if (pte_none(page))
+ ptep = pte_offset(pmd, address);
+ offset = address & ~PMD_MASK;
+ if (offset + size > PMD_SIZE)
+ size = PMD_SIZE - offset;
+ size &= PAGE_MASK;
+ for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
+ pte_t pte = *ptep;
+ if (pte_none(pte))
X continue;
- freed += free_pte(page);
+ if (pte_present(pte)) {
+ freed ++;
+ /* This will eventually call __free_pte on the pte. */
+ tlb_remove_page(tlb, ptep, address + offset);
+ } else {
+ swap_free(pte_to_swp_entry(pte));
+ pte_clear(ptep);
+ }
X }
+
X return freed;
X }
X
-static inline int zap_pmd_range(struct mm_struct *mm, pgd_t * dir, unsigned long address, unsigned long size)
+static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
X {
X pmd_t * pmd;
X unsigned long end;
@@ -335,13 +345,12 @@
X return 0;
X }
X pmd = pmd_offset(dir, address);
- address &= ~PGDIR_MASK;
X end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
+ if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+ end = ((address + PGDIR_SIZE) & PGDIR_MASK);
X freed = 0;
X do {
- freed += zap_pte_range(mm, pmd, address, end - address);
+ freed += zap_pte_range(tlb, pmd, address, end - address);
X address = (address + PMD_SIZE) & PMD_MASK;
X pmd++;
X } while (address < end);
@@ -353,8 +362,9 @@
X */
X void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
X {
+ mmu_gather_t *tlb;
X pgd_t * dir;
- unsigned long end = address + size;
+ unsigned long start = address, end = address + size;
X int freed = 0;
X
X dir = pgd_offset(mm, address);
@@ -369,11 +379,18 @@
X if (address >= end)
X BUG();
X spin_lock(&mm->page_table_lock);
+ flush_cache_range(mm, address, end);
+ tlb = tlb_gather_mmu(mm);
+
X do {
- freed += zap_pmd_range(mm, dir, address, end - address);
+ freed += zap_pmd_range(tlb, dir, address, end - address);
X address = (address + PGDIR_SIZE) & PGDIR_MASK;
X dir++;
X } while (address && (address < end));
+
+ /* this will flush any remaining tlb entries */
+ tlb_finish_mmu(tlb, start, end);
+
X /*
X * Update rss for the mm_struct (not necessarily current->mm)
X * Notice that rss is an unsigned long.
@@ -920,6 +937,8 @@
X break;
X /* Recheck swapcachedness once the page is locked */
X can_reuse = exclusive_swap_page(old_page);
+ if (can_reuse)
+ delete_from_swap_cache(old_page);
X UnlockPage(old_page);
X if (!can_reuse)
X break;
@@ -979,9 +998,7 @@
X
X /* mapping wholly truncated? */
X if (mpnt->vm_pgoff >= pgoff) {
- flush_cache_range(mm, start, end);
X zap_page_range(mm, start, len);
- flush_tlb_range(mm, start, end);
X continue;
X }
X
@@ -994,12 +1011,9 @@
X /* Ok, partially affected.. */
X start += diff << PAGE_SHIFT;
X len = (len - diff) << PAGE_SHIFT;
- flush_cache_range(mm, start, end);
X zap_page_range(mm, start, len);
- flush_tlb_range(mm, start, end);
X } while ((mpnt = mpnt->vm_next_share) != NULL);
X }
-
X
X /*
X * Handle all mappings that got truncated by a "truncate()"
@@ -1009,7 +1023,7 @@
X * between the file and the memory map for a potential last
X * incomplete page. Ugly, but necessary.
X */
-void vmtruncate(struct inode * inode, loff_t offset)
+int vmtruncate(struct inode * inode, loff_t offset)
X {
X unsigned long pgoff;
X struct address_space *mapping = inode->i_mapping;
@@ -1054,11 +1068,9 @@
X unlock_kernel();
X }
X out:
- return;
+ return 0;
X }
X
-
-
X /*
X * Primitive swap readahead code. We simply read an aligned block of
X * (1 << page_cluster) entries in the swap area. This method is chosen
@@ -1072,23 +1084,19 @@
X unsigned long offset;
X
X /*
- * Get the number of handles we should do readahead io to. Also,
- * grab temporary references on them, releasing them as io completes.
+ * Get the number of handles we should do readahead io to.
X */
X num = valid_swaphandles(entry, &offset);
X for (i = 0; i < num; offset++, i++) {
X /* Don't block on I/O for read-ahead */
- if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster
- * (1 << page_cluster)) {
- while (i++ < num)
- swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
+ if (atomic_read(&nr_async_pages) >=
+ pager_daemon.swap_cluster << page_cluster)
X break;
- }
X /* Ok, do the async read-ahead now */
X new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
- if (new_page != NULL)
- page_cache_release(new_page);
- swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
+ if (!new_page)
+ break;
+ page_cache_release(new_page);
X }
X return;
X }
@@ -1103,6 +1111,7 @@
X struct page *page;
X swp_entry_t entry = pte_to_swp_entry(orig_pte);
X pte_t pte;
+ int ret = 1;
X
X spin_unlock(&mm->page_table_lock);
X page = lookup_swap_cache(entry);
@@ -1119,6 +1128,9 @@
X */
X return pte_same(*page_table, orig_pte) ? -1 : 1;
X }
+
+ /* Had to read the page from swap area: Major fault */
+ ret = 2;
X }
X
X /*
@@ -1144,13 +1156,12 @@
X pte = mk_pte(page, vma->vm_page_prot);
X
X swap_free(entry);
- if (exclusive_swap_page(page)) {
- if (write_access)
- pte = pte_mkwrite(pte_mkdirty(pte));
- if (vm_swap_full()) {
- delete_from_swap_cache_nolock(page);
- pte = pte_mkdirty(pte);
- }
+ mark_page_accessed(page);
+ if (exclusive_swap_page(page)) {
+ if (vma->vm_flags & VM_WRITE)
+ pte = pte_mkwrite(pte);
+ pte = pte_mkdirty(pte);
+ delete_from_swap_cache(page);
X }
X UnlockPage(page);
X
@@ -1160,7 +1171,7 @@
X
X /* No need to invalidate - it was non-present before */
X update_mmu_cache(vma, address, pte);
- return 1; /* Minor fault */
+ return ret;
X }
X
X /*
@@ -1377,7 +1388,7 @@
X * Because we dropped the lock, we should re-check the
X * entry, as somebody else could have populated it..
X */
- if (pgd_present(*pgd)) {
+ if (!pgd_none(*pgd)) {
X pmd_free(new);
X goto out;
X }
@@ -1395,7 +1406,7 @@
X */
X pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
X {
- if (!pmd_present(*pmd)) {
+ if (pmd_none(*pmd)) {
X pte_t *new;
X
X /* "fast" allocation can happen without dropping the lock.. */
@@ -1411,7 +1422,7 @@
X * Because we dropped the lock, we should re-check the
X * entry, as somebody else could have populated it..
X */
- if (pmd_present(*pmd)) {
+ if (!pmd_none(*pmd)) {
X pte_free(new);
X goto out;
X }
diff -u --recursive --new-file v2.4.9/linux/mm/mlock.c linux/mm/mlock.c
--- v2.4.9/linux/mm/mlock.c Mon Mar 19 12:35:08 2001
+++ linux/mm/mlock.c Mon Sep 17 15:30:23 2001
@@ -36,9 +36,9 @@
X get_file(n->vm_file);
X if (n->vm_ops && n->vm_ops->open)
X n->vm_ops->open(n);
+ vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
X lock_vma_mappings(vma);
X spin_lock(&vma->vm_mm->page_table_lock);
- vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
X vma->vm_start = end;
X __insert_vm_struct(current->mm, n);
X spin_unlock(&vma->vm_mm->page_table_lock);
@@ -100,13 +100,13 @@
X vma->vm_ops->open(left);
X vma->vm_ops->open(right);
X }
+ vma->vm_raend = 0;
+ vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
X lock_vma_mappings(vma);
X spin_lock(&vma->vm_mm->page_table_lock);
- vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
X vma->vm_start = start;
X vma->vm_end = end;
X vma->vm_flags = newflags;
- vma->vm_raend = 0;
X __insert_vm_struct(current->mm, left);
X __insert_vm_struct(current->mm, right);
X spin_unlock(&vma->vm_mm->page_table_lock);
diff -u --recursive --new-file v2.4.9/linux/mm/mmap.c linux/mm/mmap.c
--- v2.4.9/linux/mm/mmap.c Thu May 24 15:20:18 2001
+++ linux/mm/mmap.c Thu Sep 20 20:30:21 2001
@@ -13,10 +13,17 @@
X #include <linux/init.h>
X #include <linux/file.h>
X #include <linux/fs.h>
+#include <linux/personality.h>
X
X #include <asm/uaccess.h>
X #include <asm/pgalloc.h>
X
+/*
+ * WARNING: the debugging will use recursive algorithms so never enable this
+ * unless you know what you are doing.
+ */
+#undef DEBUG_MM_RB
+
X /* description of effects of mapping type and prot in current implementation.
X * this is due to the limited x86 page protection hardware. The expected
X * behavior is in parens:
@@ -204,14 +211,193 @@
X #undef _trans
X }
X
+#ifdef DEBUG_MM_RB
+static int browse_rb(rb_node_t * rb_node) {
+ int i = 0;
+ if (rb_node) {
+ i++;
+ i += browse_rb(rb_node->rb_left);
+ i += browse_rb(rb_node->rb_right);
+ }
+ return i;
+}
+
+static void validate_mm(struct mm_struct * mm) {
+ int bug = 0;
+ int i = 0;
+ struct vm_area_struct * tmp = mm->mmap;
+ while (tmp) {
+ tmp = tmp->vm_next;
+ i++;
+ }
+ if (i != mm->map_count)
+ printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
+ i = browse_rb(mm->mm_rb.rb_node);
+ if (i != mm->map_count)
+ printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
+ if (bug)
+ BUG();
+}
+#else
+#define validate_mm(mm) do { } while (0)
+#endif
+
+static struct vm_area_struct * find_vma_prepare(struct mm_struct * mm, unsigned long addr,
+ struct vm_area_struct ** pprev,
+ rb_node_t *** rb_link, rb_node_t ** rb_parent)
+{
+ struct vm_area_struct * vma;
+ rb_node_t ** __rb_link, * __rb_parent, * rb_prev;
+
+ __rb_link = &mm->mm_rb.rb_node;
+ rb_prev = __rb_parent = NULL;
+ vma = NULL;
+
+ while (*__rb_link) {
+ struct vm_area_struct *vma_tmp;
+
+ __rb_parent = *__rb_link;
+ vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
+
+ if (vma_tmp->vm_end > addr) {
+ vma = vma_tmp;
+ if (vma_tmp->vm_start <= addr)
+ return vma;
+ __rb_link = &__rb_parent->rb_left;
+ } else {
+ rb_prev = __rb_parent;
+ __rb_link = &__rb_parent->rb_right;
+ }
+ }
+
+ *pprev = NULL;
+ if (rb_prev)
+ *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
+ *rb_link = __rb_link;
+ *rb_parent = __rb_parent;
+ return vma;
+}
+
+static inline void __vma_link_list(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
+ rb_node_t * rb_parent)
+{
+ if (prev) {
+ vma->vm_next = prev->vm_next;
+ prev->vm_next = vma;
+ } else {
+ mm->mmap = vma;
+ if (rb_parent)
+ vma->vm_next = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
+ else
+ vma->vm_next = NULL;
+ }
+}
+
+static inline void __vma_link_rb(struct mm_struct * mm, struct vm_area_struct * vma,
+ rb_node_t ** rb_link, rb_node_t * rb_parent)
+{
+ rb_link_node(&vma->vm_rb, rb_parent, rb_link);
+ rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+}
+
+static inline void __vma_link_file(struct vm_area_struct * vma)
+{
+ struct file * file;
+
+ file = vma->vm_file;
+ if (file) {
+ struct inode * inode = file->f_dentry->d_inode;
+ struct address_space *mapping = inode->i_mapping;
+ struct vm_area_struct **head;
+
+ if (vma->vm_flags & VM_DENYWRITE)
+ atomic_dec(&inode->i_writecount);
+
+ head = &mapping->i_mmap;
+ if (vma->vm_flags & VM_SHARED)
+ head = &mapping->i_mmap_shared;
+
+ /* insert vma into inode's share list */
+ if((vma->vm_next_share = *head) != NULL)
+ (*head)->vm_pprev_share = &vma->vm_next_share;
+ *head = vma;
+ vma->vm_pprev_share = head;
+ }
+}
+
+static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
+ rb_node_t ** rb_link, rb_node_t * rb_parent)
+{
+ __vma_link_list(mm, vma, prev, rb_parent);
+ __vma_link_rb(mm, vma, rb_link, rb_parent);
+ __vma_link_file(vma);
+}
+
+static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
+ rb_node_t ** rb_link, rb_node_t * rb_parent)
+{
+ lock_vma_mappings(vma);
+ spin_lock(&mm->page_table_lock);
+ __vma_link(mm, vma, prev, rb_link, rb_parent);
+ spin_unlock(&mm->page_table_lock);
+ unlock_vma_mappings(vma);
+
+ mm->map_count++;
+ validate_mm(mm);
+}
+
+static int vma_merge(struct mm_struct * mm, struct vm_area_struct * prev,
+ rb_node_t * rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags)
+{
+ spinlock_t * lock = &mm->page_table_lock;
+ if (!prev) {
+ prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
+ goto merge_next;
+ }
+ if (prev->vm_end == addr && can_vma_merge(prev, vm_flags)) {
+ struct vm_area_struct * next;
+
+ spin_lock(lock);
+ prev->vm_end = end;
+ next = prev->vm_next;
+ if (next && prev->vm_end == next->vm_start && can_vma_merge(next, vm_flags)) {
+ prev->vm_end = next->vm_end;
+ __vma_unlink(mm, next, prev);
+ spin_unlock(lock);
+
+ mm->map_count--;
+ kmem_cache_free(vm_area_cachep, next);
+ return 1;
+ }
+ spin_unlock(lock);
+ return 1;
+ }
+
+ prev = prev->vm_next;
+ if (prev) {
+ merge_next:
+ if (!can_vma_merge(prev, vm_flags))
+ return 0;
+ if (end == prev->vm_start) {
+ spin_lock(lock);
+ prev->vm_start = addr;
+ spin_unlock(lock);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
X unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
X unsigned long prot, unsigned long flags, unsigned long pgoff)
X {
X struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev;
X unsigned int vm_flags;
X int correct_wcount = 0;
X int error;
+ rb_node_t ** rb_link, * rb_parent;
X
X if (file && (!file->f_op || !file->f_op->mmap))
X return -ENODEV;
@@ -219,7 +405,7 @@
X if ((len = PAGE_ALIGN(len)) == 0)
X return addr;
X
- if (len > TASK_SIZE || addr > TASK_SIZE-len)
+ if (len > TASK_SIZE)
X return -EINVAL;
X
X /* offset overflow? */
@@ -293,8 +479,13 @@
X
X /* Clear old maps */
X error = -ENOMEM;
- if (do_munmap(mm, addr, len))
- return -ENOMEM;
+munmap_back:
+ vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ if (vma && vma->vm_start < addr + len) {
+ if (do_munmap(mm, addr, len))
+ return -ENOMEM;
+ goto munmap_back;
+ }
X
X /* Check against address space limit. */
X if ((mm->total_vm << PAGE_SHIFT) + len
@@ -308,14 +499,9 @@
X return -ENOMEM;
X
X /* Can we just expand an old anonymous mapping? */
- if (addr && !file && !(vm_flags & VM_SHARED)) {
- struct vm_area_struct * vma = find_vma(mm, addr-1);
- if (vma && vma->vm_end == addr && !vma->vm_file &&
- vma->vm_flags == vm_flags) {
- vma->vm_end = addr + len;
+ if (!file && !(vm_flags & VM_SHARED) && rb_parent)
+ if (vma_merge(mm, prev, rb_parent, addr, addr + len, vm_flags))
X goto out;
- }
- }
X
X /* Determine the object being mapped and call the appropriate
X * specific mapper. the address has already been validated, but
@@ -337,6 +523,9 @@
X vma->vm_raend = 0;
X
X if (file) {
+ error = -EINVAL;
+ if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+ goto free_vma;
X if (vm_flags & VM_DENYWRITE) {
X error = deny_write_access(file);
X if (error)
@@ -361,7 +550,7 @@
X */
X addr = vma->vm_start;
X
- insert_vm_struct(mm, vma);
+ vma_link(mm, vma, prev, rb_link, rb_parent);
X if (correct_wcount)
X atomic_inc(&file->f_dentry->d_inode->i_writecount);
X
@@ -378,10 +567,9 @@
X atomic_inc(&file->f_dentry->d_inode->i_writecount);
X vma->vm_file = NULL;
X fput(file);
+
X /* Undo any partial mapping done by a device driver. */
- flush_cache_range(mm, vma->vm_start, vma->vm_end);
X zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
- flush_tlb_range(mm, vma->vm_start, vma->vm_end);
X free_vma:
X kmem_cache_free(vm_area_cachep, vma);
X return error;
@@ -405,9 +593,15 @@
X
X if (len > TASK_SIZE)
X return -ENOMEM;
- if (!addr)
- addr = TASK_UNMAPPED_BASE;
- addr = PAGE_ALIGN(addr);
+
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(current->mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+ addr = PAGE_ALIGN(TASK_UNMAPPED_BASE);
X
X for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
X /* At this point: (!vma || addr < vma->vm_end). */
@@ -425,6 +619,8 @@
X unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
X {
X if (flags & MAP_FIXED) {
+ if (addr > TASK_SIZE - len)
+ return -EINVAL;
X if (addr & ~PAGE_MASK)
X return -EINVAL;
X return addr;
@@ -436,10 +632,6 @@
X return arch_get_unmapped_area(file, addr, len, pgoff, flags);
X }
X
-#define vm_avl_empty (struct vm_area_struct *) NULL
-
-#include "mmap_avl.c"
-
X /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
X struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
X {
@@ -450,26 +642,23 @@
X /* (Cache hit rate is typically around 35%.) */
X vma = mm->mmap_cache;
X if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
- if (!mm->mmap_avl) {
- /* Go through the linear list. */
- vma = mm->mmap;
- while (vma && vma->vm_end <= addr)
- vma = vma->vm_next;
- } else {
- /* Then go through the AVL tree quickly. */
- struct vm_area_struct * tree = mm->mmap_avl;
- vma = NULL;
- for (;;) {
- if (tree == vm_avl_empty)
+ rb_node_t * rb_node;
+
+ rb_node = mm->mm_rb.rb_node;
+ vma = NULL;
+
+ while (rb_node) {
+ struct vm_area_struct * vma_tmp;
+
+ vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+ if (vma_tmp->vm_end > addr) {
+ vma = vma_tmp;
+ if (vma_tmp->vm_start <= addr)
X break;
- if (tree->vm_end > addr) {
- vma = tree;
- if (tree->vm_start <= addr)
- break;
- tree = tree->vm_avl_left;
- } else
- tree = tree->vm_avl_right;
- }
+ rb_node = rb_node->rb_left;
+ } else
+ rb_node = rb_node->rb_right;
X }
X if (vma)
X mm->mmap_cache = vma;
@@ -483,47 +672,42 @@
X struct vm_area_struct **pprev)
X {
X if (mm) {
- if (!mm->mmap_avl) {
- /* Go through the linear list. */
- struct vm_area_struct * prev = NULL;
- struct vm_area_struct * vma = mm->mmap;
- while (vma && vma->vm_end <= addr) {
- prev = vma;
- vma = vma->vm_next;
- }
- *pprev = prev;
- return vma;
- } else {
- /* Go through the AVL tree quickly. */
- struct vm_area_struct * vma = NULL;
- struct vm_area_struct * last_turn_right = NULL;
- struct vm_area_struct * prev = NULL;
- struct vm_area_struct * tree = mm->mmap_avl;
- for (;;) {
- if (tree == vm_avl_empty)
+ /* Go through the RB tree quickly. */
+ struct vm_area_struct * vma;
+ rb_node_t * rb_node, * rb_last_right, * rb_prev;
+
+ rb_node = mm->mm_rb.rb_node;
+ rb_last_right = rb_prev = NULL;
+ vma = NULL;
+
+ while (rb_node) {
+ struct vm_area_struct * vma_tmp;
+
+ vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+ if (vma_tmp->vm_end > addr) {
+ vma = vma_tmp;
+ rb_prev = rb_last_right;
+ if (vma_tmp->vm_start <= addr)
X break;
- if (tree->vm_end > addr) {
- vma = tree;
- prev = last_turn_right;
- if (tree->vm_start <= addr)
- break;
- tree = tree->vm_avl_left;
- } else {
- last_turn_right = tree;
- tree = tree->vm_avl_right;
- }
+ rb_node = rb_node->rb_left;
+ } else {
+ rb_last_right = rb_node;
+ rb_node = rb_node->rb_right;
X }
- if (vma) {
- if (vma->vm_avl_left != vm_avl_empty) {
- prev = vma->vm_avl_left;
- while (prev->vm_avl_right != vm_avl_empty)
- prev = prev->vm_avl_right;
- }
- if ((prev ? prev->vm_next : mm->mmap) != vma)
- printk("find_vma_prev: tree inconsistent with list\n");
- *pprev = prev;
- return vma;
+ }
+ if (vma) {
+ if (vma->vm_rb.rb_left) {
+ rb_prev = vma->vm_rb.rb_left;
+ while (rb_prev->rb_right)
+ rb_prev = rb_prev->rb_right;
X }
+ *pprev = NULL;
+ if (rb_prev)
+ *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
+ if ((rb_prev ? (*pprev)->vm_next : mm->mmap) != vma)
+ BUG();
+ return vma;
X }
X }
X *pprev = NULL;
@@ -598,11 +782,16 @@
X
X /* Work out to one of the ends. */
X if (end == area->vm_end) {
+ /*
+ * here area isn't visible to the semaphore-less readers
+ * so we don't need to update it under the spinlock.
+ */
X area->vm_end = addr;
X lock_vma_mappings(area);
X spin_lock(&mm->page_table_lock);
X } else if (addr == area->vm_start) {
X area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
+ /* same locking considerations of the above case */
X area->vm_start = end;
X lock_vma_mappings(area);
X spin_lock(&mm->page_table_lock);
@@ -748,8 +937,7 @@
X *npp = mpnt->vm_next;
X mpnt->vm_next = free;
X free = mpnt;
- if (mm->mmap_avl)
- avl_remove(mpnt, &mm->mmap_avl);
+ rb_erase(&mpnt->vm_rb, &mm->mm_rb);
X }
X mm->mmap_cache = NULL; /* Kill the cache. */
X spin_unlock(&mm->page_table_lock);
@@ -779,9 +967,7 @@
X remove_shared_vm_struct(mpnt);
X mm->map_count--;
X
- flush_cache_range(mm, st, end);
X zap_page_range(mm, st, size);
- flush_tlb_range(mm, st, end);
X
X /*
X * Fix the mapping, and free the old area if it wasn't reused.
@@ -790,6 +976,7 @@
X if (file)
X atomic_inc(&file->f_dentry->d_inode->i_writecount);
X }
+ validate_mm(mm);
X
X /* Release the extra vma struct if it wasn't used */
X if (extra)
@@ -819,8 +1006,9 @@
X unsigned long do_brk(unsigned long addr, unsigned long len)
X {
X struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma;
- unsigned long flags, retval;
+ struct vm_area_struct * vma, * prev;
+ unsigned long flags;
+ rb_node_t ** rb_link, * rb_parent;
X
X len = PAGE_ALIGN(len);
X if (!len)
@@ -839,9 +1027,13 @@
X /*
X * Clear old maps. this also does some error checking for us
X */
- retval = do_munmap(mm, addr, len);
- if (retval != 0)
- return retval;
+ munmap_back:
+ vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ if (vma && vma->vm_start < addr + len) {
+ if (do_munmap(mm, addr, len))
+ return -ENOMEM;
+ goto munmap_back;
+ }
X
X /* Check against address space limits *after* clearing old maps... */
X if ((mm->total_vm << PAGE_SHIFT) + len
@@ -858,16 +1050,10 @@
X MAP_FIXED|MAP_PRIVATE) | mm->def_flags;
X
X flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
-
+
X /* Can we just expand an old anonymous mapping? */
- if (addr) {
- struct vm_area_struct * vma = find_vma(mm, addr-1);
- if (vma && vma->vm_end == addr && !vma->vm_file &&
- vma->vm_flags == flags) {
- vma->vm_end = addr + len;
- goto out;
- }
- }
+ if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, flags))
+ goto out;
X
X /*
X * create a vma struct for an anonymous mapping
@@ -886,7 +1072,7 @@
X vma->vm_file = NULL;
X vma->vm_private_data = NULL;
X
- insert_vm_struct(mm, vma);
+ vma_link(mm, vma, prev, rb_link, rb_parent);
X
X out:
X mm->total_vm += len >> PAGE_SHIFT;
@@ -897,14 +1083,20 @@
X return addr;
X }
X
-/* Build the AVL tree corresponding to the VMA list. */
-void build_mmap_avl(struct mm_struct * mm)
+/* Build the RB tree corresponding to the VMA list. */
+void build_mmap_rb(struct mm_struct * mm)
X {
X struct vm_area_struct * vma;
+ rb_node_t ** rb_link, * rb_parent;
X
- mm->mmap_avl = NULL;
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- avl_insert(vma, &mm->mmap_avl);
+ mm->mm_rb = RB_ROOT;
+ rb_link = &mm->mm_rb.rb_node;
+ rb_parent = NULL;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ __vma_link_rb(mm, vma, rb_link, rb_parent);
+ rb_parent = &vma->vm_rb;
+ rb_link = &rb_parent->rb_right;
+ }
X }
X
X /* Release all mmaps. */
@@ -915,7 +1107,8 @@
X release_segments(mm);
X spin_lock(&mm->page_table_lock);
X mpnt = mm->mmap;
- mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
+ mm->mmap = mm->mmap_cache = NULL;
+ mm->mm_rb = RB_ROOT;
X mm->rss = 0;
X spin_unlock(&mm->page_table_lock);
X mm->total_vm = 0;
@@ -944,7 +1137,7 @@
X
X /* This is just debugging */
X if (mm->map_count)
- printk("exit_mmap: map count is %d\n", mm->map_count);
+ BUG();
X
X clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
X }
@@ -953,55 +1146,27 @@
X * and into the inode's i_mmap ring. If vm_file is non-NULL
X * then the i_shared_lock must be held here.
X */
-void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
+void __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
X {
- struct vm_area_struct **pprev;
- struct file * file;
-
- if (!mm->mmap_avl) {
- pprev = &mm->mmap;
- while (*pprev && (*pprev)->vm_start <= vmp->vm_start)
- pprev = &(*pprev)->vm_next;
- } else {
- struct vm_area_struct *prev, *next;
- avl_insert_neighbours(vmp, &mm->mmap_avl, &prev, &next);
- pprev = (prev ? &prev->vm_next : &mm->mmap);
- if (*pprev != next)
- printk("insert_vm_struct: tree inconsistent with list\n");
- }
- vmp->vm_next = *pprev;
- *pprev = vmp;
+ struct vm_area_struct * __vma, * prev;
+ rb_node_t ** rb_link, * rb_parent;
X
+ __vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
+ if (__vma && __vma->vm_start < vma->vm_end)
+ BUG();
+ __vma_link(mm, vma, prev, rb_link, rb_parent);
X mm->map_count++;
- if (mm->map_count >= AVL_MIN_MAP_COUNT && !mm->mmap_avl)
- build_mmap_avl(mm);
-
- file = vmp->vm_file;
- if (file) {
- struct inode * inode = file->f_dentry->d_inode;
- struct address_space *mapping = inode->i_mapping;
- struct vm_area_struct **head;
-
- if (vmp->vm_flags & VM_DENYWRITE)
- atomic_dec(&inode->i_writecount);
-
- head = &mapping->i_mmap;
- if (vmp->vm_flags & VM_SHARED)
- head = &mapping->i_mmap_shared;
-
- /* insert vmp into inode's share list */
- if((vmp->vm_next_share = *head) != NULL)
- (*head)->vm_pprev_share = &vmp->vm_next_share;
- *head = vmp;
- vmp->vm_pprev_share = head;
- }
+ validate_mm(mm);
X }
X
-void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
+void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
X {
- lock_vma_mappings(vmp);
- spin_lock(&current->mm->page_table_lock);
- __insert_vm_struct(mm, vmp);
- spin_unlock(&current->mm->page_table_lock);
- unlock_vma_mappings(vmp);
+ struct vm_area_struct * __vma, * prev;
+ rb_node_t ** rb_link, * rb_parent;
+
+ __vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
+ if (__vma && __vma->vm_start < vma->vm_end)
+ BUG();
+ vma_link(mm, vma, prev, rb_link, rb_parent);
+ validate_mm(mm);
X }
diff -u --recursive --new-file v2.4.9/linux/mm/mmap_avl.c linux/mm/mmap_avl.c
--- v2.4.9/linux/mm/mmap_avl.c Tue Mar 20 13:54:27 2001
+++ linux/mm/mmap_avl.c Wed Dec 31 16:00:00 1969
@@ -1,374 +0,0 @@
-/*
- * Searching a VMA in the linear list task->mm->mmap is horribly slow.
- * Use an AVL (Adelson-Velskii and Landis) tree to speed up this search
- * from O(n) to O(log n), where n is the number of VMAs of the task
- * n is typically around 6, but may reach 3000 in some cases: object-oriented
- * databases, persistent store, generational garbage collection (Java, Lisp),
- * ElectricFence.
- * Written by Bruno Haible <hai...@ma2s2.mathematik.uni-karlsruhe.de>.
- */
-
-/* We keep the list and tree sorted by address. */
-#define vm_avl_key vm_end
-#define vm_avl_key_t unsigned long /* typeof(vma->avl_key) */
-
-/*
- * task->mm->mmap_avl is the AVL tree corresponding to task->mm->mmap
- * or, more exactly, its root.
- * A vm_area_struct has the following fields:
- * vm_avl_left left son of a tree node
- * vm_avl_right right son of a tree node
- * vm_avl_height 1+max(heightof(left),heightof(right))
- * The empty tree is represented as NULL.
- */
-
-/* Since the trees are balanced, their height will never be large. */
-#define avl_maxheight 41 /* why this? a small exercise */
-#define heightof(tree) ((tree) == vm_avl_empty ? 0 : (tree)->vm_avl_height)
-/*
- * Consistency and balancing rules:
- * 1. tree->vm_avl_height == 1+max(heightof(tree->vm_avl_left),heightof(tree->vm_avl_right))
- * 2. abs( heightof(tree->vm_avl_left) - heightof(tree->vm_avl_right) ) <= 1
- * 3. foreach node in tree->vm_avl_left: node->vm_avl_key <= tree->vm_avl_key,
- * foreach node in tree->vm_avl_right: node->vm_avl_key >= tree->vm_avl_key.
- */
-
-#ifdef DEBUG_AVL
-
-/* Look up the nodes at the left and at the right of a given node. */
-static void avl_neighbours (struct vm_area_struct * node, struct vm_area_struct * tree, struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
-{
- vm_avl_key_t key = node->vm_avl_key;
-
- *to_the_left = *to_the_right = NULL;
- for (;;) {
- if (tree == vm_avl_empty) {
- printk("avl_neighbours: node not found in the tree\n");
- return;
- }
- if (key == tree->vm_avl_key)
- break;
- if (key < tree->vm_avl_key) {
- *to_the_right = tree;
- tree = tree->vm_avl_left;
- } else {
- *to_the_left = tree;
- tree = tree->vm_avl_right;
- }
- }
- if (tree != node) {
- printk("avl_neighbours: node not exactly found in the tree\n");
- return;
- }
- if (tree->vm_avl_left != vm_avl_empty) {
- struct vm_area_struct * node;
- for (node = tree->vm_avl_left; node->vm_avl_right != vm_avl_empty; node = node->vm_avl_right)
- continue;
- *to_the_left = node;
- }
- if (tree->vm_avl_right != vm_avl_empty) {
- struct vm_area_struct * node;
- for (node = tree->vm_avl_right; node->vm_avl_left != vm_avl_empty; node = node->vm_avl_left)
- continue;
- *to_the_right = node;
- }
- if ((*to_the_left && ((*to_the_left)->vm_next != node)) || (node->vm_next != *to_the_right))
- printk("avl_neighbours: tree inconsistent with list\n");
-}
-
-#endif
-
-/*
- * Rebalance a tree.
- * After inserting or deleting a node of a tree we have a sequence of subtrees
- * nodes[0]..nodes[k-1] such that
- * nodes[0] is the root and nodes[i+1] = nodes[i]->{vm_avl_left|vm_avl_right}.
- */
-static void avl_rebalance (struct vm_area_struct *** nodeplaces_ptr, int count)
-{
- for ( ; count > 0 ; count--) {
- struct vm_area_struct ** nodeplace = *--nodeplaces_ptr;
- struct vm_area_struct * node = *nodeplace;
- struct vm_area_struct * nodeleft = node->vm_avl_left;
- struct vm_area_struct * noderight = node->vm_avl_right;
- int heightleft = heightof(nodeleft);
- int heightright = heightof(noderight);
- if (heightright + 1 < heightleft) {
- /* */
- /* * */
- /* / \ */
- /* n+2 n */
- /* */
- struct vm_area_struct * nodeleftleft = nodeleft->vm_avl_left;
- struct vm_area_struct * nodeleftright = nodeleft->vm_avl_right;
- int heightleftright = heightof(nodeleftright);
- if (heightof(nodeleftleft) >= heightleftright) {
- /* */
- /* * n+2|n+3 */
- /* / \ / \ */
- /* n+2 n --> / n+1|n+2 */
- /* / \ | / \ */
- /* n+1 n|n+1 n+1 n|n+1 n */
- /* */
- node->vm_avl_left = nodeleftright; nodeleft->vm_avl_right = node;
- nodeleft->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightleftright);
- *nodeplace = nodeleft;
- } else {
- /* */
- /* * n+2 */
- /* / \ / \ */
- /* n+2 n --> n+1 n+1 */
- /* / \ / \ / \ */
- /* n n+1 n L R n */
- /* / \ */
- /* L R */
- /* */
- nodeleft->vm_avl_right = nodeleftright->vm_avl_left;
- node->vm_avl_left = nodeleftright->vm_avl_right;
- nodeleftright->vm_avl_left = nodeleft;
- nodeleftright->vm_avl_right = node;
- nodeleft->vm_avl_height = node->vm_avl_height = heightleftright;
- nodeleftright->vm_avl_height = heightleft;
- *nodeplace = nodeleftright;
- }
- }
- else if (heightleft + 1 < heightright) {
- /* similar to the above, just interchange 'left' <--> 'right' */
- struct vm_area_struct * noderightright = noderight->vm_avl_right;
- struct vm_area_struct * noderightleft = noderight->vm_avl_left;
- int heightrightleft = heightof(noderightleft);
- if (heightof(noderightright) >= heightrightleft) {
- node->vm_avl_right = noderightleft; noderight->vm_avl_left = node;
- noderight->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightrightleft);
- *nodeplace = noderight;
- } else {
- noderight->vm_avl_left = noderightleft->vm_avl_right;
- node->vm_avl_right = noderightleft->vm_avl_left;
- noderightleft->vm_avl_right = noderight;
- noderightleft->vm_avl_left = node;
- noderight->vm_avl_height = node->vm_avl_height = heightrightleft;
- noderightleft->vm_avl_height = heightright;
- *nodeplace = noderightleft;
- }
- }
- else {
- int height = (heightleft<heightright ? heightright : heightleft) + 1;
- if (height == node->vm_avl_height)
- break;
- node->vm_avl_height = height;
- }
- }
-}
-
-/* Insert a node into a tree. */
-static inline void avl_insert (struct vm_area_struct * new_node, struct vm_area_struct ** ptree)
-{
- vm_avl_key_t key = new_node->vm_avl_key;
- struct vm_area_struct ** nodeplace = ptree;
- struct vm_area_struct ** stack[avl_maxheight];
- int stack_count = 0;
- struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
- for (;;) {
- struct vm_area_struct * node = *nodeplace;
- if (node == vm_avl_empty)
- break;
- *stack_ptr++ = nodeplace; stack_count++;
- if (key < node->vm_avl_key)
- nodeplace = &node->vm_avl_left;
- else
- nodeplace = &node->vm_avl_right;
- }
- new_node->vm_avl_left = vm_avl_empty;
- new_node->vm_avl_right = vm_avl_empty;
- new_node->vm_avl_height = 1;
- *nodeplace = new_node;
- avl_rebalance(stack_ptr,stack_count);
-}
-
-/* Insert a node into a tree, and
- * return the node to the left of it and the node to the right of it.
- */
-static inline void avl_insert_neighbours (struct vm_area_struct * new_node, struct vm_area_struct ** ptree,
- struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
-{
- vm_avl_key_t key = new_node->vm_avl_key;
- struct vm_area_struct ** nodeplace = ptree;
- struct vm_area_struct ** stack[avl_maxheight];
- int stack_count = 0;
- struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
- *to_the_left = *to_the_right = NULL;
- for (;;) {
- struct vm_area_struct * node = *nodeplace;
- if (node == vm_avl_empty)
- break;
- *stack_ptr++ = nodeplace; stack_count++;
- if (key < node->vm_avl_key) {
- *to_the_right = node;
- nodeplace = &node->vm_avl_left;
- } else {
- *to_the_left = node;
- nodeplace = &node->vm_avl_right;
- }
- }
- new_node->vm_avl_left = vm_avl_empty;
- new_node->vm_avl_right = vm_avl_empty;
- new_node->vm_avl_height = 1;
- *nodeplace = new_node;
- avl_rebalance(stack_ptr,stack_count);
-}
-
-/* Removes a node out of a tree. */
-static void avl_remove (struct vm_area_struct * node_to_delete, struct vm_area_struct ** ptree)
-{
- vm_avl_key_t key = node_to_delete->vm_avl_key;
- struct vm_area_struct ** nodeplace = ptree;
- struct vm_area_struct ** stack[avl_maxheight];
- int stack_count = 0;
- struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
- struct vm_area_struct ** nodeplace_to_delete;
- for (;;) {
- struct vm_area_struct * node = *nodeplace;
-#ifdef DEBUG_AVL
- if (node == vm_avl_empty) {
- /* what? node_to_delete not found in tree? */
- printk("avl_remove: node to delete not found in tree\n");
- return;
- }
-#endif
- *stack_ptr++ = nodeplace; stack_count++;
- if (key == node->vm_avl_key)
- break;
- if (key < node->vm_avl_key)
- nodeplace = &node->vm_avl_left;
- else
- nodeplace = &node->vm_avl_right;
- }
- nodeplace_to_delete = nodeplace;
- /* Have to remove node_to_delete = *nodeplace_to_delete. */
- if (node_to_delete->vm_avl_left == vm_avl_empty) {
- *nodeplace_to_delete = node_to_delete->vm_avl_right;
- stack_ptr--; stack_count--;
- } else {
- struct vm_area_struct *** stack_ptr_to_delete = stack_ptr;
- struct vm_area_struct ** nodeplace = &node_to_delete->vm_avl_left;
- struct vm_area_struct * node;
- for (;;) {
- node = *nodeplace;
- if (node->vm_avl_right == vm_avl_empty)
- break;
- *stack_ptr++ = nodeplace; stack_count++;
- nodeplace = &node->vm_avl_right;
- }
- *nodeplace = node->vm_avl_left;
- /* node replaces node_to_delete */
- node->vm_avl_left = node_to_delete->vm_avl_left;
- node->vm_avl_right = node_to_delete->vm_avl_right;
- node->vm_avl_height = node_to_delete->vm_avl_height;
- *nodeplace_to_delete = node; /* replace node_to_delete */
- *stack_ptr_to_delete = &node->vm_avl_left; /* replace &node_to_delete->vm_avl_left */
- }
- avl_rebalance(stack_ptr,stack_count);
-}
-
-#ifdef DEBUG_AVL
-
-/* print a list */
-static void printk_list (struct vm_area_struct * vma)
-{
- printk("[");
- while (vma) {
- printk("%08lX-%08lX", vma->vm_start, vma->vm_end);
- vma = vma->vm_next;
- if (!vma)
- break;
- printk(" ");
- }
- printk("]");
-}
-
-/* print a tree */
-static void printk_avl (struct vm_area_struct * tree)
-{
- if (tree != vm_avl_empty) {
- printk("(");
- if (tree->vm_avl_left != vm_avl_empty) {
- printk_avl(tree->vm_avl_left);
- printk("<");
- }
- printk("%08lX-%08lX", tree->vm_start, tree->vm_end);
- if (tree->vm_avl_right != vm_avl_empty) {
- printk(">");
- printk_avl(tree->vm_avl_right);
- }
- printk(")");
- }
-}
-
-static char *avl_check_point = "somewhere";
-
-/* check a tree's consistency and balancing */
-static void avl_checkheights (struct vm_area_struct * tree)
-{
- int h, hl, hr;
-
- if (tree == vm_avl_empty)
- return;
- avl_checkheights(tree->vm_avl_left);
- avl_checkheights(tree->vm_avl_right);
- h = tree->vm_avl_height;
- hl = heightof(tree->vm_avl_left);
- hr = heightof(tree->vm_avl_right);
- if ((h == hl+1) && (hr <= hl) && (hl <= hr+1))
- return;
- if ((h == hr+1) && (hl <= hr) && (hr <= hl+1))
- return;
- printk("%s: avl_checkheights: heights inconsistent\n",avl_check_point);
-}
-
-/* check that all values stored in a tree are < key */
-static void avl_checkleft (struct vm_area_struct * tree, vm_avl_key_t key)
-{
- if (tree == vm_avl_empty)
- return;
- avl_checkleft(tree->vm_avl_left,key);
- avl_checkleft(tree->vm_avl_right,key);
- if (tree->vm_avl_key < key)
- return;
- printk("%s: avl_checkleft: left key %lu >= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
-}
-
-/* check that all values stored in a tree are > key */
-static void avl_checkright (struct vm_area_struct * tree, vm_avl_key_t key)
-{
- if (tree == vm_avl_empty)
- return;
- avl_checkright(tree->vm_avl_left,key);
- avl_checkright(tree->vm_avl_right,key);
- if (tree->vm_avl_key > key)
- return;
- printk("%s: avl_checkright: right key %lu <= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
-}
-
-/* check that all values are properly increasing */
-static void avl_checkorder (struct vm_area_struct * tree)
-{
- if (tree == vm_avl_empty)
- return;
- avl_checkorder(tree->vm_avl_left);
- avl_checkorder(tree->vm_avl_right);
- avl_checkleft(tree->vm_avl_left,tree->vm_avl_key);
- avl_checkright(tree->vm_avl_right,tree->vm_avl_key);
-}
-
-/* all checks */
-static void avl_check (struct task_struct * task, char *caller)
-{
- avl_check_point = caller;
-/* printk("task \"%s\", %s\n",task->comm,caller); */
-/* printk("task \"%s\" list: ",task->comm); printk_list(task->mm->mmap); printk("\n"); */
-/* printk("task \"%s\" tree: ",task->comm); printk_avl(task->mm->mmap_avl); printk("\n"); */
- avl_checkheights(task->mm->mmap_avl);
- avl_checkorder(task->mm->mmap_avl);
-}
-
-#endif
diff -u --recursive --new-file v2.4.9/linux/mm/mprotect.c linux/mm/mprotect.c
--- v2.4.9/linux/mm/mprotect.c Mon Mar 19 12:35:08 2001
+++ linux/mm/mprotect.c Mon Sep 17 15:30:23 2001
@@ -91,22 +91,52 @@
X return;
X }
X
-static inline int mprotect_fixup_all(struct vm_area_struct * vma,
+static inline int mprotect_fixup_all(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
X int newflags, pgprot_t prot)
X {
- spin_lock(&vma->vm_mm->page_table_lock);
+ struct vm_area_struct * prev = *pprev;
+ struct mm_struct * mm = vma->vm_mm;
+
+ if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) &&
+ !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+ spin_lock(&mm->page_table_lock);
+ prev->vm_end = vma->vm_end;
+ __vma_unlink(mm, vma, prev);
+ spin_unlock(&mm->page_table_lock);
+
+ kmem_cache_free(vm_area_cachep, vma);
+ mm->map_count--;
+
+ return 0;
+ }
+
+ spin_lock(&mm->page_table_lock);
X vma->vm_flags = newflags;
X vma->vm_page_prot = prot;
- spin_unlock(&vma->vm_mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
+
+ *pprev = vma;
+
X return 0;
X }
X
-static inline int mprotect_fixup_start(struct vm_area_struct * vma,
+static inline int mprotect_fixup_start(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
X unsigned long end,
X int newflags, pgprot_t prot)
X {
- struct vm_area_struct * n;
+ struct vm_area_struct * n, * prev = *pprev;
+
+ *pprev = vma;
+
+ if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) &&
+ !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+ spin_lock(&vma->vm_mm->page_table_lock);
+ prev->vm_end = end;
+ vma->vm_start = end;
+ spin_unlock(&vma->vm_mm->page_table_lock);
X
+ return 0;
+ }
X n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
X if (!n)
X return -ENOMEM;
@@ -119,17 +149,18 @@
X get_file(n->vm_file);
X if (n->vm_ops && n->vm_ops->open)
X n->vm_ops->open(n);
+ vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
X lock_vma_mappings(vma);
X spin_lock(&vma->vm_mm->page_table_lock);
- vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
X vma->vm_start = end;
X __insert_vm_struct(current->mm, n);
X spin_unlock(&vma->vm_mm->page_table_lock);
X unlock_vma_mappings(vma);
+
X return 0;
X }
X
-static inline int mprotect_fixup_end(struct vm_area_struct * vma,
+static inline int mprotect_fixup_end(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
X unsigned long start,
X int newflags, pgprot_t prot)
X {
@@ -154,10 +185,13 @@
X __insert_vm_struct(current->mm, n);
X spin_unlock(&vma->vm_mm->page_table_lock);
X unlock_vma_mappings(vma);
+
+ *pprev = n;
+
X return 0;
X }
X
-static inline int mprotect_fixup_middle(struct vm_area_struct * vma,
+static inline int mprotect_fixup_middle(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
X unsigned long start, unsigned long end,
X int newflags, pgprot_t prot)
X {
@@ -184,39 +218,44 @@
X vma->vm_ops->open(left);
X vma->vm_ops->open(right);
X }
+ vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
+ vma->vm_raend = 0;
+ vma->vm_page_prot = prot;
X lock_vma_mappings(vma);
X spin_lock(&vma->vm_mm->page_table_lock);
- vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
X vma->vm_start = start;
X vma->vm_end = end;
X vma->vm_flags = newflags;
- vma->vm_raend = 0;
- vma->vm_page_prot = prot;
X __insert_vm_struct(current->mm, left);
X __insert_vm_struct(current->mm, right);
X spin_unlock(&vma->vm_mm->page_table_lock);
X unlock_vma_mappings(vma);
+
+ *pprev = right;
+
X return 0;
X }
X
-static int mprotect_fixup(struct vm_area_struct * vma,
+static int mprotect_fixup(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
X unsigned long start, unsigned long end, unsigned int newflags)
X {
X pgprot_t newprot;
X int error;
X
- if (newflags == vma->vm_flags)
+ if (newflags == vma->vm_flags) {
+ *pprev = vma;
X return 0;
+ }
X newprot = protection_map[newflags & 0xf];
X if (start == vma->vm_start) {
X if (end == vma->vm_end)
- error = mprotect_fixup_all(vma, newflags, newprot);
+ error = mprotect_fixup_all(vma, pprev, newflags, newprot);
X else
- error = mprotect_fixup_start(vma, end, newflags, newprot);
+ error = mprotect_fixup_start(vma, pprev, end, newflags, newprot);
X } else if (end == vma->vm_end)
- error = mprotect_fixup_end(vma, start, newflags, newprot);
+ error = mprotect_fixup_end(vma, pprev, start, newflags, newprot);
X else
- error = mprotect_fixup_middle(vma, start, end, newflags, newprot);
+ error = mprotect_fixup_middle(vma, pprev, start, end, newflags, newprot);
X
X if (error)
X return error;
@@ -228,7 +267,7 @@
X asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot)
X {
X unsigned long nstart, end, tmp;
- struct vm_area_struct * vma, * next;
+ struct vm_area_struct * vma, * next, * prev;
X int error = -EINVAL;
X
X if (start & ~PAGE_MASK)
@@ -242,41 +281,55 @@
X if (end == start)
X return 0;
X
- /* XXX: maybe this could be down_read ??? - Rik */
X down_write(&current->mm->mmap_sem);
X
- vma = find_vma(current->mm, start);
+ vma = find_vma_prev(current->mm, start, &prev);
X error = -EFAULT;
X if (!vma || vma->vm_start > start)
X goto out;
X
X for (nstart = start ; ; ) {
X unsigned int newflags;
+ int last = 0;
X
X /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
X
X newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC));
X if ((newflags & ~(newflags >> 4)) & 0xf) {
X error = -EACCES;
- break;
+ goto out;
X }
X
- if (vma->vm_end >= end) {
- error = mprotect_fixup(vma, nstart, end, newflags);
- break;
+ if (vma->vm_end > end) {
+ error = mprotect_fixup(vma, &prev, nstart, end, newflags);
+ goto out;
X }
+ if (vma->vm_end == end)
+ last = 1;
X
X tmp = vma->vm_end;
X next = vma->vm_next;
- error = mprotect_fixup(vma, nstart, tmp, newflags);
+ error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
X if (error)
+ goto out;
+ if (last)
X break;
X nstart = tmp;
X vma = next;
X if (!vma || vma->vm_start != nstart) {
X error = -EFAULT;
- break;
+ goto out;
X }
+ }
+ if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) &&
+ !prev->vm_file && !(prev->vm_flags & VM_SHARED)) {
+ spin_lock(&prev->vm_mm->page_table_lock);
+ prev->vm_end = next->vm_end;
+ __vma_unlink(prev->vm_mm, next, prev);
+ spin_unlock(&prev->vm_mm->page_table_lock);
+
+ kmem_cache_free(vm_area_cachep, next);
+ prev->vm_mm->map_count--;
SHAR_EOF
true || echo 'restore of patch-2.4.10 failed'
fi
echo 'End of part 191'
echo 'File patch-2.4.10 is continued in part 192'
echo "192" > _shar_seq_.tmp
exit 0
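
# The mm/mmap.c changes in the part above replace the AVL tree (mmap_avl.c
# is deleted outright) with a red-black tree, and the rewritten find_vma()
# descends it by one rule: remember the current node as a candidate whenever
# vm_end > addr and go left, otherwise go right, stopping early once
# vm_start <= addr. The stand-alone C sketch below mirrors only that descent
# on a plain, hand-built (and unbalanced) binary tree; struct area,
# find_area() and the test addresses are invented for illustration and are
# not kernel interfaces.

#include <stdio.h>
#include <stddef.h>

/* Toy stand-in for vm_area_struct: the range [start, end) plus tree links. */
struct area {
        unsigned long start, end;
        struct area *left, *right;
};

/* Return the first area with end > addr, or NULL if none. Mirrors the
 * rb-tree descent in the patched find_vma(): remember the candidate and
 * go left when end > addr, otherwise go right. */
static struct area *find_area(struct area *root, unsigned long addr)
{
        struct area *found = NULL;

        while (root) {
                if (root->end > addr) {
                        found = root;
                        if (root->start <= addr)
                                break;
                        root = root->left;
                } else {
                        root = root->right;
                }
        }
        return found;
}

int main(void)
{
        /* Three disjoint areas, hand-built into a search tree keyed on end. */
        struct area a = { 0x1000, 0x2000, NULL, NULL };
        struct area c = { 0x5000, 0x6000, NULL, NULL };
        struct area b = { 0x3000, 0x4000, &a, &c };

        struct area *hit = find_area(&b, 0x3500);   /* inside [0x3000,0x4000) */
        if (hit)
                printf("0x3500 -> [%lx,%lx)\n", hit->start, hit->end);
        hit = find_area(&b, 0x2500);                /* in a gap between areas */
        if (hit)
                printf("0x2500 -> [%lx,%lx)\n", hit->start, hit->end);
        return 0;
}

# For an address that falls in a gap between mappings (0x2500 here), the
# function returns the first area above the gap, which matches the contract
# stated in the diff's own comment: look up the first VMA which satisfies
# addr < vm_end, NULL if none.
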