Linux Kernel Patch v2.2, patch-2.2.1 (0/2)

Skip to first unread message

Jan 29, 1999, 3:00:00 AM1/29/99
Archive-name: v2.2/patch-2.2.1/part0

lines added deleted
linux/CREDITS : 14 2 2
linux/Documentation/proc.txt : 1353 1353 0
linux/Makefile : 7 1 1
linux/arch/alpha/lib/semaphore.S : 8 1 1
linux/drivers/sound/gus_wave.c : 29 5 4
linux/drivers/sound/sb_card.c : 15 1 1
linux/fs/nfsd/vfs.c : 69 31 9
linux/fs/proc/array.c : 402 64 114
linux/include/asm-i386/page.h : 13 3 4
linux/mm/mmap.c : 16 2 1
Thomas Koenig,, ig...@dkauni2.bitnet.
The joy of engineering is to find a straight line on a double
logarithmic diagram.

Jan 29, 1999, 3:00:00 AM1/29/99
Archive-name: v2.2/patch-2.2.1/part2

# this is part 2 of a 2 - part archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file patch-2.2.1 continued
if test ! -r _shar_seq_.tmp; then
echo 'Please unpack part 1 first!'
exit 1
(read Scheck
if test "$Scheck" != 2; then
echo Please unpack part "$Scheck" next!
exit 1
exit 0
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping patch-2.2.1'
echo 'x - continuing with patch-2.2.1'
sed 's/^X//' << 'SHAR_EOF' >> 'patch-2.2.1' &&
+ ((dir) == (dentry)->d_parent->d_inode && !list_empty(&dentry->d_hash))
X * This follows the model of double_lock() in the VFS.
X */
X static inline void nfsd_double_down(struct semaphore *s1, struct semaphore *s2)
@@ -1048,6 +1062,10 @@
X if (IS_ERR(odentry))
X goto out_nfserr;
+ err = -ENOENT;
+ if (!odentry->d_inode)
+ goto out_dput_old;
X ndentry = lookup_dentry(tname, dget(tdentry), 0);
X err = PTR_ERR(ndentry);
X if (IS_ERR(ndentry))
@@ -1057,13 +1075,18 @@
X * Lock the parent directories.
X */
X nfsd_double_down(&tdir->i_sem, &fdir->i_sem);
- /* N.B. check for parent changes after locking?? */
- err = vfs_rename(fdir, odentry, tdir, ndentry);
- if (!err && EX_ISSYNC(tfhp->fh_export)) {
- write_inode_now(fdir);
- write_inode_now(tdir);
- }
+ err = -ENOENT;
+ /* GAM3 check for parent changes after locking. */
+ if (check_parent(fdir, odentry) &&
+ check_parent(tdir, ndentry)) {
+ err = vfs_rename(fdir, odentry, tdir, ndentry);
+ if (!err && EX_ISSYNC(tfhp->fh_export)) {
+ write_inode_now(fdir);
+ write_inode_now(tdir);
+ }
+ } else
+ dprintk("nfsd: Caught race in nfsd_rename");
@@ -1137,10 +1160,9 @@
X if (!fhp->fh_pre_mtime)
X fhp->fh_pre_mtime = dirp->i_mtime;
X fhp->fh_locked = 1;
- /* CHECKME: Should we do something with the child? */
X err = -ENOENT;
- if (rdentry->d_parent->d_inode == dirp)
+ if (check_parent(dirp, rdentry))
X err = vfs_rmdir(dirp, rdentry);
X rdentry->d_count--;
diff -u --recursive --new-file v2.2.0/linux/fs/proc/array.c linux/fs/proc/array.c
--- v2.2.0/linux/fs/proc/array.c Mon Jan 25 17:44:34 1999
+++ linux/fs/proc/array.c Thu Jan 28 10:08:53 1999
@@ -24,7 +24,7 @@
X * <Jeff_T...@Mitel.COM>
X *
X * Bruno Haible : remove 4K limit for the maps file
- * <>
+ * <>
X *
X * Yves Arrouye : remove removal of trailing spaces in get_array.
X * <>
@@ -42,8 +42,6 @@
X * Alan Cox : security fixes.
X * <>
X *
- * Andi Kleen : Race Fixes.
- *
X */
X #include <linux/types.h>
@@ -388,46 +386,21 @@
X return sprintf(buffer, "%s\n", saved_command_line);
X }
- * Caller must release_mm the mm_struct later.
- * You don't get any access to init_mm.
- */
-static struct mm_struct *get_mm_and_lock(int pid)
- struct mm_struct *mm = NULL;
- struct task_struct *tsk;
- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(pid);
- if (tsk && tsk->mm && tsk->mm != &init_mm)
- mmget(mm = tsk->mm);
- read_unlock(&tasklist_lock);
- if (mm != NULL)
- down(&mm->mmap_sem);
- return mm;
-static void release_mm(struct mm_struct *mm)
- up(&mm->mmap_sem);
- mmput(mm);
-static unsigned long get_phys_addr(struct mm_struct *mm, unsigned long ptr)
+static unsigned long get_phys_addr(struct task_struct * p, unsigned long ptr)
X {
X pgd_t *page_dir;
X pmd_t *page_middle;
X pte_t pte;
- if (ptr >= TASK_SIZE)
+ if (!p || !p->mm || ptr >= TASK_SIZE)
X return 0;
X /* Check for NULL pgd .. shouldn't happen! */
- if (!mm->pgd) {
- printk(KERN_DEBUG "missing pgd for mm %p\n", mm);
+ if (!p->mm->pgd) {
+ printk("get_phys_addr: pid %d has NULL pgd!\n", p->pid);
X return 0;
X }
- page_dir = pgd_offset(mm,ptr);
+ page_dir = pgd_offset(p->mm,ptr);
X if (pgd_none(*page_dir))
X return 0;
X if (pgd_bad(*page_dir)) {
@@ -449,7 +422,7 @@
X return pte_page(pte) + (ptr & ~PAGE_MASK);
X }
-static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer)
+static int get_array(struct task_struct *p, unsigned long start, unsigned long end, char * buffer)
X {
X unsigned long addr;
X int size = 0, result = 0;
@@ -458,7 +431,7 @@
X if (start >= end)
X return result;
X for (;;) {
- addr = get_phys_addr(mm, start);
+ addr = get_phys_addr(p, start);
X if (!addr)
X return result;
X do {
@@ -480,28 +453,27 @@
X static int get_env(int pid, char * buffer)
X {
- struct mm_struct *mm;
- int res = 0;
+ struct task_struct *p;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
- mm = get_mm_and_lock(pid);
- if (mm) {
- res = get_array(mm, mm->env_start, mm->env_end, buffer);
- release_mm(mm);
- }
- return res;
+ if (!p || !p->mm)
+ return 0;
+ return get_array(p, p->mm->env_start, p->mm->env_end, buffer);
X }
X static int get_arg(int pid, char * buffer)
X {
- struct mm_struct *mm;
- int res = 0;
+ struct task_struct *p;
- mm = get_mm_and_lock(pid);
- if (mm) {
- res = get_array(mm, mm->arg_start, mm->arg_end, buffer);
- release_mm(mm);
- }
- return res;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+ if (!p || !p->mm)
+ return 0;
+ return get_array(p, p->mm->arg_start, p->mm->arg_end, buffer);
X }
X /*
@@ -606,7 +578,7 @@
X #ifdef __sparc_v9__
X bias = STACK_BIAS;
X #endif
- fp = p->tss.ksp + bias;
+ fp = p->tss.ksp + bias;
X do {
X /* Bogus frame pointer? */
X if (fp < (task_base + sizeof(struct task_struct)) ||
@@ -642,7 +614,7 @@
X #define KSTK_EIP(tsk) \
X ({ \
X unsigned long eip = 0; \
- if ((tsk)->tss.esp0 > PAGE_SIZE && \
+ if ((tsk)->tss.esp0 > PAGE_SIZE && \
X MAP_NR((tsk)->tss.esp0) < max_mapnr) \
X eip = ((struct pt_regs *) (tsk)->tss.esp0)->pc; \
X eip; })
@@ -754,14 +726,11 @@
X {
X struct mm_struct * mm = p->mm;
- if (!mm)
- return buffer;
- if (mm != &init_mm) {
- struct vm_area_struct * vma;
+ if (mm && mm != &init_mm) {
+ struct vm_area_struct * vma = mm->mmap;
X unsigned long data = 0, stack = 0;
X unsigned long exec = 0, lib = 0;
- down(&mm->mmap_sem);
X for (vma = mm->mmap; vma; vma = vma->vm_next) {
X unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
X if (!vma->vm_file) {
@@ -779,7 +748,6 @@
X lib += len;
X }
X }
- up(&mm->mmap_sem);
X buffer += sprintf(buffer,
X "VmSize:\t%8lu kB\n"
X "VmLck:\t%8lu kB\n"
@@ -849,31 +817,15 @@
X cap_t(p->cap_effective));
X }
-static struct task_struct *grab_task(int pid)
- struct task_struct *tsk = current;
- if (pid != tsk->pid) {
- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(pid);
- if (tsk && tsk->mm && tsk->mm != &init_mm)
- mmget(tsk->mm);
- read_unlock(&tasklist_lock);
- }
- return tsk;
-static void release_task(struct task_struct *tsk)
- if (tsk != current && tsk->mm && tsk->mm != &init_mm)
- mmput(tsk->mm);
X static int get_status(int pid, char * buffer)
X {
X char * orig = buffer;
X struct task_struct *tsk;
- tsk = grab_task(pid);
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
X if (!tsk)
X return 0;
X buffer = task_name(tsk, buffer);
@@ -881,7 +833,6 @@
X buffer = task_mem(tsk, buffer);
X buffer = task_sig(tsk, buffer);
X buffer = task_cap(tsk, buffer);
- release_task(tsk);
X return buffer - orig;
X }
@@ -893,22 +844,20 @@
X int tty_pgrp;
X sigset_t sigign, sigcatch;
X char state;
- int res;
- tsk = grab_task(pid);
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
X if (!tsk)
X return 0;
X state = *get_task_state(tsk);
X vsize = eip = esp = 0;
X if (tsk->mm && tsk->mm != &init_mm) {
- struct vm_area_struct *vma;
- down(&tsk->mm->mmap_sem);
- for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) {
+ struct vm_area_struct *vma = tsk->mm->mmap;
+ while (vma) {
X vsize += vma->vm_end - vma->vm_start;
+ vma = vma->vm_next;
X }
- up(&tsk->mm->mmap_sem);
X eip = KSTK_EIP(tsk);
X esp = KSTK_ESP(tsk);
X }
@@ -929,7 +878,7 @@
X nice = tsk->priority;
X nice = 20 - (nice * 20 + DEF_PRIORITY / 2) / DEF_PRIORITY;
- res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
+ return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
X %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
X %lu %lu %lu %lu %lu %lu %lu %lu %d\n",
X pid,
@@ -974,9 +923,6 @@
X tsk->nswap,
X tsk->cnswap,
X tsk->exit_signal);
- release_task(tsk);
- return res;
X }
X static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -1054,15 +1000,19 @@
X static int get_statm(int pid, char * buffer)
X {
+ struct task_struct *tsk;
X int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;
- struct mm_struct *mm;
- mm = get_mm_and_lock(pid);
- if (mm) {
- struct vm_area_struct * vma = mm->mmap;
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+ if (!tsk)
+ return 0;
+ if (tsk->mm && tsk->mm != &init_mm) {
+ struct vm_area_struct * vma = tsk->mm->mmap;
X while (vma) {
- pgd_t *pgd = pgd_offset(mm, vma->vm_start);
+ pgd_t *pgd = pgd_offset(tsk->mm, vma->vm_start);
X int pages = 0, shared = 0, dirty = 0, total = 0;
X statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
@@ -1080,7 +1030,6 @@
X drs += pages;
X vma = vma->vm_next;
X }
- release_mm(mm);
X }
X return sprintf(buffer,"%d %d %d %d %d %d %d\n",
X size, resident, share, trs, lrs, drs, dt);
@@ -1118,7 +1067,7 @@
-/* FIXME: this does not do proper mm locking */
X static ssize_t read_maps (int pid, struct file * file, char * buf,
X size_t count, loff_t *ppos)
X {
@@ -1250,11 +1199,15 @@
X #ifdef __SMP__
X static int get_pidcpu(int pid, char * buffer)
X {
- struct task_struct * tsk;
+ struct task_struct * tsk = current ;
X int i, len;
- tsk = grab_task(pid);
- if (!tsk)
+ read_lock(&tasklist_lock);
+ if (pid != tsk->pid)
+ tsk = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+ if (tsk == NULL)
X return 0;
X len = sprintf(buffer,
@@ -1268,7 +1221,6 @@
X tsk->per_cpu_utime[cpu_logical_map(i)],
X tsk->per_cpu_stime[cpu_logical_map(i)]);
- release_task(tsk);
X return len;
X }
X #endif
@@ -1306,7 +1258,7 @@
X return get_meminfo(page);
- case PROC_PCI:
+ case PROC_PCI:
X return get_pci_list(page);
X #endif
@@ -1364,11 +1316,11 @@
X return get_cmdline(page);
- case PROC_MTAB:
- return get_filesystem_info( page );
+ case PROC_MTAB:
+ return get_filesystem_info( page );
- case PROC_SWAP:
- return get_swaparea_info(page);
+ case PROC_SWAP:
+ return get_swaparea_info(page);
X case PROC_RTC:
X return get_rtc_status(page);
@@ -1398,6 +1350,7 @@
X * Grab the lock, find the task, save the uid and
X * check it has an mm still (ie its not dead)
X */
X p = find_task_by_pid(pid);
X if(p)
X {
@@ -1468,7 +1421,6 @@
X ssize_t end;
X unsigned int type, pid;
X struct proc_dir_entry *dp;
- int err;
X if (count > PROC_BLOCK_SIZE)
@@ -1497,10 +1449,8 @@
X return length;
X }
X if (start != NULL) {
- if (length > count)
- length = count;
X /* We have had block-adjusting processing! */
- err = copy_to_user(buf, start, length);
+ copy_to_user(buf, start, length);
X *ppos += length;
X count = length;
X } else {
@@ -1512,11 +1462,11 @@
X if (count + *ppos > length)
X count = length - *ppos;
X end = count + *ppos;
- err = copy_to_user(buf, (char *) page + *ppos, count);
+ copy_to_user(buf, (char *) page + *ppos, count);
X *ppos = end;
X }
X free_page(page);
- return err ? -EFAULT : count;
+ return count;
X }
X static struct file_operations proc_array_operations = {
diff -u --recursive --new-file v2.2.0/linux/include/asm-i386/page.h linux/include/asm-i386/page.h
--- v2.2.0/linux/include/asm-i386/page.h Wed Apr 8 19:36:29 1998
+++ linux/include/asm-i386/page.h Tue Jan 26 13:04:02 1999
@@ -69,10 +69,9 @@
X * you want to use more physical memory, change this define.
X *
X * For example, if you have 2GB worth of physical memory, you
- * could change this define to 0x70000000, which gives the
- * kernel slightly more than 2GB of virtual memory (enough to
- * map all your physical memory + a bit extra for various
- * io-memory mappings)
+ * could change this define to 0x80000000, which gives the
+ * kernel 2GB of virtual memory (enough to map most of your physical memory
+ * as the kernel needs a bit extra for various io-memory mappings)
X *
X *
diff -u --recursive --new-file v2.2.0/linux/mm/mmap.c linux/mm/mmap.c
--- v2.2.0/linux/mm/mmap.c Tue Jan 19 11:32:53 1999
+++ linux/mm/mmap.c Wed Jan 27 10:43:41 1999
@@ -393,6 +393,7 @@
X } else {
X /* Then go through the AVL tree quickly. */
X struct vm_area_struct * tree = mm->mmap_avl;
+ vma = NULL;
X for (;;) {
X if (tree == vm_avl_empty)
X break;
@@ -556,7 +557,7 @@
X unsigned long start, unsigned long end)
X {
X unsigned long first = start & PGDIR_MASK;
- unsigned long last = (end & PGDIR_MASK) + PGDIR_SIZE;
+ unsigned long last = (end + PGDIR_SIZE - 1) & PGDIR_MASK;
X if (!prev) {
X prev = mm->mmap;
true || echo 'restore of patch-2.2.1 failed'
echo 'File patch-2.2.1 is complete' &&
chmod 644 patch-2.2.1 ||
echo 'restore of patch-2.2.1 failed'
Cksum="`cksum < 'patch-2.2.1'`"
if ! test "3691003991 71203" = "$Cksum"
echo 'patch-2.2.1: original Checksum 3691003991 71203, current one' "$Cksum"
rm -f _shar_wnt_.tmp
rm -f _shar_seq_.tmp
exit 1
rm -f _shar_wnt_.tmp
rm -f _shar_seq_.tmp
echo 'You have unpacked the last part.'
exit 0

Jan 29, 1999, 3:00:00 AM1/29/99
Archive-name: v2.2/patch-2.2.1/part1

# This is a shell archive
# To extract the files from this archive, save it to a file, remove
# everything above the "!/bin/sh" line above, and type "sh file_name".
# existing files will NOT be overwritten unless -c is specified
# This is part 1 of a 2 - part archive

# do not concatenate these parts, unpack them in order with /bin/sh

if test -r _shar_seq_.tmp; then
echo 'Must unpack archives in sequence!'
echo Please unpack part `cat _shar_seq_.tmp` next
exit 1
# ============= patch-2.2.1 ==============
if test -f 'patch-2.2.1' -a X"$1" != X"-c"; then
echo 'x - skipping patch-2.2.1 (File already exists)'
rm -f _shar_wnt_.tmp
> _shar_wnt_.tmp
echo 'x - extracting patch-2.2.1 (Text)'

sed 's/^X//' << 'SHAR_EOF' > 'patch-2.2.1' &&

diff -u --recursive --new-file v2.2.0/linux/CREDITS linux/CREDITS
--- v2.2.0/linux/CREDITS Wed Jan 20 23:14:03 1999
+++ linux/CREDITS Thu Jan 28 11:25:04 1999
@@ -49,12 +49,12 @@
X N: Erik Andersen
X E:
X P: 1024/FC4CFFED 78 3C 6A 19 FA 5D 92 5A FB AC 7B A5 A5 E1 FF 8E
X D: Maintainer of ide-cd and Uniform CD-ROM driver,
X D: ATAPI CD-Changer support, Major 2.1.x CD-ROM update.
X S: 4538 South Carnegie Tech Street
-S: West Valley City, Utah 84120
+S: Salt Lake City, Utah 84120
X N: H. Peter Anvin
diff -u --recursive --new-file v2.2.0/linux/Documentation/proc.txt linux/Documentation/proc.txt
--- v2.2.0/linux/Documentation/proc.txt Wed Dec 31 16:00:00 1969
+++ linux/Documentation/proc.txt Thu Jan 28 12:16:40 1999
@@ -0,0 +1,1353 @@
+ T H E /proc F I L E S Y S T E M
+/proc/sys Terrehon Bowden <> January 27 1999
+ Bodo Bauer <>
+Version 1.1 Kernel version 2.2
+1 Introduction/Credits
+1.1 Legal Stuff
+2 The /proc file system
+2.1 Process specific subdirectories
+2.2 Kernel data
+2.3 IDE devices in /proc/ide
+2.4 Networking info in /proc/net
+2.5 SCSI info
+2.6 Parallel port info in /proc/parport
+2.7 TTY info in /proc/tty
+3 Reading and modifying kernel parameters
+3.1 /proc/sys/debug and /proc/sys/proc
+3.2 /proc/fs - File system data
+3.3 /proc/fs/binfmt_misc - Miscellaneous binary formats
+3.4 /proc/sys/kernel - General kernel parameters
+3.5 /proc/sys/vm - The virtual memory subsystem
+3.6 /proc/sys/dev - Device specific parameters
+3.7 /proc/sys/sunrpc - Remote procedure calls
+3.8 /proc/sys/net - Networking stuff
+3.9 /proc/sys/net/ipv4 - IPV4 settings
+3.10 Appletalk
+3.11 IPX
+1 Introduction/Credits
+This documentation is part of a soon to be released book published by
+IDG Books on the SuSE Linux distribution. As there is no complete
+documentation for the /proc file system and we've used many freely
+available sources to write this chapter, it seems only fair to give
+the work back to the Linux community. This work is based on the
+2.1.132 and 2.2.0-pre-kernel versions. I'm afraid it's still far from
+complete, but we hope it will be useful. As far as we know, it is the
+first 'all-in-one' document about the /proc file system. It is
+focused on the Intel x86 hardware, so if you are looking for PPC, ARM,
+SPARC, APX, etc., features, you probably won't find what you are
+looking for. It also only covers IPv4 networking, not IPv6 nor other
+protocols - sorry.
+We'd like to thank Alan Cox, Rik van Riel, and Alexey Kuznetsov. We'd
+also like to extend a special thank you to Andi Kleen for
+documentation, which we relied on heavily to create this document, as
+well as the additional information he provided. Thanks to everybody
+else who contributed source or docs to the Linux kernel and helped
+create a great piece of software... :)
+If you have any comments, corrections or additions, please don't
+hesitate to contact Bodo Bauer at We'll be happy to
+add them to this document.
+The latest version of this document is available online at
+ in HTML, ASCII, and as
+Postscript file.
+1.1 Legal Stuff
+We don't guarantee the correctness of this document, and if you come
+to us complaining about how you screwed up your system because of
+incorrect documentation, we won't feel responsible...
+2 The /proc file system
+The proc file system acts as an interface to internal data structures
+in the kernel. It can be used to obtain information about the system
+and to change certain kernel parameters at runtime. It contains
+(among other things) one subdirectory for each process running on the
+system which is named after the process id (PID) of the process. The
+link self points to the process reading the file system.
+2.1 Process specific subdirectories
+Each process subdirectory has the entries listed in table 1.1.
+ _________________________________________________
+ cmdline Command line arguments
+ environ Values of environment variables
+ fd Directory, which contains all file descriptors
+ mem Memory held by this process
+ stat Process status
+ status Process status in human readable form
+ cwd Link to the current working directory
+ exe Link to the executable of this process
+ maps Memory maps
+ root Link to the root directory of this process
+ statm Process memory status information
+ _________________________________________________
+ Table 1.1: Process specific entries in /proc
+For example, to get the status information of a process, all you have
+to do is read the file /proc/PID/status:
+> cat /proc/self/status
+Name: cat
+State: R (running)
+Pid: 5633
+PPid: 5609
+Uid: 501 501 501 501
+Gid: 100 100 100 100
+Groups: 100 16
+VmSize: 804 kB
+VmLck: 0 kB
+VmRSS: 344 kB
+VmData: 68 kB
+VmStk: 20 kB
+VmExe: 12 kB
+VmLib: 660 kB
+SigPnd: 0000000000000000
+SigBlk: 0000000000000000
+SigIgn: 0000000000000000
+SigCgt: 0000000000000000
+CapInh: 00000000fffffeff
+CapPrm: 0000000000000000
+CapEff: 0000000000000000
+This shows you almost the same information as you would get if you
+viewed it with the ps command. In fact, ps uses the proc file system
+to obtain its information.
+The statm file contains more detailed information about the process
+memory usage. It contains seven values with the following meanings:
+size total program size
+resident size of in memory portions
+shared number of the pages that are shared
+trs number of pages that are 'code'
+drs number of pages of data/stack
+lrs number of pages of library
+dt number of dirty pages
+The ratio text/data/library is only approximated by heuristics.
+2.2 Kernel data
+Similar to the process entries, these are files which give information
+about the running kernel. The files used to obtain this information
+are contained in /proc and are listed in table 1.2. Not all of these
+will be present in your system. It depends on the kernel configuration
+and the loaded modules, which files are there, and which are missing.
+ ________________________________________________
+ apm Advanced power management info
+ cmdline Kernel command line
+ cpuinfo Info about the CPU
+ devices Available devices (block and character)
+ dma Used DMA channels
+ filesystems Supported filesystems
+ interrupts Interrupt usage
+ ioports I/O port usage
+ kcore Kernel core image
+ kmsg Kernel messages
+ ksyms Kernel symbol table
+ loadavg Load average
+ locks Kernel locks
+ meminfo Memory info
+ misc Miscellaneous
+ modules List of loaded modules
+ mounts Mounted filesystems
+ partitions Table of partitions known to the system
+ rtc Real time clock
+ slabinfo Slab pool info
+ stat Overall statistics
+ swaps Swap space utilization
+ uptime System uptime
+ version Kernel version
+ ________________________________________________
+ Table 1.2: Kernel info in /proc
+You can, for example, check which interrupts are currently in use and
+what they are used for by looking in the file /proc/interrupts:
+> cat /proc/interrupts
+ CPU0
+ 0: 8728810 XT-PIC timer
+ 1: 895 XT-PIC keyboard
+ 2: 0 XT-PIC cascade
+ 3: 531695 XT-PIC aha152x
+ 4: 2014133 XT-PIC serial
+ 5: 44401 XT-PIC pcnet_cs
+ 8: 2 XT-PIC rtc
+ 11: 8 XT-PIC i82365
+ 12: 182918 XT-PIC PS/2 Mouse
+ 13: 1 XT-PIC fpu
+ 14: 1232265 XT-PIC ide0
+ 15: 7 XT-PIC ide1
+NMI: 0
+There are three more important subdirectories in /proc: net, scsi and
+sys. The general rule is that the contents, or even the existence of
+these directories, depends on your kernel configuration. If SCSI is
+not enabled, the directory scsi may not exist. The same is true with
+the net, which is only there when networking support is present in the
+running kernel.
+The slabinfo file gives information about memory usage on the slab
+level. Linux uses slab pools for memory management above page level
+in version 2.2. Commonly used objects have their own slab pool (like
+network buffers, directory cache, etc.).
+2.3 IDE devices in /proc/ide
+This subdirectory contains information about all IDE devices that the
+kernel is aware of. There is one subdirectory for each device
+(i.e. hard disk) containing the following files:
+ cache The cache
+ capacity Capacity of the medium
+ driver Driver and version
+ geometry Physical and logical geometry
+ identify Device identify block
+ media Media type
+ model Device identifier
+ settings Device setup
+ smart_thresholds IDE disk management thresholds
+ smart_values IDE disk management values
+2.4 Networking info in /proc/net
+This directory follows the usual pattern. Table 1.3 lists the files
+and their meaning.
+ ____________________________________________________
+ arp Kernel ARP table
+ dev network devices with statistics
+ dev_mcast Lists the Layer2 multicast groups a
+ device is listening to (interface index,
+ label, number of references, number of
+ bound addresses).
+ dev_stat network device status
+ ip_fwchains Firewall chain linkage
+ ip_fwnames Firewall chains
+ ip_masq Directory containing the masquerading
+ tables.
+ ip_masquerade Major masquerading table
+ netstat Network statistics
+ raw Raw device statistics
+ route Kernel routing table
+ rpc Directory containing rpc info
+ rt_cache Routing cache
+ snmp SNMP data
+ sockstat Socket statistics
+ tcp TCP sockets
+ tr_rif Token ring RIF routing table
+ udp UDP sockets
+ unix UNIX domain sockets
+ wireless Wireless interface data (Wavelan etc)
+ igmp IP multicast addresses, which this host joined
+ psched Global packet scheduler parameters.
+ netlink List of PF_NETLINK sockets.
+ ip_mr_vifs List of multicast virtual interfaces.
+ ip_mr_cache List of multicast routing cache.
+ udp6 UDP sockets (IPv6)
+ tcp6 TCP sockets (IPv6)
+ raw6 Raw device statistics (IPv6)
+ igmp6 IP multicast addresses, which this host joined (IPv6)
+ if_inet6 List of IPv6 interface addresses.
+ ipv6_route Kernel routing table for IPv6
+ rt6_stats global IPv6 routing tables statistics.
+ sockstat6 Socket statistics (IPv6)
+ snmp6 Snmp data (IPv6)
+ ____________________________________________________
+ Table 1.3: Network info in /proc/net
+You can use this information to see which network devices are
+available in your system and how much traffic was routed over those devices:
+> cat /proc/net/dev
+Inter-|Receive |[...
+ face |bytes packets errs drop fifo frame compressed multicast|[...
+ lo: 908188 5596 0 0 0 0 0 0 [...
+ ppp0:15475140 20721 410 0 0 410 0 0 [...
+ eth0: 614530 7085 0 0 0 0 0 1 [...
+...] Transmit
+...] bytes packets errs drop fifo colls carrier compressed
+...] 908188 5596 0 0 0 0 0 0
+...] 1375103 17405 0 0 0 0 0 0
+...] 1703981 5535 0 0 0 3 0 0
+2.5 SCSI info
+If you have a SCSI host adapter in your system, you'll find a
+subdirectory named after the driver for this adapter in /proc/scsi.
+You'll also see a list of all recognized SCSI devices in /proc/scsi:
+>cat /proc/scsi/scsi
+Attached devices:
+Host: scsi0 Channel: 00 Id: 00 Lun: 00
+ Vendor: QUANTUM Model: XP34550W Rev: LXY4
+ Type: Direct-Access ANSI SCSI revision: 02
+Host: scsi0 Channel: 00 Id: 01 Lun: 00
+ Vendor: SEAGATE Model: ST34501W Rev: 0018
+ Type: Direct-Access ANSI SCSI revision: 02
+Host: scsi0 Channel: 00 Id: 02 Lun: 00
+ Vendor: SEAGATE Model: ST34501W Rev: 0017
+ Type: Direct-Access ANSI SCSI revision: 02
+Host: scsi0 Channel: 00 Id: 04 Lun: 00
+ Vendor: ARCHIVE Model: Python 04106-XXX Rev: 703b
+ Type: Sequential-Access ANSI SCSI revision: 02
+The directory named after the driver has one file for each adapter
+found in the system. These files contain information about
+the controller, including the used IRQ and the IO address range:
+>cat /proc/scsi/ncr53c8xx/0
+General information:
+ Chip NCR53C875, device id 0xf, revision id 0x4
+ IO port address 0xec00, IRQ number 11
+ Synchronous period factor 12, max commands per lun 4
+2.6 Parallel port info in /proc/parport
+The directory /proc/parport contains information about the parallel
+ports of your system. It has one subdirectory for each port, named
+after the port number (0,1,2,...).
+This directory contains four files:
+ autoprobe Autoprobe results of this port
+ devices Connected device modules
+ hardware Hardware info (port type, io-port, DMA, IRQ, etc.)
+ irq Used interrupt, if any
+2.7 TTY info in /proc/tty
+Information about the available and the actually used tty's can be
+found in /proc/tty. You'll find entries for drivers and line
+disciplines in this directory, as shown in the table below:
+ drivers List of drivers and their usage
+ ldiscs Registered line disciplines
+ driver/serial Usage statistic and status of single tty lines
+To see which tty's are currently in use, you can simply look into the
+file /proc/tty/drivers:
+>cat /proc/tty/drivers
+pty_slave /dev/pts 136 0-255 pty:slave
+pty_master /dev/ptm 128 0-255 pty:master
+pty_slave /dev/ttyp 3 0-255 pty:slave
+pty_master /dev/pty 2 0-255 pty:master
+serial /dev/cua 5 64-67 serial:callout
+serial /dev/ttyS 4 64-67 serial
+/dev/tty0 /dev/tty0 4 0 system:vtmaster
+/dev/ptmx /dev/ptmx 5 2 system
+/dev/console /dev/console 5 1 system:console
+/dev/tty /dev/tty 5 0 system:/dev/tty
+unknown /dev/tty 4 1-63 console
+3 Reading and modifying kernel parameters
+A very interesting part of /proc is the directory /proc/sys. This not
+only provides information, it also allows you to change parameters
+within the kernel. Be very careful when trying this. You can optimize
+your system, but you also can crash it. Never play around with kernel
+parameters on a production system. Set up a development machine and
+test to make sure that everything works the way you want it to. You
+may have no alternative but to reboot the machine once an error has
+been made.
+To change a value, simply echo the new value into the file. An example
+is given below in the section on the file system data. You need to be
+root to do this. You can create your own boot script to get this done
+every time your system boots.
+The files in /proc/sys can be used to tune and monitor miscellaneous
+and general things in the operation of the Linux kernel. Since some
+of the files can inadvertently disrupt your system, it is advisable to
+read both documentation and source before actually making
+adjustments. In any case, be very careful when writing to any of these
+files. The entries in /proc may change slightly between the 2.1.* and
+the 2.2 kernel, so review the kernel documentation if there is any
+doubt. You'll find the documentation in the directory
+/usr/src/linux/Documentation/sys. This chapter is heavily based on the
+documentation included in the pre 2.2 kernels. Thanks to Rik van Riel
+for providing this information.
+3.1 /proc/sys/debug and /proc/sys/proc
+These two subdirectories are empty.
+3.2 /proc/fs - File system data
+This subdirectory contains specific file system, file handle, inode,
+dentry and quota information.
+Currently, these files are in /proc/sys/fs:
+ Status of the directory cache. Since directory entries are
+ dynamically allocated and deallocated, this file gives information
+ about the current status. It holds six values, in which the last
+ two are not used and are always zero. The other four mean:
+ nr_dentry Seems to be zero all the time
+ nr_unused Number of unused cache entries
+ age_limit Age in seconds after the entry may be
+ reclaimed, when memory is short
+ want_pages internal
+dquot-nr and dquot-max
+ The file dquot-max shows the maximum number of cached disk quota
+ entries.
+ The file dquot-nr shows the number of allocated disk quota
+ entries and the number of free disk quota entries.
+ If the number of free cached disk quotas is very low and you have
+ a large number of simultaneous system users, you might want
+ to raise the limit.
+file-nr and file-max
+ The kernel allocates file handles dynamically, but as yet
+ doesn't free them again.
+ The value in file-max denotes the maximum number of file handles
+ that the Linux kernel will allocate. When you get a lot of error
+ messages about running out of file handles, you might want to raise
+ this limit. The default value is 4096. To change it, just write the
+ new number into the file:
+ # cat /proc/sys/fs/file-max
+ 4096
+ # echo 8192 > /proc/sys/fs/file-max
+ # cat /proc/sys/fs/file-max
+ 8192
+ This method of revision is useful for all customizable parameters
+ of the kernel - simply echo the new value to the corresponding
+ file.
+ The three values in file-nr denote the number of allocated file
+ handles, the number of used file handles, and the maximum number of
+ file handles. When the allocated file handles come close to the
+ maximum, but the number of actually used ones is far behind, you've
+ encountered a peak in your usage of file handles and you don't need
+ to increase the maximum.
+ However, there is still a per process limit of open files, which
+ unfortunately can't be changed that easily. It is set to 1024 by
+ default. To change this you have to edit the files limits.h and
+ fs.h in the directory /usr/src/linux/include/linux. Change the
+ definition of NR_OPEN and recompile the kernel.
+inode-state, inode-nr and inode-max
+ As with file handles, the kernel allocates the inode structures
+ dynamically, but can't free them yet.
+ The value in inode-max denotes the maximum number of inode
+ handlers. This value should be 3 to 4 times larger than the value
+ in file-max, since stdin, stdout, and network sockets also need an
+ inode struct to handle them. If you regularly run out of inodes,
+ you should increase this value.
+ The file inode-nr contains the first two items from inode-state, so
+ we'll skip to that file...
+ inode-state contains three actual numbers and four dummy values. The
+ actual numbers are (in order of appearance) nr_inodes, nr_free_inodes,
+ and preshrink.
+ nr_inodes
+ Denotes the number of inodes the system has allocated. This can
+ be slightly more than inode-max because Linux allocates them one
+ pageful at a time.
+ nr_free_inodes
+ Represents the number of free inodes. Preshrink is nonzero
+ when nr_inodes > inode-max and the system needs to prune the
+ inode list instead of allocating more.
+super-nr and super-max
+ Again, super block structures are allocated by the kernel,
+ but not freed. The file super-max contains the maximum number of
+ super block handlers, where super-nr shows the number of
+ currently allocated ones.
+ Every mounted file system needs a super block, so if you plan to
+ mount lots of file systems, you may want to increase these
+ numbers.
+3.3 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats
+Besides these files, there is the subdirectory
+/proc/sys/fs/binfmt_misc. This handles the kernel support for
+miscellaneous binary formats.
+Binfmt_misc provides the ability to register additional binary formats
+to the Kernel without compiling an additional module/kernel. Therefore
+binfmt_misc needs to know magic numbers at the beginning or the
+filename extension of the binary.
+It works by maintaining a linked list of structs, that contain a
+description of a binary format, including a magic with size (or the
+filename extension), offset and mask, and the interpreter name. On
+request it invokes the given interpreter with the original program as
+argument, as binfmt_java and binfmt_em86 and binfmt_mz do.
+Since binfmt_misc does not define any default binary-formats, you have to
+register an additional binary-format.
+There are two general files in binfmt_misc and one file per registered
+format. The two general files are register and status.
+Registering a new binary format
+echo :name:type:offset:magic:mask:interpreter: > /proc/sys/fs/binfmt_misc/register
+with appropriate name (the name for the /proc-dir entry), offset
+(defaults to 0, if omitted), magic and mask (which can be omitted,
+defaults to all 0xff) and last but not least, the interpreter that is
+to be invoked (for example and testing '/bin/echo'). Type can be M for
+usual magic matching or E for filename extension matching (give
+extension in place of magic).
+To check or reset the status of the binary format handler:
+If you do a cat on the file /proc/sys/fs/binfmt_misc/status, you will
+get the current status (enabled/disabled) of binfmt_misc. Change the
+status by echoing 0 (disables) or 1 (enables) or -1 (caution: this
+clears all previously registered binary formats) to status. For
+example echo 0 > status to disable binfmt_misc (temporarily).
+Status of a single handler
+Each registered handler has an entry in /proc/sys/fs/binfmt_misc.
+These files perform the same function as status, but their scope is
+limited to the actual binary format. By running cat on this file, you also
+receive all related information about the interpreter/magic of the binfmt.
+Example usage of binfmt_misc (emulate binfmt_java)
+cd /proc/sys/fs/binfmt_misc
+echo ':Java:M::\xca\xfe\xba\xbe::/usr/local/java/bin/javawrapper:' > register
+echo ':HTML:E::html::/usr/local/java/bin/appletviewer:' > register
+echo ':Applet:M::<!--applet::/usr/local/java/bin/appletviewer:' > register
+echo ':DEXE:M::\x0eDEX::/usr/bin/dosexec:' > register
+These three lines add support for Java executables and Java applets
+(like binfmt_java, additionally recognizing the .html extension with
+no need to put <!--applet> to every applet file). You have to install
+the JDK and the shell-script /usr/local/java/bin/javawrapper too. It
+works around the brokenness of the Java filename handling. To add a
+Java binary, just create a link to the class-file somewhere in the path.
+3.4 /proc/sys/kernel - general kernel parameters
+This directory reflects general kernel behaviors. As I've said before,
+the contents depend on your configuration. I'll list the most
+important files, along with descriptions of what they mean and how to
+use them.
+ The file contains three values; highwater, lowwater, and
+ frequency.
+ It exists only when BSD-style process accounting is enabled. These
+ values control its behavior. If the free space on the file system
+ where the log lives goes below lowwater%, accounting suspends. If
+ it goes above highwater%, accounting resumes. Frequency determines
+ how often you check the amount of free space (value is in
+ seconds). Default settings are: 4, 2, and 30. That is, suspend
+ accounting if there is <= 2% free space left; resume it if we have
+ >= 4%; consider information about the amount of free space valid
+ for 30 seconds.
+ When the value in this file is 0, ctrl-alt-del is trapped and sent
+ to the init(1) program to handle a graceful restart. However, when
+ the value is > 0, Linux's reaction to this key combination will be
+ an immediate reboot, without syncing its dirty buffers.
+ Note: when a program (like dosemu) has the keyboard in raw mode,
+ the ctrl-alt-del is intercepted by the program before it ever
+ reaches the kernel tty layer, and it is up to the program to decide
+ what to do with it.
+domainname and hostname
+ These files can be controlled to set the NIS domainname and
+ hostname of your box. For example, a simple:
+ # echo "darkstar" > /proc/sys/kernel/hostname
+ # echo "" > /proc/sys/kernel/domainname
+ would suffice to set your hostname and NIS domainname.
+osrelease, ostype and version
+ The names make it pretty obvious what these fields contain:
+ >cat /proc/sys/kernel/osrelease
+ 2.1.131
+ >cat /proc/sys/kernel/ostype
+ Linux
+ >cat /proc/sys/kernel/version
+ #8 Mon Jan 25 19:45:02 PST 1999
+ The files osrelease and ostype should be clear enough. Version
+ needs a little more clarification however. The #8 means that this
+ is the 8th kernel built from this source base and the date behind
+ it indicates the time the kernel was built. The only way to tune
+ these values is to rebuild the kernel.
+ The value in this file represents the number of seconds the kernel
+ waits before rebooting on a panic. When you use the software
+ watchdog, the recommended setting is 60. If set to 0, the auto
+ reboot after a kernel panic is disabled, this is the default
+ setting.
+ The four values in printk denote console_loglevel,
+ default_message_loglevel, minimum_console_loglevel, and
+ default_console_loglevel respectively.
+ These values influence printk() behavior when printing or logging
+ error messages, which come from inside the kernel. See syslog(2)
+ for more information on the different log levels.
+ console_loglevel
+ Messages with a higher priority than this will be printed to
+ the console.
+ default_message_loglevel
+ Messages without an explicit priority will be printed with
+ this priority.
+ minimum_console_loglevel
+ Minimum (highest) value to which the console_loglevel can be set.
+ default_console_loglevel
+ Default value for console_loglevel.
+ This file shows the size of the generic SCSI (sg) buffer. At this
+ point, you can’t tune it yet, but you can change it at compile time
+ by editing include/scsi/sg.h and changing the value of SG_BIG_BUFF.
+ If you use a scanner with SANE (Scanner Access now easy) you
+ might want to set this to a higher value. Look into the SANE
+ documentation on this issue.
+ The location where the modprobe binary is located. The kernel
+ uses this program to load modules on demand.
+3.5 /proc/sys/vm - The virtual memory subsystem
+The files in this directory can be used to tune the operation of the
+virtual memory (VM) subsystem of the Linux kernel. In addition, one of
+the files (bdflush) has a little influence on disk usage.
+ This file controls the operation of the bdflush kernel daemon. It
+ currently contains 9 integer values, 6 of which are actually used
+ by the kernel:
+ nfract Percentage of buffer cache dirty to
+ activate bdflush
+ ndirty Maximum number of dirty blocks to
+ write out per-wake-cycle
+ nrefill Number of clean buffers to try to obtain
+ each time we call refill
+ nref_dirt Dirty buffer threshold for activating bdflush
+ when trying to refill buffers.
+ dummy unused
+ age_buffer Time for normal buffer to age before you flush it
+ age_super Time for superblock to age before you flush it
+ dummy unused
+ dummy unused
+ nfract
+ This parameter governs the maximum number of dirty buffers
+ in the buffer cache. Dirty means that the contents of the
+ buffer still have to be written to disk (as opposed to a
+ clean buffer, which can just be forgotten about). Setting
+ this to a high value means that Linux can delay disk writes
+ for a long time, but it also means that it will have to do a
+ lot of I/O at once when memory becomes short. A low value
+ will spread out disk I/O more evenly.
+ ndirty
+ Ndirty gives the maximum number of dirty buffers that
+ bdflush can write to the disk at one time. A high value will
+ mean delayed, bursty I/O, while a small value can lead to
+ memory shortage when bdflush isn't woken up often enough.
+ nrefill
+ This is the number of buffers that bdflush will add to the list
+ of free buffers when refill_freelist() is called. It is
+ necessary to allocate free buffers beforehand, since the
+ buffers are often different sizes than the memory pages
+ and some bookkeeping needs to be done beforehand. The
+ higher the number, the more memory will be wasted and the
+ less often refill_freelist() will need to run.
+ nref_dirt
+ When refill_freelist() comes across more than nref_dirt
+ dirty buffers, it will wake up bdflush.
+ age_buffer and age_super
+ Finally, the age_buffer and age_super parameters govern the
+ maximum time Linux waits before writing out a dirty buffer
+ to disk. The value is expressed in jiffies (clockticks), the
+ number of jiffies per second is 100. Age_buffer is the
+ maximum age for data blocks, while age_super is for
+ filesystems meta data.
+ The three values in this file control how much memory should be
+ used for buffer memory. The percentage is calculated as a
+ percentage of total system memory.
+ The values are:
+ min_percent
+ This is the minimum percentage of memory that should be
+ spent on buffer memory.
+ borrow_percent
+ When Linux is short on memory, and the buffer cache uses more
+ than it has been allotted, the memory management (MM) subsystem
+ will prune the buffer cache more heavily than other memory to
+ compensate.
+ max_percent
+ This is the maximum amount of memory that can be used for
+ buffer memory.
+ This file contains three values: min, low and high:
+ min
+ When the number of free pages in the system reaches this number,
+ only the kernel can allocate more memory.
+ low
+ If the number of free pages gets below this point, the kernel
+ starts swapping aggressively.
+ high
+ The kernel tries to keep up to this amount of memory free; if
+ memory comes below this point, the kernel gently starts swapping
+ in the hopes that it never has to do really aggressive swapping.
+ Kswapd is the kernel swap out daemon. That is, kswapd is that piece
+ of the kernel that frees memory when it gets fragmented or
+ full. Since every system is different, you'll probably want some
+ control over this piece of the system.
+ The file contains three numbers:
+ tries_base
+ The maximum number of pages kswapd tries to free in one round is
+ calculated from this number. Usually this number will be divided
+ by 4 or 8 (see mm/vmscan.c), so it isn't as big as it looks.
+ When you need to increase the bandwidth to/from swap, you'll want
+ to increase this number.
+ tries_min
+ This is the minimum number of times kswapd tries to free a page
+ each time it is called. Basically it's just there to make sure
+ that kswapd frees some pages even when it's being called with
+ minimum priority.
+ swap_cluster
+ This is probably the greatest influence on system
+ performance. swap_cluster is the number of pages kswapd writes in
+ one turn. You’ll want this value to be large so that kswapd does
+ its I/O in large chunks and the disk doesn’t have to seek as
+ often, but you don’t want it to be too large since that would
+ flood the request queue.
+ This file contains one value. The following algorithm is used to
+ decide if there's enough memory: if the value of overcommit_memory
+ is positive, then there's always enough memory. This is a useful
+ feature, since programs often malloc() huge amounts of memory 'just
+ in case', while they only use a small part of it. Leaving this
+ value at 0 will lead to the failure of such a huge malloc(), when
+ in fact the system has enough memory for the program to run.
+ On the other hand, enabling this feature can cause you to run out
+ of memory and thrash the system to death, so large and/or important
+ servers will want to set this value to 0.
+ This file does exactly the same as buffermem, only this file
+ controls the amount of memory allowed for memory mapping and
+ generic caching of files.
+ You don't want the minimum level to be too low, otherwise your
+ system might thrash when memory is tight or fragmentation is
+ high.
+ The kernel keeps a number of page tables in a per-processor cache
+ (this helps a lot on SMP systems). The cache size for each
+ processor will be between the low and the high value.
+ On a low-memory, single CPU system, you can safely set these values
+ to 0 so you don't waste memory. It is used on SMP systems so that
+ the system can perform fast pagetable allocations without having to
+ acquire the kernel memory lock.
+ For large systems, the settings are probably fine. For normal
+ systems they won't hurt a bit. For small systems (<16MB ram) it
+ might be advantageous to set both values to 0.
+ This file contains no less than 8 variables. All of these values
+ are used by kswapd.
+ The first four variables sc_max_page_age, sc_page_advance,
+ sc_page_decline and sc_page_initial_age are used to keep track of
+ Linux's page aging. Page aging is a bookkeeping method to track
+ which pages of memory are often used, and which pages can be
+ swapped out without consequences.
+ When a page is swapped in, it starts at sc_page_initial_age
+ (default 3) and when the page is scanned by kswapd, its age is
+ adjusted according to the following scheme:
+ o If the page was used since the last time we scanned, its age
+ is increased by sc_page_advance (default 3) up to a
+ maximum of sc_max_page_age (default 20).
+ o Else (meaning it wasn't used) its age is decreased by
+ sc_page_decline (default 1).
+ When a page reaches age 0, it's ready to be swapped out.
+ The next four variables sc_age_cluster_fract, sc_age_cluster_min,
+ sc_pageout_weight and sc_bufferout_weight, can be used to control
+ kswapd's aggressiveness in swapping out pages.
+ Sc_age_cluster_fract is used to calculate how many pages from a
+ process are to be scanned by kswapd. The formula used is
+ sc_age_cluster_fract
+ -------------------- * resident set size
+ 1024
+ So if you want kswapd to scan the whole process,
+ sc_age_cluster_fract needs to have a value of 1024. The minimum
+ number of pages kswapd will scan is represented by
+ sc_age_cluster_min, this is done so kswapd will also scan small
+ processes.
+ The values of sc_pageout_weight and sc_bufferout_weight are used
+ to control how many tries kswapd will make in order to swap out
+ one page/buffer. These values can be used to fine-tune the ratio
+ between user pages and buffer/cache memory. When you find that
+ your Linux system is swapping out too many process pages in order
+ to satisfy buffer memory demands, you might want to either
+ increase sc_bufferout_weight, or decrease the value of
+ sc_pageout_weight.
+3.6 /proc/sys/dev - Device specific parameters
+Currently there is only support for CDROM drives, and for those, there
+is only one read only file containing information about the CD-ROM
+drives attached to the system:
+>cat /proc/sys/dev/cdrom/info
+CD-ROM information
+drive name: sr0 hdc
+drive speed: 0 6
+drive # of slots: 1 0
+Can close tray: 1 1
+Can open tray: 1 1
+Can lock tray: 1 1
+Can change speed: 1 1
+Can select disk: 0 1
+Can read multisession: 1 1
+Can read MCN: 1 1
+Reports media changed: 1 1
+Can play audio: 1 1
+You see two drives, sr0 and hdc, and their lists of features.
+3.7 /proc/sys/sunrpc - Remote procedure calls
+This directory contains four files, which enable or disable debugging
+for the RPC functions NFS, NFS-daemon, RPC and NLM. The default values
+are 0. They can be set to one, to turn debugging on. (The default
+value is 0 for each)
+3.8 /proc/sys/net - Networking stuff
+The interface to the networking parts of the kernel is located in
+/proc/sys/net. The table below shows all possible subdirectories. You
+may see only some of them, depending on the configuration of your kernel:
+| core General parameter |appletalk Appletalk protocol |
+| unix Unix domain sockets |netrom NET/ROM |
+| 802 E802 protocol |ax25 AX25 |
+| ethernet Ethernet protocol |rose X.25 PLP layer |
+| ipv4 IP version 4 |x25 X.25 protocol |
+| ipx IPX |token-ring IBM token ring |
+| bridge Bridging |decnet DEC net |
+| ipv6 IP version 6 | |
+We will concentrate on IP networking here. As AX25, X.25, and DEC Net
+are only minor players in the Linux world, we'll skip them in this
+chapter. You'll find some short info on Appletalk and IPX further down
+in sections 3.10 and 3.11. Please look in the online documentation and
+the kernel source to get a detailed view of the parameters for those
+protocols. In this section we'll discuss the subdirectories printed in
+bold letters in the table above. As default values are suitable for
+most needs, there is no need to change these values.
+/proc/sys/net/core - Network core options
+ The default setting of the socket receive buffer in bytes.
+ The maximum receive socket buffer size in bytes.
+ The default setting (in bytes) of the socket send buffer.
+ The maximum send socket buffer size in bytes.
+message_burst and message_cost
+ These parameters are used to limit the warning messages written to
+ the kernel log from the networking code. They enforce a rate limit
+ to make a denial-of-service attack impossible. The higher the
+ message_cost factor is, the fewer messages will be
+ written. Message_burst controls when messages will be dropped. The
+ default settings limit warning messages to one every five seconds.
+ Maximal number of packets, queued on INPUT side, when the interface
+ receives packets faster than kernel can process them.
+ Maximum ancillary buffer size allowed per socket. Ancillary data is
+ a sequence of struct cmsghdr structures with appended data.
+/proc/sys/net/unix - Parameters for UNIX domain sockets
+There are only two files in this subdirectory. They control the delays
+for deleting and destroying socket descriptors.
+3.9 /proc/sys/net/ipv4 - IPV4 settings
+IP version 4 is still the most used protocol in Unix networking. It
+will be replaced by IP version 6 in the next couple of years, but for
+the moment it's the de facto standard for the internet and is used in
+most networking environments around the world. Because of the
+importance of this protocol, we'll have a deeper look into the subtree
+controlling the behavior of the IPv4 subsystem of the Linux kernel.
+Let's start with the entries in /proc/sys/net/ipv4 itself.
+ICMP settings
+icmp_echo_ignore_all and icmp_echo_ignore_broadcasts
+ Turn on (1) or off (0), if the kernel should ignore all ICMP ECHO
+ requests, or just those to broadcast and multicast addresses.
+ Please note that if you accept ICMP echo requests with a
+ broadcast/multicast destination address your network may be used
+ as an exploder for denial of service packet flooding attacks to
+ other hosts.
+icmp_destunreach_rate, icmp_echoreply_rate,
+icmp_paramprob_rate and icmp_timeexceed_rate
+ Sets limits for sending ICMP packets to specific targets. A value of
+ zero disables all limiting. Any positive value sets the maximum
+ packet rate in hundredths of a second (on Intel systems).
+IP settings
+ This file contains one, if the host got its IP configuration by
+ RARP, BOOTP, DHCP or a similar mechanism. Otherwise it is zero.
+ TTL (Time To Live) for IPv4 interfaces. This is simply the
+ maximum number of hops a packet may travel.
+ Enable dynamic socket address rewriting on interface address change. This
+ is useful for dialup interface with changing IP addresses.
+ Enable or disable forwarding of IP packets between interfaces. A
+ change of this value resets all other parameters to their default
+ values. They differ if the kernel is configured as host or router.
+ Range of ports used by TCP and UDP to choose the local
+ port. Contains two numbers, the first number is the lowest port,
+ the second number the highest local port. Default is 1024-4999.
+ Should be changed to 32768-61000 for high-usage systems.
+ Global switch to turn path MTU discovery off. It can also be set
+ on a per socket basis by the applications or on a per route
+ basis.
+ Enable/disable debugging of IP masquerading.
+IP fragmentation settings
+ipfrag_high_thresh and ipfrag_low_thresh
+ Maximum memory used to reassemble IP fragments. When
+ ipfrag_high_thresh bytes of memory is allocated for this purpose,
+ the fragment handler will toss packets until ipfrag_low_thresh is
+ reached.
+ Time in seconds to keep an IP fragment in memory.
+TCP settings
+ Bug-to-bug compatibility with some broken printers. On retransmit
+ try to send bigger packets to work around bugs in certain TCP
+ stacks. Can be turned off by setting it to zero.
+ Number of keep alive probes TCP sends out, until it decides that the
+ connection is broken.
+ How often TCP sends out keep alive messages, when keep alive is
+ enabled. The default is 2 hours.
+ Number of times initial SYNs for a TCP connection attempt will be
+ retransmitted. Should not be higher than 255. This is only the
+ timeout for outgoing connections, for incoming connections the
+ number of retransmits is defined by tcp_retries1.
+ Enable selective acknowledgments as defined in RFC2018.
+ Enable timestamps as defined in RFC1323.
+ Enable the strict RFC793 interpretation of the TCP urgent pointer
+ field. The default is to use the BSD compatible interpretation
+ of the urgent pointer pointing to the first byte after the urgent
+ data. The RFC793 interpretation is to have it point to the last
+ byte of urgent data. Enabling this option may lead to
+ interoperability problems. Disabled by default.
+ Only valid when the kernel was compiled with
+ CONFIG_SYNCOOKIES. Send out syncookies when the syn backlog queue
+ of a socket overflows. This is to protect against the common 'syn
+ flood attack'. Disabled by default.
+ Note that the concept of a socket backlog is abandoned, this
+ means the peer may not receive reliable error messages from an
+ overloaded server with syncookies enabled.
+ Enable window scaling as defined in RFC1323.
+ How many seconds to wait for a final FIN before the socket is
+ always closed. This is strictly a violation of the TCP
+ specification, but required to prevent denial-of-service attacks.
+ How many keepalive probes are sent per slow timer run. Shouldn't be
+ set too high to prevent bursts.
+ Length of the per socket backlog queue. Since Linux 2.2 the backlog
+ specified in listen(2) only specifies the length of the backlog
+ queue of already established sockets. When more connection requests
+ arrive Linux starts to drop packets. When syncookies are enabled
+ the packets are still answered and the maximum queue is effectively
+ ignored.
+ Defines how often an answer to a TCP connection request is
+ retransmitted before giving up.
+ Defines how often a TCP packet is retransmitted before giving up.
+Interface specific settings
+In the directory /proc/sys/net/ipv4/conf you'll find one subdirectory
+for each interface the system knows about and one directory called
+all. Changes in the all subdirectory affect all interfaces, where
+changes in the other subdirectories affect only one interface.
+All directories have the same entries:
+ This switch decides if the kernel accepts ICMP redirect messages
+ or not. The default is 'yes', if the kernel is configured for a
+ regular host; and 'no' for a router configuration.
+ Should source routed packets be accepted or declined. The
+ default is dependent on the kernel configuration. It's 'yes' for
+ routers and 'no' for hosts.
+ Accept packets with source address 0.b.c.d destined not to this
+ host as local ones. It is supposed that BOOTP relay daemon will
+ catch and forward such packets.
+ The default is 'no', as this feature is not implemented yet
+ (kernel version 2.2.0-pre?).
+ Enable or disable IP forwarding on this interface.
+ Log packets with source addresses with no known route to kernel log.
+ Do multicast routing. The kernel needs to be compiled with
+ CONFIG_MROUTE and a multicast routing daemon is required.
+ Do (1) or don't (0) do proxy ARP.
+ Integer value deciding if source validation should be made.
+ 1 means yes, 0 means no. Disabled by default, but
+ local/broadcast address spoofing is always on.
+ If you set this to 1 on a router that is the only connection
+ for a network to the net, it evidently prevents spoofing attacks
+ against your internal networks (external addresses can still be
+ spoofed), without the need for additional firewall rules.
+ Accept ICMP redirect messages only for gateways, listed in
+ default gateway list. Enabled by default.
+ If it is not set the kernel does not assume that different subnets
+ on this device can communicate directly. Default setting is 'yes'.
+ Determines if or if not to send ICMP redirects to other hosts.
+Routing settings
+The directory /proc/sys/net/ipv4/route contains several files to
+control routing issues.
+error_burst and error_cost
+ These parameters are used to limit the warning messages written to
+ the kernel log from the routing code. The higher the error_cost
+ factor is, the fewer messages will be written. Error_burst controls
+ when messages will be dropped. The default settings limit warning
+ messages to one every five seconds.
+ Writing to this file results in a flush of the routing cache.
+gc_elastic, gc_interval, gc_min_interval, gc_thresh, gc_timeout
+ Values to control the frequency and behavior of the garbage
+ collection algorithm for the routing cache.
+ Maximum size of the routing cache. Old entries will be purged
+ once the cache has this size.
+max_delay, min_delay
+ Delays for flushing the routing cache.
+redirect_load, redirect_number
+ Factors which determine if more ICMP redirects should be sent to
+ a specific host. No redirects will be sent once the load limit or
+ the maximum number of redirects has been reached.
+ Timeout for redirects. After this period redirects will be sent
+ again, even if this has been stopped, because the load or number
+ limit has been reached.
+Network Neighbor handling
+Settings about how to handle connections with direct neighbors (nodes
+attached to the same link) can be found in the directory /proc/sys/net/ipv4/neigh.
+As we saw it in the conf directory, there is a default subdirectory
+which holds the default values, and one directory for each
+interface. The contents of the directories are identical, with the
+single exception that the default settings contain additional options
+to set garbage collection parameters.
+In the interface directories you'll find the following entries:
+ A base value used for computing the random reachable time value
+ as specified in RFC2461.
+ The time, expressed in jiffies (1/100 sec), between retransmitted
+ Neighbor Solicitation messages. Used for address resolution and to
+ determine if a neighbor is unreachable.
+ Maximum queue length for a pending arp request - how many packets
+ are accepted from other layers while the arp address is still being
+ resolved.
+ Maximum for random delay of answers to neighbor solicitation
+ messages in jiffies (1/100 sec). Not yet implemented (Linux does
+ not have anycast support yet).
+ Maximum number of retries for unicast solicitation.
+ Maximum number of retries for multicast solicitation.
+ Delay for the first time probe if the neighbor is reachable. (see
+ gc_stale_time).
+ An ARP/neighbor entry is only replaced with a new one if the old
+ is at least locktime old. This prevents ARP cache thrashing.
+ Maximum time (real time is random [0..proxytime]) before
+ answering to an arp request for which we have a proxy arp entry.
+ In some cases, this is used to prevent network flooding.
+ Maximum queue length of the delayed proxy arp timer (see
+ proxy_delay).
+ Determines the number of requests to send to the user level arp
+ daemon. 0 to turn off.
+ Determines how often to check for stale ARP entries. After an ARP
+ entry is stale it will be resolved again (useful when an IP address
+ migrates to another machine). When ucast_solicit is > 0 it first
+ tries to send an ARP packet directly to the known host, when that
+ fails and mcast_solicit is > 0, an ARP request is broadcasted.
+3.10 Appletalk
+The /proc/sys/net/appletalk directory holds the Appletalk
+configuration data when Appletalk is loaded. The configurable
+parameters are:
+ The amount of time we keep an AARP entry before expiring
+ it. Used to age out old hosts.
+ The amount of time we will spend trying to resolve an Appletalk
+ address.
+ The number of times we will retransmit a query before giving up.
+ Controls the rate at which expiries are checked.
+The directory /proc/net/appletalk holds the list of active appletalk
+sockets on a machine.
+The fields indicate the DDP type, the local address (in network:node
+format) the remote address, the size of the transmit pending queue,
+the size of the received queue (bytes waiting for applications to
+read) the state and the uid owning the socket.
+/proc/net/atalk_iface lists all the interfaces configured for
+appletalk. It shows the name of the interface, its appletalk address,
+the network range on that address (or network number for phase 1
+networks), and the status of the interface.
+/proc/net/atalk_route lists each known network route. It lists the
+target (network) that the route leads to, the router (may be directly
+connected), the route flags, and the device the route is via.
+3.11 IPX
+The IPX protocol has no tunable values in /proc/sys/net.
+The IPX protocol does, however, provide /proc/net/ipx. This lists each
+IPX socket giving the local and remote addresses in Novell format
+(that is network:node:port). In accordance with the strange Novell
+tradition, everything but the port is in hex. Not_Connected is
+displayed for sockets that are not tied to a specific remote
+address. The Tx and Rx queue sizes indicate the number of bytes
+pending for transmit and receive. The state indicates the state the
+socket is in and the uid is the owning uid of the socket.
+The /proc/net/ipx_interface file lists all IPX interfaces. For each
+interface it gives the network number, the node number, and indicates
+if the network is the primary network. It also indicates which device it is bound to (or
+Internal for internal networks) and the Frame Type if
+appropriate. Linux supports 802.3, 802.2, 802.2 SNAP and DIX (Blue
+Book) ethernet framing for IPX.
+The /proc/net/ipx_route table holds a list of IPX routes. For each
+route it gives the destination network, the router node (or Directly)
+and the network address of the router (or Connected) for internal networks.
diff -u --recursive --new-file v2.2.0/linux/Makefile linux/Makefile
--- v2.2.0/linux/Makefile Mon Jan 25 17:44:34 1999
+++ linux/Makefile Mon Jan 25 17:46:35 1999
@@ -1,6 +1,6 @@
X ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
diff -u --recursive --new-file v2.2.0/linux/arch/alpha/lib/semaphore.S linux/arch/alpha/lib/semaphore.S
--- v2.2.0/linux/arch/alpha/lib/semaphore.S Mon Jan 25 17:44:34 1999
+++ linux/arch/alpha/lib/semaphore.S Wed Jan 27 10:18:03 1999
@@ -101,7 +101,7 @@
X .prologue 1
X mov $24, $16
- jsr __down
+ jsr __down_interruptible
X mov $0, $24
X ldq $28, 0*8($30)
diff -u --recursive --new-file v2.2.0/linux/drivers/sound/gus_wave.c linux/drivers/sound/gus_wave.c
--- v2.2.0/linux/drivers/sound/gus_wave.c Fri Jan 8 22:36:11 1999
+++ linux/drivers/sound/gus_wave.c Thu Jan 28 11:25:04 1999
@@ -144,7 +144,6 @@
X static int freq_div_table[] =
X {
- 44100,
X 44100, /* 14 */
X 41160, /* 15 */
X 38587, /* 16 */
@@ -2234,9 +2233,6 @@
X gus_busy = 1;
X active_device = 0;
- gus_reset();
- reset_sample_memory();
- gus_select_max_voices(14);
X saved_iw_mode = iw_mode;
X if (iw_mode)
X {
@@ -2244,6 +2240,11 @@
X gus_write8(0x19, gus_read8(0x19) & ~0x01); /* Disable enhanced mode */
X iw_mode = 0;
X }
+ gus_reset();
+ reset_sample_memory();
+ gus_select_max_voices(14);
X pcm_active = 0;
X dma_active = 0;
X pcm_opened = 1;
diff -u --recursive --new-file v2.2.0/linux/drivers/sound/sb_card.c linux/drivers/sound/sb_card.c
--- v2.2.0/linux/drivers/sound/sb_card.c Mon Jan 25 17:44:34 1999
+++ linux/drivers/sound/sb_card.c Thu Jan 28 10:58:47 1999
@@ -113,6 +113,7 @@
X }
X int sb_be_quiet=0;
+int esstype = 0; /* ESS chip type */
X #ifdef MODULE
@@ -136,7 +137,6 @@
X int pas2 = 0; /* Set pas2=1 to load this as support for pas2 */
X int sm_games = 0; /* Mixer - see sb_mixer.c */
X int acer = 0; /* Do acer notebook init */
-int esstype = 0; /* ESS chip type */
X MODULE_PARM(io, "i");
X MODULE_PARM(irq, "i");
diff -u --recursive --new-file v2.2.0/linux/fs/nfsd/vfs.c linux/fs/nfsd/vfs.c
--- v2.2.0/linux/fs/nfsd/vfs.c Tue Jan 19 11:32:52 1999
+++ linux/fs/nfsd/vfs.c Wed Jan 27 13:49:46 1999
@@ -988,6 +988,20 @@
X }
X /*
+ * We need to do a check-parent every time
+ * after we have locked the parent - to verify
+ * that the parent is still our parent and
+ * that we are still hashed onto it..
+ *
+ * This is required in case two processes race
+ * on removing (or moving) the same entry: the
+ * parent lock will serialize them, but the
+ * other process will be too late..
+ */
+#define check_parent(dir, dentry) \

true || echo 'restore of patch-2.2.1 failed'

echo 'End of part 1'
echo 'File patch-2.2.1 is continued in part 2'
echo 2 > _shar_seq_.tmp

Reply all
Reply to author
0 new messages