Linux Kernel Patch v2.4, patch-2.4.10 (192/197)

Thomas Kobienia

Sep 24, 2001, 8:00:55 PM
Archive-name: v2.4/patch-2.4.10/part192

#!/bin/sh -x
# this is part 192 of a 197 - part archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file patch-2.4.10 continued
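# A minimal unpacking sketch (comments only; the part file names below are
# assumptions -- use whatever names your news reader saved the articles under):
#
#   /bin/sh part191.sh
#   /bin/sh part192.sh
#   /bin/sh part193.sh
#
# Each part appends its chunk to patch-2.4.10 and then writes the next part
# number into _shar_seq_.tmp, which the sequence check below relies on.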
if test ! -r _shar_seq_.tmp; then
echo 'Please unpack part 1 first!'
exit 1
fi
(read Scheck
if test "$Scheck" != 192; then
echo "Please unpack part $Scheck next!"
exit 1
else
exit 0
fi
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping patch-2.4.10'
else
echo 'x - continuing with patch-2.4.10'
sed 's/^X//' << 'SHAR_EOF' >> 'patch-2.4.10' &&
X }
X out:
X up_write(&current->mm->mmap_sem);
diff -u --recursive --new-file v2.4.9/linux/mm/mremap.c linux/mm/mremap.c
--- v2.4.9/linux/mm/mremap.c Fri Apr 13 15:17:21 2001
+++ linux/mm/mremap.c Thu Sep 20 20:31:26 2001
@@ -119,7 +119,6 @@
X while ((offset += PAGE_SIZE) < len)
X move_one_page(mm, new_addr + offset, old_addr + offset);
X zap_page_range(mm, new_addr, len);
- flush_tlb_range(mm, new_addr, new_addr + len);
X return -1;
X }
X
@@ -127,11 +126,58 @@
X unsigned long addr, unsigned long old_len, unsigned long new_len,
X unsigned long new_addr)
X {
- struct vm_area_struct * new_vma;
+ struct mm_struct * mm = vma->vm_mm;
+ struct vm_area_struct * new_vma, * next, * prev;
+ int allocated_vma;
X
- new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (new_vma) {
- if (!move_page_tables(current->mm, new_addr, addr, old_len)) {
+ new_vma = NULL;
+ next = find_vma_prev(mm, new_addr, &prev);
+ if (next) {
+ if (prev && prev->vm_end == new_addr &&
+ can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+ spin_lock(&mm->page_table_lock);
+ prev->vm_end = new_addr + new_len;
+ spin_unlock(&mm->page_table_lock);
+ new_vma = prev;
+ if (next != prev->vm_next)
+ BUG();
+ if (prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags)) {
+ spin_lock(&mm->page_table_lock);
+ prev->vm_end = next->vm_end;
+ __vma_unlink(mm, next, prev);
+ spin_unlock(&mm->page_table_lock);
+
+ mm->map_count--;
+ kmem_cache_free(vm_area_cachep, next);
+ }
+ } else if (next->vm_start == new_addr + new_len &&
+ can_vma_merge(next, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+ spin_lock(&mm->page_table_lock);
+ next->vm_start = new_addr;
+ spin_unlock(&mm->page_table_lock);
+ new_vma = next;
+ }
+ } else {
+ prev = find_vma(mm, new_addr-1);
+ if (prev && prev->vm_end == new_addr &&
+ can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+ spin_lock(&mm->page_table_lock);
+ prev->vm_end = new_addr + new_len;
+ spin_unlock(&mm->page_table_lock);
+ new_vma = prev;
+ }
+ }
+
+ allocated_vma = 0;
+ if (!new_vma) {
+ new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!new_vma)
+ goto out;
+ allocated_vma = 1;
+ }
+
+ if (!move_page_tables(current->mm, new_addr, addr, old_len)) {
+ if (allocated_vma) {
X *new_vma = *vma;
X new_vma->vm_start = new_addr;
X new_vma->vm_end = new_addr+new_len;
@@ -142,17 +188,19 @@
X if (new_vma->vm_ops && new_vma->vm_ops->open)
X new_vma->vm_ops->open(new_vma);
X insert_vm_struct(current->mm, new_vma);
- do_munmap(current->mm, addr, old_len);
- current->mm->total_vm += new_len >> PAGE_SHIFT;
- if (new_vma->vm_flags & VM_LOCKED) {
- current->mm->locked_vm += new_len >> PAGE_SHIFT;
- make_pages_present(new_vma->vm_start,
- new_vma->vm_end);
- }
- return new_addr;
X }
- kmem_cache_free(vm_area_cachep, new_vma);
+ do_munmap(current->mm, addr, old_len);
+ current->mm->total_vm += new_len >> PAGE_SHIFT;
+ if (new_vma->vm_flags & VM_LOCKED) {
+ current->mm->locked_vm += new_len >> PAGE_SHIFT;
+ make_pages_present(new_vma->vm_start,
+ new_vma->vm_end);
+ }
+ return new_addr;
X }
+ if (allocated_vma)
+ kmem_cache_free(vm_area_cachep, new_vma);
+ out:
X return -ENOMEM;
X }
X
diff -u --recursive --new-file v2.4.9/linux/mm/numa.c linux/mm/numa.c
--- v2.4.9/linux/mm/numa.c Tue Jul 3 17:08:22 2001
+++ linux/mm/numa.c Mon Sep 17 16:15:02 2001
@@ -31,7 +31,7 @@
X
X #endif /* !CONFIG_DISCONTIGMEM */
X
-struct page * alloc_pages_node(int nid, int gfp_mask, unsigned long order)
+struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order)
X {
X #ifdef CONFIG_NUMA
X return __alloc_pages(gfp_mask, order, NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
@@ -82,8 +82,8 @@
X memset(pgdat->valid_addr_bitmap, 0, size);
X }
X
-static struct page * alloc_pages_pgdat(pg_data_t *pgdat, int gfp_mask,
- unsigned long order)
+static struct page * alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask,
+ unsigned int order)
X {
X return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK));
X }
@@ -92,7 +92,7 @@
X * This can be refined. Currently, tries to do round robin, instead
X * should do concentratic circle search, starting from current node.
X */
-struct page * _alloc_pages(unsigned int gfp_mask, unsigned long order)
+struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order)
X {
X struct page *ret = 0;
X pg_data_t *start, *temp;
diff -u --recursive --new-file v2.4.9/linux/mm/oom_kill.c linux/mm/oom_kill.c
--- v2.4.9/linux/mm/oom_kill.c Mon Aug 27 12:41:49 2001
+++ linux/mm/oom_kill.c Mon Sep 17 16:15:02 2001
@@ -192,43 +192,3 @@
X schedule();
X return;
X }
-
-/**
- * out_of_memory - is the system out of memory?
- *
- * Returns 0 if there is still enough memory left,
- * 1 when we are out of memory (otherwise).
- */
-int out_of_memory(void)
-{
- long cache_mem, limit;
-
- /* Enough free memory? Not OOM. */
- if (nr_free_pages() > freepages.min)
- return 0;
-
- if (nr_free_pages() + nr_inactive_clean_pages() > freepages.low)
- return 0;
-
- /*
- * If the buffer and page cache (excluding swap cache) are over
- * their (/proc tunable) minimum, we're still not OOM. We test
- * this to make sure we don't return OOM when the system simply
- * has a hard time with the cache.
- */
- cache_mem = atomic_read(&page_cache_size);
- cache_mem += atomic_read(&buffermem_pages);
- cache_mem -= swapper_space.nrpages;
- limit = (page_cache.min_percent + buffer_mem.min_percent);
- limit *= num_physpages / 100;
-
- if (cache_mem > limit)
- return 0;
-
- /* Enough swap space left? Not OOM. */
- if (nr_swap_pages > 0)
- return 0;
-
- /* Else... */
- return 1;
-}
diff -u --recursive --new-file v2.4.9/linux/mm/page_alloc.c linux/mm/page_alloc.c
--- v2.4.9/linux/mm/page_alloc.c Mon Aug 27 12:41:49 2001
+++ linux/mm/page_alloc.c Fri Sep 21 22:40:40 2001
@@ -17,19 +17,20 @@
X #include <linux/pagemap.h>
X #include <linux/bootmem.h>
X #include <linux/slab.h>
+#include <linux/compiler.h>
X
X int nr_swap_pages;
X int nr_active_pages;
-int nr_inactive_dirty_pages;
+int nr_inactive_pages;
+struct list_head inactive_list;
+struct list_head active_list;
X pg_data_t *pgdat_list;
X
X static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
-static int zone_balance_ratio[MAX_NR_ZONES] = { 32, 128, 128, };
-static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 10, };
-static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, };
+static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 32, 128, 128, };
+static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
+static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };
X
-struct list_head active_list;
-struct list_head inactive_dirty_list;
X /*
X * Free_page() adds the page to the free lists. This is optimized for
X * fast normal cases (no error jumps taken normally).
@@ -61,8 +62,8 @@
X * Hint: -mask = 1+~mask
X */
X
-static void FASTCALL(__free_pages_ok (struct page *page, unsigned long order));
-static void __free_pages_ok (struct page *page, unsigned long order)
+static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order));
+static void __free_pages_ok (struct page *page, unsigned int order)
X {
X unsigned long index, page_idx, mask, flags;
X free_area_t *area;
@@ -83,14 +84,14 @@
X BUG();
X if (PageActive(page))
X BUG();
- if (PageInactiveDirty(page))
- BUG();
- if (PageInactiveClean(page))
+ if (PageInactive(page))
X BUG();
-
X page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
- page->age = PAGE_AGE_START;
-
+
+ if (current->flags & PF_FREE_PAGES)
+ goto local_freelist;
+ back_local_freelist:
+
X zone = page->zone;
X
X mask = (~0UL) << order;
@@ -135,14 +136,21 @@
X memlist_add_head(&(base + page_idx)->list, &area->free_list);
X
X spin_unlock_irqrestore(&zone->lock, flags);
+ return;
X
+ local_freelist:
X /*
- * We don't want to protect this variable from race conditions
- * since it's nothing important, but we do want to make sure
- * it never gets negative.
+ * This is a little subtle: if the allocation order
+ * wanted is major than zero we'd better take all the pages
+ * local since we must deal with fragmentation too and we
+ * can't rely on the nr_local_pages information.
X */
- if (memory_pressure > NR_CPUS)
- memory_pressure--;
+ if (current->nr_local_pages && !current->allocation_order)
+ goto back_local_freelist;
+
+ list_add(&page->list, &current->local_pages);
+ page->index = order;
+ current->nr_local_pages++;
X }
X
X #define MARK_USED(index, order, area) \
@@ -169,11 +177,11 @@
X return page;
X }
X
-static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned long order));
-static struct page * rmqueue(zone_t *zone, unsigned long order)
+static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
+static struct page * rmqueue(zone_t *zone, unsigned int order)
X {
X free_area_t * area = zone->free_area + order;
- unsigned long curr_order = order;
+ unsigned int curr_order = order;
X struct list_head *head, *curr;
X unsigned long flags;
X struct page *page;
@@ -193,7 +201,7 @@
X index = page - zone->zone_mem_map;
X if (curr_order != MAX_ORDER-1)
X MARK_USED(index, curr_order, area);
- zone->free_pages -= 1 << order;
+ zone->free_pages -= 1UL << order;
X
X page = expand(zone, page, index, order, curr_order, area);
X spin_unlock_irqrestore(&zone->lock, flags);
@@ -201,7 +209,7 @@
X set_page_count(page, 1);
X if (BAD_RANGE(zone,page))
X BUG();
- DEBUG_ADD_PAGE
+ DEBUG_LRU_PAGE(page);
X return page;
X }
X curr_order++;
@@ -212,304 +220,203 @@
X return NULL;
X }
X
-#define PAGES_MIN 0
-#define PAGES_LOW 1
-#define PAGES_HIGH 2
-
-/*
- * This function does the dirty work for __alloc_pages
- * and is separated out to keep the code size smaller.
- * (suggested by Davem at 1:30 AM, typed by Rik at 6 AM)
- */
-static struct page * __alloc_pages_limit(zonelist_t *zonelist,
- unsigned long order, int limit, int direct_reclaim)
+#ifndef CONFIG_DISCONTIGMEM
+struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
X {
- zone_t **zone = zonelist->zones;
+ return __alloc_pages(gfp_mask, order,
+ contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+}
+#endif
X
- for (;;) {
- zone_t *z = *(zone++);
- unsigned long water_mark;
+static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
+static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
+{
+ struct page * page = NULL;
+ int __freed = 0;
X
- if (!z)
- break;
- if (!z->size)
- BUG();
+ if (!(gfp_mask & __GFP_WAIT))
+ goto out;
+ if (in_interrupt())
+ BUG();
+
+ current->allocation_order = order;
+ current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
+
+ __freed = try_to_free_pages(classzone, gfp_mask, order);
+
+ current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
+
+ if (current->nr_local_pages) {
+ struct list_head * entry, * local_pages;
+ struct page * tmp;
+ int nr_pages;
+
+ local_pages = &current->local_pages;
+
+ if (likely(__freed)) {
+ /* pick from the last inserted so we're lifo */
+ entry = local_pages->next;
+ do {
+ tmp = list_entry(entry, struct page, list);
+ if (tmp->index == order && memclass(tmp->zone, classzone)) {
+ list_del(entry);
+ current->nr_local_pages--;
+ set_page_count(tmp, 1);
+ page = tmp;
+
+ if (page->buffers)
+ BUG();
+ if (page->mapping)
+ BUG();
+ if (!VALID_PAGE(page))
+ BUG();
+ if (PageSwapCache(page))
+ BUG();
+ if (PageLocked(page))
+ BUG();
+ if (PageDecrAfter(page))
+ BUG();
+ if (PageActive(page))
+ BUG();
+ if (PageInactive(page))
+ BUG();
+ if (PageDirty(page))
+ BUG();
X
- /*
- * We allocate if the number of free + inactive_clean
- * pages is above the watermark.
- */
- switch (limit) {
- default:
- case PAGES_MIN:
- water_mark = z->pages_min;
- break;
- case PAGES_LOW:
- water_mark = z->pages_low;
- break;
- case PAGES_HIGH:
- water_mark = z->pages_high;
+ break;
+ }
+ } while ((entry = entry->next) != local_pages);
X }
X
- if (z->free_pages + z->inactive_clean_pages >= water_mark) {
- struct page *page = NULL;
- /* If possible, reclaim a page directly. */
- if (direct_reclaim)
- page = reclaim_page(z);
- /* If that fails, fall back to rmqueue. */
- if (!page)
- page = rmqueue(z, order);
- if (page)
- return page;
+ nr_pages = current->nr_local_pages;
+ /* free in reverse order so that the global order will be lifo */
+ while ((entry = local_pages->prev) != local_pages) {
+ list_del(entry);
+ tmp = list_entry(entry, struct page, list);
+ __free_pages_ok(tmp, tmp->index);
+ if (!nr_pages--)
+ BUG();
X }
+ current->nr_local_pages = 0;
X }
-
- /* Found nothing. */
- return NULL;
+ out:
+ *freed = __freed;
+ return page;
X }
X
-#ifndef CONFIG_DISCONTIGMEM
-struct page *_alloc_pages(unsigned int gfp_mask, unsigned long order)
+static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
X {
- return __alloc_pages(gfp_mask, order,
- contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+ long free = zone->free_pages - (1UL << order);
+ return free >= 0 ? free : 0;
X }
-#endif
X
X /*
X * This is the 'heart' of the zoned buddy allocator:
X */
-struct page * __alloc_pages(unsigned int gfp_mask, unsigned long order, zonelist_t *zonelist)
+struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
X {
- zone_t **zone;
- int direct_reclaim = 0;
+ zone_t **zone, * classzone;
X struct page * page;
+ int freed;
X
- /*
- * Allocations put pressure on the VM subsystem.
- */
- memory_pressure++;
+ zone = zonelist->zones;
+ classzone = *zone;
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
X
- /*
- * (If anyone calls gfp from interrupts nonatomically then it
- * will sooner or later tripped up by a schedule().)
- *
- * We are falling back to lower-level zones if allocation
- * in a higher zone fails.
- */
+ if (zone_free_pages(z, order) > z->pages_low) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
+ }
X
- /*
- * Can we take pages directly from the inactive_clean
- * list?
- */
- if (order == 0 && (gfp_mask & __GFP_WAIT))
- direct_reclaim = 1;
+ classzone->need_balance = 1;
+ mb();
+ if (waitqueue_active(&kswapd_wait))
+ wake_up_interruptible(&kswapd_wait);
X
-try_again:
- /*
- * First, see if we have any zones with lots of free memory.
- *
- * We allocate free memory first because it doesn't contain
- * any data ... DUH!
- */
X zone = zonelist->zones;
X for (;;) {
+ unsigned long min;
X zone_t *z = *(zone++);
X if (!z)
X break;
- if (!z->size)
- BUG();
X
- if (z->free_pages >= z->pages_low) {
+ min = z->pages_min;
+ if (!(gfp_mask & __GFP_WAIT))
+ min >>= 2;
+ if (zone_free_pages(z, order) > min) {
X page = rmqueue(z, order);
X if (page)
X return page;
- } else if (z->free_pages < z->pages_min &&
- waitqueue_active(&kreclaimd_wait)) {
- wake_up_interruptible(&kreclaimd_wait);
X }
X }
X
- /*
- * Try to allocate a page from a zone with a HIGH
- * amount of free + inactive_clean pages.
- *
- * If there is a lot of activity, inactive_target
- * will be high and we'll have a good chance of
- * finding a page using the HIGH limit.
- */
- page = __alloc_pages_limit(zonelist, order, PAGES_HIGH, direct_reclaim);
- if (page)
- return page;
+ /* here we're in the low on memory slow path */
X
- /*
- * Then try to allocate a page from a zone with more
- * than zone->pages_low free + inactive_clean pages.
- *
- * When the working set is very large and VM activity
- * is low, we're most likely to have our allocation
- * succeed here.
- */
- page = __alloc_pages_limit(zonelist, order, PAGES_LOW, direct_reclaim);
- if (page)
- return page;
+ if (current->flags & PF_MEMALLOC) {
+ zone = zonelist->zones;
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
X
- /*
- * OK, none of the zones on our zonelist has lots
- * of pages free.
- *
- * We wake up kswapd, in the hope that kswapd will
- * resolve this situation before memory gets tight.
- *
- * We also yield the CPU, because that:
- * - gives kswapd a chance to do something
- * - slows down allocations, in particular the
- * allocations from the fast allocator that's
- * causing the problems ...
- * - ... which minimises the impact the "bad guys"
- * have on the rest of the system
- * - if we don't have __GFP_IO set, kswapd may be
- * able to free some memory we can't free ourselves
- */
- wakeup_kswapd();
- if (gfp_mask & __GFP_WAIT) {
- __set_current_state(TASK_RUNNING);
- current->policy |= SCHED_YIELD;
- schedule();
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
+ return NULL;
X }
X
- /*
- * After waking up kswapd, we try to allocate a page
- * from any zone which isn't critical yet.
- *
- * Kswapd should, in most situations, bring the situation
- * back to normal in no time.
- */
- page = __alloc_pages_limit(zonelist, order, PAGES_MIN, direct_reclaim);
+ rebalance:
+ page = balance_classzone(classzone, gfp_mask, order, &freed);
X if (page)
X return page;
X
- /*
- * Damn, we didn't succeed.
- *
- * This can be due to 2 reasons:
- * - we're doing a higher-order allocation
- * --> move pages to the free list until we succeed
- * - we're /really/ tight on memory
- * --> try to free pages ourselves with page_launder
- */
- if (!(current->flags & PF_MEMALLOC)) {
- /*
- * Are we dealing with a higher order allocation?
- *
- * Move pages from the inactive_clean to the free list
- * in the hope of creating a large, physically contiguous
- * piece of free memory.
- */
- if (order > 0 && (gfp_mask & __GFP_WAIT)) {
- zone = zonelist->zones;
- /* First, clean some dirty pages. */
- current->flags |= PF_MEMALLOC;
- page_launder(gfp_mask, 1);
- current->flags &= ~PF_MEMALLOC;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
- if (!z->size)
- continue;
- while (z->inactive_clean_pages) {
- struct page * page;
- /* Move one page to the free list. */
- page = reclaim_page(z);
- if (!page)
- break;
- __free_page(page);
- /* Try if the allocation succeeds. */
- page = rmqueue(z, order);
- if (page)
- return page;
- }
+ zone = zonelist->zones;
+ if (likely(freed)) {
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
+
+ if (zone_free_pages(z, order) > z->pages_min) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
X }
X }
- /*
- * When we arrive here, we are really tight on memory.
- * Since kswapd didn't succeed in freeing pages for us,
- * we try to help it.
- *
- * Single page allocs loop until the allocation succeeds.
- * Multi-page allocs can fail due to memory fragmentation;
- * in that case we bail out to prevent infinite loops and
- * hanging device drivers ...
- *
- * Another issue are GFP_NOFS allocations; because they
- * do not have __GFP_FS set it's possible we cannot make
- * any progress freeing pages, in that case it's better
- * to give up than to deadlock the kernel looping here.
+ goto rebalance;
+ } else {
+ /*
+ * Check that no other task is been killed meanwhile,
+ * in such a case we can succeed the allocation.
X */
- if (gfp_mask & __GFP_WAIT) {
- if (!order || free_shortage()) {
- int progress = try_to_free_pages(gfp_mask);
- if (progress || (gfp_mask & __GFP_FS))
- goto try_again;
- /*
- * Fail in case no progress was made and the
- * allocation may not be able to block on IO.
- */
- return NULL;
- }
- }
- }
-
- /*
- * Final phase: allocate anything we can!
- *
- * Higher order allocations, GFP_ATOMIC allocations and
- * recursive allocations (PF_MEMALLOC) end up here.
- *
- * Only recursive allocations can use the very last pages
- * in the system, otherwise it would be just too easy to
- * deadlock the system...
- */
- zone = zonelist->zones;
- for (;;) {
- zone_t *z = *(zone++);
- struct page * page = NULL;
- if (!z)
- break;
- if (!z->size)
- BUG();
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
X
- /*
- * SUBTLE: direct_reclaim is only possible if the task
- * becomes PF_MEMALLOC while looping above. This will
- * happen when the OOM killer selects this task for
- * instant execution...
- */
- if (direct_reclaim) {
- page = reclaim_page(z);
- if (page)
- return page;
+ if (zone_free_pages(z, order) > z->pages_high) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
X }
-
- /* XXX: is pages_min/4 a good amount to reserve for this? */
- if (z->free_pages < z->pages_min / 4 &&
- !(current->flags & PF_MEMALLOC))
- continue;
- page = rmqueue(z, order);
- if (page)
- return page;
X }
X
- /* No luck.. */
- printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order);
+ printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i) from %p\n",
+ order, gfp_mask, !!(current->flags & PF_MEMALLOC), __builtin_return_address(0));
X return NULL;
X }
X
X /*
X * Common helper functions.
X */
-unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
X {
X struct page * page;
X
@@ -519,7 +426,7 @@
X return (unsigned long) page_address(page);
X }
X
-unsigned long get_zeroed_page(int gfp_mask)
+unsigned long get_zeroed_page(unsigned int gfp_mask)
X {
X struct page * page;
X
@@ -532,13 +439,13 @@
X return 0;
X }
X
-void __free_pages(struct page *page, unsigned long order)
+void __free_pages(struct page *page, unsigned int order)
X {
X if (!PageReserved(page) && put_page_testzero(page))
X __free_pages_ok(page, order);
X }
X
-void free_pages(unsigned long addr, unsigned long order)
+void free_pages(unsigned long addr, unsigned int order)
X {
X if (addr != 0)
X __free_pages(virt_to_page(addr), order);
@@ -563,47 +470,26 @@
X }
X
X /*
- * Total amount of inactive_clean (allocatable) RAM:
- */
-unsigned int nr_inactive_clean_pages (void)
-{
- unsigned int sum;
- zone_t *zone;
- pg_data_t *pgdat = pgdat_list;
-
- sum = 0;
- while (pgdat) {
- for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
- sum += zone->inactive_clean_pages;
- pgdat = pgdat->node_next;
- }
- return sum;
-}
-
-/*
X * Amount of free RAM allocatable as buffer memory:
X */
X unsigned int nr_free_buffer_pages (void)
X {
+ pg_data_t *pgdat = pgdat_list;
X unsigned int sum = 0;
X zonelist_t *zonelist;
X zone_t **zonep, *zone;
X
- zonelist = contig_page_data.node_zonelists + (GFP_NOFS & GFP_ZONEMASK);
- zonep = zonelist->zones;
+ do {
+ zonelist = pgdat->node_zonelists + __GFP_HIGHMEM;
+ zonep = zonelist->zones;
X
- for (zone = *zonep++; zone; zone = *zonep++) {
- unsigned int pages = zone->free_pages +
- zone->inactive_clean_pages +
- zone->inactive_dirty_pages;
-
- /* Allow the buffer cache to fill up at least "pages_high" pages */
- if (pages < zone->pages_high)
- pages = zone->pages_high;
- sum += pages;
- }
+ for (zone = *zonep++; zone; zone = *zonep++)
+ sum += zone->free_pages;
X
- return sum;
+ pgdat = pgdat->node_next;
+ } while (pgdat);
+
+ return sum + nr_active_pages + nr_inactive_pages;
X }
X
X #if CONFIG_HIGHMEM
@@ -627,21 +513,17 @@
X */
X void show_free_areas_core(pg_data_t *pgdat)
X {
- unsigned long order;
+ unsigned int order;
X unsigned type;
X
X printk("Free pages: %6dkB (%6dkB HighMem)\n",
X nr_free_pages() << (PAGE_SHIFT-10),
X nr_free_highpages() << (PAGE_SHIFT-10));
X
- printk("( Active: %d, inactive_dirty: %d, inactive_clean: %d, free: %d (%d %d %d) )\n",
- nr_active_pages,
- nr_inactive_dirty_pages,
- nr_inactive_clean_pages(),
- nr_free_pages(),
- freepages.min,
- freepages.low,
- freepages.high);
+ printk("( Active: %d, inactive: %d, free: %d )\n",
+ nr_active_pages,
+ nr_inactive_pages,
+ nr_free_pages());
X
X for (type = 0; type < MAX_NR_ZONES; type++) {
X struct list_head *head, *curr;
@@ -761,8 +643,8 @@
X
X printk("On node %d totalpages: %lu\n", nid, realtotalpages);
X
- memlist_init(&active_list);
- memlist_init(&inactive_dirty_list);
+ INIT_LIST_HEAD(&active_list);
+ INIT_LIST_HEAD(&inactive_list);
X
X /*
X * Some architectures (with lots of mem and discontinous memory
@@ -781,6 +663,7 @@
X pgdat->node_size = totalpages;
X pgdat->node_start_paddr = zone_start_paddr;
X pgdat->node_start_mapnr = (lmem_map - mem_map);
+ pgdat->nr_zones = 0;
X
X /*
X * Initially all pages are reserved - free ones are freed
@@ -810,12 +693,12 @@
X zone->lock = SPIN_LOCK_UNLOCKED;
X zone->zone_pgdat = pgdat;
X zone->free_pages = 0;
- zone->inactive_clean_pages = 0;
- zone->inactive_dirty_pages = 0;
- memlist_init(&zone->inactive_clean_list);
+ zone->need_balance = 0;
X if (!size)
X continue;
X
+ pgdat->nr_zones = j+1;
+
X mask = (realsize / zone_balance_ratio[j]);
X if (mask < zone_balance_min[j])
X mask = zone_balance_min[j];
@@ -824,20 +707,7 @@
X zone->pages_min = mask;
X zone->pages_low = mask*2;
X zone->pages_high = mask*3;
- /*
- * Add these free targets to the global free target;
- * we have to be SURE that freepages.high is higher
- * than SUM [zone->pages_min] for all zones, otherwise
- * we may have bad bad problems.
- *
- * This means we cannot make the freepages array writable
- * in /proc, but have to add a separate extra_free_target
- * for people who require it to catch load spikes in eg.
- * gigabit ethernet routing...
- */
- freepages.min += mask;
- freepages.low += mask*2;
- freepages.high += mask*3;
+
X zone->zone_mem_map = mem_map + offset;
X zone->zone_start_mapnr = offset;
X zone->zone_start_paddr = zone_start_paddr;
diff -u --recursive --new-file v2.4.9/linux/mm/shmem.c linux/mm/shmem.c
--- v2.4.9/linux/mm/shmem.c Sun Aug 12 13:28:01 2001
+++ linux/mm/shmem.c Sat Sep 22 20:36:50 2001
@@ -234,44 +234,55 @@
X int error;
X struct shmem_inode_info *info;
X swp_entry_t *entry, swap;
+ struct address_space *mapping;
+ unsigned long index;
X struct inode *inode;
X
X if (!PageLocked(page))
X BUG();
-
- inode = page->mapping->host;
+
+ mapping = page->mapping;
+ index = page->index;
+ inode = mapping->host;
X info = &inode->u.shmem_i;
- swap = __get_swap_page(2);
- error = -ENOMEM;
- if (!swap.val) {
- activate_page(page);
- goto out;
- }
X
X spin_lock(&info->lock);
- entry = shmem_swp_entry(info, page->index);
- if (IS_ERR(entry)) /* this had been allocted on page allocation */
+ entry = shmem_swp_entry(info, index);
+ if (IS_ERR(entry)) /* this had been allocated on page allocation */
X BUG();
- shmem_recalc_inode(page->mapping->host);
- error = -EAGAIN;
+ shmem_recalc_inode(inode);
X if (entry->val)
X BUG();
X
- *entry = swap;
- error = 0;
- /* Remove the from the page cache */
+ /* Remove it from the page cache */
X lru_cache_del(page);
X remove_inode_page(page);
X
+ swap_list_lock();
+ swap = get_swap_page();
+
+ if (!swap.val) {
+ swap_list_unlock();
+ /* Add it back to the page cache */
+ add_to_page_cache_locked(page, mapping, index);
+ activate_page(page);
+ SetPageDirty(page);
+ error = -ENOMEM;
+ goto out;
+ }
+
X /* Add it to the swap cache */
X add_to_swap_cache(page, swap);
- page_cache_release(page);
- info->swapped++;
+ swap_list_unlock();
X
- spin_unlock(&info->lock);
-out:
X set_page_dirty(page);
+ info->swapped++;
+ *entry = swap;
+ error = 0;
+out:
+ spin_unlock(&info->lock);
X UnlockPage(page);
+ page_cache_release(page);
X return error;
X }
X
@@ -311,7 +322,7 @@
X * cache and swap cache. We need to recheck the page cache
X * under the protection of the info->lock spinlock. */
X
- page = __find_get_page(mapping, idx, page_hash(mapping, idx));
+ page = find_get_page(mapping, idx);
X if (page) {
X if (TryLockPage(page))
X goto wait_retry;
@@ -324,18 +335,21 @@
X unsigned long flags;
X
X /* Look it up and read it in.. */
- page = __find_get_page(&swapper_space, entry->val,
- page_hash(&swapper_space, entry->val));
+ page = find_get_page(&swapper_space, entry->val);
X if (!page) {
+ swp_entry_t swap = *entry;
X spin_unlock (&info->lock);
X lock_kernel();
X swapin_readahead(*entry);
X page = read_swap_cache_async(*entry);
X unlock_kernel();
- if (!page)
+ if (!page) {
+ if (entry->val != swap.val)
+ goto repeat;
X return ERR_PTR(-ENOMEM);
+ }
X wait_on_page(page);
- if (!Page_Uptodate(page)) {
+ if (!Page_Uptodate(page) && entry->val == swap.val) {
X page_cache_release(page);
X return ERR_PTR(-EIO);
X }
@@ -352,8 +366,8 @@
X
X swap_free(*entry);
X *entry = (swp_entry_t) {0};
- delete_from_swap_cache_nolock(page);
- flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1));
+ delete_from_swap_cache(page);
+ flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1);
X page->flags = flags | (1 << PG_dirty);
X add_to_page_cache_locked(page, mapping, idx);
X info->swapped--;
@@ -1158,6 +1172,7 @@
X #else
X static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
X #endif
+static struct vfsmount *shm_mnt;
X
X static int __init init_shmem_fs(void)
X {
@@ -1181,6 +1196,7 @@
X unregister_filesystem(&tmpfs_fs_type);
X return PTR_ERR(res);
X }
+ shm_mnt = res;
X
X /* The internal instance should not do size checking */
X if ((error = shmem_set_size(&res->mnt_sb->u.shmem_sb, ULONG_MAX, ULONG_MAX)))
@@ -1195,6 +1211,7 @@
X unregister_filesystem(&shmem_fs_type);
X #endif
X unregister_filesystem(&tmpfs_fs_type);
+ mntput(shm_mnt);
X }
X
X module_init(init_shmem_fs)
@@ -1240,7 +1257,7 @@
X return 0;
X found:
X add_to_page_cache(page, inode->i_mapping, offset + idx);
- set_page_dirty(page);
+ SetPageDirty(page);
X SetPageUptodate(page);
X UnlockPage(page);
X info->swapped--;
@@ -1292,7 +1309,7 @@
X this.name = name;
X this.len = strlen(name);
X this.hash = 0; /* will go */
- root = tmpfs_fs_type.kern_mnt->mnt_root;
+ root = shm_mnt->mnt_root;
X dentry = d_alloc(root, &this);
X if (!dentry)
X return ERR_PTR(-ENOMEM);
@@ -1310,7 +1327,7 @@
X d_instantiate(dentry, inode);
X dentry->d_inode->i_size = size;
X shmem_truncate(inode);
- file->f_vfsmnt = mntget(tmpfs_fs_type.kern_mnt);
+ file->f_vfsmnt = mntget(shm_mnt);
X file->f_dentry = dentry;
X file->f_op = &shmem_file_operations;
X file->f_mode = FMODE_WRITE | FMODE_READ;
diff -u --recursive --new-file v2.4.9/linux/mm/slab.c linux/mm/slab.c
--- v2.4.9/linux/mm/slab.c Tue May 22 10:23:16 2001
+++ linux/mm/slab.c Tue Sep 18 14:16:26 2001
@@ -72,6 +72,7 @@
X #include <linux/slab.h>
X #include <linux/interrupt.h>
X #include <linux/init.h>
+#include <linux/compiler.h>
X #include <asm/uaccess.h>
X
X /*
@@ -85,9 +86,15 @@
X * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
X */
X
+#ifdef CONFIG_DEBUG_SLAB
+#define DEBUG 1
+#define STATS 1
+#define FORCED_DEBUG 1
+#else
X #define DEBUG 0
X #define STATS 0
X #define FORCED_DEBUG 0
+#endif
X
X /*
X * Parameters for kmem_cache_reap
@@ -140,8 +147,7 @@
X *
X * Manages the objs in a slab. Placed either at the beginning of mem allocated
X * for a slab, or allocated from an general cache.
- * Slabs are chained into one ordered list: fully used, partial, then fully
- * free slabs.
+ * Slabs are chained into three list: fully used, partial, fully free slabs.
X */
X typedef struct slab_s {
X struct list_head list;
@@ -167,7 +173,7 @@
X } cpucache_t;
X
X #define cc_entry(cpucache) \
- ((void **)(((cpucache_t*)cpucache)+1))
+ ((void **)(((cpucache_t*)(cpucache))+1))
X #define cc_data(cachep) \
X ((cachep)->cpudata[smp_processor_id()])
X /*
@@ -181,8 +187,9 @@
X struct kmem_cache_s {
X /* 1) each alloc & free */
X /* full, partial first, then free */
- struct list_head slabs;
- struct list_head *firstnotfull;
+ struct list_head slabs_full;
+ struct list_head slabs_partial;
+ struct list_head slabs_free;
X unsigned int objsize;
X unsigned int flags; /* constant flags */
X unsigned int num; /* # of objs per slab */
@@ -345,8 +352,9 @@
X
X /* internal cache of cache description objs */
X static kmem_cache_t cache_cache = {
- slabs: LIST_HEAD_INIT(cache_cache.slabs),
- firstnotfull: &cache_cache.slabs,
+ slabs_full: LIST_HEAD_INIT(cache_cache.slabs_full),
+ slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
+ slabs_free: LIST_HEAD_INIT(cache_cache.slabs_free),
X objsize: sizeof(kmem_cache_t),
X flags: SLAB_NO_REAP,
X spinlock: SPIN_LOCK_UNLOCKED,
@@ -777,8 +785,9 @@
X cachep->gfpflags |= GFP_DMA;
X spin_lock_init(&cachep->spinlock);
X cachep->objsize = size;
- INIT_LIST_HEAD(&cachep->slabs);
- cachep->firstnotfull = &cachep->slabs;
+ INIT_LIST_HEAD(&cachep->slabs_full);
+ INIT_LIST_HEAD(&cachep->slabs_partial);
+ INIT_LIST_HEAD(&cachep->slabs_free);
X
X if (flags & CFLGS_OFF_SLAB)
X cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
@@ -814,6 +823,33 @@
X return cachep;
X }
X
+
+#if DEBUG
+/*
+ * This check if the kmem_cache_t pointer is chained in the cache_cache
+ * list. -arca
+ */
+static int is_chained_kmem_cache(kmem_cache_t * cachep)
+{
+ struct list_head *p;
+ int ret = 0;
+
+ /* Find the cache in the chain of caches. */
+ down(&cache_chain_sem);
+ list_for_each(p, &cache_chain) {
+ if (p == &cachep->next) {
+ ret = 1;
+ break;
+ }
+ }
+ up(&cache_chain_sem);
+
+ return ret;
+}
+#else
+#define is_chained_kmem_cache(x) 1
+#endif
+
X #ifdef CONFIG_SMP
X /*
X * Waits for all CPUs to execute func().
@@ -886,23 +922,22 @@
X while (!cachep->growing) {
X struct list_head *p;
X
- p = cachep->slabs.prev;
- if (p == &cachep->slabs)
+ p = cachep->slabs_free.prev;
+ if (p == &cachep->slabs_free)
X break;
X
- slabp = list_entry(cachep->slabs.prev, slab_t, list);
+ slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
+#if DEBUG
X if (slabp->inuse)
- break;
-
+ BUG();
+#endif
X list_del(&slabp->list);
- if (cachep->firstnotfull == &slabp->list)
- cachep->firstnotfull = &cachep->slabs;
X
X spin_unlock_irq(&cachep->spinlock);
X kmem_slab_destroy(cachep, slabp);
X spin_lock_irq(&cachep->spinlock);
X }
- ret = !list_empty(&cachep->slabs);
+ ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
X spin_unlock_irq(&cachep->spinlock);
X return ret;
X }
@@ -916,7 +951,7 @@
X */
X int kmem_cache_shrink(kmem_cache_t *cachep)
X {
- if (!cachep || in_interrupt())
+ if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
X BUG();
X
X return __kmem_cache_shrink(cachep);
@@ -1128,9 +1163,7 @@
X cachep->growing--;
X
X /* Make slab active. */
- list_add_tail(&slabp->list,&cachep->slabs);
- if (cachep->firstnotfull == &cachep->slabs)
- cachep->firstnotfull = &slabp->list;
+ list_add_tail(&slabp->list, &cachep->slabs_free);
X STATS_INC_GROWN(cachep);
X cachep->failures = 0;
X
@@ -1175,7 +1208,6 @@
X
X static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
X {
-#if DEBUG
X if (flags & SLAB_DMA) {
X if (!(cachep->gfpflags & GFP_DMA))
X BUG();
@@ -1183,11 +1215,10 @@
X if (cachep->gfpflags & GFP_DMA)
X BUG();
X }
-#endif
X }
X
X static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
- slab_t *slabp)
+ slab_t *slabp)
X {
X void *objp;
X
@@ -1200,9 +1231,10 @@
X objp = slabp->s_mem + slabp->free*cachep->objsize;
X slabp->free=slab_bufctl(slabp)[slabp->free];
X
- if (slabp->free == BUFCTL_END)
- /* slab now full: move to next slab for next alloc */
- cachep->firstnotfull = slabp->list.next;
+ if (unlikely(slabp->free == BUFCTL_END)) {
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_full);
+ }
X #if DEBUG
X if (cachep->flags & SLAB_POISON)
X if (kmem_check_poison_obj(cachep, objp))
@@ -1228,15 +1260,22 @@
X */
X #define kmem_cache_alloc_one(cachep) \
X ({ \
- slab_t *slabp; \
+ struct list_head * slabs_partial, * entry; \
+ slab_t *slabp; \
X \
- /* Get slab alloc is to come from. */ \
- { \
- struct list_head* p = cachep->firstnotfull; \
- if (p == &cachep->slabs) \
+ slabs_partial = &(cachep)->slabs_partial; \
+ entry = slabs_partial->next; \
+ if (unlikely(entry == slabs_partial)) { \
+ struct list_head * slabs_free; \
+ slabs_free = &(cachep)->slabs_free; \
+ entry = slabs_free->next; \
+ if (unlikely(entry == slabs_free)) \
X goto alloc_new_slab; \
- slabp = list_entry(p,slab_t, list); \
+ list_del(entry); \
+ list_add(entry, slabs_partial); \
X } \
+ \
+ slabp = list_entry(entry, slab_t, list); \
X kmem_cache_alloc_one_tail(cachep, slabp); \
X })
X
@@ -1248,13 +1287,22 @@
X
X spin_lock(&cachep->spinlock);
X while (batchcount--) {
- /* Get slab alloc is to come from. */
- struct list_head *p = cachep->firstnotfull;
+ struct list_head * slabs_partial, * entry;
X slab_t *slabp;
+ /* Get slab alloc is to come from. */
+ slabs_partial = &(cachep)->slabs_partial;
+ entry = slabs_partial->next;
+ if (unlikely(entry == slabs_partial)) {
+ struct list_head * slabs_free;
+ slabs_free = &(cachep)->slabs_free;
+ entry = slabs_free->next;
+ if (unlikely(entry == slabs_free))
+ break;
+ list_del(entry);
+ list_add(entry, slabs_partial);
+ }
X
- if (p == &cachep->slabs)
- break;
- slabp = list_entry(p,slab_t, list);
+ slabp = list_entry(entry, slab_t, list);
X cc_entry(cc)[cc->avail++] =
X kmem_cache_alloc_one_tail(cachep, slabp);
X }
@@ -1386,42 +1434,18 @@
X }
X STATS_DEC_ACTIVE(cachep);
X
- /* fixup slab chain */
- if (slabp->inuse-- == cachep->num)
- goto moveslab_partial;
- if (!slabp->inuse)
- goto moveslab_free;
- return;
-
-moveslab_partial:
- /* was full.
- * Even if the page is now empty, we can set c_firstnotfull to
- * slabp: there are no partial slabs in this case
- */
+ /* fixup slab chains */
X {
- struct list_head *t = cachep->firstnotfull;
-
- cachep->firstnotfull = &slabp->list;
- if (slabp->list.next == t)
- return;
- list_del(&slabp->list);
- list_add_tail(&slabp->list, t);
- return;
- }
-moveslab_free:
- /*
- * was partial, now empty.
- * c_firstnotfull might point to slabp
- * FIXME: optimize
- */
- {
- struct list_head *t = cachep->firstnotfull->prev;
-
- list_del(&slabp->list);
- list_add_tail(&slabp->list, &cachep->slabs);
- if (cachep->firstnotfull == &slabp->list)
- cachep->firstnotfull = t->next;
- return;
+ int inuse = slabp->inuse;
+ if (unlikely(!--slabp->inuse)) {
+ /* Was partial or full, now empty. */
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_free);
+ } else if (unlikely(inuse == cachep->num)) {
+ /* Was full. */
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_partial);
+ }
X }
X }
X
@@ -1681,7 +1705,7 @@
X *
X * Called from do_try_to_free_pages() and __alloc_pages()
X */
-void kmem_cache_reap (int gfp_mask)
+int kmem_cache_reap (int gfp_mask)
X {
X slab_t *slabp;
X kmem_cache_t *searchp;
@@ -1689,12 +1713,13 @@
X unsigned int best_pages;
X unsigned int best_len;
X unsigned int scan;
+ int ret = 0;
X
X if (gfp_mask & __GFP_WAIT)
X down(&cache_chain_sem);
X else
X if (down_trylock(&cache_chain_sem))
- return;
+ return 0;
X
X scan = REAP_SCANLEN;
X best_len = 0;
@@ -1727,13 +1752,15 @@
X #endif
X
X full_free = 0;
- p = searchp->slabs.prev;
- while (p != &searchp->slabs) {
+ p = searchp->slabs_free.next;
+ while (p != &searchp->slabs_free) {
X slabp = list_entry(p, slab_t, list);
+#if DEBUG
X if (slabp->inuse)
- break;
+ BUG();
+#endif
X full_free++;
- p = p->prev;
+ p = p->next;
X }
X
X /*
@@ -1750,7 +1777,7 @@
X best_cachep = searchp;
X best_len = full_free;
X best_pages = pages;
- if (full_free >= REAP_PERFECT) {
+ if (pages >= REAP_PERFECT) {
X clock_searchp = list_entry(searchp->next.next,
X kmem_cache_t,next);
X goto perfect;
@@ -1770,22 +1797,22 @@
X
X spin_lock_irq(&best_cachep->spinlock);
X perfect:
- /* free only 80% of the free slabs */
- best_len = (best_len*4 + 1)/5;
+ /* free only 50% of the free slabs */
+ best_len = (best_len + 1)/2;
X for (scan = 0; scan < best_len; scan++) {
X struct list_head *p;
X
X if (best_cachep->growing)
X break;
- p = best_cachep->slabs.prev;
- if (p == &best_cachep->slabs)
+ p = best_cachep->slabs_free.prev;
+ if (p == &best_cachep->slabs_free)
X break;
X slabp = list_entry(p,slab_t,list);
+#if DEBUG
X if (slabp->inuse)
- break;
+ BUG();
+#endif
X list_del(&slabp->list);
- if (best_cachep->firstnotfull == &slabp->list)
- best_cachep->firstnotfull = &best_cachep->slabs;
X STATS_INC_REAPED(best_cachep);
X
X /* Safe to drop the lock. The slab is no longer linked to the
@@ -1796,9 +1823,10 @@
X spin_lock_irq(&best_cachep->spinlock);
X }
X spin_unlock_irq(&best_cachep->spinlock);
+ ret = scan * (1 << best_cachep->gfporder);
X out:
X up(&cache_chain_sem);
- return;
+ return ret;
X }
X
X #ifdef CONFIG_PROC_FS
@@ -1851,14 +1879,25 @@
X spin_lock_irq(&cachep->spinlock);
X active_objs = 0;
X num_slabs = 0;
- list_for_each(q,&cachep->slabs) {
+ list_for_each(q,&cachep->slabs_full) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse != cachep->num)
+ BUG();
+ active_objs += cachep->num;
+ active_slabs++;
+ }
+ list_for_each(q,&cachep->slabs_partial) {
X slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse == cachep->num || !slabp->inuse)
+ BUG();
X active_objs += slabp->inuse;
- num_objs += cachep->num;
+ active_slabs++;
+ }
+ list_for_each(q,&cachep->slabs_free) {
+ slabp = list_entry(q, slab_t, list);
X if (slabp->inuse)
- active_slabs++;
- else
- num_slabs++;
+ BUG();
+ num_slabs++;
X }
X num_slabs+=active_slabs;
X num_objs = num_slabs*cachep->num;
diff -u --recursive --new-file v2.4.9/linux/mm/swap.c linux/mm/swap.c
--- v2.4.9/linux/mm/swap.c Sun Aug 12 13:28:01 2001
+++ linux/mm/swap.c Sat Sep 22 10:42:06 2001
@@ -24,50 +24,13 @@
X #include <asm/uaccess.h> /* for copy_to/from_user */
X #include <asm/pgtable.h>
X
-/*
- * We identify three levels of free memory. We never let free mem
- * fall below the freepages.min except for atomic allocations. We
- * start background swapping if we fall below freepages.high free
- * pages, and we begin intensive swapping below freepages.low.
- *
- * Actual initialization is done in mm/page_alloc.c
- */
-freepages_t freepages = {
- 0, /* freepages.min */
- 0, /* freepages.low */
- 0 /* freepages.high */
-};
-
X /* How many pages do we try to swap or page in/out together? */
X int page_cluster;
X
-/*
- * This variable contains the amount of page steals the system
- * is doing, averaged over a minute. We use this to determine how
- * many inactive pages we should have.
- *
- * In reclaim_page and __alloc_pages: memory_pressure++
- * In __free_pages_ok: memory_pressure--
- * In recalculate_vm_stats the value is decayed (once a second)
- */
-int memory_pressure;
-
X /* We track the number of pages currently being asynchronously swapped
X out, so that we don't try to swap TOO many pages out at once */
X atomic_t nr_async_pages = ATOMIC_INIT(0);
X
-buffer_mem_t buffer_mem = {
- 2, /* minimum percent buffer */
- 10, /* borrow percent buffer */
- 60 /* maximum percent buffer */
-};
-
-buffer_mem_t page_cache = {
- 2, /* minimum percent page cache */
- 15, /* borrow percent page cache */
- 75 /* maximum */
-};
-
X pager_daemon_t pager_daemon = {
X 512, /* base number for calculating the number of tries */
X SWAP_CLUSTER_MAX, /* minimum number of tries */
@@ -87,25 +50,9 @@
X */
X void deactivate_page_nolock(struct page * page)
X {
- /*
- * One for the cache, one for the extra reference the
- * caller has and (maybe) one for the buffers.
- *
- * This isn't perfect, but works for just about everything.
- * Besides, as long as we don't move unfreeable pages to the
- * inactive_clean list it doesn't need to be perfect...
- */
- int maxcount = (page->buffers ? 3 : 2);
- page->age = 0;
- ClearPageReferenced(page);
-
- /*
- * Don't touch it if it's not on the active list.
- * (some pages aren't on any list at all)
- */
- if (PageActive(page) && page_count(page) <= maxcount && !page_ramdisk(page)) {
+ if (PageActive(page)) {
X del_page_from_active_list(page);
- add_page_to_inactive_dirty_list(page);
+ add_page_to_inactive_list(page);
X }
X }
X
@@ -121,22 +68,10 @@
X */
X void activate_page_nolock(struct page * page)
X {
- if (PageInactiveDirty(page)) {
- del_page_from_inactive_dirty_list(page);
- add_page_to_active_list(page);
- } else if (PageInactiveClean(page)) {
- del_page_from_inactive_clean_list(page);
+ if (PageInactive(page)) {
+ del_page_from_inactive_list(page);
X add_page_to_active_list(page);
- } else {
- /*
- * The page was not on any list, so we take care
- * not to do anything.
- */
X }
-
- /* Make sure the page gets a fair chance at staying active. */
- if (page->age < PAGE_AGE_START)
- page->age = PAGE_AGE_START;
X }
X
X void activate_page(struct page * page)
@@ -152,11 +87,10 @@
X */
X void lru_cache_add(struct page * page)
X {
- spin_lock(&pagemap_lru_lock);
X if (!PageLocked(page))
X BUG();
- add_page_to_inactive_dirty_list(page);
- page->age = 0;
+ spin_lock(&pagemap_lru_lock);
+ add_page_to_inactive_list(page);
X spin_unlock(&pagemap_lru_lock);
X }
X
@@ -171,14 +105,11 @@
X {
X if (PageActive(page)) {
X del_page_from_active_list(page);
- } else if (PageInactiveDirty(page)) {
- del_page_from_inactive_dirty_list(page);
- } else if (PageInactiveClean(page)) {
- del_page_from_inactive_clean_list(page);
- } else {
+ } else if (PageInactive(page)) {
+ del_page_from_inactive_list(page);
+ } else
X printk("VM: __lru_cache_del, found unknown page ?!\n");
- }
- DEBUG_ADD_PAGE
+ DEBUG_LRU_PAGE(page);
X }
X
X /**
@@ -192,22 +123,6 @@
X spin_lock(&pagemap_lru_lock);
X __lru_cache_del(page);
X spin_unlock(&pagemap_lru_lock);
-}
-
-/**
- * recalculate_vm_stats - recalculate VM statistics
- *
- * This function should be called once a second to recalculate
- * some useful statistics the VM subsystem uses to determine
- * its behaviour.
- */
-void recalculate_vm_stats(void)
-{
- /*
- * Substract one second worth of memory_pressure from
- * memory_pressure.
- */
- memory_pressure -= (memory_pressure >> INACTIVE_SHIFT);
X }
X
X /*
diff -u --recursive --new-file v2.4.9/linux/mm/swap_state.c linux/mm/swap_state.c
--- v2.4.9/linux/mm/swap_state.c Wed Jul 25 17:10:26 2001
+++ linux/mm/swap_state.c Sat Sep 22 20:36:50 2001
@@ -23,17 +23,11 @@
X */
X static int swap_writepage(struct page *page)
X {
- /* One for the page cache, one for this user, one for page->buffers */
- if (page_count(page) > 2 + !!page->buffers)
- goto in_use;
- if (swap_count(page) > 1)
- goto in_use;
-
- /* We could remove it here, but page_launder will do it anyway */
- UnlockPage(page);
- return 0;
-
-in_use:
+ if (exclusive_swap_page(page)) {
+ delete_from_swap_cache(page);
+ UnlockPage(page);
+ return 0;
+ }
X rw_swap_page(WRITE, page);
X return 0;
X }
@@ -75,75 +69,57 @@
X #endif
X if (!PageLocked(page))
X BUG();
- if (PageTestandSetSwapCache(page))
- BUG();
X if (page->mapping)
X BUG();
- flags = page->flags & ~((1 << PG_error) | (1 << PG_arch_1));
+
+ /* clear PG_dirty so a subsequent set_page_dirty takes effect */
+ flags = page->flags & ~(1 << PG_error | 1 << PG_dirty | 1 << PG_arch_1 | 1 << PG_referenced);
X page->flags = flags | (1 << PG_uptodate);
- page->age = PAGE_AGE_START;
X add_to_page_cache_locked(page, &swapper_space, entry.val);
X }
X
-static inline void remove_from_swap_cache(struct page *page)
-{
- struct address_space *mapping = page->mapping;
-
- if (mapping != &swapper_space)
- BUG();
- if (!PageSwapCache(page) || !PageLocked(page))
- PAGE_BUG(page);
-
- PageClearSwapCache(page);
- ClearPageDirty(page);
- __remove_inode_page(page);
-}
-
X /*
X * This must be called only on pages that have
X * been verified to be in the swap cache.
X */
X void __delete_from_swap_cache(struct page *page)
X {
- swp_entry_t entry;
-
- entry.val = page->index;
-
X #ifdef SWAP_CACHE_INFO
X swap_cache_del_total++;
X #endif
- remove_from_swap_cache(page);
- swap_free(entry);
+ if (!PageLocked(page))
+ BUG();
+ if (!PageSwapCache(page))
+ BUG();
+
+ ClearPageDirty(page);
+ __remove_inode_page(page);
X }
X
X /*
- * This will never put the page into the free list, the caller has
- * a reference on the page.
+ * This must be called only on pages that have
+ * been verified to be in the swap cache and locked.
+ * It will never put the page into the free list,
+ * the caller has a reference on the page.
X */
-void delete_from_swap_cache_nolock(struct page *page)
+void delete_from_swap_cache(struct page *page)
X {
+ swp_entry_t entry;
+
X if (!PageLocked(page))
X BUG();
X
X if (block_flushpage(page, 0))
X lru_cache_del(page);
X
+ entry.val = page->index;
+
X spin_lock(&pagecache_lock);
- ClearPageDirty(page);
X __delete_from_swap_cache(page);
X spin_unlock(&pagecache_lock);
- page_cache_release(page);
-}
X
-/*
- * This must be called only on pages that have
- * been verified to be in the swap cache and locked.
- */
-void delete_from_swap_cache(struct page *page)
-{
- lock_page(page);
- delete_from_swap_cache_nolock(page);
- UnlockPage(page);
+ swap_free(entry);
+ page_cache_release(page);
X }
X
X /*
@@ -163,20 +139,18 @@
X */
X if (PageSwapCache(page) && !TryLockPage(page)) {
X if (exclusive_swap_page(page))
- delete_from_swap_cache_nolock(page);
+ delete_from_swap_cache(page);
X UnlockPage(page);
X }
X page_cache_release(page);
X }
X
-
X /*
X * Lookup a swap entry in the swap cache. A found page will be returned
X * unlocked and with its refcount incremented - we rely on the kernel
X * lock getting page table operations atomic even if we drop the page
X * lock before returning.
X */
-
X struct page * lookup_swap_cache(swp_entry_t entry)
X {
X struct page *found;
@@ -184,72 +158,80 @@
X #ifdef SWAP_CACHE_INFO
X swap_cache_find_total++;
X #endif
- while (1) {
- /*
- * Right now the pagecache is 32-bit only. But it's a 32 bit index. =)
- */
- found = find_get_swapcache_page(&swapper_space, entry.val);
- if (!found)
- return 0;
- if (!PageSwapCache(found))
- BUG();
- if (found->mapping != &swapper_space)
- BUG();
+ found = find_get_page(&swapper_space, entry.val);
+ /*
+ * Unsafe to assert PageSwapCache and mapping on page found:
+ * if SMP nothing prevents swapoff from deleting this page from
+ * the swap cache at this moment. find_lock_page would prevent
+ * that, but no need to change: we _have_ got the right page.
+ */
X #ifdef SWAP_CACHE_INFO
+ if (found)
X swap_cache_find_success++;
X #endif
- return found;
- }
+ return found;
X }
X
X /*
X * Locate a page of swap in physical memory, reserving swap cache space
- * and reading the disk if it is not already cached. If wait==0, we are
- * only doing readahead, so don't worry if the page is already locked.
- *
+ * and reading the disk if it is not already cached.
X * A failure return means that either the page allocation failed or that
X * the swap entry is no longer in use.
X */
-
X struct page * read_swap_cache_async(swp_entry_t entry)
X {
- struct page *found_page = 0, *new_page;
+ struct page *found_page, *new_page;
+ struct page **hash;
X
X /*
- * Make sure the swap entry is still in use.
+ * Look for the page in the swap cache. Since we normally call
+ * this only after lookup_swap_cache() failed, re-calling that
+ * would confuse the statistics: use __find_get_page() directly.
X */
- if (!swap_duplicate(entry)) /* Account for the swap cache */
- goto out;
- /*
- * Look for the page in the swap cache.
- */
- found_page = lookup_swap_cache(entry);
+ hash = page_hash(&swapper_space, entry.val);
+ found_page = __find_get_page(&swapper_space, entry.val, hash);
X if (found_page)
- goto out_free_swap;
+ goto out;
X
X new_page = alloc_page(GFP_HIGHUSER);
X if (!new_page)
- goto out_free_swap; /* Out of memory */
+ goto out; /* Out of memory */
+ if (TryLockPage(new_page))
+ BUG();
X
X /*
X * Check the swap cache again, in case we stalled above.
+ * swap_list_lock is guarding against races between this check
+ * and where the new page is added to the swap cache below.
+ * It is also guarding against race where try_to_swap_out
+ * allocates entry with get_swap_page then adds to cache.
X */
- found_page = lookup_swap_cache(entry);
+ swap_list_lock();
+ found_page = __find_get_page(&swapper_space, entry.val, hash);
X if (found_page)
X goto out_free_page;
+
+ /*
+ * Make sure the swap entry is still in use. It could have gone
+ * since caller dropped page_table_lock, while allocating page above,
+ * or while allocating page in prior call via swapin_readahead.
+ */
+ if (!swap_duplicate(entry)) /* Account for the swap cache */
+ goto out_free_page;
+
X /*
X * Add it to the swap cache and read its contents.
X */
- if (TryLockPage(new_page))
- BUG();
X add_to_swap_cache(new_page, entry);
+ swap_list_unlock();
+
X rw_swap_page(READ, new_page);
X return new_page;
X
X out_free_page:
+ swap_list_unlock();
+ UnlockPage(new_page);
X page_cache_release(new_page);
-out_free_swap:
- swap_free(entry);
X out:
X return found_page;
X }
diff -u --recursive --new-file v2.4.9/linux/mm/swapfile.c linux/mm/swapfile.c
--- v2.4.9/linux/mm/swapfile.c Sun Aug 12 13:28:01 2001
+++ linux/mm/swapfile.c Sat Sep 22 20:37:35 2001
@@ -14,39 +14,27 @@
X #include <linux/vmalloc.h>
X #include <linux/pagemap.h>
X #include <linux/shm.h>
+#include <linux/compiler.h>
X
X #include <asm/pgtable.h>
X
X spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
X unsigned int nr_swapfiles;
X int total_swap_pages;
+static int swap_overflow;
+
+static const char Bad_file[] = "Bad swap file entry ";
+static const char Unused_file[] = "Unused swap file entry ";
+static const char Bad_offset[] = "Bad swap offset entry ";
+static const char Unused_offset[] = "Unused swap offset entry ";
X
X struct swap_list_t swap_list = {-1, -1};
X
X struct swap_info_struct swap_info[MAX_SWAPFILES];
X
-/*
- * When swap space gets filled up, we will set this flag.
- * This will make do_swap_page(), in the page fault path,
- * free swap entries on swapin so we'll reclaim swap space
- * in order to be able to swap something out.
- *
- * At the moment we start reclaiming when swap usage goes
- * over 80% of swap space.
- *
- * XXX: Random numbers, fixme.
- */
-#define SWAP_FULL_PCT 80
-int vm_swap_full (void)
-{
- int swap_used = total_swap_pages - nr_swap_pages;
-
- return swap_used * 100 > total_swap_pages * SWAP_FULL_PCT;
-}
-
X #define SWAPFILE_CLUSTER 256
X
-static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
+static inline int scan_swap_map(struct swap_info_struct *si)
X {
X unsigned long offset;
X /*
@@ -89,20 +77,33 @@
X for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
X if (si->swap_map[offset])
X continue;
+ si->lowest_bit = offset+1;
X got_page:
X if (offset == si->lowest_bit)
X si->lowest_bit++;
X if (offset == si->highest_bit)
X si->highest_bit--;
- si->swap_map[offset] = count;
+ if (si->lowest_bit > si->highest_bit) {
+ si->lowest_bit = si->max;
+ si->highest_bit = 0;
+ }
+ /* Initial count 1 for user reference + 1 for swap cache */
+ si->swap_map[offset] = 2;
X nr_swap_pages--;
X si->cluster_next = offset+1;
X return offset;
X }
+ si->lowest_bit = si->max;
+ si->highest_bit = 0;
X return 0;
X }
X
-swp_entry_t __get_swap_page(unsigned short count)
+/*
+ * Callers of get_swap_page must hold swap_list_lock across the call,
+ * and across the following add_to_swap_cache, to guard against races
+ * with read_swap_cache_async.
+ */
+swp_entry_t get_swap_page(void)
X {
X struct swap_info_struct * p;
X unsigned long offset;
@@ -110,20 +111,17 @@
X int type, wrapped = 0;
X
X entry.val = 0; /* Out of memory */
- if (count >= SWAP_MAP_MAX)
- goto bad_count;
- swap_list_lock();
X type = swap_list.next;
X if (type < 0)
X goto out;
- if (nr_swap_pages == 0)
+ if (nr_swap_pages <= 0)
X goto out;
X
X while (1) {
X p = &swap_info[type];
X if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
X swap_device_lock(p);
- offset = scan_swap_map(p, count);
+ offset = scan_swap_map(p);
X swap_device_unlock(p);
X if (offset) {
X entry = SWP_ENTRY(type,offset);
@@ -148,21 +146,14 @@
X goto out; /* out of swap space */
X }
X out:
- swap_list_unlock();
- return entry;
-
-bad_count:
- printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
- count, __builtin_return_address(0));
X return entry;
X }
X
-
X /*
X * Caller has made sure that the swapdevice corresponding to entry
X * is still around or has not been recycled.
X */
-void __swap_free(swp_entry_t entry, unsigned short count)
+void swap_free(swp_entry_t entry)
X {
X struct swap_info_struct * p;
X unsigned long offset, type;
@@ -186,9 +177,7 @@
X swap_list.next = type;
X swap_device_lock(p);
X if (p->swap_map[offset] < SWAP_MAP_MAX) {
- if (p->swap_map[offset] < count)
- goto bad_count;
- if (!(p->swap_map[offset] -= count)) {
+ if (!--(p->swap_map[offset])) {
X if (offset < p->lowest_bit)
SHAR_EOF
true || echo 'restore of patch-2.4.10 failed'
fi
echo 'End of part 192'
echo 'File patch-2.4.10 is continued in part 193'
echo "193" > _shar_seq_.tmp
exit 0
