Implement ck1 patchset
/mm/vmscan.c
blob:6072d74a16f5f38ed0ea9db1026d89e8c467b48f -> blob:100700094e96fe18c7bfbe985a4e3c7baef89d6c
--- mm/vmscan.c
+++ mm/vmscan.c
@@ -37,6 +37,7 @@
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
@@ -148,7 +149,7 @@ struct scan_control {
/*
* From 0 .. 100. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness;
long vm_total_pages; /* The total number of pages which the VM controls */
static LIST_HEAD(shrinker_list);
@@ -932,7 +933,7 @@ cull_mlocked:
activate_locked:
/* Not a candidate for swapping, so reclaim swap space. */
- if (PageSwapCache(page) && vm_swap_full())
+ if (PageSwapCache(page))
try_to_free_swap(page);
VM_BUG_ON(PageActive(page));
SetPageActive(page);
@@ -1986,6 +1987,35 @@ restart:
}
/*
+ * Helper functions to adjust nice level of kswapd, based on the priority of
+ * the task (p) that called it. If it is already higher priority we do not
+ * demote its nice level since it is still working on behalf of a higher
+ * priority task. With kernel threads we leave it at nice 0.
+ *
+ * We don't ever run kswapd real time, so if a real time task calls kswapd we
+ * set it to highest SCHED_NORMAL priority.
+ */
+static inline int effective_sc_prio(struct task_struct *p)
+{
+	/* A task without an mm is a kernel thread: always report nice 0. */
+	if (unlikely(!p->mm))
+		return 0;
+
+	/* Real-time callers map to the highest SCHED_NORMAL priority. */
+	if (rt_task(p))
+		return -20;
+
+	/* SCHED_IDLEPRIO callers map to nice 19; others keep their own nice. */
+	return p->policy == SCHED_IDLEPRIO ? 19 : task_nice(p);
+}
+
+/*
+ * Renice @kswapd to follow the task that is waking it (current).
+ *
+ * @active is the waitqueue_active() state of the node's kswapd_wait queue as
+ * sampled by the caller. While it is nonzero the nice value is only ever
+ * lowered (priority raised); when it is zero the caller's effective nice is
+ * applied unconditionally.
+ *
+ * NOTE(review): a nonzero waitqueue_active() means a waiter is queued on
+ * kswapd_wait, so !active looks like the "kswapd is already running" case,
+ * where the unconditional set can raise kswapd's nice value. Confirm this
+ * matches the "do not demote" intent described for these helpers.
+ *
+ * @kswapd may be NULL (thread not started yet) or an ERR_PTR (kthread_run()
+ * failed in kswapd_run()); in both cases there is no task to renice, and
+ * touching it would dereference an invalid pointer.
+ */
+static void set_kswapd_nice(struct task_struct *kswapd, int active)
+{
+	long nice;
+
+	if (!kswapd || IS_ERR(kswapd))
+		return;
+
+	nice = effective_sc_prio(current);
+	if (task_nice(kswapd) > nice || !active)
+		set_user_nice(kswapd, nice);
+}
+
+/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
* request.
@@ -2706,6 +2736,8 @@ static void kswapd_try_to_sleep(pg_data_
finish_wait(&pgdat->kswapd_wait, &wait);
}
+#define WT_EXPIRY (HZ * 5) /* Time to wakeup watermark_timer */
+
/*
* The background pageout daemon, started as a kernel thread
* from the init process.
@@ -2757,6 +2789,9 @@ static int kswapd(void *p)
for ( ; ; ) {
int ret;
+ /* kswapd has been busy so delay watermark_timer */
+ mod_timer(&pgdat->watermark_timer, jiffies + WT_EXPIRY);
+
/*
* If the last balance_pgdat was unsuccessful it's unlikely a
* new request of a similar or harder type will succeed soon
@@ -2806,6 +2841,7 @@ static int kswapd(void *p)
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
{
pg_data_t *pgdat;
+ int active;
if (!populated_zone(zone))
return;
@@ -2817,7 +2853,9 @@ void wakeup_kswapd(struct zone *zone, in
pgdat->kswapd_max_order = order;
pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx);
}
- if (!waitqueue_active(&pgdat->kswapd_wait))
+ active = waitqueue_active(&pgdat->kswapd_wait);
+ set_kswapd_nice(pgdat->kswapd, active);
+ if (!active)
return;
if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
return;
@@ -2930,20 +2968,57 @@ static int __devinit cpu_callback(struct
}
/*
+ * We wake up kswapd every WT_EXPIRY until free RAM is above lots_wmark_pages()
+ */
+static void watermark_wakeup(unsigned long data)
+{
+	pg_data_t *node = (pg_data_t *)data;
+	int idx;
+
+	/*
+	 * Skip the zone scan unless kswapd_wait has a waiter and
+	 * above_background_load() reports false.
+	 */
+	if (!waitqueue_active(&node->kswapd_wait))
+		goto rearm;
+	if (above_background_load())
+		goto rearm;
+
+	/* Visit the node's zones from the highest index down to zone 0. */
+	idx = node->nr_zones;
+	while (idx-- > 0) {
+		struct zone *zone = &node->node_zones[idx];
+
+		/* We are better off leaving highmem full */
+		if (!populated_zone(zone) || is_highmem(zone))
+			continue;
+		if (zone_watermark_ok(zone, 0, lots_wmark_pages(zone), 0, 0))
+			continue;
+
+		/* First zone under its lots_wmark_pages() mark: wake kswapd. */
+		wake_up_interruptible(&node->kswapd_wait);
+		break;
+	}
+
+rearm:
+	/* The timer always re-queues itself WT_EXPIRY jiffies from now. */
+	mod_timer(&node->watermark_timer, jiffies + WT_EXPIRY);
+}
+
+/*
* This kswapd start function will be called by init and node-hot-add.
* On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
*/
int kswapd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
+ struct timer_list *wt;
int ret = 0;
if (pgdat->kswapd)
return 0;
+ wt = &pgdat->watermark_timer;
+ init_timer(wt);
+ wt->data = (unsigned long)pgdat;
+ wt->function = watermark_wakeup;
+ wt->expires = jiffies + WT_EXPIRY;
+ add_timer(wt);
+
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
if (IS_ERR(pgdat->kswapd)) {
/* failure at boot is fatal */
+ del_timer(wt);
BUG_ON(system_state == SYSTEM_BOOTING);
printk("Failed to start kswapd on node %d\n",nid);
ret = -1;