diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/arch/i386/kernel/entry.S 851-mbind_part2/arch/i386/kernel/entry.S
--- 850-mbind_part1/arch/i386/kernel/entry.S	2003-10-01 11:40:40.000000000 -0700
+++ 851-mbind_part2/arch/i386/kernel/entry.S	2003-10-02 16:41:14.000000000 -0700
@@ -829,7 +829,7 @@ ENTRY(sys_call_table)
 	.long sys_getdents64	/* 220 */
 	.long sys_fcntl64
 	.long sys_ni_syscall	/* reserved for TUX */
-	.long sys_ni_syscall
+	.long sys_mbind
 	.long sys_gettid
 	.long sys_readahead	/* 225 */
 	.long sys_setxattr
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/fs/inode.c 851-mbind_part2/fs/inode.c
--- 850-mbind_part1/fs/inode.c	2003-10-01 11:47:01.000000000 -0700
+++ 851-mbind_part2/fs/inode.c	2003-10-02 16:41:14.000000000 -0700
@@ -147,6 +147,9 @@ static struct inode *alloc_inode(struct 
 		mapping->dirtied_when = 0;
 		mapping->assoc_mapping = NULL;
 		mapping->backing_dev_info = &default_backing_dev_info;
+#ifdef CONFIG_NUMA
+		mapping->binding = NULL;
+#endif
 		if (sb->s_bdev)
 			mapping->backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
 		memset(&inode->u, 0, sizeof(inode->u));
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/include/asm-i386/unistd.h 851-mbind_part2/include/asm-i386/unistd.h
--- 850-mbind_part1/include/asm-i386/unistd.h	2003-10-01 11:41:15.000000000 -0700
+++ 851-mbind_part2/include/asm-i386/unistd.h	2003-10-02 16:41:14.000000000 -0700
@@ -228,7 +228,7 @@
 #define __NR_madvise1		219	/* delete when C lib stub is removed */
 #define __NR_getdents64		220
 #define __NR_fcntl64		221
-/* 223 is unused */
+#define __NR_mbind		223
 #define __NR_gettid		224
 #define __NR_readahead		225
 #define __NR_setxattr		226
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/include/linux/fs.h 851-mbind_part2/include/linux/fs.h
--- 850-mbind_part1/include/linux/fs.h	2003-10-01 11:48:25.000000000 -0700
+++ 851-mbind_part2/include/linux/fs.h	2003-10-02 16:41:14.000000000 -0700
@@ -336,6 +336,9 @@ struct address_space {
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
+#ifdef CONFIG_NUMA
+	struct binding		*binding;	/* for memory bindings */
+#endif
 };
 
 struct block_device {
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/include/linux/mmzone.h 851-mbind_part2/include/linux/mmzone.h
--- 850-mbind_part1/include/linux/mmzone.h	2003-10-02 16:41:10.000000000 -0700
+++ 851-mbind_part2/include/linux/mmzone.h	2003-10-02 16:41:14.000000000 -0700
@@ -377,6 +377,11 @@ static inline struct zonelist *get_node_
 
 #define get_zonelist(gfp_mask) get_node_zonelist(numa_node_id(), gfp_mask)
 
+/* Structure to keep track of memory segment (VMA) bindings */
+struct binding {
+	struct zonelist	zonelist;
+};
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MMZONE_H */
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/include/linux/pagemap.h 851-mbind_part2/include/linux/pagemap.h
--- 850-mbind_part1/include/linux/pagemap.h	2003-10-02 16:41:10.000000000 -0700
+++ 851-mbind_part2/include/linux/pagemap.h	2003-10-02 16:41:14.000000000 -0700
@@ -50,11 +50,29 @@ static inline void mapping_set_gfp_mask(
 #define page_cache_release(page)	put_page(page)
 void release_pages(struct page **pages, int nr, int cold);
 
+#ifndef CONFIG_NUMA
+
 static inline struct page *__page_cache_alloc(struct address_space *x, int gfp_mask)
 {
 	return alloc_pages(gfp_mask, 0);
 }
 
+#else /* CONFIG_NUMA */
+
+static inline struct page *__page_cache_alloc(struct address_space *x, int gfp_mask)
+{
+	struct zonelist *zonelist;
+
+	if (!x->binding)
+		zonelist = get_zonelist(gfp_mask);
+	else
+		zonelist = &x->binding->zonelist;
+
+	return __alloc_pages(gfp_mask, 0, zonelist);
+}
+
+#endif /* !CONFIG_NUMA */
+
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
 	return __page_cache_alloc(x, mapping_gfp_mask(x));
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/kernel/sys.c 851-mbind_part2/kernel/sys.c
--- 850-mbind_part1/kernel/sys.c	2003-10-01 11:48:28.000000000 -0700
+++ 851-mbind_part2/kernel/sys.c	2003-10-02 16:41:14.000000000 -0700
@@ -235,6 +235,7 @@ cond_syscall(sys_epoll_ctl)
 cond_syscall(sys_epoll_wait)
 cond_syscall(sys_pciconfig_read)
 cond_syscall(sys_pciconfig_write)
+cond_syscall(sys_mbind)
 
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/mm/Makefile 851-mbind_part2/mm/Makefile
--- 850-mbind_part1/mm/Makefile	2003-10-01 11:47:15.000000000 -0700
+++ 851-mbind_part2/mm/Makefile	2003-10-02 16:41:14.000000000 -0700
@@ -7,8 +7,10 @@ mmu-$(CONFIG_MMU)	:= fremap.o highmem.o 
 			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
 			   shmem.o vmalloc.o
 
-obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
+obj-y			:= bootmem.o fadvise.o filemap.o mempool.o oom_kill.o \
 			   page_alloc.o page-writeback.o pdflush.o readahead.o \
 			   slab.o swap.o truncate.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_NUMA)	+= mbind.o
diff -purN -X /home/mbligh/.diff.exclude 850-mbind_part1/mm/mbind.c 851-mbind_part2/mm/mbind.c
--- 850-mbind_part1/mm/mbind.c	1969-12-31 16:00:00.000000000 -0800
+++ 851-mbind_part2/mm/mbind.c	2003-10-02 16:41:14.000000000 -0700
@@ -0,0 +1,147 @@
+/*
+ * mm/mbind.c
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2003, IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/topology.h>
+#include <asm/uaccess.h>
+
+/* Translate a cpumask to a nodemask */
+static inline void cpumask_to_nodemask(unsigned long *cpumask, unsigned long *nodemask)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (test_bit(i, cpumask))
+			set_bit(cpu_to_node(i), nodemask);
+}
+
+/*
+ * Adds the zones belonging to @pgdat to @zonelist.  Returns the next
+ * index in @zonelist.
+ */
+static inline int add_node(pg_data_t *pgdat, struct zonelist *zonelist, int zone_num)
+{
+	int i;
+	struct zone *zone;
+
+	for (i = MAX_NR_ZONES-1; i >= 0; i--) {
+		zone = pgdat->node_zones + i;
+		if (zone->present_pages)
+			zonelist->zones[zone_num++] = zone;
+	}
+	return zone_num;
+}
+
+/* Builds a binding for a region of memory, based on a bitmask of nodes. */
+static inline int build_binding(unsigned long *nodemask, struct binding *binding)
+{
+	int node, zone_num;
+
+	memset(binding, 0, sizeof(struct binding));
+
+	/* Build binding zonelist */
+	for (node = 0, zone_num = 0; node < MAX_NUMNODES; node++)
+		if (test_bit(node, nodemask) && node_online(node))
+			zone_num = add_node(NODE_DATA(node), 
+				&binding->zonelist, zone_num);
+	binding->zonelist.zones[zone_num] = NULL;
+
+	if (zone_num == 0)
+		/* No zones were added to the zonelist.  Let the caller know. */
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/*
+ * mbind -  Bind a range of a process' VM space to a set of memory blocks according to
+ *            a predefined policy.
+ * @start:    beginning address of memory region to bind
+ * @len:      length of memory region to bind
+ * @mask_ptr: pointer to bitmask of cpus
+ * @mask_len: length of the bitmask
+ * @policy:   flag specifying the policy to use for the segment
+ */
+asmlinkage long sys_mbind(unsigned long start, unsigned long len,
+		unsigned long *mask_ptr, unsigned int mask_len, unsigned long policy)
+{
+	DECLARE_BITMAP(cpu_mask, NR_CPUS);
+	DECLARE_BITMAP(node_mask, MAX_NUMNODES);
+	struct vm_area_struct *vma = NULL;
+	struct address_space *mapping;
+	int copy_len, error = 0;
+
+	/* Deal with getting cpu_mask from userspace & translating to node_mask */
+	CLEAR_BITMAP(cpu_mask, NR_CPUS);
+	CLEAR_BITMAP(node_mask, MAX_NUMNODES);
+	copy_len = min(mask_len, (unsigned int)NR_CPUS);
+	if (copy_from_user(cpu_mask, mask_ptr, (copy_len+7)/8)) {
+		error = -EFAULT;
+		goto out;
+	}
+	cpumask_to_nodemask(cpu_mask, node_mask);
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, start);
+	/* This is an ugly, gross hack.  This is purely because I've hurt my
+	 * brain trying to come up with a brilliant way of implementing this
+	 * for VMA's in general.  Shared Memory VMA's lend themselves to binding
+	 * both because of how they're implemented, and their actual uses.
+	 * If anyone has a great place to squirrel-away some data about the
+	 * requested binding, and a way to easily force the allocator to respect
+	 * these bindings, then send a patch, or let me know.  Otherwise, this
+	 * will have to wait for a stroke of insight.
+	 */
+	if (!(vma && vma->vm_file && vma->vm_ops &&
+		vma->vm_ops->nopage == shmem_nopage)) {
+		/* This isn't a shm segment.  For now, we bail. */
+		error = -EINVAL;
+		goto out_unlock;
+	}
+	mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+	if (mapping->binding) {
+		kfree(mapping->binding);
+		mapping->binding = NULL;
+	}
+	mapping->binding = kmalloc(sizeof(struct binding), GFP_KERNEL);
+	if (!mapping->binding) {
+		error = -ENOMEM;
+		goto out_unlock;
+	}
+	error = build_binding(node_mask, mapping->binding);
+	if (error) {
+		kfree(mapping->binding);
+		mapping->binding = NULL;
+	}
+
+out_unlock:
+	up_read(&current->mm->mmap_sem);
+out:
+	return error;
+}