diff -upr kernel-2.6.18-417.el5.orig/arch/alpha/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/alpha/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/alpha/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/alpha/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -5,6 +5,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 #include <asm/uaccess.h>
 
 
@@ -13,6 +14,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 struct task_struct init_task = INIT_TASK(init_task);
 
 EXPORT_SYMBOL(init_mm);
diff -upr kernel-2.6.18-417.el5.orig/arch/alpha/kernel/osf_sys.c kernel-2.6.18-417.el5-028stab121/arch/alpha/kernel/osf_sys.c
--- kernel-2.6.18-417.el5.orig/arch/alpha/kernel/osf_sys.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/alpha/kernel/osf_sys.c	2017-01-13 08:40:16.000000000 -0500
@@ -406,15 +406,15 @@ osf_utsname(char __user *name)
 
 	down_read(&uts_sem);
 	error = -EFAULT;
-	if (copy_to_user(name + 0, system_utsname.sysname, 32))
+	if (copy_to_user(name + 0, utsname()->sysname, 32))
 		goto out;
-	if (copy_to_user(name + 32, system_utsname.nodename, 32))
+	if (copy_to_user(name + 32, utsname()->nodename, 32))
 		goto out;
-	if (copy_to_user(name + 64, system_utsname.release, 32))
+	if (copy_to_user(name + 64, utsname()->release, 32))
 		goto out;
-	if (copy_to_user(name + 96, system_utsname.version, 32))
+	if (copy_to_user(name + 96, utsname()->version, 32))
 		goto out;
-	if (copy_to_user(name + 128, system_utsname.machine, 32))
+	if (copy_to_user(name + 128, utsname()->machine, 32))
 		goto out;
 
 	error = 0;
@@ -453,8 +453,8 @@ osf_getdomainname(char __user *name, int
 
 	down_read(&uts_sem);
 	for (i = 0; i < len; ++i) {
-		__put_user(system_utsname.domainname[i], name + i);
-		if (system_utsname.domainname[i] == '\0')
+		__put_user(utsname()->domainname[i], name + i);
+		if (utsname()->domainname[i] == '\0')
 			break;
 	}
 	up_read(&uts_sem);
@@ -611,12 +611,12 @@ osf_sigstack(struct sigstack __user *uss
 asmlinkage long
 osf_sysinfo(int command, char __user *buf, long count)
 {
-	static char * sysinfo_table[] = {
-		system_utsname.sysname,
-		system_utsname.nodename,
-		system_utsname.release,
-		system_utsname.version,
-		system_utsname.machine,
+	char *sysinfo_table[] = {
+		utsname()->sysname,
+		utsname()->nodename,
+		utsname()->release,
+		utsname()->version,
+		utsname()->machine,
 		"alpha",	/* instruction set architecture */
 		"dummy",	/* hardware serial number */
 		"dummy",	/* hardware manufacturer */
@@ -963,7 +963,7 @@ osf_utimes(char __user *filename, struct
 			return -EFAULT;
 	}
 
-	return do_utimes(AT_FDCWD, filename, tvs ? ktvs : NULL);
+	return do_utimes(AT_FDCWD, filename, tvs ? ktvs : NULL, 0);
 }
 
 #define MAX_SELECT_SECONDS \
diff -upr kernel-2.6.18-417.el5.orig/arch/arm/Kconfig kernel-2.6.18-417.el5-028stab121/arch/arm/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/arm/Kconfig	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/arm/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -448,7 +448,7 @@ config NR_CPUS
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && HOTPLUG && EXPERIMENTAL
+	depends on SMP && HOTPLUG && EXPERIMENTAL && !SCHED_VCPU
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
diff -upr kernel-2.6.18-417.el5.orig/arch/arm/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/arm/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/arm/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/arm/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -17,6 +18,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/arm/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/arm/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/arm/kernel/setup.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/arm/kernel/setup.c	2017-01-13 08:40:15.000000000 -0500
@@ -348,7 +348,7 @@ static void __init setup_processor(void)
 	       cpu_name, processor_id, (int)processor_id & 15,
 	       proc_arch[cpu_architecture()], cr_alignment);
 
-	sprintf(system_utsname.machine, "%s%c", list->arch_name, ENDIANNESS);
+	sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS);
 	sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS);
 	elf_hwcap = list->elf_hwcap;
 #ifndef CONFIG_ARM_THUMB
diff -upr kernel-2.6.18-417.el5.orig/arch/arm/kernel/smp.c kernel-2.6.18-417.el5-028stab121/arch/arm/kernel/smp.c
--- kernel-2.6.18-417.el5.orig/arch/arm/kernel/smp.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/arm/kernel/smp.c	2017-01-13 08:40:19.000000000 -0500
@@ -196,7 +196,7 @@ int __cpuexit __cpu_disable(void)
 	local_flush_tlb_all();
 
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm)
 			cpu_clear(cpu, p->mm->cpu_vm_mask);
 	}
diff -upr kernel-2.6.18-417.el5.orig/arch/arm26/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/arm26/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/arm26/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/arm26/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -20,6 +21,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/arm26/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/arm26/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/arm26/kernel/setup.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/arm26/kernel/setup.c	2017-01-13 08:40:15.000000000 -0500
@@ -143,7 +143,7 @@ static void __init setup_processor(void)
 
 	dump_cpu_info();
 
-	sprintf(system_utsname.machine, "%s", list->arch_name);
+	sprintf(init_utsname()->machine, "%s", list->arch_name);
 	sprintf(elf_platform, "%s", list->elf_name);
 	elf_hwcap = list->elf_hwcap;
 
diff -upr kernel-2.6.18-417.el5.orig/arch/cris/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/cris/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/cris/kernel/setup.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/cris/kernel/setup.c	2017-01-13 08:40:15.000000000 -0500
@@ -160,7 +160,7 @@ setup_arch(char **cmdline_p)
 	show_etrax_copyright();
 
 	/* Setup utsname */
-	strcpy(system_utsname.machine, cris_machine_name);
+	strcpy(init_utsname()->machine, cris_machine_name);
 }
 
 static void *c_start(struct seq_file *m, loff_t *pos)
diff -upr kernel-2.6.18-417.el5.orig/arch/frv/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/frv/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/frv/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/frv/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -5,6 +5,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -15,6 +16,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/frv/mm/mmu-context.c kernel-2.6.18-417.el5-028stab121/arch/frv/mm/mmu-context.c
--- kernel-2.6.18-417.el5.orig/arch/frv/mm/mmu-context.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/frv/mm/mmu-context.c	2017-01-13 08:40:19.000000000 -0500
@@ -181,7 +181,7 @@ int cxn_pin_by_pid(pid_t pid)
 
 	/* get a handle on the mm_struct */
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_pid_ve(pid);
 	if (tsk) {
 		ret = -EINVAL;
 
diff -upr kernel-2.6.18-417.el5.orig/arch/h8300/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/h8300/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/h8300/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/h8300/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -8,6 +8,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -17,6 +18,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/Kconfig kernel-2.6.18-417.el5-028stab121/arch/i386/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/i386/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -230,6 +230,55 @@ config ES7000_CLUSTERED_APIC
 
 source "arch/i386/Kconfig.cpu"
 
+config X86_4G
+	bool "4 GB kernel-space and 4 GB user-space virtual memory support"
+	depends on !X86_XEN
+	help
+          This option is only useful for systems that have more than 1 GB
+          of RAM.
+
+          The default kernel VM layout leaves 1 GB of virtual memory for
+          kernel-space mappings, and 3 GB of VM for user-space applications.
+          This option ups both the kernel-space VM and the user-space VM to
+          4 GB.
+
+          The cost of this option is additional TLB flushes done at
+          system-entry points that transition from user-mode into kernel-mode.
+          I.e. system calls and page faults, and IRQs that interrupt user-mode
+          code. There's also additional overhead to kernel operations that copy
+          memory to/from user-space. The overhead from this is hard to tell and
+          depends on the workload - it can be anything from no visible overhead
+          to 20-30% overhead. A good rule of thumb is to assume a runtime
+          overhead of about 20%.
+
+          The upside is the much increased kernel-space VM, which more than
+          quadruples the maximum amount of RAM supported. Kernels compiled with
+          this option boot on 64GB of RAM and still have more than 3.1 GB of
+          'lowmem' left. Another bonus is that highmem IO bouncing decreases,
+          if used with drivers that still use bounce-buffers.
+
+          There's also a 33% increase in user-space VM size - database
+          applications might see a boost from this.
+
+          But the cost of the TLB flushes and the runtime overhead has to be
+          weighed against the bonuses offered by the larger VM spaces. The
+          dividing line depends on the actual workload - there might be 4 GB
+          systems that benefit from this option. Systems with less than 4 GB
+          of RAM will rarely see a benefit from this option - but it's not
+          out of the question; the exact circumstances have to be considered.
+
+config X86_SWITCH_PAGETABLES
+	def_bool X86_4G
+
+config X86_4G_VM_LAYOUT
+	def_bool X86_4G
+
+config X86_UACCESS_INDIRECT
+	def_bool X86_4G
+
+config X86_HIGH_ENTRY
+	def_bool X86_4G
+
 config HPET_TIMER
 	bool "HPET Timer Support"
 	depends on !X86_XEN
@@ -268,6 +317,8 @@ config NR_CPUS
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
 
+source "kernel/Kconfig.fairsched"
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on X86_HT
@@ -527,7 +578,7 @@ config HIGHMEM64G
 endchoice
 
 choice
-	depends on EXPERIMENTAL && !X86_PAE
+	depends on EXPERIMENTAL && !X86_PAE && !X86_4G_VM_LAYOUT
 	prompt "Memory split" if EMBEDDED
 	default VMSPLIT_3G
 	help
@@ -561,6 +612,7 @@ config PAGE_OFFSET
 	default 0xB0000000 if VMSPLIT_3G_OPT
 	default 0x78000000 if VMSPLIT_2G
 	default 0x40000000 if VMSPLIT_1G
+	default 0x02000000 if X86_4G_VM_LAYOUT
 	default 0xC0000000
 
 config HIGHMEM
@@ -845,7 +897,7 @@ config PHYSICAL_ALIGN
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
+	depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER && !SCHED_VCPU
 	---help---
 	  Say Y here to experiment with turning CPUs off and on, and to
 	  enable suspend on SMP systems. CPUs can be controlled through
@@ -1232,6 +1284,8 @@ endmenu
 
 source "arch/i386/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
@@ -1244,6 +1298,8 @@ endif
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 #
 # Use the generic interrupt handling code in kernel/irq/:
 #
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/acpi/sleep.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/acpi/sleep.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/acpi/sleep.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/acpi/sleep.c	2017-01-13 08:40:14.000000000 -0500
@@ -19,6 +19,34 @@ extern char wakeup_start, wakeup_end;
 
 extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
 
+static void map_low(pgd_t *pgd_base, unsigned long start, unsigned long end)
+{
+	unsigned long vaddr;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	int i, j;
+
+	pgd = pgd_base;
+
+	for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+		vaddr = i*PGDIR_SIZE;
+		if (end && (vaddr >= end))
+			break;
+		pud = pud_offset(pgd, 0);
+		pmd = pmd_offset(pud, 0);
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+			if (end && (vaddr >= end))
+				break;
+			if (vaddr < start)
+				continue;
+			set_pmd(pmd, __pmd(_KERNPG_TABLE + _PAGE_PSE +
+								vaddr - start));
+		}
+	}
+}
+
 /**
  * acpi_save_state_mem - save kernel state
  *
@@ -29,6 +57,9 @@ int acpi_save_state_mem(void)
 {
 	if (!acpi_wakeup_address)
 		return 1;
+	if (!cpu_has_pse)
+		return 1;
+	map_low(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE);
 	memcpy((void *)acpi_wakeup_address, &wakeup_start,
 	       &wakeup_end - &wakeup_start);
 	acpi_copy_wakeup_routine(acpi_wakeup_address);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/acpi/wakeup.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/acpi/wakeup.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/acpi/wakeup.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/acpi/wakeup.S	2017-01-13 08:40:14.000000000 -0500
@@ -76,6 +76,13 @@ wakeup_code:
 	
 	# need a gdt -- use lgdtl to force 32-bit operands, in case
 	# the GDT is located past 16 megabytes.
+	# use the copy of the GDT placed in this low-memory area
+	lea	temp_gdt_table - wakeup_code, %eax
+	xor	%ebx, %ebx
+	movw	%ds, %bx
+	shll	$4, %ebx
+	addl	%ebx, %eax
+	movl	%eax, real_save_gdt + 2 - wakeup_code
 	lgdtl	real_save_gdt - wakeup_code
 
 	movl	real_save_cr0 - wakeup_code, %eax
@@ -101,6 +108,7 @@ video_flags:	.long 0
 real_efer_save_restore:	.long 0
 real_save_efer_edx: 	.long 0
 real_save_efer_eax: 	.long 0
+temp_gdt_table: .fill GDT_ENTRIES, 8, 0
 
 bogus_real_magic:
 	movw	$0x0e00 + 'B', %fs:(0x12)
@@ -257,6 +265,13 @@ ENTRY(acpi_copy_wakeup_routine)
 	movl	%edx, real_save_cr0 - wakeup_start (%eax)
 	sgdt    real_save_gdt - wakeup_start (%eax)
 
+	# the GDT won't be addressable from real mode in the 4G/4G split,
+	# so copy it into low memory
+	xor	%ecx, %ecx
+	movw	saved_gdt, %cx
+	movl	saved_gdt + 2, %esi	
+	lea 	temp_gdt_table - wakeup_start (%eax), %edi	
+	rep movsb
 	movl	saved_videomode, %edx
 	movl	%edx, video_mode - wakeup_start (%eax)
 	movl	acpi_video_flags, %edx
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/asm-offsets.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/asm-offsets.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/asm-offsets.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/asm-offsets.c	2017-01-13 08:40:14.000000000 -0500
@@ -75,8 +75,19 @@ void foo(void)
 	DEFINE(SYSENTER_stack_esp0, 0);
 #endif
 
+#ifdef CONFIG_X86_4G
+	DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack));
+	DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack));
+	DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd));
+	DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr,
+			__fix_to_virt(FIX_ENTRY_TRAMPOLINE_0));
+#endif
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
 	DEFINE(VDSO_PRELINK, VDSO_PRELINK);
+#ifdef CONFIG_X86_4G
+	DEFINE(task_thread_db7,
+		offsetof (struct task_struct, thread.debugreg[7]));
+#endif
 
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/common.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/common.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/common.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/common.c	2017-01-13 08:40:15.000000000 -0500
@@ -27,8 +27,8 @@
 DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+unsigned char cpu_16bit_stack[NR_CPUS][CPU_16BIT_STACK_SIZE] __attribute__((__section__(".data.stk16")));
+EXPORT_SYMBOL(cpu_16bit_stack);
 
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
@@ -335,6 +335,9 @@ void __cpuinit generic_identify(struct c
 		}
 
 		init_scattered_cpuid_features(c);
+
+		if (c->x86_vendor == X86_VENDOR_INTEL)
+			early_init_intel(c);
 	}
 
 	early_intel_workaround(c);
@@ -622,10 +625,14 @@ void __init early_cpu_init(void)
 void __cpuinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct * t = &per_cpu(init_tss, cpu);
+	struct tss_struct * t = init_tss + cpu;
 	struct thread_struct *thread = &current->thread;
 	struct desc_struct *gdt;
-	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	__u32 stk16_off = fix_to_virt(FIX_16BIT_STACK_0) + cpu * CPU_16BIT_STACK_SIZE;
+#else
+	__u32 stk16_off = (__u32)cpu_16bit_stack[cpu];
+#endif
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
@@ -649,24 +656,8 @@ void __cpuinit cpu_init(void)
 		memset(gdt, 0, PAGE_SIZE);
 		goto old_gdt;
 	}
-	/*
-	 * This is a horrible hack to allocate the GDT.  The problem
-	 * is that cpu_init() is called really early for the boot CPU
-	 * (and hence needs bootmem) but much later for the secondary
-	 * CPUs, when bootmem will have gone away
-	 */
-	if (NODE_DATA(0)->bdata->node_bootmem_map) {
-		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
-		/* alloc_bootmem_pages panics on failure, so no check */
-		memset(gdt, 0, PAGE_SIZE);
-	} else {
-		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
-		if (unlikely(!gdt)) {
-			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
-			for (;;)
-				local_irq_enable();
-		}
-	}
+	gdt = (struct desc_struct *)cpu_gdt_table[cpu];
+
 old_gdt:
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
@@ -698,13 +689,17 @@ old_gdt:
 	load_esp0(t, thread);
 	set_tss_desc(cpu,t);
 	load_TR_desc();
-	load_LDT(&init_mm.context);
+	if (cpu)
+		load_LDT(&init_mm.context);
 
 #ifdef CONFIG_DOUBLEFAULT
 	/* Set up doublefault TSS pointer in the GDT */
 	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
+	if (cpu)
+		trap_init_virtual_GDT();
+
 	/* Clear %fs and %gs. */
 	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c	2017-01-13 08:40:16.000000000 -0500
@@ -149,8 +149,9 @@ struct drv_cmd {
 	u32 val;
 };
 
-static void do_drv_read(struct drv_cmd *cmd)
+static void do_drv_read(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 h;
 
 	switch (cmd->type) {
@@ -167,8 +168,9 @@ static void do_drv_read(struct drv_cmd *
 	}
 }
 
-static void do_drv_write(struct drv_cmd *cmd)
+static void do_drv_write(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 lo, hi;
 
 	switch (cmd->type) {
@@ -189,26 +191,27 @@ static void do_drv_write(struct drv_cmd 
 
 static void drv_read(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
+	int this_cpu;
 	cmd->val = 0;
 
-	set_cpus_allowed(current, cmd->mask);
-	do_drv_read(cmd);
-	set_cpus_allowed(current, saved_mask);
+	this_cpu = get_cpu();
+	if (cpu_isset(this_cpu, cmd->mask))
+		do_drv_read(cmd);
+	else
+		smp_call_function_single(any_online_cpu(cmd->mask),
+				do_drv_read, cmd, 0, 1);
+	put_cpu();
 }
 
 static void drv_write(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
-	unsigned int i;
+	int this_cpu;
 
-	for_each_cpu_mask(i, cmd->mask) {
-		set_cpus_allowed(current, cpumask_of_cpu(i));
+	this_cpu = get_cpu();
+	if (cpu_isset(this_cpu, cmd->mask))
 		do_drv_write(cmd);
-	}
-
-	set_cpus_allowed(current, saved_mask);
-	return;
+	smp_call_function_many(&cmd->mask, do_drv_write, cmd, 0, 1);
+	put_cpu();
 }
 
 static u32 get_cur_val(cpumask_t mask)
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/mperf.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/mperf.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/mperf.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/mperf.c	2017-01-13 08:40:16.000000000 -0500
@@ -9,6 +9,17 @@
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
 
+static void get_aperfmperf(void *_data)
+{
+	u32 *data = _data;
+
+	rdmsr(MSR_IA32_APERF, data[0], data[1]);
+	rdmsr(MSR_IA32_MPERF, data[2], data[3]);
+
+	wrmsr(MSR_IA32_APERF, 0,0);
+	wrmsr(MSR_IA32_MPERF, 0,0);
+}
+
 /*
  * Return the measured active (C0) frequency on this CPU since last call
  * to this function.
@@ -31,19 +42,10 @@ unsigned int cpufreq_get_measured_perf(u
 		} split;
 		u64 whole;
 	} aperf_cur, mperf_cur;
-
-	cpumask_t saved_mask;
 	unsigned int perf_percent;
 	unsigned int retval;
 	struct cpufreq_policy *policy = NULL;
-
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (get_cpu() != cpu) {
-		/* We were not able to run on requested processor */
-		put_cpu();
-		return 0;
-	}
+	u32 data[4];
 
 	policy = cpufreq_cpu_get(cpu);
 	if (unlikely(!policy)) {
@@ -51,11 +53,15 @@ unsigned int cpufreq_get_measured_perf(u
 		return 0;
 	}
 
-	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
-	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+	if (get_cpu() != cpu)
+		smp_call_function_single(cpu, get_aperfmperf, data, 0, 1);
+	else
+		get_aperfmperf(data);
 
-	wrmsr(MSR_IA32_APERF, 0,0);
-	wrmsr(MSR_IA32_MPERF, 0,0);
+	aperf_cur.split.lo = data[0];
+	aperf_cur.split.hi = data[1];
+	mperf_cur.split.lo = data[2];
+	mperf_cur.split.hi = data[3];
 
 #ifdef __i386__
 	/*
@@ -103,7 +109,6 @@ unsigned int cpufreq_get_measured_perf(u
 
 	cpufreq_cpu_put(policy);
 	put_cpu();
-	set_cpus_allowed(current, saved_mask);
 
 	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
 	return retval;
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2017-01-13 08:40:15.000000000 -0500
@@ -63,7 +63,7 @@ static int cpufreq_p4_setdc(unsigned int
 	if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
 		return -EINVAL;
 
-	rdmsr(MSR_IA32_THERM_STATUS, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
 
 	if (l & 0x01)
 		dprintk("CPU#%d currently thermal throttled\n", cpu);
@@ -71,10 +71,10 @@ static int cpufreq_p4_setdc(unsigned int
 	if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
 		newstate = DC_38PT;
 
-	rdmsr(MSR_IA32_THERM_CONTROL, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
 	if (newstate == DC_DISABLE) {
 		dprintk("CPU#%d disabling modulation\n", cpu);
-		wrmsr(MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
 	} else {
 		dprintk("CPU#%d setting duty cycle to %d%%\n",
 			cpu, ((125 * newstate) / 10));
@@ -85,7 +85,7 @@ static int cpufreq_p4_setdc(unsigned int
 		 */
 		l = (l & ~14);
 		l = l | (1<<4) | ((newstate & 0x7)<<1);
-		wrmsr(MSR_IA32_THERM_CONTROL, l, h);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
 	}
 
 	return 0;
@@ -112,7 +112,6 @@ static int cpufreq_p4_target(struct cpuf
 {
 	unsigned int    newstate = DC_RESV;
 	struct cpufreq_freqs freqs;
-	cpumask_t cpus_allowed;
 	int i;
 
 	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
@@ -133,17 +132,8 @@ static int cpufreq_p4_target(struct cpuf
 	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
 	 * Developer's Manual, Volume 3
 	 */
-	cpus_allowed = current->cpus_allowed;
-
-	for_each_cpu_mask(i, policy->cpus) {
-		cpumask_t this_cpu = cpumask_of_cpu(i);
-
-		set_cpus_allowed(current, this_cpu);
-		BUG_ON(smp_processor_id() != i);
-
+	for_each_cpu_mask(i, policy->cpus)
 		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
-	}
-	set_cpus_allowed(current, cpus_allowed);
 
 	/* notifiers */
 	for_each_cpu_mask(i, policy->cpus) {
@@ -267,17 +257,9 @@ static int cpufreq_p4_cpu_exit(struct cp
 
 static unsigned int cpufreq_p4_get(unsigned int cpu)
 {
-	cpumask_t cpus_allowed;
 	u32 l, h;
 
-	cpus_allowed = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	BUG_ON(smp_processor_id() != cpu);
-
-	rdmsr(MSR_IA32_THERM_CONTROL, l, h);
-
-	set_cpus_allowed(current, cpus_allowed);
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
 
 	if (l & 0x10) {
 		l = l >> 1;
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/powernow-k8.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	2017-01-13 08:40:15.000000000 -0500
@@ -116,14 +116,14 @@ static u32 convert_fid_to_vco_fid(u32 fi
  * Return 1 if the pending bit is set. Unless we just instructed the processor
  * to transition to a new state, seeing this bit set is really bad news.
  */
-static int pending_bit_stuck(void)
+static int pending_bit_stuck(unsigned int cpu)
 {
 	u32 lo, hi;
 
 	if (cpu_family == CPU_HW_PSTATE)
 		return 0;
 
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	rdmsr_on_cpu(cpu, MSR_FIDVID_STATUS, &lo, &hi);
 	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
 }
 
@@ -133,11 +133,12 @@ static int pending_bit_stuck(void)
  */
 static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 {
+	unsigned int cpu = data->cpu;
 	u32 lo, hi;
 	u32 i = 0;
 
 	if (cpu_family == CPU_HW_PSTATE) {
-		rdmsr(MSR_PSTATE_STATUS, lo, hi);
+		rdmsr_on_cpu(cpu, MSR_PSTATE_STATUS, &lo, &hi);
 		i = lo & HW_PSTATE_MASK;
 		data->currpstate = i;
 
@@ -155,7 +156,7 @@ static int query_current_values_with_pen
 			dprintk("detected change pending stuck\n");
 			return 1;
 		}
-		rdmsr(MSR_FIDVID_STATUS, lo, hi);
+		rdmsr_on_cpu(cpu, MSR_FIDVID_STATUS, &lo, &hi);
 	} while (lo & MSR_S_LO_CHANGE_PENDING);
 
 	data->currvid = hi & MSR_S_HI_CURRENT_VID;
@@ -179,18 +180,18 @@ static void count_off_vst(struct powerno
 }
 
 /* need to init the control msr to a safe value (for each cpu) */
-static void fidvid_msr_init(void)
+static void fidvid_msr_init(unsigned int cpu)
 {
 	u32 lo, hi;
 	u8 fid, vid;
 
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	rdmsr_on_cpu(cpu, MSR_FIDVID_STATUS, &lo, &hi);
 	vid = hi & MSR_S_HI_CURRENT_VID;
 	fid = lo & MSR_S_LO_CURRENT_FID;
 	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
 	hi = MSR_C_HI_STP_GNT_BENIGN;
 	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
-	wrmsr(MSR_FIDVID_CTL, lo, hi);
+	wrmsr_on_cpu(cpu, MSR_FIDVID_CTL, lo, hi);
 }
 
 
@@ -213,17 +214,12 @@ static int write_new_fid(struct powernow
 
 	if (tscsync) {
 		int i;
-		cpumask_t oldmask = current->cpus_allowed;
-		for_each_online_cpu(i) {
-			set_cpus_allowed(current, cpumask_of_cpu(i));
-			schedule();
-			wrmsr(MSR_FIDVID_CTL, lo & ~MSR_C_LO_INIT_FID_VID, data->plllock * PLL_LOCK_CONVERSION);
-		}
-		set_cpus_allowed(current, oldmask);
-		schedule();
+
+		for_each_online_cpu(i)
+			wrmsr_on_cpu(i, MSR_FIDVID_CTL, lo & ~MSR_C_LO_INIT_FID_VID, data->plllock * PLL_LOCK_CONVERSION);
 	}
 	do {
-		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+		wrmsr_on_cpu(data->cpu, MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
 		if (i++ > 100) {
 			printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
 			return 1;
@@ -266,17 +262,12 @@ static int write_new_vid(struct powernow
 
 	if (tscsync) {
 		int i;
-		cpumask_t oldmask = current->cpus_allowed;
-		for_each_online_cpu(i) {
-			set_cpus_allowed(current, cpumask_of_cpu(i));
-			schedule();
-			wrmsr(MSR_FIDVID_CTL, lo & ~MSR_C_LO_INIT_FID_VID, STOP_GRANT_5NS);
-		}
-		set_cpus_allowed(current, oldmask);
-		schedule();
+
+		for_each_online_cpu(i)
+			wrmsr_on_cpu(i, MSR_FIDVID_CTL, lo & ~MSR_C_LO_INIT_FID_VID, STOP_GRANT_5NS);
 	}
 	do {
-		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+		wrmsr_on_cpu(data->cpu, MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
 		if (i++ > 100) {
 			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
 			return 1;
@@ -319,7 +310,7 @@ static int decrease_vid_code_by_step(str
 /* Change hardware pstate by single MSR write */
 static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
 {
-	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
+	wrmsr_on_cpu(data->cpu, MSR_PSTATE_CTRL, pstate, 0);
 	data->currpstate = pstate;
 	return 0;
 }
@@ -363,7 +354,7 @@ static int core_voltage_pre_transition(s
 		smp_processor_id(),
 		data->currfid, data->currvid, reqvid, data->rvo);
 
-	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
+	rdmsr_on_cpu(data->cpu, MSR_FIDVID_STATUS, &lo, &maxvid);
 	maxvid = 0x1f & (maxvid >> 16);
 	dprintk("ph1 maxvid=0x%x\n", maxvid);
 	if (reqvid < maxvid) /* lower numbers are higher voltages */
@@ -895,7 +886,8 @@ static int fill_powernow_table_pstate(st
 {
 	int i;
 	u32 hi = 0, lo = 0;
-	rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
+
+	rdmsr_on_cpu(data->cpu, MSR_PSTATE_CUR_LIMIT, &hi, &lo);
 	data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
 
 	for (i = 0; i < data->acpi_data->state_count; i++) {
@@ -1011,7 +1003,7 @@ static int transition_frequency_fidvid(s
          */
 
 	if (tscsync && req_state) {
-		req_state[smp_processor_id()] = index;
+		req_state[data->cpu] = index;
 		for_each_online_cpu(i) 
 			if (req_state[i] < index)
 				index = req_state[i];
@@ -1109,7 +1101,6 @@ static int transition_frequency_pstate(s
 /* Driver entry point to switch to the target frequency */
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
-	cpumask_t oldmask = CPU_MASK_ALL;
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
@@ -1122,16 +1113,7 @@ static int powernowk8_target(struct cpuf
 	checkfid = data->currfid;
 	checkvid = data->currvid;
 
-	/* only run on specific CPU from here on */
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
-	}
-
-	if (pending_bit_stuck()) {
+	if (pending_bit_stuck(pol->cpu)) {
 		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
 		goto err_out;
 	}
@@ -1180,7 +1162,6 @@ static int powernowk8_target(struct cpuf
 	ret = 0;
 
 err_out:
-	set_cpus_allowed(current, oldmask);
 	return ret;
 }
 
@@ -1233,7 +1214,7 @@ static void __cpuinit powernowk8_cpu_ini
 {
 	struct init_on_cpu *init_on_cpu = _init_on_cpu;
 
-	if (pending_bit_stuck()) {
+	if (pending_bit_stuck(smp_processor_id())) {
 		printk(KERN_ERR PFX "failing init, change pending bit set\n");
 		init_on_cpu->rc = -ENODEV;
 		return;
@@ -1245,7 +1226,7 @@ static void __cpuinit powernowk8_cpu_ini
 	}
 
 	if (cpu_family == CPU_OPTERON)
-		fidvid_msr_init();
+		fidvid_msr_init(smp_processor_id());
 
 	init_on_cpu->rc = 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c	2017-01-13 08:40:15.000000000 -0500
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/sched.h>	/* current */
 #include <linux/delay.h>
 #include <linux/compiler.h>
 
@@ -319,14 +318,8 @@ static unsigned int get_cur_freq(unsigne
 {
 	unsigned l, h;
 	unsigned clock_freq;
-	cpumask_t saved_mask;
 
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu)
-		return 0;
-
-	rdmsr(MSR_IA32_PERF_STATUS, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
 	clock_freq = extract_clock(l, cpu, 0);
 
 	if (unlikely(clock_freq == 0)) {
@@ -336,11 +329,10 @@ static unsigned int get_cur_freq(unsigne
 		 * P-state transition (like TM2). Get the last freq set 
 		 * in PERF_CTL.
 		 */
-		rdmsr(MSR_IA32_PERF_CTL, l, h);
+		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
 		clock_freq = extract_clock(l, cpu, 1);
 	}
 
-	set_cpus_allowed(current, saved_mask);
 	return clock_freq;
 }
 
@@ -550,15 +542,15 @@ static int centrino_cpu_init(struct cpuf
 
 	/* Check to see if Enhanced SpeedStep is enabled, and try to
 	   enable it if not. */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	rdmsr_on_cpu(policy->cpu, MSR_IA32_MISC_ENABLE, &l, &h);
 
 	if (!(l & (1<<16))) {
 		l |= (1<<16);
 		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
-		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
+		wrmsr_on_cpu(policy->cpu, MSR_IA32_MISC_ENABLE, l, h);
 
 		/* check to see if it stuck */
-		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+		rdmsr_on_cpu(policy->cpu, MSR_IA32_MISC_ENABLE, &l, &h);
 		if (!(l & (1<<16))) {
 			printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
@@ -638,7 +630,6 @@ static int centrino_target (struct cpufr
 	unsigned int	msr, oldmsr = 0, h = 0, cpu = policy->cpu;
 	struct cpufreq_freqs	freqs;
 	cpumask_t		online_policy_cpus;
-	cpumask_t		saved_mask;
 	cpumask_t		set_mask;
 	cpumask_t		covered_cpus;
 	int			retval = 0;
@@ -662,7 +653,6 @@ static int centrino_target (struct cpufr
 	online_policy_cpus = policy->cpus;
 #endif
 
-	saved_mask = current->cpus_allowed;
 	first_cpu = 1;
 	cpus_clear(covered_cpus);
 	for_each_cpu_mask(j, online_policy_cpus) {
@@ -676,8 +666,7 @@ static int centrino_target (struct cpufr
 		else
 			cpu_set(j, set_mask);
 
-		set_cpus_allowed(current, set_mask);
-		if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+		if (unlikely(!cpu_isset(j, set_mask))) {
 			dprintk("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
@@ -690,7 +679,7 @@ static int centrino_target (struct cpufr
 		msr = centrino_model[cpu]->op_points[newstate].index;
 
 		if (first_cpu) {
-			rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+			rdmsr_on_cpu(j, MSR_IA32_PERF_CTL, &oldmsr, &h);
 			if (msr == (oldmsr & 0xffff)) {
 				dprintk("no change needed - msr was and needs "
 					"to be %x\n", oldmsr);
@@ -717,7 +706,7 @@ static int centrino_target (struct cpufr
 			oldmsr |= msr;
 		}
 
-		wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+		wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
 		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
 			break;
 
@@ -739,8 +728,7 @@ static int centrino_target (struct cpufr
 
 		if (!cpus_empty(covered_cpus)) {
 			for_each_cpu_mask(j, covered_cpus) {
-				set_cpus_allowed(current, cpumask_of_cpu(j));
-				wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+				wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
 			}
 		}
 
@@ -755,7 +743,6 @@ static int centrino_target (struct cpufr
 	}
 
 migrate_end:
-	set_cpus_allowed(current, saved_mask);
 	return 0;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/intel.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/intel.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/intel.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/intel.c	2017-01-13 08:40:14.000000000 -0500
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/uaccess.h>
+#include <asm/desc.h>
 
 #include "cpu.h"
 
@@ -19,8 +20,6 @@
 #include <mach_apic.h>
 #endif
 
-extern int trap_init_f00f_bug(void);
-
 #ifdef CONFIG_X86_INTEL_USERCOPY
 /*
  * Alignment at which movsl is preferred for bulk memory copies.
@@ -112,7 +111,7 @@ static void __cpuinit init_intel(struct 
 
 		c->f00f_bug = 1;
 		if ( !f00f_workaround_enabled ) {
-			trap_init_f00f_bug();
+			trap_init_virtual_IDT();
 			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
 			f00f_workaround_enabled = 1;
 		}
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/intel_cacheinfo.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/intel_cacheinfo.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/intel_cacheinfo.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/intel_cacheinfo.c	2017-01-13 08:40:15.000000000 -0500
@@ -385,6 +385,23 @@ static int __init find_num_cache_leaves(
 	return i;
 }
 
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID        (1ULL << 22)
+void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+{
+	/* Unmask CPUID levels if masked: */
+	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+		u64 misc_enable;
+
+		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+
+		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
+			misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
+			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+			c->cpuid_level = cpuid_eax(0);
+		}
+	}
+}
+
 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 {
 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/mcheck/mce.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/mcheck/mce.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/mcheck/mce.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/mcheck/mce.c	2017-01-13 08:40:14.000000000 -0500
@@ -27,7 +27,8 @@ static fastcall void unexpected_machine_
 }
 
 /* Call the installed machine check handler for this CPU setup. */
-void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+void fastcall (*machine_check_vector)(struct pt_regs *, long error_code)
+	__attribute__((__section__(".entry.text"))) = unexpected_machine_check;
 
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/mtrr/if.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/mtrr/if.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/mtrr/if.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/mtrr/if.c	2017-01-13 08:40:19.000000000 -0500
@@ -392,7 +392,7 @@ static int __init mtrr_if_init(void)
 		return -ENODEV;
 
 	proc_root_mtrr =
-	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
 	if (proc_root_mtrr) {
 		proc_root_mtrr->owner = THIS_MODULE;
 		proc_root_mtrr->proc_fops = &mtrr_fops;
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/proc.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/proc.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/cpu/proc.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/cpu/proc.c	2017-01-13 08:40:28.000000000 -0500
@@ -4,6 +4,7 @@
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <linux/vsched.h>
 
 /*
  *	Get CPU information for use by the procfs.
@@ -83,7 +84,7 @@ static int show_cpuinfo(struct seq_file 
 	int fpu_exception;
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(n))
+	if (!vcpu_online(n))
 		return 0;
 #endif
 	seq_printf(m, "processor\t: %d\n"
@@ -103,9 +104,13 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "stepping\t: unknown\n");
 
 	if ( cpu_has(c, X86_FEATURE_TSC) ) {
+#ifndef CONFIG_FAIRSCHED
 		unsigned int freq = cpufreq_quick_get(n);
 		if (!freq)
 			freq = cpu_khz;
+#else
+		unsigned int freq = ve_scale_khz(cpu_khz);
+#endif
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
 			freq / 1000, (freq % 1000));
 	}
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/doublefault.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/doublefault.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/doublefault.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/doublefault.c	2017-01-13 08:40:14.000000000 -0500
@@ -8,12 +8,13 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
+#include <asm/fixmap.h>
 
 #define DOUBLEFAULT_STACKSIZE (1024)
 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
 #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
 
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START))
 
 static void doublefault_fn(void)
 {
@@ -39,8 +40,8 @@ static void doublefault_fn(void)
 
 			printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
 				t->eax, t->ebx, t->ecx, t->edx);
-			printk("esi = %08lx, edi = %08lx\n",
-				t->esi, t->edi);
+			printk("esi = %08lx, edi = %08lx, ebp = %08lx\n",
+				t->esi, t->edi, t->ebp);
 		}
 	}
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/entry.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/entry.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/entry.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/entry.S	2017-01-13 08:40:24.000000000 -0500
@@ -98,7 +98,109 @@ VM_MASK		= 0x00020000
 #define resume_userspace_sig	resume_userspace
 #endif
 
-#define SAVE_ALL \
+#ifdef CONFIG_X86_HIGH_ENTRY
+
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+/*
+ * If task is preempted in __SWITCH_TO_KERNELSPACE, and moved to another cpu,
+ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
+ * left stale, so we must check whether to repeat the real stack calculation.
+ */
+#define repeat_if_esp_changed				\
+	xorl %esp, %ebp;				\
+	testl $-THREAD_SIZE, %ebp;			\
+	jnz 0b
+#else
+#define repeat_if_esp_changed
+#endif
+
+/*
+ * __SWITCH_TO_KERNELSPACE
+ *
+ * switches to kernel space if not already there
+ * Note: esp, cr3 reload is not atomic, but their reload
+ *	sequence helps us to handle its interruption properly.
+ *
+ * clobbers: ebx, edx, ebp
+ */
+#define __SWITCH_TO_KERNELSPACE				\
+	cmpl $0xff000000, %esp;				\
+	jb 1f;						\
+							\
+	/*						\
+	 * switch pagetables and load the real stack,	\
+	 * keep the stack offset			\
+	 */						\
+							\
+	movl $swapper_pg_dir-__PAGE_OFFSET, %edx;	\
+							\
+	/* GET_THREAD_INFO(%ebp) intermixed */		\
+0:							\
+	movl %esp, %ebp;				\
+	movl %esp, %ebx;				\
+	andl $(-THREAD_SIZE), %ebp;			\
+	andl $(THREAD_SIZE-1), %ebx;			\
+	orl TI_real_stack(%ebp), %ebx;			\
+	repeat_if_esp_changed;				\
+							\
+	movl %edx, %cr3;				\
+	movl %ebx, %esp;				\
+1:
+
+#endif
+
+/*
+ * __SWITCH_TO_USERSPACE
+ *
+ * loads user esp and cr3 if returning to user space
+ * Note: esp + cr3 reload is not atomic, cli is required,
+ * 	but NMI still can interrupt (see nmi ISR)
+ */
+#define __SWITCH_TO_USERSPACE				\
+	movl EFLAGS(%esp),%ecx;				\
+	movb CS(%esp),%cl;				\
+	/* return to VM86 mode or user-space? */	\
+	testl $(VM_MASK | 3),%ecx;			\
+	jz 2f;	/* no, ret to kernel space */ 		\
+							\
+	/*						\
+	 * switch to the virtual stack, then switch to	\
+	 * the userspace pagetables.			\
+	 */						\
+							\
+	GET_THREAD_INFO(%ebp);				\
+	movl %esp, %ebx;				\
+	movl TI_virtual_stack(%ebp), %edx;		\
+	movl TI_user_pgd(%ebp), %ecx;			\
+							\
+	andl $(THREAD_SIZE-1), %ebx;			\
+	orl %ebx, %edx;					\
+							\
+	movl %edx, %esp; 		/* switch */	\
+							\
+	/* db7 preps should be done before cr3 switch */ \
+	movl TI_flags(%ebp), %ebx;			\
+	movl TI_task(%ebp), %edx;			\
+	movl task_thread_db7(%edx), %edx;		\
+							\
+	movl %ecx, %cr3;		/* switch */	\
+							\
+	/* reload db7 if necessary */			\
+	testb $_TIF_DB7, %bl;				\
+	jz 2f;						\
+	movl %edx, %db7;				\
+2:
+
+#else /* !CONFIG_X86_HIGH_ENTRY */
+
+#define __SWITCH_TO_KERNELSPACE
+#define __SWITCH_TO_USERSPACE
+
+#endif
+
+#define __SAVE_ALL \
 	cld; \
 	pushl %es; \
 	CFI_ADJUST_CFA_OFFSET 4;\
@@ -131,7 +233,7 @@ VM_MASK		= 0x00020000
 	movl %edx, %ds; \
 	movl %edx, %es;
 
-#define RESTORE_INT_REGS \
+#define __RESTORE_INT_REGS \
 	popl %ebx;	\
 	CFI_ADJUST_CFA_OFFSET -4;\
 	CFI_RESTORE ebx;\
@@ -154,26 +256,34 @@ VM_MASK		= 0x00020000
 	CFI_ADJUST_CFA_OFFSET -4;\
 	CFI_RESTORE eax
 
-#define RESTORE_REGS	\
-	RESTORE_INT_REGS; \
-1:	popl %ds;	\
+#define __RESTORE_REGS	\
+	__RESTORE_INT_REGS; \
+111:	popl %ds;	\
 	CFI_ADJUST_CFA_OFFSET -4;\
 	/*CFI_RESTORE ds;*/\
-2:	popl %es;	\
+222:	popl %es;	\
 	CFI_ADJUST_CFA_OFFSET -4;\
 	/*CFI_RESTORE es;*/\
 .section .fixup,"ax";	\
-3:	movl $0,(%esp);	\
-	jmp 1b;		\
-4:	movl $0,(%esp);	\
-	jmp 2b;		\
+444:	movl $0,(%esp);	\
+	jmp 111b;	\
+555:	movl $0,(%esp);	\
+	jmp 222b;	\
 .previous;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,3b;	\
-	.long 2b,4b;	\
+	.long 111b,444b;\
+	.long 222b,555b;\
 .previous
 
+#define RESTORE_REGS	\
+	__SWITCH_TO_USERSPACE; \
+	__RESTORE_REGS;
+
+#define SAVE_ALL					\
+	__SAVE_ALL;					\
+	__SWITCH_TO_KERNELSPACE;
+
 #define RING0_INT_FRAME \
 	CFI_STARTPROC simple;\
 	CFI_DEF_CFA esp, 3*4;\
@@ -201,6 +311,8 @@ VM_MASK		= 0x00020000
 	CFI_OFFSET ecx, ECX-OLDESP;\
 	CFI_OFFSET ebx, EBX-OLDESP
 
+.section .entry.text,"ax"
+
 ENTRY(ret_from_fork)
 	CFI_STARTPROC
 	pushl %eax
@@ -209,6 +321,7 @@ ENTRY(ret_from_fork)
 	GET_THREAD_INFO(%ebp)
 	popl %eax
 	CFI_ADJUST_CFA_OFFSET -4
+ret_from_fork_tail:
 	pushl $0x0202			# Reset kernel eflags
 	CFI_ADJUST_CFA_OFFSET 4
 	popfl
@@ -216,6 +329,25 @@ ENTRY(ret_from_fork)
 	jmp syscall_exit
 	CFI_ENDPROC
 
+ENTRY(i386_ret_from_resume)
+	CFI_STARTPROC
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	movl (%esp),%eax
+	testl %eax,%eax
+	jz    1f
+	pushl %esp
+	call  *%eax
+	addl  $4,%esp
+1:
+	addl  $256,%esp
+	jmp   ret_from_fork_tail
+	CFI_ENDPROC
+
 /*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
@@ -296,6 +428,7 @@ sysenter_past_esp:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
+#ifndef CONFIG_X86_HIGH_ENTRY
 /*
  * Load the potential sixth argument from user stack.
  * Careful about security.
@@ -307,6 +440,11 @@ sysenter_past_esp:
 	.align 4
 	.long 1b,syscall_fault
 .previous
+#else
+	/*
+	 * No six-argument syscall is ever used with sysenter.
+	 */
+#endif
 
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
@@ -325,16 +463,25 @@ sysenter_past_esp:
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
+
+	TRACE_IRQS_ON		/* should be before __SWITCH_TO_USERSPACE */
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+	__SWITCH_TO_USERSPACE
+	/*
+	 * only ebx is not restored by the userspace sysenter vsyscall
+	 * code, it assumes it to be callee-saved.
+	 */
+	movl EBX(%esp), %ebx
+#endif
+
 /* if something modifies registers it must also disable sysexit */
 	movl EIP(%esp), %edx
 	movl OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
-	TRACE_IRQS_ON
 	sti
 	sysexit
 	CFI_ENDPROC
 
-
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -494,6 +641,7 @@ syscall_exit_work:
 	CFI_ENDPROC
 
 	RING0_INT_FRAME			# can't unwind into user space anyway
+#ifndef CONFIG_X86_HIGH_ENTRY
 syscall_fault:
 	pushl %eax			# save orig_eax
 	CFI_ADJUST_CFA_OFFSET 4
@@ -501,6 +649,7 @@ syscall_fault:
 	GET_THREAD_INFO(%ebp)
 	movl $-EFAULT,EAX(%esp)
 	jmp resume_userspace
+#endif
 
 syscall_badsys:
 	movl $-ENOSYS,EAX(%esp)
@@ -521,17 +670,15 @@ syscall_badsys:
 	movl %ss, %eax; \
 	/* see if on 16bit stack */ \
 	cmpw $__ESPFIX_SS, %ax; \
-	je 28f; \
-27:	popl %eax; \
-	CFI_ADJUST_CFA_OFFSET -4; \
-.section .fixup,"ax"; \
+	jne 27f; \
+	/* for 4:4GB split, the whole piece of code should be in .entry.text section, not .fixup */ \
 28:	movl $__KERNEL_DS, %eax; \
 	movl %eax, %ds; \
 	movl %eax, %es; \
 	/* switch to 32bit stack */ \
 	FIXUP_ESPFIX_STACK; \
-	jmp 27b; \
-.previous
+27:	popl %eax; \
+	CFI_ADJUST_CFA_OFFSET -4; \
 
 /*
  * Build the entry stubs and pointer table with
@@ -539,7 +686,7 @@ syscall_badsys:
  */
 .data
 ENTRY(interrupt)
-.text
+.previous
 
 vector=0
 ENTRY(irq_entries_start)
@@ -554,7 +701,7 @@ ENTRY(irq_entries_start)
 	jmp common_interrupt
 .data
 	.long 1b
-.text
+.previous
 vector=vector+1
 .endr
 
@@ -629,13 +776,17 @@ error_code:
 	CFI_ADJUST_CFA_OFFSET -4
 	/*CFI_REGISTER es, ecx*/
 	movl ES(%esp), %edi		# get the function address
-	movl ORIG_EAX(%esp), %edx	# get the error code
-	movl %eax, ORIG_EAX(%esp)
 	movl %ecx, ES(%esp)
 	/*CFI_REL_OFFSET es, ES*/
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
+
+/* clobbers edx, ebx and ebp */
+	__SWITCH_TO_KERNELSPACE
+
+	movl ORIG_EAX(%esp), %edx	# get the error code
+	movl %eax, ORIG_EAX(%esp)
 	movl %esp,%eax			# pt_regs pointer
 	call *%edi
 	jmp ret_from_exception
@@ -721,7 +872,7 @@ debug_stack_correct:
 	call do_debug
 	jmp ret_from_exception
 	CFI_ENDPROC
-	.previous .text
+	.previous
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * a debug fault, and the debug fault hasn't yet been able to
@@ -758,11 +909,46 @@ nmi_stack_correct:
 	/* We have a RING0_INT_FRAME here */
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
+	__SAVE_ALL
+#ifdef CONFIG_X86_4G
+/*
+ * NMI can be called when ints are masked and, though we may be in the kernel,
+ * cr3 may still point to the user space pgdir (e.g. we only entered the syscall).
+ * So we check esp here, which is a flag of whether we are fully in the kernel.
+ * We could omit the esp reloading for speedup, but to be completely
+ * unnoticeable we reload both esp and cr3 anyway...
+ */
+	/* save prev esp/cr3 then switch to kernel */
+	movl %esp, %edi
+	movl %cr3, %esi
+	__SWITCH_TO_KERNELSPACE
+#endif
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
+#ifdef CONFIG_X86_4G
+	pushl %esi		# cr3
+	pushl %edi		# esp
+#endif
+
 	call do_nmi
-	jmp restore_nocheck_notrace
+#ifdef CONFIG_X86_4G
+	/* switch back */
+	popl %eax		# esp
+	popl %ebx		# cr3
+	cmpl $0xff000000, %eax
+	jb 2f
+	movl %eax, %esp
+	movl %ebx, %cr3
+2:
+#endif
+	__RESTORE_REGS
+	addl $4, %esp
+	CFI_ADJUST_CFA_OFFSET -4
+1:	iret
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
 	CFI_ENDPROC
 
 nmi_stack_fixup:
@@ -799,12 +985,39 @@ nmi_16bit_stack:
 	.endr
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
+	__SAVE_ALL
+#ifdef CONFIG_X86_4G
+/*
+ * NMI can be called when ints are masked and, though we may be in the kernel,
+ * cr3 may still point to the user space pgdir (e.g. we only entered the syscall).
+ * So we check esp here, which is a flag of whether we are fully in the kernel.
+ * We could omit the esp reloading for speedup, but to be completely
+ * unnoticeable we reload both esp and cr3 anyway...
+ */
+	/* save prev esp/cr3 then switch to kernel */
+	movl %esp, %ebp
+	movl %cr3, %ebx
+	__SWITCH_TO_KERNELSPACE
+#endif
 	FIXUP_ESPFIX_STACK		# %eax == %esp
 	CFI_ADJUST_CFA_OFFSET -20	# the frame has now moved
 	xorl %edx,%edx			# zero error code
+#ifdef CONFIG_X86_4G
+	pushl %ebx		# cr3
+	pushl %ebp		# esp
+#endif
 	call do_nmi
-	RESTORE_REGS
+#ifdef CONFIG_X86_4G
+	/* switch back */
+	popl %eax		# esp
+	popl %ebx		# cr3
+	cmpl $0xff000000, %eax
+	jb 2f
+	movl %eax, %esp
+	movl %ebx, %cr3
+2:
+#endif
+	__RESTORE_REGS
 	lss 12+4(%esp), %esp		# back to 16bit stack
 1:	iret
 	CFI_ENDPROC
@@ -823,7 +1036,7 @@ KPROBE_ENTRY(int3)
 	call do_int3
 	jmp ret_from_exception
 	CFI_ENDPROC
-	.previous .text
+	.previous
 
 ENTRY(overflow)
 	RING0_INT_FRAME
@@ -888,7 +1101,7 @@ KPROBE_ENTRY(general_protection)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
-	.previous .text
+	.previous
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
@@ -903,7 +1116,7 @@ KPROBE_ENTRY(page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
-	.previous .text
+	.previous
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/entry_trampoline.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/entry_trampoline.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/entry_trampoline.c	2017-01-13 08:40:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/entry_trampoline.c	2017-01-13 08:40:14.000000000 -0500
@@ -0,0 +1,82 @@
+/*
+ * linux/arch/i386/kernel/entry_trampoline.c
+ *
+ * (C) Copyright 2003 Ingo Molnar
+ *
+ * This file contains the needed support code for 4GB userspace
+ */
+
+#ifndef CONFIG_XEN
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/highmem.h>
+#include <asm/desc.h>
+#include <asm/atomic_kmap.h>
+
+extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text;
+
+void __init init_entry_mappings(void)
+{
+#ifdef CONFIG_X86_HIGH_ENTRY
+
+	void *tramp;
+	int p;
+
+	/*
+	 * We need a high IDT and GDT for the 4G/4G split:
+	 */
+	trap_init_virtual_IDT();
+
+	tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0);
+	BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 8*PAGE_SIZE);
+	for (p = 0; p < FIX_ENTRY_TRAMPOLINE_0 - FIX_ENTRY_TRAMPOLINE_N; p++)
+		__set_fixmap(FIX_ENTRY_TRAMPOLINE_0 - p, __pa((unsigned long)&__entry_tramp_start) + p * PAGE_SIZE, PAGE_KERNEL_EXEC);
+
+	for (p = 0; p < (NR_CPUS * CPU_16BIT_STACK_SIZE) / PAGE_SIZE; p++)
+		__set_fixmap(FIX_16BIT_STACK_0 - p, __pa((unsigned long)&cpu_16bit_stack) + p * PAGE_SIZE, PAGE_KERNEL);
+			
+	printk("mapped 4G/4G trampoline to %p.\n", tramp);
+	BUG_ON((void *)&__start___entry_text != tramp);
+	/*
+	 * Virtual kernel stack:
+	 */
+	BUG_ON(__kmap_atomic_vaddr(KM_VSTACK_TOP) & (THREAD_SIZE-1));
+	BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE);
+
+	/*
+	 * set up the initial thread's virtual stack related
+	 * fields:
+	 */
+	for (p = 0; p < ARRAY_SIZE(current->thread.stack_page); p++)
+		current->thread.stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE));
+
+	current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP);
+
+	for (p = 0; p < ARRAY_SIZE(current->thread.stack_page); p++) {
+		__kunmap_atomic_type(KM_VSTACK_TOP-p);
+		__kmap_atomic(current->thread.stack_page[p], KM_VSTACK_TOP-p);
+	}
+#endif
+	current->thread_info->real_stack = (void *)current->thread_info;
+	current->thread_info->user_pgd = NULL;
+	current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE;
+}
+
+
+
+void __init entry_trampoline_setup(void)
+{
+	/*
+	 * old IRQ entries set up by the boot code will still hang
+	 * around - they are a sign of hw trouble anyway, now they'll
+	 * produce a double fault message.
+	 */
+	trap_init_virtual_GDT();
+}
+
+#endif /* !CONFIG_XEN */
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/entry-xen.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/entry-xen.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/entry-xen.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/entry-xen.S	2017-01-13 08:40:40.000000000 -0500
@@ -239,6 +239,7 @@ ENTRY(ret_from_fork)
 	GET_THREAD_INFO(%ebp)
 	popl %eax
 	CFI_ADJUST_CFA_OFFSET -4
+ret_from_fork_tail:
 	pushl $0x0202			# Reset kernel eflags
 	CFI_ADJUST_CFA_OFFSET 4
 	popfl
@@ -246,6 +247,25 @@ ENTRY(ret_from_fork)
 	jmp syscall_exit
 	CFI_ENDPROC
 
+ENTRY(i386_ret_from_resume)
+	CFI_STARTPROC
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	movl (%esp),%eax
+	testl %eax,%eax
+	jz    1f
+	pushl %esp
+	call  *%eax
+	addl  $4,%esp
+1:
+	addl  $256,%esp
+	jmp   ret_from_fork_tail
+	CFI_ENDPROC
+
 /*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/head.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/head.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/head.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/head.S	2017-01-13 08:40:14.000000000 -0500
@@ -129,10 +129,13 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	stosl
 	addl $0x1000,%eax
 	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
-	cmpl %ebp,%eax
+	/*
+	 * Some smart code used to be here...
+	 * but it didn't work, because it didn't take into account the pages
+	 * to be allocated for the pmd/pte pages themselves.
+	 * Instead simply map first 32Mb of RAM and be happy. --dev@openvz.org
+	 */
+	cmpl $(32*1024*1024 + 0x007), %eax
 	jb 10b
 	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
 
@@ -314,6 +317,9 @@ is386:	movl $2,%ecx		# set MP
 	call check_x87
 	lgdt cpu_gdt_descr
 	lidt idt_descr
+
+	movl $1f,%eax
+	movl (%eax),%eax
 	ljmp $(__KERNEL_CS),$1f
 1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
 	movl %eax,%ss			# after changing gdt.
@@ -483,6 +489,9 @@ ENTRY(boot_gdt_table)
 	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
 	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
 
+/* align on page boundary to be sure that gdt will fit to one page */
+.section .data.gdt, "aw"
+
 /*
  * The Global Descriptor Table contains 28 quadwords, per-CPU.
  */
@@ -535,3 +544,6 @@ ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* 0xf0 - unused */
 	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
 
+#ifdef CONFIG_SMP
+	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
+#endif
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/i386_ksyms.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/i386_ksyms.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/i386_ksyms.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/i386_ksyms.c	2017-01-13 08:40:24.000000000 -0500
@@ -1,13 +1,15 @@
 #include <linux/module.h>
 #include <asm/checksum.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
 
 EXPORT_SYMBOL(__down_failed);
 EXPORT_SYMBOL(__down_failed_interruptible);
 EXPORT_SYMBOL(__down_failed_trylock);
 EXPORT_SYMBOL(__up_wakeup);
 /* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_generic);
+
+EXPORT_SYMBOL(direct_csum_partial_copy_generic);
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
@@ -27,4 +29,5 @@ EXPORT_SYMBOL(__write_lock_failed);
 EXPORT_SYMBOL(__read_lock_failed);
 #endif
 
+EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(csum_partial);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/i387.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/i387.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/i387.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/i387.c	2017-01-13 08:40:14.000000000 -0500
@@ -238,6 +238,7 @@ convert_fxsr_env_to_i387(unsigned long e
 static int convert_fxsr_to_user(struct _fpstate __user *buf,
 				struct i387_fxsave_struct *fxsave)
 {
+	struct _fpreg tmp[8]; /* 80 bytes scratch area */
 	unsigned long env[7];
 	struct _fpreg __user *to;
 	struct _fpxreg *from;
@@ -247,17 +248,18 @@ static int convert_fxsr_to_user(struct _
 	if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
 		return 1;
 
-	to = &buf->_st[0];
+	to = tmp;
 	from = (struct _fpxreg *) &fxsave->st_space[0];
 	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
 		unsigned long __user *t = (unsigned long __user *)to;
 		unsigned long *f = (unsigned long *)from;
 
-		if (__put_user(*f, t) ||
-				__put_user(*(f + 1), t + 1) ||
-				__put_user(from->exponent, &to->exponent))
-			return 1;
+		*t = *f;
+		*(t + 1) = *(f+1);
+		to->exponent = from->exponent;
 	}
+	if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8])))
+		return 1;
 	return 0;
 }
 
@@ -278,6 +280,7 @@ convert_fxsr_env_from_i387(struct i387_f
 static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
 					  struct _fpstate __user *buf )
 {
+	struct _fpreg tmp[8]; /* 80 bytes scratch area */
 	unsigned long env[7];
 	struct _fpxreg *to;
 	struct _fpreg __user *from;
@@ -285,19 +288,20 @@ static int convert_fxsr_from_user( struc
 
 	if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
 		return 1;
+	if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8])))
+		return 1;
 
 	convert_fxsr_env_from_i387(fxsave, env);
 
 	to = (struct _fpxreg *) &fxsave->st_space[0];
-	from = &buf->_st[0];
+	from = tmp;
 	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
 		unsigned long *t = (unsigned long *)to;
 		unsigned long __user *f = (unsigned long __user *)from;
 
-		if (__get_user(*t, f) ||
-				__get_user(*(t + 1), f + 1) ||
-				__get_user(to->exponent, &from->exponent))
-			return 1;
+		*t = *f;
+		*(t + 1) = *(f + 1);
+		to->exponent = from->exponent;
 	}
 	return 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -5,6 +5,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -15,6 +16,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
@@ -27,7 +29,7 @@ EXPORT_SYMBOL(init_mm);
  */
 union thread_union init_thread_union 
 	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
+		{ INIT_THREAD_INFO(init_task, init_thread_union) };
 
 /*
  * Initial task structure.
@@ -40,7 +42,9 @@ EXPORT_SYMBOL(init_task);
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's.
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */ 
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
-
+struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS };
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/init_task-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/init_task-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/init_task-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/init_task-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -5,6 +5,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -21,6 +22,8 @@ struct mm_struct init_mm = INIT_MM(init_
 
 EXPORT_SYMBOL(init_mm);
 
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
+
 /*
  * Initial thread structure.
  *
@@ -30,7 +33,7 @@ EXPORT_SYMBOL(init_mm);
  */
 union thread_union init_thread_union 
 	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
+		{ INIT_THREAD_INFO(init_task, init_thread_union) };
 
 /*
  * Initial task structure.
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/ioport.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ioport.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/ioport.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ioport.c	2017-01-13 08:40:14.000000000 -0500
@@ -89,7 +89,7 @@ asmlinkage long sys_ioperm(unsigned long
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = init_tss + get_cpu();
 
 	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/irq.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/irq.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/irq.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/irq.c	2017-01-13 08:40:20.000000000 -0500
@@ -115,6 +115,8 @@ fastcall unsigned int do_IRQ(struct pt_r
 		/* build the stack frame on the IRQ stack */
 		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
 		irqctx->tinfo.task = curctx->tinfo.task;
+		irqctx->tinfo.real_stack = curctx->tinfo.real_stack;
+		irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack;
 		irqctx->tinfo.previous_esp = current_stack_pointer;
 
 		/*
@@ -211,6 +213,8 @@ asmlinkage void do_softirq(void)
 		curctx = current_thread_info();
 		irqctx = softirq_ctx[smp_processor_id()];
 		irqctx->tinfo.task = curctx->task;
+		irqctx->tinfo.real_stack = curctx->real_stack;
+		irqctx->tinfo.virtual_stack = curctx->virtual_stack;
 		irqctx->tinfo.previous_esp = current_stack_pointer;
 
 		/* build the stack frame on the softirq stack */
@@ -292,6 +296,11 @@ skip:
 				per_cpu(irq_stat,j).apic_timer_irqs);
 		seq_putc(p, '\n');
 #endif
+		seq_printf(p, "RES: ");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ",
+					__IRQ_STAT(j, __reschedule_count));
+		seq_putc(p, '\n');
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
 		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/kprobes.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/kprobes.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/kprobes.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/kprobes.c	2017-01-13 08:40:24.000000000 -0500
@@ -32,6 +32,7 @@
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
 #include <linux/module.h>
+#include <linux/mm.h>
 #include <asm/cacheflush.h>
 #include <asm/kdebug.h>
 #include <asm/desc.h>
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/ldt.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ldt.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/ldt.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ldt.c	2017-01-13 08:40:25.000000000 -0500
@@ -2,7 +2,7 @@
  * linux/kernel/ldt.c
  *
  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 1999, 2003 Ingo Molnar <mingo@redhat.com>
  */
 
 #include <linux/errno.h>
@@ -10,15 +10,20 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
+#include <asm/atomic_kmap.h>
+
+#include <ub/ub_mem.h>
 
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
@@ -30,36 +35,35 @@ static void flush_ldt(void *null)
 
 static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 {
-	void *oldldt;
-	void *newldt;
-	int oldsize;
+	int oldsize, newsize, i;
 
 	if (mincount <= pc->size)
 		return 0;
+	/*
+	 * LDT got larger - reallocate if necessary.
+	 */
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
-	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
-	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
-	if (!newldt)
-		return -ENOMEM;
-
-	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
-	oldldt = pc->ldt;
-	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
-	pc->ldt = newldt;
-	wmb();
+	newsize = mincount*LDT_ENTRY_SIZE;
+	for (i = 0; i < newsize; i += PAGE_SIZE) {
+		int nr = i/PAGE_SIZE;
+		BUG_ON(i >= 64*1024);
+		if (!pc->ldt_pages[nr]) {
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER | __GFP_UBC);
+			if (!pc->ldt_pages[nr])
+				return -ENOMEM;
+			clear_highpage(pc->ldt_pages[nr]);
+		}
+	}
 	pc->size = mincount;
-	wmb();
-
 	if (reload) {
 #ifdef CONFIG_SMP
 		cpumask_t mask;
-		preempt_disable();
+
+		local_irq_disable();
 		load_LDT(pc);
+		local_irq_enable();
+		preempt_disable();
 		mask = cpumask_of_cpu(smp_processor_id());
 		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
 			smp_call_function(flush_ldt, NULL, 1, 1);
@@ -68,24 +72,32 @@ static int alloc_ldt(mm_context_t *pc, i
 		load_LDT(pc);
 #endif
 	}
-	if (oldsize) {
-		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(oldldt);
-		else
-			kfree(oldldt);
-	}
 	return 0;
 }
 
 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 {
-	int err = alloc_ldt(new, old->size, 0);
-	if (err < 0)
+	int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE;
+
+	err = alloc_ldt(new, size, 0);
+	if (err < 0) {
+		new->size = 0;
 		return err;
-	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+	}
+	for (i = 0; i < nr_pages; i++)
+		copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0);
 	return 0;
 }
 
+static void free_ldt(mm_context_t *mc)
+{
+	int i;
+
+	for (i = 0; i < MAX_LDT_PAGES; i++)
+		if (mc->ldt_pages[i])
+			__free_page(mc->ldt_pages[i]);
+}
+
 /*
  * we do not have to muck with descriptors here, that is
  * done in switch_mm() as needed.
@@ -97,34 +109,35 @@ int init_new_context(struct task_struct 
 
 	init_MUTEX(&mm->context.sem);
 	mm->context.size = 0;
+	memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES);
 	old_mm = current->mm;
 	if (old_mm && old_mm->context.size > 0) {
 		down(&old_mm->context.sem);
 		retval = copy_ldt(&mm->context, &old_mm->context);
+		if (retval < 0)
+			free_ldt(&mm->context);
 		up(&old_mm->context.sem);
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * No need to lock the MM as we are the last user
+ * Do not touch the ldt register, we are already
+ * in the next thread.
  */
 void destroy_context(struct mm_struct *mm)
 {
-	if (mm->context.size) {
-		if (mm == current->active_mm)
-			clear_LDT();
-		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(mm->context.ldt);
-		else
-			kfree(mm->context.ldt);
-		mm->context.size = 0;
-	}
+	/* we have to free *all* the pages, since alloc_ldt could allocate more pages,
+	   then we have according to size if ENOMEM happened on allocation */
+	free_ldt(&mm->context);
+	mm->context.size = 0;
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
 {
-	int err;
+	int err, i;
 	unsigned long size;
 	struct mm_struct * mm = current->mm;
 
@@ -139,8 +152,25 @@ static int read_ldt(void __user * ptr, u
 		size = bytecount;
 
 	err = 0;
-	if (copy_to_user(ptr, mm->context.ldt, size))
-		err = -EFAULT;
+	/*
+	 * This is necessary just in case we got here straight from a
+	 * context-switch where the ptes were set but no tlb flush
+	 * was done yet. We rather avoid doing a TLB flush in the
+	 * context-switch path and do it here instead.
+	 */
+	__flush_tlb_global();
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int nr = i / PAGE_SIZE, bytes;
+		char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+		bytes = size - i;
+		if (bytes > PAGE_SIZE)
+			bytes = PAGE_SIZE;
+		if (copy_to_user(ptr + i, kaddr, bytes))
+			err = -EFAULT;
+		kunmap(mm->context.ldt_pages[nr]);
+	}
 	up(&mm->context.sem);
 	if (err < 0)
 		goto error_return;
@@ -164,7 +194,7 @@ static int read_default_ldt(void __user 
 
 	err = 0;
 	address = &default_ldt[0];
-	size = 5*sizeof(struct desc_struct);
+	size = 5*LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
 
@@ -206,6 +236,14 @@ static int write_ldt(void __user * ptr, 
 			goto out_unlock;
 	}
 
+	/*
+	 * No rescheduling allowed from this point to the install.
+	 *
+	 * We do a TLB flush for the same reason as in the read_ldt() path.
+	 */
+	preempt_disable();
+	__flush_tlb_global();
+
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
 		if (oldmode || LDT_empty(&ldt_info)) {
@@ -222,8 +260,10 @@ static int write_ldt(void __user * ptr, 
 
 	/* Install the new entry ...  */
 install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
+	write_ldt_entry((void *)__kmap_atomic_vaddr(KM_LDT_PAGE0),
+			ldt_info.entry_number, entry_1, entry_2);
 	error = 0;
+	preempt_enable();
 
 out_unlock:
 	up(&mm->context.sem);
@@ -251,3 +291,29 @@ asmlinkage int sys_modify_ldt(int func, 
 	}
 	return ret;
 }
+
+/*
+ * load one particular LDT into the current CPU
+ */
+void load_LDT_nolock(mm_context_t *pc, int cpu)
+{
+	struct page **pages = pc->ldt_pages;
+	int count = pc->size;
+	int nr_pages, i;
+
+	if (likely(!count)) {
+		pages = &default_ldt_page;
+		count = 5;
+	}
+       	nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
+
+	for (i = 0; i < nr_pages; i++) {
+		__kunmap_atomic_type(KM_LDT_PAGE0 - i);
+		__kmap_atomic(pages[i], KM_LDT_PAGE0 - i);
+	}
+	set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count);
+	load_LDT_desc();
+}
+
+EXPORT_SYMBOL(load_LDT_nolock);
+EXPORT_SYMBOL_GPL(default_ldt);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/ldt-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ldt-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/ldt-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ldt-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -20,6 +21,8 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -39,9 +42,9 @@ static int alloc_ldt(mm_context_t *pc, i
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -118,6 +121,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * No need to lock the MM as we are the last user
@@ -268,3 +272,5 @@ asmlinkage int sys_modify_ldt(int func, 
 	}
 	return ret;
 }
+
+EXPORT_SYMBOL_GPL(default_ldt);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/Makefile kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/Makefile
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/Makefile	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/Makefile	2017-01-13 08:40:14.000000000 -0500
@@ -8,7 +8,7 @@ obj-y	:= process.o semaphore.o signal.o 
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o bootflag.o \
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o \
-		perfctr-watchdog.o
+		perfctr-watchdog.o entry_trampoline.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/process.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/process.c	2017-01-13 08:40:41.000000000 -0500
@@ -38,6 +38,7 @@
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/personality.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -47,17 +48,21 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/desc.h>
+#include <asm/atomic_kmap.h>
 #include <asm/vm86.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
 
 #include <linux/err.h>
+#include <linux/utsrelease.h>
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void i386_ret_from_resume(void) __asm__("i386_ret_from_resume");
+EXPORT_SYMBOL_GPL(i386_ret_from_resume);
 
 static int hlt_counter;
 
@@ -298,18 +303,22 @@ __setup("idle=", idle_setup);
 void show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+	extern int die_counter;
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->eip);
+	printk("Pid: %d, comm: %20s, oopses: %d\n",
+			current->pid, current->comm, die_counter);
+	printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current));
+	if (decode_call_traces)
+		print_symbol("EIP is at %s\n", regs->eip);
 
 	if (user_mode_vm(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
-	       regs->eflags, print_tainted(), system_utsname.release,
-	       (int)strcspn(system_utsname.version, " "),
-	       system_utsname.version);
+	printk(" EFLAGS: %08lx    %s  (%s %.*s %s)\n",
+	       regs->eflags, print_tainted(), init_utsname()->release,
+	       (int)strcspn(init_utsname()->version, " "),
+	       init_utsname()->version, VZVERSION);
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->eax,regs->ebx,regs->ecx,regs->edx);
 	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
@@ -323,6 +332,8 @@ void show_regs(struct pt_regs * regs)
 	cr4 = read_cr4_safe();
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
 	show_trace(NULL, regs, &regs->esp);
+	if (!decode_call_traces)
+		printk(" EIP: [<%08lx>]\n",regs->eip);
 }
 
 /*
@@ -331,6 +342,7 @@ void show_regs(struct pt_regs * regs)
  * the "args".
  */
 extern void kernel_thread_helper(void);
+EXPORT_SYMBOL(kernel_thread_helper);
 
 /*
  * Create a kernel thread
@@ -340,6 +352,13 @@ int kernel_thread(int (*fn)(void *), voi
 	struct pt_regs regs;
 	int err;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside container\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 
 	regs.ebx = (unsigned long) fn;
@@ -370,7 +389,7 @@ void exit_thread(void)
 		struct task_struct *tsk = current;
 		struct thread_struct *t = &tsk->thread;
 		int cpu = get_cpu();
-		struct tss_struct *tss = &per_cpu(init_tss, cpu);
+		struct tss_struct *tss = init_tss + cpu;
 
 		kfree(t->io_bitmap_ptr);
 		t->io_bitmap_ptr = NULL;
@@ -392,6 +411,9 @@ void flush_thread(void)
 	struct task_struct *tsk = current;
 
 	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	clear_thread_flag(TIF_DB7);
+#endif
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
 	/*
@@ -422,7 +444,7 @@ int copy_thread(int nr, unsigned long cl
 {
 	struct pt_regs * childregs;
 	struct task_struct *tsk;
-	int err;
+	int err, i;
 
 	childregs = task_pt_regs(p);
 	*childregs = *regs;
@@ -432,7 +454,18 @@ int copy_thread(int nr, unsigned long cl
 	p->thread.esp = (unsigned long) childregs;
 	p->thread.esp0 = (unsigned long) (childregs+1);
 
+	/*
+	 * get the two stack pages, for the virtual stack.
+	 *
+	 * IMPORTANT: this code relies on the fact that the task
+	 * structure is an THREAD_SIZE aligned piece of physical memory.
+	 */
+	for (i = 0; i < ARRAY_SIZE(p->thread.stack_page); i++)
+		p->thread.stack_page[i] =
+				virt_to_page((unsigned long)p->thread_info + (i*PAGE_SIZE));
+
 	p->thread.eip = (unsigned long) ret_from_fork;
+	p->thread_info->real_stack = p->thread_info;
 
 	savesegment(fs,p->thread.fs);
 	savesegment(gs,p->thread.gs);
@@ -647,7 +680,7 @@ struct task_struct fastcall * __switch_t
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = init_tss + cpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
@@ -660,10 +693,42 @@ struct task_struct fastcall * __switch_t
 	if (next_p->fpu_counter > 5)
 		prefetch(&next->i387.fxsave);
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+{
+	int i;
+	/*
+	 * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is
+	 * needed because otherwise NMIs could interrupt the
+	 * user-return code with a virtual stack and stale TLBs.)
+	 */
+	for (i = 0; i < ARRAY_SIZE(next->stack_page); i++) {
+		__kunmap_atomic_type(KM_VSTACK_TOP-i);
+		__kmap_atomic(next->stack_page[i], KM_VSTACK_TOP-i);
+	}
+	/*
+	 * NOTE: here we rely on the task being the stack as well
+	 */
+	next_p->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP);
+}
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+	/*
+	 * If next was preempted on entry from userspace to kernel,
+	 * and now it's on a different cpu, we need to adjust %esp.
+	 * This assumes that entry.S does not copy %esp while on the
+	 * virtual stack (with interrupts enabled): which is so,
+	 * except within __SWITCH_TO_KERNELSPACE itself.
+	 */
+	if (unlikely(next->esp >= TASK_SIZE)) {
+		next->esp &= THREAD_SIZE - 1;
+		next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
+	}
+#endif
+#endif
+
 	/*
 	 * Reload esp0.
 	 */
-	load_esp0(tss, next);
+	load_virtual_esp0(tss, next_p);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/process-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/process-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/process-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/process-xen.c	2017-01-13 08:40:41.000000000 -0500
@@ -37,6 +37,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
 #include <linux/random.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -61,6 +62,8 @@
 #include <asm/cpu.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void i386_ret_from_resume(void) __asm__("i386_ret_from_resume");
+EXPORT_SYMBOL_GPL(i386_ret_from_resume);
 
 static int hlt_counter;
 
@@ -203,18 +206,20 @@ void mwait_idle_with_hints(unsigned long
 void show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+	extern int die_counter;
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->eip);
+	printk("Pid: %d, comm: %20s, oopses: %d\n",
+				current->pid, current->comm, die_counter);
+	printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current));
 
 	if (user_mode_vm(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
 	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
-	       regs->eflags, print_tainted(), system_utsname.release,
-	       (int)strcspn(system_utsname.version, " "),
-	       system_utsname.version);
+		regs->eflags, print_tainted(), init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version);
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->eax,regs->ebx,regs->ecx,regs->edx);
 	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
@@ -236,6 +241,7 @@ void show_regs(struct pt_regs * regs)
  * the "args".
  */
 extern void kernel_thread_helper(void);
+EXPORT_SYMBOL(kernel_thread_helper);
 __asm__(".section .text\n"
 	".align 4\n"
 	"kernel_thread_helper:\n\t"
@@ -253,6 +259,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside container\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 
 	regs.ebx = (unsigned long) fn;
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/ptrace.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ptrace.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/ptrace.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/ptrace.c	2017-01-13 08:40:25.000000000 -0500
@@ -19,6 +19,7 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
+#include <linux/highmem.h>
 #include <linux/elf.h>
 
 #include <asm/tracehook.h>
@@ -31,7 +32,10 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/tracehook.h>
-
+#include <asm/tlbflush.h>
+#ifdef CONFIG_VE
+#include <asm/unistd.h>
+#endif
 
 /*
  * Determines which flags the user has access to [1 = access, 0 = no access].
@@ -133,10 +137,29 @@ static int putreg(struct task_struct *ch
 	return 0;
 }
 
+
+#ifdef CONFIG_VE
+static inline unsigned long ptrace_hack_child_pid(struct task_struct *child, unsigned long regno, unsigned long value)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+
+	if ((offsetof(struct pt_regs, eax) == regno) && 
+		ve_is_super(get_exec_env()) &&
+		!ve_is_super(child->ve_task_info.owner_env) && 
+		((regs->orig_eax == __NR_fork) || 
+		(regs->orig_eax == __NR_vfork) || 
+		(regs->orig_eax == __NR_clone)) && 
+		((long)value > 0))
+			value = vpid_to_pid_ve(value, child->ve_task_info.owner_env);
+	return value;
+}
+#endif
+
 static unsigned long getreg(struct task_struct *child,
 	unsigned long regno)
 {
 	unsigned long retval = ~0UL;
+	unsigned long orig_regno = regno;
 
 	switch (regno >> 2) {
 	case FS:
@@ -165,6 +188,9 @@ static unsigned long getreg(struct task_
 			regno -= 2*4;
 		regno = regno - sizeof(struct pt_regs);
 		retval &= get_stack_long(child, regno);
+#ifdef CONFIG_VE
+		retval = ptrace_hack_child_pid(child, orig_regno, retval);
+#endif
 	}
 	return retval;
 }
@@ -198,15 +224,27 @@ static unsigned long convert_eip_to_line
 		if (unlikely((seg >> 3) >= child->mm->context.size))
 			addr = -1L; /* bogus selector, access would fault */
 		else {
-			desc = child->mm->context.ldt + seg;
+#ifndef CONFIG_XEN 
+			/* horrible hack for 4/4 disabled kernels.
+			   I'm not quite sure what the TLB flush is good for,
+			   it's mindlessly copied from the read_ldt code */
+			__flush_tlb_global();
+			desc = kmap(current->mm->context.ldt_pages[(seg&~7UL)/PAGE_SIZE]);
+			desc = (void *)desc + ((seg & ~7UL) % PAGE_SIZE);
+#else
+			desc = child->mm->context.ldt + (seg & ~7UL);
+#endif
 			base = ((desc[0] >> 16) |
 				((desc[1] & 0xff) << 16) |
 				(desc[1] & 0xff000000));
 
-		/* 16-bit code segment? */
-		if (!((desc[1] >> 22) & 1))
-			addr &= 0xffff;
-		addr += base;
+			/* 16-bit code segment? */
+			if (!((desc[1] >> 22) & 1))
+				addr &= 0xffff;
+			addr += base;
+#ifndef CONFIG_XEN
+			kunmap((void *)((unsigned long)desc & PAGE_MASK));
+#endif
 		}
 		up(&child->mm->context.sem);
 	}
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/reboot.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/reboot.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/reboot.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/reboot.c	2017-01-13 08:40:14.000000000 -0500
@@ -219,12 +219,11 @@ void machine_real_restart(unsigned char 
 	CMOS_WRITE(0x00, 0x8f);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	/* Remap the kernel at virtual address zero, as well as offset zero
-	   from the kernel segment.  This assumes the kernel segment starts at
-	   virtual address PAGE_OFFSET. */
-
-	memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-		sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+	/*
+	 * Remap the first 16 MB of RAM (which includes the kernel image)
+	 * at virtual address zero:
+	 */
+	setup_identity_mappings(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE);
 
 	/*
 	 * Use `swapper_pg_dir' as our page directory.
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/reboot_fixups.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/reboot_fixups.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/reboot_fixups.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/reboot_fixups.c	2017-01-13 08:40:15.000000000 -0500
@@ -11,6 +11,7 @@
 #include <asm/delay.h>
 #include <linux/pci.h>
 #include <linux/reboot_fixups.h>
+#include <linux/interrupt.h>
 
 static void cs5530a_warm_reset(struct pci_dev *dev)
 {
@@ -43,6 +44,11 @@ void mach_reboot_fixups(void)
 	struct pci_dev *dev;
 	int i;
 
+	/* we can be called from sysrq-B code. In such a case it is
+	 * prohibited to dig PCI */
+	if (in_interrupt())
+		return;
+
 	for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
 		cur = &(fixups_table[i]);
 		dev = pci_get_device(cur->vendor, cur->device, NULL);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/setup.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/setup.c	2017-01-13 08:40:14.000000000 -0500
@@ -551,6 +551,9 @@ void __init add_memory_region(unsigned l
 			      unsigned long long size, int type)
 {
 	int x;
+#ifndef CONFIG_X86_4G
+	static int sillymemwarning = 0;
+#endif
 
 	if (!efi_enabled) {
        		x = e820.nr_map;
@@ -560,6 +563,19 @@ void __init add_memory_region(unsigned l
 		    return;
 		}
 
+#ifndef CONFIG_X86_4G
+		/*
+		 * For kernels without 4G/4G split, printk a note
+		 * pointing at the hugemem kernel from 8Gb onwards:
+		 */
+		if (start + size >= 0x200000000ULL && !sillymemwarning++) {
+			printk("**********************************************************\n");
+			printk("* This system has more than 8 Gigabyte of memory.        *\n");
+			printk("* It is recommended to install enterprise kernel version *\n");
+			printk("**********************************************************\n");
+		}
+#endif
+
 		e820.map[x].addr = start;
 		e820.map[x].size = size;
 		e820.map[x].type = type;
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/signal.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/signal.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/signal.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/signal.c	2017-01-13 08:40:15.000000000 -0500
@@ -99,28 +99,29 @@ sys_sigaltstack(unsigned long ebx)
  */
 
 static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
+restore_sigcontext(struct pt_regs *regs,
+		struct sigcontext __user *__sc, int *peax)
 {
-	unsigned int err = 0;
+	struct sigcontext scratch; /* 88 bytes of scratch area */
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+	if (copy_from_user(&scratch, __sc, sizeof(scratch)))
+		return -EFAULT;
+
+#define COPY(x)		regs->x = scratch.x
 
 #define COPY_SEG(seg)							\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	{ unsigned short tmp = scratch.seg;				\
 	  regs->x##seg = tmp; }
 
 #define COPY_SEG_STRICT(seg)						\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	{ unsigned short tmp = scratch.seg;				\
 	  regs->x##seg = tmp|3; }
 
 #define GET_SEG(seg)							\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	{ unsigned short tmp = scratch.seg;				\
 	  loadsegment(seg,tmp); }
 
 #define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_RF |		 \
@@ -144,19 +145,18 @@ restore_sigcontext(struct pt_regs *regs,
 	COPY_SEG_STRICT(ss);
 	
 	{
-		unsigned int tmpflags;
-		err |= __get_user(tmpflags, &sc->eflags);
+		unsigned int tmpflags = scratch.eflags;
 		regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
-		struct _fpstate __user * buf;
-		err |= __get_user(buf, &sc->fpstate);
+		struct _fpstate * buf = scratch.fpstate;
 		if (buf) {
 			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
+				return -EFAULT;
+			if (restore_i387(buf))
+				return -EFAULT;
 		} else {
 			struct task_struct *me = current;
 			if (used_math()) {
@@ -166,11 +166,8 @@ restore_sigcontext(struct pt_regs *regs,
 		}
 	}
 
-	err |= __get_user(*peax, &sc->eax);
-	return err;
-
-badframe:
-	return 1;
+	*peax = scratch.eax;
+	return 0;
 }
 
 asmlinkage int sys_sigreturn(unsigned long __unused)
@@ -239,46 +236,49 @@ badframe:
  */
 
 static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
 {
-	int tmp, err = 0;
+	struct sigcontext sc; /* 88 bytes of scratch area */
+	int tmp;
 
 	tmp = 0;
 	savesegment(gs, tmp);
-	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	*(unsigned int *)&sc.gs = tmp;
 	savesegment(fs, tmp);
-	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
+	*(unsigned int *)&sc.fs = tmp;
 
-	err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
-	err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
-	err |= __put_user(regs->edi, &sc->edi);
-	err |= __put_user(regs->esi, &sc->esi);
-	err |= __put_user(regs->ebp, &sc->ebp);
-	err |= __put_user(regs->esp, &sc->esp);
-	err |= __put_user(regs->ebx, &sc->ebx);
-	err |= __put_user(regs->edx, &sc->edx);
-	err |= __put_user(regs->ecx, &sc->ecx);
-	err |= __put_user(regs->eax, &sc->eax);
-	err |= __put_user(current->thread.trap_no, &sc->trapno);
-	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->eip, &sc->eip);
-	err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
-	err |= __put_user(regs->eflags, &sc->eflags);
-	err |= __put_user(regs->esp, &sc->esp_at_signal);
-	err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
+	*(unsigned int *)&sc.es = regs->xes;
+	*(unsigned int *)&sc.ds = regs->xds;
+	sc.edi = regs->edi;
+	sc.esi = regs->esi;
+	sc.ebp = regs->ebp;
+	sc.esp = regs->esp;
+	sc.ebx = regs->ebx;
+	sc.edx = regs->edx;
+	sc.ecx = regs->ecx;
+	sc.eax = regs->eax;
+	sc.trapno = current->thread.trap_no;
+	sc.err = current->thread.error_code;
+	sc.eip = regs->eip;
+	*(unsigned int *)&sc.cs = regs->xcs;
+	sc.eflags = regs->eflags;
+	sc.esp_at_signal = regs->esp;
+	*(unsigned int *)&sc.ss = regs->xss;
 
 	tmp = save_i387(fpstate);
 	if (tmp < 0)
-	  err = 1;
-	else
-	  err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+		return 1;
+
+	sc.fpstate = tmp ? fpstate : NULL;
 
 	/* non-iBCS2 extensions.. */
-	err |= __put_user(mask, &sc->oldmask);
-	err |= __put_user(current->thread.cr2, &sc->cr2);
+	sc.oldmask = mask;
+	sc.cr2 = current->thread.cr2;
 
-	return err;
+	if (copy_to_user(__sc, &sc, sizeof(sc)))
+		return 1;
+	return 0;
 }
 
 /*
@@ -426,7 +426,7 @@ static int setup_rt_frame(int sig, struc
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->esp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
@@ -570,6 +570,9 @@ static void fastcall do_signal(struct pt
 	if (!user_mode(regs))
 		return;
 
+	if (try_to_freeze() && !signal_pending(current))
+ 		goto no_signal;
+
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
 	else
@@ -598,6 +601,7 @@ static void fastcall do_signal(struct pt
 		return;
 	}
 
+no_signal:
 	/* Did we come from a system call? */
 	if (regs->orig_eax >= 0) {
 		/* Restart the system call - no handlers present */
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/smpboot.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/smpboot.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/smpboot.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/smpboot.c	2017-01-13 08:40:19.000000000 -0500
@@ -321,6 +321,10 @@ static void __init synchronize_tsc_bp(vo
 	}
 	if (!buggy)
 		printk("passed.\n");
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 
 static void __cpuinit synchronize_tsc_ap(void)
@@ -348,6 +352,10 @@ static void __cpuinit synchronize_tsc_ap
 		while (atomic_read(&tsc.count_stop) != num_booting_cpus())
 			cpu_relax();
 	}
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 #undef NR_LOOPS
 
@@ -961,6 +969,13 @@ static int __cpuinit do_boot_cpu(int api
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
+
+#ifdef CONFIG_VE
+	/* Cosmetic: sleep_time won't be changed afterwards for the idle
+	* thread;  keep it 0 rather than -cycles. */
+	VE_TASK_INFO(idle)->sleep_time = 0;
+#endif
+
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/smp.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/smp.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/smp.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/smp.c	2017-01-13 08:40:24.000000000 -0500
@@ -341,10 +341,12 @@ fastcall void smp_invalidate_interrupt(s
 		 
 	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
 		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 			if (flush_va == FLUSH_ALL)
 				local_flush_tlb();
 			else
 				__flush_tlb_one(flush_va);
+#endif
 		} else
 			leave_mm(cpu);
 	}
@@ -416,21 +418,6 @@ static void flush_tlb_others(cpumask_t c
 	spin_unlock(&tlbstate_lock);
 }
 	
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-	preempt_enable();
-}
-
 void flush_tlb_mm (struct mm_struct * mm)
 {
 	cpumask_t cpu_mask;
@@ -451,6 +438,8 @@ void flush_tlb_mm (struct mm_struct * mm
 	preempt_enable();
 }
 
+EXPORT_SYMBOL(flush_tlb_mm);
+
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -462,7 +451,10 @@ void flush_tlb_page(struct vm_area_struc
 
 	if (current->active_mm == mm) {
 		if(current->mm)
-			__flush_tlb_one(va);
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
+			__flush_tlb_one(va)
+#endif
+				;
 		 else
 		 	leave_mm(smp_processor_id());
 	}
@@ -713,6 +705,7 @@ void smp_send_stop(void)
 fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
+	__IRQ_STAT(smp_processor_id(), __reschedule_count)++;
 }
 
 fastcall void smp_call_function_interrupt(struct pt_regs *regs)
@@ -741,3 +734,7 @@ fastcall void smp_call_function_interrup
 	}
 }
 
+void send_nmi_ipi_allbutself(void)
+{
+	send_IPI_allbutself(NMI_VECTOR);
+}
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/smp-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/smp-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/smp-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/smp-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -21,6 +21,7 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 
+#include <asm/nmi.h>
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
@@ -412,6 +413,7 @@ void flush_tlb_mm (struct mm_struct * mm
 
 	preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_mm);
 
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
@@ -459,6 +461,7 @@ void flush_tlb_current_task(void)
 { xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
 void flush_tlb_mm(struct mm_struct * mm)
 { xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+EXPORT_SYMBOL(flush_tlb_mm);
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 { xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
 EXPORT_SYMBOL(flush_tlb_page);
@@ -687,6 +690,7 @@ void smp_send_stop(void)
 irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
 				     struct pt_regs *regs)
 {
+	__IRQ_STAT(smp_processor_id(), __reschedule_count)++;
 
 	return IRQ_HANDLED;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/syscall_table.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/syscall_table.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/syscall_table.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/syscall_table.S	2017-01-13 08:40:40.000000000 -0500
@@ -326,23 +326,44 @@ ENTRY(sys_call_table)
 	.long sys_vmsplice
 	.long sys_move_pages
 	.long sys_getcpu
-	.long sys_ni_syscall		/* sys_epoll_pwait */
-	.long sys_ni_syscall		/* 320 */ /* sys_utimensat */
-	.long sys_ni_syscall		/* sys_signalfd */
+	.long sys_epoll_pwait
+	.long sys_utimensat		/* 320 */
+	.long sys_signalfd
 	.long sys_ni_syscall		/* sys_timerfd_create */
 	.long sys_eventfd		/* sys_eventfd */
 	.long sys_fallocate
 	.long sys_ni_syscall		/* 325 */
 	.long sys_ni_syscall
+	.long sys_signalfd4
 	.long sys_ni_syscall
+	.long sys_epoll_create1
+	.long sys_dup3			/* 330 */
+	.long sys_pipe2
+	.long sys_inotify_init1
+	.long sys_preadv
+	.long sys_pwritev
+	.long sys_ni_syscall		/* 335 */
 	.long sys_ni_syscall
+	.long sys_recvmmsg
+	.long sys_prlimit64
+	.rept 500-(.-sys_call_table)/4
+		.long sys_ni_syscall
+	.endr
+	.long sys_fairsched_mknod	/* 500 */
+	.long sys_fairsched_rmnod
+	.long sys_fairsched_chwt
+	.long sys_fairsched_mvpr
+	.long sys_fairsched_rate
+	.long sys_fairsched_vcpus	/* 505 */
+	.long sys_fairsched_cpumask
 	.long sys_ni_syscall
-	.long sys_ni_syscall		/* 330 */
 	.long sys_ni_syscall
 	.long sys_ni_syscall
+	.long sys_getluid		/* 510 */
+	.long sys_setluid
+	.long sys_setublimit
+	.long sys_ubstat
 	.long sys_ni_syscall
 	.long sys_ni_syscall
-	.long sys_ni_syscall		/* 335 */
-	.long sys_ni_syscall
-	.long sys_recvmmsg
-	.long sys_prlimit64
+	.long sys_lchmod		/* 516 */
+	.long sys_lutime
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/sysenter.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/sysenter.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/sysenter.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/sysenter.c	2017-01-13 08:40:40.000000000 -0500
@@ -17,6 +17,8 @@
 #include <linux/elf.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -31,7 +33,7 @@
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
-unsigned int __read_mostly vdso_enabled = 1;
+int __read_mostly vdso_enabled = 1;
 
 EXPORT_SYMBOL_GPL(vdso_enabled);
 
@@ -50,7 +52,11 @@ void enable_sep_cpu(void)
 {
 #ifndef CONFIG_X86_NO_TSS
 	int cpu = get_cpu();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
+#else
+	struct tss_struct *tss = init_tss + cpu;
+#endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		put_cpu();
@@ -73,10 +79,12 @@ void enable_sep_cpu(void)
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 static struct page *syscall_pages[1];
+void *syscall_page;
+EXPORT_SYMBOL_GPL(syscall_page);
 
 int __cpuinit sysenter_setup(void)
 {
-	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 	syscall_pages[0] = virt_to_page(syscall_page);
 
 #ifdef CONFIG_XEN
@@ -112,16 +120,23 @@ int __cpuinit sysenter_setup(void)
 
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
+EXPORT_SYMBOL_GPL(SYSENTER_RETURN);
 
 /* Setup a VMA at program startup for the vsyscall page */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp,
+				 unsigned long map_address)
 {
 	struct mm_struct *mm = current->mm;
-	unsigned long addr;
+	unsigned long addr = map_address;
 	int ret;
 
+	if (unlikely(!vdso_enabled) && map_address == 0) {
+		current->mm->context.vdso = NULL;
+		return 0;
+	}
+
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
+	addr = get_unmapped_area_prot(NULL, addr, PAGE_SIZE, 0, 0, 1);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
@@ -150,6 +165,7 @@ up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(arch_setup_additional_pages);
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/sys_i386.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/sys_i386.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/sys_i386.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/sys_i386.c	2017-01-13 08:40:15.000000000 -0500
@@ -168,7 +168,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -184,16 +184,21 @@ asmlinkage int sys_olduname(struct oldol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       __OLD_UTS_LEN);
+	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release, &utsname()->release,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->release + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version, &utsname()->version,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->version + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
 	
 	up_read(&uts_sem);
 	
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/traps.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/traps.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/traps.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/traps.c	2017-01-13 08:40:40.000000000 -0500
@@ -55,16 +55,15 @@
 #include <asm/stacktrace.h>
 
 #include <linux/module.h>
+#include <linux/utsrelease.h>
 
 #include "mach_traps.h"
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
 
-asmlinkage int system_call(void);
-
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
-		{ 0, 0 }, { 0, 0 } };
+struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } };
+struct page *default_ldt_page;
 
 /* Do we ignore FPU interrupts ? */
 char ignore_fpu_irq = 0;
@@ -218,7 +217,8 @@ static int print_trace_stack(void *data,
 static void print_trace_address(void *data, unsigned long addr)
 {
 	printk("%s [<%08lx>] ", (char *)data, addr);
-	print_symbol("%s\n", addr);
+	if (decode_call_traces)
+		print_symbol("%s\n", addr);
 	touch_nmi_watchdog();
 }
 
@@ -234,7 +234,10 @@ show_trace_log_lvl(struct task_struct *t
 		   unsigned long * stack, char *log_lvl)
 {
 	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
-	printk("%s =======================\n", log_lvl);
+	if (decode_call_traces)
+		printk("%s =======================\n", log_lvl);
+	else
+		printk("%s =<ctx>=", log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -264,8 +267,13 @@ static void show_stack_log_lvl(struct ta
 			printk("\n%s       ", log_lvl);
 		printk("%08lx ", *stack++);
 	}
-	printk("\n%sCall Trace:\n", log_lvl);
+	if (decode_call_traces)
+		printk("\n%s Call Trace:\n", log_lvl);
+	else
+		printk("\n%s Call Trace: ", log_lvl);
 	show_trace_log_lvl(task, regs, esp, log_lvl);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 void show_stack(struct task_struct *task, unsigned long *esp)
@@ -274,6 +282,8 @@ void show_stack(struct task_struct *task
 	show_stack_log_lvl(task, NULL, esp, "");
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -282,6 +292,8 @@ void dump_stack(void)
 	unsigned long stack;
 
 	show_trace(current, NULL, &stack);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -301,12 +313,13 @@ void show_registers(struct pt_regs *regs
 		ss = regs->xss & 0xffff;
 	}
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
-			"EFLAGS: %08lx   (%s %.*s) \n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
-		print_tainted(), regs->eflags, system_utsname.release,
-		(int)strcspn(system_utsname.version, " "),
-		system_utsname.version);
+	printk(KERN_EMERG "CPU:    %d, VCPU: %d.%d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
+			"EFLAGS: %08lx   (%s %.*s %s) \n",
+		smp_processor_id(), task_vsched_id(current), task_cpu(current),
+		0xffff & regs->xcs, regs->eip,
+		print_tainted(), regs->eflags, init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version, VZVERSION);
 	print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
 	printk(KERN_EMERG "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
 		regs->eax, regs->ebx, regs->ecx, regs->edx);
@@ -314,8 +327,9 @@ void show_registers(struct pt_regs *regs
 		regs->esi, regs->edi, regs->ebp, esp);
 	printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
 		regs->xds & 0xffff, regs->xes & 0xffff, ss);
-	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
+	printk(KERN_EMERG "Process %.*s (pid: %d, veid: %d, ti=%p task=%p task.ti=%p)",
 		TASK_COMM_LEN, current->comm, current->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
 		current_thread_info(), current, current->thread_info);
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -333,7 +347,7 @@ void show_registers(struct pt_regs *regs
 		for (i = 0; i < 64; i++, eip++) {
 			unsigned char c;
 
-			if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+			if (eip < (u8 __user *)PAGE_OFFSET || __direct_get_user(c, eip)) {
 				printk(" Bad EIP value.");
 				break;
 			}
@@ -351,9 +365,7 @@ static void handle_BUG(struct pt_regs *r
 	unsigned long eip = regs->eip;
 	unsigned short ud2;
 
-	if (eip < PAGE_OFFSET)
-		return;
-	if (__get_user(ud2, (unsigned short __user *)eip))
+	if (__direct_get_user(ud2, (unsigned short __user *)eip))
 		return;
 	if (ud2 != 0x0b0f)
 		return;
@@ -366,10 +378,10 @@ static void handle_BUG(struct pt_regs *r
 		char *file;
 		char c;
 
-		if (__get_user(line, (unsigned short __user *)(eip + 2)))
+		if (__direct_get_user(line, (unsigned short __user *)(eip + 4)))
 			break;
-		if (__get_user(file, (char * __user *)(eip + 4)) ||
-		    (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+		if (__direct_get_user(file, (char * __user *)(eip + 7)) ||
+		    __direct_get_user(c, file))
 			file = "<bad filename>";
 
 		printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
@@ -379,6 +391,15 @@ static void handle_BUG(struct pt_regs *r
 	printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n");
 }
 
+int die_counter = 0;
+
+static void inline check_kernel_csum_bug(void)
+{
+	if (kernel_text_csum_broken)
+		printk("Kernel code checksum mismatch detected %d times\n",
+				kernel_text_csum_broken);
+}
+
 /* This is gone through when something in the kernel
  * has done something bad and is about to be terminated.
 */
@@ -393,7 +414,6 @@ void die(const char * str, struct pt_reg
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
-	static int die_counter;
 	unsigned long flags;
 
 	oops_enter();
@@ -456,6 +476,7 @@ void die(const char * str, struct pt_reg
   	} else
 		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
 
+	check_kernel_csum_bug();
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	spin_unlock_irqrestore(&die.lock, flags);
@@ -661,7 +682,7 @@ fastcall void __kprobes do_general_prote
 					      long error_code)
 {
 	int cpu = get_cpu();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = init_tss+ cpu;
 	struct thread_struct *thread = &current->thread;
 	int ok;
 
@@ -797,6 +818,9 @@ void die_nmi (struct pt_regs *regs, cons
 	printk(" on CPU%d, eip %08lx, registers:\n",
 		smp_processor_id(), regs->eip);
 	show_registers(regs);
+	nmi_show_regs(regs, 1);
+	if (!decode_call_traces)
+		show_registers(regs);
 	console_silent();
 
 	/* If we are in kernel we are probably nested up pretty bad
@@ -831,7 +855,8 @@ static void default_do_nmi(struct pt_reg
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 */
-		if (nmi_watchdog_tick(regs))
+		if (nmi_watchdog_tick(regs) +
+				do_nmi_show_regs(regs, cpu))
 			return;
 
 		if (!do_nmi_callback(regs, cpu))
@@ -936,10 +961,18 @@ fastcall void __kprobes do_debug(struct 
 	if (regs->eflags & X86_EFLAGS_IF)
 		local_irq_enable();
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
+	/*
+	 * Mask out spurious debug traps due to lazy DR7 setting or
+	 * due to 4G/4G kernel mode:
+	 */
 	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
 		if (!tsk->thread.debugreg[7])
 			goto clear_dr7;
+		if (!user_mode(regs)) {
+			// restore upon return-to-userspace:
+			set_thread_flag(TIF_DB7);
+			goto clear_dr7;
+		}
 	}
 
 	if (regs->eflags & VM_MASK)
@@ -1145,7 +1178,11 @@ fastcall void setup_x86_bogus_stack(unsi
 	switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
 	regs = (struct pt_regs *)stk;
 	/* now the switch32 on 16bit stack */
-	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	stack_bot = fix_to_virt(FIX_16BIT_STACK_0) + cpu * CPU_16BIT_STACK_SIZE;
+#else
+	stack_bot = (unsigned long)&cpu_16bit_stack[cpu];
+#endif
 	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
 	switch32_ptr = (unsigned long *)(stack_top - 8);
 	iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
@@ -1166,7 +1203,11 @@ fastcall unsigned char * fixup_x86_bogus
 	unsigned long stack_top, stack_bot;
 	int len;
 	int cpu = smp_processor_id();
-	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	stack_bot = fix_to_virt(FIX_16BIT_STACK_0) + cpu * CPU_16BIT_STACK_SIZE;
+#else
+	stack_bot = (unsigned long)&cpu_16bit_stack[cpu];
+#endif
 	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
 	switch32_ptr = (unsigned long *)(stack_top - 8);
 	/* copy the data from 16bit stack to 32bit stack */
@@ -1213,20 +1254,51 @@ asmlinkage void math_emulate(long arg)
 
 #endif /* CONFIG_MATH_EMULATION */
 
-#ifdef CONFIG_X86_F00F_BUG
-void __init trap_init_f00f_bug(void)
+void __init trap_init_virtual_IDT(void)
 {
-	__set_fixmap(FIX_F00F_IDT, __pa_symbol(&idt_table), PAGE_KERNEL_RO);
-
 	/*
-	 * Update the IDT descriptor and reload the IDT so that
-	 * it uses the read-only mapped virtual address.
+	 * "idt" is magic - it overlaps the idt_descr
+	 * variable so that updating idt will automatically
+	 * update the idt descriptor..
 	 */
-	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+	__set_fixmap(FIX_IDT, __pa_symbol(&idt_table), PAGE_KERNEL_RO);
+	idt_descr.address = __fix_to_virt(FIX_IDT);
 	load_idt(&idt_descr);
 }
+
+void __init trap_init_virtual_GDT(void)
+{
+	int cpu = smp_processor_id();
+	struct Xgt_desc_struct *gdt_desc = &per_cpu(cpu_gdt_descr, cpu);
+	struct tss_struct *t;
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+	if (!cpu) {
+		int i;
+		__set_fixmap(FIX_GDT_0, __pa_symbol(cpu_gdt_table), PAGE_KERNEL);
+		__set_fixmap(FIX_GDT_1, __pa_symbol(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL);
+		/* due to IO_BITMAP_BITS each tss takes 4 pages */
+		for(i = 0; i < FIX_TSS_COUNT; i++)
+			__set_fixmap(FIX_TSS_0 - i,
+				__pa_symbol(init_tss) + i * PAGE_SIZE, PAGE_KERNEL);
+	}
+
+	gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu;
+#else
+	gdt_desc->address = (unsigned long)cpu_gdt_table[cpu];
 #endif
+	load_gdt(gdt_desc);
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+	t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
+#else
+	t = init_tss + cpu;
+#endif
+	set_tss_desc(cpu, t);
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
+	load_TR_desc();
+}
+ 
 #define _set_gate(gate_addr,type,dpl,addr,seg) \
 do { \
   int __d0, __d1; \
@@ -1240,6 +1312,12 @@ do { \
 	 "3" ((char *) (addr)),"2" ((seg) << 16)); \
 } while (0)
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+#define CHECK_ISR(x) BUG_ON((unsigned long)(x) < 0xff000000)
+#else
+#define CHECK_ISR(x)
+#endif
+
 
 /*
  * This needs to use 'idt_table' rather than 'idt', and
@@ -1249,6 +1327,7 @@ do { \
  */
 void set_intr_gate(unsigned int n, void *addr)
 {
+	CHECK_ISR(addr);
 	_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
 }
 
@@ -1257,16 +1336,19 @@ void set_intr_gate(unsigned int n, void 
  */
 static inline void set_system_intr_gate(unsigned int n, void *addr)
 {
+	CHECK_ISR(addr);
 	_set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
 }
 
 static void __init set_trap_gate(unsigned int n, void *addr)
 {
+	CHECK_ISR(addr);
 	_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
 }
 
 static void __init set_system_gate(unsigned int n, void *addr)
 {
+	CHECK_ISR(addr);
 	_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
 }
 
@@ -1289,6 +1371,7 @@ void __init trap_init(void)
 #ifdef CONFIG_X86_LOCAL_APIC
 	init_apic_mappings();
 #endif
+	init_entry_mappings();
 
 	set_trap_gate(0,&divide_error);
 	set_intr_gate(1,&debug);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/traps-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/traps-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/traps-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/traps-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -266,7 +266,7 @@ static void show_stack_log_lvl(struct ta
 			printk("\n%s       ", log_lvl);
 		printk("%08lx ", *stack++);
 	}
-	printk("\n%sCall Trace:\n", log_lvl);
+	printk("\n%s Call Trace:\n", log_lvl);
 	show_trace_log_lvl(task, regs, esp, log_lvl);
 }
 
@@ -276,6 +276,8 @@ void show_stack(struct task_struct *task
 	show_stack_log_lvl(task, NULL, esp, "");
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -303,12 +305,13 @@ void show_registers(struct pt_regs *regs
 		ss = regs->xss & 0xffff;
 	}
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
+	printk(KERN_EMERG "CPU:    %d, VCPU: %d.%d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
 			"EFLAGS: %08lx   (%s %.*s) \n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
-		print_tainted(), regs->eflags, system_utsname.release,
-		(int)strcspn(system_utsname.version, " "),
-		system_utsname.version);
+		smp_processor_id(), task_vsched_id(current), task_cpu(current),
+		0xffff & regs->xcs, regs->eip,
+		print_tainted(), regs->eflags, init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version);
 	print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
 	printk(KERN_EMERG "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
 		regs->eax, regs->ebx, regs->ecx, regs->edx);
@@ -316,8 +319,9 @@ void show_registers(struct pt_regs *regs
 		regs->esi, regs->edi, regs->ebp, esp);
 	printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
 		regs->xds & 0xffff, regs->xes & 0xffff, ss);
-	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
+	printk(KERN_EMERG "Process %.*s (pid: %d, veid: %d, ti=%p task=%p task.ti=%p)",
 		TASK_COMM_LEN, current->comm, current->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
 		current_thread_info(), current, current->thread_info);
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -367,9 +371,9 @@ static void handle_BUG(struct pt_regs *r
 		char *file;
 		char c;
 
-		if (__get_user(line, (unsigned short __user *)(eip + 2)))
+		if (__get_user(line, (unsigned short __user *)(eip + 4)))
 			break;
-		if (__get_user(file, (char * __user *)(eip + 4)) ||
+		if (__get_user(file, (char * __user *)(eip + 7)) ||
 		    (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
 			file = "<bad filename>";
 
@@ -380,9 +384,11 @@ static void handle_BUG(struct pt_regs *r
 	printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n");
 }
 
+int die_counter = 0;
+
 /* This is gone through when something in the kernel
  * has done something bad and is about to be terminated.
-*/
+ */
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static struct {
@@ -394,7 +400,6 @@ void die(const char * str, struct pt_reg
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
-	static int die_counter;
 	unsigned long flags;
 
 	oops_enter();
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/vm86.c kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vm86.c
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/vm86.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vm86.c	2017-01-13 08:40:14.000000000 -0500
@@ -125,11 +125,11 @@ struct pt_regs * fastcall save_v86_state
 	}
 
 #ifndef CONFIG_X86_NO_TSS
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = __get_cpu_tss(get_cpu());
 #endif
 	current->thread.esp0 = current->thread.saved_esp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
-	load_esp0(tss, &current->thread);
+	load_virtual_esp0(tss, current);
 	current->thread.saved_esp0 = 0;
 #ifndef CONFIG_X86_NO_TSS
 	put_cpu();
@@ -305,12 +305,13 @@ static void do_sys_vm86(struct kernel_vm
 	savesegment(gs, tsk->thread.saved_gs);
 
 #ifndef CONFIG_X86_NO_TSS
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = __get_cpu_tss(get_cpu());
 #endif
 	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
-	load_esp0(tss, &tsk->thread);
+
+	load_virtual_esp0(tss, tsk);
 #ifndef CONFIG_X86_NO_TSS
 	put_cpu();
 #endif
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/vmlinux.lds.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vmlinux.lds.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/vmlinux.lds.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vmlinux.lds.S	2017-01-13 08:40:14.000000000 -0500
@@ -10,6 +10,9 @@
 #include <asm/cache.h>
 #include <asm/boot.h>
 
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
 ENTRY(phys_startup_32)
@@ -41,12 +44,32 @@ SECTIONS
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
+#ifndef CONFIG_X86_4G
 	KPROBES_TEXT
+	*(.entry.text)
+#endif
 	*(.fixup)
 	*(.gnu.warning)
-	_etext = .;		/* End of text section */
   } :text = 0x9090
 
+#ifdef CONFIG_X86_4G
+  . = ALIGN(PAGE_SIZE_asm);
+  __entry_tramp_start = .;
+  . = FIX_ENTRY_TRAMPOLINE_0_addr;
+  __start___entry_text = .;
+  .entry.text : AT (__entry_tramp_start - LOAD_OFFSET) {
+	*(.entry.text)
+	/* some of ISRs are placed in .kprobes.text section,
+	   need to move them to .entry.text for 4GB split */
+	KPROBES_TEXT
+  }
+  __end___entry_text = .;
+  __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text);
+  . = __entry_tramp_end;
+#endif
+  . = ALIGN(PAGE_SIZE_asm);
+  _etext = .;		/* End of text section */
+
   . = ALIGN(16);		/* Exception table */
   __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
 	__start___ex_table = .;
@@ -54,8 +77,36 @@ SECTIONS
 	__stop___ex_table = .;
   }
 
+  . = ALIGN(PAGE_SIZE_asm);
+
   RODATA
 
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_tss : AT(ADDR(.data.page_aligned_tss) - LOAD_OFFSET) {
+	*(.data.tss)
+  }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_default_ldt : AT(ADDR(.data.page_aligned_default_ldt) - LOAD_OFFSET) {
+	*(.data.default_ldt)
+  }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_idt : AT(ADDR(.data.page_aligned_idt) - LOAD_OFFSET) {
+	*(.data.idt)
+  }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_gdt : AT(ADDR(.data.page_aligned_gdt) - LOAD_OFFSET) {
+	*(.data.gdt)
+  }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_stk16 : AT(ADDR(.data.page_aligned_stk16) - LOAD_OFFSET) {
+	*(.data.stk16)
+  }
+
+  . = ALIGN(PAGE_SIZE_asm);
   . = ALIGN(4);
   .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
 	__tracedata_start = .;
@@ -72,19 +123,14 @@ SECTIONS
 	CONSTRUCTORS
   } :data
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
 	__nosave_begin = .;
 	*(.data.nosave)
-	. = ALIGN(4096);
+        . = ALIGN(PAGE_SIZE_asm);
 	__nosave_end = .;
   }
 
-  . = ALIGN(4096);
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-	*(.data.idt)
-  }
-
   . = ALIGN(32);
   .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
 	*(.data.cacheline_aligned)
@@ -112,7 +158,7 @@ SECTIONS
   }
 
   /* might get freed after init */
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   .smp_alternatives : AT(ADDR(.smp_alternatives) - LOAD_OFFSET) {
 	__smp_alt_begin = .;
 	__smp_alt_instructions = .;
@@ -176,7 +222,7 @@ SECTIONS
      from .altinstructions and .eh_frame */
   .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
   .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
 	__initramfs_start = .;
 	*(.init.ramfs)
@@ -188,7 +234,7 @@ SECTIONS
 	*(.data.percpu)
 	__per_cpu_end = .;
   }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   /* freed after init ends here */
 	
   .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
@@ -200,7 +246,7 @@ SECTIONS
 	__bss_stop = .;
   	_end = . ;
 	/* This is where the kernel creates the early boot page tables */
-	. = ALIGN(4096);
+        . = ALIGN(PAGE_SIZE_asm);
 	pg0 = . ;
   }
 
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/vsyscall-sigreturn.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vsyscall-sigreturn.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/vsyscall-sigreturn.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vsyscall-sigreturn.S	2017-01-13 08:40:26.000000000 -0500
@@ -15,7 +15,7 @@
 */
 
 	.text
-	.org __kernel_vsyscall+32,0x90
+	.org __kernel_vsyscall+0x100,0x90
 	.globl __kernel_sigreturn
 	.type __kernel_sigreturn,@function
 __kernel_sigreturn:
@@ -27,6 +27,7 @@ __kernel_sigreturn:
 	.size __kernel_sigreturn,.-.LSTART_sigreturn
 
 	.balign 32
+	.org __kernel_vsyscall+0x200,0x90
 	.globl __kernel_rt_sigreturn
 	.type __kernel_rt_sigreturn,@function
 __kernel_rt_sigreturn:
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/kernel/vsyscall-sysenter.S kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vsyscall-sysenter.S
--- kernel-2.6.18-417.el5.orig/arch/i386/kernel/vsyscall-sysenter.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/kernel/vsyscall-sysenter.S	2017-01-13 08:40:26.000000000 -0500
@@ -30,6 +30,11 @@
 	.type __kernel_vsyscall,@function
 __kernel_vsyscall:
 .LSTART_vsyscall:
+	cmpl $192, %eax
+	jne 1f
+	int $0x80
+	ret
+1:
 	push %ecx
 .Lpush_ecx:
 	push %edx
@@ -39,12 +44,12 @@ __kernel_vsyscall:
 	movl %esp,%ebp
 	sysenter
 
-	/* 7: align return point with nop's to make disassembly easier */
-	.space 7,0x90
+	/* 17: align return point with nop's to make disassembly easier */
+	.space 13,0x90
 
-	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
+	/* 30: System call restart point is here! (SYSENTER_RETURN-2) */
 	jmp .Lenter_kernel
-	/* 16: System call normal return point is here! */
+	/* 32: System call normal return point is here! */
 	.globl SYSENTER_RETURN	/* Symbol used by sysenter.c  */
 SYSENTER_RETURN:
 	pop %ebp
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/lib/checksum.S kernel-2.6.18-417.el5-028stab121/arch/i386/lib/checksum.S
--- kernel-2.6.18-417.el5.orig/arch/i386/lib/checksum.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/lib/checksum.S	2017-01-13 08:40:14.000000000 -0500
@@ -279,14 +279,14 @@ unsigned int csum_partial_copy_generic (
 	.previous
 
 .align 4
-.globl csum_partial_copy_generic
+.globl direct_csum_partial_copy_generic
 				
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 
 #define ARGBASE 16		
 #define FP		12
 		
-csum_partial_copy_generic:
+direct_csum_partial_copy_generic:
 	subl  $4,%esp	
 	pushl %edi
 	pushl %esi
@@ -421,7 +421,7 @@ DST(	movb %cl, (%edi)	)
 
 #define ARGBASE 12
 		
-csum_partial_copy_generic:
+direct_csum_partial_copy_generic:
 	pushl %ebx
 	pushl %edi
 	pushl %esi
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/lib/cpuid-on-cpu.c kernel-2.6.18-417.el5-028stab121/arch/i386/lib/cpuid-on-cpu.c
--- kernel-2.6.18-417.el5.orig/arch/i386/lib/cpuid-on-cpu.c	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/lib/cpuid-on-cpu.c	2017-01-13 08:40:15.000000000 -0500
@@ -0,0 +1,73 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+
+struct cpuid_info {
+	unsigned int cpu;
+	u32 op;
+	u32 eax, ebx, ecx, edx;
+};
+
+static void __cpuid_on_cpu(void *info)
+{
+	struct cpuid_info *rv = info;
+
+	if (smp_processor_id() == rv->cpu)
+		cpuid(rv->op, &rv->eax, &rv->ebx, &rv->ecx, &rv->edx);
+}
+
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	preempt_disable();
+	if (smp_processor_id() == cpu)
+		cpuid(op, eax, ebx, ecx, edx);
+	else {
+		struct cpuid_info rv;
+
+		rv.cpu = cpu;
+		rv.op = op;
+		smp_call_function(__cpuid_on_cpu, &rv, 0, 1);
+		*eax = rv.eax;
+		*ebx = rv.ebx;
+		*ecx = rv.ecx;
+		*edx = rv.edx;
+	}
+	preempt_enable();
+}
+
+struct cpuid_eax_info {
+	unsigned int cpu;
+	u32 op;
+	u32 eax;
+};
+
+static void __cpuid_eax_on_cpu(void *info)
+{
+	struct cpuid_eax_info *rv = info;
+
+	if (smp_processor_id() == rv->cpu)
+		rv->eax = cpuid_eax(rv->op);
+}
+
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	u32 ret;
+
+	preempt_disable();
+	if (smp_processor_id() == cpu)
+		ret = cpuid_eax(op);
+	else {
+		struct cpuid_eax_info rv;
+
+		rv.cpu = cpu;
+		rv.op = op;
+		smp_call_function(__cpuid_eax_on_cpu, &rv, 0, 1);
+		ret = rv.eax;
+	}
+	preempt_enable();
+	return ret;
+}
+
+EXPORT_SYMBOL(cpuid_on_cpu);
+EXPORT_SYMBOL(cpuid_eax_on_cpu);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/lib/getuser.S kernel-2.6.18-417.el5-028stab121/arch/i386/lib/getuser.S
--- kernel-2.6.18-417.el5.orig/arch/i386/lib/getuser.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/lib/getuser.S	2017-01-13 08:40:14.000000000 -0500
@@ -9,6 +9,7 @@
  * return value.
  */
 #include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
 
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/lib/Makefile kernel-2.6.18-417.el5-028stab121/arch/i386/lib/Makefile
--- kernel-2.6.18-417.el5.orig/arch/i386/lib/Makefile	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/lib/Makefile	2017-01-13 08:40:15.000000000 -0500
@@ -9,5 +9,5 @@ lib-y = checksum.o delay.o usercopy.o ge
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
 
 ifndef CONFIG_XEN
-obj-$(CONFIG_SMP)	+= msr-on-cpu.o
+obj-$(CONFIG_SMP) += cpuid-on-cpu.o msr-on-cpu.o
 endif
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/lib/usercopy.c kernel-2.6.18-417.el5-028stab121/arch/i386/lib/usercopy.c
--- kernel-2.6.18-417.el5.orig/arch/i386/lib/usercopy.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/lib/usercopy.c	2017-01-13 08:40:14.000000000 -0500
@@ -8,7 +8,6 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
-#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/mmx.h>
 
@@ -77,13 +76,13 @@ do {									   \
  * and returns @count.
  */
 long
-__strncpy_from_user(char *dst, const char __user *src, long count)
+__direct_strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	long res;
 	__do_strncpy_from_user(dst, src, count, res);
 	return res;
 }
-EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__direct_strncpy_from_user);
 
 /**
  * strncpy_from_user: - Copy a NUL terminated string from userspace.
@@ -104,14 +103,14 @@ EXPORT_SYMBOL(__strncpy_from_user);
  * and returns @count.
  */
 long
-strncpy_from_user(char *dst, const char __user *src, long count)
+direct_strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	long res = -EFAULT;
 	if (access_ok(VERIFY_READ, src, 1))
 		__do_strncpy_from_user(dst, src, count, res);
 	return res;
 }
-EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(direct_strncpy_from_user);
 
 /*
  * Zero Userspace
@@ -150,14 +149,14 @@ do {									\
  * On success, this will be zero.
  */
 unsigned long
-clear_user(void __user *to, unsigned long n)
+direct_clear_user(void __user *to, unsigned long n)
 {
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
 }
-EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(direct_clear_user);
 
 /**
  * __clear_user: - Zero a block of memory in user space, with less checking.
@@ -171,12 +170,12 @@ EXPORT_SYMBOL(clear_user);
  * On success, this will be zero.
  */
 unsigned long
-__clear_user(void __user *to, unsigned long n)
+__direct_clear_user(void __user *to, unsigned long n)
 {
 	__do_clear_user(to, n);
 	return n;
 }
-EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__direct_clear_user);
 
 /**
  * strlen_user: - Get the size of a string in user space.
@@ -189,7 +188,7 @@ EXPORT_SYMBOL(__clear_user);
  * On exception, returns 0.
  * If the string is too long, returns a value greater than @n.
  */
-long strnlen_user(const char __user *s, long n)
+long direct_strnlen_user(const char __user *s, long n)
 {
 	unsigned long mask = -__addr_ok(s);
 	unsigned long res, tmp;
@@ -220,7 +219,7 @@ long strnlen_user(const char __user *s, 
 		:"cc");
 	return res & mask;
 }
-EXPORT_SYMBOL(strnlen_user);
+EXPORT_SYMBOL(direct_strnlen_user);
 
 #ifdef CONFIG_X86_INTEL_USERCOPY
 static unsigned long
@@ -826,54 +825,3 @@ unsigned long __copy_from_user_ll_nocach
 #endif
 	return n;
 }
-
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to:   Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only.  This function may sleep.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long
-copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	BUG_ON((long) n < 0);
-	if (access_ok(VERIFY_WRITE, to, n))
-		n = __copy_to_user(to, from, n);
-	return n;
-}
-EXPORT_SYMBOL(copy_to_user);
-
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to:   Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only.  This function may sleep.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long
-copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	BUG_ON((long) n < 0);
-	if (access_ok(VERIFY_READ, from, n))
-		n = __copy_from_user(to, from, n);
-	else
-		memset(to, 0, n);
-	return n;
-}
-EXPORT_SYMBOL(copy_from_user);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/math-emu/fpu_system.h kernel-2.6.18-417.el5-028stab121/arch/i386/math-emu/fpu_system.h
--- kernel-2.6.18-417.el5.orig/arch/i386/math-emu/fpu_system.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/math-emu/fpu_system.h	2017-01-13 08:40:14.000000000 -0500
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <asm/atomic_kmap.h>
 
 /* This sets the pointer FPU_info to point to the argument part
    of the stack frame of math_emulate() */
@@ -22,7 +23,7 @@
 
 /* s is always from a cpu register, and the cpu does bounds checking
  * during register load --> no further bounds checks needed */
-#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
+#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3])
 #define SEG_D_SIZE(x)		((x).b & (3 << 21))
 #define SEG_G_BIT(x)		((x).b & (1 << 23))
 #define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/fault.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/fault.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/fault.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/fault.c	2017-01-13 08:40:17.000000000 -0500
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/kdebug.h>
+#include <asm/tlbflush.h>
 
 extern void die(const char *,struct pt_regs *,long);
 
@@ -66,32 +67,6 @@ static inline int notify_page_fault(enum
 
 
 /*
- * Unlock any spinlocks which will prevent us from getting the
- * message out 
- */
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk()
-	 * without oops_in_progress set so that printk will give klogd
-	 * a poke.  Hold onto your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
-/*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
  * appropriate), and returned in *eip_limit.
@@ -142,8 +117,17 @@ static inline unsigned long get_segment_
 	if (seg & (1<<2)) {
 		/* Must lock the LDT while reading it. */
 		down(&current->mm->context.sem);
+#if 1
+		/* horrible hack for 4/4 disabled kernels.
+		   I'm not quite sure what the TLB flush is good for,
+		   it's mindlessly copied from the read_ldt code */
+		__flush_tlb_global();
+		desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]);
+		desc = (void *)desc + ((seg & ~7) % PAGE_SIZE);
+#else
 		desc = current->mm->context.ldt;
 		desc = (void *)desc + (seg & ~7);
+#endif
 	} else {
 		/* Must disable preemption while reading the GDT. */
  		desc = (u32 *)get_cpu_gdt_table(get_cpu());
@@ -154,6 +138,9 @@ static inline unsigned long get_segment_
 	base = get_desc_base((unsigned long *)desc);
 
 	if (seg & (1<<2)) { 
+#if 1
+		kunmap((void *)((unsigned long)desc & PAGE_MASK));
+#endif
 		up(&current->mm->context.sem);
 	} else
 		put_cpu();
@@ -172,12 +159,16 @@ static inline unsigned long get_segment_
  */
 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 { 
+	mm_segment_t oldfs;
 	unsigned long limit;
 	unsigned long instr = get_segment_eip (regs, &limit);
 	int scan_more = 1;
 	int prefetch = 0; 
 	int i;
 
+	oldfs = get_fs();
+	if ((regs->xcs & 0xffff) == __KERNEL_CS)
+		set_fs(KERNEL_DS);
 	for (i = 0; scan_more && i < 15; i++) { 
 		unsigned char opcode;
 		unsigned char instr_hi;
@@ -222,6 +213,7 @@ static int __is_prefetch(struct pt_regs 
 			break;
 		} 
 	}
+	set_fs(oldfs);
 	return prefetch;
 }
 
@@ -346,6 +338,23 @@ static inline void __do_page_fault(struc
 	 * (error_code & 4) == 0, and that the fault was not a
 	 * protection error (error_code & 9) == 0.
 	 */
+#ifdef CONFIG_X86_4G
+	/*
+	 * On 4/4, all kernel faults are either bugs, vmalloc or prefetch
+	 */
+	/* If it's vm86 fall through */
+	if (unlikely(!(regs->eflags & VM_MASK) && ((regs->xcs & 3) == 0))) {
+		if (error_code & 3)
+			goto kernel_pgf;
+		if (vmalloc_fault(address) >= 0)
+			return;
+kernel_pgf:
+		if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+						SIGSEGV) == NOTIFY_STOP)
+			return;
+		goto bad_area_nosemaphore;
+	}
+#else
 	if (unlikely(address >= TASK_SIZE)) {
 		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
 			return;
@@ -358,7 +367,7 @@ static inline void __do_page_fault(struc
 		 */
 		goto bad_area_nosemaphore;
 	}
-
+#endif
 	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 					SIGSEGV) == NOTIFY_STOP)
 		return;
@@ -444,7 +453,6 @@ good_area:
 				goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -588,14 +596,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
+	if (error_code & 4) {
+		/*
+		 * 0-order allocations always succeed unless something really
+		 * fatal happens: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
 	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/fault-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/fault-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/fault-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/fault-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -65,32 +65,6 @@ static inline int notify_page_fault(enum
 #endif
 
 /*
- * Unlock any spinlocks which will prevent us from getting the
- * message out 
- */
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk()
-	 * without oops_in_progress set so that printk will give klogd
-	 * a poke.  Hold onto your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
-/*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
  * appropriate), and returned in *eip_limit.
@@ -563,7 +537,6 @@ good_area:
 				goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -690,14 +663,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
+	if (error_code & 4) {
+		/*
+		 * 0-order allocations always succeed unless something really
+		 * fatal happens: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/highmem.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/highmem.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/highmem.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/highmem.c	2017-01-13 08:40:14.000000000 -0500
@@ -42,12 +42,45 @@ void *kmap_atomic(struct page *page, enu
 	if (!pte_none(*(kmap_pte-idx)))
 		BUG();
 #endif
-	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+	/*
+	 * If the page is not a normal RAM page, then map it
+	 * uncached to be on the safe side - it could be device
+	 * memory that must not be prefetched:
+	 */
+	if (PageReserved(page))
+		set_pte(kmap_pte-idx, mk_pte(page, kmap_prot_nocache));
+	else
+		set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
 	__flush_tlb_one(vaddr);
 
 	return (void*) vaddr;
 }
 
+/*
+ * page frame number based kmaps - useful for PCI mappings.
+ * NOTE: we map the page with the same mapping as what user is using.
+ */
+void *kmap_atomic_pte(pte_t *pte, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	inc_preempt_count();
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	if (!pte_none(*(kmap_pte-idx)))
+		BUG();
+#endif
+	set_pte(kmap_pte-idx, *pte);
+	__flush_tlb_one(vaddr);
+
+	return (void*) vaddr;
+}
+
+
 void kunmap_atomic(void *kvaddr, enum km_type type)
 {
 #ifdef CONFIG_DEBUG_HIGHMEM
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/hugetlbpage.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/hugetlbpage.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/hugetlbpage.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/hugetlbpage.c	2017-01-13 08:40:24.000000000 -0500
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/sysctl.h>
+#include <linux/module.h>
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
@@ -210,6 +211,7 @@ int pmd_huge(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_PSE);
 }
+EXPORT_SYMBOL(pmd_huge);
 
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/hypervisor.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/hypervisor.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/hypervisor.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/hypervisor.c	2017-01-13 08:40:40.000000000 -0500
@@ -213,6 +213,7 @@ void xen_set_ldt(unsigned long ptr, unsi
 	op.arg2.nr_ents     = len;
 	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
+EXPORT_SYMBOL(xen_set_ldt);
 
 /* Protected by balloon_lock. */
 #define MAX_CONTIG_ORDER 9 /* 2MB */
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/init.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/init.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/init.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/init.c	2017-01-13 08:40:16.000000000 -0500
@@ -42,154 +42,20 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/desc.h>
 
-unsigned int __VMALLOC_RESERVE = 128 << 20;
+unsigned int __VMALLOC_RESERVE = 
+#ifdef CONFIG_X86_4G
+			(256 << 20);
+#else
+			(128 << 20);
+#endif
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static int noinline do_test_wp_bit(void);
 
-/*
- * Creates a middle page table and puts a pointer to it in the
- * given global directory entry. This only returns the gd entry
- * in non-PAE compilation mode, since the middle layer is folded.
- */
-static pmd_t * __init one_md_table_init(pgd_t *pgd)
-{
-	pud_t *pud;
-	pmd_t *pmd_table;
-		
-#ifdef CONFIG_X86_PAE
-	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-	pud = pud_offset(pgd, 0);
-	if (pmd_table != pmd_offset(pud, 0)) 
-		BUG();
-#else
-	pud = pud_offset(pgd, 0);
-	pmd_table = pmd_offset(pud, 0);
-#endif
-
-	return pmd_table;
-}
-
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static pte_t * __init one_page_table_init(pmd_t *pmd)
-{
-	if (pmd_none(*pmd)) {
-		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-		if (page_table != pte_offset_kernel(pmd, 0))
-			BUG();	
-
-		return page_table;
-	}
-	
-	return pte_offset_kernel(pmd, 0);
-}
-
-/*
- * This function initializes a certain range of kernel virtual memory 
- * with new bootmem page tables, everywhere page tables are missing in
- * the given range.
- */
-
-/*
- * NOTE: The pagetables are allocated contiguous on the physical space 
- * so we can cache the place of the first one and move around without 
- * checking the pgd every time.
- */
-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	int pgd_idx, pmd_idx;
-	unsigned long vaddr;
-
-	vaddr = start;
-	pgd_idx = pgd_index(vaddr);
-	pmd_idx = pmd_index(vaddr);
-	pgd = pgd_base + pgd_idx;
-
-	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-		if (pgd_none(*pgd)) 
-			one_md_table_init(pgd);
-		pud = pud_offset(pgd, vaddr);
-		pmd = pmd_offset(pud, vaddr);
-		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-			if (pmd_none(*pmd)) 
-				one_page_table_init(pmd);
-
-			vaddr += PMD_SIZE;
-		}
-		pmd_idx = 0;
-	}
-}
-
-static inline int is_kernel_text(unsigned long addr)
-{
-	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
-		return 1;
-	return 0;
-}
-
-/*
- * This maps the physical memory to kernel virtual address space, a total 
- * of max_low_pfn pages, by creating page tables starting from address 
- * PAGE_OFFSET.
- */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
-{
-	unsigned long pfn;
-	pgd_t *pgd;
-	pmd_t *pmd;
-	pte_t *pte;
-	int pgd_idx, pmd_idx, pte_ofs;
-
-	pgd_idx = pgd_index(PAGE_OFFSET);
-	pgd = pgd_base + pgd_idx;
-	pfn = 0;
-
-	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
-		pmd = one_md_table_init(pgd);
-		if (pfn >= max_low_pfn)
-			continue;
-		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
-			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
-
-			/* Map with big pages if possible, otherwise create normal page tables. 
-			 * Don't use a large page for the first 2/4MB of memory
-			 * because there are often fixed size MTRRs in there
-			 * and overlapping MTRRs into large pages can cause
-			 * slowdowns.
-			 */
-			if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
-				unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
-				if (is_kernel_text(address) || is_kernel_text(address2))
-					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
-				else
-					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
-				pfn += PTRS_PER_PTE;
-			} else {
-				pte = one_page_table_init(pmd);
-
-				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
-						if (is_kernel_text(address))
-							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-						else
-							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
-				}
-			}
-		}
-	}
-}
-
 static inline int page_kills_ppro(unsigned long pagenr)
 {
 	if (pagenr >= 0x70000 && pagenr <= 0x7003F)
@@ -259,40 +125,14 @@ int devmem_is_allowed(unsigned long page
 
 EXPORT_SYMBOL_GPL(page_is_ram);
 
-#ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
-pgprot_t kmap_prot;
 
 #define kmap_get_fixmap_pte(vaddr)					\
 	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
 
 static void __init kmap_init(void)
 {
-	unsigned long kmap_vstart;
-
-	/* cache the first kmap pte */
-	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
-}
-
-static void __init permanent_kmaps_init(pgd_t *pgd_base)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long vaddr;
-
-	vaddr = PKMAP_BASE;
-	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
-	pmd = pmd_offset(pud, vaddr);
-	pte = pte_offset_kernel(pmd, vaddr);
-	pkmap_page_table = pte;	
+	kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
 }
 
 static void __meminit free_new_highpage(struct page *page)
@@ -335,6 +175,8 @@ void online_page(struct page *page)
 }
 
 
+#ifdef CONFIG_HIGHMEM
+
 #ifdef CONFIG_NUMA
 extern void set_highmem_pages_init(int);
 #else
@@ -348,8 +190,6 @@ static void __init set_highmem_pages_ini
 #endif /* CONFIG_FLATMEM */
 
 #else
-#define kmap_init() do { } while (0)
-#define permanent_kmaps_init(pgd_base) do { } while (0)
 #define set_highmem_pages_init(bad_ppro) do { } while (0)
 #endif /* CONFIG_HIGHMEM */
 
@@ -364,31 +204,140 @@ extern void __init remap_numa_kva(void);
 #define remap_numa_kva() do {} while (0)
 #endif
 
-static void __init pagetable_init (void)
+static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_base + pgd_index(address);
+	pud = pud_offset(pgd, address);
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd)) {
+		pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
+	}
+}
+
+static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+{
+	unsigned long vaddr;
+
+	for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE)
+		prepare_pagetables(pgd_base, vaddr);
+}
+
+void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
 {
 	unsigned long vaddr;
-	pgd_t *pgd_base = swapper_pg_dir;
+	pgd_t *pgd;
+	int i, j, k;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, *pte_base;
 
+	pgd = pgd_base;
+
+	for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+		vaddr = i*PGDIR_SIZE;
+		if (end && (vaddr >= end))
+			break;
+		pud = pud_offset(pgd, 0);
+		pmd = pmd_offset(pud, 0);
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+			if (end && (vaddr >= end))
+				break;
+			if (vaddr < start)
+				continue;
+			/* Map with big pages if possible, otherwise create normal page tables.
+			 * Don't use a large page for the first 2/4MB of memory
+			 * because there are often fixed size MTRRs in there
+			 * and overlapping MTRRs into large pages can cause
+			 * slowdowns.
+			 */
+			if (cpu_has_pse && !(i == 0 && j == 0)) {
+				unsigned long __pe;
+
+				set_in_cr4(X86_CR4_PSE);
+				boot_cpu_data.wp_works_ok = 1;
+				__pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start;
+				/* Make it "global" too if supported */
+				if (cpu_has_pge) {
+					set_in_cr4(X86_CR4_PGE);
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
+					__pe += _PAGE_GLOBAL;
+					__PAGE_KERNEL |= _PAGE_GLOBAL;
+#endif
+				}
+				set_pmd(pmd, __pmd(__pe));
+				continue;
+			}
+			if (!pmd_present(*pmd))
+				pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+			else
+				pte_base = pte_offset_kernel(pmd, 0);
+			pte = pte_base;
+			for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
+				vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+				if (end && (vaddr >= end))
+					break;
+				if (vaddr < start)
+					continue;
+				/*
+				 * cpu_has_pse can be disabled on PAE, e.g. if CONFIG_DEBUG_PAGEALLOC is set
+				 * so we have to make mappings to be executable, otherwise will silently
+				 * reboot immediately after the code under us is marked NX.
+				 */
+				*pte = mk_pte_phys(vaddr-start, PAGE_KERNEL_EXEC);
+			}
+			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
+		}
+	}
+}
+
+static void __init pagetable_init (void)
+{
+	unsigned long vaddr, end;
+	pgd_t *pgd_base;
 #ifdef CONFIG_X86_PAE
 	int i;
-	/* Init entries of the first-level page table to the zero page */
-	for (i = 0; i < PTRS_PER_PGD; i++)
-		set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
 #endif
 
-	/* Enable PSE if available */
-	if (cpu_has_pse) {
-		set_in_cr4(X86_CR4_PSE);
-	}
+	/*
+	 * This can be zero as well - no problem, in that case we exit
+	 * the loops anyway due to the PTRS_PER_* conditions.
+	 */
+	end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
 
-	/* Enable PGE if available */
-	if (cpu_has_pge) {
-		set_in_cr4(X86_CR4_PGE);
-		__PAGE_KERNEL |= _PAGE_GLOBAL;
-		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
+	pgd_base = swapper_pg_dir;
+#ifdef CONFIG_X86_PAE
+	/*
+	 * It causes too many problems if there's no proper pmd set up
+	 * for all 4 entries of the PGD - so we allocate all of them.
+	 * PAE systems will not miss this extra 4-8K anyway ...
+	 */
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1));
 	}
+#endif
+	/*
+	 * Set up lowmem-sized identity mappings at PAGE_OFFSET:
+	 */
+	setup_identity_mappings(pgd_base, PAGE_OFFSET, end);
 
-	kernel_physical_mapping_init(pgd_base);
+	/*
+	 * Add flat-mode identity-mappings - SMP needs it when
+	 * starting up on an AP from real-mode. (In the non-PAE
+	 * case we already have these mappings through head.S.)
+	 * All user-space mappings are explicitly cleared after
+	 * SMP startup.
+	 */
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE)
+	setup_identity_mappings(pgd_base, 0, 16*1024*1024);
+#endif
 	remap_numa_kva();
 
 	/*
@@ -396,22 +345,60 @@ static void __init pagetable_init (void)
 	 * created - mappings will be set by set_fixmap():
 	 */
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	page_table_range_init(vaddr, 0, pgd_base);
+	fixrange_init(vaddr, 0, pgd_base);
 
-	permanent_kmaps_init(pgd_base);
+#ifdef CONFIG_HIGHMEM
+	{
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+		/*
+		 * Permanent kmaps:
+		 */
+		vaddr = PKMAP_BASE;
+		fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
+		pgd = swapper_pg_dir + pgd_index(vaddr);
+		pud = pud_offset(pgd, vaddr);
+		pmd = pmd_offset(pud, vaddr);
+		pte = pte_offset_kernel(pmd, vaddr);
+		pkmap_page_table = pte;
+	}
 #endif
 }
 
+/*
+ * Clear kernel pagetables in a PMD_SIZE-aligned range.
+ */
+static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
+{
+	unsigned long vaddr;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	int i, j;
+
+	pgd = pgd_base;
+
+	for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+		vaddr = i*PGDIR_SIZE;
+		if (end && (vaddr >= end))
+			break;
+		pud = pud_offset(pgd, 0);
+		pmd = pmd_offset(pud, 0);
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+			if (end && (vaddr >= end))
+				break;
+			if (vaddr < start)
+				continue;
+			pmd_clear(pmd);
+		}
+	}
+	flush_tlb_all();
+}
+
 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
 /*
  * Swap suspend & friends need this for resume because things like the intel-agp
@@ -430,25 +417,15 @@ static inline void save_pg_dir(void)
 }
 #endif
 
-void zap_low_mappings (void)
+void zap_low_mappings(void)
 {
-	int i;
-
 	save_pg_dir();
 
+	printk("zapping low mappings.\n");
 	/*
 	 * Zap initial low-memory mappings.
-	 *
-	 * Note that "pgd_clear()" doesn't do it for
-	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
-		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
-		set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
-	flush_tlb_all();
+	clear_mappings(swapper_pg_dir, 0, 16*1024*1024);
 }
 
 static int disable_nx __initdata = 0;
@@ -492,7 +469,6 @@ static void __init set_nx(void)
 		}
 	}
 }
-
 /*
  * Enables/disables executability of a given kernel page and
  * returns the previous setting.
@@ -553,7 +529,15 @@ void __init paging_init(void)
 		set_in_cr4(X86_CR4_PAE);
 #endif
 	__flush_tlb_all();
-
+	/*
+	 * Subtle. SMP is doing its boot stuff late (because it has to
+	 * fork idle threads) - but it also needs low mappings for the
+	 * protected-mode entry to work. We zap these entries only after
+	 * the WP-bit has been tested.
+	 */
+#ifndef CONFIG_SMP
+	zap_low_mappings();
+#endif
 	kmap_init();
 }
 
@@ -667,15 +651,9 @@ void __init mem_init(void)
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
-	/*
-	 * Subtle. SMP is doing it's boot stuff late (because it has to
-	 * fork idle threads) - but it also needs low mappings for the
-	 * protected-mode entry to work. We zap these entries only after
-	 * the WP-bit has been tested.
-	 */
-#ifndef CONFIG_SMP
-	zap_low_mappings();
-#endif
+	entry_trampoline_setup();
+	default_ldt_page = virt_to_page(default_ldt);
+	load_LDT(&init_mm.context);
 }
 
 /*
@@ -704,25 +682,52 @@ int remove_memory(u64 start, u64 size)
 
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
+kmem_cache_t *kpmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+	void (*ctor)(void *, kmem_cache_t *, unsigned long);
+	void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
 	if (PTRS_PER_PMD > 1) {
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_UBC,
 					pmd_ctor,
 					NULL);
 		if (!pmd_cache)
 			panic("pgtable_cache_init(): cannot create pmd cache");
+
+		if (TASK_SIZE > PAGE_OFFSET) {
+			kpmd_cache = kmem_cache_create("kpmd",
+					PTRS_PER_PMD*sizeof(pmd_t),
+					PTRS_PER_PMD*sizeof(pmd_t),
+					0,
+					kpmd_ctor,
+					NULL);
+			if (!kpmd_cache)
+				panic("pgtable_cache_init(): "
+						"cannot create kpmd cache");
+		}
 	}
+
+	if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET)
+		ctor = pgd_ctor;
+	else
+		ctor = NULL;
+
+	if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET)
+		dtor = pgd_dtor;
+	else
+		dtor = NULL;
+
 	pgd_cache = kmem_cache_create("pgd",
 				PTRS_PER_PGD*sizeof(pgd_t),
 				PTRS_PER_PGD*sizeof(pgd_t),
-				0,
-				pgd_ctor,
-				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+				SLAB_UBC,
+				ctor,
+				dtor);
 	if (!pgd_cache)
 		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
@@ -788,7 +793,8 @@ void free_init_pages(char *what, unsigne
 		__free_page(page);
 		totalram_pages++;
 	}
-	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+	printk(KERN_INFO "Freeing %s: (%08x-%08x) %ldk freed\n", what,
+			begin, end, (end - begin) >> 10);
 }
 
 void free_initmem(void)
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/init-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/init-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/init-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/init-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -767,7 +767,7 @@ void __init pgtable_cache_init(void)
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_UBC,
 					pmd_ctor,
 					NULL);
 		if (!pmd_cache)
@@ -781,7 +781,7 @@ void __init pgtable_cache_init(void)
 				PAGE_SIZE,
 				PAGE_SIZE,
 #endif
-				0,
+				SLAB_UBC,
 				pgd_ctor,
 				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
 	if (!pgd_cache)
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/ioremap.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/ioremap.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/ioremap.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/ioremap.c	2017-01-13 08:40:24.000000000 -0500
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/mm.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/mmap.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/mmap.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/mmap.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/mmap.c	2017-01-13 08:40:41.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/personality.h>
 #include <linux/mm.h>
 #include <linux/random.h>
+#include <linux/module.h>
 
 /*
  * Top of mmap area (just below the process stack).
@@ -93,3 +94,4 @@ void arch_pick_mmap_layout(struct mm_str
 		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
+EXPORT_SYMBOL(arch_pick_mmap_layout);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/pageattr.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/pageattr.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/pageattr.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/pageattr.c	2017-01-13 08:40:14.000000000 -0500
@@ -86,6 +86,8 @@ static void set_pmd_pte(pte_t *kpte, uns
 	set_pte_atomic(kpte, pte); 	/* change init_mm */
 	if (HAVE_SHARED_KERNEL_PMD)
 		return;
+	if (TASK_SIZE > PAGE_OFFSET)
+		return;
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	for (page = pgd_list; page; page = (struct page *)page->index) {
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/pgtable.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/pgtable.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/pgtable.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/pgtable.c	2017-01-13 08:40:21.000000000 -0500
@@ -4,8 +4,10 @@
 
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/highmem.h>
@@ -20,6 +22,11 @@
 #include <asm/e820.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/atomic_kmap.h>
+
+#ifndef CONFIG_HIGHMEM
+#define PKMAP_BASE	0
+#endif
 
 void show_mem(void)
 {
@@ -64,6 +71,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages pagetables\n",
 					global_page_state(NR_PAGETABLE));
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Associate a virtual page frame with a given physical page frame 
@@ -158,9 +166,11 @@ struct page *pte_alloc_one(struct mm_str
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #endif
 	return pte;
 }
@@ -170,16 +180,33 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
 	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
 }
 
+void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
+{
+	pud_t *pud;
+	pmd_t *kpmd, *pmd;
+
+	pud = pud_offset(&swapper_pg_dir[PTRS_PER_PGD-1], (long)"doesn't matter"); 
+	kpmd = pmd_offset(pud, (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
+	pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);
+
+	memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
+	memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
+}
+
 /*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
+ * List of all pgd's needed so it can invalidate entries in both cached
+ * and uncached pgd's. This is essentially codepath-based locking
  * against pageattr.c; it is the unique case in which a valid change
  * of kernel pagetables can't be lazily synchronized by vmalloc faults.
  * vmalloc faults work because attached pagetables are never freed.
  * The locking scheme was chosen on the basis of manfred's
  * recommendations and having no core impact whatsoever.
+ *
+ * Lexicon for #ifdefless conditions to config options:
+ * (a) PTRS_PER_PMD == 1 means non-PAE.
+ * (b) PTRS_PER_PMD > 1 means PAE.
+ * (c) TASK_SIZE > PAGE_OFFSET means 4:4.
+ * (d) TASK_SIZE <= PAGE_OFFSET means non-4:4.
  * -- wli
  */
 DEFINE_SPINLOCK(pgd_lock);
@@ -205,26 +232,50 @@ static inline void pgd_list_del(pgd_t *p
 		set_page_private(next, (unsigned long)pprev);
 }
 
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
 {
+	pgd_t *pgd = __pgd;
 	unsigned long flags;
 
+	/*
+	 * Cases:
+	 * 1. non-PAE mode
+	 * 2. PAE mode
+	 * 3. 4:4 split
+	 * I guess it would look much better with ifdef's --dev@
+	 */
 	if (PTRS_PER_PMD == 1) {
-		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-		spin_lock_irqsave(&pgd_lock, flags);
+		if (TASK_SIZE <= PAGE_OFFSET) {
+			/* case 1 */
+			memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+			spin_lock_irqsave(&pgd_lock, flags);
+		} else
+			/* case 3 */
+			clone_pgd_range((pgd_t *)pgd + PTRS_PER_PGD - NR_SHARED_PMDS,
+				 swapper_pg_dir + PTRS_PER_PGD - NR_SHARED_PMDS,
+				 NR_SHARED_PMDS);
 	}
 
-	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			KERNEL_PGD_PTRS);
+	if (TASK_SIZE <= PAGE_OFFSET)
+		/* case 1, 2 */
+		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+				swapper_pg_dir + USER_PTRS_PER_PGD,
+				KERNEL_PGD_PTRS);
+	/* case 2, 2+3 */
 	if (PTRS_PER_PMD > 1)
 		return;
 
-	pgd_list_add(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	if (TASK_SIZE > PAGE_OFFSET)
+		/* case 3 */
+		memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
+	else {
+		/* case 1 */
+		pgd_list_add(pgd);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
 }
 
-/* never called when PTRS_PER_PMD > 1 */
+/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */
 void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 {
 	unsigned long flags; /* can be called from interrupt context */
@@ -242,8 +293,19 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	if (PTRS_PER_PMD == 1 || !pgd)
 		return pgd;
 
+	/*
+	 * In the 4G userspace case alias the top 16 MB virtual
+	 * memory range into the user mappings as well (these
+	 * include the trampoline and CPU data structures).
+	 */
 	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+		pmd_t *pmd;
+
+		if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+			pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL);
+		else
+			pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
 		if (!pmd)
 			goto out_oom;
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
@@ -251,6 +313,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	return pgd;
 
 out_oom:
+	/*
+	 * we don't have to handle the kpmd_cache here, since it's the
+	 * last allocation, and has either nothing to free or when it
+	 * succeeds the whole operation succeeds.
+	 */
 	for (i--; i >= 0; i--)
 		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
 	kmem_cache_free(pgd_cache, pgd);
@@ -261,10 +328,25 @@ void pgd_free(pgd_t *pgd)
 {
 	int i;
 
-	/* in the PAE case user pgd entries are overwritten before usage */
-	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
-			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
 	/* in the non-PAE case, free_pgtables() clears user pgd entries */
+	if (PTRS_PER_PMD == 1)
+		goto out_free;
+
+	/* in the PAE case user pgd entries are overwritten before usage */
+	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);
+
+		/*
+		 * only userspace pmd's are cleared for us
+		 * by mm/memory.c; it's a slab cache invariant
+		 * that we must separate the kernel pmd slab
+		 * all times, else we'll have bad pmd's.
+		 */
+		if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+			kmem_cache_free(kpmd_cache, pmd);
+		else
+			kmem_cache_free(pmd_cache, pmd);
+	}
+out_free:
 	kmem_cache_free(pgd_cache, pgd);
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/mm/pgtable-xen.c kernel-2.6.18-417.el5-028stab121/arch/i386/mm/pgtable-xen.c
--- kernel-2.6.18-417.el5.orig/arch/i386/mm/pgtable-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/mm/pgtable-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -4,6 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -73,6 +74,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages pagetables\n",
 					global_page_state(NR_PAGETABLE));
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Associate a large virtual page frame with a given physical page frame 
@@ -158,9 +160,11 @@ struct page *pte_alloc_one(struct mm_str
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 	if (pte) {
 		SetPageForeign(pte, pte_free);
 		init_page_count(pte);
diff -upr kernel-2.6.18-417.el5.orig/arch/i386/power/cpu.c kernel-2.6.18-417.el5-028stab121/arch/i386/power/cpu.c
--- kernel-2.6.18-417.el5.orig/arch/i386/power/cpu.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/i386/power/cpu.c	2017-01-13 08:40:14.000000000 -0500
@@ -63,9 +63,9 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct * t = &per_cpu(init_tss, cpu);
-
-	set_tss_desc(cpu,t);	/* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
+	struct tss_struct *t = init_tss + cpu;
+ 
+ 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/ia32/binfmt_elf32.c kernel-2.6.18-417.el5-028stab121/arch/ia64/ia32/binfmt_elf32.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/ia32/binfmt_elf32.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/ia32/binfmt_elf32.c	2017-01-13 08:40:17.000000000 -0500
@@ -17,6 +17,8 @@
 #include <asm/param.h>
 #include <asm/signal.h>
 
+#include <ub/ub_vmpages.h>
+
 #include "ia32priv.h"
 #include "elfcore32.h"
 
@@ -138,6 +140,12 @@ ia64_elf32_init (struct pt_regs *regs)
 		up_write(&current->mm->mmap_sem);
 	}
 
+	if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
+					IA32_LDT_ENTRY_SIZE),
+				VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE,
+				NULL, UB_SOFT))
+		goto skip;
+
 	/*
 	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
 	 * until a task modifies them via modify_ldt().
@@ -159,7 +167,12 @@ ia64_elf32_init (struct pt_regs *regs)
 			}
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
+					IA32_LDT_ENTRY_SIZE),
+				VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL);
+
+skip:
 
 	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
 	regs->loadrs = 0;
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/ia32/ia32_entry.S kernel-2.6.18-417.el5-028stab121/arch/ia64/ia32/ia32_entry.S
--- kernel-2.6.18-417.el5.orig/arch/ia64/ia32/ia32_entry.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/ia32/ia32_entry.S	2017-01-13 08:40:16.000000000 -0500
@@ -304,7 +304,7 @@ ia32_syscall_table:
 	data8 sys_ni_syscall	/* init_module */
 	data8 sys_ni_syscall	/* delete_module */
 	data8 sys_ni_syscall	/* get_kernel_syms */  /* 130 */
-	data8 sys_quotactl
+	data8 sys32_quotactl
 	data8 sys_getpgid
 	data8 sys_fchdir
 	data8 sys_ni_syscall	/* sys_bdflush */
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/Kconfig kernel-2.6.18-417.el5-028stab121/arch/ia64/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/ia64/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -304,7 +304,7 @@ config NR_CPUS
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && EXPERIMENTAL
+	depends on SMP && EXPERIMENTAL && !SCHED_VCPU
 	select HOTPLUG
 	default n
 	---help---
@@ -605,10 +605,13 @@ endmenu
 
 source "arch/ia64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "kernel/ub/Kconfig"
 #
 # override default values of drivers/xen/Kconfig
 #
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/asm-offsets.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/asm-offsets.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/asm-offsets.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/asm-offsets.c	2017-01-13 08:40:20.000000000 -0500
@@ -44,11 +44,19 @@ void foo(void)
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, vpid));
+#else
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+#endif
 	DEFINE(IA64_TASK_PARENT_OFFSET, offsetof (struct task_struct, parent));
 	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
 	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, vtgid));
+#else
 	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+#endif
 	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
 	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
 
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/entry.S kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/entry.S
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/entry.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/entry.S	2017-01-13 08:40:24.000000000 -0500
@@ -504,6 +504,74 @@ GLOBAL_ENTRY(clone)
 	br.ret.sptk.many rp
 END(clone)
 
+GLOBAL_ENTRY(ia64_ret_from_resume)
+	PT_REGS_UNWIND_INFO(0)
+{	/*
+	 * Some versions of gas generate bad unwind info if the first instruction of a
+	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
+	 */
+	nop.m 0
+	nop.i 0
+	/*
+	 * We need to call schedule_tail() to complete the scheduling process.
+	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
+	 * address of the previously executing task.
+	 */
+	br.call.sptk.many rp=ia64_invoke_schedule_tail
+}
+	br.call.sptk.many rp=ia64_invoke_resume
+	;;
+	adds sp=256,sp
+	;;
+	/* Return from interrupt, we are all right. */
+(pNonSys) br ia64_leave_kernel
+	;;
+	/* Tricky part follows. We must restore correct syscall
+	 * register frame before doing normal syscall exit job.
+	 * It would be most natural to keep sw->ar_pfs correct,
+	 * then we would be here with correct register frame.
+	 * Unfortunately, IA64 has a feature. Registers were in backstore
+	 * after context switch, and the first br.ret does _NOT_ fetch
+	 * output registers.
+	 * It is quite natural:	look, if caller has output regs in his
+	 * frame, they should be consumed. If callee does not have (enough of)
+	 * input/local registers (1 in this case), the situation is unusual.
+	 * Practical evidence: they are filled with something random crap.
+	 * The only case, when this is essential in mainstream kernel
+	 * is sys_clone(). The result is that new process gets some kernel
+	 * information in its register frame. Which is a security problem, btw.
+	 *
+	 * So, we set sw->ar_pfs to pretend the whole frame is of local
+	 * regs. And we have to repartition the frame manually, using
+	 * information from pt->cr_ifs (the register is invalid in this
+	 * case, but it holds correct pfm).
+	 */
+	adds r3=PT(CR_IFS)+16,sp
+	;;
+	ld8  r2=[r3],-(PT(CR_IFS)-PT(R8))
+	;;
+	extr.u  r2=r2,0,37
+	mov	r8=ar.ec
+	;;
+	extr.u  r8=r8,0,5
+	;;
+	shl	r8=r8,52
+	;;
+	or	r2=r2,r8
+	;;
+	mov  ar.pfs=r2
+	;;
+	movl r2=ia64_leave_syscall
+	;;
+	mov  rp=r2
+	/* Plus, we should fetch r8 and r10 from pt_regs. Something else? */
+	ld8  r8=[r3],PT(R10)-PT(R8)
+	;;
+	ld8  r10=[r3]
+	;;
+	br.ret.sptk.many rp
+END(ia64_ret_from_resume)
+
 	/*
 	 * Invoke a system call, but do some tracing before and after the call.
 	 * We MUST preserve the current register frame throughout this routine
@@ -1176,6 +1244,34 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail)
 	br.ret.sptk.many rp
 END(ia64_invoke_schedule_tail)
 
+GLOBAL_ENTRY(ia64_invoke_resume)
+	alloc loc1=ar.pfs,0,3,1,0
+	mov loc0=rp
+	adds out0=16,sp
+	;;
+	ld8  r8=[out0]
+	;;
+	cmp.eq p6,p0=r8,r0
+	;;
+(p6)	br.cond.sptk 1f
+	;;
+	mov  loc2=gp
+	;;
+	ld8  r10=[r8],8
+	;;
+	ld8  gp=[r8]
+	;;
+	mov  b7=r10
+	;;
+	br.call.sptk.many rp=b7
+	;;
+	mov  gp=loc2
+1:	
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(ia64_invoke_resume)
+
 	/*
 	 * Setup stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
 	 * be set up by the caller.  We declare 8 input registers so the system call
@@ -1618,5 +1714,10 @@ sys_call_table:
 	data8 sys_ni_syscall			// 1320
 	data8 sys_ni_syscall
 	data8 sys_recvmmsg
+.rept 1509-1323
+	data8 sys_ni_syscall
+.endr
+	data8 sys_lchmod
+	data8 sys_lutime			// 1510
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/fsys.S kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/fsys.S
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/fsys.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/fsys.S	2017-01-13 08:40:19.000000000 -0500
@@ -74,6 +74,7 @@ ENTRY(fsys_getpid)
 	FSYS_RETURN
 END(fsys_getpid)
 
+#ifndef CONFIG_VE
 ENTRY(fsys_getppid)
 	.prologue
 	.altrp b6
@@ -120,6 +121,7 @@ ENTRY(fsys_getppid)
 #endif
 	FSYS_RETURN
 END(fsys_getppid)
+#endif
 
 ENTRY(fsys_set_tid_address)
 	.prologue
@@ -763,7 +765,11 @@ fsyscall_table:
 	data8 0				// chown
 	data8 0				// lseek		// 1040
 	data8 fsys_getpid		// getpid
+#ifdef CONFIG_VE
+	data8 0
+#else
 	data8 fsys_getppid		// getppid
+#endif
 	data8 0				// mount
 	data8 0				// umount
 	data8 0				// setuid		// 1045
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/head.S kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/head.S
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/head.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/head.S	2017-01-13 08:40:15.000000000 -0500
@@ -1015,7 +1015,7 @@ GLOBAL_ENTRY(start_kernel_thread)
 	mov out1 = r11;;
 	br.call.sptk.many rp = kernel_thread_helper;;
 	mov out0 = r8
-	br.call.sptk.many rp = sys_exit;;
+	br.call.sptk.many rp = do_exit;;
 1:	br.sptk.few 1b				// not reached
 END(start_kernel_thread)
 
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/ia64_ksyms.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/ia64_ksyms.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/ia64_ksyms.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/ia64_ksyms.c	2017-01-13 08:40:24.000000000 -0500
@@ -80,6 +80,8 @@ EXPORT_SYMBOL(xor_ia64_4);
 EXPORT_SYMBOL(xor_ia64_5);
 #endif
 
+EXPORT_SYMBOL(empty_zero_page);
+
 #include <asm/pal.h>
 EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
 EXPORT_SYMBOL(ia64_pal_call_phys_static);
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -21,6 +22,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/mca.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/mca.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/mca.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/mca.c	2017-01-13 08:40:19.000000000 -0500
@@ -1580,10 +1580,10 @@ default_monarch_init_process(struct noti
 	}
 	printk("\n\n");
 	if (read_trylock(&tasklist_lock)) {
-		do_each_thread (g, t) {
+		do_each_thread_all (g, t) {
 			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
 			show_stack(t, NULL);
-		} while_each_thread (g, t);
+		} while_each_thread_all (g, t);
 		read_unlock(&tasklist_lock);
 	}
 	/* FIXME: This will not restore zapped printk locks. */
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/perfmon.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/perfmon.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/perfmon.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/perfmon.c	2017-01-13 08:40:19.000000000 -0500
@@ -2623,7 +2623,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
 
 		read_lock(&tasklist_lock);
 
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		/* make sure task cannot go away while we operate on it */
 		if (p) get_task_struct(p);
@@ -4187,12 +4187,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread (g, t) {
+	do_each_thread_ve (g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
 			break;
 		}
-	} while_each_thread (g, t);
+	} while_each_thread_ve (g, t);
 
 	read_unlock(&tasklist_lock);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/process.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/process.c	2017-01-13 08:40:40.000000000 -0500
@@ -30,6 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/utsname.h>
+#include <linux/sysctl.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -94,6 +95,8 @@ show_stack (struct task_struct *task, un
 	}
 }
 
+EXPORT_SYMBOL(show_stack);
+
 void
 dump_stack (void)
 {
@@ -111,7 +114,7 @@ show_regs (struct pt_regs *regs)
 	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
 	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s (%s)\n",
 	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
-	       system_utsname.release);
+	       init_utsname()->release);
 	print_symbol("ip is at %s\n", ip);
 	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
 	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
@@ -363,6 +366,9 @@ ia64_load_extra (struct task_struct *tas
 #endif
 }
 
+extern char ia64_ret_from_resume;
+EXPORT_SYMBOL(ia64_ret_from_resume);
+
 /*
  * Copy the state of an ia-64 thread.
  *
@@ -436,7 +442,6 @@ copy_thread (int nr, unsigned long clone
 			child_ptregs->r12 = user_stack_base + user_stack_size - 16;
 			child_ptregs->ar_bspstore = user_stack_base;
 			child_ptregs->ar_rnat = 0;
-			child_ptregs->loadrs = 0;
 		}
 	} else {
 		/*
@@ -676,16 +681,26 @@ out:
 	return error;
 }
 
+extern void start_kernel_thread (void);
+EXPORT_SYMBOL(start_kernel_thread);
+EXPORT_SYMBOL(execve);
+
 pid_t
 kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
 {
-	extern void start_kernel_thread (void);
 	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
 	struct {
 		struct switch_stack sw;
 		struct pt_regs pt;
 	} regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside container\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
 	regs.pt.r1 = helper_fptr[1];		/* set GP */
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/ptrace.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/ptrace.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/ptrace.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/ptrace.c	2017-01-13 08:40:24.000000000 -0500
@@ -10,6 +10,7 @@
  * Derived from the x86 and Alpha versions.
  */
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -107,6 +108,8 @@ ia64_get_scratch_nat_bits (struct pt_reg
 
 #	undef GET_BITS
 }
+EXPORT_SYMBOL(ia64_get_scratch_nat_bits);
+EXPORT_SYMBOL(__ia64_save_fpu);
 
 /*
  * Set the NaT bits for the scratch registers according to NAT and
@@ -463,6 +466,7 @@ ia64_peek (struct task_struct *child, st
 	*val = ret;
 	return 0;
 }
+EXPORT_SYMBOL(ia64_peek);
 
 long
 ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
@@ -527,6 +531,7 @@ ia64_get_user_rbs_end (struct task_struc
 		*cfmp = cfm;
 	return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
 }
+EXPORT_SYMBOL(ia64_get_user_rbs_end);
 
 /*
  * Synchronize (i.e, write) the RSE backing store living in kernel
@@ -765,20 +770,20 @@ access_nat_bits (struct task_struct *chi
 	if (write_access) {
 		nat_bits = *data;
 		scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits);
-		if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) {
-			dprintk("ptrace: failed to set ar.unat\n");
-			return -1;
-		}
+		if (info->pri_unat_loc)
+			*info->pri_unat_loc = scratch_unat;
+		else
+			info->sw->caller_unat = scratch_unat;
 		for (regnum = 4; regnum <= 7; ++regnum) {
 			unw_get_gr(info, regnum, &dummy, &nat);
 			unw_set_gr(info, regnum, dummy,
 				   (nat_bits >> regnum) & 1);
 		}
 	} else {
-		if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) {
-			dprintk("ptrace: failed to read ar.unat\n");
-			return -1;
-		}
+		if (info->pri_unat_loc)
+			scratch_unat = *info->pri_unat_loc;
+		else
+			scratch_unat = info->sw->caller_unat;
 		nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat);
 		for (regnum = 4; regnum <= 7; ++regnum) {
 			unw_get_gr(info, regnum, &dummy, &nat);
@@ -1712,6 +1717,15 @@ int arch_ptrace(long *request, struct ta
 		ret = ret == sizeof(*val) ? 0 : -EIO;
 		break;
 
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA: {
+		unsigned long urbs_end;
+		struct switch_stack  * sw = (struct switch_stack *) (child->thread.ksp + 16);
+		urbs_end = ia64_get_user_rbs_end(child, task_pt_regs(child), NULL);
+		ret = ia64_poke(child, sw, urbs_end, addr, data);
+		break;
+	}
+
 	case PTRACE_PEEKUSR:
 		return ptrace_layout_access(child, engine,
 					    utrace_native_view(current),
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/setup.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/setup.c	2017-01-13 08:40:28.000000000 -0500
@@ -45,6 +45,7 @@
 #include <linux/cpufreq.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
+#include <linux/vsched.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -759,9 +760,13 @@ show_cpuinfo (struct seq_file *m, void *
 		sprintf(cp, " 0x%lx", mask);
 	}
 
+#ifndef CONFIG_FAIRSCHED
 	proc_freq = cpufreq_quick_get(cpunum);
 	if (!proc_freq)
 		proc_freq = c->proc_freq / 1000;
+#else
+	proc_freq = ve_scale_khz(c->proc_freq) / 1000;
+#endif
 
 	seq_printf(m,
 		   "processor  : %d\n"
@@ -800,7 +805,7 @@ static void *
 c_start (struct seq_file *m, loff_t *pos)
 {
 #ifdef CONFIG_SMP
-	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+	while (*pos < NR_CPUS && !vcpu_online(*pos))
 		++*pos;
 #endif
 	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/signal.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/signal.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/signal.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/signal.c	2017-01-13 08:40:24.000000000 -0500
@@ -228,7 +228,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
@@ -333,7 +333,7 @@ force_sigsegv_info (int sig, void __user
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
@@ -457,6 +457,12 @@ ia64_do_signal (struct sigscratch *scr, 
 	if (!user_mode(&scr->pt))
 		return;
 
+	if (try_to_freeze() && !signal_pending(current)) {
+		if ((long) scr->pt.r10 != -1)
+			restart = 0;
+		goto no_signal;
+	}
+
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
 	else
@@ -512,8 +518,10 @@ ia64_do_signal (struct sigscratch *scr, 
 				if (IS_IA32_PROCESS(&scr->pt)) {
 					scr->pt.r8 = scr->pt.r1;
 					scr->pt.cr_iip -= 2;
-				} else
+				} else {
 					ia64_decrement_ip(&scr->pt);
+					scr->pt.r10 = 0;
+				}
 				restart = 0; /* don't restart twice if handle_signal() fails... */
 			}
 		}
@@ -534,6 +542,7 @@ ia64_do_signal (struct sigscratch *scr, 
 	}
 
 	/* Did we come from a system call? */
+no_signal:
 	if (restart) {
 		/* Restart the system call - no handlers present */
 		if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
@@ -553,6 +562,7 @@ ia64_do_signal (struct sigscratch *scr, 
 				ia64_decrement_ip(&scr->pt);
 				if (errno == ERESTART_RESTARTBLOCK)
 					scr->pt.r15 = __NR_restart_syscall;
+				scr->pt.r10 = 0;
 			}
 		}
 	}
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/time.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/time.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/time.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/time.c	2017-01-13 08:40:16.000000000 -0500
@@ -39,6 +39,8 @@
 extern unsigned long wall_jiffies;
 
 volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
+unsigned int cpu_khz;	/* CPU frequency in kHz (x86-compat name); not used here */
+EXPORT_SYMBOL(cpu_khz);
 
 #ifdef CONFIG_IA64_DEBUG_IRQ
 
@@ -419,6 +421,8 @@ ia64_init_itm (void)
 	/* avoid softlock up message when cpu is unplug and plugged again. */
 	touch_softlockup_watchdog();
 
+	cpu_khz = local_cpu_data->proc_freq / 1000;
+
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/traps.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/traps.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/traps.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/traps.c	2017-01-13 08:40:15.000000000 -0500
@@ -53,34 +53,6 @@ trap_init (void)
 		fpswa_interface = __va(ia64_boot_param->fpswa);
 }
 
-/*
- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
- * is acquired through the console unblank code)
- */
-void
-bust_spinlocks (int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk() without
-	 * oops_in_progress set so that printk will give klogd a poke.  Hold onto
-	 * your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
 void
 die (const char *str, struct pt_regs *regs, long err)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/unaligned.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/unaligned.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/unaligned.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/unaligned.c	2017-01-13 08:40:15.000000000 -0500
@@ -1290,7 +1290,7 @@ within_logging_rate_limit (void)
 {
 	static unsigned long count, last_time;
 
-	if (jiffies - last_time > 5*HZ)
+	if (jiffies - last_time > 60 * HZ)
 		count = 0;
 	if (count < 5) {
 		last_time = jiffies;
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/kernel/unwind.c kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/unwind.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/kernel/unwind.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/kernel/unwind.c	2017-01-13 08:40:16.000000000 -0500
@@ -60,6 +60,7 @@
 #  define UNW_DEBUG_ON(n)	unw_debug_level >= n
    /* Do not code a printk level, not all debug lines end in newline */
 #  define UNW_DPRINT(n, ...)  if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+#  undef inline
 #  define inline
 #else /* !UNW_DEBUG */
 #  define UNW_DEBUG_ON(n)  0
@@ -1943,9 +1944,9 @@ EXPORT_SYMBOL(unw_unwind);
 int
 unw_unwind_to_user (struct unw_frame_info *info)
 {
-	unsigned long ip, sp, pr = 0;
+	unsigned long ip, sp, pr = info->pr;
 
-	while (unw_unwind(info) >= 0) {
+	do {
 		unw_get_sp(info, &sp);
 		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
 		    < IA64_PT_REGS_SIZE) {
@@ -1963,7 +1964,7 @@ unw_unwind_to_user (struct unw_frame_inf
 				__FUNCTION__, ip);
 			return -1;
 		}
-	}
+	} while (unw_unwind(info) >= 0);
 	unw_get_ip(info, &ip);
 	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
 		   __FUNCTION__, ip);
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/mm/contig.c kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/contig.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/mm/contig.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/contig.c	2017-01-13 08:40:19.000000000 -0500
@@ -92,6 +92,7 @@ void show_mem(void)
 	       pgtable_quicklist_total_size());
 	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
 }
+EXPORT_SYMBOL(show_mem);
 
 
 /* physical address where the bootmem map is located */
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/mm/discontig.c kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/discontig.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/mm/discontig.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/discontig.c	2017-01-13 08:40:19.000000000 -0500
@@ -46,6 +46,7 @@ static struct early_node_data mem_data[M
 static nodemask_t memory_less_mask __initdata;
 
 pg_data_t *pgdat_list[MAX_NUMNODES];
+EXPORT_SYMBOL(pgdat_list);
 
 /*
  * To prevent cache aliasing effects, align per-node structures so that they
@@ -595,6 +596,7 @@ void show_mem(void)
 	       pgtable_quicklist_total_size());
 	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
 }
+EXPORT_SYMBOL(show_mem);
 
 /**
  * call_pernode_memory - use SRAT to call callback functions with node info
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/mm/fault.c kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/fault.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/mm/fault.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/fault.c	2017-01-13 08:40:17.000000000 -0500
@@ -161,7 +161,6 @@ ia64_do_page_fault (unsigned long addres
 	if ((vma->vm_flags & mask) != mask)
 		goto bad_area;
 
-  survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault, make
 	 * sure we exit gracefully rather than endlessly redo the
@@ -288,13 +287,13 @@ ia64_do_page_fault (unsigned long addres
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
+	if (user_mode(regs)) {
+		/* 
+		 * 0-order allocation always success if something really 
+		 * fatal not happen: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, current);
+		return;
+	}
 	goto no_context;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/mm/hugetlbpage.c kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/hugetlbpage.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/mm/hugetlbpage.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/hugetlbpage.c	2017-01-13 08:40:23.000000000 -0500
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/module.h>
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
@@ -108,6 +109,7 @@ int pmd_huge(pmd_t pmd)
 {
 	return 0;
 }
+EXPORT_SYMBOL(pmd_huge);
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/mm/init.c kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/init.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/mm/init.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/mm/init.c	2017-01-13 08:40:17.000000000 -0500
@@ -36,6 +36,8 @@
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -101,7 +103,7 @@ check_pgt_cache(void)
 	preempt_disable();
 	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
 		while (pages_to_free--) {
-			free_page((unsigned long)pgtable_quicklist_alloc());
+			free_page((unsigned long)pgtable_quicklist_alloc(0));
 		}
 		preempt_enable();
 		preempt_disable();
@@ -157,6 +159,10 @@ ia64_init_addr_space (void)
 
 	ia64_set_rbs_bot();
 
+	if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS,
+				NULL, UB_SOFT))
+		goto skip;
+
 	/*
 	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
 	 * the problem.  When the process attempts to write to the register backing store
@@ -174,11 +180,16 @@ ia64_init_addr_space (void)
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
 			kmem_cache_free(vm_area_cachep, vma);
+			ub_memory_uncharge(current->mm, PAGE_SIZE,
+					VM_DATA_DEFAULT_FLAGS, NULL);
 			return;
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(current->mm, PAGE_SIZE,
+				VM_DATA_DEFAULT_FLAGS, NULL);
 
+skip:
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
diff -upr kernel-2.6.18-417.el5.orig/arch/ia64/sn/kernel/sn2/sn_hwperf.c kernel-2.6.18-417.el5-028stab121/arch/ia64/sn/kernel/sn2/sn_hwperf.c
--- kernel-2.6.18-417.el5.orig/arch/ia64/sn/kernel/sn2/sn_hwperf.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ia64/sn/kernel/sn2/sn_hwperf.c	2017-01-13 08:40:15.000000000 -0500
@@ -422,7 +422,7 @@ static int sn_topology_show(struct seq_f
 			"coherency_domain %d, "
 			"region_size %d\n",
 
-			partid, system_utsname.nodename,
+			partid, utsname()->nodename,
 			shubtype ? "shub2" : "shub1", 
 			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
 			system_size, sharing_size, coher, region_size);
diff -upr kernel-2.6.18-417.el5.orig/arch/m32r/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/m32r/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/m32r/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/m32r/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -7,6 +7,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -16,6 +17,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/m32r/kernel/sys_m32r.c kernel-2.6.18-417.el5-028stab121/arch/m32r/kernel/sys_m32r.c
--- kernel-2.6.18-417.el5.orig/arch/m32r/kernel/sys_m32r.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/m32r/kernel/sys_m32r.c	2017-01-13 08:40:15.000000000 -0500
@@ -205,7 +205,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/m68knommu/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/m68knommu/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/m68knommu/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/m68knommu/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -8,6 +8,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -17,6 +18,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/mips/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/mips/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -4,6 +4,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/thread_info.h>
 #include <asm/uaccess.h>
@@ -14,6 +15,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/mips/kernel/linux32.c kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/linux32.c
--- kernel-2.6.18-417.el5.orig/arch/mips/kernel/linux32.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/linux32.c	2017-01-13 08:40:15.000000000 -0500
@@ -1041,7 +1041,7 @@ asmlinkage long sys32_newuname(struct ne
 	int ret = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name, utsname(), sizeof *name))
 		ret = -EFAULT;
 	up_read(&uts_sem);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/mips/kernel/syscall.c kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/syscall.c
--- kernel-2.6.18-417.el5.orig/arch/mips/kernel/syscall.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/syscall.c	2017-01-13 08:40:15.000000000 -0500
@@ -231,7 +231,7 @@ out:
  */
 asmlinkage int sys_uname(struct old_utsname __user * name)
 {
-	if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+	if (name && !copy_to_user(name, utsname(), sizeof (*name)))
 		return 0;
 	return -EFAULT;
 }
@@ -248,16 +248,21 @@ asmlinkage int sys_olduname(struct oldol
 	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
 		return -EFAULT;
 
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-	error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-	error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-	error -= __put_user(0,name->release+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-	error -= __put_user(0,name->version+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-	error = __put_user(0,name->machine+__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       __OLD_UTS_LEN);
+	error -= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->nodename, &utsname()->nodename,
+				__OLD_UTS_LEN);
+	error -= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->release, &utsname()->release,
+				__OLD_UTS_LEN);
+	error -= __put_user(0, name->release + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->version, &utsname()->version,
+				__OLD_UTS_LEN);
+	error -= __put_user(0, name->version + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->machine, &utsname()->machine,
+				__OLD_UTS_LEN);
+	error = __put_user(0, name->machine + __OLD_UTS_LEN);
 	error = error ? -EFAULT : 0;
 
 	return error;
diff -upr kernel-2.6.18-417.el5.orig/arch/mips/kernel/sysirix.c kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/sysirix.c
--- kernel-2.6.18-417.el5.orig/arch/mips/kernel/sysirix.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/mips/kernel/sysirix.c	2017-01-13 08:40:19.000000000 -0500
@@ -111,7 +111,7 @@ asmlinkage int irix_prctl(unsigned optio
 		printk("irix_prctl[%s:%d]: Wants PR_ISBLOCKED\n",
 		       current->comm, current->pid);
 		read_lock(&tasklist_lock);
-		task = find_task_by_pid(va_arg(args, pid_t));
+		task = find_task_by_pid_ve(va_arg(args, pid_t));
 		error = -ESRCH;
 		if (error)
 			error = (task->run_list.next != NULL);
@@ -884,7 +884,7 @@ asmlinkage int irix_getdomainname(char _
 	down_read(&uts_sem);
 	if (len > __NEW_UTS_LEN)
 		len = __NEW_UTS_LEN;
-	err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0;
+	err = copy_to_user(name, utsname()->domainname, len) ? -EFAULT : 0;
 	up_read(&uts_sem);
 
 	return err;
@@ -1127,11 +1127,11 @@ struct iuname {
 asmlinkage int irix_uname(struct iuname __user *buf)
 {
 	down_read(&uts_sem);
-	if (copy_from_user(system_utsname.sysname, buf->sysname, 65)
-	    || copy_from_user(system_utsname.nodename, buf->nodename, 65)
-	    || copy_from_user(system_utsname.release, buf->release, 65)
-	    || copy_from_user(system_utsname.version, buf->version, 65)
-	    || copy_from_user(system_utsname.machine, buf->machine, 65)) {
+	if (copy_from_user(utsname()->sysname, buf->sysname, 65)
+	    || copy_from_user(utsname()->nodename, buf->nodename, 65)
+	    || copy_from_user(utsname()->release, buf->release, 65)
+	    || copy_from_user(utsname()->version, buf->version, 65)
+	    || copy_from_user(utsname()->machine, buf->machine, 65)) {
 		return -EFAULT;
 	}
 	up_read(&uts_sem);
diff -upr kernel-2.6.18-417.el5.orig/arch/parisc/hpux/sys_hpux.c kernel-2.6.18-417.el5-028stab121/arch/parisc/hpux/sys_hpux.c
--- kernel-2.6.18-417.el5.orig/arch/parisc/hpux/sys_hpux.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/parisc/hpux/sys_hpux.c	2017-01-13 08:40:15.000000000 -0500
@@ -266,16 +266,21 @@ static int hpux_uname(struct hpux_utsnam
 
 	down_read(&uts_sem);
 
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->sysname+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->nodename+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->release+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->version+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->machine+HPUX_UTSLEN-1);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->sysname + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->nodename + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->release, &utsname()->release,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->release + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->version, &utsname()->version,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->version + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->machine + HPUX_UTSLEN - 1);
 
 	up_read(&uts_sem);
 
@@ -373,8 +378,8 @@ int hpux_utssys(char *ubuf, int n, int t
 		/*  TODO:  print a warning about using this?  */
 		down_write(&uts_sem);
 		error = -EFAULT;
-		if (!copy_from_user(system_utsname.sysname, ubuf, len)) {
-			system_utsname.sysname[len] = 0;
+		if (!copy_from_user(utsname()->sysname, ubuf, len)) {
+			utsname()->sysname[len] = 0;
 			error = 0;
 		}
 		up_write(&uts_sem);
@@ -400,8 +405,8 @@ int hpux_utssys(char *ubuf, int n, int t
 		/*  TODO:  print a warning about this?  */
 		down_write(&uts_sem);
 		error = -EFAULT;
-		if (!copy_from_user(system_utsname.release, ubuf, len)) {
-			system_utsname.release[len] = 0;
+		if (!copy_from_user(utsname()->release, ubuf, len)) {
+			utsname()->release[len] = 0;
 			error = 0;
 		}
 		up_write(&uts_sem);
@@ -422,13 +427,13 @@ int hpux_getdomainname(char *name, int l
  	
  	down_read(&uts_sem);
  	
-	nlen = strlen(system_utsname.domainname) + 1;
+	nlen = strlen(utsname()->domainname) + 1;
 
 	if (nlen < len)
 		len = nlen;
 	if(len > __NEW_UTS_LEN)
 		goto done;
-	if(copy_to_user(name, system_utsname.domainname, len))
+	if(copy_to_user(name, utsname()->domainname, len))
 		goto done;
 	err = 0;
 done:
diff -upr kernel-2.6.18-417.el5.orig/arch/parisc/Kconfig kernel-2.6.18-417.el5-028stab121/arch/parisc/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/parisc/Kconfig	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/parisc/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -194,6 +194,7 @@ config SMP
 
 config HOTPLUG_CPU
 	bool
+	depends on !SCHED_VCPU
 	default y if SMP
 	select HOTPLUG
 
diff -upr kernel-2.6.18-417.el5.orig/arch/parisc/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/parisc/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/parisc/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/parisc/kernel/init_task.c	2017-01-13 08:40:14.000000000 -0500
@@ -28,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -38,6 +39,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/Kconfig kernel-2.6.18-417.el5-028stab121/arch/powerpc/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/powerpc/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -630,6 +630,7 @@ config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
 
+source "kernel/Kconfig.fairsched"
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
 source "fs/Kconfig.binfmt"
@@ -676,7 +677,7 @@ config IOMMU_VMERGE
 
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
-	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
+	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) && !SCHED_VCPU
 	---help---
 	  Say Y here to be able to disable and re-enable individual
 	  CPUs at runtime on SMP machines.
@@ -1104,6 +1105,8 @@ source "arch/powerpc/platforms/iseries/K
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 menu "Instrumentation Support"
         depends on EXPERIMENTAL
 
@@ -1122,6 +1125,8 @@ endmenu
 
 source "arch/powerpc/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 config KEYS_COMPAT
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -5,6 +5,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 #include <asm/uaccess.h>
 
 static struct fs_struct init_fs = INIT_FS;
@@ -12,6 +13,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/misc_32.S kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/misc_32.S
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/misc_32.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/misc_32.S	2017-01-13 08:40:19.000000000 -0500
@@ -742,7 +742,7 @@ _GLOBAL(_get_SP)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	stwu	r1,-16(r1)
 	stw	r30,8(r1)
 	stw	r31,12(r1)
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/misc_64.S kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/misc_64.S
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/misc_64.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/misc_64.S	2017-01-13 08:40:19.000000000 -0500
@@ -386,7 +386,7 @@ _GLOBAL(scom970_write)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	std	r29,-24(r1)
 	std	r30,-16(r1)
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1)
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/process.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/process.c	2017-01-13 08:40:40.000000000 -0500
@@ -49,6 +49,8 @@
 #include <asm/firmware.h>
 #endif
 
+#include <linux/utsrelease.h>
+
 extern unsigned long _get_SP(void);
 
 #ifndef CONFIG_SMP
@@ -429,8 +431,9 @@ void show_regs(struct pt_regs * regs)
 
 	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
 	       regs->nip, regs->link, regs->ctr);
-	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
-	       regs, regs->trap, print_tainted(), system_utsname.release);
+	printk("REGS: %p TRAP: %04lx   %s  (%s %s)\n",
+	       regs, regs->trap, print_tainted(), init_utsname()->release,
+	       VZVERSION);
 	printk("MSR: "REG" ", regs->msr);
 	printbits(regs->msr, msr_bits);
 	printk("  CR: %08lX  XER: %08lX\n", regs->ccr, regs->xer);
@@ -441,7 +444,7 @@ void show_regs(struct pt_regs * regs)
 	       current, current->pid, current->comm, task_thread_info(current));
 
 #ifdef CONFIG_SMP
-	printk(" CPU: %d", smp_processor_id());
+	printk(" CPU: %d, VCPU: %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 #endif /* CONFIG_SMP */
 
 	for (i = 0;  i < 32;  i++) {
@@ -828,12 +831,12 @@ int validate_sp(unsigned long sp, struct
 		return 1;
 
 #ifdef CONFIG_IRQSTACKS
-	stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
+	stack_page = (unsigned long) hardirq_ctx[task_pcpu(p)];
 	if (sp >= stack_page + sizeof(struct thread_struct)
 	    && sp <= stack_page + THREAD_SIZE - nbytes)
 		return 1;
 
-	stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
+	stack_page = (unsigned long) softirq_ctx[task_pcpu(p)];
 	if (sp >= stack_page + sizeof(struct thread_struct)
 	    && sp <= stack_page + THREAD_SIZE - nbytes)
 		return 1;
@@ -944,6 +947,20 @@ void dump_stack(void)
 }
 EXPORT_SYMBOL(dump_stack);
 
+long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	extern long ppc_kernel_thread(int (*fn)(void *), void *arg,
+			unsigned long flags);
+
+	if (!ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside container\n");
+		dump_stack();
+		return -EPERM;
+	}
+
+	return ppc_kernel_thread(fn, arg, flags);
+}
+
 #ifdef CONFIG_PPC64
 void ppc64_runlatch_on(void)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/setup_64.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/setup_64.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/setup_64.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/setup_64.c	2017-01-13 08:40:15.000000000 -0500
@@ -425,7 +425,7 @@ void __init setup_system(void)
 	smp_release_cpus();
 #endif
 
-	printk("Starting Linux PPC64 %s\n", system_utsname.version);
+	printk("Starting Linux PPC64 %s\n", init_utsname()->version);
 
 	printk("-----------------------------------------------------\n");
 	printk("ppc64_pft_size                = 0x%lx\n", ppc64_pft_size);
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/syscalls.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/syscalls.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/syscalls.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/syscalls.c	2017-01-13 08:40:15.000000000 -0500
@@ -230,7 +230,7 @@ long ppc_newuname(struct new_utsname __u
 	int err = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, utsname(), sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -243,7 +243,7 @@ int sys_uname(struct old_utsname __user 
 	int err = 0;
 	
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, utsname(), sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -259,19 +259,19 @@ int sys_olduname(struct oldold_utsname _
 		return -EFAULT;
   
 	down_read(&uts_sem);
-	error = __copy_to_user(&name->sysname, &system_utsname.sysname,
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
 			       __OLD_UTS_LEN);
 	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &system_utsname.release,
+	error |= __copy_to_user(&name->release, &utsname()->release,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &system_utsname.version,
+	error |= __copy_to_user(&name->version, &utsname()->version,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &system_utsname.machine,
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
 				__OLD_UTS_LEN);
 	error |= override_machine(name->machine);
 	up_read(&uts_sem);
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/systbl.S kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/systbl.S
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/systbl.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/systbl.S	2017-01-13 08:40:28.000000000 -0500
@@ -41,5 +41,8 @@
 #define sys_old_getrlimit sys_ni_syscall
 #endif
 
+#define SYS_SKIP(from, to)	.rept (to - from)
+#define SYS_SKIP_END()		.endr
+
 _GLOBAL(sys_call_table)
 #include <asm/systbl.h>
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/vdso.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/vdso.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/kernel/vdso.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/kernel/vdso.c	2017-01-13 08:40:26.000000000 -0500
@@ -217,7 +217,7 @@ static struct vm_operations_struct vdso_
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp,  unsigned long map_address)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/mm/fault.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/fault.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/mm/fault.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/fault.c	2017-01-13 08:40:17.000000000 -0500
@@ -340,7 +340,6 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
- survive:
 	switch (handle_mm_fault(mm, vma, address, is_write)) {
 
 	case VM_FAULT_MINOR:
@@ -384,14 +383,12 @@ bad_area_nosemaphore:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal has happened: beancounter overdraft or OOM. Den
+		 */
+		force_sig(SIGKILL, current);
 	return SIGKILL;
 
 do_sigbus:
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/mm/init_64.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/init_64.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/mm/init_64.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/init_64.c	2017-01-13 08:40:16.000000000 -0500
@@ -184,7 +184,8 @@ void pgtable_cache_init(void)
 		pgtable_cache[i] = kmem_cache_create(name,
 						     size, size,
 						     SLAB_HWCACHE_ALIGN |
-						     SLAB_MUST_HWCACHE_ALIGN,
+						     SLAB_MUST_HWCACHE_ALIGN |
+						     SLAB_UBC | SLAB_NO_CHARGE,
 						     zero_ctor,
 						     NULL);
 		if (! pgtable_cache[i])
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/mm/mem.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/mem.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/mm/mem.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/mem.c	2017-01-13 08:40:19.000000000 -0500
@@ -227,6 +227,7 @@ void show_mem(void)
 	printk("%ld pages shared\n", shared);
 	printk("%ld pages swap cached\n", cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Initialize the bootmem system and give it all the memory we
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/mm/pgtable_32.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/pgtable_32.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/mm/pgtable_32.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/mm/pgtable_32.c	2017-01-13 08:40:16.000000000 -0500
@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_ZERO, PGDIR_ORDER);
 	return ret;
 }
 
@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str
 #else
 	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
+	flags |= (__GFP_UBC | __GFP_SOFT_UBC);
 
 	ptepage = alloc_pages(flags, 0);
 	if (ptepage)
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/platforms/cell/spu_callbacks.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/platforms/cell/spu_callbacks.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/platforms/cell/spu_callbacks.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/platforms/cell/spu_callbacks.c	2017-01-13 08:40:28.000000000 -0500
@@ -46,6 +46,9 @@ static void *spu_syscall_table[] = {
 #define PPC_SYS_SPU(func)	ppc_##func,
 #define SYSX_SPU(f, f3264, f32)	f,
 
+#define SYS_SKIP(from, to) [from ... to] =
+#define SYS_SKIP_END()
+
 #include <asm/systbl.h>
 };
 
diff -upr kernel-2.6.18-417.el5.orig/arch/powerpc/platforms/pseries/setup.c kernel-2.6.18-417.el5-028stab121/arch/powerpc/platforms/pseries/setup.c
--- kernel-2.6.18-417.el5.orig/arch/powerpc/platforms/pseries/setup.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/powerpc/platforms/pseries/setup.c	2017-01-13 08:40:15.000000000 -0500
@@ -369,7 +369,7 @@ static int __init pSeries_init_panel(voi
 {
 	/* Manually leave the kernel version on the panel. */
 	ppc_md.progress("Linux ppc64\n", 0);
-	ppc_md.progress(system_utsname.release, 0);
+	ppc_md.progress(init_utsname()->version, 0);
 
 	return 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/ppc/Kconfig kernel-2.6.18-417.el5-028stab121/arch/ppc/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/ppc/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ppc/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -964,6 +964,7 @@ config NR_CPUS
 config HIGHMEM
 	bool "High memory support"
 
+source "kernel/Kconfig.fairsched"
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
 source "mm/Kconfig"
@@ -1429,6 +1430,10 @@ source "arch/powerpc/oprofile/Kconfig"
 
 source "arch/ppc/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 source "crypto/Kconfig"
diff -upr kernel-2.6.18-417.el5.orig/arch/ppc/kernel/misc.S kernel-2.6.18-417.el5-028stab121/arch/ppc/kernel/misc.S
--- kernel-2.6.18-417.el5.orig/arch/ppc/kernel/misc.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ppc/kernel/misc.S	2017-01-13 08:40:19.000000000 -0500
@@ -929,7 +929,7 @@ _GLOBAL(_get_SP)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	stwu	r1,-16(r1)
 	stw	r30,8(r1)
 	stw	r31,12(r1)
diff -upr kernel-2.6.18-417.el5.orig/arch/ppc/mm/fault.c kernel-2.6.18-417.el5-028stab121/arch/ppc/mm/fault.c
--- kernel-2.6.18-417.el5.orig/arch/ppc/mm/fault.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ppc/mm/fault.c	2017-01-13 08:40:17.000000000 -0500
@@ -248,7 +248,6 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
- survive:
         switch (handle_mm_fault(mm, vma, address, is_write)) {
         case VM_FAULT_MINOR:
                 current->min_flt++;
@@ -291,14 +290,12 @@ bad_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal has happened: beancounter overdraft or OOM. Den
+		 */
+		force_sig(SIGKILL, current);
 	return SIGKILL;
 
 do_sigbus:
diff -upr kernel-2.6.18-417.el5.orig/arch/ppc/mm/init.c kernel-2.6.18-417.el5-028stab121/arch/ppc/mm/init.c
--- kernel-2.6.18-417.el5.orig/arch/ppc/mm/init.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ppc/mm/init.c	2017-01-13 08:40:19.000000000 -0500
@@ -131,6 +131,7 @@ void show_mem(void)
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* Free up now-unused memory */
 static void free_sec(unsigned long start, unsigned long end, const char *name)
diff -upr kernel-2.6.18-417.el5.orig/arch/ppc/mm/pgtable.c kernel-2.6.18-417.el5-028stab121/arch/ppc/mm/pgtable.c
--- kernel-2.6.18-417.el5.orig/arch/ppc/mm/pgtable.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/ppc/mm/pgtable.c	2017-01-13 08:40:16.000000000 -0500
@@ -83,7 +83,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_ZERO, PGDIR_ORDER);
 	return ret;
 }
 
@@ -117,6 +118,7 @@ struct page *pte_alloc_one(struct mm_str
 #else
 	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
+	flags |= (__GFP_UBC | __GFP_SOFT_UBC);
 
 	ptepage = alloc_pages(flags, 0);
 	if (ptepage)
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/hypfs/inode.c kernel-2.6.18-417.el5-028stab121/arch/s390/hypfs/inode.c
--- kernel-2.6.18-417.el5.orig/arch/s390/hypfs/inode.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/hypfs/inode.c	2017-01-13 08:40:40.000000000 -0500
@@ -144,12 +144,20 @@ static int hypfs_open(struct inode *inod
 	return 0;
 }
 
-static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf,
-			      size_t count, loff_t offset)
+static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t offset)
 {
 	char *data;
 	size_t len;
 	struct file *filp = iocb->ki_filp;
+	/* XXX: temporary */
+	char __user *buf = iov[0].iov_base;
+	size_t count = iov[0].iov_len;
+
+	if (nr_segs != 1) {
+		count = -EINVAL;
+		goto out;
+	}
 
 	data = filp->private_data;
 	len = strlen(data);
@@ -168,12 +176,13 @@ static ssize_t hypfs_aio_read(struct kio
 out:
 	return count;
 }
-static ssize_t hypfs_aio_write(struct kiocb *iocb, const char __user *buf,
-			       size_t count, loff_t pos)
+static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t offset)
 {
 	int rc;
 	struct super_block *sb;
 	struct hypfs_sb_info *fs_info;
+	size_t count = iov_length(iov, nr_segs);
 
 	sb = iocb->ki_filp->f_dentry->d_inode->i_sb;
 	fs_info = sb->s_fs_info;
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/Kconfig kernel-2.6.18-417.el5-028stab121/arch/s390/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/s390/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -106,7 +106,7 @@ config NR_CPUS
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
-	depends on SMP
+	depends on SMP && !SCHED_VCPU
 	select HOTPLUG
 	default n
 	help
@@ -529,8 +529,12 @@ endmenu
 
 source "arch/s390/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/s390/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/s390/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -20,6 +21,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/s390/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/s390/kernel/process.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/kernel/process.c	2017-01-13 08:40:40.000000000 -0500
@@ -170,9 +170,10 @@ void show_regs(struct pt_regs *regs)
 	       system_utsname.release,
 	       (int)strcspn(system_utsname.version, " "),
 	       system_utsname.version);
-        printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-	       current->comm, current->pid, (void *) tsk,
-	       (void *) tsk->thread.ksp);
+        printk("Process %s (pid: %d, veid: %d, task: %p, ksp: %p)\n",
+	       current->comm, current->pid,
+	       VEID(VE_TASK_INFO(current)->owner_env),
+	       (void *) tsk, (void *) tsk->thread.ksp);
 
 	show_registers(regs);
 	/* Show stack backtrace if pt_regs is from kernel mode */
@@ -193,6 +194,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	if (!ve_is_super(get_exec_env())) {
+		/* Don't allow kernel_thread() inside VE */
+		printk("kernel_thread call inside container\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
 	regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/kernel/smp.c kernel-2.6.18-417.el5-028stab121/arch/s390/kernel/smp.c
--- kernel-2.6.18-417.el5.orig/arch/s390/kernel/smp.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/kernel/smp.c	2017-01-13 08:40:19.000000000 -0500
@@ -534,6 +534,17 @@ int __devinit start_secondary(void *cpuv
 {
         /* Setup the cpu */
         cpu_init();
+
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+	/*
+	 * Cosmetic: sleep_time won't be changed afterwards for the idle
+	 * thread;  keep it 0 rather than -cycles.
+	 */
+	VE_TASK_INFO(idle)->sleep_time = 0;
+#endif
+
 	preempt_disable();
         /* init per CPU timer */
         init_cpu_timer();
@@ -831,6 +842,11 @@ void __init smp_prepare_cpus(unsigned in
 	for_each_possible_cpu(cpu)
 		if (cpu != smp_processor_id())
 			smp_create_idle(cpu);
+
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 
 void __devinit smp_prepare_boot_cpu(void)
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/mm/fault.c kernel-2.6.18-417.el5-028stab121/arch/s390/mm/fault.c
--- kernel-2.6.18-417.el5.orig/arch/s390/mm/fault.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/mm/fault.c	2017-01-13 08:40:15.000000000 -0500
@@ -96,17 +96,9 @@ void bust_spinlocks(int yes)
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 		console_unblank();
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
diff -upr kernel-2.6.18-417.el5.orig/arch/s390/mm/init.c kernel-2.6.18-417.el5-028stab121/arch/s390/mm/init.c
--- kernel-2.6.18-417.el5.orig/arch/s390/mm/init.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/s390/mm/init.c	2017-01-13 08:40:19.000000000 -0500
@@ -74,6 +74,7 @@ void show_mem(void)
         printk("%d pages shared\n",shared);
         printk("%d pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 extern unsigned long __initdata zholes_size[];
 /*
diff -upr kernel-2.6.18-417.el5.orig/arch/sh/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/sh/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -3,6 +3,7 @@
 #include <linux/sched.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -12,6 +13,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/sh/kernel/kgdb_stub.c kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/kgdb_stub.c
--- kernel-2.6.18-417.el5.orig/arch/sh/kernel/kgdb_stub.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/kgdb_stub.c	2017-01-13 08:40:19.000000000 -0500
@@ -412,7 +412,7 @@ static struct task_struct *get_thread(in
 	if (pid == PID_MAX) pid = 0;
 
 	/* First check via PID */
-	thread = find_task_by_pid(pid);
+	thread = find_task_by_pid_all(pid);
 
 	if (thread)
 		return thread;
diff -upr kernel-2.6.18-417.el5.orig/arch/sh/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/sh/kernel/setup.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/setup.c	2017-01-13 08:40:15.000000000 -0500
@@ -483,7 +483,7 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "machine\t\t: %s\n", get_system_type());
 
 	seq_printf(m, "processor\t: %d\n", cpu);
-	seq_printf(m, "cpu family\t: %s\n", system_utsname.machine);
+	seq_printf(m, "cpu family\t: %s\n", init_utsname()->machine);
 	seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype());
 
 	show_cpuflags(m);
diff -upr kernel-2.6.18-417.el5.orig/arch/sh/kernel/sys_sh.c kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/sys_sh.c
--- kernel-2.6.18-417.el5.orig/arch/sh/kernel/sys_sh.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh/kernel/sys_sh.c	2017-01-13 08:40:15.000000000 -0500
@@ -267,7 +267,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/sh64/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/sh64/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/sh64/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh64/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -23,6 +24,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 struct pt_regs fake_swapper_regs;
 
diff -upr kernel-2.6.18-417.el5.orig/arch/sh64/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/sh64/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/sh64/kernel/process.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh64/kernel/process.c	2017-01-13 08:40:19.000000000 -0500
@@ -908,7 +908,7 @@ asids_proc_info(char *buf, char **start,
 	int len=0;
 	struct task_struct *p;
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_ve(p) {
 		int pid = p->pid;
 		struct mm_struct *mm;
 		if (!pid) continue;
diff -upr kernel-2.6.18-417.el5.orig/arch/sh64/kernel/sys_sh64.c kernel-2.6.18-417.el5-028stab121/arch/sh64/kernel/sys_sh64.c
--- kernel-2.6.18-417.el5.orig/arch/sh64/kernel/sys_sh64.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sh64/kernel/sys_sh64.c	2017-01-13 08:40:15.000000000 -0500
@@ -279,7 +279,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/sparc/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -3,6 +3,7 @@
 #include <linux/sched.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -12,6 +13,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 struct task_struct init_task = INIT_TASK(init_task);
 
 EXPORT_SYMBOL(init_mm);
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc/kernel/sys_sparc.c kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/sys_sparc.c
--- kernel-2.6.18-417.el5.orig/arch/sparc/kernel/sys_sparc.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/sys_sparc.c	2017-01-13 08:40:15.000000000 -0500
@@ -475,13 +475,13 @@ asmlinkage int sys_getdomainname(char __
 
  	down_read(&uts_sem);
  	
-	nlen = strlen(system_utsname.domainname) + 1;
+	nlen = strlen(init_utsname()->domainname) + 1;
 	err = -EINVAL;
 	if (nlen > len)
 		goto out;
 
 	err = -EFAULT;
-	if (!copy_to_user(name, system_utsname.domainname, nlen))
+	if (!copy_to_user(name, init_utsname()->domainname, nlen))
 		err = 0;
 
 out:
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc/kernel/sys_sunos.c kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/sys_sunos.c
--- kernel-2.6.18-417.el5.orig/arch/sparc/kernel/sys_sunos.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/sys_sunos.c	2017-01-13 08:40:15.000000000 -0500
@@ -491,13 +491,18 @@ asmlinkage int sunos_uname(struct sunos_
 {
 	int ret;
 	down_read(&uts_sem);
-	ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1);
+	ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
+			   sizeof(name->sname) - 1);
 	if (!ret) {
-		ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1);
+		ret |= __copy_to_user(&name->nname[0], &utsname()->nodename[0],
+				      sizeof(name->nname) - 1);
 		ret |= __put_user('\0', &name->nname[8]);
-		ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1);
-		ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1);
-		ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1);
+		ret |= __copy_to_user(&name->rel[0], &utsname()->release[0],
+				      sizeof(name->rel) - 1);
+		ret |= __copy_to_user(&name->ver[0], &utsname()->version[0],
+				      sizeof(name->ver) - 1);
+		ret |= __copy_to_user(&name->mach[0], &utsname()->machine[0],
+				      sizeof(name->mach) - 1);
 	}
 	up_read(&uts_sem);
 	return ret ? -EFAULT : 0;
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc/kernel/systbls.S kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/systbls.S
--- kernel-2.6.18-417.el5.orig/arch/sparc/kernel/systbls.S	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc/kernel/systbls.S	2017-01-13 08:40:40.000000000 -0500
@@ -78,7 +78,9 @@ sys_call_table:
 /*285*/	.long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
 /*290*/	.long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
 /*295*/	.long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
-/*300*/	.long sys_set_robust_list, sys_get_robust_list
+/*300*/	.long sys_set_robust_list, sys_get_robust_list, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
+/*305*/	.long sys_ni_syscall, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
+/*310*/	.long sys_utimensat
 
 #ifdef CONFIG_SUNOS_EMUL
 	/* Now the SunOS syscall table. */
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/Kconfig kernel-2.6.18-417.el5-028stab121/arch/sparc64/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/sparc64/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/Kconfig	2017-01-13 08:40:20.000000000 -0500
@@ -138,6 +138,8 @@ config NR_CPUS
 	depends on SMP
 	default "32"
 
+source "kernel/Kconfig.fairsched"
+
 source "drivers/cpufreq/Kconfig"
 
 config US3_FREQ
@@ -431,8 +433,12 @@ endmenu
 
 source "arch/sparc64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -3,6 +3,7 @@
 #include <linux/sched.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -13,6 +14,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/process.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/process.c	2017-01-13 08:40:40.000000000 -0500
@@ -676,6 +676,13 @@ pid_t kernel_thread(int (*fn)(void *), v
 {
 	long retval;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside container\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	/* If the parent runs before fn(arg) is called by the child,
 	 * the input registers of this function can be clobbered.
 	 * So we stash 'fn' and 'arg' into global registers which
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/prom.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/prom.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/prom.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/prom.c	2017-01-13 08:40:16.000000000 -0500
@@ -793,7 +793,7 @@ static unsigned int schizo_irq_build(str
 	return virt_irq;
 }
 
-static void schizo_irq_trans_init(struct device_node *dp)
+static void __schizo_irq_trans_init(struct device_node *dp, int is_tomatillo)
 {
 	struct linux_prom64_registers *regs;
 	struct schizo_irq_data *irq_data;
@@ -807,11 +807,24 @@ static void schizo_irq_trans_init(struct
 	dp->irq_trans->data = irq_data;
 
 	irq_data->pbm_regs = regs[0].phys_addr;
-	irq_data->sync_reg = regs[3].phys_addr + 0x1a18UL;
+	if (is_tomatillo)
+		irq_data->sync_reg = regs[3].phys_addr + 0x1a18UL;
+	else
+		irq_data->sync_reg = 0UL;
 	irq_data->portid = of_getintprop_default(dp, "portid", 0);
 	irq_data->chip_version = of_getintprop_default(dp, "version#", 0);
 }
 
+static void schizo_irq_trans_init(struct device_node *dp)
+{
+	__schizo_irq_trans_init(dp, 0);
+}
+
+static void tomatillo_irq_trans_init(struct device_node *dp)
+{
+	__schizo_irq_trans_init(dp, 1);
+}
+
 static unsigned int pci_sun4v_irq_build(struct device_node *dp,
 					unsigned int devino,
 					void *_data)
@@ -1050,8 +1063,8 @@ static struct irq_trans pci_irq_trans_ta
 	{ "pci108e,8001", schizo_irq_trans_init },
 	{ "SUNW,schizo+", schizo_irq_trans_init },
 	{ "pci108e,8002", schizo_irq_trans_init },
-	{ "SUNW,tomatillo", schizo_irq_trans_init },
-	{ "pci108e,a801", schizo_irq_trans_init },
+	{ "SUNW,tomatillo", tomatillo_irq_trans_init },
+	{ "pci108e,a801", tomatillo_irq_trans_init },
 	{ "SUNW,sun4v-pci", pci_sun4v_irq_trans_init },
 };
 #endif
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sparc64_ksyms.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sparc64_ksyms.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sparc64_ksyms.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sparc64_ksyms.c	2017-01-13 08:40:20.000000000 -0500
@@ -316,6 +316,7 @@ EXPORT_SYMBOL(copy_from_user_fixup);
 EXPORT_SYMBOL(copy_in_user_fixup);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(mem_map_zero);
 
 /* Various address conversion macros use this. */
 EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sys_sparc32.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sys_sparc32.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sys_sparc32.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sys_sparc32.c	2017-01-13 08:40:16.000000000 -0500
@@ -843,7 +843,7 @@ asmlinkage long sys32_utimes(char __user
 			return -EFAULT;
 	}
 
-	return do_utimes(AT_FDCWD, filename, (tvs ? &ktvs[0] : NULL));
+	return do_utimes(AT_FDCWD, filename, (tvs ? &ktvs[0] : NULL), 0);
 }
 
 /* These are here just in case some old sparc32 binary calls it. */
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sys_sparc.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sys_sparc.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sys_sparc.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sys_sparc.c	2017-01-13 08:40:15.000000000 -0500
@@ -712,13 +712,13 @@ asmlinkage long sys_getdomainname(char _
 
  	down_read(&uts_sem);
  	
-	nlen = strlen(system_utsname.domainname) + 1;
+	nlen = strlen(utsname()->domainname) + 1;
 	err = -EINVAL;
 	if (nlen > len)
 		goto out;
 
 	err = -EFAULT;
-	if (!copy_to_user(name, system_utsname.domainname, nlen))
+	if (!copy_to_user(name, utsname()->domainname, nlen))
 		err = 0;
 
 out:
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sys_sunos32.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sys_sunos32.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/sys_sunos32.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/sys_sunos32.c	2017-01-13 08:40:15.000000000 -0500
@@ -447,16 +447,16 @@ asmlinkage int sunos_uname(struct sunos_
 	int ret;
 
 	down_read(&uts_sem);
-	ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0],
+	ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
 			   sizeof(name->sname) - 1);
-	ret |= copy_to_user(&name->nname[0], &system_utsname.nodename[0],
+	ret |= copy_to_user(&name->nname[0], &utsname()->nodename[0],
 			    sizeof(name->nname) - 1);
 	ret |= put_user('\0', &name->nname[8]);
-	ret |= copy_to_user(&name->rel[0], &system_utsname.release[0],
+	ret |= copy_to_user(&name->rel[0], &utsname()->release[0],
 			    sizeof(name->rel) - 1);
-	ret |= copy_to_user(&name->ver[0], &system_utsname.version[0],
+	ret |= copy_to_user(&name->ver[0], &utsname()->version[0],
 			    sizeof(name->ver) - 1);
-	ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0],
+	ret |= copy_to_user(&name->mach[0], &utsname()->machine[0],
 			    sizeof(name->mach) - 1);
 	up_read(&uts_sem);
 	return (ret ? -EFAULT : 0);
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/systbls.S kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/systbls.S
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/systbls.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/systbls.S	2017-01-13 08:40:40.000000000 -0500
@@ -79,7 +79,26 @@ sys_call_table32:
 	.word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64
 /*290*/	.word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
 	.word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare
-/*300*/	.word compat_sys_set_robust_list, compat_sys_get_robust_list
+/*300*/	.word compat_sys_set_robust_list, compat_sys_get_robust_list, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
+	.word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
+/*310*/	.word compat_sys_utimensat
+	.rept 500-311
+	.word sys_nis_syscall
+	.endr
+	.word sys_fairsched_mknod	/* 500 */
+	.word sys_fairsched_rmnod
+	.word sys_fairsched_chwt
+	.word sys_fairsched_mvpr
+	.word sys_fairsched_rate
+	.word sys_nis_syscall		/* 505 */
+	.word sys_nis_syscall
+	.word sys_nis_syscall
+	.word sys_nis_syscall
+	.word sys_nis_syscall
+	.word sys_getluid		/* 510 */
+	.word sys_setluid
+	.word compat_sys_setublimit
+	.word compat_sys_ubstat
 
 #endif /* CONFIG_COMPAT */
 
@@ -149,7 +168,28 @@ sys_call_table:
 	.word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
 /*290*/	.word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
 	.word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
-/*300*/	.word sys_set_robust_list, sys_get_robust_list
+/*300*/	.word sys_set_robust_list, sys_get_robust_list, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
+	.word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
+/*310*/	.word sys_utimensat
+
+	.rept 500-311
+	.word sys_nis_syscall
+	.endr
+	.word sys_fairsched_mknod	/* 500 */
+	.word sys_fairsched_rmnod
+	.word sys_fairsched_chwt
+	.word sys_fairsched_mvpr
+	.word sys_fairsched_rate
+	.word sys_nis_syscall		/* 505 */
+	.word sys_nis_syscall
+	.word sys_nis_syscall
+	.word sys_nis_syscall
+	.word sys_nis_syscall
+	.word sys_getluid		/* 510 */
+	.word sys_setluid
+	.word sys_setublimit
+	.word sys_ubstat
+
 
 #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \
     defined(CONFIG_SOLARIS_EMUL_MODULE)
@@ -263,4 +303,7 @@ sunos_sys_table:
 	.word sunos_nosys, sunos_nosys, sunos_nosys
 	.word sunos_nosys, sunos_nosys, sunos_nosys
 	.word sunos_nosys, sunos_nosys, sunos_nosys
+	.rept 520-302
+	.word sunos_nosys
+	.endr
 #endif
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/traps.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/traps.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/kernel/traps.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/kernel/traps.c	2017-01-13 08:40:20.000000000 -0500
@@ -2216,6 +2216,10 @@ void die_if_kernel(char *str, struct pt_
 "                 \\__U_/\n");
 
 	printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+	printk("VE:EXCVE %d:%d, CPU %d, VCPU %d:%d\n",
+		VEID(VE_TASK_INFO(current)->owner_env), VEID(get_exec_env()),
+		smp_processor_id(),
+		task_vsched_id(current), task_cpu(current));
 	notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
 	__asm__ __volatile__("flushw");
 	__show_regs(regs);
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/mm/init.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/mm/init.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/mm/init.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/mm/init.c	2017-01-13 08:40:20.000000000 -0500
@@ -418,6 +418,7 @@ void show_mem(void)
 	printk("%ld pages of RAM\n", num_physpages);
 	printk("%d free pages\n", nr_free_pages());
 }
+EXPORT_SYMBOL(show_mem);
 
 void mmu_info(struct seq_file *m)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/sparc64/solaris/misc.c kernel-2.6.18-417.el5-028stab121/arch/sparc64/solaris/misc.c
--- kernel-2.6.18-417.el5.orig/arch/sparc64/solaris/misc.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/sparc64/solaris/misc.c	2017-01-13 08:40:15.000000000 -0500
@@ -249,7 +249,7 @@ asmlinkage int solaris_utssys(u32 buf, u
 		/* Let's cheat */
 		err  = set_utsfield(v->sysname, "SunOS", 1, 0);
 		down_read(&uts_sem);
-		err |= set_utsfield(v->nodename, system_utsname.nodename,
+		err |= set_utsfield(v->nodename, utsname()->nodename,
 				    1, 1);
 		up_read(&uts_sem);
 		err |= set_utsfield(v->release, "2.6", 0, 0);
@@ -273,7 +273,7 @@ asmlinkage int solaris_utsname(u32 buf)
 	/* Why should we not lie a bit? */
 	down_read(&uts_sem);
 	err  = set_utsfield(v->sysname, "SunOS", 0, 0);
-	err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1);
+	err |= set_utsfield(v->nodename, utsname()->nodename, 1, 1);
 	err |= set_utsfield(v->release, "5.6", 0, 0);
 	err |= set_utsfield(v->version, "Generic", 0, 0);
 	err |= set_utsfield(v->machine, machine(), 0, 0);
@@ -305,7 +305,7 @@ asmlinkage int solaris_sysinfo(int cmd, 
 	case SI_HOSTNAME:
 		r = buffer + 256;
 		down_read(&uts_sem);
-		for (p = system_utsname.nodename, q = buffer; 
+		for (p = utsname()->nodename, q = buffer;
 		     q < r && *p && *p != '.'; *q++ = *p++);
 		up_read(&uts_sem);
 		*q = 0;
diff -upr kernel-2.6.18-417.el5.orig/arch/um/drivers/mconsole_kern.c kernel-2.6.18-417.el5-028stab121/arch/um/drivers/mconsole_kern.c
--- kernel-2.6.18-417.el5.orig/arch/um/drivers/mconsole_kern.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/drivers/mconsole_kern.c	2017-01-13 08:40:19.000000000 -0500
@@ -106,9 +106,9 @@ void mconsole_version(struct mc_request 
 {
 	char version[256];
 
-	sprintf(version, "%s %s %s %s %s", system_utsname.sysname,
-		system_utsname.nodename, system_utsname.release,
-		system_utsname.version, system_utsname.machine);
+	sprintf(version, "%s %s %s %s %s", utsname()->sysname,
+		utsname()->nodename, utsname()->release,
+		utsname()->version, utsname()->machine);
 	mconsole_reply(req, version, 0, 0);
 }
 
@@ -734,7 +734,7 @@ static void do_stack_trace(struct mc_req
 
 	from = current;
 
-	to = find_task_by_pid(pid_requested);
+	to = find_task_by_pid_all(pid_requested);
 	if((to == NULL) || (pid_requested == 0)) {
 		mconsole_reply(req, "Couldn't find that pid", 1, 0);
 		return;
diff -upr kernel-2.6.18-417.el5.orig/arch/um/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/um/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/um/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -8,6 +8,7 @@
 #include "linux/sched.h"
 #include "linux/init_task.h"
 #include "linux/mqueue.h"
+#include "linux/nsproxy.h"
 #include "asm/uaccess.h"
 #include "asm/pgtable.h"
 #include "user_util.h"
@@ -16,6 +17,7 @@
 
 static struct fs_struct init_fs = INIT_FS;
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 static struct files_struct init_files = INIT_FILES;
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff -upr kernel-2.6.18-417.el5.orig/arch/um/kernel/skas/process_kern.c kernel-2.6.18-417.el5-028stab121/arch/um/kernel/skas/process_kern.c
--- kernel-2.6.18-417.el5.orig/arch/um/kernel/skas/process_kern.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/kernel/skas/process_kern.c	2017-01-13 08:40:19.000000000 -0500
@@ -208,7 +208,7 @@ void kill_off_processes_skas(void)
 		int pid, me;
 
 		me = os_getpid();
-		for_each_process(p){
+		for_each_process_all(p){
 			if(p->mm == NULL)
 				continue;
 
diff -upr kernel-2.6.18-417.el5.orig/arch/um/kernel/syscall.c kernel-2.6.18-417.el5-028stab121/arch/um/kernel/syscall.c
--- kernel-2.6.18-417.el5.orig/arch/um/kernel/syscall.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/kernel/syscall.c	2017-01-13 08:40:15.000000000 -0500
@@ -110,7 +110,7 @@ long sys_uname(struct old_utsname __user
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -126,21 +126,21 @@ long sys_olduname(struct oldold_utsname 
 
   	down_read(&uts_sem);
 
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
 			       __OLD_UTS_LEN);
-	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,
+	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release, &utsname()->release,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,
+	error |= __put_user(0, name->release + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version, &utsname()->version,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,
+	error |= __put_user(0, name->version + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
 
 	up_read(&uts_sem);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/um/kernel/tt/process_kern.c kernel-2.6.18-417.el5-028stab121/arch/um/kernel/tt/process_kern.c
--- kernel-2.6.18-417.el5.orig/arch/um/kernel/tt/process_kern.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/kernel/tt/process_kern.c	2017-01-13 08:40:19.000000000 -0500
@@ -307,7 +307,7 @@ void kill_off_processes_tt(void)
 	int me;
 
 	me = os_getpid();
-        for_each_process(p){
+        for_each_process_all(p){
 		if(p->thread.mode.tt.extern_pid != me) 
 			os_kill_process(p->thread.mode.tt.extern_pid, 0);
 	}
@@ -450,7 +450,7 @@ int is_valid_pid(int pid)
 	struct task_struct *task;
 
         read_lock(&tasklist_lock);
-        for_each_process(task){
+        for_each_process_all(task){
                 if(task->thread.mode.tt.extern_pid == pid){
 			read_unlock(&tasklist_lock);
 			return(1);
diff -upr kernel-2.6.18-417.el5.orig/arch/um/kernel/um_arch.c kernel-2.6.18-417.el5-028stab121/arch/um/kernel/um_arch.c
--- kernel-2.6.18-417.el5.orig/arch/um/kernel/um_arch.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/kernel/um_arch.c	2017-01-13 08:40:15.000000000 -0500
@@ -166,7 +166,7 @@ static char *usage_string = 
 
 static int __init uml_version_setup(char *line, int *add)
 {
-	printf("%s\n", system_utsname.release);
+	printf("%s\n", init_utsname()->release);
 	exit(0);
 
 	return 0;
@@ -277,7 +277,7 @@ static int __init Usage(char *line, int 
 {
  	const char **p;
 
-	printf(usage_string, system_utsname.release);
+	printf(usage_string, init_utsname()->release);
  	p = &__uml_help_start;
  	while (p < &__uml_help_end) {
  		printf("%s", *p);
@@ -402,7 +402,7 @@ int linux_main(int argc, char **argv)
 	/* Reserve up to 4M after the current brk */
 	uml_reserved = ROUND_4M(brk_start) + (1 << 22);
 
-	setup_machinename(system_utsname.machine);
+	setup_machinename(init_utsname()->machine);
 
 #ifdef CONFIG_CMDLINE_ON_HOST
 	argv1_begin = argv[1];
diff -upr kernel-2.6.18-417.el5.orig/arch/um/sys-x86_64/syscalls.c kernel-2.6.18-417.el5-028stab121/arch/um/sys-x86_64/syscalls.c
--- kernel-2.6.18-417.el5.orig/arch/um/sys-x86_64/syscalls.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/sys-x86_64/syscalls.c	2017-01-13 08:40:15.000000000 -0500
@@ -21,7 +21,7 @@ asmlinkage long sys_uname64(struct new_u
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32)
 		err |= copy_to_user(&name->machine, "i686", 5);
diff -upr kernel-2.6.18-417.el5.orig/arch/um/sys-x86_64/sysrq.c kernel-2.6.18-417.el5-028stab121/arch/um/sys-x86_64/sysrq.c
--- kernel-2.6.18-417.el5.orig/arch/um/sys-x86_64/sysrq.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/um/sys-x86_64/sysrq.c	2017-01-13 08:40:15.000000000 -0500
@@ -16,7 +16,7 @@ void __show_regs(struct pt_regs * regs)
 	printk("\n");
 	print_modules();
 	printk("Pid: %d, comm: %.20s %s %s\n",
-	       current->pid, current->comm, print_tainted(), system_utsname.release);
+	       current->pid, current->comm, print_tainted(), init_utsname()->release);
 	printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
 	       PT_REGS_RIP(regs));
 	printk("\nRSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
diff -upr kernel-2.6.18-417.el5.orig/arch/v850/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/v850/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/v850/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/v850/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -16,6 +16,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -25,6 +26,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM (init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/boot/compressed/head.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/boot/compressed/head.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/boot/compressed/head.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/boot/compressed/head.S	2017-01-13 08:40:24.000000000 -0500
@@ -37,7 +37,7 @@
 startup_32:
 	cld
 	cli
-	movl	$(__KERNEL_DS), %eax
+	movl	$(__BOOT_DS), %eax
 	movl	%eax, %ds
 	movl	%eax, %es
 	movl	%eax, %ss
@@ -135,11 +135,11 @@ startup_32:
 	 * When the jump is performend we will be in long mode but
 	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
 	 * (and in turn EFER.LMA = 1).	To jump into 64bit mode we use
-	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+	 * the new gdt/idt that has __BOOT_CS with CS.L = 1.
 	 * We place all of the values on our mini stack so lret can
 	 * used to perform that far jump.
 	 */
-	pushl	$__KERNEL_CS
+	pushl	$__BOOT_CS
 	leal	startup_64(%ebp), %eax
 	pushl	%eax
 
@@ -261,8 +261,8 @@ gdt:
 	.long	gdt
 	.word	0
 	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x00af9a000000ffff	/* __BOOT_CS */
+	.quad	0x00cf92000000ffff	/* __BOOT_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
 	.quad   0x0000000000000000	/* TS continued */
 gdt_end:
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/boot/setup.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/boot/setup.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/boot/setup.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/boot/setup.S	2017-01-13 08:40:24.000000000 -0500
@@ -728,7 +728,7 @@ flush_instr:
 	subw	$DELTA_INITSEG, %si
 	shll	$4, %esi			# Convert to 32-bit pointer
 # NOTE: For high loaded big kernels we need a
-#	jmpi    0x100000,__KERNEL_CS
+#	jmpi    0x100000,__BOOT_CS
 #
 #	but we yet haven't reloaded the CS register, so the default size 
 #	of the target offset still is 16 bit.
@@ -739,7 +739,7 @@ flush_instr:
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 code32:	.long	0x1000				# will be set to 0x100000
 						# for big kernels
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32_aout.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32_aout.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32_aout.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32_aout.c	2017-01-13 08:40:19.000000000 -0500
@@ -347,14 +347,14 @@ static int load_aout_binary(struct linux
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
 		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
 		{
-			printk(KERN_NOTICE "executable not page aligned\n");
+			ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
 			error_time2 = jiffies;
 		}
 
 		if ((fd_offset & ~PAGE_MASK) != 0 &&
 		    (jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING 
 			       "fd_offset is not page aligned. Please convert program: %s\n",
 			       bprm->file->f_dentry->d_name.name);
 			error_time = jiffies;
@@ -461,7 +461,7 @@ static int load_aout_library(struct file
 		static unsigned long error_time;
 		if ((jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING 
 			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
 			       file->f_dentry->d_name.name);
 			error_time = jiffies;
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32_binfmt.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32_binfmt.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32_binfmt.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32_binfmt.c	2017-01-13 08:40:27.000000000 -0500
@@ -31,7 +31,7 @@
 #define AT_SYSINFO 32
 #define AT_SYSINFO_EHDR		33
 
-int sysctl_vsyscall32 = 1;
+int sysctl_vsyscall32 = 0;
 
 #undef ARCH_DLINFO
 #define ARCH_DLINFO do {  \
@@ -249,6 +249,9 @@ do {							\
 
 #include <linux/module.h>
 
+extern struct linux_binfmt elf32_format;
+EXPORT_SYMBOL(elf32_format);
+
 MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); 
 MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
 
@@ -261,7 +264,7 @@ static void elf32_init(struct pt_regs *)
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 #define arch_setup_additional_pages syscall32_setup_pages
-extern int syscall32_setup_pages(struct linux_binprm *, int uses_interp);
+extern int syscall32_setup_pages(struct linux_binprm *, int uses_interp, unsigned long map_address);
 
 #include "../../../fs/binfmt_elf.c" 
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32entry.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32entry.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32entry.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32entry.S	2017-01-13 08:40:40.000000000 -0500
@@ -102,7 +102,8 @@ ENTRY(ia32_sysenter_target)
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	$VSYSCALL32_SYSEXIT, %r10d
+	GET_THREAD_INFO(%r10)
+	movl	threadinfo_sysenter_return(%r10), %r10d
 	CFI_REGISTER rip,r10
 	pushq	$__USER32_CS
 	CFI_ADJUST_CFA_OFFSET 8
@@ -151,7 +152,7 @@ sysenter_do_call:	
 	popq	%rcx				/* User %esp */
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rsp,rcx
-	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
+	movl	threadinfo_sysenter_return(%r10),%edx /* User %eip */
 	CFI_REGISTER rip,rdx
 	TRACE_IRQS_ON
 	swapgs
@@ -518,7 +519,7 @@ ia32_sys_call_table:
 	.quad stub32_iopl		/* 110 */
 	.quad sys_vhangup
 	.quad quiet_ni_syscall	/* old "idle" system call */
-	.quad sys32_vm86_warning	/* vm86old */ 
+	.quad quiet_ni_syscall	/* vm86old */ 
 	.quad compat_sys_wait4
 	.quad sys_swapoff		/* 115 */
 	.quad sys32_sysinfo
@@ -536,7 +537,7 @@ ia32_sys_call_table:
 	.quad sys_init_module
 	.quad sys_delete_module
 	.quad quiet_ni_syscall		/* 130  get_kernel_syms */
-	.quad sys_quotactl
+	.quad sys32_quotactl
 	.quad sys_getpgid
 	.quad sys_fchdir
 	.quad quiet_ni_syscall	/* bdflush */
@@ -571,7 +572,7 @@ ia32_sys_call_table:
 	.quad sys_mremap
 	.quad sys_setresuid16
 	.quad sys_getresuid16	/* 165 */
-	.quad sys32_vm86_warning	/* vm86 */ 
+	.quad quiet_ni_syscall	/* vm86 */ 
 	.quad quiet_ni_syscall	/* query_module */
 	.quad sys_poll
 	.quad compat_sys_nfsservctl
@@ -732,23 +733,44 @@ ia32_sys_call_table:
 	.quad compat_sys_vmsplice
 	.quad compat_sys_move_pages
 	.quad sys_getcpu
-	.quad quiet_ni_syscall		/* sys_epoll_pwait */
-	.quad quiet_ni_syscall		/* 320 */ /* compat_sys_utimensat */
-	.quad quiet_ni_syscall		/* compat_sys_signalfd */
+	.quad compat_sys_epoll_pwait
+	.quad compat_sys_utimensat	/* 320 */
+	.quad compat_sys_signalfd
 	.quad quiet_ni_syscall		/* sys_timerfd_create */
 	.quad sys_eventfd		/* sys_eventd */
 	.quad sys32_fallocate
 	.quad quiet_ni_syscall		/* 325 */
 	.quad quiet_ni_syscall
+	.quad compat_sys_signalfd4
 	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall		/* 330 */
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
+	.quad sys_epoll_create1
+	.quad sys_dup3			/* 330 */
+	.quad sys_pipe2
+	.quad sys_inotify_init1
+	.quad compat_sys_preadv
+	.quad compat_sys_pwritev
 	.quad quiet_ni_syscall		/* 335 */
 	.quad quiet_ni_syscall
 	.quad compat_sys_recvmmsg
+	.rept 500-(.-ia32_sys_call_table)/8
+		.quad sys_ni_syscall
+	.endr
+	.quad sys_fairsched_mknod	/* 500 */
+	.quad sys_fairsched_rmnod
+	.quad sys_fairsched_chwt
+	.quad sys_fairsched_mvpr
+	.quad sys_fairsched_rate
+	.quad sys_fairsched_vcpus	/* 505 */
+	.quad sys_fairsched_cpumask
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_getluid		/* 510 */
+	.quad sys_setluid
+	.quad compat_sys_setublimit
+	.quad compat_sys_ubstat
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall		/* 515 */
+	.quad sys_lchmod
+	.quad compat_sys_lutime
 ia32_syscall_end:		
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32entry-xen.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32entry-xen.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/ia32entry-xen.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/ia32entry-xen.S	2017-01-13 08:40:40.000000000 -0500
@@ -120,7 +120,8 @@ ENTRY(ia32_sysenter_target)
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	$VSYSCALL32_SYSEXIT, %r10d
+	GET_THREAD_INFO(%r10)
+	movl	threadinfo_sysenter_return(%r10), %r10d
 	CFI_REGISTER rip,r10
 	pushq	$__USER32_CS
 	CFI_ADJUST_CFA_OFFSET 8
@@ -170,7 +171,7 @@ sysenter_do_call:	
 	popq	%rcx				/* User %esp */
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rsp,rcx
-	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
+	movl	threadinfo_sysenter_return(%r10),%edx /* User %eip */
 	CFI_REGISTER rip,rdx
 	TRACE_IRQS_ON
 	__swapgs
@@ -543,7 +544,7 @@ ia32_sys_call_table:
 	.quad stub32_iopl		/* 110 */
 	.quad sys_vhangup
 	.quad quiet_ni_syscall	/* old "idle" system call */
-	.quad sys32_vm86_warning	/* vm86old */ 
+	.quad quiet_ni_syscall	/* vm86old */ 
 	.quad compat_sys_wait4
 	.quad sys_swapoff		/* 115 */
 	.quad sys32_sysinfo
@@ -561,7 +562,7 @@ ia32_sys_call_table:
 	.quad sys_init_module
 	.quad sys_delete_module
 	.quad quiet_ni_syscall		/* 130  get_kernel_syms */
-	.quad sys_quotactl
+	.quad sys32_quotactl
 	.quad sys_getpgid
 	.quad sys_fchdir
 	.quad quiet_ni_syscall	/* bdflush */
@@ -596,7 +597,7 @@ ia32_sys_call_table:
 	.quad sys_mremap
 	.quad sys_setresuid16
 	.quad sys_getresuid16	/* 165 */
-	.quad sys32_vm86_warning	/* vm86 */ 
+	.quad quiet_ni_syscall	/* vm86 */ 
 	.quad quiet_ni_syscall	/* query_module */
 	.quad sys_poll
 	.quad compat_sys_nfsservctl
@@ -757,7 +758,7 @@ ia32_sys_call_table:
 	.quad compat_sys_vmsplice
 	.quad compat_sys_move_pages
 	.quad sys_getcpu
-	.quad quiet_ni_syscall		/* sys_epoll_pwait */
+	.quad compat_sys_epoll_pwait
 	.quad quiet_ni_syscall		/* 320 */ /* compat_sys_utimensat */
 	.quad quiet_ni_syscall		/* compat_sys_signalfd */
 	.quad quiet_ni_syscall		/* sys_timerfd_create */
@@ -767,10 +768,10 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall
 	.quad quiet_ni_syscall
 	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall		/* 330 */
-	.quad quiet_ni_syscall
-	.quad quiet_ni_syscall
+	.quad sys_epoll_create1
+	.quad sys_dup3			/* 330 */
+	.quad sys_pipe2
+	.quad sys_inotify_init1
 	.quad quiet_ni_syscall
 	.quad quiet_ni_syscall
 	.quad quiet_ni_syscall		/* 335 */
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/syscall32.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/syscall32.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/syscall32.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/syscall32.c	2017-01-13 08:40:41.000000000 -0500
@@ -10,6 +10,8 @@
 #include <linux/init.h>
 #include <linux/stringify.h>
 #include <linux/security.h>
+#include <linux/module.h>
+#include <linux/mman.h>
 #include <asm/proto.h>
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>
@@ -19,17 +21,23 @@ extern unsigned char syscall32_sysenter[
 extern int sysctl_vsyscall32;
 
 static struct page *syscall32_pages[1];
+void *syscall32_page;
+EXPORT_SYMBOL_GPL(syscall32_page);
 static int use_sysenter = -1;
 
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int uses_interp)
+int syscall32_setup_pages(struct linux_binprm *bprm, int uses_interp,
+				unsigned long map_address)
 {
 	struct mm_struct *mm = current->mm;
+	unsigned long addr = map_address ? : __VSYSCALL32_BASE;
 	int ret;
 
-	down_write(&mm->mmap_sem);
+	if (sysctl_vsyscall32 == 0 && map_address == 0)
+		return 0;
+
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
 	 *
@@ -38,18 +46,31 @@ int syscall32_setup_pages(struct linux_b
 	 * without matching up the same kernel and hardware config to see
 	 * what PC values meant.
 	 */
-	ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+	down_write(&mm->mmap_sem);
+	ret = addr = get_unmapped_area_prot(NULL, addr, PAGE_SIZE, 0,
+			MAP_PRIVATE | MAP_FIXED, PROT_READ | PROT_EXEC);
+	if (unlikely(addr & ~PAGE_MASK))
+		goto out;
+
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
 			VM_READ|VM_EXEC|
 			VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 			VM_ALWAYSDUMP,
 			syscall32_pages);
+	if (ret == 0) {
+		mm->context.vdso = (void *)addr;
+		current_thread_info()->sysenter_return = VSYSCALL32_SYSEXIT;
+	}
+
+out:
 	up_write(&mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(syscall32_setup_pages);
 
 static int __init init_syscall32(void)
 { 
-	void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+	syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
 	syscall32_pages[0] = virt_to_page(syscall32_page);
 	if (!syscall32_page) 
 		panic("Cannot allocate syscall32 page"); 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/syscall32-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/syscall32-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/syscall32-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/syscall32-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -10,6 +10,8 @@
 #include <linux/init.h>
 #include <linux/stringify.h>
 #include <linux/security.h>
+#include <linux/module.h>
+#include <linux/mman.h>
 #include <asm/proto.h>
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>
@@ -22,6 +24,8 @@ extern unsigned char syscall32_sysenter[
 extern int sysctl_vsyscall32;
 
 static struct page *syscall32_pages[1];
+void *syscall32_page;
+EXPORT_SYMBOL_GPL(syscall32_page);
 #ifndef USE_INT80
 static int use_sysenter = -1;
 #endif
@@ -29,11 +33,16 @@ static int use_sysenter = -1;
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int uses_interp)
+int syscall32_setup_pages(struct linux_binprm *bprm, int uses_interp,
+				unsigned long map_address)
 {
 	struct mm_struct *mm = current->mm;
+	unsigned long addr = map_address ? : __VSYSCALL32_BASE;
 	int ret;
 
+	if (sysctl_vsyscall32 == 0 && map_address == 0)
+		return 0;
+
 	down_write(&mm->mmap_sem);
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -43,18 +52,30 @@ int syscall32_setup_pages(struct linux_b
 	 * without matching up the same kernel and hardware config to see
 	 * what PC values meant.
 	 */
-	ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+	/* NOTE(review): mmap_sem is already write-locked above; a second down_write() here self-deadlocks */
+	ret = addr = get_unmapped_area_prot(NULL, addr, PAGE_SIZE, 0,
+			MAP_PRIVATE | MAP_FIXED, PROT_READ | PROT_EXEC);
+	if (unlikely(addr & ~PAGE_MASK))
+		goto out;
+
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
 			VM_READ|VM_EXEC|
 			VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 			VM_ALWAYSDUMP,
 			syscall32_pages);
+	if (ret == 0) {
+		mm->context.vdso = (void *)addr;
+		current_thread_info()->sysenter_return = VSYSCALL32_SYSEXIT;
+	}
+out:
 	up_write(&mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(syscall32_setup_pages);
 
 static int __init init_syscall32(void)
 { 
-	void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+	syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
 	syscall32_pages[0] = virt_to_page(syscall32_page);
 	if (!syscall32_page) 
 		panic("Cannot allocate syscall32 page"); 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/sys_ia32.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/sys_ia32.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/sys_ia32.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/sys_ia32.c	2017-01-13 08:40:15.000000000 -0500
@@ -732,25 +732,26 @@ asmlinkage long sys32_olduname(struct ol
 
 	if (!name)
 		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
 		return -EFAULT;
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-	 __put_user(0,name->sysname+__OLD_UTS_LEN);
-	 __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-	 __put_user(0,name->nodename+__OLD_UTS_LEN);
-	 __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-	 __put_user(0,name->release+__OLD_UTS_LEN);
-	 __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-	 __put_user(0,name->version+__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       __OLD_UTS_LEN);
+	 __put_user(0, name->sysname + __OLD_UTS_LEN);
+	 __copy_to_user(&name->nodename, &utsname()->nodename, __OLD_UTS_LEN);
+	 __put_user(0, name->nodename + __OLD_UTS_LEN);
+	 __copy_to_user(&name->release, &utsname()->release, __OLD_UTS_LEN);
+	 __put_user(0, name->release + __OLD_UTS_LEN);
+	 __copy_to_user(&name->version, &utsname()->version, __OLD_UTS_LEN);
+	 __put_user(0, name->version + __OLD_UTS_LEN);
 	 { 
 		 char *arch = "x86_64";
 		 if (personality(current->personality) == PER_LINUX32)
 			 arch = "i686";
 		 
-		 __copy_to_user(&name->machine,arch,strlen(arch)+1);
+		 __copy_to_user(&name->machine, arch, strlen(arch) + 1);
 	 }
 	
 	 up_read(&uts_sem);
@@ -766,7 +767,7 @@ long sys32_uname(struct old_utsname __us
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5);
@@ -821,18 +822,6 @@ long sys32_fadvise64_64(int fd, __u32 of
 			       advice); 
 } 
 
-long sys32_vm86_warning(void)
-{ 
-	struct task_struct *me = current;
-	static char lastcomm[sizeof(me->comm)];
-	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
-		       me->comm);
-		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	} 
-	return -ENOSYS;
-} 
-
 long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
 			  char __user * buf, size_t len)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/vsyscall.lds kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/vsyscall.lds
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/vsyscall.lds	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/vsyscall.lds	2017-01-13 08:40:26.000000000 -0500
@@ -4,11 +4,11 @@
  */
 
 /* This must match <asm/fixmap.h>.  */
-VSYSCALL_BASE = 0xffffe000;
+__VSYSCALL_BASE = 0xbffff000;
 
 SECTIONS
 {
-  . = VSYSCALL_BASE + SIZEOF_HEADERS;
+  . = __VSYSCALL_BASE + SIZEOF_HEADERS;
 
   .hash           : { *(.hash) }		:text
   .gnu.hash       : { *(.gnu.hash) }
@@ -22,18 +22,18 @@ SECTIONS
      For the layouts to match, we need to skip more than enough
      space for the dynamic symbol table et al.  If this amount
      is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VSYSCALL_BASE + 0x400;
+  . = __VSYSCALL_BASE + 0x400;
   
   .text.vsyscall   : { *(.text.vsyscall) } 	:text =0x90909090
 
   /* This is an 32bit object and we cannot easily get the offsets
      into the 64bit kernel. Just hardcode them here. This assumes
      that all the stubs don't need more than 0x100 bytes. */
-  . = VSYSCALL_BASE + 0x500;
+  . = __VSYSCALL_BASE + 0x500;
 
   .text.sigreturn  : { *(.text.sigreturn) }	:text =0x90909090
 
-  . = VSYSCALL_BASE + 0x600;
+  . = __VSYSCALL_BASE + 0x600;
 
   .text.rtsigreturn : { *(.text.rtsigreturn) }   :text =0x90909090
 	
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/vsyscall-sysenter.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/vsyscall-sysenter.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/ia32/vsyscall-sysenter.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/ia32/vsyscall-sysenter.S	2017-01-13 08:40:26.000000000 -0500
@@ -20,9 +20,9 @@ __kernel_vsyscall:
 .Lenter_kernel:
 	movl	%esp,%ebp
 	sysenter
-	.space 7,0x90
+	.space 23,0x90
 	jmp	.Lenter_kernel
-	/* 16: System call normal return point is here! */
+	/* 32: System call normal return point is here! */
 	pop	%ebp
 .Lpop_ebp:
 	pop	%edx
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/Kconfig kernel-2.6.18-417.el5-028stab121/arch/x86_64/Kconfig
--- kernel-2.6.18-417.el5.orig/arch/x86_64/Kconfig	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/Kconfig	2017-01-13 08:40:28.000000000 -0500
@@ -328,6 +328,8 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
+source "kernel/Kconfig.fairsched"
+
 source "kernel/Kconfig.preempt"
 
 config NUMA
@@ -433,7 +435,7 @@ config NR_CPUS
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && HOTPLUG && EXPERIMENTAL
+	depends on SMP && HOTPLUG && EXPERIMENTAL && !SCHED_VCPU
 	help
 		Say Y here to experiment with turning CPUs off and on.  CPUs
 		can be controlled through /sys/devices/system/cpu/cpu#.
@@ -832,6 +834,8 @@ endmenu
 
 source "arch/x86_64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
@@ -843,3 +847,5 @@ source "drivers/xenpv_hvm/Kconfig"
 endif
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/acpi/wakeup.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/acpi/wakeup.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/acpi/wakeup.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/acpi/wakeup.S	2017-01-13 08:40:24.000000000 -0500
@@ -95,7 +95,7 @@ wakeup_32:
 	movb	$0xa5, %al	;  outb %al, $0x80
 
 	/* Initialize segments */
-	movl	$__KERNEL_DS, %eax
+	movl	$__BOOT_DS, %eax
 	movl	%eax, %ds
 
 	movw	$0x0e00 + 'i', %ds:(0xb8012)
@@ -148,7 +148,7 @@ wakeup_32:
 	 * At this point we're in long mode but in 32bit compatibility mode
 	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
 	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load
-	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+	 * the new gdt/idt that has __BOOT_CS with CS.L = 1.
 	 */
 
 	/* Finally jump in 64bit mode */
@@ -157,7 +157,7 @@ wakeup_32:
 	.balign 4
 wakeup_long64_vector:
 	.long	wakeup_long64 - wakeup_code
-	.word	__KERNEL_CS, 0
+	.word	__BOOT_CS, 0
 
 .code64
 
@@ -183,7 +183,7 @@ wakeup_long64:
 
 	nop
 	nop
-	movw	$__KERNEL_DS, %ax
+	movw	$__BOOT_DS, %ax
 	movw	%ax, %ss	
 	movw	%ax, %ds
 	movw	%ax, %es
@@ -208,8 +208,8 @@ gdta:
 	.word	0, 0, 0, 0			# dummy
 	/* ??? Why I need the accessed bit set in order for this to work? */
 	.quad	0x00cf9b000000ffff		# __KERNEL32_CS
-	.quad	0x00af9b000000ffff		# __KERNEL_CS
-	.quad	0x00cf93000000ffff		# __KERNEL_DS
+	.quad	0x00af9b000000ffff		# __BOOT_CS
+	.quad	0x00cf93000000ffff		# __BOOT_DS
 
 idt_48a:
 	.word	0				# idt limit = 0
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/asm-offsets.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/asm-offsets.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/asm-offsets.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/asm-offsets.c	2017-01-13 08:40:26.000000000 -0500
@@ -35,6 +35,7 @@ int main(void)
 	ENTRY(addr_limit);
 	ENTRY(preempt_count);
 	ENTRY(status);
+	ENTRY(sysenter_return);
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/entry.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/entry.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/entry.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/entry.S	2017-01-13 08:40:24.000000000 -0500
@@ -151,7 +151,12 @@ ENTRY(ret_from_fork)
 	popf				# reset kernel eflags
 	CFI_ADJUST_CFA_OFFSET -4
 	call schedule_tail
+ret_from_fork_tail:
 	GET_THREAD_INFO(%rcx)
+	btr $TIF_RESUME,threadinfo_flags(%rcx)
+	jc  x86_64_ret_from_resume
+
+ret_from_fork_check:
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
 	jnz rff_trace
 rff_action:	
@@ -167,6 +172,19 @@ rff_trace:
 	call syscall_trace_leave
 	GET_THREAD_INFO(%rcx)	
 	jmp rff_action
+
+x86_64_ret_from_resume:
+	movq (%rsp),%rax
+	testq %rax,%rax
+	jz 1f
+	movq  %rsp,%rdi
+	call  *%rax
+1:
+	addq $256,%rsp
+	cmpq $0,ORIG_RAX(%rsp)
+	jge  ret_from_fork_tail
+	RESTORE_REST
+	jmp  int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -379,7 +397,7 @@ int_check_syscall_exit_work:
 	jmp int_restore_rest
 	
 int_signal:
-	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_NOTIFY_RESUME|_TIF_RESTORE_SIGMASK|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
 	jz 1f
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
@@ -599,7 +617,7 @@ retint_careful:
 	jmp retint_check
 	
 retint_signal:
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	sti
@@ -954,7 +972,7 @@ ENTRY(kernel_thread)
 	xorl %r9d,%r9d
 	
 	# clone now
-	call do_fork
+	call do_fork_kthread
 	movq %rax,RAX(%rsp)
 	xorl %edi,%edi
 	test %rax,%rax
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/entry-xen.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/entry-xen.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/entry-xen.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/entry-xen.S	2017-01-13 08:40:40.000000000 -0500
@@ -85,6 +85,7 @@ NMI_MASK = 0x80000000
 		
 	/* %rsp:at FRAMEEND */ 
 	.macro FIXUP_TOP_OF_STACK tmp
+	movq    $__USER_DS,SS(%rsp)
 	movq    $__USER_CS,CS(%rsp)
 	movq 	$-1,RCX(%rsp)
 	.endm
@@ -192,7 +193,11 @@ ENTRY(ret_from_fork)
 	popf                # reset kernel eflags
 	CFI_ADJUST_CFA_OFFSET -4
 	call schedule_tail
+ret_from_fork_tail:
 	GET_THREAD_INFO(%rcx)
+	btr $TIF_RESUME,threadinfo_flags(%rcx)
+	jc  x86_64_ret_from_resume
+ret_from_fork_check:
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
 	jnz rff_trace
 rff_action:	
@@ -208,6 +213,19 @@ rff_trace:
 	call syscall_trace_leave
 	GET_THREAD_INFO(%rcx)	
 	jmp rff_action
+
+x86_64_ret_from_resume:
+	movq (%rsp),%rax
+	testq %rax,%rax
+	jz 1f
+	movq  %rsp,%rdi
+	call  *%rax
+1:
+	addq $256,%rsp
+	cmpq $0,ORIG_RAX(%rsp)
+	jge  ret_from_fork_tail
+	RESTORE_REST
+	jmp  int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -309,7 +327,7 @@ sysret_signal:
 	TRACE_IRQS_ON
 /*	sti */
         XEN_UNBLOCK_EVENTS(%rsi)        
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f
 
 	/* Really a signal */
@@ -427,7 +445,7 @@ int_very_careful:
 	jmp int_restore_rest
 	
 int_signal:
-	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_NOTIFY_RESUME|_TIF_RESTORE_SIGMASK|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
 	jz 1f
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
@@ -577,7 +595,7 @@ retint_careful:
 	jmp retint_check
 	
 retint_signal:
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_restore_args
 	TRACE_IRQS_ON
         XEN_UNBLOCK_EVENTS(%rsi)
@@ -972,7 +990,7 @@ ENTRY(kernel_thread)
 	xorl %r9d,%r9d
 	
 	# clone now
-	call do_fork
+	call do_fork_kthread
 	movq %rax,RAX(%rsp)
 	xorl %edi,%edi
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/genapic.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/genapic.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/genapic.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/genapic.c	2017-01-13 08:40:15.000000000 -0500
@@ -67,7 +67,10 @@ void __init clustered_apic_check(void)
 				genapic = &apic_flat;
 		}
 #else
-		genapic = &apic_flat;
+		if (num_possible_cpus() > 8)
+			genapic = &apic_physflat;
+		else
+			genapic = &apic_flat;
 #endif
 	}
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/head.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/head.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/head.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/head.S	2017-01-13 08:40:24.000000000 -0500
@@ -196,7 +196,7 @@ ENTRY(secondary_startup_64)
 	lgdt	cpu_gdt_descr(%rip)
 
 	/* set up data segments. actually 0 would do too */
-	movl $__KERNEL_DS,%eax
+	movl $__BOOT_DS,%eax
 	movl %eax,%ds	
 	movl %eax,%ss
 	movl %eax,%es
@@ -222,7 +222,14 @@ ENTRY(secondary_startup_64)
 	/* esi is pointer to real mode structure with interesting info.
 	   pass it to C */
 	movl	%esi, %edi
-	
+
+	/* Switch to __KERNEL_CS. The segment is the same, but selector
+	 * is different. */
+	pushq	$__KERNEL_CS
+	pushq	$switch_cs
+	lretq
+switch_cs:
+
 	/* Finally jump to run C code and to be on real kernel address
 	 * Since we are running on identity-mapped space we have to jump
 	 * to the full 64bit address , this is only possible as indirect
@@ -335,8 +342,6 @@ NEXT_PAGE(level2_spare_pgt)
 
 	.data
 
-	__CPUINITDATA
-
 	.align 16
 	.globl cpu_gdt_descr
 cpu_gdt_descr:
@@ -363,21 +368,30 @@ ENTRY(phys_base)
 	.align PAGE_SIZE
 
 /* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
+   Hopefully nobody expects them at a fixed place (Wine?)
+   Descriptors rearranged to place 32bit and TLS selectors in the same
+   places, because it is really necessary. sysret/exit mandates order
+   of kernel/user cs/ds, so we have to extend gdt.
+*/
 	
 ENTRY(cpu_gdt_table)
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
-	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffb000000ffff	/* __USER32_CS */
-	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
-	.quad	0x00affb000000ffff	/* __USER_CS */
-	.quad	0x0			/* unused */
-	.quad	0,0			/* TSS */
-	.quad	0,0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */
- 	.quad	0x0000f40000000000	/* node/CPU stored in limit */
+	.quad	0x0000000000000000	/* 0 NULL descriptor */
+	.quad	0x00cf9a000000ffff	/* 1 __KERNEL32_CS */	
+	.quad	0x00af9a000000ffff	/* 2 __BOOT_CS */
+	.quad	0x00cf92000000ffff	/* 3 __BOOT_DS */
+	.quad	0,0			/* 4,5 TSS */
+	.quad   0,0,0			/* 6-8 three TLS descriptors */ 
+	.quad	0,0			/* 9,10 LDT */
+	.quad	0x0			/* 11 unused */
+	.quad	0x00af9a000000ffff	/* 12 __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* 13 __KERNEL_DS */
+	.quad	0x00cffa000000ffff	/* 14 __USER32_CS */
+	.quad	0x00cff2000000ffff	/* 15 __USER_DS, __USER32_DS  */
+	.quad	0x00affa000000ffff	/* 16 __USER_CS */
+	.quad	0x0000f40000000000	/* 17 node/CPU stored in limit */
+	.quad	0,0,0,0,0,0
+	.quad	0,0,0,0,0,0,0,0
+
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
 	/* This should be a multiple of the cache line size */
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/head-xen.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/head-xen.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/head-xen.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/head-xen.S	2017-01-13 08:40:40.000000000 -0500
@@ -111,20 +111,30 @@ gdt:
 	.align PAGE_SIZE
 
 /* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
+   Hopefully nobody expects them at a fixed place (Wine?) 
+   Descriptors rearranged to place 32bit and TLS selectors in the same
+   places, because it is really necessary. sysret/exit mandates order
+   of kernel/user cs/ds, so we have to extend gdt.
+*/
 
 ENTRY(cpu_gdt_table)
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
-	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffb000000ffff	/* __USER32_CS */
-	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
-	.quad	0x00affb000000ffff	/* __USER_CS */
-	.quad	0,0			/* TSS */
-	.quad	0,0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */
-	.quad	0			/* unused */
+	.quad   0x0000000000000000	/* 0 NULL descriptor */
+	.quad   0x00cf9a000000ffff	/* 1 __KERNEL32_CS */
+	.quad   0x00af9a000000ffff	/* 2 __BOOT_CS */
+	.quad   0x00cf92000000ffff	/* 3 __BOOT_DS */
+	.quad   0,0			/* 4,5 TSS */
+	.quad   0,0,0			/* 6-8 three TLS descriptors */
+	.quad   0,0			/* 9,10 LDT */
+	.quad   0x0			/* 11 unused */
+	.quad   0x00af9a000000ffff	/* 12 __KERNEL_CS */
+	.quad   0x00cf92000000ffff	/* 13 __KERNEL_DS */
+	.quad   0x00cffa000000ffff	/* 14 __USER32_CS */
+	.quad   0x00cff2000000ffff	/* 15 __USER_DS, __USER32_DS  */
+	.quad   0x00affa000000ffff	/* 16 __USER_CS */
+	.quad   0x0			/* 17 unused */
+	.quad   0,0,0,0,0,0
+	.quad   0,0,0,0,0,0,0,0
+
 gdt_end:
 	/* asm/segment.h:GDT_ENTRIES must match this */
 	/* This should be a multiple of the cache line size */
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/init_task.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/init_task.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/init_task.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/init_task.c	2017-01-13 08:40:15.000000000 -0500
@@ -5,6 +5,7 @@
 #include <linux/init_task.h>
 #include <linux/fs.h>
 #include <linux/mqueue.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -15,6 +16,7 @@ static struct files_struct init_files = 
 #include <linux/init_signals.h>
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
 EXPORT_SYMBOL(init_mm);
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/io_apic.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/io_apic.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/io_apic.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/io_apic.c	2017-01-13 08:40:16.000000000 -0500
@@ -871,6 +871,12 @@ next:
 	return vector;
 }
 
+int __irq_to_vector(int irq)
+{
+	return IO_APIC_VECTOR(irq);
+}
+EXPORT_SYMBOL(__irq_to_vector);
+
 extern void (*interrupt[NR_IRQS])(void);
 static struct hw_interrupt_type ioapic_level_type;
 static struct hw_interrupt_type ioapic_edge_type;
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/irq.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/irq.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/irq.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/irq.c	2017-01-13 08:40:20.000000000 -0500
@@ -98,6 +98,10 @@ skip:
 			seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
 		seq_putc(p, '\n');
 #endif
+		seq_printf(p, "RES: ");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", cpu_pda(j)->__reschedule_count);
+		seq_putc(p, '\n');
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 #ifdef CONFIG_X86_IO_APIC
 #ifdef APIC_MISMATCH_DEBUG
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/ldt.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/ldt.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/ldt.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/ldt.c	2017-01-13 08:40:24.000000000 -0500
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -23,6 +24,8 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -42,9 +45,9 @@ static int alloc_ldt(mm_context_t *pc, u
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -109,6 +112,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/ldt-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/ldt-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/ldt-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/ldt-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -24,6 +25,8 @@
 #include <asm/proto.h>
 #include <asm/pgalloc.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -43,9 +46,9 @@ static int alloc_ldt(mm_context_t *pc, u
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -120,6 +123,8 @@ int init_new_context(struct task_struct 
 		retval = copy_ldt(&mm->context, &old_mm->context);
 		up(&old_mm->context.sem);
 	}
+	if (old_mm)
+		mm->context.vdso = old_mm->context.vdso;
 	if (retval == 0) {
 		spin_lock(&mm_unpinned_lock);
 		list_add(&mm->context.unpinned, &mm_unpinned);
@@ -127,6 +132,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/process.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/process.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/process.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/process.c	2017-01-13 08:40:41.000000000 -0500
@@ -36,6 +36,7 @@
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -52,6 +53,10 @@
 #include <asm/ia32.h>
 #include <asm/idle.h>
 
+#include <linux/utsrelease.h>
+
+asmlinkage extern void execve(void);
+EXPORT_SYMBOL_GPL(execve);
 asmlinkage extern void ret_from_fork(void);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
@@ -302,13 +307,14 @@ void __show_regs(struct pt_regs * regs)
 
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+	printk("Pid: %d, comm: %.20s %s %s %.*s %s\n",
 		current->pid, current->comm, print_tainted(),
-		system_utsname.release,
-		(int)strcspn(system_utsname.version, " "),
-		system_utsname.version);
+		init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version, VZVERSION);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-	printk_address(regs->rip); 
+	if (decode_call_traces)
+		printk_address(regs->rip); 
 	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
 		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
@@ -345,9 +351,9 @@ void __show_regs(struct pt_regs * regs)
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk("CPU %d, VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 	__show_regs(regs);
-	show_trace(NULL, regs, (void *)(regs + 1));
+	show_trace(NULL, regs, &regs->rsp);
 }
 
 /*
@@ -889,3 +895,20 @@ void randomize_brk(unsigned long old_brk
 		current->mm->brk = current->mm->start_brk = new_brk;
 
 }
+
+long do_fork_kthread(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	if (ve_allow_kthreads || ve_is_super(get_exec_env()))
+		return do_fork(clone_flags, stack_start, regs, stack_size,
+				parent_tidptr, child_tidptr);
+
+	/* Don't allow kernel_thread() inside VE */
+	printk("kernel_thread call inside container\n");
+	dump_stack();
+	return -EPERM;
+}
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/process-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/process-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/process-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/process-xen.c	2017-01-13 08:40:41.000000000 -0500
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -62,6 +63,8 @@
 #include <xen/cpu_hotplug.h>
 
 asmlinkage extern void ret_from_fork(void);
+asmlinkage extern void execve(void);
+EXPORT_SYMBOL_GPL(execve);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
@@ -218,11 +221,11 @@ void __show_regs(struct pt_regs * regs)
 	print_modules();
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
-		system_utsname.release,
-		(int)strcspn(system_utsname.version, " "),
-		system_utsname.version);
+		init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-	printk_address(regs->rip); 
+	printk_address(regs->rip);
 	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
 		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
@@ -254,7 +257,7 @@ void __show_regs(struct pt_regs * regs)
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk("CPU %d, VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 	__show_regs(regs);
 	show_trace(NULL, regs, &regs->rsp);
 }
@@ -818,3 +821,21 @@ void randomize_brk(unsigned long old_brk
 		current->mm->brk = new_brk;
 
 }
+
+long do_fork_kthread(unsigned long clone_flags,
+		unsigned long stack_start,
+		struct pt_regs *regs,
+		unsigned long stack_size,
+		int __user *parent_tidptr,
+		int __user *child_tidptr)
+{
+	if (ve_allow_kthreads || ve_is_super(get_exec_env()))
+		return do_fork(clone_flags, stack_start, regs, stack_size,
+				parent_tidptr, child_tidptr);
+
+	/* Don't allow kernel_thread() inside VE */
+	printk("kernel_thread call inside container\n");
+	dump_stack();
+	return -EPERM;
+}
+
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/ptrace.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/ptrace.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/ptrace.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/ptrace.c	2017-01-13 08:40:25.000000000 -0500
@@ -35,6 +35,9 @@
 #include <asm/proto.h>
 #include <asm/ia32.h>
 #include <asm/prctl.h>
+#ifdef CONFIG_VE
+#include <asm/unistd.h>
+#endif
 
 #include <trace/syscall.h>
 
@@ -316,10 +319,29 @@ static int putreg(struct task_struct *ch
 	return 0;
 }
 
+#ifdef CONFIG_VE
+static inline unsigned long ptrace_hack_child_pid(struct task_struct *child, unsigned long regno, unsigned long value)
+{
+	struct pt_regs *regs = task_pt_regs(child);
+
+	if ((offsetof(struct pt_regs, rax) == regno) && 
+		ve_is_super(get_exec_env()) &&
+		!ve_is_super(child->ve_task_info.owner_env) && 
+		((regs->orig_rax == __NR_fork) || 
+		(regs->orig_rax == __NR_vfork) || 
+		(regs->orig_rax == __NR_clone)) && 
+		((long)value > 0))
+			value = vpid_to_pid_ve(value, child->ve_task_info.owner_env);
+	return value;
+}
+#endif
+
 static unsigned long getreg(struct task_struct *child, unsigned long regno)
 {
 	unsigned long val;
 	unsigned int seg;
+	unsigned long orig_regno = regno;
+
 	switch (regno) {
 	case offsetof(struct user_regs_struct, fs):
 		if (child == current) {
@@ -350,6 +372,15 @@ static unsigned long getreg(struct task_
 		return child->thread.fs;
 	case offsetof(struct user_regs_struct, gs_base):
 		return child->thread.gs;
+	case offsetof(struct user_regs_struct, cs):
+		if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) {
+			val = get_stack_long(child, regno - sizeof(struct pt_regs));
+			if (val == __USER_CS)
+				return 0x33;
+			if (val == __USER32_CS)
+				return 0x23;
+		}
+		/* fall through */
 	default:
 		regno = regno - sizeof(struct pt_regs);
 		val = get_stack_long(child, regno);
@@ -359,6 +390,9 @@ static unsigned long getreg(struct task_
 			      - sizeof(struct pt_regs))
 		    && test_tsk_thread_flag(child, TIF_FORCED_TF))
 			val &= ~X86_EFLAGS_TF;
+#ifdef CONFIG_VE
+		val = ptrace_hack_child_pid(child, orig_regno, val);
+#endif
 		return val;
 	}
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup64.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup64.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup64.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup64.c	2017-01-13 08:40:24.000000000 -0500
@@ -273,3 +273,5 @@ void __cpuinit cpu_init (void)
 
 	raw_local_save_flags(kernel_eflags);
 }
+
+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup64-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup64-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup64-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup64-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -341,3 +341,6 @@ void __cpuinit cpu_init (void)
 
 	raw_local_save_flags(kernel_eflags);
 }
+
+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
+
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup.c	2017-01-13 08:40:28.000000000 -0500
@@ -45,6 +45,7 @@
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/vsched.h>
 #include <linux/efi.h>
 
 #include <asm/mtrr.h>
@@ -1389,6 +1390,9 @@ void __cpuinit early_identify_cpu(struct
 	    (cpuid_edx(0x80000007) & (1<<8)))
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
 	init_scattered_cpuid_features(c);
+
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		early_init_intel(c);
 }
 
 /*
@@ -1570,7 +1574,7 @@ static int show_cpuinfo(struct seq_file 
 
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(c-cpu_data))
+	if (!vcpu_online(c - cpu_data))
 		return 0;
 #endif
 
@@ -1591,9 +1595,13 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "stepping\t: unknown\n");
 	
 	if (cpu_has(c,X86_FEATURE_TSC)) {
+#ifndef CONFIG_FAIRSCHED
 		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
 		if (!freq)
 			freq = cpu_khz;
+#else
+		unsigned int freq = (unsigned int)ve_scale_khz(cpu_khz);
+#endif
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
 			     freq / 1000, (freq % 1000));
 	}
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/setup-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/setup-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -45,6 +45,7 @@
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/vsched.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -1693,7 +1694,7 @@ static int show_cpuinfo(struct seq_file 
 
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(c-cpu_data))
+	if (!vcpu_online(c - cpu_data))
 		return 0;
 #endif
 
@@ -1714,9 +1715,13 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "stepping\t: unknown\n");
 	
 	if (cpu_has(c,X86_FEATURE_TSC)) {
+#ifndef CONFIG_FAIRSCHED
 		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
 		if (!freq)
 			freq = cpu_khz;
+#else
+		unsigned int freq = (unsigned int)ve_scale_khz(cpu_khz);
+#endif
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
 			     freq / 1000, (freq % 1000));
 	}
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/signal.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/signal.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/signal.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/signal.c	2017-01-13 08:40:24.000000000 -0500
@@ -441,6 +441,9 @@ static void do_signal(struct pt_regs *re
 	if (!user_mode(regs))
 		return;
 
+	if (try_to_freeze() && !signal_pending(current))
+  		goto no_signal;
+  
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
 	else
@@ -467,6 +470,7 @@ static void do_signal(struct pt_regs *re
 		return;
 	}
 
+no_signal:
 	/* Did we come from a system call? */
 	if (current_syscall(regs) >= 0) {
 		/* Restart the system call - no handlers present */
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/smp.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/smp.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/smp.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/smp.c	2017-01-13 08:40:16.000000000 -0500
@@ -532,6 +532,7 @@ void smp_send_stop(void)
 asmlinkage void smp_reschedule_interrupt(void)
 {
 	ack_APIC_irq();
+	add_pda(__reschedule_count, 1);
 }
 
 asmlinkage void smp_call_function_interrupt(void)
@@ -559,3 +560,8 @@ asmlinkage void smp_call_function_interr
 		atomic_inc(&call_data->finished);
 	}
 }
+
+void send_nmi_ipi_allbutself(void)
+{
+	send_IPI_allbutself(NMI_VECTOR);
+}
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/smp-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/smp-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/smp-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/smp-xen.c	2017-01-13 08:40:26.000000000 -0500
@@ -303,8 +303,10 @@ void flush_tlb_current_task(void)
 { xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
 void flush_tlb_mm (struct mm_struct * mm)
 { xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+EXPORT_SYMBOL(flush_tlb_mm);
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 { xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+EXPORT_SYMBOL(flush_tlb_page);
 void flush_tlb_all(void)
 { xen_tlb_flush_all(); }
 #endif /* Xen */
@@ -560,6 +562,7 @@ asmlinkage irqreturn_t smp_reschedule_in
 #ifndef CONFIG_XEN
 	ack_APIC_irq();
 #else
+	add_pda(__reschedule_count, 1);
 	return IRQ_HANDLED;
 #endif
 }
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/sys_x86_64.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/sys_x86_64.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/sys_x86_64.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/sys_x86_64.c	2017-01-13 08:40:15.000000000 -0500
@@ -305,7 +305,7 @@ asmlinkage long sys_uname(struct new_uts
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5); 		
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/time.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/time.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/time.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/time.c	2017-01-13 08:40:40.000000000 -0500
@@ -1557,7 +1557,7 @@ void __init time_init(void)
 #endif
 }
 
-static __cpuinit int num_chassis;
+static __cpuinitdata int num_chassis;
 
 static __cpuinit void check_multi_chassis(const struct dmi_header *d)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/trampoline.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/trampoline.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/trampoline.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/trampoline.S	2017-01-13 08:40:24.000000000 -0500
@@ -84,7 +84,7 @@ r_base = .
 	.code32
 	.balign 4
 startup_32:
-	movl	$__KERNEL_DS, %eax	# Initialize the %ds segment register
+	movl	$__BOOT_DS, %eax	# Initialize the %ds segment register
 	movl	%eax, %ds
 
 	xorl	%eax, %eax
@@ -198,7 +198,7 @@ startup_32_vector:
 	.balign 4
 startup_64_vector:
 	.long	startup_64 - r_base
-	.word	__KERNEL_CS, 0
+	.word	__BOOT_CS, 0
 
 trampoline_stack:
 	.org 0x1000
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/traps.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/traps.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/traps.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/traps.c	2017-01-13 08:40:23.000000000 -0500
@@ -121,6 +121,11 @@ void printk_address(unsigned long addres
 	char *delim = ":";
 	char namebuf[128];
 
+	if (!decode_call_traces) {
+		printk("[<%016lx>]", address);
+		return;
+	}
+
 	symname = kallsyms_lookup(address, &symsize, &offset,
 					&modname, namebuf);
 	if (!symname) {
@@ -238,12 +243,19 @@ static unsigned long *in_exception_stack
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+	void *t = (void *)tinfo;
+	return p > t && p < t + THREAD_SIZE - 3;
+}
+
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
 		struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = smp_processor_id();
 	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
+	struct thread_info *tinfo;
 
 	if (!tsk)
 		tsk = current;
@@ -326,7 +338,8 @@ void dump_trace(struct task_struct *tsk,
 	/*
 	 * This handles the process stack:
 	 */
-	HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
+	tinfo = task_thread_info(tsk);
+	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
 #undef HANDLE_STACK
 }
 EXPORT_SYMBOL(dump_trace);
@@ -400,7 +413,7 @@ _show_stack(struct task_struct *tsk, str
 		if (((long) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		}
-		if (i && ((i % 4) == 0))
+		if (i && ((i % 4) == 0) && decode_call_traces)
 			printk("\n");
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
@@ -413,6 +426,8 @@ void show_stack(struct task_struct *tsk,
 	_show_stack(tsk, NULL, rsp);
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -434,10 +449,12 @@ void show_registers(struct pt_regs *regs
 
 		rsp = regs->rsp;
 
-	printk("CPU %d ", cpu);
+	printk("CPU: %d ", cpu);
 	__show_regs(regs);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, task_thread_info(cur), cur);
+	printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
+		cur->comm, cur->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		task_thread_info(cur), cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -584,6 +601,7 @@ void __kprobes die_nmi(char *str, struct
 	 */
 	printk(str, smp_processor_id());
 	show_registers(regs);
+	nmi_show_regs(regs, 1);
 	if (kexec_should_crash(current))
 		crash_kexec(regs);
 	if (panic_on_timeout || panic_on_oops)
@@ -792,7 +810,8 @@ asmlinkage __kprobes void default_do_nmi
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 */
-		if (nmi_watchdog_tick(regs, reason))
+		if (nmi_watchdog_tick(regs, reason) +
+				do_nmi_show_regs(regs, cpu))
 			return;
 
 		if (!do_nmi_callback2(regs, cpu))
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/traps-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/traps-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/traps-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/traps-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -415,6 +415,8 @@ void show_stack(struct task_struct *tsk,
 	_show_stack(tsk, NULL, rsp);
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -436,10 +438,12 @@ void show_registers(struct pt_regs *regs
 
 		rsp = regs->rsp;
 
-	printk("CPU %d ", cpu);
+	printk("CPU: %d ", cpu);
 	__show_regs(regs);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, task_thread_info(cur), cur);
+	printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
+		cur->comm, cur->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		task_thread_info(cur), cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/vsyscall.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/vsyscall.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/kernel/vsyscall.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/kernel/vsyscall.c	2017-01-13 08:40:28.000000000 -0500
@@ -37,6 +37,7 @@
 #include <asm/segment.h>
 #include <asm/desc.h>
 #include <asm/topology.h>
+#include <asm/unistd.h>
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 
@@ -148,6 +149,24 @@ time_t __vsyscall(1) vtime(time_t *t)
 
    tcache must point to a two element sized long array.
    All arguments can be NULL. */
+
+#ifdef CONFIG_SCHED_VCPU
+long __vsyscall(2)
+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+{
+	long ret;
+	/*
+	 * arguments already in proper order due
+	 * to x86-64 ABI convention
+	 */
+	asm volatile ("syscall"
+		: "=a" (ret)
+		: "0" (__NR_getcpu), "D" (cpu), "S" (node), "d" (tcache)
+		: "r11","rcx","memory");
+
+	return ret;
+}
+#else /* CONFIG_SCHED_VCPU */
 long __vsyscall(2)
 vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
@@ -181,6 +200,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
 		*node = p >> 12;
 	return 0;
 }
+#endif /* CONFIG_SCHED_VCPU */
 
 long __vsyscall(3) venosys_1(void)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/lib/cpuid-on-cpu.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/lib/cpuid-on-cpu.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/lib/cpuid-on-cpu.c	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/lib/cpuid-on-cpu.c	2017-01-13 08:40:15.000000000 -0500
@@ -0,0 +1 @@
+#include "../../i386/lib/cpuid-on-cpu.c"
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/lib/Makefile kernel-2.6.18-417.el5-028stab121/arch/x86_64/lib/Makefile
--- kernel-2.6.18-417.el5.orig/arch/x86_64/lib/Makefile	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/lib/Makefile	2017-01-13 08:40:15.000000000 -0500
@@ -19,7 +19,7 @@ CFLAGS_csum-partial.o := -funroll-loops
 obj-y := io.o iomap_copy.o
 
 ifndef CONFIG_XEN
-obj-$(CONFIG_SMP)	+= msr-on-cpu.o
+obj-$(CONFIG_SMP)	+= msr-on-cpu.o cpuid-on-cpu.o
 endif
 
 lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/mm/fault.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/fault.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/mm/fault.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/fault.c	2017-01-13 08:40:19.000000000 -0500
@@ -76,27 +76,6 @@ static inline int notify_page_fault(enum
 }
 #endif
 
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-	if (yes) {
-		oops_in_progress = 1;
-	} else {
-#ifdef CONFIG_VT
-		unblank_screen();
-#endif
-		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
-	}
-}
-
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
@@ -334,7 +313,7 @@ static int vmalloc_fault(unsigned long a
 }
 
 int page_fault_trace = 0;
-int exception_trace = 1;
+int exception_trace = 0;
 
 static inline void __do_page_fault(struct pt_regs *regs,
 					unsigned long address,
@@ -397,7 +376,7 @@ static inline void __do_page_fault(struc
 		local_irq_enable();
 
 	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+		ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
 
 	if (unlikely(error_code & PF_RSVD))
@@ -417,7 +396,6 @@ static inline void __do_page_fault(struc
 	if (user_mode_vm(regs))
 		error_code |= PF_USER;
 
- again:
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
@@ -523,7 +501,7 @@ bad_area_nosemaphore:
 			return;
 
 		if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
-			printk(
+			ve_printk(VE_LOG, 
 		       "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 					tsk->comm, tsk->pid, address, regs->rip,
@@ -572,7 +550,8 @@ no_context:
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT,address);
-	printk_address(regs->rip);
+	if (decode_call_traces)
+		printk_address(regs->rip);
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
@@ -589,13 +568,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) { 
-		yield();
-		goto again;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
+	if (error_code & 4) {
+		/* 
+		 * 0-order allocation always success if something really 
+		 * fatal not happen: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/mm/fault-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/fault-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/mm/fault-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/fault-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -76,27 +76,6 @@ static inline int notify_page_fault(enum
 }
 #endif
 
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-	if (yes) {
-		oops_in_progress = 1;
-	} else {
-#ifdef CONFIG_VT
-		unblank_screen();
-#endif
-		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
-	}
-}
-
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
@@ -331,7 +310,7 @@ static int vmalloc_fault(unsigned long a
 }
 
 int page_fault_trace = 0;
-int exception_trace = 1;
+int exception_trace = 0;
 
 
 #define MEM_VERBOSE 1
@@ -454,7 +433,7 @@ static inline void __do_page_fault(struc
 		local_irq_enable();
 
 	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+		ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
 
 	if (unlikely(error_code & PF_RSVD))
@@ -467,7 +446,6 @@ static inline void __do_page_fault(struc
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
- again:
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
@@ -571,7 +549,7 @@ bad_area_nosemaphore:
 			return;
 
 		if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
-			printk(
+			ve_printk(VE_LOG,
 		       "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 					tsk->comm, tsk->pid, address, regs->rip,
@@ -637,13 +615,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) { 
-		yield();
-		goto again;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
+	if (error_code & 4) {
+		/*
+		 * 0-order allocation always success if something really
+		 * fatal not happen: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/mm/init.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/init.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/mm/init.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/init.c	2017-01-13 08:40:23.000000000 -0500
@@ -96,6 +96,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages shared\n",shared);
 	printk(KERN_INFO "%lu pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 int after_bootmem;
 EXPORT_SYMBOL(after_bootmem);
@@ -903,3 +904,5 @@ const char *arch_vma_name(struct vm_area
 		return "[vsyscall]";
 	return NULL;
 }
+
+EXPORT_SYMBOL(phys_base);
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/mm/init-xen.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/init-xen.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/mm/init-xen.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/init-xen.c	2017-01-13 08:40:40.000000000 -0500
@@ -193,6 +193,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages shared\n",shared);
 	printk(KERN_INFO "%lu pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 int after_bootmem;
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/mm/ioremap.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/ioremap.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/mm/ioremap.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/ioremap.c	2017-01-13 08:40:14.000000000 -0500
@@ -299,7 +299,8 @@ void iounmap(volatile void __iomem *addr
 
 	/* Reset the direct mapping. Can block */
 	if (p->flags >> 20)
-		ioremap_change_attr(p->phys_addr, p->size, 0);
+		/* p->size includes the guard page, but cpa doesn't like that */
+		ioremap_change_attr(p->phys_addr, p->size - PAGE_SIZE, 0);
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/mm/mmap.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/mmap.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/mm/mmap.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/mm/mmap.c	2017-01-13 08:40:41.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/personality.h>
 #include <linux/mm.h>
 #include <linux/random.h>
+#include <linux/module.h>
 
 /*
  * Top of mmap area (just below the process stack).
@@ -133,3 +134,4 @@ void arch_pick_mmap_layout(struct mm_str
 	}
 
 }
+EXPORT_SYMBOL(arch_pick_mmap_layout);
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/Makefile kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/Makefile
--- kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/Makefile	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/Makefile	2017-01-13 08:40:40.000000000 -0500
@@ -4,16 +4,16 @@
 
 # files to link into the vdso
 # vdso-start.o has to be first
-vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
+vobjs-y := vclock_gettime.o vgetcpu.o vvar.o
 
 # files to link into kernel
-obj-y := vma.o vdso.o vdso-syms.o
+obj-y := vma.o vdso.o
 
 vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
 $(obj)/vdso.o: $(obj)/vdso.so
 
-targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o
+targets += vdso.so vdso.lds $(vobjs-y)
 
 # The DSO images are built using a special linker script.
 quiet_cmd_syscall = SYSCALL $@
@@ -38,13 +38,17 @@ CF := $(PROFILING) -mcmodel=small -fPIC 
 $(obj)/vclock_gettime.o: CFLAGS = $(CF)
 $(obj)/vgetcpu.o: CFLAGS = $(CF)
 
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO.  With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vdso-syms.o
-$(obj)/built-in.o: $(obj)/vdso-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+targets += vdso-syms.lds
+obj-y			+= vdso-syms.lds
+#
+# Match symbols in the DSO that look like VDSO*; produce a file of constants.
+#
+sed-vdsosym := -e 's/^00*/0/' \
+	-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+quiet_cmd_vdsosym = VDSOSYM $@
+define cmd_vdsosym
+	$(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
+endef
 
-SYSCFLAGS_vdso-syms.o = -r -d
-$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
-	$(call if_changed,syscall)
+$(obj)/%-syms.lds: $(obj)/%.so FORCE
+	$(call if_changed,vdsosym)
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vclock_gettime.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vclock_gettime.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vclock_gettime.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vclock_gettime.c	2017-01-13 08:40:41.000000000 -0500
@@ -57,6 +57,8 @@ static inline long vgetns(void)
 		cycle_last = vdso_vxtime->last_tsc;
 		mult = vdso_vxtime->tsc_quot;
 		mask = (~(u64)0);
+		if (unlikely(vread < cycle_last))
+			vread = cycle_last;
 	}
 
 	return (((vread - cycle_last) & mask) * mult) >>
@@ -105,11 +107,10 @@ static noinline int do_monotonic(struct 
 	return 0;
 }
 
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+int ___vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
 	if (likely(*vdso_sysctl_vsyscall &&
-		   (vdso_vxtime->mode != VXTIME_KVM) &&
-		   (vdso_vxtime->mode != VXTIME_HV)))
+		   (vdso_vxtime->mode < VXTIME_PMTMR)))
 		switch (clock) {
 		case CLOCK_REALTIME:
 			return do_realtime(ts);
@@ -118,16 +119,22 @@ int __vdso_clock_gettime(clockid_t clock
 		}
 	return vdso_fallback_gettime(clock, ts);
 }
+
+int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) __attribute__((section("CLOCKGETTIME")));
+int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+	return ___vdso_clock_gettime(clock, ts);
+}
 int clock_gettime(clockid_t, struct timespec *)
 	 __attribute__((weak, alias("__vdso_clock_gettime")));
 
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+#if __GNUC__ >= 4
+int ___vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 
 	if (likely(*vdso_sysctl_vsyscall &&
-		   (vdso_vxtime->mode != VXTIME_KVM) &&
-		   (vdso_vxtime->mode != VXTIME_HV))) {
+		   (vdso_vxtime->mode < VXTIME_PMTMR))) {
 		if (likely(tv != NULL)) {
 			BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
 				     offsetof(struct timespec, tv_nsec) ||
@@ -146,5 +153,11 @@ int __vdso_gettimeofday(struct timeval *
 	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
 	return ret;
 }
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) __attribute__((section("GETTIMEOFDAY")));
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return ___vdso_gettimeofday(tv, tz);
+}
 int gettimeofday(struct timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));
+#endif
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vdso.lds.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vdso.lds.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vdso.lds.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vdso.lds.S	2017-01-13 08:40:40.000000000 -0500
@@ -26,16 +26,19 @@ SECTIONS
       is insufficient, ld -shared will barf.  Just increase it here.  */
    . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
 
+   .gettimeofday 0xffffffffff7006f0 : { *(GETTIMEOFDAY) } :text
+   .clockgettime 0xffffffffff700780 : { *(CLOCKGETTIME) } :text
+   .getcpu 0xffffffffff7007c0 : { *(GETCPU) } :text
    .text           : { *(.text) }               :text
    .text.ptr       : { *(.text.ptr) }           :text
-   . = VDSO_PRELINK + 0x900;
+   . = VDSO_PRELINK + 0x1000;
    .data           : { *(.data) }               :text
    .bss            : { *(.bss) }                        :text
 
    .altinstructions : { *(.altinstructions) }                   :text
    .altinstr_replacement  : { *(.altinstr_replacement) }        :text
 
-   .note                  : { *(.note.*) }              :text :note
+   /* .note                  : { *(.note.*) }              :text :note */
    .eh_frame_hdr   : { *(.eh_frame_hdr) }       :text :eh_frame_hdr
    .eh_frame       : { KEEP (*(.eh_frame)) }    :text
    .dynamic        : { *(.dynamic) }            :text :dynamic
@@ -55,7 +58,7 @@ PHDRS
 {
   text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
   dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  note PT_NOTE FLAGS(4); /* PF_R */
+  /* note PT_NOTE FLAGS(4); */ /* PF_R */
   eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
 }
 
@@ -75,3 +78,11 @@ VERSION
     local: *;
   };
 }
+
+VDSO64_PRELINK = VDSO_PRELINK;
+/*
+ * Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
+ */
+#define VEXTERN(x)	VDSO64_ ## x = vdso_ ## x;
+#include "vextern.h"
+#undef	VEXTERN
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vdso-start.S kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vdso-start.S
--- kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vdso-start.S	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vdso-start.S	2017-01-13 08:40:41.000000000 -0500
@@ -1,2 +0,0 @@
-	.globl vdso_kernel_start
-vdso_kernel_start:
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vgetcpu.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vgetcpu.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vgetcpu.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vgetcpu.c	2017-01-13 08:40:40.000000000 -0500
@@ -13,6 +13,8 @@
 #include "vextern.h"
 
 long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+				__attribute__((section("GETCPU")));
+long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
 	unsigned int dummy, p;
 
diff -upr kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vma.c kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vma.c
--- kernel-2.6.18-417.el5.orig/arch/x86_64/vdso/vma.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/x86_64/vdso/vma.c	2017-01-13 08:40:40.000000000 -0500
@@ -7,15 +7,19 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <asm/vsyscall.h>
 #include <asm/proto.h>
-#include "voffset.h"
+#include <asm/vdso.h>
 
-int vdso_enabled = 1;
+#include "vextern.h"		/* Just for VMAGIC.  */
 
+int vdso_enabled = 1;
+/*
 #define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
 #include "vextern.h"
+*/
 #undef VEXTERN
 
 extern char vdso_kernel_start[], vdso_start[], vdso_end[];
@@ -23,10 +27,10 @@ extern unsigned short vdso_sync_cpuid;
 
 struct page **vdso_pages;
 
-static inline void *var_ref(void *vbase, char *var, char *name)
+extern const char VDSO64_PRELINK[];
+
+static inline void *var_ref(char *p, char *name)
 {
-	unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
-	void *p = vbase + offset;
 	if (*(void **)p != (void *)VMAGIC) {
 		printk("VDSO: variable %s broken\n", name);
 		vdso_enabled = 0;
@@ -61,9 +65,8 @@ static int __init init_vdso_vars(void)
 		vdso_enabled = 0;
 	}
 
-#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
 #define VEXTERN(x) \
-	V(vdso_ ## x) = &__ ## x;
+	*(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;
 #include "vextern.h"
 #undef VEXTERN
 	return 0;
@@ -99,7 +102,8 @@ static unsigned long vdso_addr(unsigned 
 
 /* Setup a VMA at program startup for the vsyscall page.
    Not called for compat tasks */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp,
+				unsigned long map_addr)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
@@ -110,12 +114,16 @@ int arch_setup_additional_pages(struct l
 		return 0;
 
 	down_write(&mm->mmap_sem);
-	addr = vdso_addr(mm->start_stack, len);
-	addr = get_unmapped_area(NULL, addr, len, 0, 0);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto up_fail;
-	}
+	if (!map_addr) {
+		addr = vdso_addr(mm->start_stack, len);
+		addr = get_unmapped_area(NULL, addr, len, 0, 0);
+
+		if (IS_ERR_VALUE(addr)) {
+			ret = addr;
+			goto up_fail;
+		}
+	} else
+		addr = map_addr;
 
 	ret = install_special_mapping(mm, addr, len,
 				      VM_READ|VM_EXEC|
@@ -130,6 +138,7 @@ up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(arch_setup_additional_pages);
 
 static __init int vdso_setup(char *s)
 {
diff -upr kernel-2.6.18-417.el5.orig/arch/xtensa/kernel/syscalls.c kernel-2.6.18-417.el5-028stab121/arch/xtensa/kernel/syscalls.c
--- kernel-2.6.18-417.el5.orig/arch/xtensa/kernel/syscalls.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/arch/xtensa/kernel/syscalls.c	2017-01-13 08:40:15.000000000 -0500
@@ -128,7 +128,7 @@ out:
 
 int sys_uname(struct old_utsname * name)
 {
-	if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+	if (name && !copy_to_user(name, utsname(), sizeof (*name)))
 		return 0;
 	return -EFAULT;
 }
diff -upr kernel-2.6.18-417.el5.orig/block/cfq-iosched.c kernel-2.6.18-417.el5-028stab121/block/cfq-iosched.c
--- kernel-2.6.18-417.el5.orig/block/cfq-iosched.c	2017-01-13 07:39:10.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/block/cfq-iosched.c	2017-01-13 08:40:19.000000000 -0500
@@ -12,6 +12,11 @@
 #include <linux/hash.h>
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
+#include <linux/cfq-iosched.h>
+#include <ub/beancounter.h>
+#include <ub/io_prio.h>
+#include <ub/io_acct.h>
+#include <ub/ub_hash.h>
 
 /*
  * tunables
@@ -26,6 +31,7 @@ static const int cfq_slice_sync = HZ / 1
 static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
 static int cfq_slice_idle = HZ / 125;
+static int cfq_ub_slice = HZ / 5;
 
 /*
  * Allow merged cfqqs to perform this amount of seeky I/O before
@@ -58,13 +64,11 @@ static int cfq_slice_idle = HZ / 125;
 #define rq_rb_key(rq)		(rq)->sector
 
 static kmem_cache_t *crq_pool;
-static kmem_cache_t *cfq_pool;
 static kmem_cache_t *cfq_ioc_pool;
 
 static atomic_t ioc_count = ATOMIC_INIT(0);
 static struct completion *ioc_gone;
 
-#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_be(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -77,135 +81,6 @@ static struct completion *ioc_gone;
 
 #define sample_valid(samples)	((samples) > 80)
 
-/*
- * Per block device queue structure
- */
-struct cfq_data {
-	request_queue_t *queue;
-
-	/*
-	 * rr list of queues with requests and the count of them
-	 */
-	struct list_head rr_list[CFQ_PRIO_LISTS];
-	struct list_head busy_rr;
-	struct list_head cur_rr;
-	struct list_head idle_rr;
-
-	/*
-	 * Each priority tree is sorted by next_request position.  These
-	 * trees are used when determining if two or more queues are
-	 * interleaving requests (see cfq_close_cooperator).
-	 */
-	struct rb_root prio_trees[CFQ_PRIO_LISTS];
-
-	unsigned int busy_queues;
-
-	/*
-	 * non-ordered list of empty cfqq's
-	 */
-	struct list_head empty_list;
-
-	/*
-	 * global crq hash for all queues
-	 */
-	struct hlist_head *crq_hash;
-
-	mempool_t *crq_pool;
-
-	int rq_in_driver;
-	int hw_tag;
-
-	/*
-	 * schedule slice state info
-	 */
-	/*
-	 * idle window management
-	 */
-	struct timer_list idle_slice_timer;
-	struct work_struct unplug_work;
-
-	struct cfq_queue *active_queue;
-	struct cfq_io_context *active_cic;
-	int cur_prio, cur_end_prio;
-	unsigned int dispatch_slice;
-
-	/*
-	 * async queue for each priority case
-	 */
-	struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
-	struct cfq_queue *async_idle_cfqq;
-
-	struct timer_list idle_class_timer;
-
-	sector_t last_position;
-	unsigned long last_end_request;
-
-	unsigned int rq_starved;
-
-	/*
-	 * tunables, see top of file
-	 */
-	unsigned int cfq_quantum;
-	unsigned int cfq_queued;
-	unsigned int cfq_fifo_expire[2];
-	unsigned int cfq_back_penalty;
-	unsigned int cfq_back_max;
-	unsigned int cfq_slice[2];
-	unsigned int cfq_slice_async_rq;
-	unsigned int cfq_slice_idle;
-
-	struct list_head cic_list;
-};
-
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
-	/* reference count */
-	atomic_t ref;
-	/* parent cfq_data */
-	struct cfq_data *cfqd;
-	/* on either rr or empty list of cfqd */
-	struct list_head cfq_list;
-	/* prio tree member */
-	struct rb_node p_node;
-	/* prio tree root we belong to, if any */
-	struct rb_root *p_root;
-	/* sorted list of pending requests */
-	struct rb_root sort_list;
-	/* if fifo isn't expired, next request to serve */
-	struct cfq_rq *next_crq;
-	/* requests queued in sort_list */
-	int queued[2];
-	/* currently allocated requests */
-	int allocated[2];
-	/* fifo list of requests in sort_list */
-	struct list_head fifo;
-
-	unsigned long slice_start;
-	unsigned long slice_end;
-	unsigned long slice_left;
-	unsigned long service_last;
-
-	/* number of requests that are on the dispatch list */
-	int on_dispatch[2];
-
-	/* io prio of this group */
-	unsigned short ioprio, org_ioprio;
-	unsigned short ioprio_class, org_ioprio_class;
-
-	unsigned int seek_samples;
-	u64 seek_total;
-	sector_t seek_mean;
-	sector_t last_request_pos;
-	unsigned long seeky_start;
-
-	/* various state flags, see below */
-	unsigned int flags;
-
-	struct cfq_queue *new_cfqq;
-};
-
 struct cfq_rq {
 	struct rb_node rb_node;
 	sector_t rb_key;
@@ -280,7 +155,7 @@ CFQ_CRQ_FNS(is_sync);
 
 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
-				       struct task_struct *, gfp_t);
+				       struct task_struct *, struct ub_iopriv *, gfp_t);
 static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *,
 						struct io_context *);
 
@@ -295,6 +170,31 @@ static inline void cic_set_cfqq(struct c
 {
 	cic->cfqq[!!is_sync] = cfqq;
 }
+static void cfq_put_queue(struct cfq_queue *cfqq);
+
+#ifdef CONFIG_UBC_IO_PRIO
+static inline struct ub_iopriv *cfqq_ub_iopriv(struct cfq_data *cfqd, int is_sync)
+{
+	if (is_sync)
+		return cfqd->virt_mode ? &get_io_ub()->iopriv : &get_ub0()->iopriv;
+	else
+		return cfqd->write_virt_mode ? &get_io_ub()->iopriv : &get_ub0()->iopriv;
+}
+#else
+static inline struct ub_iopriv *cfqq_ub_iopriv(struct cfq_data *cfqd, pid_t key)
+{
+	return  NULL;
+}
+#endif
+
+static inline struct user_beancounter *ub_by_iopriv(struct ub_iopriv *iopriv)
+{
+#ifdef CONFIG_UBC_IO_PRIO
+	return container_of(iopriv, struct user_beancounter, iopriv);
+#else
+	return NULL;
+#endif
+}
 
 /*
  * We regard a request as SYNC, if it's either a read or has the SYNC bit
@@ -555,17 +455,19 @@ static void cfq_prio_tree_add(struct cfq
 
 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 {
-	struct cfq_data *cfqd = cfqq->cfqd;
 	struct list_head *list, *entry;
+	struct cfq_bc_data *cfq_bc;
 
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 
 	list_del(&cfqq->cfq_list);
 
+	cfq_bc = cfqq->cfq_bc;
+
 	if (cfq_class_rt(cfqq))
-		list = &cfqd->cur_rr;
+		list = &cfq_bc->cur_rr;
 	else if (cfq_class_idle(cfqq))
-		list = &cfqd->idle_rr;
+		list = &cfq_bc->idle_rr;
 	else {
 		/*
 		 * if cfqq has requests in flight, don't allow it to be
@@ -575,16 +477,16 @@ static void cfq_resort_rr_list(struct cf
 		 * sporadically or synchronously
 		 */
 		if (cfq_cfqq_dispatched(cfqq))
-			list = &cfqd->busy_rr;
+			list = &cfq_bc->busy_rr;
 		else
-			list = &cfqd->rr_list[cfqq->ioprio];
+			list = &cfq_bc->rr_list[cfqq->ioprio];
 	}
 
 	/*
 	 * if queue was preempted, just add to front to be fair. busy_rr
 	 * isn't sorted, but insert at the back for fairness.
 	 */
-	if (preempted || list == &cfqd->busy_rr) {
+	if (preempted || list == &cfq_bc->busy_rr) {
 		if (preempted)
 			list = list->prev;
 
@@ -606,7 +508,7 @@ static void cfq_resort_rr_list(struct cf
 	}
 
 	list_add(&cfqq->cfq_list, entry);
-	cfq_prio_tree_add(cfqd, cfqq);
+	cfq_prio_tree_add(cfqq->cfqd, cfqq);
 }
 
 /*
@@ -619,6 +521,7 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, s
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	bc_inc_rqnum(cfqq);
 
 	cfq_resort_rr_list(cfqq, 0);
 }
@@ -628,7 +531,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, s
 {
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 	cfq_clear_cfqq_on_rr(cfqq);
-	list_move(&cfqq->cfq_list, &cfqd->empty_list);
+	list_move(&cfqq->cfq_list, &cfqq->cfq_bc->empty_list);
 	if (cfqq->p_root) {
 		rb_erase(&cfqq->p_node, cfqq->p_root);
 		cfqq->p_root = NULL;
@@ -636,6 +539,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, s
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	bc_dec_rqnum(cfqq);
 }
 
 /*
@@ -851,6 +755,36 @@ cfq_merged_requests(request_queue_t *q, 
 	cfq_remove_request(next);
 }
 
+static int cfq_allow_merge(request_queue_t *q, struct request *rq,
+			   struct bio *bio)
+{
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+	int bio_sync = cfq_bio_sync(bio);
+	struct cfq_rq *crq = RQ_DATA(rq);
+	struct cfq_queue *cfqq;
+	struct cfq_io_context *cic;
+
+	/*
+	 * Disallow merge of a sync bio into an async request.
+	 */
+	if (bio_sync && !cfq_crq_is_sync(crq))
+		return 0;
+
+	/*
+	 * Lookup the cfqq that this bio will be queued with. Allow
+	 * merge only if rq is queued there.
+	 */
+	cic = cfq_cic_rb_lookup(cfqd, current->io_context);
+	if (!cic)
+		return 0;
+
+	cfqq = cic_to_cfqq(cic, bio_sync);
+	if (cfqq == crq->cfq_queue)
+		return 1;
+
+	return 0;
+}
+
 static inline void
 __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
@@ -866,6 +800,8 @@ __cfq_set_active_queue(struct cfq_data *
 		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_must_alloc_slice(cfqq);
 		cfq_clear_cfqq_fifo_expire(cfqq);
+
+		WARN_ON(cfqq->cfq_bc != cfqd->active_cfq_bc);
 	}
 
 	cfqd->active_queue = cfqq;
@@ -906,6 +842,9 @@ __cfq_slice_expired(struct cfq_data *cfq
 	if (cfqq == cfqd->active_queue)
 		cfqd->active_queue = NULL;
 
+	if (!cfqd->busy_queues)
+		bc_set_active(cfqd, NULL);
+
 	if (cfqd->active_cic) {
 		put_io_context(cfqd->active_cic->ioc);
 		cfqd->active_cic = NULL;
@@ -935,14 +874,19 @@ static inline void cfq_slice_expired(str
 static int cfq_get_next_prio_level(struct cfq_data *cfqd)
 {
 	int prio, wrap;
+	struct cfq_bc_data *cfq_bc;
+
+	cfq_bc = cfqd->active_cfq_bc;
+	if (!cfq_bc)
+		return -1;
 
 	prio = -1;
 	wrap = 0;
 	do {
 		int p;
 
-		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
-			if (!list_empty(&cfqd->rr_list[p])) {
+		for (p = cfq_bc->cur_prio; p <= cfq_bc->cur_end_prio; p++) {
+			if (!list_empty(&cfq_bc->rr_list[p])) {
 				prio = p;
 				break;
 			}
@@ -950,9 +894,9 @@ static int cfq_get_next_prio_level(struc
 
 		if (prio != -1)
 			break;
-		cfqd->cur_prio = 0;
-		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
-			cfqd->cur_end_prio = 0;
+		cfq_bc->cur_prio = 0;
+		if (++cfq_bc->cur_end_prio == CFQ_PRIO_LISTS) {
+			cfq_bc->cur_end_prio = 0;
 			if (wrap)
 				break;
 			wrap = 1;
@@ -964,16 +908,16 @@ static int cfq_get_next_prio_level(struc
 
 	BUG_ON(prio >= CFQ_PRIO_LISTS);
 
-	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
+	list_splice_init(&cfq_bc->rr_list[prio], &cfq_bc->cur_rr);
 
-	cfqd->cur_prio = prio + 1;
-	if (cfqd->cur_prio > cfqd->cur_end_prio) {
-		cfqd->cur_end_prio = cfqd->cur_prio;
-		cfqd->cur_prio = 0;
-	}
-	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
-		cfqd->cur_prio = 0;
-		cfqd->cur_end_prio = 0;
+	cfq_bc->cur_prio = prio + 1;
+	if (cfq_bc->cur_prio > cfq_bc->cur_end_prio) {
+		cfq_bc->cur_end_prio = cfq_bc->cur_prio;
+		cfq_bc->cur_prio = 0;
+	}
+	if (cfq_bc->cur_end_prio == CFQ_PRIO_LISTS) {
+		cfq_bc->cur_prio = 0;
+		cfq_bc->cur_end_prio = 0;
 	}
 
 	return prio;
@@ -1005,33 +949,42 @@ static inline int cfq_rq_close(struct cf
 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
 					      struct cfq_queue *cfqq)
 {
+	struct cfq_bc_data *cfq_bc;
+
 	if (cfqq)
 		goto set_queue;
 
+	bc_schedule_active(cfqd);
+
+	cfq_bc = cfqd->active_cfq_bc;
+	if (!cfq_bc)
+		goto set_queue;
+
 	/*
 	 * if current list is non-empty, grab first entry. if it is empty,
 	 * get next prio level and grab first entry then if any are spliced
 	 */
-	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
-		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+	if (!list_empty(&cfq_bc->cur_rr)
+			|| cfq_get_next_prio_level(cfqd) != -1)
+		cfqq = list_entry_cfqq(cfq_bc->cur_rr.next);
 
 	/*
 	 * If no new queues are available, check if the busy list has some
 	 * before falling back to idle io.
 	 */
-	if (!cfqq && !list_empty(&cfqd->busy_rr))
-		cfqq = list_entry_cfqq(cfqd->busy_rr.next);
+	if (!cfqq && !list_empty(&cfq_bc->busy_rr))
+		cfqq = list_entry_cfqq(cfq_bc->busy_rr.next);
 
 	/*
 	 * if we have idle queues and no rt or be queues had pending
 	 * requests, either allow immediate service if the grace period
 	 * has passed or arm the idle grace timer
 	 */
-	if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+	if (!cfqq && !list_empty(&cfq_bc->idle_rr)) {
 		unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
 
 		if (time_after_eq(jiffies, end))
-			cfqq = list_entry_cfqq(cfqd->idle_rr.next);
+			cfqq = list_entry_cfqq(cfq_bc->idle_rr.next);
 		else
 			mod_timer(&cfqd->idle_class_timer, end);
 	}
@@ -1057,7 +1010,7 @@ static struct cfq_queue *cfqq_close(stru
 	 * request, choose it.
 	 */
 	__cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
-	if (__cfqq)
+	if (__cfqq && __cfqq->cfq_bc == cur_cfqq->cfq_bc)
 		return __cfqq;
 
 	/*
@@ -1065,7 +1018,8 @@ static struct cfq_queue *cfqq_close(stru
 	 * will contain the closest sector.
 	 */
 	__cfqq = rb_entry(parent, struct cfq_queue, p_node);
-	if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_crq->request))
+	if (__cfqq->cfq_bc == cur_cfqq->cfq_bc &&
+			cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_crq->request))
 		return __cfqq;
 
 	if (__cfqq->next_crq->request->sector < sector)
@@ -1076,7 +1030,8 @@ static struct cfq_queue *cfqq_close(stru
 		return NULL;
 
 	__cfqq = rb_entry(node, struct cfq_queue, p_node);
-	if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_crq->request))
+	if (__cfqq->cfq_bc == cur_cfqq->cfq_bc &&
+			cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_crq->request))
 		return __cfqq;
 
 	return NULL;
@@ -1108,7 +1063,7 @@ static struct cfq_queue *cfq_close_coope
 	 * we can group them together and don't waste time idling.
 	 */
 	cfqq = cfqq_close(cfqd, cur_cfqq);
-	if (!cfqq)
+	if (!cfqq || cfqq->cfq_bc != cur_cfqq->cfq_bc)
 		return NULL;
 
 	/*
@@ -1166,13 +1121,16 @@ static int cfq_arm_slice_timer(struct cf
 
 static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq = crq->cfq_queue;
 	struct request *rq;
 
-	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
 	cfq_remove_request(crq->request);
 	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
+	cfqq->cfq_bc->on_dispatch++;
+
+	/* FIXME: must be in cfq_completed_request, but nr_sectors not valid there */
+	cfqq->cfq_bc->sectors_dispatched += crq->request->nr_sectors;
+
 	elv_dispatch_sort(q, crq->request);
 
 	rq = list_entry(q->queue_head.prev, struct request, queuelist);
@@ -1302,10 +1260,14 @@ static struct cfq_queue *cfq_select_queu
 	if (!cfqq)
 		goto new_queue;
 
+	if (!cfqq->cfq_bc)
+		goto expire;
+
 	/*
 	 * slice has expired
 	 */
-	if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end))
+	if (!cfq_cfqq_must_dispatch(cfqq) &&
+	   (time_after(now, cfqq->slice_end) || bc_expired(cfqd)))
 		goto expire;
 
 	/*
@@ -1324,7 +1286,7 @@ static struct cfq_queue *cfq_select_queu
 		if (!cfqq->new_cfqq)
 			cfq_setup_merge(cfqq, new_cfqq);
 		goto expire;
-	} else if (cfq_cfqq_dispatched(cfqq)) {
+	} else if (cfq_cfqq_dispatched(cfqq) && cfq_cfqq_idle_window(cfqq)) {
 		cfqq = NULL;
 		goto keep_queue;
 	} else if (cfq_cfqq_sync(cfqq)) {
@@ -1414,7 +1376,7 @@ cfq_forced_dispatch_cfqqs(struct list_he
 }
 
 static int
-cfq_forced_dispatch(struct cfq_data *cfqd)
+__cfq_forced_dispatch(struct cfq_bc_data *cfqd)
 {
 	int i, dispatched = 0;
 
@@ -1425,6 +1387,36 @@ cfq_forced_dispatch(struct cfq_data *cfq
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
 
+	return dispatched;
+}
+
+static int
+cfq_forced_dispatch(struct cfq_data *cfqd)
+{
+#ifdef CONFIG_UBC_IO_PRIO
+	struct rb_node *node;
+	struct cfq_bc_data *cfq_bc;
+#endif
+	int dispatched;
+
+	dispatched = 0;
+#ifdef CONFIG_UBC_IO_PRIO
+	if (cfqd->active_cfq_bc)
+		dispatched += __cfq_forced_dispatch(cfqd->active_cfq_bc);
+	/*
+	 * We use here _safe iterating, because
+	 * __cfq_forced_dispatch() remove bc from tree implicitly
+	 */
+	node = rb_first(&cfqd->cfq_bc_queue);
+	while (node) {
+		cfq_bc = rb_entry(node, struct cfq_bc_data, cfq_bc_node);
+		node = rb_next(node);
+		dispatched += __cfq_forced_dispatch(cfq_bc);
+	}
+#else
+	dispatched += __cfq_forced_dispatch(&cfqd->cfq_bc);
+#endif
+
 	cfq_slice_expired(cfqd, 0);
 
 	BUG_ON(cfqd->busy_queues);
@@ -1668,6 +1660,7 @@ static void cfq_init_prio_data(struct cf
 static inline void changed_ioprio(struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic->key;
+	struct ub_iopriv *iopriv;
 	struct cfq_queue *cfqq;
 	unsigned long flags;
 
@@ -1679,8 +1672,9 @@ static inline void changed_ioprio(struct
 	cfqq = cic->cfqq[ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
+		iopriv = cfqq_ub_iopriv(cfqd, ASYNC);
 		new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task,
-					 GFP_ATOMIC);
+					 iopriv, GFP_ATOMIC);
 		if (new_cfqq) {
 			cic->cfqq[ASYNC] = new_cfqq;
 			cfq_put_queue(cfqq);
@@ -1766,45 +1760,61 @@ out:
 }
 
 static struct cfq_queue **
-cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
+cfq_async_queue_prio(struct cfq_bc_data *cfq_bc, int ioprio_class, int ioprio)
 {
 	switch(ioprio_class) {
 	case IOPRIO_CLASS_RT:
-		return &cfqd->async_cfqq[0][ioprio];
+		return &cfq_bc->async_cfqq[0][ioprio];
 	case IOPRIO_CLASS_BE:
-		return &cfqd->async_cfqq[1][ioprio];
+		return &cfq_bc->async_cfqq[1][ioprio];
 	case IOPRIO_CLASS_IDLE:
-		return &cfqd->async_idle_cfqq;
+		return &cfq_bc->async_idle_cfqq;
 	default:
 		BUG();
+		return NULL;
 	}
 }
 
 static struct cfq_queue *
 cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk,
-	      gfp_t gfp_mask)
+		struct ub_iopriv *iopriv, gfp_t gfp_mask)
 {
 	const int ioprio = task_ioprio(tsk);
 	const int ioprio_class = task_ioprio_class(tsk);
 	struct cfq_queue **async_cfqq = NULL;
 	struct cfq_queue *cfqq = NULL;
+	struct cfq_bc_data *cfq_bc;
+
+	if (gfp_mask & __GFP_WAIT) {
+		spin_unlock_irq(cfqd->queue->queue_lock);
+		cfq_bc = bc_findcreate_cfq_bc(iopriv, cfqd, gfp_mask);
+		spin_lock_irq(cfqd->queue->queue_lock);
+	} else
+		cfq_bc = bc_findcreate_cfq_bc(iopriv, cfqd, gfp_mask);
+	if (!cfq_bc)
+		return NULL;
 
 	if (!is_sync) {
-		async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
+		async_cfqq = cfq_async_queue_prio(cfq_bc, ioprio_class, ioprio);
 		cfqq = *async_cfqq;
 	}
 
-	if (!cfqq)
+	if (!cfqq) {
 		cfqq = cfq_find_alloc_queue(cfqd, is_sync, tsk, gfp_mask);
+		if (!cfqq->cfq_bc)
+			cfqq->cfq_bc = cfq_bc;
+	}
 
 	/*
 	 * pin the queue now that it's allocated, scheduler exit will prune it
 	 */
-	if (!is_sync && !(*async_cfqq)) {
+	if (!is_sync && !(*async_cfqq) && cfqq->cfq_bc == cfq_bc) {
 		atomic_inc(&cfqq->ref);
 		*async_cfqq = cfqq;
 	}
 
+	BUG_ON(!cfqq->cfq_bc);
+
 	atomic_inc(&cfqq->ref);
 	return cfqq;
 }
@@ -2047,6 +2057,9 @@ cfq_should_preempt(struct cfq_data *cfqd
 	 */
 	if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
 		return 0;
+	if (cfqd->cfq_ub_isolate && new_cfqq->cfq_bc != cfqq->cfq_bc &&
+			!bc_allow_preempt(cfqd, new_cfqq->cfq_bc))
+		return 0;
 	if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
 		return 1;
 
@@ -2063,8 +2076,13 @@ cfq_should_preempt(struct cfq_data *cfqd
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	struct cfq_queue *__cfqq, *next;
+	struct cfq_bc_data *cfq_bc;
+
+	cfq_bc = cfqd->active_cfq_bc;
+	if (!cfq_bc)
+		return;
 
-	list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
+	list_for_each_entry_safe(__cfqq, next, &cfq_bc->cur_rr, cfq_list)
 		cfq_resort_rr_list(__cfqq, 1);
 
 	if (!cfqq->slice_left)
@@ -2072,6 +2090,7 @@ static void cfq_preempt_queue(struct cfq
 
 	cfqq->slice_end = cfqq->slice_left + jiffies;
 	cfq_slice_expired(cfqd, 1);
+	bc_set_active(cfqd, cfqq->cfq_bc);
 	__cfq_set_active_queue(cfqd, cfqq);
 }
 
@@ -2185,6 +2204,8 @@ static void cfq_completed_request(reques
 	WARN_ON(!cfqq->on_dispatch[sync]);
 	cfqd->rq_in_driver--;
 	cfqq->on_dispatch[sync]--;
+	cfqq->cfq_bc->on_dispatch--;
+	cfqq->cfq_bc->requests_dispatched++;
 
 	if (!cfq_class_idle(cfqq))
 		cfqd->last_end_request = now;
@@ -2213,8 +2234,11 @@ static void cfq_completed_request(reques
 			cfq_arm_slice_timer(cfqd, cfqq);
 	}
 
-	if (!cfqd->rq_in_driver)
+	if (!cfqd->rq_in_driver) {
 		cfq_schedule_dispatch(cfqd);
+		if (!cfqd->busy_queues)
+			mod_timer(&cfqd->idle_class_timer, now + CFQ_IDLE_GRACE);
+	}
 }
 
 static struct request *
@@ -2354,6 +2378,7 @@ static void cfq_put_request(request_queu
 		rq->elevator_private = NULL;
 
 		cfq_check_waiters(q, cfqq);
+		put_beancounter(ub_by_iopriv(cfqq->cfq_bc->ub_iopriv));
 		cfq_put_queue(cfqq);
 	}
 }
@@ -2409,10 +2434,12 @@ cfq_set_request(request_queue_t *q, stru
 	struct cfq_queue *cfqq;
 	struct cfq_rq *crq;
 	unsigned long flags;
+	struct ub_iopriv *iopriv;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	cic = cfq_get_io_context(cfqd, gfp_mask);
+	iopriv = cfqq_ub_iopriv(cfqd, is_sync);
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
@@ -2422,7 +2449,7 @@ new_queue:
 
 	cfqq = cic_to_cfqq(cic, is_sync);
 	if (!cfqq) {
-		cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask);
+		cfqq = cfq_get_queue(cfqd, is_sync, tsk, iopriv, gfp_mask);
 		if (!cfqq)
 			goto queue_fail;
 
@@ -2439,6 +2466,15 @@ new_queue:
 			cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
 	}
 
+	/*
+	 * UB was released or changed
+	 */
+	if (!cfqq->cfq_bc || cfqq->cfq_bc->ub_iopriv != iopriv) {
+		cic_set_cfqq(cic, NULL, is_sync);
+		cfq_put_queue(cfqq);
+		goto new_queue;
+	}
+
 	cfqq->allocated[rw]++;
 	cfq_clear_cfqq_must_alloc(cfqq);
 	cfqd->rq_starved = 0;
@@ -2460,6 +2496,11 @@ new_queue:
 			cfq_clear_crq_is_sync(crq);
 
 		rq->elevator_private = crq;
+		/*
+		 * We can't get iopriv here. Otherwise if prioritization
+		 * was suddenly disabled we get a wrong beancounter.
+		 */
+		get_beancounter(ub_by_iopriv(cfqq->cfq_bc->ub_iopriv));
 		return 0;
 	}
 
@@ -2568,8 +2609,11 @@ static void cfq_idle_class_timer(unsigne
 	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
 	if (!time_after_eq(jiffies, end))
 		mod_timer(&cfqd->idle_class_timer, end);
-	else
+	else {
 		cfq_schedule_dispatch(cfqd);
+		if (!cfqd->busy_queues)
+			cfq_slice_expired(cfqd, 0);
+	}
 
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
@@ -2581,20 +2625,45 @@ static void cfq_shutdown_timer_wq(struct
 	blk_sync_queue(cfqd->queue);
 }
 
-static void cfq_put_async_queues(struct cfq_data *cfqd)
+void __cfq_put_async_queues(struct cfq_bc_data *cfq_bc)
 {
 	int i;
 
 	for (i = 0; i < IOPRIO_BE_NR; i++) {
-		if (cfqd->async_cfqq[0][i])
-			cfq_put_queue(cfqd->async_cfqq[0][i]);
-		if (cfqd->async_cfqq[1][i])
-			cfq_put_queue(cfqd->async_cfqq[1][i]);
+		if (cfq_bc->async_cfqq[0][i])
+			cfq_put_queue(cfq_bc->async_cfqq[0][i]);
+		if (cfq_bc->async_cfqq[1][i])
+			cfq_put_queue(cfq_bc->async_cfqq[1][i]);
+		cfq_bc->async_cfqq[0][i] = cfq_bc->async_cfqq[1][i] = NULL;
 	}
 
-	if (cfqd->async_idle_cfqq)
-		cfq_put_queue(cfqd->async_idle_cfqq);
+	if (cfq_bc->async_idle_cfqq)
+		cfq_put_queue(cfq_bc->async_idle_cfqq);
+	cfq_bc->async_idle_cfqq = NULL;
+}
+
+#ifdef CONFIG_UBC_IO_PRIO
+static void cfq_put_async_queues(struct cfq_data *cfqd)
+{
+	struct user_beancounter *ub;
+	struct cfq_bc_data *cfq_bc;
+
+	rcu_read_lock();
+	for_each_beancounter(ub) {
+		write_lock(&ub->iopriv.cfq_bc_list_lock);
+		cfq_bc = __find_cfq_bc(&ub->iopriv, cfqd);
+		if (cfq_bc)
+			__cfq_put_async_queues(cfq_bc);
+		write_unlock(&ub->iopriv.cfq_bc_list_lock);
+	}
+	rcu_read_unlock();
+}
+#else
+static void cfq_put_async_queues(struct cfq_data *cfqd)
+{
+	__cfq_put_async_queues(&cfqd->cfq_bc);
 }
+#endif
 
 static void cfq_exit_queue(elevator_t *e)
 {
@@ -2630,6 +2699,8 @@ static void cfq_exit_queue(elevator_t *e
 
 	cfq_shutdown_timer_wq(cfqd);
 
+	bc_cfq_exit_queue(cfqd);
+
 	mempool_destroy(cfqd->crq_pool);
 	kfree(cfqd->crq_hash);
 	kfree(cfqd);
@@ -2646,13 +2717,12 @@ static void *cfq_init_queue(request_queu
 
 	memset(cfqd, 0, sizeof(*cfqd));
 
-	for (i = 0; i < CFQ_PRIO_LISTS; i++)
-		INIT_LIST_HEAD(&cfqd->rr_list[i]);
-
-	INIT_LIST_HEAD(&cfqd->busy_rr);
-	INIT_LIST_HEAD(&cfqd->cur_rr);
-	INIT_LIST_HEAD(&cfqd->idle_rr);
-	INIT_LIST_HEAD(&cfqd->empty_list);
+#ifdef CONFIG_UBC_IO_PRIO
+	cfqd->cfq_bc_queue = RB_ROOT;
+#else
+	cfq_init_cfq_bc(&cfqd->cfq_bc);
+	cfqd->cfq_bc.cfqd = cfqd;
+#endif
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
 	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
@@ -2688,6 +2758,10 @@ static void *cfq_init_queue(request_queu
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_ub_slice = cfq_ub_slice;
+	cfqd->virt_mode = 1;
+	cfqd->write_virt_mode = 1;
+	cfqd->cfq_ub_isolate = 0;
 
 	return cfqd;
 out_crqpool:
@@ -2768,6 +2842,10 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd-
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_ub_slice_show, cfqd->cfq_ub_slice, 1);
+SHOW_FUNCTION(cfq_virt_mode_show, cfqd->virt_mode, 0);
+SHOW_FUNCTION(cfq_write_virt_mode_show, cfqd->write_virt_mode, 0);
+SHOW_FUNCTION(cfq_ub_isolate_show, cfqd->cfq_ub_isolate, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -2796,6 +2874,10 @@ STORE_FUNCTION(cfq_slice_idle_store, &cf
 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_ub_slice_store, &cfqd->cfq_ub_slice, 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_virt_mode_store, &cfqd->virt_mode, 0, 1, 0);
+STORE_FUNCTION(cfq_write_virt_mode_store, &cfqd->write_virt_mode, 0, 1, 0);
+STORE_FUNCTION(cfq_ub_isolate_store, &cfqd->cfq_ub_isolate, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -2812,6 +2894,10 @@ static struct elv_fs_entry cfq_attrs[] =
 	CFQ_ATTR(slice_async),
 	CFQ_ATTR(slice_async_rq),
 	CFQ_ATTR(slice_idle),
+	CFQ_ATTR(ub_slice),
+	CFQ_ATTR(virt_mode),
+	CFQ_ATTR(write_virt_mode),
+	CFQ_ATTR(ub_isolate),
 	__ATTR_NULL
 };
 
@@ -2820,6 +2906,7 @@ static struct elevator_type iosched_cfq 
 		.elevator_merge_fn = 		cfq_merge,
 		.elevator_merged_fn =		cfq_merged_request,
 		.elevator_merge_req_fn =	cfq_merged_requests,
+		.elevator_allow_merge_fn =	cfq_allow_merge,
 		.elevator_dispatch_fn =		cfq_dispatch_requests,
 		.elevator_add_req_fn =		cfq_insert_request,
 		.elevator_activate_req_fn =	cfq_activate_request,
diff -upr kernel-2.6.18-417.el5.orig/block/elevator.c kernel-2.6.18-417.el5-028stab121/block/elevator.c
--- kernel-2.6.18-417.el5.orig/block/elevator.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/block/elevator.c	2017-01-13 08:40:19.000000000 -0500
@@ -40,6 +40,24 @@
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
+kmem_cache_t *cfq_pool;
+EXPORT_SYMBOL_GPL(cfq_pool);
+
+/*
+ * Query io scheduler to see if the current process issuing bio may be
+ * merged with rq.
+ */
+static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
+{
+	request_queue_t *q = rq->q;
+	elevator_t *e = q->elevator;
+
+	if (e->ops->elevator_allow_merge_fn)
+		return e->ops->elevator_allow_merge_fn(q, rq, bio);
+
+	return 1;
+}
+
 /*
  * can we safely merge with this request?
  */
@@ -55,13 +73,16 @@ inline int elv_rq_merge_ok(struct reques
 		return 0;
 
 	/*
-	 * same device and no special stuff set, merge is ok
+	 *  must be same device and not a special request
 	 */
-	if (rq->rq_disk == bio->bi_bdev->bd_disk &&
-	    !rq->waiting && !rq->special)
-		return 1;
+	if (rq->rq_disk != bio->bi_bdev->bd_disk ||
+	    rq->waiting || rq->special)
+		return 0;
 
-	return 0;
+	if (!elv_iosched_allow_merge(rq, bio))
+		return 0;
+
+	return 1;
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
@@ -766,12 +787,12 @@ void elv_unregister(struct elevator_type
 	 */
 	if (e->ops.trim) {
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			task_lock(p);
 			if (p->io_context)
 				e->ops.trim(p->io_context);
 			task_unlock(p);
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 	}
 
diff -upr kernel-2.6.18-417.el5.orig/block/genhd.c kernel-2.6.18-417.el5-028stab121/block/genhd.c
--- kernel-2.6.18-417.el5.orig/block/genhd.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/block/genhd.c	2017-01-13 08:40:40.000000000 -0500
@@ -18,6 +18,7 @@
 #include <linux/hash.h>
 
 struct subsystem block_subsys;
+EXPORT_SYMBOL(block_subsys);
 static DEFINE_MUTEX(block_subsys_lock);
 
 /*
@@ -346,6 +347,11 @@ static struct sysfs_ops disk_sysfs_ops =
 	.store	= &disk_attr_store,
 };
 
+static ssize_t disk_uevent_show(struct gendisk *disk, char *buf)
+{
+	return kobject_uevent_show(&disk->kobj, buf);
+}
+
 static ssize_t disk_uevent_store(struct gendisk * disk,
 				 const char *buf, size_t count)
 {
@@ -395,7 +401,8 @@ static ssize_t disk_stats_read(struct ge
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
 static struct disk_attribute disk_attr_uevent = {
-	.attr = {.name = "uevent", .mode = S_IWUSR },
+	.attr = {.name = "uevent", .mode = S_IRUGO | S_IWUSR },
+	.show	= disk_uevent_show,
 	.store	= disk_uevent_store
 };
 static struct disk_attribute disk_attr_dev = {
diff -upr kernel-2.6.18-417.el5.orig/block/scsi_ioctl.c kernel-2.6.18-417.el5-028stab121/block/scsi_ioctl.c
--- kernel-2.6.18-417.el5.orig/block/scsi_ioctl.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/block/scsi_ioctl.c	2017-01-13 08:40:14.000000000 -0500
@@ -409,7 +409,7 @@ int sg_scsi_ioctl(struct file *file, str
 		return -EFAULT;
 	if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
 		return -EINVAL;
-	if (get_user(opcode, sic->data))
+	if (get_user(opcode, (int *)sic->data))
 		return -EFAULT;
 
 	bytes = max(in_len, out_len);
diff -upr kernel-2.6.18-417.el5.orig/COPYING.SWsoft kernel-2.6.18-417.el5-028stab121/COPYING.SWsoft
--- kernel-2.6.18-417.el5.orig/COPYING.SWsoft	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/COPYING.SWsoft	2017-01-13 08:40:16.000000000 -0500
@@ -0,0 +1,350 @@
+
+Nothing in this license should be construed as a grant by SWsoft of any rights
+beyond the rights specified in the GNU General Public License, and nothing in
+this license should be construed as a waiver by SWsoft of its patent, copyright
+and/or trademark rights, beyond the waiver required by the GNU General Public
+License. This license is expressly inapplicable to any product that is not
+within the scope of the GNU General Public License
+
+----------------------------------------
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff -upr kernel-2.6.18-417.el5.orig/Documentation/filesystems/Locking kernel-2.6.18-417.el5-028stab121/Documentation/filesystems/Locking
--- kernel-2.6.18-417.el5.orig/Documentation/filesystems/Locking	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/Documentation/filesystems/Locking	2017-01-13 08:40:40.000000000 -0500
@@ -373,10 +373,9 @@ The last two are called only from check_
 prototypes:
 	loff_t (*llseek) (struct file *, loff_t, int);
 	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
-	ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
-			loff_t);
+	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	int (*readdir) (struct file *, void *, filldir_t);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	int (*ioctl) (struct inode *, struct file *, unsigned int,
diff -upr kernel-2.6.18-417.el5.orig/Documentation/filesystems/vfs.txt kernel-2.6.18-417.el5-028stab121/Documentation/filesystems/vfs.txt
--- kernel-2.6.18-417.el5.orig/Documentation/filesystems/vfs.txt	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/Documentation/filesystems/vfs.txt	2017-01-13 08:40:40.000000000 -0500
@@ -754,9 +754,9 @@ This describes how the VFS can manipulat
 struct file_operations {
 	loff_t (*llseek) (struct file *, loff_t, int);
 	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
-	ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	int (*readdir) (struct file *, void *, filldir_t);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
diff -upr kernel-2.6.18-417.el5.orig/Documentation/kernel-parameters.txt kernel-2.6.18-417.el5-028stab121/Documentation/kernel-parameters.txt
--- kernel-2.6.18-417.el5.orig/Documentation/kernel-parameters.txt	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/Documentation/kernel-parameters.txt	2017-01-13 08:40:40.000000000 -0500
@@ -1896,6 +1896,9 @@ running once the system is up.
 	uart6850=	[HW,OSS]
 			Format: <io>,<irq>
 
+	uhash_entries=	[KNL,NET]
+			Set number of hash buckets for UDP
+
 	usbhid.mousepoll=
 			[USBHID] The interval which mice are to be polled at.
 
diff -upr kernel-2.6.18-417.el5.orig/Documentation/sysctl/kernel.txt kernel-2.6.18-417.el5-028stab121/Documentation/sysctl/kernel.txt
--- kernel-2.6.18-417.el5.orig/Documentation/sysctl/kernel.txt	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/Documentation/sysctl/kernel.txt	2017-01-13 08:40:41.000000000 -0500
@@ -139,9 +139,11 @@ dmesg_restrict:
 
 This toggle indicates whether unprivileged users are prevented from using
 dmesg(8) to view messages from the kernel's log buffer.  When
-dmesg_restrict is set to (0) there are no restrictions.  When
-dmesg_restrict is set set to (1), users must have CAP_SYS_ADMIN to use
-dmesg(8).
+dmesg_restrict is set to 0 there are no restrictions.  When
+dmesg_restrict is set to 1, users must have CAP_SYS_ADMIN to use
+dmesg(8) on the hardware node.
+Inside containers dmesg_restrict is ignored because virtualized dmesg buffer
+contains safe kernel messages only.
 
 The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the default
 value of dmesg_restrict.
diff -upr kernel-2.6.18-417.el5.orig/Documentation/vsched.txt kernel-2.6.18-417.el5-028stab121/Documentation/vsched.txt
--- kernel-2.6.18-417.el5.orig/Documentation/vsched.txt	2017-01-13 08:40:28.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/Documentation/vsched.txt	2017-01-13 08:40:28.000000000 -0500
@@ -0,0 +1,83 @@
+Copyright (C) 2005 SWsoft. All rights reserved.
+Licensing governed by "linux/COPYING.SWsoft" file.
+
+Hierarchical CPU schedulers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Hierarchical CPU scheduler is a stack of CPU schedulers which allows
+to organize different policies of scheduling in the system and/or between
+groups of processes.
+
+Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage
+CPU scheduler, where the scheduling decisions are made in 2 steps:
+1. On the first step Fair CPU scheduler selects a group of processes
+  which should get some CPU time.
+2. Then standard Linux scheduler chooses a process inside the group.
+Such scheduler efficiently allows to isolate one group of processes
+from another and still allows a group to use more than 1 CPU on SMP systems.
+
+This document describes a new middle layer of Virtuozzo hierarchical CPU
+scheduler which makes decisions after Fair scheduler, but before Linux
+scheduler and which is called VCPU scheduler.
+
+
+Where VCPU scheduler comes from?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Existing hierarchical CPU scheduler uses isolated algorithms on each stage
+of decision making, i.e. every scheduler makes its decisions without
+taking into account the details of other schedulers. This can lead to a number
+of problems described below.
+
+On SMP systems there are possible situations when the first CPU scheduler
+in the hierarchy (e.g. Fair scheduler) wants to schedule some group of
+processes on the physical CPU, but the underlying process scheduler
+(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes
+on this physical CPU. Usually this happens due to the fact that Linux
+kernel scheduler uses per-physical CPU runqueues.
+
+Another problem is that Linux scheduler also knows nothing about
+Fair scheduler and can't balance efficiently without taking into account
+statistics about process groups from Fair scheduler. Without such
+statistics Linux scheduler can concentrate all processes on one physical
+CPU, thus making CPU consumption highly inefficient.
+
+VCPU scheduler solves these problems by adding a new layer between
+Fair scheduler and Linux scheduler.
+
+VCPU scheduler
+~~~~~~~~~~~~~~
+
+VCPU scheduler is a CPU scheduler which splits notion of
+physical and virtual CPUs (VCPU and PCPU). This means that tasks are
+running on virtual CPU runqueues, while VCPUs are running on PCPUs.
+
+The Virtuozzo hierarchical fair scheduler becomes 3 stage CPU scheduler:
+1. First, Fair CPU scheduler selects a group of processes.
+2. Then VCPU scheduler selects a virtual CPU to run (this is actually
+  a runqueue).
+3. Standard Linux scheduler chooses a process from the runqueue.
+
+For example on the picture below PCPU0 executes tasks from
+VCPU1 runqueue and PCPU1 is idle:
+
+   virtual          |         physical       |          virtual
+  idle CPUs         |           CPUs         |           CPUS
+--------------------|------------------------|--------------------------
+                    |                        |     -----------------
+                    |                        |    | virtual sched X |
+                    |                        |    |   -----------   |
+                    |                        |    |  |   VCPU0   |  |
+                    |                        |    |   -----------   |
+ ------------       |        -----------          |   -----------   |
+| idle VCPU0 |      |       |   PCPU0   |  <--->  |  |   VCPU1   |  |
+ ------------       |        -----------          |   -----------   |
+                    |                        |     -----------------
+                    |                        |
+                    |                        |     -----------------
+                    |                        |    | virtual sched Y |
+ ------------                -----------     |    |   -----------   |
+| idle VCPU1 |    <--->     |   PCPU1   |    |    |  |   VCPU0   |  |
+ ------------                -----------     |    |   -----------   |
+                    |                        |     -----------------
+                    |                        |
diff -upr kernel-2.6.18-417.el5.orig/drivers/acpi/numa.c kernel-2.6.18-417.el5-028stab121/drivers/acpi/numa.c
--- kernel-2.6.18-417.el5.orig/drivers/acpi/numa.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/acpi/numa.c	2017-01-13 08:40:15.000000000 -0500
@@ -40,9 +40,9 @@ static nodemask_t nodes_found_map = NODE
 #define NID_INVAL	-1
 
 /* maps to convert between proximity domain and logical node ID */
-int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS]
+int pxm_to_node_map[MAX_PXM_DOMAINS]
 				= { [0 ... MAX_PXM_DOMAINS - 1] = NID_INVAL };
-int __cpuinitdata node_to_pxm_map[MAX_NUMNODES]
+int node_to_pxm_map[MAX_NUMNODES]
 				= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
 
 extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
@@ -51,14 +51,14 @@ extern int __init acpi_table_parse_madt_
 					       acpi_madt_entry_handler handler,
 					       unsigned int max_entries);
 
-int __cpuinit pxm_to_node(int pxm)
+int pxm_to_node(int pxm)
 {
 	if (pxm < 0)
 		return NID_INVAL;
 	return pxm_to_node_map[pxm];
 }
 
-int __cpuinit node_to_pxm(int node)
+int node_to_pxm(int node)
 {
 	if (node < 0)
 		return PXM_INVAL;
diff -upr kernel-2.6.18-417.el5.orig/drivers/base/class.c kernel-2.6.18-417.el5-028stab121/drivers/base/class.c
--- kernel-2.6.18-417.el5.orig/drivers/base/class.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/base/class.c	2017-01-13 08:40:22.000000000 -0500
@@ -71,8 +71,13 @@ static struct kobj_type ktype_class = {
 };
 
 /* Hotplug events for classes go to the class_obj subsys */
-static decl_subsys(class, &ktype_class, NULL);
+decl_subsys(class, &ktype_class, NULL);
 
+#ifndef CONFIG_VE
+#define visible_class_subsys class_subsys
+#else
+#define visible_class_subsys (*get_exec_env()->class_subsys)
+#endif
 
 int class_create_file(struct class * cls, const struct class_attribute * attr)
 {
@@ -148,7 +153,7 @@ int class_register(struct class * cls)
 	if (error)
 		return error;
 
-	subsys_set_kset(cls, class_subsys);
+	subsys_set_kset(cls, visible_class_subsys);
 
 	error = subsystem_register(&cls->subsys);
 	if (!error) {
@@ -420,8 +425,13 @@ static struct kset_uevent_ops class_ueve
 	.uevent =	class_uevent,
 };
 
-static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
+decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
 
+#ifndef CONFIG_VE
+#define visible_class_obj_subsys class_obj_subsys
+#else
+#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
+#endif
 
 static int class_device_add_attrs(struct class_device * cd)
 {
@@ -497,9 +507,14 @@ static ssize_t store_uevent(struct class
 	return count;
 }
 
+void class_device_virtualize(struct class_device *class_dev)
+{
+	kobj_set_kset_s(class_dev, visible_class_obj_subsys);
+}
+
 void class_device_initialize(struct class_device *class_dev)
 {
-	kobj_set_kset_s(class_dev, class_obj_subsys);
+	class_device_virtualize(class_dev);
 	kobject_init(&class_dev->kobj);
 	INIT_LIST_HEAD(&class_dev->node);
 }
@@ -877,12 +892,19 @@ void class_interface_unregister(struct c
 	class_put(parent);
 }
 
-
+void prepare_sysfs_classes(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->class_subsys = &class_subsys;
+	get_ve0()->class_obj_subsys = &class_obj_subsys;
+#endif
+}
 
 int __init classes_init(void)
 {
 	int retval;
 
+	prepare_sysfs_classes();
 	retval = subsystem_register(&class_subsys);
 	if (retval)
 		return retval;
@@ -918,3 +940,6 @@ EXPORT_SYMBOL_GPL(class_device_remove_bi
 
 EXPORT_SYMBOL_GPL(class_interface_register);
 EXPORT_SYMBOL_GPL(class_interface_unregister);
+
+EXPORT_SYMBOL(class_subsys);
+EXPORT_SYMBOL(class_obj_subsys);
diff -upr kernel-2.6.18-417.el5.orig/drivers/base/core.c kernel-2.6.18-417.el5-028stab121/drivers/base/core.c
--- kernel-2.6.18-417.el5.orig/drivers/base/core.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/base/core.c	2017-01-13 08:40:40.000000000 -0500
@@ -188,6 +188,64 @@ static struct kset_uevent_ops device_uev
 	.uevent =	dev_uevent,
 };
 
+ssize_t kobject_uevent_show(struct kobject *kobj, char *buf)
+{
+	struct kobject *top_kobj;
+	struct kset *kset;
+	char *envp[32];
+	char *data = NULL;
+	char *pos;
+	int i;
+	size_t count = 0;
+	int retval;
+
+	/* search the kset, the device belongs to */
+	top_kobj = kobj;
+	if (!top_kobj->kset && top_kobj->parent) {
+		do {
+			top_kobj = top_kobj->parent;
+		} while (!top_kobj->kset && top_kobj->parent);
+	}
+	if (!top_kobj->kset)
+		goto out;
+	kset = top_kobj->kset;
+	if (!kset->uevent_ops || !kset->uevent_ops->uevent)
+		goto out;
+
+	/* respect filter */
+	if (kset->uevent_ops && kset->uevent_ops->filter)
+		if (!kset->uevent_ops->filter(kset, kobj))
+			goto out;
+
+	data = (char *)get_zeroed_page(GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* let the kset specific function add its keys */
+	pos = data;
+	memset(envp, 0, sizeof(envp));
+	retval = kset->uevent_ops->uevent(kset, kobj,
+					  envp, ARRAY_SIZE(envp),
+					  pos, PAGE_SIZE);
+	if (retval)
+		goto out;
+
+	/* copy keys to file */
+	for (i = 0; envp[i]; i++) {
+		pos = &buf[count];
+		count += sprintf(pos, "%s\n", envp[i]);
+	}
+out:
+	free_page((unsigned long)data);
+	return count;
+}
+
+static ssize_t show_uevent(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	return kobject_uevent_show(&dev->kobj, buf);
+}
+
 static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
@@ -349,10 +407,11 @@ int device_add(struct device *dev)
 	}
 
 	dev->uevent_attr.attr.name = "uevent";
-	dev->uevent_attr.attr.mode = S_IWUSR;
+	dev->uevent_attr.attr.mode = S_IRUGO | S_IWUSR;
 	if (dev->driver)
 		dev->uevent_attr.attr.owner = dev->driver->owner;
 	dev->uevent_attr.store = store_uevent;
+	dev->uevent_attr.show = show_uevent;
 	device_create_file(dev, &dev->uevent_attr);
 
 	if (MAJOR(dev->devt)) {
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/ipmi/ipmi_msghandler.c kernel-2.6.18-417.el5-028stab121/drivers/char/ipmi/ipmi_msghandler.c
--- kernel-2.6.18-417.el5.orig/drivers/char/ipmi/ipmi_msghandler.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/ipmi/ipmi_msghandler.c	2017-01-13 08:40:15.000000000 -0500
@@ -4342,7 +4342,7 @@ static __exit void cleanup_ipmi(void)
 	del_timer_sync(&ipmi_timer);
 
 #ifdef CONFIG_PROC_FS
-	remove_proc_entry(proc_ipmi_root->name, &proc_root);
+	remove_proc_entry(proc_ipmi_root->name, NULL);
 #endif /* CONFIG_PROC_FS */
 
 	driver_unregister(&ipmidriver.driver);
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/keyboard.c kernel-2.6.18-417.el5-028stab121/drivers/char/keyboard.c
--- kernel-2.6.18-417.el5.orig/drivers/char/keyboard.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/keyboard.c	2017-01-13 08:40:32.000000000 -0500
@@ -151,6 +151,7 @@ unsigned char kbd_sysrq_xlate[KEY_MAX + 
 static int sysrq_down;
 static int sysrq_alt_use;
 #endif
+int sysrq_key_scancode = KEY_SYSRQ;
 static int sysrq_alt;
 
 /*
@@ -1049,6 +1050,9 @@ static int emulate_raw(struct vc_data *v
 {
 	int code;
 
+	if (keycode == sysrq_key_scancode && sysrq_alt)
+		goto sysrq;
+
 	switch (keycode) {
 		case KEY_PAUSE:
 			put_queue(vc, 0xe1);
@@ -1067,6 +1071,7 @@ static int emulate_raw(struct vc_data *v
 			break;
 
 		case KEY_SYSRQ:
+sysrq:
 			/*
 			 * Real AT keyboards (that's what we're trying
 			 * to emulate here emit 0xe0 0x2a 0xe0 0x37 when
@@ -1163,7 +1168,8 @@ static void kbd_keycode(unsigned int key
 				printk(KERN_WARNING "keyboard.c: can't emulate rawmode for keycode %d\n", keycode);
 
 #ifdef CONFIG_MAGIC_SYSRQ	       /* Handle the SysRq Hack */
-	if (keycode == KEY_SYSRQ && (sysrq_down || (down == 1 && sysrq_alt))) {
+	if ((keycode == sysrq_key_scancode || keycode == KEY_SYSRQ) &&
+				(sysrq_down || (down == 1 && sysrq_alt))) {
 		if (!sysrq_down) {
 			sysrq_down = down;
 			sysrq_alt_use = sysrq_alt;
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/pty.c kernel-2.6.18-417.el5-028stab121/drivers/char/pty.c
--- kernel-2.6.18-417.el5.orig/drivers/char/pty.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/pty.c	2017-01-13 08:40:20.000000000 -0500
@@ -30,16 +30,30 @@
 #include <linux/bitops.h>
 #include <linux/devpts_fs.h>
 
+#include <ub/ub_misc.h>
+
 /* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 struct tty_driver *ptm_driver;
-static struct tty_driver *pts_driver;
+struct tty_driver *pts_driver;
+EXPORT_SYMBOL(ptm_driver);
+EXPORT_SYMBOL(pts_driver);
+
+void prepare_pty(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->ptm_driver = ptm_driver;
+	/* don't clean ptm_driver and co. here, they are used in vecalls.c */
+#endif
+}
 #endif
 
 static void pty_close(struct tty_struct * tty, struct file * filp)
 {
 	if (!tty)
 		return;
+
+	ub_pty_uncharge(tty);
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		if (tty->count > 1)
 			printk("master pty_close: count = %d!!\n", tty->count);
@@ -59,8 +73,12 @@ static void pty_close(struct tty_struct 
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		set_bit(TTY_OTHER_CLOSED, &tty->flags);
 #ifdef CONFIG_UNIX98_PTYS
-		if (tty->driver == ptm_driver)
+		if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			struct ve_struct *old_env;
+			old_env = set_exec_env(tty->owner_env);
 			devpts_pty_kill(tty->index);
+			(void)set_exec_env(old_env);
+		}
 #endif
 		tty_vhangup(tty->link);
 	}
@@ -210,6 +228,10 @@ static int pty_open(struct tty_struct *t
 	if (tty->link->count != 1)
 		goto out;
 
+	retval = -ENOMEM;
+	if (ub_pty_charge(tty))
+		goto out;
+
 	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
 	set_bit(TTY_THROTTLED, &tty->flags);
 	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
@@ -237,7 +259,9 @@ static struct tty_operations pty_ops = {
 
 /* Traditional BSD devices */
 #ifdef CONFIG_LEGACY_PTYS
-static struct tty_driver *pty_driver, *pty_slave_driver;
+struct tty_driver *pty_driver, *pty_slave_driver;
+EXPORT_SYMBOL(pty_driver);
+EXPORT_SYMBOL(pty_slave_driver);
 
 static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file,
 			 unsigned int cmd, unsigned long arg)
@@ -397,6 +421,7 @@ static void __init unix98_pty_init(void)
 		panic("Couldn't register Unix98 pts driver");
 
 	pty_table[1].data = &ptm_driver->refcount;
+	prepare_pty();
 }
 #else
 static inline void unix98_pty_init(void) { }
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/random.c kernel-2.6.18-417.el5-028stab121/drivers/char/random.c
--- kernel-2.6.18-417.el5.orig/drivers/char/random.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/random.c	2017-01-13 08:40:15.000000000 -0500
@@ -951,7 +951,7 @@ static void init_std_data(struct entropy
 			break;
 		mix_pool_bytes(r, &flags, sizeof(flags));
 	}
-	mix_pool_bytes(r, &system_utsname, sizeof(system_utsname));
+	mix_pool_bytes(r, utsname(), sizeof(*utsname()));
 
 	/* Enable continuous test in fips mode */
 	if (fips_enabled) {
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/raw.c kernel-2.6.18-417.el5-028stab121/drivers/char/raw.c
--- kernel-2.6.18-417.el5.orig/drivers/char/raw.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/raw.c	2017-01-13 08:40:40.000000000 -0500
@@ -249,23 +249,11 @@ static ssize_t raw_file_write(struct fil
 	return generic_file_write_nolock(file, &local_iov, 1, ppos);
 }
 
-static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
-					size_t count, loff_t pos)
-{
-	struct iovec local_iov = {
-		.iov_base = (char __user *)buf,
-		.iov_len = count
-	};
-
-	return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
-
 static const struct file_operations raw_fops = {
 	.read	=	generic_file_read,
 	.aio_read = 	generic_file_aio_read,
 	.write	=	raw_file_write,
-	.aio_write = 	raw_file_aio_write,
+	.aio_write = 	generic_file_aio_write_nolock,
 	.open	=	raw_open,
 	.release=	raw_release,
 	.ioctl	=	raw_ioctl,
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/sysrq.c kernel-2.6.18-417.el5-028stab121/drivers/char/sysrq.c
--- kernel-2.6.18-417.el5.orig/drivers/char/sysrq.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/sysrq.c	2017-01-13 08:40:40.000000000 -0500
@@ -35,6 +35,9 @@
 #include <linux/vt_kern.h>
 #include <linux/workqueue.h>
 #include <linux/kexec.h>
+#include <linux/kallsyms.h>
+#include <linux/slab.h>
+#include <linux/nmi.h>
 
 #include <asm/ptrace.h>
 
@@ -169,8 +172,7 @@ static struct sysrq_key_op sysrq_showloc
 static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
 				  struct tty_struct *tty)
 {
-	if (pt_regs)
-		show_regs(pt_regs);
+	nmi_show_regs(pt_regs, 0);
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
@@ -195,6 +197,7 @@ static void sysrq_handle_showmem(int key
 				 struct tty_struct *tty)
 {
 	show_mem();
+	show_slab_info();
 }
 static struct sysrq_key_op sysrq_showmem_op = {
 	.handler	= sysrq_handle_showmem,
@@ -207,14 +210,29 @@ static spinlock_t show_lock = SPIN_LOCK_
 static void showacpu(void *info)
 {
 	spin_lock(&show_lock);
-	printk("CPU%d:\n", smp_processor_id());
-	show_stack(NULL, NULL);
+	/* Idle CPUs have no interesting backtrace. */
+	if (idle_cpu(smp_processor_id()))
+		printk("CPU%d: idle\n", smp_processor_id());
+	else {		
+		printk("CPU%d:\n", smp_processor_id());
+		show_stack(NULL, NULL);
+	}
 	spin_unlock(&show_lock);
 }
+
+static void sysrq_showregs_othercpus(void *dummy)
+{
+	on_each_cpu(showacpu, NULL, 0, 0);
+}
+
+static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus, NULL);
+
 static void sysrq_handle_showcpus(int key, struct pt_regs *pt_regs,
 				  struct tty_struct *tty) {
-	on_each_cpu(showacpu, NULL, 0, 0);
+	printk("requested on CPU%d:\n", smp_processor_id());
+	schedule_work(&sysrq_showallcpus);
 }
+
 static struct sysrq_key_op sysrq_showcpus_op = {
 	.handler	= sysrq_handle_showcpus,
 	.help_msg	= "shoWcpus",
@@ -229,7 +247,7 @@ static void send_sig_all(int sig)
 {
 	struct task_struct *p;
 
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm && p->pid != 1)
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
@@ -292,6 +310,19 @@ static struct sysrq_key_op sysrq_kill_op
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
 
+#ifdef CONFIG_SCHED_VCPU
+static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs,
+				   struct tty_struct *tty) 
+{
+	show_vsched();
+}
+static struct sysrq_key_op sysrq_vschedstate_op = {
+	.handler	= sysrq_handle_vschedstate,
+	.help_msg	= "vsced_stAte",
+	.action_msg	= "Show Vsched",
+};
+#endif
+
 static void sysrq_handle_unrt(int key, struct pt_regs *pt_regs,
 				struct tty_struct *tty)
 {
@@ -307,7 +338,274 @@ static struct sysrq_key_op sysrq_unrt_op
 /* Key Operations table and lock */
 static DEFINE_SPINLOCK(sysrq_key_table_lock);
 
-static struct sysrq_key_op *sysrq_key_table[36] = {
+#define SYSRQ_KEY_TABLE_LENGTH 37
+static struct sysrq_key_op **sysrq_key_table;
+static struct sysrq_key_op *sysrq_default_key_table[];
+
+#ifdef CONFIG_SYSRQ_DEBUG
+#define SYSRQ_NAMELEN_MAX	64
+#define SYSRQ_DUMP_LINES	32
+
+static struct sysrq_key_op *sysrq_debug_key_table[];
+static struct sysrq_key_op *sysrq_input_key_table[];
+static unsigned long *dump_address;
+static int orig_console_loglevel;
+static void (*sysrq_input_return)(char *) = NULL;
+
+static void dump_mem(void)
+{
+	unsigned long value[4];
+	mm_segment_t old_fs;
+	int line, err;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = 0;
+
+	for (line = 0; line < SYSRQ_DUMP_LINES; line++) {
+		err |= __get_user(value[0], dump_address++);
+		err |= __get_user(value[1], dump_address++);
+		err |= __get_user(value[2], dump_address++);
+		err |= __get_user(value[3], dump_address++);
+		if (err) {
+			printk("Invalid address %p\n", dump_address - 4);
+			break;
+		}
+#if BITS_PER_LONG == 32
+		printk("0x%p: %08lx %08lx %08lx %08lx\n",
+				dump_address - 4,
+				value[0], value[1], value[2], value[3]);
+#else
+		printk("0x%p: %016lx %016lx %016lx %016lx\n",
+				dump_address - 4,
+				value[0], value[1], value[2], value[3]);
+#endif
+	}
+	set_fs(old_fs);
+}
+
+static void write_mem(unsigned long val)
+{
+	mm_segment_t old_fs;
+	unsigned long old_val;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	if (__get_user(old_val, dump_address)) {
+		printk("Invalid address %p\n", dump_address);
+		goto out;
+	}
+
+#if BITS_PER_LONG == 32
+	printk("Changing [%p] from %08lx to %08lx\n",
+			dump_address, old_val, val);
+#else
+	printk("Changing [%p] from %016lx to %016lx\n",
+			dump_address, old_val, val);
+#endif
+	__put_user(val, dump_address);
+out:
+	set_fs(old_fs);
+}
+
+static void handle_read(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	static int pos;
+	static int upper_case;
+	static char str[SYSRQ_NAMELEN_MAX];
+
+	if (key == 0) {
+		/* actually 0 is not shift only... */
+		upper_case = 1;
+		return;
+	}
+
+	if (key == 0x0d || pos == SYSRQ_NAMELEN_MAX - 1) {
+		/* enter */
+		sysrq_key_table = sysrq_debug_key_table;
+		str[pos] = '\0';
+		pos = upper_case = 0;
+		printk("\n");
+		if (sysrq_input_return == NULL)
+			printk("No return handler!!!\n");
+		else
+			sysrq_input_return(str);
+		return;
+	};
+
+	/* check for allowed symbols */
+	if (key == '-') {
+		if (upper_case)
+			key = '_';
+		goto correct;
+	};
+	if (key >= 'a' && key <= 'z') {
+		if (upper_case)
+			key = key - 'a' + 'A';
+		goto correct;
+	};
+	if (key >= '0' && key <= '9')
+		goto correct;
+
+	upper_case = 0;
+	return;
+
+correct:
+	str[pos] = key;
+	printk("%c", (char)key);
+	pos++;
+	upper_case = 0;
+}
+
+static struct sysrq_key_op input_read = {
+	.handler	= handle_read,
+	.help_msg	= "",
+	.action_msg	= NULL,
+};
+
+static struct sysrq_key_op *sysrq_input_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+	[0 ... SYSRQ_KEY_TABLE_LENGTH - 1] = &input_read,
+};
+
+static void return_dump_mem(char *str)
+{
+	unsigned long address;
+	char *end;
+
+	address = simple_strtoul(str, &end, 0);
+	if (*end != '\0') {
+		printk("Bad address [%s]\n", str);
+		return;
+	}
+
+	dump_address = (unsigned long *)address;
+	dump_mem();
+}
+
+static void handle_dump_mem(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_input_return = return_dump_mem;
+	sysrq_key_table = sysrq_input_key_table;
+}
+
+static struct sysrq_key_op debug_dump_mem = {
+	.handler	= handle_dump_mem,
+	.help_msg	= "Dump",
+	.action_msg	= "Enter address:",
+};
+
+static void return_resolve(char *str)
+{
+	unsigned long address;
+
+	address = kallsyms_lookup_name(str);
+	printk("%s : %lx\n", str, address);
+	if (address) {
+		dump_address = (unsigned long *)address;
+		printk("Now you can dump it via X\n");
+	}
+}
+
+static void handle_resolve(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_input_return = return_resolve;
+	sysrq_key_table = sysrq_input_key_table;
+}
+
+static struct sysrq_key_op debug_resolve = {
+	.handler	= handle_resolve,
+	.help_msg	= "Resolve",
+	.action_msg	= "Enter symbol name:",
+};
+
+static void return_write_mem(char *str)
+{
+	unsigned long address;
+	unsigned long value;
+	char *end;
+
+	address = simple_strtoul(str, &end, 0);
+	if (*end != '-') {
+		printk("Bad address in %s\n", str);
+		return;
+	}
+	value = simple_strtoul(end + 1, &end, 0);
+	if (*end != '\0') {
+		printk("Bad value in %s\n", str);
+		return;
+	}
+
+	dump_address = (unsigned long *)address;
+	write_mem(value);
+}
+
+static void handle_write_mem(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_input_return = return_write_mem;
+	sysrq_key_table = sysrq_input_key_table;
+}
+
+static struct sysrq_key_op debug_write_mem = {
+	.handler	= handle_write_mem,
+	.help_msg	= "Writemem",
+	.action_msg	= "Enter address-value:",
+};
+
+static void handle_next(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	dump_mem();
+}
+
+static struct sysrq_key_op debug_next = {
+	.handler	= handle_next,
+	.help_msg	= "neXt",
+	.action_msg	= "continuing",
+};
+
+static void handle_quit(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_key_table = sysrq_default_key_table;
+	console_loglevel = orig_console_loglevel;
+}
+
+static struct sysrq_key_op debug_quit = {
+	.handler	= handle_quit,
+	.help_msg	= "Quit",
+	.action_msg	= "Thank you for using debugger",
+};
+
+static struct sysrq_key_op *sysrq_debug_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+	[13] = &debug_dump_mem,		/* d */
+	[26] = &debug_quit,		/* q */
+	[27] = &debug_resolve,		/* r */
+	[32] = &debug_write_mem,	/* w */
+	[33] = &debug_next,		/* x */
+};
+
+static void sysrq_handle_debug(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	orig_console_loglevel = console_loglevel;
+	console_loglevel = 8;
+	sysrq_key_table = sysrq_debug_key_table;
+	printk("Welcome sysrq debugging mode\n"
+			"Press H for help\n");
+}
+
+static struct sysrq_key_op sysrq_debug_op = {
+	.handler        = sysrq_handle_debug,
+	.help_msg       = "debuG",
+	.action_msg     = "Select desired action",
+};
+#endif
+
+static struct sysrq_key_op *sysrq_default_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
 	&sysrq_loglevel_op,		/* 0 */
 	&sysrq_loglevel_op,		/* 1 */
 	&sysrq_loglevel_op,		/* 2 */
@@ -323,13 +621,21 @@ static struct sysrq_key_op *sysrq_key_ta
 	 * Don't use for system provided sysrqs, it is handled specially on
 	 * sparc and will never arrive
 	 */
+#ifdef CONFIG_SCHED_VCPU
+	&sysrq_vschedstate_op,		/* a */
+#else
 	NULL,				/* a */
+#endif
 	&sysrq_reboot_op,		/* b */
 	&sysrq_crashdump_op,		/* c */
 	&sysrq_showlocks_op,		/* d */
 	&sysrq_term_op,			/* e */
 	&sysrq_moom_op,			/* f */
+#ifdef CONFIG_SYSRQ_DEBUG
+	&sysrq_debug_op,		/* g */
+#else
 	NULL,				/* g */
+#endif
 	NULL,				/* h - reserved for help */
 	&sysrq_kill_op,			/* i */
 	&sysrq_thaw_op,			/* j */
@@ -350,9 +656,12 @@ static struct sysrq_key_op *sysrq_key_ta
 	&sysrq_showcpus_op,		/* w */
 	NULL,				/* x */
 	NULL,				/* y */
-	NULL				/* z */
+	NULL,				/* z */
+	NULL,				/* for debugger */
 };
 
+static struct sysrq_key_op **sysrq_key_table = sysrq_default_key_table;
+
 /* key2index calculation, -1 on invalid index */
 static int sysrq_key_table_key2index(int key)
 {
@@ -362,6 +671,10 @@ static int sysrq_key_table_key2index(int
 		retval = key - '0';
 	else if ((key >= 'a') && (key <= 'z'))
 		retval = key + 10 - 'a';
+#ifdef CONFIG_SYSRQ_DEBUG
+	else if (key == 0 || key == 0x0d || key == '-')
+		retval = SYSRQ_KEY_TABLE_LENGTH - 1;
+#endif
 	else
 		retval = -1;
 	return retval;
@@ -404,7 +717,6 @@ void __handle_sysrq(int key, struct pt_r
 	spin_lock_irqsave(&sysrq_key_table_lock, flags);
 	orig_log_level = console_loglevel;
 	console_loglevel = 7;
-	printk(KERN_INFO "SysRq : ");
 
         op_p = __sysrq_get_key_op(key);
         if (op_p) {
@@ -414,16 +726,17 @@ void __handle_sysrq(int key, struct pt_r
 		 */
 		if (!check_mask || sysrq_enabled == 1 ||
 		    (sysrq_enabled & op_p->enable_mask)) {
-			printk("%s\n", op_p->action_msg);
+		    	if (op_p->action_msg)
+				printk("SysRq: %s\n", op_p->action_msg);
 			console_loglevel = orig_log_level;
 			op_p->handler(key, pt_regs, tty);
 		} else {
 			printk("This sysrq operation is disabled.\n");
 		}
 	} else {
-		printk("HELP : ");
+		printk("SysRq HELP : ");
 		/* Only print the help msg once per handler */
-		for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) {
+		for (i = 0; i < SYSRQ_KEY_TABLE_LENGTH; i++) {
 			if (sysrq_key_table[i]) {
 				int j;
 
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/tpm/tpm.c kernel-2.6.18-417.el5-028stab121/drivers/char/tpm/tpm.c
--- kernel-2.6.18-417.el5.orig/drivers/char/tpm/tpm.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/tpm/tpm.c	2017-01-13 08:40:15.000000000 -0500
@@ -961,7 +961,7 @@ ssize_t tpm_write(struct file *file, con
 		  size_t size, loff_t *off)
 {
 	struct tpm_chip *chip = file->private_data;
-	int in_size = size, out_size;
+	size_t in_size = size, out_size;
 
 	/* cannot perform a write until the read has cleared
 	   either via tpm_read or a user_read_timer timeout */
@@ -996,7 +996,7 @@ ssize_t tpm_read(struct file *file, char
 		 size_t size, loff_t *off)
 {
 	struct tpm_chip *chip = file->private_data;
-	int ret_size;
+	ssize_t ret_size;
 	int rc;
 
 	del_singleshot_timer_sync(&chip->user_read_timer);
diff -upr kernel-2.6.18-417.el5.orig/drivers/char/tty_io.c kernel-2.6.18-417.el5-028stab121/drivers/char/tty_io.c
--- kernel-2.6.18-417.el5.orig/drivers/char/tty_io.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/char/tty_io.c	2017-01-13 08:40:40.000000000 -0500
@@ -103,6 +103,7 @@
 #include <linux/selection.h>
 
 #include <linux/kmod.h>
+#include <ub/ub_mem.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -120,11 +121,16 @@ struct termios tty_std_termios = {	/* fo
 
 EXPORT_SYMBOL(tty_std_termios);
 
+/* this lock protects the tty_drivers list; these guys do no locking */
+rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
+EXPORT_SYMBOL(tty_driver_guard);
+
 /* This list gets poked at by procfs and various bits of boot up code. This
    could do with some rationalisation such as pulling the tty proc function
    into this file */
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
+EXPORT_SYMBOL(tty_drivers);
 
 /* Semaphore to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
@@ -135,6 +141,15 @@ EXPORT_SYMBOL(tty_mutex);
 extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
 extern int pty_limit;		/* Config limit on Unix98 ptys */
 static DEFINE_IDR(allocated_ptys);
+#ifdef CONFIG_VE
+#define __ve_allocated_ptys(ve) (*((ve)->allocated_ptys))
+#define ve_allocated_ptys	__ve_allocated_ptys(get_exec_env())
+#define ve_ptm_driver		(get_exec_env()->ptm_driver)
+#else
+#define __ve_allocated_ptys(ve) allocated_ptys
+#define ve_allocated_ptys	allocated_ptys
+#define ve_ptm_driver		ptm_driver
+#endif
 static DECLARE_MUTEX(allocated_ptys_lock);
 static int ptmx_open(struct inode *, struct file *);
 #endif
@@ -163,9 +178,20 @@ static void release_tty(struct tty_struc
  *	Locking: none
  */
 
+void prepare_tty(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->allocated_ptys = &allocated_ptys;
+	/*
+	 * in this case, tty_register_driver() setups
+	 * owner_env correctly right from the bootup
+	 */
+#endif
+}
+
 static struct tty_struct *alloc_tty_struct(void)
 {
-	return kzalloc(sizeof(struct tty_struct), GFP_KERNEL);
+	return ub_kzalloc(sizeof(struct tty_struct), GFP_KERNEL);
 }
 
 static void tty_buffer_free_all(struct tty_struct *);
@@ -233,14 +259,13 @@ int tty_paranoia_check(struct tty_struct
 static int check_tty_count(struct tty_struct *tty, const char *routine)
 {
 #ifdef CHECK_TTY_COUNT
-	struct list_head *p;
+	struct file *file;
 	int count = 0;
-	
-	file_list_lock();
-	list_for_each(p, &tty->tty_files) {
+
+	file_list_lock(&tty->tty_files);
+	for_each_fl_file(file, &tty->tty_files)
 		count++;
-	}
-	file_list_unlock();
+	file_list_unlock(&tty->tty_files);
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_SLAVE &&
 	    tty->link && tty->link->count)
@@ -1094,14 +1119,37 @@ static struct tty_driver *get_tty_driver
 {
 	struct tty_driver *p;
 
+	read_lock(&tty_driver_guard);
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		dev_t base = MKDEV(p->major, p->minor_start);
 		if (device < base || device >= base + p->num)
 			continue;
 		*index = device - base;
-		return p;
+#ifdef CONFIG_VE
+		if (in_interrupt())
+			goto found;
+		if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
+#ifdef CONFIG_UNIX98_PTYS
+		    && (p->major<UNIX98_PTY_MASTER_MAJOR ||
+		    	p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
+		       (p->major<UNIX98_PTY_SLAVE_MAJOR ||
+		        p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
+#endif
+		)
+			goto found;
+		if (ve_is_super(p->owner_env) && ve_is_super(get_exec_env()))
+			goto found;
+		if (!ve_accessible_strict(p->owner_env, get_exec_env()))
+			continue;
+#endif
+		goto found;
 	}
+	read_unlock(&tty_driver_guard);
 	return NULL;
+
+found:
+	read_unlock(&tty_driver_guard);
+	return p;
 }
 
 /**
@@ -1298,9 +1346,9 @@ static void do_tty_hangup(void *data)
 	spin_unlock(&redirect_lock);
 	
 	check_tty_count(tty, "do_tty_hangup");
-	file_list_lock();
+	file_list_lock(&tty->tty_files);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
-	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+	for_each_fl_file(filp, &tty->tty_files) {
 		if (filp->f_op->write == redirected_tty_write)
 			cons_filp = filp;
 		if (filp->f_op->write != tty_write)
@@ -1309,7 +1357,7 @@ static void do_tty_hangup(void *data)
 		tty_fasync(-1, filp, 0);	/* can't block */
 		filp->f_op = &hung_up_tty_fops;
 	}
-	file_list_unlock();
+	file_list_unlock(&tty->tty_files);
 	
 	/* FIXME! What are the locking issues here? This may me overdoing things..
 	 * this question is especially important now that we've removed the irqlock. */
@@ -1353,7 +1401,7 @@ static void do_tty_hangup(void *data)
 	
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			task_lock(p);
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
@@ -1364,7 +1412,7 @@ static void do_tty_hangup(void *data)
 			group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
 
@@ -1526,11 +1574,11 @@ void disassociate_ctty(int on_exit)
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
 		task_lock(p);
 		p->signal->tty = NULL;
 		task_unlock(p);
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 	mutex_unlock(&tty_mutex);
 	unlock_kernel();
@@ -1895,34 +1943,41 @@ static void tty_line_name(struct tty_dri
  */
 
 static int init_dev(struct tty_driver *driver, int idx,
-	struct tty_struct **ret_tty)
+	struct tty_struct *i_tty, struct tty_struct **ret_tty)
 {
 	struct tty_struct *tty, *o_tty;
 	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
 	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+	struct ve_struct * owner;
 	int retval = 0;
 
-	/* check whether we're reopening an existing tty */
-	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-		tty = devpts_get_tty(idx);
-		/*
-		 * If we don't have a tty here on a slave open, it's because
-		 * the master already started the close process and there's
-		 * no relation between devpts file and tty anymore.
-		 */
-		if (!tty && driver->subtype == PTY_TYPE_SLAVE) {
-			retval = -EIO;
-			goto end_init;
+	owner = driver->owner_env;
+
+	if (i_tty)
+		tty = i_tty;
+	else {
+		/* check whether we're reopening an existing tty */
+		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			tty = devpts_get_tty(idx);
+			/*
+			 * If we don't have a tty here on a slave open, it's because
+			 * the master already started the close process and there's
+			 * no relation between devpts file and tty anymore.
+			 */
+			if (!tty && driver->subtype == PTY_TYPE_SLAVE) {
+				retval = -EIO;
+				goto end_init;
+			}
+			/*
+			 * It's safe from now on because init_dev() is called with
+			 * tty_mutex held and release_dev() won't change tty->count
+			 * or tty->flags without having to grab tty_mutex
+			 */
+			if (tty && driver->subtype == PTY_TYPE_MASTER)
+				tty = tty->link;
+		} else {
+			tty = driver->ttys[idx];
 		}
-		/*
-		 * It's safe from now on because init_dev() is called with
-		 * tty_mutex held and release_dev() won't change tty->count
-		 * or tty->flags without having to grab tty_mutex
-		 */
-		if (tty && driver->subtype == PTY_TYPE_MASTER)
-			tty = tty->link;
-	} else {
-		tty = driver->ttys[idx];
 	}
 	if (tty) goto fast_track;
 
@@ -1950,6 +2005,7 @@ static int init_dev(struct tty_driver *d
 	tty->driver = driver;
 	tty->index = idx;
 	tty_line_name(driver, idx, tty->name);
+	tty->owner_env = owner;
 
 	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 		tp_loc = &tty->termios;
@@ -1960,7 +2016,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*tp_loc) {
-		tp = (struct termios *) kmalloc(sizeof(struct termios),
+		tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						GFP_KERNEL);
 		if (!tp)
 			goto free_mem_out;
@@ -1968,7 +2024,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*ltp_loc) {
-		ltp = (struct termios *) kmalloc(sizeof(struct termios),
+		ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						 GFP_KERNEL);
 		if (!ltp)
 			goto free_mem_out;
@@ -1983,6 +2039,7 @@ static int init_dev(struct tty_driver *d
 		o_tty->driver = driver->other;
 		o_tty->index = idx;
 		tty_line_name(driver->other, idx, o_tty->name);
+		o_tty->owner_env = owner;
 
 		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 			o_tp_loc = &o_tty->termios;
@@ -1994,7 +2051,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_tp_loc) {
 			o_tp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_tp)
 				goto free_mem_out;
 			*o_tp = driver->other->init_termios;
@@ -2002,7 +2059,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_ltp_loc) {
 			o_ltp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_ltp)
 				goto free_mem_out;
 			memset(o_ltp, 0, sizeof(struct termios));
@@ -2020,6 +2077,10 @@ static int init_dev(struct tty_driver *d
 			*o_ltp_loc = o_ltp;
 		o_tty->termios = *o_tp_loc;
 		o_tty->termios_locked = *o_ltp_loc;
+#ifdef CONFIG_VE
+		if (driver->other->refcount == 0)
+			(void)get_ve(owner);
+#endif
 		driver->other->refcount++;
 		if (driver->subtype == PTY_TYPE_MASTER)
 			o_tty->count++;
@@ -2044,6 +2105,10 @@ static int init_dev(struct tty_driver *d
 		*ltp_loc = ltp;
 	tty->termios = *tp_loc;
 	tty->termios_locked = *ltp_loc;
+#ifdef CONFIG_VE
+	if (driver->refcount == 0)
+		(void)get_ve(owner);
+#endif
 	driver->refcount++;
 	tty->count++;
 
@@ -2177,6 +2242,7 @@ static void release_one_tty(struct tty_s
 	struct termios *tp;
 	int devpts = tty->driver->flags & TTY_DRIVER_DEVPTS_MEM;
 	void (*shutdown)(struct tty_struct *);
+	struct file *filp, *tmp;
 
 	if (tty->driver->flags & TTY_DRIVER_HAS_SHUTDOWN)
 		tty->driver->shutdown(tty);
@@ -2185,10 +2251,13 @@ static void release_one_tty(struct tty_s
 
 	tty->magic = 0;
 	tty->driver->refcount--;
+	if (tty->driver->refcount == 0)
+		put_ve(tty->owner_env);
 
-	file_list_lock();
-	list_del_init(&tty->tty_files);
-	file_list_unlock();
+	file_list_lock(&tty->tty_files);
+	list_for_each_entry_safe(filp, tmp, &tty->tty_files.fl_list, f_u.fu_list)
+		list_del_init(&filp->f_u.fu_list);
+	file_list_unlock(&tty->tty_files);
 
 	free_tty_struct(tty);
 }
@@ -2231,7 +2300,10 @@ static void release_dev(struct file * fi
 	int	idx;
 	char	buf[64];
 	unsigned long flags;
-	
+#ifdef CONFIG_UNIX98_PTYS
+	struct idr *idr_alloced;
+#endif
+
 	tty = (struct tty_struct *)filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
 		return;
@@ -2245,6 +2317,9 @@ static void release_dev(struct file * fi
 		      tty->driver->subtype == PTY_TYPE_MASTER);
 	devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
 	o_tty = tty->link;
+#ifdef CONFIG_UNIX98_PTYS
+	idr_alloced = &__ve_allocated_ptys(tty->owner_env);
+#endif
 
 #ifdef TTY_PARANOIA_CHECK
 	if (idx < 0 || idx >= tty->driver->num) {
@@ -2417,13 +2492,13 @@ static void release_dev(struct file * fi
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -2497,7 +2572,7 @@ static void release_dev(struct file * fi
 	/* Make this pty number available for reallocation */
 	if (devpts) {
 		down(&allocated_ptys_lock);
-		idr_remove(&allocated_ptys, idx);
+		idr_remove(idr_alloced, idx);
 		up(&allocated_ptys_lock);
 	}
 #endif
@@ -2527,7 +2602,7 @@ static void release_dev(struct file * fi
 
 static int tty_open(struct inode * inode, struct file * filp)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty, *c_tty;
 	int noctty, retval;
 	struct tty_driver *driver;
 	int index;
@@ -2540,6 +2615,7 @@ retry_open:
 	noctty = filp->f_flags & O_NOCTTY;
 	index  = -1;
 	retval = 0;
+	c_tty = NULL;
 	
 	mutex_lock(&tty_mutex);
 
@@ -2550,6 +2626,7 @@ retry_open:
 		}
 		driver = current->signal->tty->driver;
 		index = current->signal->tty->index;
+		c_tty = current->signal->tty;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -2557,6 +2634,12 @@ retry_open:
 #ifdef CONFIG_VT
 	if (device == MKDEV(TTY_MAJOR,0)) {
 		extern struct tty_driver *console_driver;
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			mutex_unlock(&tty_mutex);
+			return -ENODEV;
+		}
+#endif
 		driver = console_driver;
 		index = fg_console;
 		noctty = 1;
@@ -2564,6 +2647,12 @@ retry_open:
 	}
 #endif
 	if (device == MKDEV(TTYAUX_MAJOR,1)) {
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			mutex_unlock(&tty_mutex);
+			return -ENODEV;
+		}
+#endif
 		driver = console_device(&index);
 		if (driver) {
 			/* Don't let /dev/console block */
@@ -2581,7 +2670,7 @@ retry_open:
 		return -ENODEV;
 	}
 got_driver:
-	retval = init_dev(driver, index, &tty);
+	retval = init_dev(driver, index, c_tty, &tty);
 	mutex_unlock(&tty_mutex);
 	if (retval)
 		return retval;
@@ -2662,11 +2751,11 @@ static int ptmx_open(struct inode * inod
 
 	/* find a device that is not in use. */
 	down(&allocated_ptys_lock);
-	if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
 		up(&allocated_ptys_lock);
 		return -ENOMEM;
 	}
-	idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
+	idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
 	if (idr_ret < 0) {
 		up(&allocated_ptys_lock);
 		if (idr_ret == -EAGAIN)
@@ -2674,14 +2763,14 @@ static int ptmx_open(struct inode * inod
 		return -EIO;
 	}
 	if (index >= pty_limit) {
-		idr_remove(&allocated_ptys, index);
+		idr_remove(&ve_allocated_ptys, index);
 		up(&allocated_ptys_lock);
 		return -EIO;
 	}
 	up(&allocated_ptys_lock);
 
 	mutex_lock(&tty_mutex);
-	retval = init_dev(ptm_driver, index, &tty);
+	retval = init_dev(ve_ptm_driver, index, NULL, &tty);
 	mutex_unlock(&tty_mutex);
 	
 	if (retval)
@@ -2696,7 +2785,7 @@ static int ptmx_open(struct inode * inod
 		goto out1;
 
 	check_tty_count(tty, "tty_open");
-	retval = ptm_driver->open(tty, filp);
+	retval = ve_ptm_driver->open(tty, filp);
 	if (!retval)
 		return 0;
 out1:
@@ -2704,7 +2793,7 @@ out1:
 	return retval;
 out:
 	down(&allocated_ptys_lock);
-	idr_remove(&allocated_ptys, index);
+	idr_remove(&ve_allocated_ptys, index);
 	up(&allocated_ptys_lock);
 	return retval;
 }
@@ -2901,6 +2990,8 @@ static int tioccons(struct file *file)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	if (!ve_is_super(get_exec_env()))
+		return -EACCES;
 	if (file->f_op->write == redirected_tty_write) {
 		struct file *f;
 		spin_lock(&redirect_lock);
@@ -2987,9 +3078,9 @@ static int tiocsctty(struct tty_struct *
 			 */
 
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 			read_unlock(&tasklist_lock);
 		} else
 			return -EPERM;
@@ -3025,7 +3116,7 @@ static int tiocgpgrp(struct tty_struct *
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(real_tty->pgrp, p);
+	return put_user(pid_to_vpid(real_tty->pgrp), p);
 }
 
 /**
@@ -3057,6 +3148,9 @@ static int tiocspgrp(struct tty_struct *
 		return -EFAULT;
 	if (pgrp < 0)
 		return -EINVAL;
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return -EPERM;
 	if (session_of_pgrp(pgrp) != current->signal->session)
 		return -EPERM;
 	real_tty->pgrp = pgrp;
@@ -3085,7 +3179,7 @@ static int tiocgsid(struct tty_struct *t
 		return -ENOTTY;
 	if (real_tty->session <= 0)
 		return -ENOTTY;
-	return put_user(real_tty->session, p);
+	return put_user(pid_to_vpid(real_tty->session), p);
 }
 
 /**
@@ -3409,16 +3503,16 @@ static void __do_SAK(void *arg)
 	
 	read_lock(&tasklist_lock);
 	/* Kill the entire session */
-	do_each_task_pid(session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(session, PIDTYPE_SID, p) {
 		printk(KERN_NOTICE "SAK: killed process %d"
 			" (%s): p->signal->session==tty->session\n",
 			p->pid, p->comm);
 		send_sig(SIGKILL, p, 1);
-	} while_each_task_pid(session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(session, PIDTYPE_SID, p);
 	/* Now kill any processes that happen to have the
 	 * tty open.
 	 */
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (p->signal->tty == tty) {
 			printk(KERN_NOTICE "SAK: killed process %d"
 			    " (%s): p->signal->session==tty->session\n",
@@ -3450,7 +3544,7 @@ static void __do_SAK(void *arg)
 			spin_unlock(&p->files->file_lock);
 		}
 		task_unlock(p);
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	read_unlock(&tasklist_lock);
 #endif
 }
@@ -3666,7 +3760,7 @@ static void initialize_tty_struct(struct
 	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
 	spin_lock_init(&tty->read_lock);
-	INIT_LIST_HEAD(&tty->tty_files);
+	file_list_init(&tty->tty_files);
 	INIT_WORK(&tty->SAK_work, NULL, NULL);
 }
 
@@ -3849,8 +3943,11 @@ int tty_register_driver(struct tty_drive
 
 	if (!driver->put_char)
 		driver->put_char = tty_default_put_char;
-	
+
+	driver->owner_env = get_exec_env();
+	write_lock_irq(&tty_driver_guard);
 	list_add(&driver->tty_drivers, &tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 	
 	if ( !(driver->flags & TTY_DRIVER_DYNAMIC_DEV) ) {
 		for(i = 0; i < driver->num; i++)
@@ -3877,7 +3974,9 @@ int tty_unregister_driver(struct tty_dri
 	unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
 				driver->num);
 
+	write_lock_irq(&tty_driver_guard);
 	list_del(&driver->tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 
 	/*
 	 * Free the termios and termios_locked structures because
@@ -3997,6 +4096,44 @@ static int __init tty_init(void)
 	vty_init();
  out_vt:
 #endif
+	prepare_tty();
 	return 0;
 }
 module_init(tty_init);
+
+#ifdef CONFIG_UNIX98_PTYS
+struct class *init_ve_tty_class(void)
+{
+	struct class * ve_tty_class;
+	struct class_device * ve_ptmx_dev_class;
+
+	ve_tty_class = class_create(THIS_MODULE, "tty");
+	if (IS_ERR(ve_tty_class))
+		return ve_tty_class;
+
+	ve_ptmx_dev_class = class_device_create(ve_tty_class, NULL,
+				MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
+	if (IS_ERR(ve_ptmx_dev_class)) {
+		class_destroy(ve_tty_class);
+		return (struct class *)ve_ptmx_dev_class;
+	}
+
+	return ve_tty_class;
+}
+
+void fini_ve_tty_class(struct class *ve_tty_class)
+{
+	class_device_destroy(ve_tty_class, MKDEV(TTYAUX_MAJOR, 2));
+	class_destroy(ve_tty_class);
+}
+#else
+struct class *init_ve_tty_class(void)
+{
+	return NULL;
+}
+void fini_ve_tty_class(struct class *ve_tty_class)
+{
+}
+#endif
+EXPORT_SYMBOL(init_ve_tty_class);
+EXPORT_SYMBOL(fini_ve_tty_class);
diff -upr kernel-2.6.18-417.el5.orig/drivers/cpufreq/cpufreq_ondemand.c kernel-2.6.18-417.el5-028stab121/drivers/cpufreq/cpufreq_ondemand.c
--- kernel-2.6.18-417.el5.orig/drivers/cpufreq/cpufreq_ondemand.c	2017-01-13 07:39:11.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/cpufreq/cpufreq_ondemand.c	2017-01-13 08:40:15.000000000 -0500
@@ -49,8 +49,6 @@ static unsigned int def_sampling_rate;
 #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
 #define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
 
-static void do_dbs_timer(void *data);
-
 /* Sampling types */
 enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
 
@@ -67,6 +65,9 @@ struct cpu_dbs_info_s {
 	unsigned int enable:1,
 	             sample_type:1;
 };
+
+static void do_dbs_timer(struct cpu_dbs_info_s *dbs_info);
+
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
 static unsigned int dbs_enable;	/* number of CPUs using this policy */
@@ -508,12 +509,12 @@ static void dbs_check_cpu(struct cpu_dbs
 	}
 }
 
-static void do_dbs_timer(void *data)
+static void do_dbs_timer(struct cpu_dbs_info_s *dbs_info)
 {
-	unsigned int cpu = smp_processor_id();
-	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	unsigned int cpu = dbs_info->cpu;
 	/* We want all CPUs to do sampling nearly on same jiffy */
 	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	unsigned long data = dbs_info->sample_type;
 
 	delay -= jiffies % delay;
 
@@ -526,16 +527,15 @@ static void do_dbs_timer(void *data)
 	}
 
 	/* Common NORMAL_SAMPLE setup */
-	INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
 	if (!dbs_tuners_ins.powersave_bias ||
-	    (unsigned long) data == DBS_NORMAL_SAMPLE) {
+	    data == DBS_NORMAL_SAMPLE) {
 		lock_cpu_hotplug();
 		dbs_check_cpu(dbs_info);
 		unlock_cpu_hotplug();
 		if (dbs_info->freq_lo) {
 			/* Setup timer for SUB_SAMPLE */
-			INIT_WORK(&dbs_info->work, do_dbs_timer,
-					(void *)DBS_SUB_SAMPLE);
+			dbs_info->sample_type = DBS_SUB_SAMPLE;
 			delay = dbs_info->freq_hi_jiffies;
 		}
 	} else {
@@ -543,6 +543,7 @@ static void do_dbs_timer(void *data)
 	                        	dbs_info->freq_lo,
 	                        	CPUFREQ_RELATION_H);
 	}
+	INIT_WORK(&dbs_info->work, do_dbs_timer, dbs_info);
 	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 	unlock_policy_rwsem_write(cpu);
 }
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/benet/be_main.c kernel-2.6.18-417.el5-028stab121/drivers/net/benet/be_main.c
--- kernel-2.6.18-417.el5.orig/drivers/net/benet/be_main.c	2017-01-13 07:39:12.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/benet/be_main.c	2017-01-13 08:40:41.000000000 -0500
@@ -736,7 +736,7 @@ static struct sk_buff *be_insert_vlan_in
 		skb = __vlan_put_tag(skb, vlan_tag);
 		if (unlikely(!skb))
 			return skb;
-		VLAN_TX_SKB_CB(skb)->magic = 0;
+		skb->vlan_tci = 0;
 	}
 
 	/* Insert the outer VLAN, if any */
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/loopback.c kernel-2.6.18-417.el5-028stab121/drivers/net/loopback.c
--- kernel-2.6.18-417.el5.orig/drivers/net/loopback.c	2017-01-13 07:39:12.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/loopback.c	2017-01-13 08:40:24.000000000 -0500
@@ -58,11 +58,14 @@
 #include <linux/tcp.h>
 #include <linux/percpu.h>
 
-struct pcpu_lstats {
-	unsigned long packets;
-	unsigned long bytes;
-};
 static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
+#ifdef CONFIG_VE
+#define LOOPBACK_STATS(cpu)	((ve_is_super(get_exec_env())) ?	\
+				&per_cpu(pcpu_lstats, cpu) :		\
+				per_cpu_ptr(get_exec_env()->_pcpu_lstats, cpu))
+#else
+#define LOOPBACK_STATS(cpu)	&per_cpu(pcpu_lstats, cpu)
+#endif
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
@@ -134,6 +137,11 @@ static int loopback_xmit(struct sk_buff 
 {
 	struct pcpu_lstats *lb_stats;
 
+	if (unlikely(get_exec_env()->disable_net)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
 	skb_orphan(skb);
 
 	skb->protocol = eth_type_trans(skb,dev);
@@ -153,7 +161,7 @@ static int loopback_xmit(struct sk_buff 
 #endif
 	dev->last_rx = jiffies;
 
-	lb_stats = &per_cpu(pcpu_lstats, get_cpu());
+	lb_stats = LOOPBACK_STATS(get_cpu());
 	lb_stats->bytes += skb->len;
 	lb_stats->packets++;
 	put_cpu();
@@ -176,7 +184,7 @@ static struct net_device_stats *get_stat
 
 	for_each_possible_cpu(i) {
 		const struct pcpu_lstats *lb_stats;
-		lb_stats = &per_cpu(pcpu_lstats, i);
+		lb_stats = LOOPBACK_STATS(i);
 		bytes   += lb_stats->bytes;
 		packets += lb_stats->packets;
 	}
@@ -199,6 +207,36 @@ static struct ethtool_ops loopback_ethto
 	.set_tso		= ethtool_op_set_tso,
 };
 
+static void loopback_destructor(struct net_device *dev)
+{
+	kfree(dev->priv);
+	dev->priv = NULL;
+}
+
+struct net_device templ_loopback_dev = {
+	.name	 		= "lo",
+	.get_stats		= get_stats,
+	.destructor		= loopback_destructor,
+	.mtu			= (16 * 1024) + 20 + 20 + 12,
+	.hard_start_xmit	= loopback_xmit,
+	.hard_header		= eth_header,
+	.hard_header_cache	= eth_header_cache,
+	.header_cache_update	= eth_header_cache_update,
+	.hard_header_len	= ETH_HLEN,	/* 14	*/
+	.addr_len		= ETH_ALEN,	/* 6	*/
+	.tx_queue_len		= 0,
+	.type			= ARPHRD_LOOPBACK,	/* 0x0001*/
+	.rebuild_header		= eth_rebuild_header,
+	.flags			= IFF_LOOPBACK,
+	.features 		= NETIF_F_SG|NETIF_F_FRAGLIST
+				  |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
+				  |NETIF_F_LLTX|NETIF_F_VIRTUAL,
+};
+
+#ifdef loopback_dev
+#undef loopback_dev
+#endif
+
 struct net_device_loopback {
 	struct net_device ld;
 	struct net_device_stats stats;
@@ -240,8 +278,13 @@ extern struct net_device loopback_dev __
 int __init loopback_init(void)
 {
 	loopback_dev.priv = netdev_priv(&loopback_dev);
+	loopback_dev.destructor = &loopback_destructor;
 
+#ifdef CONFIG_VE
+	get_ve0()->_loopback_dev = &loopback_dev;
+#endif
 	return register_netdev(&loopback_dev);
 };
 
 EXPORT_SYMBOL(loopback_dev);
+EXPORT_SYMBOL(templ_loopback_dev);
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/Makefile kernel-2.6.18-417.el5-028stab121/drivers/net/Makefile
--- kernel-2.6.18-417.el5.orig/drivers/net/Makefile	2017-01-13 07:39:12.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/Makefile	2017-01-13 08:40:21.000000000 -0500
@@ -33,6 +33,12 @@ gianfar_driver-objs := gianfar.o \
 obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o
 ucc_geth_driver-objs := ucc_geth.o ucc_geth_phy.o
 
+obj-$(CONFIG_VE_NETDEV) += vznetdev.o
+vznetdev-objs := open_vznet.o venet_core.o
+
+obj-$(CONFIG_VE_ETHDEV) += vzethdev.o
+vzethdev-objs := veth.o
+
 #
 # link order important here
 #
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/open_vznet.c kernel-2.6.18-417.el5-028stab121/drivers/net/open_vznet.c
--- kernel-2.6.18-417.el5.orig/drivers/net/open_vznet.c	2017-01-13 08:40:21.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/open_vznet.c	2017-01-13 08:40:22.000000000 -0500
@@ -0,0 +1,244 @@
+/*
+ *  open_vznet.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual Networking device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <linux/inet.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <linux/venet.h>
+
+void veip_stop(struct ve_struct *ve)
+{
+	struct list_head *p, *tmp;
+
+	write_lock_irq(&veip_hash_lock);
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each_safe(p, tmp, &ve->veip->ip_lh) {
+		struct ip_entry_struct *ptr;
+		ptr = list_entry(p, struct ip_entry_struct, ve_list);
+		ptr->active_env = NULL;
+		list_del(&ptr->ve_list);
+		list_del(&ptr->ip_hash);
+		kfree(ptr);
+	}
+	veip_put(ve->veip);
+	ve->veip = NULL;
+	if (!ve_is_super(ve))
+		module_put(THIS_MODULE);
+unlock:
+	write_unlock_irq(&veip_hash_lock);
+}
+
+int veip_start(struct ve_struct *ve)
+{
+	int err, get;
+
+	err = 0;
+	write_lock_irq(&veip_hash_lock);
+	get = ve->veip == NULL;
+	ve->veip = veip_findcreate(ve->veid);
+	if (ve->veip == NULL)
+		err = -ENOMEM;
+	write_unlock_irq(&veip_hash_lock);
+	if (err == 0 && get && !ve_is_super(ve))
+		__module_get(THIS_MODULE);
+	return err;
+}
+
+int veip_entry_add(struct ve_struct *ve, struct ve_addr_struct *addr)
+{
+	struct ip_entry_struct *entry, *found;
+	int err;
+
+	entry = kzalloc(sizeof(struct ip_entry_struct), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	if (ve->veip == NULL) {
+		/* This can happen if we load venet AFTER ve was started */
+	       	err = veip_start(ve);
+		if (err < 0)
+			goto out;
+	}
+
+	write_lock_irq(&veip_hash_lock);
+	err = -EADDRINUSE;
+	found = venet_entry_lookup(addr);
+	if (found != NULL)
+		goto out_unlock;
+
+	entry->active_env = ve;
+	entry->addr = *addr;
+	ip_entry_hash(entry, ve->veip);
+
+	err = 0;
+	entry = NULL;
+out_unlock:
+	write_unlock_irq(&veip_hash_lock);
+out:
+	if (entry != NULL)
+		kfree(entry);
+	return err;
+}
+
+int veip_entry_del(envid_t veid, struct ve_addr_struct *addr)
+{
+	struct ip_entry_struct *found;
+	int err;
+
+	err = -EADDRNOTAVAIL;
+	write_lock_irq(&veip_hash_lock);
+	found = venet_entry_lookup(addr);
+	if (found == NULL)
+		goto out;
+	if (found->active_env->veid != veid)
+		goto out;
+
+	err = 0;
+	found->active_env = NULL;
+
+	list_del(&found->ip_hash);
+	list_del(&found->ve_list);
+	kfree(found);
+out:
+	write_unlock_irq(&veip_hash_lock);
+	return err;
+}
+
+static int skb_extract_addr(struct sk_buff *skb,
+		struct ve_addr_struct *addr, int dir)
+{
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		addr->family = AF_INET;
+		addr->key[0] = 0;
+		addr->key[1] = 0;
+		addr->key[2] = 0;
+		addr->key[3] = (dir ? skb->nh.iph->daddr : skb->nh.iph->saddr);
+		return 0;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case __constant_htons(ETH_P_IPV6):
+		addr->family = AF_INET6;
+		memcpy(&addr->key, dir ?
+				skb->nh.ipv6h->daddr.s6_addr32 :
+				skb->nh.ipv6h->saddr.s6_addr32,
+				sizeof(addr->key));
+		return 0;
+#endif
+	}
+
+	return -EAFNOSUPPORT;
+}
+
+static struct ve_struct *venet_find_ve(struct sk_buff *skb, int dir)
+{
+	struct ip_entry_struct *entry;
+	struct ve_addr_struct addr;
+
+	if (skb_extract_addr(skb, &addr, dir) < 0)
+		return NULL;
+
+	entry = venet_entry_lookup(&addr);
+	if (entry == NULL)
+		return NULL;
+
+	return entry->active_env;
+}
+
+int venet_change_skb_owner(struct sk_buff *skb)
+{
+	struct ve_struct *ve, *ve_old;
+
+	ve_old = skb->owner_env;
+
+	read_lock(&veip_hash_lock);
+	if (!ve_is_super(ve_old)) {
+		/* from VE to host */
+		ve = venet_find_ve(skb, 0);
+		if (ve == NULL)
+			goto out_drop;
+		if (!ve_accessible_strict(ve, ve_old))
+			goto out_source;
+		skb->owner_env = get_ve0();
+	} else {
+		/* from host to VE */
+		ve = venet_find_ve(skb, 1);
+		if (ve == NULL)
+			goto out_drop;
+		skb->owner_env = ve;
+	}
+	read_unlock(&veip_hash_lock);
+
+	return 0;
+
+out_drop:
+	read_unlock(&veip_hash_lock);
+	return -ESRCH;
+
+out_source:
+	read_unlock(&veip_hash_lock);
+	if (net_ratelimit() && skb->protocol == __constant_htons(ETH_P_IP)) {
+		printk(KERN_WARNING "Dropped packet, source wrong "
+		       "veid=%u src-IP=%u.%u.%u.%u "
+		       "dst-IP=%u.%u.%u.%u\n",
+		       skb->owner_env->veid,
+		       NIPQUAD(skb->nh.iph->saddr),
+		       NIPQUAD(skb->nh.iph->daddr));
+	}
+	return -EACCES;
+}
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct ip_entry_struct *entry;
+	char s[40];
+
+	p = (struct list_head *)v;
+	if (p == ip_entry_hash_table) {
+		seq_puts(m, "Version: 2.5\n");
+		return 0;
+	}
+	entry = list_entry(p, struct ip_entry_struct, ip_hash);
+	veaddr_print(s, sizeof(s), &entry->addr);
+	seq_printf(m, "%39s %10u\n", s, 0);
+	return 0;
+}
+#endif
+
+__exit void veip_cleanup(void)
+{
+	int i;
+
+	write_lock_irq(&veip_hash_lock);
+	for (i = 0; i < VEIP_HASH_SZ; i++)
+		while (!list_empty(ip_entry_hash_table + i)) {
+			struct ip_entry_struct *entry;
+
+			entry = list_first_entry(ip_entry_hash_table + i,
+					struct ip_entry_struct, ip_hash);
+			list_del(&entry->ip_hash);
+			kfree(entry);
+		}
+	write_unlock_irq(&veip_hash_lock);
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
+MODULE_LICENSE("GPL v2");
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/ppp_generic.c kernel-2.6.18-417.el5-028stab121/drivers/net/ppp_generic.c
--- kernel-2.6.18-417.el5.orig/drivers/net/ppp_generic.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/ppp_generic.c	2017-01-13 08:40:41.000000000 -0500
@@ -48,6 +48,11 @@
 #include <net/slhc_vj.h>
 #include <asm/atomic.h>
 
+#include <linux/ve.h>
+#include <linux/ve_task.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+
 #define PPP_VERSION	"2.4.2"
 
 /*
@@ -132,6 +137,7 @@ struct ppp {
 	struct sock_filter *active_filter;/* filter for pkts to reset idle */
 	unsigned pass_len, active_len;
 #endif /* CONFIG_PPP_FILTER */
+	struct ve_struct	*ve;	/* the VE we belong to */
 };
 
 /*
@@ -156,6 +162,7 @@ struct channel {
 	struct rw_semaphore chan_sem;	/* protects `chan' during chan ioctl */
 	spinlock_t	downl;		/* protects `chan', file.xq dequeue */
 	struct ppp	*ppp;		/* ppp unit we're connected to */
+	struct ve_struct *ve;		/* VE channel belongs to */
 	struct list_head clist;		/* link in list of channels per unit */
 	rwlock_t	upl;		/* protects `ppp' */
 #ifdef CONFIG_PPP_MULTILINK
@@ -202,18 +209,23 @@ static void cardmap_destroy(struct cardm
  * and updating its file.refcnt field is atomic.
  */
 static DEFINE_MUTEX(all_ppp_mutex);
-static struct cardmap *all_ppp_units;
 static atomic_t ppp_unit_count = ATOMIC_INIT(0);
 
+struct ve_ppp {
+	struct cardmap *all_ppp_units;
+
+	/* channels */
+	struct list_head all_channels;
+	struct list_head new_channels;
+	int last_channel_index;
+};
+
 /*
  * all_channels_lock protects all_channels and last_channel_index,
  * and the atomicity of find a channel and updating its file.refcnt
  * field.
  */
 static DEFINE_SPINLOCK(all_channels_lock);
-static LIST_HEAD(all_channels);
-static LIST_HEAD(new_channels);
-static int last_channel_index;
 static atomic_t channel_count = ATOMIC_INIT(0);
 
 /* Get the PPP protocol number from a skb */
@@ -263,18 +275,23 @@ static void ppp_ccp_peek(struct ppp *ppp
 static void ppp_ccp_closed(struct ppp *ppp);
 static struct compressor *find_compressor(int type);
 static void ppp_get_stats(struct ppp *ppp, struct ppp_stats *st);
-static struct ppp *ppp_create_interface(int unit, int *retp);
+static struct ppp *ppp_create_interface(struct ve_struct *ve, int unit, int *retp);
 static void init_ppp_file(struct ppp_file *pf, int kind);
 static void ppp_shutdown_interface(struct ppp *ppp);
 static void ppp_destroy_interface(struct ppp *ppp);
-static struct ppp *ppp_find_unit(int unit);
-static struct channel *ppp_find_channel(int unit);
+static struct ppp *ppp_find_unit(struct ve_ppp *vp, int unit);
+static struct channel *ppp_find_channel(struct ve_ppp *vp, int unit);
 static int ppp_connect_channel(struct channel *pch, int unit);
 static int ppp_disconnect_channel(struct channel *pch);
 static void ppp_destroy_channel(struct channel *pch);
 
 static struct class *ppp_class;
 
+static inline int ve_is_ppp_capable(struct ve_struct *ve)
+{
+	return (ve->features & VE_FEATURE_PPP) && ve->ve_ppp;
+}
+
 /* Translates a PPP protocol number to a NP index (NP == network protocol) */
 static inline int proto_to_npindex(int proto)
 {
@@ -360,8 +377,10 @@ static int ppp_open(struct inode *inode,
 	/*
 	 * This could (should?) be enforced by the permissions on /dev/ppp.
 	 */
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
+	if (!ve_is_ppp_capable(get_exec_env()))
+		return -EACCES;
 	return 0;
 }
 
@@ -781,6 +800,7 @@ static int ppp_unattached_ioctl(struct p
 	int unit, err = -EFAULT;
 	struct ppp *ppp;
 	struct channel *chan;
+	struct ve_struct *ve = get_exec_env();
 	int __user *p = (int __user *)arg;
 
 	switch (cmd) {
@@ -788,7 +808,7 @@ static int ppp_unattached_ioctl(struct p
 		/* Create a new ppp unit */
 		if (get_user(unit, p))
 			break;
-		ppp = ppp_create_interface(unit, &err);
+		ppp = ppp_create_interface(ve, unit, &err);
 		if (ppp == 0)
 			break;
 		file->private_data = &ppp->file;
@@ -805,7 +825,7 @@ static int ppp_unattached_ioctl(struct p
 			break;
 		mutex_lock(&all_ppp_mutex);
 		err = -ENXIO;
-		ppp = ppp_find_unit(unit);
+		ppp = ppp_find_unit(ve->ve_ppp, unit);
 		if (ppp != 0) {
 			atomic_inc(&ppp->file.refcnt);
 			file->private_data = &ppp->file;
@@ -819,7 +839,7 @@ static int ppp_unattached_ioctl(struct p
 			break;
 		spin_lock_bh(&all_channels_lock);
 		err = -ENXIO;
-		chan = ppp_find_channel(unit);
+		chan = ppp_find_channel(ve->ve_ppp, unit);
 		if (chan != 0) {
 			atomic_inc(&chan->file.refcnt);
 			file->private_data = &chan->file;
@@ -844,6 +864,51 @@ static struct file_operations ppp_device
 	.release	= ppp_release
 };
 
+static int ve_ppp_init(void *x)
+{
+	struct ve_struct *ve = x;
+	struct ve_ppp *vp;
+
+	if (!(ve->features & VE_FEATURE_PPP))
+		return 0;
+
+	vp = kzalloc(sizeof(*vp), GFP_KERNEL);
+	if (!vp)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&vp->all_channels);
+	INIT_LIST_HEAD(&vp->new_channels);
+
+	ve->ve_ppp = vp;
+
+	if (!ve_is_super(ve))
+		__module_get(THIS_MODULE);
+
+	return 0;
+}
+
+static void ve_ppp_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!ve_is_ppp_capable(ve))
+		return;
+
+	cardmap_destroy(&ve->ve_ppp->all_ppp_units);
+	kfree(ve->ve_ppp);
+	ve->ve_ppp = NULL;
+
+	if (!ve_is_super(ve))
+		module_put(THIS_MODULE);
+}
+
+static struct ve_hook ppp_hook = {
+	.owner	= THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+	.init	= ve_ppp_init,
+	.fini	= ve_ppp_fini,
+};
+
 #define PPP_MAJOR	108
 
 /* Called at boot time if ppp is compiled into the kernel,
@@ -863,6 +928,11 @@ static int __init ppp_init(void)
 		class_device_create(ppp_class, NULL, MKDEV(PPP_MAJOR, 0), NULL, "ppp");
 	}
 
+	/* all is granted to ve0 */
+	ve_ppp_init(get_ve0());
+
+	ve_hook_register(VE_SS_CHAIN, &ppp_hook);
+
 out:
 	if (err)
 		printk(KERN_ERR "failed to register PPP device (%d)\n", err);
@@ -988,6 +1058,7 @@ static void ppp_setup(struct net_device 
 	dev->tx_queue_len = 3;
 	dev->type = ARPHRD_PPP;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+	dev->features |= NETIF_F_VIRTUAL;
 }
 
 /*
@@ -1993,12 +2064,23 @@ ppp_mp_reconstruct(struct ppp *ppp)
 int
 ppp_register_channel(struct ppp_channel *chan)
 {
+	return ppp_register_ve_channel(get_exec_env(), chan);
+}
+
+int ppp_register_ve_channel(struct ve_struct *ve, struct ppp_channel *chan)
+{
 	struct channel *pch;
+	struct ve_ppp *vp;
+
+	if (!ve_is_ppp_capable(get_exec_env()))
+		return -EACCES;
 
 	pch = kzalloc(sizeof(struct channel), GFP_KERNEL);
 	if (pch == 0)
 		return -ENOMEM;
+	vp = ve->ve_ppp;
 	pch->ppp = NULL;
+	pch->ve = ve;
 	pch->chan = chan;
 	chan->ppp = pch;
 	init_ppp_file(&pch->file, CHANNEL);
@@ -2010,8 +2092,8 @@ ppp_register_channel(struct ppp_channel 
 	spin_lock_init(&pch->downl);
 	rwlock_init(&pch->upl);
 	spin_lock_bh(&all_channels_lock);
-	pch->file.index = ++last_channel_index;
-	list_add(&pch->list, &new_channels);
+	pch->file.index = ++vp->last_channel_index;
+	list_add(&pch->list, &vp->new_channels);
 	atomic_inc(&channel_count);
 	spin_unlock_bh(&all_channels_lock);
 	return 0;
@@ -2400,10 +2482,11 @@ ppp_get_stats(struct ppp *ppp, struct pp
  * unit == -1 means allocate a new number.
  */
 static struct ppp *
-ppp_create_interface(int unit, int *retp)
+ppp_create_interface(struct ve_struct *ve, int unit, int *retp)
 {
 	struct ppp *ppp;
 	struct net_device *dev = NULL;
+	struct ve_ppp *vp;
 	int ret = -ENOMEM;
 	int i;
 
@@ -2434,10 +2517,11 @@ ppp_create_interface(int unit, int *retp
 	dev->do_ioctl = ppp_net_ioctl;
 
 	ret = -EEXIST;
+	vp = ve->ve_ppp;
 	mutex_lock(&all_ppp_mutex);
 	if (unit < 0)
-		unit = cardmap_find_first_free(all_ppp_units);
-	else if (cardmap_get(all_ppp_units, unit) != NULL)
+		unit = cardmap_find_first_free(vp->all_ppp_units);
+	else if (cardmap_get(vp->all_ppp_units, unit) != NULL)
 		goto out2;	/* unit already exists */
 
 	/* Initialize the new ppp unit */
@@ -2452,10 +2536,12 @@ ppp_create_interface(int unit, int *retp
 	}
 
 	atomic_inc(&ppp_unit_count);
-	ret = cardmap_set(&all_ppp_units, unit, ppp);
+	ret = cardmap_set(&vp->all_ppp_units, unit, ppp);
 	if (ret != 0)
 		goto out3;
 
+	ppp->ve = ve;
+
 	mutex_unlock(&all_ppp_mutex);
 	*retp = 0;
 	return ppp;
@@ -2492,6 +2578,7 @@ init_ppp_file(struct ppp_file *pf, int k
 static void ppp_shutdown_interface(struct ppp *ppp)
 {
 	struct net_device *dev;
+	struct ve_ppp *vp = ppp->ve->ve_ppp;
 
 	mutex_lock(&all_ppp_mutex);
 	ppp_lock(ppp);
@@ -2503,7 +2590,7 @@ static void ppp_shutdown_interface(struc
 		unregister_netdev(dev);
 		free_netdev(dev);
 	}
-	cardmap_set(&all_ppp_units, ppp->file.index, NULL);
+	cardmap_set(&vp->all_ppp_units, ppp->file.index, NULL);
 	ppp->file.dead = 1;
 	ppp->owner = NULL;
 	wake_up_interruptible(&ppp->file.rwait);
@@ -2552,9 +2639,9 @@ static void ppp_destroy_interface(struct
  * The caller should have locked the all_ppp_mutex.
  */
 static struct ppp *
-ppp_find_unit(int unit)
+ppp_find_unit(struct ve_ppp *vp, int unit)
 {
-	return cardmap_get(all_ppp_units, unit);
+	return cardmap_get(vp->all_ppp_units, unit);
 }
 
 /*
@@ -2566,17 +2653,17 @@ ppp_find_unit(int unit)
  * when we have a lot of channels in use.
  */
 static struct channel *
-ppp_find_channel(int unit)
+ppp_find_channel(struct ve_ppp *vp, int unit)
 {
 	struct channel *pch;
 
-	list_for_each_entry(pch, &new_channels, list) {
+	list_for_each_entry(pch, &vp->new_channels, list) {
 		if (pch->file.index == unit) {
-			list_move(&pch->list, &all_channels);
+			list_move(&pch->list, &vp->all_channels);
 			return pch;
 		}
 	}
-	list_for_each_entry(pch, &all_channels, list) {
+	list_for_each_entry(pch, &vp->all_channels, list) {
 		if (pch->file.index == unit)
 			return pch;
 	}
@@ -2590,11 +2677,12 @@ static int
 ppp_connect_channel(struct channel *pch, int unit)
 {
 	struct ppp *ppp;
+	struct ve_ppp *vp = pch->ve->ve_ppp;
 	int ret = -ENXIO;
 	int hdrlen;
 
 	mutex_lock(&all_ppp_mutex);
-	ppp = ppp_find_unit(unit);
+	ppp = ppp_find_unit(vp, unit);
 	if (ppp == 0)
 		goto out;
 	write_lock_bh(&pch->upl);
@@ -2669,14 +2757,17 @@ static void ppp_destroy_channel(struct c
 
 static void __exit ppp_cleanup(void)
 {
+	ve_hook_unregister(&ppp_hook);
+
 	/* should never happen */
 	if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count))
 		printk(KERN_ERR "PPP: removing module but units remain!\n");
-	cardmap_destroy(&all_ppp_units);
 	if (unregister_chrdev(PPP_MAJOR, "ppp") != 0)
 		printk(KERN_ERR "PPP: failed to unregister PPP device\n");
 	class_device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0));
 	class_destroy(ppp_class);
+
+	ve_ppp_fini(get_ve0());
 }
 
 /*
@@ -2798,6 +2889,7 @@ module_init(ppp_init);
 module_exit(ppp_cleanup);
 
 EXPORT_SYMBOL(ppp_register_channel);
+EXPORT_SYMBOL(ppp_register_ve_channel);
 EXPORT_SYMBOL(ppp_unregister_channel);
 EXPORT_SYMBOL(ppp_channel_index);
 EXPORT_SYMBOL(ppp_unit_number);
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/pppoe.c kernel-2.6.18-417.el5-028stab121/drivers/net/pppoe.c
--- kernel-2.6.18-417.el5.orig/drivers/net/pppoe.c	2017-01-13 07:39:12.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/pppoe.c	2017-01-13 08:40:23.000000000 -0500
@@ -76,11 +76,18 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/ve.h>
+#include <linux/ve_task.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+
+#ifndef CONFIG_VE
+#error Turn on OpenVZ support (CONFIG_VE) in your kernel config
+#endif
+
 #define PPPOE_HASH_BITS 4
 #define PPPOE_HASH_SIZE (1<<PPPOE_HASH_BITS)
 
-static struct ppp_channel_ops pppoe_chan_ops;
-
 static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb);
 static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb);
@@ -90,6 +97,19 @@ static DEFINE_RWLOCK(pppoe_hash_lock);
 
 static struct ppp_channel_ops pppoe_chan_ops;
 
+/* per-VE private data */
+struct ve_pppoe {
+	struct pppox_sock *hash_table[PPPOE_HASH_SIZE];
+};
+
+/* to eliminate a race btw pppoe_flush_dev and pppoe_release */
+static DEFINE_SPINLOCK(flush_lock);
+
+static inline int ve_is_pppoe_capable(struct ve_struct *ve)
+{
+	return (ve->features & VE_FEATURE_PPP) && ve->ve_pppoe;
+}
+
 static inline int cmp_2_addr(struct pppoe_addr *a, struct pppoe_addr *b)
 {
 	return (a->sid == b->sid &&
@@ -119,33 +139,31 @@ static int hash_item(unsigned long sid, 
 	return hash & ( PPPOE_HASH_SIZE - 1 );
 }
 
-/* zeroed because its in .bss */
-static struct pppox_sock *item_hash_table[PPPOE_HASH_SIZE];
-
 /**********************************************************************
  *
  *  Set/get/delete/rehash items  (internal versions)
  *
  **********************************************************************/
-static struct pppox_sock *__get_item(unsigned long sid, unsigned char *addr)
+static struct pppox_sock *__get_item(struct ve_struct *ve, unsigned long sid, unsigned char *addr)
 {
 	int hash = hash_item(sid, addr);
+	struct ve_pppoe *vp = ve->ve_pppoe;
 	struct pppox_sock *ret;
 
-	ret = item_hash_table[hash];
-
+	ret = vp->hash_table[hash];
 	while (ret && !cmp_addr(&ret->pppoe_pa, sid, addr))
 		ret = ret->next;
 
 	return ret;
 }
 
-static int __set_item(struct pppox_sock *po)
+static int __set_item(struct ve_struct *ve, struct pppox_sock *po)
 {
 	int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
+	struct ve_pppoe *vp = ve->ve_pppoe;
 	struct pppox_sock *ret;
 
-	ret = item_hash_table[hash];
+	ret = vp->hash_table[hash];
 	while (ret) {
 		if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa))
 			return -EALREADY;
@@ -154,20 +172,21 @@ static int __set_item(struct pppox_sock 
 	}
 
 	if (!ret) {
-		po->next = item_hash_table[hash];
-		item_hash_table[hash] = po;
+		po->next = vp->hash_table[hash];
+		vp->hash_table[hash] = po;
 	}
 
 	return 0;
 }
 
-static struct pppox_sock *__delete_item(unsigned long sid, char *addr)
+static struct pppox_sock *__delete_item(struct ve_struct *ve, unsigned long sid, char *addr)
 {
 	int hash = hash_item(sid, addr);
+	struct ve_pppoe *vp = ve->ve_pppoe;
 	struct pppox_sock *ret, **src;
 
-	ret = item_hash_table[hash];
-	src = &item_hash_table[hash];
+	ret = vp->hash_table[hash];
+	src = &vp->hash_table[hash];
 
 	while (ret) {
 		if (cmp_addr(&ret->pppoe_pa, sid, addr)) {
@@ -187,13 +206,13 @@ static struct pppox_sock *__delete_item(
  *  Set/get/delete/rehash items
  *
  **********************************************************************/
-static inline struct pppox_sock *get_item(unsigned long sid,
+static inline struct pppox_sock *get_item(struct ve_struct *ve, unsigned long sid,
 					 unsigned char *addr)
 {
 	struct pppox_sock *po;
 
 	read_lock_bh(&pppoe_hash_lock);
-	po = __get_item(sid, addr);
+	po = __get_item(ve, sid, addr);
 	if (po)
 		sock_hold(sk_pppox(po));
 	read_unlock_bh(&pppoe_hash_lock);
@@ -201,12 +220,12 @@ static inline struct pppox_sock *get_ite
 	return po;
 }
 
-static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
+static inline struct pppox_sock *get_item_by_addr(struct ve_struct *ve, struct sockaddr_pppox *sp)
 {
-	return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote);
+	return get_item(ve, sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote);
 }
 
-static inline int set_item(struct pppox_sock *po)
+static inline int set_item(struct ve_struct *ve, struct pppox_sock *po)
 {
 	int i;
 
@@ -214,18 +233,18 @@ static inline int set_item(struct pppox_
 		return -EINVAL;
 
 	write_lock_bh(&pppoe_hash_lock);
-	i = __set_item(po);
+	i = __set_item(ve, po);
 	write_unlock_bh(&pppoe_hash_lock);
 
 	return i;
 }
 
-static inline struct pppox_sock *delete_item(unsigned long sid, char *addr)
+static inline struct pppox_sock *delete_item(struct ve_struct *ve, unsigned long sid, char *addr)
 {
 	struct pppox_sock *ret;
 
 	write_lock_bh(&pppoe_hash_lock);
-	ret = __delete_item(sid, addr);
+	ret = __delete_item(ve, sid, addr);
 	write_unlock_bh(&pppoe_hash_lock);
 
 	return ret;
@@ -243,19 +262,27 @@ static inline struct pppox_sock *delete_
 static void pppoe_flush_dev(struct net_device *dev)
 {
 	int hash;
+	struct ve_pppoe *vp;
 
 	BUG_ON(dev == NULL);
 
+	if (!dev->owner_env->ve_pppoe)  /* already freed or no VE_FEATURE_PPP */
+		return;
+
+	vp = dev->owner_env->ve_pppoe;
+
 	read_lock_bh(&pppoe_hash_lock);
 	for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) {
-		struct pppox_sock *po = item_hash_table[hash];
+		struct pppox_sock *po = vp->hash_table[hash];
 
 		while (po != NULL) {
 			if (po->pppoe_dev == dev) {
 				struct sock *sk = sk_pppox(po);
 
 				sock_hold(sk);
+				spin_lock(&flush_lock);
 				po->pppoe_dev = NULL;
+				spin_unlock(&flush_lock);
 
 				/* We hold a reference to SK, now drop the
 				 * hash table lock so that we may attempt
@@ -284,7 +311,7 @@ static void pppoe_flush_dev(struct net_d
 				 * so we are guaranteed to make forward
 				 * progress.
 				 */
-				po = item_hash_table[hash];
+				po = vp->hash_table[hash];
 				continue;
 			}
 			po = po->next;
@@ -333,11 +360,12 @@ static int pppoe_rcv_core(struct sock *s
 {
 	struct pppox_sock *po = pppox_sk(sk);
 	struct pppox_sock *relay_po = NULL;
+	struct ve_struct *ve = sk->owner_env;
 
 	if (sk->sk_state & PPPOX_BOUND) {
 		ppp_input(&po->chan, skb);
 	} else if (sk->sk_state & PPPOX_RELAY) {
-		relay_po = get_item_by_addr(&po->pppoe_relay);
+		relay_po = get_item_by_addr(ve, &po->pppoe_relay);
 
 		if (relay_po == NULL)
 			goto abort_kfree;
@@ -390,7 +418,7 @@ static int pppoe_rcv(struct sk_buff *skb
 	if (skb->len < len)
 		goto drop;
 
-	po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source);
+	po = get_item(dev->owner_env, (unsigned long) ph->sid, eth_hdr(skb)->h_source);
 	if (!po)
 		goto drop;
 
@@ -430,7 +458,7 @@ static int pppoe_disc_rcv(struct sk_buff
 	if (ph->code != PADT_CODE)
 		goto abort;
 
-	po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source);
+	po = get_item(dev->owner_env, (unsigned long) ph->sid, eth_hdr(skb)->h_source);
 	if (po) {
 		struct sock *sk = sk_pppox(po);
 
@@ -484,6 +512,9 @@ static int pppoe_create(struct socket *s
 	int error = -ENOMEM;
 	struct sock *sk;
 
+	if (!ve_is_pppoe_capable(get_exec_env()))
+		return -EACCES;
+
 	sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1);
 	if (!sk)
 		goto out;
@@ -522,13 +553,15 @@ static int pppoe_release(struct socket *
 
 	po = pppox_sk(sk);
 	if (po->pppoe_pa.sid) {
-		delete_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
+		delete_item(sk->owner_env, po->pppoe_pa.sid, po->pppoe_pa.remote);
 	}
 
+	spin_lock(&flush_lock);
 	if (po->pppoe_dev)
 		dev_put(po->pppoe_dev);
 
 	po->pppoe_dev = NULL;
+	spin_unlock(&flush_lock);
 
 	sock_orphan(sk);
 	sock->sk = NULL;
@@ -570,7 +603,7 @@ static int pppoe_connect(struct socket *
 		pppox_unbind_sock(sk);
 
 		/* Delete the old binding */
-		delete_item(po->pppoe_pa.sid,po->pppoe_pa.remote);
+		delete_item(sk->owner_env, po->pppoe_pa.sid,po->pppoe_pa.remote);
 
 		if(po->pppoe_dev)
 			dev_put(po->pppoe_dev);
@@ -598,7 +631,7 @@ static int pppoe_connect(struct socket *
 		       &sp->sa_addr.pppoe,
 		       sizeof(struct pppoe_addr));
 
-		error = set_item(po);
+		error = set_item(dev->owner_env, po);
 		if (error < 0)
 			goto err_put;
 
@@ -608,7 +641,7 @@ static int pppoe_connect(struct socket *
 		po->chan.private = sk;
 		po->chan.ops = &pppoe_chan_ops;
 
-		error = ppp_register_channel(&po->chan);
+		error = ppp_register_ve_channel(dev->owner_env, &po->chan);
 		if (error)
 			goto err_put;
 
@@ -723,7 +756,7 @@ static int pppoe_ioctl(struct socket *so
 
 		/* Check that the socket referenced by the address
 		   actually exists. */
-		relay_po = get_item_by_addr(&po->pppoe_relay);
+		relay_po = get_item_by_addr(sk->owner_env, &po->pppoe_relay);
 
 		if (!relay_po)
 			break;
@@ -973,13 +1006,13 @@ out:
 	return 0;
 }
 
-static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos)
+static inline struct pppox_sock *pppoe_get_idx(struct ve_pppoe *vp, loff_t pos)
 {
 	struct pppox_sock *po = NULL;
 	int i = 0;
 
 	for (; i < PPPOE_HASH_SIZE; i++) {
-		po = item_hash_table[i];
+		po = vp->hash_table[i];
 		while (po) {
 			if (!pos--)
 				goto out;
@@ -995,16 +1028,17 @@ static void *pppoe_seq_start(struct seq_
 	loff_t l = *pos;
 
 	read_lock_bh(&pppoe_hash_lock);
-	return l ? pppoe_get_idx(--l) : SEQ_START_TOKEN;
+	return l ? pppoe_get_idx(seq->private, --l) : SEQ_START_TOKEN;
 }
 
 static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct ve_pppoe *vp = seq->private;
 	struct pppox_sock *po;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN) {
-		po = pppoe_get_idx(0);
+		po = pppoe_get_idx(vp, 0);
 		goto out;
 	}
 	po = v;
@@ -1014,7 +1048,7 @@ static void *pppoe_seq_next(struct seq_f
 		int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
 
 		while (++hash < PPPOE_HASH_SIZE) {
-			po = item_hash_table[hash];
+			po = vp->hash_table[hash];
 			if (po)
 				break;
 		}
@@ -1037,7 +1071,20 @@ static struct seq_operations pppoe_seq_o
 
 static int pppoe_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &pppoe_seq_ops);
+	struct seq_file *m;
+	struct ve_struct *ve = get_exec_env();
+	int err;
+
+	if (!ve_is_pppoe_capable(ve))
+		return -EPERM;
+
+	err = seq_open(file, &pppoe_seq_ops);
+	if (err)
+		return err;
+	m = file->private_data;
+	m->private = ve->ve_pppoe;
+
+	return err;
 }
 
 static struct file_operations pppoe_seq_fops = {
@@ -1047,20 +1094,6 @@ static struct file_operations pppoe_seq_
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
-
-static int __init pppoe_proc_init(void)
-{
-	struct proc_dir_entry *p;
-
-	p = create_proc_entry("net/pppoe", S_IRUGO, NULL);
-	if (!p)
-		return -ENOMEM;
-
-	p->proc_fops = &pppoe_seq_fops;
-	return 0;
-}
-#else /* CONFIG_PROC_FS */
-static inline int pppoe_proc_init(void) { return 0; }
 #endif /* CONFIG_PROC_FS */
 
 static const struct proto_ops pppoe_ops = {
@@ -1089,6 +1122,46 @@ static struct pppox_proto pppoe_proto = 
     .owner	= THIS_MODULE,
 };
 
+static int ve_pppoe_init(void *x)
+{
+	struct ve_struct *ve = x;
+	struct ve_pppoe *vp;
+
+	if (!(ve->features & VE_FEATURE_PPP))
+		return 0;
+
+	vp = kzalloc(sizeof(*vp), GFP_KERNEL);
+	if (!vp)
+		return -ENOMEM;
+
+	ve->ve_pppoe = vp;
+
+	if (!ve_is_super(ve))
+		__module_get(THIS_MODULE);
+
+	return 0;
+}
+
+static void ve_pppoe_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!ve_is_pppoe_capable(ve))
+		return;
+
+	kfree(ve->ve_pppoe);
+	ve->ve_pppoe = NULL;
+
+	if (!ve_is_super(ve))
+		module_put(THIS_MODULE);
+}
+
+static struct ve_hook pppoe_hook = {
+	.owner	= THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+	.init	= ve_pppoe_init,
+	.fini	= ve_pppoe_fini,
+};
 
 static int __init pppoe_init(void)
 {
@@ -1101,13 +1174,22 @@ static int __init pppoe_init(void)
 	if (err)
 		goto out_unregister_pppoe_proto;
 
-	err = pppoe_proc_init();
-	if (err)
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	if (!proc_glob_fops_create("net/pppoe", S_IRUGO, &pppoe_seq_fops)) {
+		printk(KERN_ERR "pppoe: cant create proc entry\n");
 		goto out_unregister_pppox_proto;
+	}
+#endif
 	
 	dev_add_pack(&pppoes_ptype);
 	dev_add_pack(&pppoed_ptype);
 	register_netdevice_notifier(&pppoe_notifier);
+
+	ve_pppoe_init(get_ve0());
+
+	ve_hook_register(VE_SS_CHAIN, &pppoe_hook);
+	err = 0;
 out:
 	return err;
 out_unregister_pppox_proto:
@@ -1119,12 +1201,16 @@ out_unregister_pppoe_proto:
 
 static void __exit pppoe_exit(void)
 {
+	ve_hook_unregister(&pppoe_hook);
+
 	unregister_pppox_proto(PX_PROTO_OE);
 	dev_remove_pack(&pppoes_ptype);
 	dev_remove_pack(&pppoed_ptype);
 	unregister_netdevice_notifier(&pppoe_notifier);
-	remove_proc_entry("net/pppoe", NULL);
+	remove_proc_glob_entry("net/pppoe", NULL);
 	proto_unregister(&pppoe_sk_proto);
+
+	ve_pppoe_fini(get_ve0());
 }
 
 module_init(pppoe_init);
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/tun.c kernel-2.6.18-417.el5-028stab121/drivers/net/tun.c
--- kernel-2.6.18-417.el5.orig/drivers/net/tun.c	2017-01-13 07:39:13.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/tun.c	2017-01-13 08:40:41.000000000 -0500
@@ -63,6 +63,11 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <ub/beancounter.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+#include <linux/file.h>
 
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
@@ -97,6 +102,8 @@ struct tun_sock {
 	struct tun_struct	*tun;
 };
 
+static void tun_setup(struct net_device *dev);
+
 static inline struct tun_sock *tun_sk(struct sock *sk)
 {
 	return container_of(sk, struct tun_sock, sk);
@@ -223,6 +230,9 @@ static int tun_net_close(struct net_devi
 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+#if 0
+	struct user_beancounter *ub;
+#endif
 
 	DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
 
@@ -251,6 +261,24 @@ static int tun_net_xmit(struct sk_buff *
         * for indefinite time. */
 	skb_orphan(skb);
 
+	/*
+	 * XXX this code is broken:
+	 * See comment in dev_queue_xmit
+	 */
+#if 0
+	ub = netdev_bc(dev)->exec_ub;
+	if (ub && (skb_bc(skb)->charged == 0)) {
+		unsigned long charge;
+		charge = skb_charge_fullsize(skb);
+		if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
+			goto drop;
+		get_beancounter(ub);
+		skb_bc(skb)->ub = ub;
+		skb_bc(skb)->charged = charge;
+		skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+	}
+#endif
+
 	/* Queue packet */
 	skb_queue_tail(&tun->readq, skb);
 	dev->trans_start = jiffies;
@@ -399,12 +427,8 @@ static inline struct sk_buff *tun_alloc_
 	struct sk_buff *skb;
 	int err;
 
-	/* Under a page?  Don't bother with paged skb. */
-	if (prepad + len < PAGE_SIZE || !linear)
-		linear = len;
-
-	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
-				   &err);
+	linear = len;
+	skb = sock_alloc_send_skb(sk, prepad + linear, noblock, &err);
 	if (!skb)
 		return ERR_PTR(err);
 
@@ -705,12 +729,14 @@ static ssize_t tun_chr_readv(struct file
 					tun->dev->name, addr[0], addr[1], addr[2],
 					addr[3], addr[4], addr[5]);
 			ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
+			/* skb will be uncharged in kfree_skb() */
 			kfree_skb(skb);
 			break;
 		} else {
 			DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
 					tun->dev->name, addr[0], addr[1], addr[2],
 					addr[3], addr[4], addr[5]);
+			/* skb will be uncharged in kfree_skb() */
 			kfree_skb(skb);
 			continue;
 		}
@@ -732,6 +758,150 @@ static ssize_t tun_chr_read(struct file 
 	return tun_chr_readv(file, &iv, 1, pos);
 }
 
+static int tun_sk_alloc_init(struct tun_struct *tun, struct net_device *dev);
+static void tun_sock_destruct(struct sock *sk);
+
+static void cpt_dump_tuntap(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	struct cpt_tuntap_image v;
+	struct tun_struct *tun;
+
+	tun = netdev_priv(dev);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_TUNTAP;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_VOID;
+
+	v.cpt_owner = tun->owner;
+	v.cpt_flags = tun->flags;
+	v.cpt_attached = tun->attached;
+	v.cpt_bindfile = 0;
+
+	if (tun->bind_file) {
+		v.cpt_bindfile = ops->lookup_object(CPT_OBJ_FILE,
+				tun->bind_file, ctx);
+	}
+
+	v.cpt_if_flags = tun->if_flags;
+	BUILD_BUG_ON(sizeof(v.cpt_dev_addr) != sizeof(tun->dev_addr));
+	memcpy(v.cpt_dev_addr, tun->dev_addr, sizeof(v.cpt_dev_addr));
+	BUILD_BUG_ON(sizeof(v.cpt_chr_filter) != sizeof(tun->chr_filter));
+	memcpy(v.cpt_chr_filter, tun->chr_filter, sizeof(v.cpt_chr_filter));
+	BUILD_BUG_ON(sizeof(v.cpt_net_filter) != sizeof(tun->net_filter));
+	memcpy(v.cpt_net_filter, tun->net_filter, sizeof(v.cpt_net_filter));
+	ops->write(&v, sizeof(v), ctx);
+}
+
+static int tun_chr_open(struct inode *inode, struct file * file);
+
+/* We do not restore skb queue, just reinit it */
+static int rst_restore_tuntap(loff_t start, struct cpt_netdev_image *di,
+			struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	int err = -ENODEV;
+	struct cpt_tuntap_image ti;
+	struct net_device *dev;
+	struct file *bind_file = NULL;
+	struct tun_struct *tun;
+	loff_t pos;
+
+	pos = start + di->cpt_hdrlen;
+	err = ops->get_object(CPT_OBJ_NET_TUNTAP,
+			pos, &ti, sizeof(ti), ctx);
+	if (err)
+		goto out;
+
+	pos += ti.cpt_next;
+	if (ti.cpt_bindfile) {
+		bind_file = ops->rst_file(ti.cpt_bindfile, -1, ctx);
+		if (IS_ERR(bind_file))
+			return PTR_ERR(bind_file);
+
+		if (bind_file->private_data == NULL) {
+			err = tun_chr_open(NULL, bind_file);
+			if (err)
+				goto out_put_bind_file;
+		}
+	}
+
+	err = -ENOMEM;
+	dev = alloc_netdev(sizeof(struct tun_struct), di->cpt_name, tun_setup);
+	if (!dev)
+		goto out_put_bind_file;
+
+	tun = netdev_priv(dev);
+
+	tun->dev = dev;
+	tun->owner = ti.cpt_owner;
+	tun->flags = ti.cpt_flags;
+	tun->attached = ti.cpt_attached;
+	tun->if_flags = ti.cpt_if_flags;
+	tun_net_init(dev);
+	BUILD_BUG_ON(sizeof(ti.cpt_dev_addr) != sizeof(tun->dev_addr));
+	memcpy(tun->dev_addr, ti.cpt_dev_addr, sizeof(ti.cpt_dev_addr));
+	BUILD_BUG_ON(sizeof(ti.cpt_chr_filter) != sizeof(tun->chr_filter));
+	memcpy(tun->chr_filter, ti.cpt_chr_filter, sizeof(ti.cpt_chr_filter));
+	BUILD_BUG_ON(sizeof(ti.cpt_net_filter) != sizeof(tun->net_filter));
+	memcpy(tun->net_filter, ti.cpt_net_filter, sizeof(ti.cpt_net_filter));
+
+	err = tun_sk_alloc_init(tun, dev);
+	if (err)
+		goto err_free_dev;
+
+	err = register_netdevice(dev);
+	if (err < 0) {
+		goto err_free_sk;
+	}
+	if (pos < start + di->cpt_next) {
+		struct cpt_hwaddr_image hw;
+		/* Restore hardware address */
+		err = ops->get_object(CPT_OBJ_NET_HWADDR, pos,
+				&hw, sizeof(hw), ctx);
+		if (err)
+			goto err_free_sk;
+		BUILD_BUG_ON(sizeof(hw.cpt_dev_addr) != sizeof(dev->dev_addr));
+		memcpy(dev->dev_addr, hw.cpt_dev_addr,
+				sizeof(hw.cpt_dev_addr));
+	}
+
+	tun->socket.sk->sk_destruct = tun_sock_destruct;
+
+	if (bind_file) {
+		err = tun_attach(tun, bind_file);
+		if (err < 0)
+			goto err_free_sk;
+
+		tun->bind_file = bind_file;
+		fput(bind_file);
+	}
+	return 0;
+
+err_free_sk:
+	sock_put(tun->socket.sk);
+err_free_dev:
+	free_netdev(dev);
+out_put_bind_file:
+	if (bind_file)
+		fput(bind_file);
+out:
+	return err;
+}
+
+static struct net_device_stats *cpt_tun_stats_ptr(struct net_device *dev)
+{
+	return  &((struct tun_struct *)netdev_priv(dev))->stats;
+}
+
+static struct dev_cpt_ops tun_cpt_ops = {
+	.cpt_object = CPT_OBJ_NET_TUNTAP,
+	.name = "tuntap",
+	.dump = cpt_dump_tuntap,
+	.restore = rst_restore_tuntap,
+	.stats = cpt_tun_stats_ptr,
+};
+
 static void tun_setup(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
@@ -750,6 +920,9 @@ static void tun_setup(struct net_device 
 	dev->get_stats = tun_net_stats;
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = tun_free_netdev;
+	dev->features |= NETIF_F_VIRTUAL;
+
+	dev->cpt_ops = &tun_cpt_ops;
 }
 
 static void tun_sock_write_space(struct sock *sk)
@@ -780,9 +953,25 @@ static struct proto tun_proto = {
 	.obj_size	= sizeof(struct tun_sock),
 };
 
-static int tun_set_iff(struct file *file, struct ifreq *ifr)
+static int tun_sk_alloc_init(struct tun_struct *tun, struct net_device *dev)
 {
 	struct sock *sk;
+
+	sk = sk_alloc(AF_UNSPEC, GFP_KERNEL, &tun_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(&tun->socket, sk);
+	sk->sk_write_space = tun_sock_write_space;
+	sk->sk_sndbuf = INT_MAX;
+	sk->sk_sleep = &tun->read_wait;
+
+	tun_sk(sk)->tun = tun;
+	return 0;
+}
+
+static int tun_set_iff(struct file *file, struct ifreq *ifr)
+{
 	struct tun_struct *tun;
 	struct net_device *dev;
 	int err;
@@ -798,7 +987,7 @@ static int tun_set_iff(struct file *file
 
 		if (((tun->owner != -1 && current->euid != tun->owner) ||
 		     (tun->group != -1 && !in_egroup_p(tun->group))) &&
-		    !capable(CAP_NET_ADMIN))
+		    !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 		err = tun_attach(tun, file);
 		if (err < 0)
@@ -808,7 +997,7 @@ static int tun_set_iff(struct file *file
 		char *name;
 		unsigned long flags = 0;
 
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 
 		/* Set dev type */
@@ -841,18 +1030,10 @@ static int tun_set_iff(struct file *file
 		get_random_bytes(tun->dev_addr + sizeof(u16), 4);
 		memset(tun->chr_filter, 0, sizeof tun->chr_filter);
 
-		err = -ENOMEM;
-		sk = sk_alloc(AF_UNSPEC, GFP_KERNEL, &tun_proto, 1);
-		if (!sk)
+		err = tun_sk_alloc_init(tun, dev);
+		if (err)
 			goto err_free_dev;
 
-		sock_init_data(&tun->socket, sk);
-		sk->sk_write_space = tun_sock_write_space;
-		sk->sk_sndbuf = INT_MAX;
-		sk->sk_sleep = &tun->read_wait;
-
-		tun_sk(sk)->tun = tun;
-
 		tun_net_init(dev);
 
 		if (strchr(dev->name, '%')) {
@@ -865,7 +1046,7 @@ static int tun_set_iff(struct file *file
 		if (err < 0)
 			goto err_free_sk;
 
-		sk->sk_destruct = tun_sock_destruct;
+		tun->socket.sk->sk_destruct = tun_sock_destruct;
 
 		err = tun_attach(tun, file);
 		if (err < 0)
@@ -885,11 +1066,13 @@ static int tun_set_iff(struct file *file
 	else
 		tun->flags &= ~TUN_VNET_HDR;
 
+	tun->bind_file = file;
+
 	strcpy(ifr->ifr_name, tun->dev->name);
 	return 0;
 
  err_free_sk:
-	sock_put(sk);
+	sock_put(tun->socket.sk);
  err_free_dev:
 	free_netdev(dev);
  failed:
@@ -1031,6 +1214,12 @@ static int tun_chr_ioctl(struct inode *i
 		break;
 
 	case TUNSETPERSIST:
+		/* prohibit persist mode inside a VE */
+		if (!ve_is_super(get_exec_env())) {
+			ret = -EPERM;
+			goto unlock;
+		}
+
 		/* Disable/Enable persist mode */
 		if (arg)
 			tun->flags |= TUN_PERSIST;
@@ -1343,6 +1532,9 @@ static int __init tun_init(void)
 		printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
 		goto err_misc;
 	}
+	else
+		register_dev_cpt_ops(&tun_cpt_ops);
+
 	return  0;
 err_misc:
 	return ret;
@@ -1350,6 +1542,7 @@ err_misc:
 
 static void tun_cleanup(void)
 {
+	unregister_dev_cpt_ops(&tun_cpt_ops);
 	misc_deregister(&tun_miscdev);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/venet_core.c kernel-2.6.18-417.el5-028stab121/drivers/net/venet_core.c
--- kernel-2.6.18-417.el5.orig/drivers/net/venet_core.c	2017-01-13 08:40:21.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/venet_core.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,1032 @@
+/*
+ *  venet_core.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Common part for Virtuozzo virtual network devices
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/addrconf.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/ethtool.h>
+#include <linux/venet.h>
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_venet.h>
+#include <linux/kthread.h>
+
+static struct task_struct *venet_rx_tsk;
+static struct sk_buff_head venet_pkt_queue;
+static int venet_napi = 0;
+module_param(venet_napi, int, 0640);
+
+static int venet_netif_rx(struct sk_buff *skb);
+
+struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
+rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
+LIST_HEAD(veip_lh);
+
+#define ip_entry_hash_function(ip)  (ntohl(ip) & (VEIP_HASH_SZ - 1))
+
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
+{
+	list_add(&entry->ip_hash,
+			ip_entry_hash_table +
+			ip_entry_hash_function(entry->addr.key[3]));
+	list_add(&entry->ve_list, &veip->ip_lh);
+}
+
+void veip_put(struct veip_struct *veip)
+{
+	if (!list_empty(&veip->ip_lh))
+		return;
+	if (!list_empty(&veip->src_lh))
+		return;
+	if (!list_empty(&veip->dst_lh))
+		return;
+
+	list_del(&veip->list);
+	kfree(veip);
+}
+
+struct ip_entry_struct *venet_entry_lookup(struct ve_addr_struct *addr)
+{
+	struct ip_entry_struct *entry;
+
+	list_for_each_entry (entry, ip_entry_hash_table +
+			ip_entry_hash_function(addr->key[3]), ip_hash)
+		if (memcmp(&entry->addr, addr, sizeof(*addr)) == 0)
+			return entry;
+	return NULL;
+}
+
+struct ext_entry_struct *venet_ext_lookup(struct ve_struct *ve,
+		struct ve_addr_struct *addr)
+{
+	struct ext_entry_struct *entry;
+
+	if (ve->veip == NULL)
+		return NULL;
+
+	list_for_each_entry (entry, &ve->veip->ext_lh, list)
+		if (memcmp(&entry->addr, addr, sizeof(*addr)) == 0)
+			return entry;
+	return NULL;
+}
+
+int venet_ext_add(struct ve_struct *ve, struct ve_addr_struct *addr)
+{
+	struct ext_entry_struct *entry, *found;
+	int err;
+
+	if (ve->veip == NULL)
+		return -ENONET;
+
+	entry = kzalloc(sizeof(struct ext_entry_struct), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	write_lock_irq(&veip_hash_lock);
+	err = -EADDRINUSE;
+	found = venet_ext_lookup(ve, addr);
+	if (found != NULL)
+		goto out_unlock;
+
+	entry->addr = *addr;
+	list_add(&entry->list, &ve->veip->ext_lh);
+	err = 0;
+	entry = NULL;
+out_unlock:
+	write_unlock_irq(&veip_hash_lock);
+	if (entry != NULL)
+		kfree(entry);
+	return err;
+}
+
+int venet_ext_del(struct ve_struct *ve, struct ve_addr_struct *addr)
+{
+	struct ext_entry_struct *found;
+	int err;
+
+	if (ve->veip == NULL)
+		return -ENONET;
+
+	err = -EADDRNOTAVAIL;
+	write_lock_irq(&veip_hash_lock);
+	found = venet_ext_lookup(ve, addr);
+	if (found == NULL)
+		goto out;
+
+	list_del(&found->list);
+	kfree(found);
+	err = 0;
+out:
+	write_unlock_irq(&veip_hash_lock);
+	return err;
+}
+
+void venet_ext_clean(struct ve_struct *ve)
+{
+	struct ext_entry_struct *entry, *tmp;
+
+	if (ve->veip == NULL)
+		return;
+
+	write_lock_irq(&veip_hash_lock);
+	list_for_each_entry_safe (entry, tmp, &ve->veip->ext_lh, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+	write_unlock_irq(&veip_hash_lock);
+}
+
+struct veip_struct *veip_find(envid_t veid)
+{
+	struct veip_struct *ptr;
+
+	list_for_each_entry(ptr, &veip_lh, list) {
+		if (ptr->veid != veid)
+			continue;
+		return ptr;
+	}
+	return NULL;
+}
+
+struct veip_struct *veip_findcreate(envid_t veid)
+{
+	struct veip_struct *ptr;
+
+	ptr = veip_find(veid);
+	if (ptr != NULL)
+		return ptr;
+
+	ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC);
+	if (ptr == NULL)
+		return NULL;
+	memset(ptr, 0, sizeof(struct veip_struct));
+	INIT_LIST_HEAD(&ptr->ip_lh);
+	INIT_LIST_HEAD(&ptr->src_lh);
+	INIT_LIST_HEAD(&ptr->dst_lh);
+	INIT_LIST_HEAD(&ptr->ext_lh);
+	ptr->veid = veid;
+	list_add(&ptr->list, &veip_lh);
+	return ptr;
+}
+
+static int convert_sockaddr(struct sockaddr *addr, int addrlen,
+		struct ve_addr_struct *veaddr)
+{
+	int err;
+
+	switch (addr->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *sin;
+
+		err = -EINVAL;
+		if (addrlen != sizeof(struct sockaddr_in))
+			break;
+
+		err = 0;
+		sin = (struct sockaddr_in *)addr;
+		veaddr->family = AF_INET;
+		veaddr->key[0] = 0;
+		veaddr->key[1] = 0;
+		veaddr->key[2] = 0;
+		veaddr->key[3] = sin->sin_addr.s_addr;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sin;
+
+		err = -EINVAL;
+		if (addrlen != sizeof(struct sockaddr_in6))
+			break;
+
+		err = 0;
+		sin = (struct sockaddr_in6 *)addr;
+		veaddr->family = AF_INET6;
+		memcpy(veaddr->key, &sin->sin6_addr, sizeof(veaddr->key));
+		break;
+	}
+	default:
+		err = -EAFNOSUPPORT;
+	}
+	return err;
+}
+
+int sockaddr_to_veaddr(struct sockaddr __user *uaddr, int addrlen,
+		struct ve_addr_struct *veaddr)
+{
+	int err;
+	char addr[MAX_SOCK_ADDR];
+
+	err = move_addr_to_kernel(uaddr, addrlen, &addr);
+	if (err < 0)
+		goto out;
+
+	err = convert_sockaddr((struct sockaddr *)&addr, addrlen, veaddr);
+out:
+	return err;
+}
+
+void veaddr_print(char *str, int len, struct ve_addr_struct *a)
+{
+	if (a->family == AF_INET)
+		snprintf(str, len, "%u.%u.%u.%u", NIPQUAD(a->key[3]));
+	else
+		snprintf(str, len, "%x:%x:%x:%x:%x:%x:%x:%x",
+				ntohl(a->key[0])>>16, ntohl(a->key[0])&0xFFFF,
+				ntohl(a->key[1])>>16, ntohl(a->key[1])&0xFFFF,
+				ntohl(a->key[2])>>16, ntohl(a->key[2])&0xFFFF,
+				ntohl(a->key[3])>>16, ntohl(a->key[3])&0xFFFF
+			);
+}
+
+/*
+ * Device functions
+ */
+
+static int venet_open(struct net_device *dev)
+{
+	if (!ve_is_super(get_exec_env()) && !try_module_get(THIS_MODULE))
+		return -EBUSY;
+	return 0;
+}
+
+static int venet_close(struct net_device *master)
+{
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
+	return 0;
+}
+
+static void venet_destructor(struct net_device *dev)
+{
+	struct venet_stats *stats = (struct venet_stats *)dev->priv;
+	if (stats == NULL)
+		return;
+	free_percpu(stats->real_stats);
+	kfree(stats);
+	dev->priv = NULL;
+}
+
+/*
+ * The higher levels take care of making this non-reentrant (it's
+ * called with bh's disabled).
+ */
+static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats;
+	struct net_device *rcv = NULL;
+	int length;
+
+	stats = venet_stats(dev, smp_processor_id());
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP)) {
+		struct iphdr *iph;
+		iph = skb->nh.iph;
+		if (MULTICAST(iph->daddr))
+			goto outf;
+	} else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h;
+		ip6h = skb->nh.ipv6h;
+		if (ipv6_addr_is_multicast(&ip6h->daddr))
+			goto outf;
+		skb_orphan(skb);
+	} else {
+		goto outf;
+	}
+
+	if (venet_change_skb_owner(skb) < 0)
+		goto outf;
+
+	if (unlikely(skb->owner_env->disable_net))
+		goto outf;
+
+	rcv = skb->owner_env->_venet_dev;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	dev_hold(rcv);
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		dev_put(rcv);
+		goto outf;
+	}
+
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = rcv;
+
+	/*
+	 * If there is not enough space for the header we allocate one.
+	 * Remember, the traffic can reach the VE from the outside world
+	 * and as a result we have to clean up the MAC address of such a
+	 * packet. The same applies to traffic which comes from inside
+	 * the VE, but if TUN is used and traffic gets fragmented we
+	 * might reach the point where there is no L2 header at all and
+	 * hard_header_len is simply ignored (because this parameter is
+	 * kind of a hint for upper net layers and never a guarantee
+	 * that a header will be provided). To unify the way packets
+	 * are seen after venet we always produce an L2 header with a zeroed MAC.
+	 */
+	if (unlikely(skb_headroom(skb) < dev->hard_header_len)) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
+		if (!skb2) {
+			dev_put(rcv);
+			goto outf;
+		}
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+		kfree_skb(skb);
+		skb = skb2;
+	}
+
+	skb_reset_mac_header(skb);
+	memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
+
+	dst_release(skb->dst);
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#endif
+	length = skb->len;
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		/* Generated locally */
+		skb->proto_csum_blank = 1;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else if (skb->proto_data_valid) {
+		/* Remote but checksummed */
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (rcv && venet_napi)
+		venet_netif_rx(skb);
+	else
+		netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {
+		struct net_device_stats *rcv_stats;
+
+		rcv_stats = venet_stats(rcv, smp_processor_id());
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+		dev_put(rcv);
+	}
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	++stats->tx_dropped;
+	return 0;
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	int i;
+	struct venet_stats *stats;
+
+	stats = (struct venet_stats *)dev->priv;
+	memset(&stats->stats, 0, sizeof(struct net_device_stats));
+	for_each_possible_cpu(i) {
+		struct net_device_stats *dev_stats = venet_stats(dev, i);
+
+		stats->stats.rx_bytes   += dev_stats->rx_bytes;
+		stats->stats.tx_bytes   += dev_stats->tx_bytes;
+		stats->stats.rx_packets += dev_stats->rx_packets;
+		stats->stats.tx_packets += dev_stats->tx_packets;
+		stats->stats.tx_dropped += dev_stats->tx_dropped;
+	}
+
+	return &stats->stats;
+}
+
+/* Initialize the rest of the venet device. */
+int venet_init_dev(struct net_device *dev)
+{
+	struct venet_stats *stats;
+
+	dev->hard_start_xmit = venet_xmit;
+	stats = kzalloc(sizeof(struct venet_stats), GFP_KERNEL);
+	if (stats == NULL)
+		goto fail;
+	stats->real_stats = alloc_percpu(struct net_device_stats);
+	if (stats->real_stats == NULL)
+		goto fail_free;
+	dev->priv = stats;
+
+	dev->get_stats = get_stats;
+	dev->open = venet_open;
+	dev->stop = venet_close;
+	dev->destructor = venet_destructor;
+
+	/*
+	 *	Fill in the generic fields of the device structure.
+	 */
+	dev->type		= ARPHRD_VOID;
+	dev->hard_header_len 	= ETH_HLEN;
+	dev->mtu		= 1500; /* eth_mtu */
+	dev->tx_queue_len	= 0;
+
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
+
+	/* New-style flags. */
+	dev->flags		= IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT;
+	return 0;
+
+fail_free:
+	kfree(stats);
+fail:
+	return -ENOMEM;
+}
+
+static int
+venet_set_op(struct net_device *dev, u32 data,
+	     int (*fop)(struct net_device *, u32))
+{
+
+	struct ve_struct *ve;
+	int ret = 0;
+
+	read_lock(&ve_list_lock);
+	for_each_ve(ve) {
+		struct ve_struct *ve_old;
+
+		ve_old = set_exec_env(ve);
+		read_lock(&dev_base_lock);
+		for (dev = dev_base; dev != NULL; dev = dev->next) {
+			if (dev->hard_start_xmit == venet_xmit)
+				ret = fop(dev, data);
+		}
+		read_unlock(&dev_base_lock);
+		set_exec_env(ve_old);
+
+		if (ret < 0)
+			break;
+	}
+	read_unlock(&ve_list_lock);
+	return ret;
+}
+
+static unsigned long common_features;
+
+static int
+venet_op_set_sg(struct net_device *dev, u32 data)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	if (data)
+		common_features |= NETIF_F_SG;
+	else
+		common_features &= ~NETIF_F_SG;
+
+	return venet_set_op(dev, data, ethtool_op_set_sg);
+}
+
+static int
+venet_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	if (data)
+		common_features |= NETIF_F_IP_CSUM;
+	else
+		common_features &= ~NETIF_F_IP_CSUM;
+
+	return venet_set_op(dev, data, ethtool_op_set_tx_csum);
+}
+
+static int
+venet_op_set_tso(struct net_device *dev, u32 data)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	if (data)
+		common_features |= NETIF_F_TSO;
+	else
+		common_features &= ~NETIF_F_TSO;
+
+	return venet_set_op(dev, data, ethtool_op_set_tso);
+}
+
+#define venet_op_set_rx_csum venet_op_set_tx_csum
+
+static struct ethtool_ops venet_ethtool_ops = {
+	.get_sg = ethtool_op_get_sg,
+	.set_sg = venet_op_set_sg,
+	.get_tx_csum = ethtool_op_get_tx_csum,
+	.set_tx_csum = venet_op_set_tx_csum,
+	.get_rx_csum = ethtool_op_get_tx_csum,
+	.set_rx_csum = venet_op_set_rx_csum,
+	.get_tso = ethtool_op_get_tso,
+	.set_tso = venet_op_set_tso,
+};
+
+static void cpt_dump_venet(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+}
+
+static struct net_device_stats *cpt_venet_stats_ptr(struct net_device *dev)
+{
+	return venet_stats(dev, smp_processor_id());
+}
+
+static struct dev_cpt_ops venet_cpt_ops = {
+	.dump = cpt_dump_venet,
+	.stats = cpt_venet_stats_ptr,
+};
+
+static void venet_setup(struct net_device *dev)
+{
+	dev->init = venet_init_dev;
+
+	/*
+	 * No other features, as they are:
+	 *  - checksumming is required, and nobody else will do our job
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL | NETIF_F_LLTX |
+	       NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED;
+
+	dev->features |= common_features;
+
+	SET_MODULE_OWNER(dev);
+	SET_ETHTOOL_OPS(dev, &venet_ethtool_ops);
+
+	dev->cpt_ops = &venet_cpt_ops;
+}
+
+#ifdef CONFIG_PROC_FS
+static void veaddr_seq_print(struct seq_file *m, struct ve_struct *ve)
+{
+	struct ip_entry_struct *entry;
+
+	read_lock(&veip_hash_lock);
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each_entry (entry, &ve->veip->ip_lh, ve_list) {
+		char addr[40];
+
+		if (entry->active_env == NULL)
+			continue;
+
+		veaddr_print(addr, sizeof(addr), &entry->addr);
+		if (entry->addr.family == AF_INET)
+			seq_printf(m, " %15s", addr);
+		else
+			seq_printf(m, " %39s", addr);
+	}
+unlock:
+	read_unlock(&veip_hash_lock);
+}
+
+static void *veip_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l;
+	struct list_head *p;
+	int i;
+
+	l = *pos;
+	write_lock_irq(&veip_hash_lock);
+	if (l == 0)
+		return ip_entry_hash_table;
+	for (i = 0; i < VEIP_HASH_SZ; i++) {
+		list_for_each(p, ip_entry_hash_table + i) {
+			if (--l == 0)
+				return p;
+		}
+	}
+	return NULL;
+}
+
+static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+
+	p = (struct list_head *)v;
+	while (1) {
+		p = p->next;
+		if (p < ip_entry_hash_table ||
+		    p >= ip_entry_hash_table + VEIP_HASH_SZ) {
+			(*pos)++;
+			return p;
+		}
+		if (++p >= ip_entry_hash_table + VEIP_HASH_SZ)
+			return NULL;
+	}
+	return NULL;
+}
+
+static void veip_seq_stop(struct seq_file *m, void *v)
+{
+	write_unlock_irq(&veip_hash_lock);
+}
+
+static struct seq_operations veip_seq_op = {
+	.start	= veip_seq_start,
+	.next	= veip_seq_next,
+	.stop	= veip_seq_stop,
+	.show	= veip_seq_show,
+};
+
+static int veip_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &veip_seq_op);
+}
+
+static struct file_operations proc_veip_operations = {
+	.open		= veip_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
+static int real_ve_ip_map(envid_t veid, int op, struct sockaddr __user *uaddr,
+		int addrlen)
+{
+	int err;
+	struct ve_struct *ve;
+	struct ve_addr_struct addr;
+
+	err = -EPERM;
+	if (!capable_setveid())
+		goto out;
+
+	err = sockaddr_to_veaddr(uaddr, addrlen, &addr);
+	if (err < 0)
+		goto out;
+
+	switch (op)
+	{
+		case VE_IP_ADD:
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veip_entry_add(ve, &addr);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_IP_DEL:
+			err = veip_entry_del(veid, &addr);
+			break;
+		case VE_IP_EXT_ADD:
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			err = venet_ext_add(ve, &addr);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+		case VE_IP_EXT_DEL:
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			err = venet_ext_del(ve, &addr);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+		default:
+			err = -EINVAL;
+	}
+
+out:
+	return err;
+}
+
+int venet_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	case VENETCTL_VE_IP_MAP: {
+		struct vzctl_ve_ip_map s;
+		err = -EFAULT;
+		if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+			break;
+		err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen);
+		break;
+	}
+	}
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+int compat_venet_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	switch(cmd) {
+	case VENETCTL_COMPAT_VE_IP_MAP: {
+		struct compat_vzctl_ve_ip_map cs;
+
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+
+		err = real_ve_ip_map(cs.veid, cs.op, compat_ptr(cs.addr),
+				cs.addrlen);
+		break;
+	}
+	default:
+		err = venet_ioctl(file, cmd, arg);
+		break;
+	}
+	return err;
+}
+#endif
+
+static struct vzioctlinfo venetcalls = {
+	.type		= VENETCTLTYPE,
+	.ioctl		= venet_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_venet_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+};
+
+int venet_dev_start(struct ve_struct *env)
+{
+	struct net_device *dev_venet;
+	int err;
+
+	dev_venet = alloc_netdev(0, "venet%d", venet_setup);
+	if (!dev_venet)
+		return -ENOMEM;
+	err = dev_alloc_name(dev_venet, dev_venet->name);
+	if (err<0)
+		goto err;
+	if ((err = register_netdev(dev_venet)) != 0)
+		goto err;
+	env->_venet_dev = dev_venet;
+	return 0;
+err:
+	free_netdev(dev_venet);
+	printk(KERN_ERR "VENET initialization error err=%d\n", err);
+	return err;
+}
+
+static int venet_start(void *data)
+{
+	struct ve_struct *env;
+	int err;
+
+	env = (struct ve_struct *)data;
+	if (env->veip)
+		return -EEXIST;
+
+	err = veip_start(env);
+	if (err != 0)
+		return err;
+
+	err = venet_dev_start(env);
+	if (err)
+		goto err_free;
+	return 0;
+
+err_free:
+	veip_stop(env);
+	return err;
+}
+
+static void venet_stop(void *data)
+{
+	struct ve_struct *env;
+	struct net_device *dev;
+
+	env = (struct ve_struct *)data;
+	venet_ext_clean(env);
+	veip_stop(env);
+
+	dev = env->_venet_dev;
+	if (dev == NULL)
+		return;
+
+	unregister_netdev(dev);
+	env->_venet_dev = NULL;
+	free_netdev(dev);
+}
+
+static struct ve_hook venet_ve_hook = {
+	.init	  = venet_start,
+	.fini	  = venet_stop,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+};
+
+static int venet_netif_rx(struct sk_buff *skb)
+{
+	dev_hold(skb->dev);
+	skb_queue_tail(&venet_pkt_queue, skb);
+	wake_up_process(venet_rx_tsk);
+	return NET_RX_SUCCESS;
+}
+
+static void venet_process_backlog(void)
+{
+	unsigned long start_time;
+
+	local_bh_disable();
+	start_time = jiffies;
+	while (1) {
+		struct sk_buff *skb;
+		struct net_device *dev;
+
+		skb = skb_dequeue(&venet_pkt_queue);
+		if (!skb)
+			break;
+
+		dev = skb->dev;
+		netif_receive_skb(skb);
+		dev_put(dev);
+		if (need_resched() || (jiffies - start_time > 1))
+			break;
+	}
+	local_bh_enable();
+}
+
+static int venet_rx_loop(void *data)
+{
+	while (1) {
+		try_to_freeze();
+		venet_process_backlog();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!skb_queue_empty(&venet_pkt_queue))
+			__set_current_state(TASK_RUNNING);
+		else
+			if (kthread_should_stop())
+				break;
+
+		schedule();
+	}
+	return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *venet_sysctl_table;
+#define CTL_UNNUMBERED           -2
+
+static ctl_table venet_sysctl_dir[] = {
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "venet_napi",
+		.data = &venet_napi,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table venet_sysctl_root[] = {
+	{
+		.ctl_name = CTL_NET,
+		.procname = "debug",
+		.mode = 0555,
+		.child = venet_sysctl_dir,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
+__init int venet_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+	int i, err;
+
+	if (get_ve0()->_venet_dev != NULL)
+		return -EEXIST;
+
+	for (i = 0; i < VEIP_HASH_SZ; i++)
+		INIT_LIST_HEAD(ip_entry_hash_table + i);
+
+	skb_queue_head_init(&venet_pkt_queue);
+
+	venet_rx_tsk = kthread_run(venet_rx_loop, NULL, "venet_rx");
+	if (IS_ERR(venet_rx_tsk)) {
+		err = PTR_ERR(venet_rx_tsk);
+		printk(KERN_ERR "Cannot start venet_rx kernel thread\n");
+		goto fail;
+	}
+
+#ifdef CONFIG_SYSCTL
+	venet_sysctl_table = register_sysctl_table(venet_sysctl_root, 0);
+	if (venet_sysctl_table == NULL) {
+		printk(KERN_ERR "Cannot register venet sysctl table\n");
+		err = -ENOMEM;
+		goto fail_sysctl;
+	}
+#endif
+
+	err = venet_start(get_ve0());
+	if (err) {
+		printk(KERN_ERR "venet_start() fails (0x%8x)\n", err);
+		goto fail_start;
+	}
+
+#ifdef CONFIG_PROC_FS
+	de = create_proc_entry_mod("vz/veip", 
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_veip_operations;
+	else
+		printk(KERN_WARNING "venet: can't make veip proc entry\n");
+#endif
+
+	ve_hook_register(VE_SS_CHAIN, &venet_ve_hook);
+	vzioctl_register(&venetcalls);
+	vzmon_register_veaddr_print_cb(veaddr_seq_print);
+	return 0;
+
+fail_start:
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(venet_sysctl_table);
+fail_sysctl:
+#endif
+	kthread_stop(venet_rx_tsk);
+fail:
+	return err;
+}
+
+__exit void venet_exit(void)
+{
+	vzmon_unregister_veaddr_print_cb(veaddr_seq_print);
+	vzioctl_unregister(&venetcalls);
+	ve_hook_unregister(&venet_ve_hook);
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veip", NULL);
+#endif
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(venet_sysctl_table);
+#endif
+	venet_stop(get_ve0());
+	kthread_stop(venet_rx_tsk);
+	veip_cleanup();
+}
+
+module_init(venet_init);
+module_exit(venet_exit);
diff -upr kernel-2.6.18-417.el5.orig/drivers/net/veth.c kernel-2.6.18-417.el5-028stab121/drivers/net/veth.c
--- kernel-2.6.18-417.el5.orig/drivers/net/veth.c	2017-01-13 08:40:21.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/net/veth.c	2017-01-13 08:40:34.000000000 -0500
@@ -0,0 +1,803 @@
+/*
+ *  veth.c
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual ethernet device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/if_bridge.h>
+#include <linux/ethtool.h>
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_veth.h>
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+#include <linux/nfcalls.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
+static struct list_head veth_hwaddr_list;
+static rwlock_t ve_hwaddr_lock = RW_LOCK_UNLOCKED;
+static DECLARE_MUTEX(hwaddr_sem);
+
+struct net_device * veth_dev_start(char *dev_addr, char *name);
+
+#define veth_from_netdev(dev) \
+	((struct veth_struct *)(netdev_priv(dev)))
+
+#define veth_to_netdev(veth) \
+	((struct net_device *)((char *)veth - \
+	(unsigned long)netdev_priv(NULL)))
+
+struct veth_struct {
+	struct net_device_stats stats;
+	struct net_device	*pair;
+	struct list_head	hwaddr_list;
+	struct net_device_stats	*real_stats;
+	int			allow_mac_change;
+};
+
+struct veth_struct *hwaddr_entry_lookup(char *name)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp;
+
+	list_for_each(tmp, &veth_hwaddr_list) {
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		BUG_ON(entry->pair == NULL);
+		if (strncmp(name, entry->pair->name, IFNAMSIZ) == 0)
+			return entry;
+	}
+	return NULL;
+}
+
+int veth_entry_add(struct ve_struct *ve, char *dev_addr, char *name,
+		char *dev_addr_ve, char *name_ve)
+{
+	struct net_device *dev_ve;
+	struct net_device *dev_ve0;
+	struct ve_struct *old_env;
+	char dev_name[IFNAMSIZ];
+	int err;
+
+	down(&hwaddr_sem);
+
+	if (name[0] == '\0')
+		snprintf(dev_name, sizeof(dev_name), "vz%d.%%d", ve->veid);
+	else {
+		memcpy(dev_name, name, IFNAMSIZ - 1);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve0 = veth_dev_start(dev_addr, dev_name);
+	if (IS_ERR(dev_ve0)) {
+		err = PTR_ERR(dev_ve0);
+		goto err;
+	}
+
+	old_env = set_exec_env(ve);
+	if (name_ve[0] == '\0')
+		sprintf(dev_name, "eth%%d");
+	else {
+		memcpy(dev_name, name_ve, IFNAMSIZ - 1);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve = veth_dev_start(dev_addr_ve, dev_name);
+	if (IS_ERR(dev_ve)) {
+		err = PTR_ERR(dev_ve);
+		goto err_ve;
+	}
+	set_exec_env(old_env);
+	veth_from_netdev(dev_ve)->pair = dev_ve0;
+	veth_from_netdev(dev_ve0)->pair = dev_ve;
+
+	write_lock(&ve_hwaddr_lock);
+	list_add(&(veth_from_netdev(dev_ve)->hwaddr_list), &veth_hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	up(&hwaddr_sem);
+	return 0;
+
+err_ve:
+	set_exec_env(old_env);
+	unregister_netdev(dev_ve0);
+err:
+	up(&hwaddr_sem);
+	return err;
+}
+
+void veth_pair_del(struct ve_struct *env, struct veth_struct *entry)
+{
+	struct net_device *dev;
+	struct ve_struct *old_env;
+
+	write_lock(&ve_hwaddr_lock);
+	list_del(&entry->hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	dev = entry->pair;
+	BUG_ON(entry->pair == NULL);
+
+	veth_from_netdev(dev)->pair = NULL;
+	entry->pair = NULL;
+	rtnl_lock();
+	old_env = set_exec_env(dev->owner_env);
+	dev_close(dev);
+
+	/*
+	 * Now device from VE0 does not send or receive anything,
+	 * i.e. dev->hard_start_xmit won't be called.
+	 */
+	set_exec_env(env);
+	unregister_netdevice(veth_to_netdev(entry));
+	set_exec_env(dev->owner_env);
+	unregister_netdevice(dev);
+	set_exec_env(old_env);
+	rtnl_unlock();
+}
+
+int veth_entry_del(struct ve_struct *ve, char *name)
+{
+	struct veth_struct *found;
+	int err;
+
+	err = -ENODEV;
+	down(&hwaddr_sem);
+	found = hwaddr_entry_lookup(name);
+	if (found == NULL)
+		goto out;
+	if (veth_to_netdev(found)->owner_env != ve)
+		goto out;
+
+	err = 0;
+	veth_pair_del(ve, found);
+
+out:
+	up(&hwaddr_sem);
+	return err;
+}
+
+int veth_allow_change_mac(envid_t veid, char *name, int allow)
+{
+	struct ve_struct *ve;
+	struct veth_struct *found;
+	int err;
+
+	err = -ESRCH;
+	ve = get_ve_by_id(veid);
+	if (!ve)
+		return err;
+
+	down_read(&ve->op_sem);
+	if (!ve->is_running)
+		goto out_ve;
+	err = -ENODEV;
+	down(&hwaddr_sem);
+	found = hwaddr_entry_lookup(name);
+	if (found == NULL)
+		goto out_sem;
+	if (veth_to_netdev(found)->owner_env != ve)
+		goto out_sem;
+
+	err = 0;
+	found->allow_mac_change = allow;
+
+out_sem:
+	up(&hwaddr_sem);
+out_ve:
+	up_read(&ve->op_sem);
+	put_ve(ve);
+	return err;
+}
+
+/*
+ * Device functions
+ */
+
+static void veth_destructor(struct net_device *dev)
+{
+	free_percpu(veth_from_netdev(dev)->real_stats);
+	free_netdev(dev);
+}
+
+static struct net_device_stats *
+veth_stats(struct net_device *dev, int cpuid)
+{
+	return per_cpu_ptr(veth_from_netdev(dev)->real_stats, cpuid);
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	int i;
+	struct net_device_stats *stats;
+
+	stats = &veth_from_netdev(dev)->stats;
+	memset(stats, 0, sizeof(struct net_device_stats));
+	for (i=0; i < NR_CPUS; i++) {
+		struct net_device_stats *dev_stats;
+
+		if (!cpu_possible(i)) 
+			continue;
+		dev_stats = veth_stats(dev, i);
+		stats->rx_bytes   += dev_stats->rx_bytes;
+		stats->tx_bytes   += dev_stats->tx_bytes;
+		stats->rx_packets += dev_stats->rx_packets;
+		stats->tx_packets += dev_stats->tx_packets;
+		stats->tx_dropped += dev_stats->tx_dropped;
+	}
+
+	return stats;
+}
+
+/*
+ * The higher levels take care of making this non-reentrant (it's
+ * called with bh's disabled).
+ */
+static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats;
+	struct net_device *rcv = NULL;
+	struct veth_struct *entry;
+	int length;
+
+	stats = veth_stats(dev, smp_processor_id());
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	entry = veth_from_netdev(dev);
+	rcv = entry->pair;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		goto outf;
+	}
+
+	if (unlikely(rcv->owner_env->disable_net))
+		goto outf;
+	/* Filtering */
+	if (ve_is_super(dev->owner_env) &&
+			!veth_from_netdev(rcv)->allow_mac_change) {
+		/* from VE0 to VEX */
+		if (ve_is_super(rcv->owner_env))
+			goto out;
+		if (is_multicast_ether_addr(
+					((struct ethhdr *)skb->data)->h_dest))
+			goto out;
+		if (!rcv->br_port &&
+			compare_ether_addr(((struct ethhdr *)skb->data)->h_dest, rcv->dev_addr))
+				goto outf;
+	} else if (!ve_is_super(dev->owner_env) &&
+			!entry->allow_mac_change) {
+		/* from VEX to VE0 */
+		if (!skb->dev->br_port &&
+			compare_ether_addr(((struct ethhdr *)skb->data)->h_source, dev->dev_addr))
+				goto outf;
+	}
+
+out:
+	skb->owner_env = rcv->owner_env;
+
+	skb->dev = rcv;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, rcv);
+
+	if (skb->protocol != __constant_htons(ETH_P_IP))
+		skb_orphan(skb);
+
+	dst_release(skb->dst);
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#endif
+	skb_init_brmark(skb);
+
+	length = skb->len;
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		/* Generated locally */
+		skb->proto_csum_blank = 1;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else if (skb->proto_data_valid) {
+		/* Remote but checksummed */
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {
+		struct net_device_stats *rcv_stats;
+		rcv_stats = veth_stats(rcv, smp_processor_id());
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+	}
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	stats->tx_dropped++;
+	return 0;
+}
+
+static int veth_set_mac(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!ve_is_super(dev->owner_env) &&
+			!veth_from_netdev(dev)->allow_mac_change)
+		return -EPERM;
+	if (netif_running(dev))
+		return -EBUSY;
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+
+	return 0;
+}
+
+int veth_init_dev(struct net_device *dev)
+{
+	dev->hard_start_xmit = veth_xmit;
+	dev->get_stats = get_stats;
+	dev->destructor = veth_destructor;
+
+	ether_setup(dev);
+	dev->set_mac_address = veth_set_mac;
+
+	/* remove the change_mtu handler set by ether_setup() */
+	dev->change_mtu	= NULL;
+
+	dev->tx_queue_len = 0;
+
+	veth_from_netdev(dev)->real_stats =
+		alloc_percpu(struct net_device_stats);
+	if (veth_from_netdev(dev)->real_stats == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int
+veth_set_op(struct net_device *dev, u32 data,
+	     int (*fop)(struct net_device *, u32))
+{
+	struct net_device *pair;
+	int ret = 0;
+
+	ret = fop(dev, data);
+	if (ret < 0)
+		goto out;
+
+	pair = veth_from_netdev(dev)->pair;
+	if (pair)
+		ret = fop(pair, data);
+out:
+	return ret;
+}
+
+static int
+veth_op_set_sg(struct net_device *dev, u32 data)
+{
+	return veth_set_op(dev, data, ethtool_op_set_sg);
+}
+
+static int
+veth_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+	return veth_set_op(dev, data, ethtool_op_set_tx_csum);
+}
+
+static int
+veth_op_set_tso(struct net_device *dev, u32 data)
+{
+	return veth_set_op(dev, data, ethtool_op_set_tso);
+}
+
+#define veth_op_set_rx_csum veth_op_set_tx_csum
+
+static struct ethtool_ops veth_ethtool_ops = {
+	.get_sg = ethtool_op_get_sg,
+	.set_sg = veth_op_set_sg,
+	.get_tx_csum = ethtool_op_get_tx_csum,
+	.set_tx_csum = veth_op_set_tx_csum,
+	.get_rx_csum = ethtool_op_get_tx_csum,
+	.set_rx_csum = veth_op_set_rx_csum,
+	.get_tso = ethtool_op_get_tso,
+	.set_tso = veth_op_set_tso,
+};
+
+static void cpt_dump_veth(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	struct cpt_veth_image v;
+	struct veth_struct *veth;
+
+	veth = veth_from_netdev(dev);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_VETH;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_VOID;
+
+	v.cpt_allow_mac_change = veth->allow_mac_change;
+
+	ops->write(&v, sizeof(v), ctx);
+}
+
+static int rst_restore_veth(loff_t pos, struct cpt_netdev_image *di,
+			struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_veth_image vi;
+	struct net_device *dev;
+	struct veth_struct *veth;
+
+	pos = pos + di->cpt_hdrlen;
+	err = ops->get_object(CPT_OBJ_NET_VETH,
+			pos, &vi, sizeof(vi), ctx);
+	if (err)
+		return err;
+
+	dev = __dev_get_by_name(di->cpt_name);
+	if (dev == NULL)
+		return -ENODEV;
+
+	veth = veth_from_netdev(dev);
+	veth->allow_mac_change = vi.cpt_allow_mac_change;
+
+	return 0;
+}
+
+static struct net_device_stats *cpt_veth_stats_ptr(struct net_device *dev)
+{
+	return veth_stats(dev, smp_processor_id());
+}
+
+static struct dev_cpt_ops veth_cpt_ops = {
+	.cpt_object = CPT_OBJ_NET_VETH,
+	.name = "veth",
+	.dump = cpt_dump_veth,
+	.restore = rst_restore_veth,
+	.stats = cpt_veth_stats_ptr,
+};
+
+static void veth_setup(struct net_device *dev)
+{
+	dev->init = veth_init_dev;
+
+	/*
+	 * No other features, as they are:
+	 *  - checksumming is required, and nobody else will do our job
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL | NETIF_F_LLTX |
+		NETIF_F_HIGHDMA;
+
+	SET_MODULE_OWNER(dev);
+	SET_ETHTOOL_OPS(dev, &veth_ethtool_ops);
+
+	dev->cpt_ops = &veth_cpt_ops;
+}
+
+#ifdef CONFIG_PROC_FS
+#define ADDR_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define ADDR_ARG(x) (x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5]
+static int vehwaddr_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct veth_struct *entry;
+
+	p = (struct list_head *)v;
+	if (p == &veth_hwaddr_list) {
+		seq_puts(m, "Version: 1.0\n");
+		return 0;
+	}
+	entry = list_entry(p, struct veth_struct, hwaddr_list);
+	seq_printf(m, ADDR_FMT " %16s ",
+			ADDR_ARG(entry->pair->dev_addr), entry->pair->name);
+	seq_printf(m, ADDR_FMT " %16s %10u %5s\n",
+			ADDR_ARG(veth_to_netdev(entry)->dev_addr),
+			veth_to_netdev(entry)->name,
+			VEID(veth_to_netdev(entry)->owner_env),
+			entry->allow_mac_change ? "allow" : "deny");
+	return 0;
+}
+
+static void *vehwaddr_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l;
+	struct list_head *p;
+
+	l = *pos;
+	read_lock(&ve_hwaddr_lock);
+	if (l == 0)
+		return &veth_hwaddr_list;
+	list_for_each(p, &veth_hwaddr_list) {
+		if (--l == 0)
+			return p;
+	}
+	return NULL;
+}
+
+static void *vehwaddr_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+
+	p = (struct list_head *)v;
+	(*pos)++;
+	return p->next == &veth_hwaddr_list ? NULL : p->next;
+}
+
+static void vehwaddr_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_hwaddr_lock);
+}
+
+static struct seq_operations vehwaddr_seq_op = {
+	.start 	= vehwaddr_seq_start,
+	.next	= vehwaddr_seq_next,
+	.stop	= vehwaddr_seq_stop,
+	.show	= vehwaddr_seq_show
+};
+
+static int vehwaddr_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &vehwaddr_seq_op);
+}
+
+static struct file_operations proc_vehwaddr_operations = {
+	.open		= vehwaddr_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release
+};
+#endif
+
+int real_ve_hwaddr(envid_t veid, int op,
+		unsigned char *dev_addr, int addrlen, char *name,
+		unsigned char *dev_addr_ve, int addrlen_ve, char *name_ve)
+{
+	int err;
+	struct ve_struct *ve;
+	char ve_addr[ETH_ALEN];
+
+	err = -EPERM;
+	if (!capable(CAP_NET_ADMIN))
+		goto out;
+
+	err = -EINVAL;
+	switch (op)
+	{
+		case VE_ETH_ADD:
+			if (addrlen != ETH_ALEN)
+				goto out;
+			if (addrlen_ve != ETH_ALEN && addrlen_ve != 0)
+				goto out;
+			/* If ve addr is not set then we use dev_addr[3] & 0x80 for it */
+			if (addrlen_ve == 0 && (dev_addr[3] & 0x80))
+				goto out;
+			if (addrlen_ve == 0) {
+				memcpy(ve_addr, dev_addr, ETH_ALEN);
+				ve_addr[3] |= 0x80;
+			} else {
+				memcpy(ve_addr, dev_addr_ve, ETH_ALEN);
+			}
+
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_add(ve, dev_addr, name,
+						ve_addr, name_ve);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_ETH_DEL:
+			if (name[0] == '\0')
+				goto out;
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_del(ve, name);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+		case VE_ETH_ALLOW_MAC_CHANGE:
+		case VE_ETH_DENY_MAC_CHANGE:
+			err = veth_allow_change_mac(veid, name,
+					op == VE_ETH_ALLOW_MAC_CHANGE);
+			break;
+	}
+
+out:
+	return err;
+}
+
+int veth_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VETHCTL_VE_HWADDR: {
+			struct vzctl_ve_hwaddr s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = real_ve_hwaddr(s.veid, s.op,
+					s.dev_addr, s.addrlen, s.dev_name,
+					s.dev_addr_ve, s.addrlen_ve, s.dev_name_ve);
+		}
+		break;
+	}
+	return err;
+}
+
+static struct vzioctlinfo vethcalls = {
+	.type		= VETHCTLTYPE,
+	.ioctl		= veth_ioctl,
+	.compat_ioctl	= veth_ioctl,
+	.owner		= THIS_MODULE,
+};
+
+struct net_device * veth_dev_start(char *dev_addr, char *name)
+{
+	struct net_device *dev;
+	int err;
+
+	if (!is_valid_ether_addr(dev_addr))
+		return ERR_PTR(-EADDRNOTAVAIL);
+
+	dev = alloc_netdev(sizeof(struct veth_struct), name, veth_setup);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+	if (strchr(dev->name, '%')) {
+		err = dev_alloc_name(dev, dev->name);
+		if (err < 0)
+			goto err;
+	}
+	if ((err = register_netdev(dev)) != 0)
+		goto err;
+
+	memcpy(dev->dev_addr, dev_addr, ETH_ALEN);
+	dev->addr_len = ETH_ALEN;
+
+	return dev;
+err:
+	free_netdev(dev);
+	printk(KERN_ERR "%s initialization error err=%d\n", name, err);
+	return ERR_PTR(err);
+}
+
+static int veth_start(void *data)
+{
+	return 0;
+}
+
+static void veth_stop(void *data)
+{
+	struct ve_struct *env;
+	struct veth_struct *entry, *tmp;
+
+	env = (struct ve_struct *)data;
+	down(&hwaddr_sem);
+	list_for_each_entry_safe(entry, tmp, &veth_hwaddr_list, hwaddr_list)
+		if (VEID(env) == VEID(veth_to_netdev(entry)->owner_env))
+			veth_pair_del(env, entry);
+	up(&hwaddr_sem);
+}
+
+static struct ve_hook veth_ve_hook = {
+	.init	  = veth_start,
+	.fini	  = veth_stop,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+};
+
+__init int veth_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+
+	INIT_LIST_HEAD(&veth_hwaddr_list);
+
+#ifdef CONFIG_PROC_FS
+	de = create_proc_entry_mod("vz/veth",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_vehwaddr_operations;
+	else
+		printk(KERN_WARNING "veth: can't make vehwaddr proc entry\n");
+#endif
+
+	ve_hook_register(VE_SS_CHAIN, &veth_ve_hook);
+	vzioctl_register(&vethcalls);
+	register_dev_cpt_ops(&veth_cpt_ops);
+	return 0;
+}
+
+__exit void veth_exit(void)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp, *n;
+	struct ve_struct *ve;
+
+	unregister_dev_cpt_ops(&veth_cpt_ops);
+	vzioctl_unregister(&vethcalls);
+	ve_hook_unregister(&veth_ve_hook);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veth", NULL);
+#endif
+
+	down(&hwaddr_sem);
+	list_for_each_safe(tmp, n, &veth_hwaddr_list) {
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		ve = get_ve(veth_to_netdev(entry)->owner_env);
+
+		veth_pair_del(ve, entry);
+
+		put_ve(ve);
+	}
+	up(&hwaddr_sem);
+}
+
+module_init(veth_init);
+module_exit(veth_exit);
+
+MODULE_AUTHOR("Andrey Mirkin <amirkin@sw.ru>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Ethernet Device");
+MODULE_LICENSE("GPL v2");
+
diff -upr kernel-2.6.18-417.el5.orig/drivers/parisc/led.c kernel-2.6.18-417.el5-028stab121/drivers/parisc/led.c
--- kernel-2.6.18-417.el5.orig/drivers/parisc/led.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/parisc/led.c	2017-01-13 08:40:15.000000000 -0500
@@ -684,7 +684,7 @@ int __init led_init(void)
 	int ret;
 
 	snprintf(lcd_text_default, sizeof(lcd_text_default),
-		"Linux %s", system_utsname.release);
+		"Linux %s", init_utsname()->release);
 
 	/* Work around the buggy PDC of KittyHawk-machines */
 	switch (CPU_HVERSION) {
diff -upr kernel-2.6.18-417.el5.orig/drivers/pci/probe.c kernel-2.6.18-417.el5-028stab121/drivers/pci/probe.c
--- kernel-2.6.18-417.el5.orig/drivers/pci/probe.c	2017-01-13 07:39:13.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/pci/probe.c	2017-01-13 08:40:19.000000000 -0500
@@ -22,6 +22,7 @@ LIST_HEAD(pci_root_buses);
 EXPORT_SYMBOL(pci_root_buses);
 
 LIST_HEAD(pci_devices);
+EXPORT_SYMBOL(pci_devices);
 
 /*
  * Some device drivers need know if pci is initiated.
diff -upr kernel-2.6.18-417.el5.orig/drivers/sbus/char/bbc_envctrl.c kernel-2.6.18-417.el5-028stab121/drivers/sbus/char/bbc_envctrl.c
--- kernel-2.6.18-417.el5.orig/drivers/sbus/char/bbc_envctrl.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/sbus/char/bbc_envctrl.c	2017-01-13 08:40:15.000000000 -0500
@@ -14,6 +14,7 @@ static int errno;
 #include <linux/delay.h>
 #include <asm/oplib.h>
 #include <asm/ebus.h>
+#include <asm/unistd.h>
 
 #include "bbc_i2c.h"
 #include "max1617.h"
diff -upr kernel-2.6.18-417.el5.orig/drivers/sbus/char/envctrl.c kernel-2.6.18-417.el5-028stab121/drivers/sbus/char/envctrl.c
--- kernel-2.6.18-417.el5.orig/drivers/sbus/char/envctrl.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/sbus/char/envctrl.c	2017-01-13 08:40:15.000000000 -0500
@@ -37,6 +37,7 @@ static int errno;
 #include <asm/ebus.h>
 #include <asm/uaccess.h>
 #include <asm/envctrl.h>
+#include <asm/unistd.h>
 
 #define ENVCTRL_MINOR	162
 
diff -upr kernel-2.6.18-417.el5.orig/drivers/scsi/lpfc/lpfc_ct.c kernel-2.6.18-417.el5-028stab121/drivers/scsi/lpfc/lpfc_ct.c
--- kernel-2.6.18-417.el5.orig/drivers/scsi/lpfc/lpfc_ct.c	2017-01-13 07:39:13.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/scsi/lpfc/lpfc_ct.c	2017-01-13 08:40:15.000000000 -0500
@@ -1530,9 +1530,9 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, 
 			ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
 			ae->ad.bits.AttrType = be16_to_cpu(OS_NAME_VERSION);
 			sprintf(ae->un.OsNameVersion, "%s %s %s",
-				system_utsname.sysname,
-				system_utsname.release,
-				system_utsname.version);
+				init_utsname()->sysname,
+				init_utsname()->release,
+				init_utsname()->version);
 			len = strlen(ae->un.OsNameVersion);
 			len += (len & 3) ? (4 - (len & 3)) : 4;
 			ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
@@ -1660,7 +1660,7 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, 
 							  size);
 				ae->ad.bits.AttrType = be16_to_cpu(HOST_NAME);
 				sprintf(ae->un.HostName, "%s",
-					system_utsname.nodename);
+					init_utsname()->nodename);
 				len = strlen(ae->un.HostName);
 				len += (len & 3) ? (4 - (len & 3)) : 4;
 				ae->ad.bits.AttrLen =
@@ -1806,7 +1806,7 @@ lpfc_fdmi_timeout_handler(struct lpfc_vp
 
 	ndlp = lpfc_findnode_did(vport, FDMI_DID);
 	if (ndlp && NLP_CHK_NODE_ACT(ndlp)) {
-		if (system_utsname.nodename[0] != '\0')
+		if (init_utsname()->nodename[0] != '\0')
 			lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA);
 		else
 			mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60);
diff -upr kernel-2.6.18-417.el5.orig/drivers/usb/core/hcd.c kernel-2.6.18-417.el5-028stab121/drivers/usb/core/hcd.c
--- kernel-2.6.18-417.el5.orig/drivers/usb/core/hcd.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/usb/core/hcd.c	2017-01-13 08:40:15.000000000 -0500
@@ -317,8 +317,8 @@ static int rh_string (
 
  	// id 3 == vendor description
 	} else if (id == 3) {
-		snprintf (buf, sizeof buf, "%s %s %s", system_utsname.sysname,
-			system_utsname.release, hcd->driver->description);
+		snprintf (buf, sizeof buf, "%s %s %s", init_utsname()->sysname,
+			init_utsname()->release, hcd->driver->description);
 
 	// unsupported IDs --> "protocol stall"
 	} else
diff -upr kernel-2.6.18-417.el5.orig/drivers/usb/gadget/ether.c kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/ether.c
--- kernel-2.6.18-417.el5.orig/drivers/usb/gadget/ether.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/ether.c	2017-01-13 08:40:15.000000000 -0500
@@ -2257,7 +2257,7 @@ eth_bind (struct usb_gadget *gadget)
 		return -ENODEV;
 	}
 	snprintf (manufacturer, sizeof manufacturer, "%s %s/%s",
-		system_utsname.sysname, system_utsname.release,
+		init_utsname()->sysname, init_utsname()->release,
 		gadget->name);
 
 	/* If there's an RNDIS configuration, that's what Windows wants to
diff -upr kernel-2.6.18-417.el5.orig/drivers/usb/gadget/file_storage.c kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/file_storage.c
--- kernel-2.6.18-417.el5.orig/drivers/usb/gadget/file_storage.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/file_storage.c	2017-01-13 08:40:15.000000000 -0500
@@ -3982,7 +3982,7 @@ static int __init fsg_bind(struct usb_ga
 	usb_gadget_set_selfpowered(gadget);
 
 	snprintf(manufacturer, sizeof manufacturer, "%s %s with %s",
-			system_utsname.sysname, system_utsname.release,
+			init_utsname()->sysname, init_utsname()->release,
 			gadget->name);
 
 	/* On a real device, serial[] would be loaded from permanent
diff -upr kernel-2.6.18-417.el5.orig/drivers/usb/gadget/inode.c kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/inode.c
--- kernel-2.6.18-417.el5.orig/drivers/usb/gadget/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/inode.c	2017-01-13 08:40:40.000000000 -0500
@@ -526,7 +526,8 @@ struct kiocb_priv {
 	struct usb_request	*req;
 	struct ep_data		*epdata;
 	void			*buf;
-	char __user		*ubuf;		/* NULL for writes */
+	const struct iovec	*iv;
+	unsigned long		nr_segs;
 	unsigned		actual;
 };
 
@@ -554,17 +555,32 @@ static int ep_aio_cancel(struct kiocb *i
 static ssize_t ep_aio_read_retry(struct kiocb *iocb)
 {
 	struct kiocb_priv	*priv = iocb->private;
-	ssize_t			status = priv->actual;
+	ssize_t			len, total;
+	int			i;
 
-	/* we "retry" to get the right mm context for this: */
-	status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
-	if (unlikely(0 != status))
-		status = -EFAULT;
-	else
-		status = priv->actual;
-	kfree(priv->buf);
-	kfree(priv);
-	return status;
+  	/* we "retry" to get the right mm context for this: */
+
+ 	/* copy stuff into user buffers */
+ 	total = priv->actual;
+ 	len = 0;
+ 	for (i=0; i < priv->nr_segs; i++) {
+ 		ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total);
+
+ 		if (copy_to_user(priv->iv[i].iov_base, priv->buf, this)) {
+ 			if (len == 0)
+ 				len = -EFAULT;
+ 			break;
+ 		}
+
+ 		total -= this;
+ 		len += this;
+ 		if (total == 0)
+ 			break;
+ 	}
+  	kfree(priv->buf);
+  	kfree(priv);
+  	aio_put_req(iocb);
+ 	return len;
 }
 
 static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -577,7 +593,7 @@ static void ep_aio_complete(struct usb_e
 	spin_lock(&epdata->dev->lock);
 	priv->req = NULL;
 	priv->epdata = NULL;
-	if (priv->ubuf == NULL
+	if (priv->iv == NULL
 			|| unlikely(req->actual == 0)
 			|| unlikely(kiocbIsCancelled(iocb))) {
 		kfree(req->buf);
@@ -612,7 +628,8 @@ ep_aio_rwtail(
 	char		*buf,
 	size_t		len,
 	struct ep_data	*epdata,
-	char __user	*ubuf
+	const struct iovec *iv,
+	unsigned long 	nr_segs
 )
 {
 	struct kiocb_priv	*priv;
@@ -627,7 +644,8 @@ fail:
 		return value;
 	}
 	iocb->private = priv;
-	priv->ubuf = ubuf;
+	priv->iv = iv;
+	priv->nr_segs = nr_segs;
 
 	value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
 	if (unlikely(value < 0)) {
@@ -667,41 +685,53 @@ fail:
 		kfree(priv);
 		put_ep(epdata);
 	} else
-		value = (ubuf ? -EIOCBRETRY : -EIOCBQUEUED);
+		value = (iv ? -EIOCBRETRY : -EIOCBQUEUED);
 	return value;
 }
 
 static ssize_t
-ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o)
+ep_aio_read(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t o)
 {
 	struct ep_data		*epdata = iocb->ki_filp->private_data;
 	char			*buf;
 
 	if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN))
 		return -EINVAL;
-	buf = kmalloc(len, GFP_KERNEL);
+
+	buf = kmalloc(iocb->ki_left, GFP_KERNEL);
 	if (unlikely(!buf))
 		return -ENOMEM;
+
 	iocb->ki_retry = ep_aio_read_retry;
-	return ep_aio_rwtail(iocb, buf, len, epdata, ubuf);
+	return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs);
 }
 
 static ssize_t
-ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o)
+ep_aio_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t o)
 {
 	struct ep_data		*epdata = iocb->ki_filp->private_data;
 	char			*buf;
+	size_t			len = 0;
+	int			i = 0;
 
 	if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN)))
 		return -EINVAL;
-	buf = kmalloc(len, GFP_KERNEL);
+
+	buf = kmalloc(iocb->ki_left, GFP_KERNEL);
 	if (unlikely(!buf))
 		return -ENOMEM;
-	if (unlikely(copy_from_user(buf, ubuf, len) != 0)) {
-		kfree(buf);
-		return -EFAULT;
+
+	for (i=0; i < nr_segs; i++) {
+		if (unlikely(copy_from_user(&buf[len], iov[i].iov_base,
+				iov[i].iov_len) != 0)) {
+			kfree(buf);
+			return -EFAULT;
+		}
+		len += iov[i].iov_len;
 	}
-	return ep_aio_rwtail(iocb, buf, len, epdata, NULL);
+	return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
 }
 
 /*----------------------------------------------------------------------*/
diff -upr kernel-2.6.18-417.el5.orig/drivers/usb/gadget/serial.c kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/serial.c
--- kernel-2.6.18-417.el5.orig/drivers/usb/gadget/serial.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/serial.c	2017-01-13 08:40:15.000000000 -0500
@@ -1431,7 +1431,7 @@ static int __init gs_bind(struct usb_gad
 		return -ENOMEM;
 
 	snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s",
-		system_utsname.sysname, system_utsname.release,
+		init_utsname()->sysname, init_utsname()->release,
 		gadget->name);
 
 	memset(dev, 0, sizeof(struct gs_dev));
diff -upr kernel-2.6.18-417.el5.orig/drivers/usb/gadget/zero.c kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/zero.c
--- kernel-2.6.18-417.el5.orig/drivers/usb/gadget/zero.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/usb/gadget/zero.c	2017-01-13 08:40:15.000000000 -0500
@@ -1242,7 +1242,7 @@ autoconf_fail:
 		EP_OUT_NAME, EP_IN_NAME);
 
 	snprintf (manufacturer, sizeof manufacturer, "%s %s with %s",
-		system_utsname.sysname, system_utsname.release,
+		init_utsname()->sysname, init_utsname()->release,
 		gadget->name);
 
 	return 0;
diff -upr kernel-2.6.18-417.el5.orig/drivers/xen/blktap/blktapmain.c kernel-2.6.18-417.el5-028stab121/drivers/xen/blktap/blktapmain.c
--- kernel-2.6.18-417.el5.orig/drivers/xen/blktap/blktapmain.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/xen/blktap/blktapmain.c	2017-01-13 08:40:40.000000000 -0500
@@ -410,7 +410,7 @@ void signal_tapdisk(int idx) 
 
 	info = tapfds[idx];
 	if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
-		ptask = find_task_by_pid(info->pid);
+		ptask = find_task_by_pid_all(info->pid);
 		if (ptask)
 			info->status = CLEANSHUTDOWN;
 	}
diff -upr kernel-2.6.18-417.el5.orig/drivers/xen/netback/loopback.c kernel-2.6.18-417.el5-028stab121/drivers/xen/netback/loopback.c
--- kernel-2.6.18-417.el5.orig/drivers/xen/netback/loopback.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/xen/netback/loopback.c	2017-01-13 08:40:21.000000000 -0500
@@ -60,7 +60,7 @@ module_param(nloopbacks, int, 0);
 MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
 
 struct net_private {
-	struct net_device *loopback_dev;
+	struct net_device *_loopback_dev;
 	struct net_device_stats stats;
 	int loop_idx;
 };
@@ -146,7 +146,7 @@ static int loopback_start_xmit(struct sk
 	np->stats.tx_packets++;
 
 	/* Switch to loopback context. */
-	dev = np->loopback_dev;
+	dev = np->_loopback_dev;
 	np  = netdev_priv(dev);
 
 	np->stats.rx_bytes += skb->len;
@@ -215,7 +215,7 @@ static void loopback_construct(struct ne
 {
 	struct net_private *np = netdev_priv(dev);
 
-	np->loopback_dev     = lo;
+	np->_loopback_dev     = lo;
 	np->loop_idx         = loop_idx;
 
 	dev->open            = loopback_open;
diff -upr kernel-2.6.18-417.el5.orig/drivers/xen/xenbus/xenbus_client.c kernel-2.6.18-417.el5-028stab121/drivers/xen/xenbus/xenbus_client.c
--- kernel-2.6.18-417.el5.orig/drivers/xen/xenbus/xenbus_client.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/drivers/xen/xenbus/xenbus_client.c	2017-01-13 08:40:26.000000000 -0500
@@ -289,6 +289,7 @@ int xenbus_free_evtchn(struct xenbus_dev
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 
 
 enum xenbus_state xenbus_read_driver_state(const char *path)
diff -upr kernel-2.6.18-417.el5.orig/fs/aio.c kernel-2.6.18-417.el5-028stab121/fs/aio.c
--- kernel-2.6.18-417.el5.orig/fs/aio.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/aio.c	2017-01-13 08:40:40.000000000 -0500
@@ -15,6 +15,7 @@
 #include <linux/aio_abi.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/uio.h>
 
 #define DEBUG 0
 
@@ -47,13 +48,16 @@
 #endif
 
 /*------ sysctl variables----*/
-static DEFINE_SPINLOCK(aio_nr_lock);
+DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+EXPORT_SYMBOL_GPL(aio_nr_lock);
+EXPORT_SYMBOL_GPL(aio_nr);
 /*----end sysctl variables---*/
 
 static kmem_cache_t	*kiocb_cachep;
-static kmem_cache_t	*kioctx_cachep;
+kmem_cache_t	*kioctx_cachep;
+EXPORT_SYMBOL_GPL(kioctx_cachep);
 
 static struct workqueue_struct *aio_wq;
 
@@ -79,7 +83,7 @@ struct aio_batch_entry {
 mempool_t *abe_pool;
  
 
-static void aio_kick_handler(void *);
+void aio_kick_handler(void *);
 static void aio_queue_work(struct kioctx *);
 
 /*
@@ -424,7 +428,7 @@ static void aio_cancel_all(struct kioctx
 	spin_unlock_irq(&ctx->ctx_lock);
 }
 
-static void wait_for_all_aios(struct kioctx *ctx)
+void wait_for_all_aios(struct kioctx *ctx)
 {
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
@@ -447,6 +451,7 @@ static void wait_for_all_aios(struct kio
 out:
 	spin_unlock_irq(&ctx->ctx_lock);
 }
+EXPORT_SYMBOL_GPL(wait_for_all_aios);
 
 /* wait_on_sync_kiocb:
  *	Waits on the given sync kiocb to complete.
@@ -558,6 +563,7 @@ static struct kiocb fastcall *__aio_get_
 	req->ki_retry = NULL;
 	req->ki_dtor = NULL;
 	req->private = NULL;
+	req->ki_iovec = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
 
 	/* Check if the completion queue has enough free space to
@@ -602,6 +608,8 @@ static inline void really_put_req(struct
 
 	if (req->ki_dtor)
 		req->ki_dtor(req);
+	if (req->ki_iovec != &req->ki_inline_vec)
+		kfree(req->ki_iovec);
 	kmem_cache_free(kiocb_cachep, req);
 	ctx->reqs_active--;
 
@@ -1004,7 +1012,7 @@ static inline void aio_run_all_iocbs(str
  *      space.
  * Run on aiod's context.
  */
-static void aio_kick_handler(void *data)
+void aio_kick_handler(void *data)
 {
 	struct kioctx *ctx = data;
 	mm_segment_t oldfs = get_fs();
@@ -1023,6 +1031,7 @@ static void aio_kick_handler(void *data)
 	if (requeue)
 		queue_work(aio_wq, &ctx->wq);
 }
+EXPORT_SYMBOL_GPL(aio_kick_handler);
 
 
 /*
@@ -1463,63 +1472,63 @@ asmlinkage long sys_io_destroy(aio_conte
 	return -EINVAL;
 }
 
-/*
- * aio_p{read,write} are the default  ki_retry methods for
- * IO_CMD_P{READ,WRITE}.  They maintains kiocb retry state around potentially
- * multiple calls to f_op->aio_read().  They loop around partial progress
- * instead of returning -EIOCBRETRY because they don't have the means to call
- * kick_iocb().
- */
-static ssize_t aio_pread(struct kiocb *iocb)
+static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
 {
-	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	ssize_t ret = 0;
-
-	do {
-		ret = file->f_op->aio_read(iocb, iocb->ki_buf,
-			iocb->ki_left, iocb->ki_pos);
-		/*
-		 * Can't just depend on iocb->ki_left to determine
-		 * whether we are done. This may have been a short read.
-		 */
-		if (ret > 0) {
-			iocb->ki_buf += ret;
-			iocb->ki_left -= ret;
-		}
+	struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];
 
-		/*
-		 * For pipes and sockets we return once we have some data; for
-		 * regular files we retry till we complete the entire read or
-		 * find that we can't read any more data (e.g short reads).
-		 */
-	} while (ret > 0 && iocb->ki_left > 0 &&
-		 !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
+	BUG_ON(ret <= 0);
 
-	/* This means we must have transferred all that we could */
-	/* No need to retry anymore */
-	if ((ret == 0) || (iocb->ki_left == 0))
-		ret = iocb->ki_nbytes - iocb->ki_left;
+	while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
+		ssize_t this = min((ssize_t)iov->iov_len, ret);
+		iov->iov_base += this;
+		iov->iov_len -= this;
+		iocb->ki_left -= this;
+		ret -= this;
+		if (iov->iov_len == 0) {
+			iocb->ki_cur_seg++;
+			iov++;
+		}
+	}
 
-	return ret;
+	/* the caller should not have done more I/O than what fits in
+	 * the remaining iovecs */
+	BUG_ON(ret > 0 && iocb->ki_left == 0);
 }
 
-/* see aio_pread() */
-static ssize_t aio_pwrite(struct kiocb *iocb)
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
 {
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
+			 unsigned long, loff_t);
 	ssize_t ret = 0;
+	unsigned short opcode;
+
+	if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
+		(iocb->ki_opcode == IOCB_CMD_PREAD)) {
+		rw_op = file->f_op->aio_read;
+		opcode = IOCB_CMD_PREADV;
+	} else {
+		rw_op = file->f_op->aio_write;
+		opcode = IOCB_CMD_PWRITEV;
+	}
 
 	do {
-		ret = file->f_op->aio_write(iocb, iocb->ki_buf,
-			iocb->ki_left, iocb->ki_pos);
-		if (ret > 0) {
-			iocb->ki_buf += ret;
-			iocb->ki_left -= ret;
-		}
-	} while (ret > 0 && iocb->ki_left > 0);
+		ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
+			    iocb->ki_nr_segs - iocb->ki_cur_seg,
+			    iocb->ki_pos);
+		if (ret > 0)
+			aio_advance_iovec(iocb, ret);
+
+	/* retry all partial writes.  retry partial reads as long as it's a
+	 * regular file. */
+	} while (ret > 0 && iocb->ki_left > 0 &&
+		 (opcode == IOCB_CMD_PWRITEV ||
+		  (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
 
+	/* This means we must have transferred all that we could */
+	/* No need to retry anymore */
 	if ((ret == 0) || (iocb->ki_left == 0))
 		ret = iocb->ki_nbytes - iocb->ki_left;
 
@@ -1546,6 +1555,38 @@ static ssize_t aio_fsync(struct kiocb *i
 	return ret;
 }
 
+static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb)
+{
+	ssize_t ret;
+
+	ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf,
+				    kiocb->ki_nbytes, 1,
+				    &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+	if (ret < 0)
+		goto out;
+
+	kiocb->ki_nr_segs = kiocb->ki_nbytes;
+	kiocb->ki_cur_seg = 0;
+	/* ki_nbytes/left now reflect bytes instead of segs */
+	kiocb->ki_nbytes = ret;
+	kiocb->ki_left = ret;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+{
+	kiocb->ki_iovec = &kiocb->ki_inline_vec;
+	kiocb->ki_iovec->iov_base = kiocb->ki_buf;
+	kiocb->ki_iovec->iov_len = kiocb->ki_left;
+	kiocb->ki_nr_segs = 1;
+	kiocb->ki_cur_seg = 0;
+	kiocb->ki_nbytes = kiocb->ki_left;
+	return 0;
+}
+
 /*
  * aio_setup_iocb:
  *	Performs the initial checks and aio retry method
@@ -1568,9 +1609,12 @@ static ssize_t aio_setup_iocb(struct kio
 		ret = security_file_permission(file, MAY_READ);
 		if (unlikely(ret))
 			break;
+		ret = aio_setup_single_vector(kiocb);
+		if (ret)
+			break;
 		ret = -EINVAL;
 		if (file->f_op->aio_read)
-			kiocb->ki_retry = aio_pread;
+			kiocb->ki_retry = aio_rw_vect_retry;
 		break;
 	case IOCB_CMD_PWRITE:
 		ret = -EBADF;
@@ -1583,9 +1627,40 @@ static ssize_t aio_setup_iocb(struct kio
 		ret = security_file_permission(file, MAY_WRITE);
 		if (unlikely(ret))
 			break;
+		ret = aio_setup_single_vector(kiocb);
+		if (ret)
+			break;
+		ret = -EINVAL;
+		if (file->f_op->aio_write)
+			kiocb->ki_retry = aio_rw_vect_retry;
+		break;
+	case IOCB_CMD_PREADV:
+		ret = -EBADF;
+		if (unlikely(!(file->f_mode & FMODE_READ)))
+			break;
+		ret = security_file_permission(file, MAY_READ);
+		if (unlikely(ret))
+			break;
+		ret = aio_setup_vectored_rw(READ, kiocb);
+		if (ret)
+			break;
+		ret = -EINVAL;
+		if (file->f_op->aio_read)
+			kiocb->ki_retry = aio_rw_vect_retry;
+		break;
+	case IOCB_CMD_PWRITEV:
+		ret = -EBADF;
+		if (unlikely(!(file->f_mode & FMODE_WRITE)))
+			break;
+		ret = security_file_permission(file, MAY_WRITE);
+		if (unlikely(ret))
+			break;
+		ret = aio_setup_vectored_rw(WRITE, kiocb);
+		if (ret)
+			break;
 		ret = -EINVAL;
 		if (file->f_op->aio_write)
-			kiocb->ki_retry = aio_pwrite;
+			kiocb->ki_retry = aio_rw_vect_retry;
 		break;
 	case IOCB_CMD_FDSYNC:
 		ret = -EINVAL;
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs/autofs_i.h kernel-2.6.18-417.el5-028stab121/fs/autofs/autofs_i.h
--- kernel-2.6.18-417.el5.orig/fs/autofs/autofs_i.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs/autofs_i.h	2017-01-13 08:40:19.000000000 -0500
@@ -124,7 +124,7 @@ static inline struct autofs_sb_info *aut
    filesystem without "magic".) */
 
 static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Hash operations */
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs/init.c kernel-2.6.18-417.el5-028stab121/fs/autofs/init.c
--- kernel-2.6.18-417.el5.orig/fs/autofs/init.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs/init.c	2017-01-13 08:40:19.000000000 -0500
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= autofs_kill_sb,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs_fs(void)
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs/inode.c kernel-2.6.18-417.el5-028stab121/fs/autofs/inode.c
--- kernel-2.6.18-417.el5.orig/fs/autofs/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs/inode.c	2017-01-13 08:40:19.000000000 -0500
@@ -76,7 +76,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
@@ -149,7 +149,7 @@ int autofs_fill_super(struct super_block
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	autofs_initialize_hash(&sbi->dirhash);
 	sbi->queues = NULL;
 	memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs/root.c kernel-2.6.18-417.el5-028stab121/fs/autofs/root.c
--- kernel-2.6.18-417.el5.orig/fs/autofs/root.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs/root.c	2017-01-13 08:40:19.000000000 -0500
@@ -354,7 +354,7 @@ static int autofs_root_unlink(struct ino
 
 	/* This allows root to remove symlinks */
 	lock_kernel();
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) {
 		unlock_kernel();
 		return -EACCES;
 	}
@@ -541,7 +541,7 @@ static int autofs_root_ioctl(struct inod
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs4/autofs_i.h kernel-2.6.18-417.el5-028stab121/fs/autofs4/autofs_i.h
--- kernel-2.6.18-417.el5.orig/fs/autofs4/autofs_i.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs4/autofs_i.h	2017-01-13 08:40:27.000000000 -0500
@@ -125,7 +125,9 @@ struct autofs_sb_info {
 	int pipefd;
 	struct file *pipe;
 	pid_t oz_pgrp;
+	pid_t pipe_pid;
 	int catatonic;
+	int is32bit;
 	int version;
 	int sub_version;
 	int min_proto;
@@ -143,6 +145,21 @@ struct autofs_sb_info {
 	struct list_head expiring_list;
 };
 
+struct autofs_mount_data {
+	__u32	i_uid;
+	__u32	i_gid;
+	__u32	oz_pgrp;
+	__u32	type;
+	__u32	min_proto;
+	__u32	max_proto;
+	__u32	exp_timeout;
+	__u32	pipefd;
+	__u32	pipe_pid;
+	__u32	is32bit;
+	/* see comment in check_autofs */
+	__u64	pipe_fd_id;
+};
+
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
 {
 	return (struct autofs_sb_info *)(sb->s_fs_info);
@@ -158,7 +175,7 @@ static inline struct autofs_info *autofs
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs4/dev-ioctl.c kernel-2.6.18-417.el5-028stab121/fs/autofs4/dev-ioctl.c
--- kernel-2.6.18-417.el5.orig/fs/autofs4/dev-ioctl.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs4/dev-ioctl.c	2017-01-13 08:40:27.000000000 -0500
@@ -431,6 +431,7 @@ static int autofs_dev_ioctl_setpipefd(st
 		sbi->oz_pgrp = process_group(current);
 		sbi->pipefd = pipefd;
 		sbi->pipe = pipe;
+		sbi->pipe_pid = virt_pid(current);
 		sbi->catatonic = 0;
 	}
 out:
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs4/init.c kernel-2.6.18-417.el5-028stab121/fs/autofs4/init.c
--- kernel-2.6.18-417.el5.orig/fs/autofs4/init.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs4/init.c	2017-01-13 08:40:27.000000000 -0500
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/ve_proto.h>
 #include "autofs_i.h"
 
 static int autofs_get_sb(struct file_system_type *fs_type,
@@ -25,6 +26,26 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= autofs4_kill_sb,
+	.fs_flags	= FS_VIRTUALIZED,
+};
+
+static int ve_autofs_start(void *data)
+{
+	return 0;
+}
+
+static void ve_autofs_stop(void *data)
+{
+	struct ve_struct *ve = data;
+
+	umount_ve_fs_type(&autofs_fs_type, ve->veid);
+}
+
+static struct ve_hook autofs4_hook = {
+	.init	  = ve_autofs_start,
+	.fini	  = ve_autofs_stop,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_FS,
 };
 
 static int __init init_autofs4_fs(void)
@@ -36,12 +57,14 @@ static int __init init_autofs4_fs(void)
 		return err;
 
 	autofs_dev_ioctl_init();
+	ve_hook_register(VE_INIT_EXIT_CHAIN, &autofs4_hook);
 
 	return err;
 }
 
 static void __exit exit_autofs4_fs(void)
 {
+	ve_hook_unregister(&autofs4_hook);
 	autofs_dev_ioctl_exit();
 	unregister_filesystem(&autofs_fs_type);
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs4/inode.c kernel-2.6.18-417.el5-028stab121/fs/autofs4/inode.c
--- kernel-2.6.18-417.el5.orig/fs/autofs4/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs4/inode.c	2017-01-13 08:40:27.000000000 -0500
@@ -222,7 +222,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -295,6 +295,18 @@ static struct autofs_info *autofs4_mkroo
 	return ino;
 }
 
+static int autofs_open_pipe(struct autofs_sb_info *sbi)
+{
+	struct file *f;
+
+	f = get_task_file(sbi->pipe_pid, sbi->pipefd);
+	if (IS_ERR(f))
+		return PTR_ERR(f);
+
+	sbi->pipe = f;
+	return 0;
+}
+
 int autofs4_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct inode * root_inode;
@@ -317,13 +329,17 @@ int autofs4_fill_super(struct super_bloc
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
 	set_autofs_type_indirect(&sbi->type);
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
+#if defined CONFIG_X86_64 && defined CONFIG_IA32_EMULATION
+	if (test_thread_flag(TIF_IA32))
+		sbi->is32bit = 1;
+#endif
 	mutex_init(&sbi->wq_mutex);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
@@ -354,13 +370,54 @@ int autofs4_fill_super(struct super_bloc
 	root->d_op = &autofs4_dentry_operations;
 	root->d_fsdata = ino;
 
-	/* Can this call block? */
-	if (parse_options(data, &pipefd,
-			  &root_inode->i_uid, &root_inode->i_gid,
-			  &sbi->oz_pgrp, &sbi->type,
-			  &sbi->min_proto, &sbi->max_proto)) {
-		printk("autofs: called with bogus options\n");
-		goto fail_dput;
+	if (s->s_flags & MS_CPTMOUNT) {
+		struct autofs_mount_data *kd;
+		int err;
+
+		kd = (struct autofs_mount_data *)data;
+
+		root_inode->i_uid = kd->i_uid;
+		root_inode->i_gid = kd->i_gid;
+		sbi->oz_pgrp = kd->oz_pgrp;
+		sbi->type = kd->type;
+		sbi->min_proto = kd->min_proto;
+		sbi->max_proto = kd->max_proto;
+		sbi->exp_timeout = kd->exp_timeout;
+		sbi->pipefd = kd->pipefd;
+#if defined CONFIG_X86_64 && defined CONFIG_IA32_EMULATION
+		sbi->is32bit = kd->is32bit;
+#endif
+		sbi->pipe_pid = kd->pipe_pid;
+
+		err = autofs_open_pipe(sbi);
+		if (err < 0) {
+			printk("autofs: can't open file %d of %d - %d\n",
+					sbi->pipefd, sbi->pipe_pid, err);
+			dump_stack();
+			goto fail_dput;
+		}
+	} else {
+		if (parse_options(data, &pipefd,
+					&root_inode->i_uid, &root_inode->i_gid,
+					&sbi->oz_pgrp, &sbi->type,
+					&sbi->min_proto, &sbi->max_proto)) {
+			printk("autofs: called with bogus options\n");
+			goto fail_dput;
+		}
+
+		DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
+		pipe = fget(pipefd);
+
+		if ( !pipe ) {
+			printk("autofs: could not open pipe file descriptor\n");
+			goto fail_dput;
+		}
+		if ( !pipe->f_op || !pipe->f_op->write )
+			goto fail_fput;
+
+		sbi->pipe = pipe;
+		sbi->pipefd = pipefd;
+		sbi->pipe_pid = virt_pid(current);
 	}
 
 	if (autofs_type_trigger(sbi->type))
@@ -376,7 +433,7 @@ int autofs4_fill_super(struct super_bloc
 		       "daemon (%d, %d) kernel (%d, %d)\n",
 			sbi->min_proto, sbi->max_proto,
 			AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
-		goto fail_dput;
+		goto fail_fput;
 	}
 
 	/* Establish highest kernel protocol version */
@@ -386,17 +443,6 @@ int autofs4_fill_super(struct super_bloc
 		sbi->version = sbi->max_proto;
 	sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
 
-	DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
-	pipe = fget(pipefd);
-	
-	if ( !pipe ) {
-		printk("autofs: could not open pipe file descriptor\n");
-		goto fail_dput;
-	}
-	if ( !pipe->f_op || !pipe->f_op->write )
-		goto fail_fput;
-	sbi->pipe = pipe;
-	sbi->pipefd = pipefd;
 	sbi->catatonic = 0;
 
 	/*
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs4/root.c kernel-2.6.18-417.el5-028stab121/fs/autofs4/root.c
--- kernel-2.6.18-417.el5.orig/fs/autofs4/root.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs4/root.c	2017-01-13 08:40:19.000000000 -0500
@@ -556,7 +556,7 @@ static int autofs4_dir_unlink(struct ino
 	struct autofs_info *p_ino;
 	
 	/* This allows root to remove symlinks */
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EACCES;
 
 	if (atomic_dec_and_test(&ino->count)) {
@@ -794,7 +794,7 @@ static int autofs4_root_ioctl(struct ino
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -upr kernel-2.6.18-417.el5.orig/fs/autofs4/waitq.c kernel-2.6.18-417.el5-028stab121/fs/autofs4/waitq.c
--- kernel-2.6.18-417.el5.orig/fs/autofs4/waitq.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/autofs4/waitq.c	2017-01-13 08:40:15.000000000 -0500
@@ -144,6 +144,16 @@ static void autofs4_notify_daemon(struct
 		struct autofs_v5_packet *packet = &pkt.v5_packet;
 
 		pktsz = sizeof(*packet);
+#if defined CONFIG_X86_64 && defined CONFIG_IA32_EMULATION
+		/*
+		 * On x86_64 the autofs_v5_packet struct is padded with 4 extra
+		 * bytes, which broke autofs daemons running in ia32 emulation mode.
+		 *
+		 * Reduce the size in 32-bit mode to match what userspace expects.
+		 */
+		if (sbi->is32bit)
+			pktsz -= 4;
+#endif
 
 		packet->wait_queue_token = wq->wait_queue_token;
 		packet->len = wq->name.len;
diff -upr kernel-2.6.18-417.el5.orig/fs/bad_inode.c kernel-2.6.18-417.el5-028stab121/fs/bad_inode.c
--- kernel-2.6.18-417.el5.orig/fs/bad_inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/bad_inode.c	2017-01-13 08:40:40.000000000 -0500
@@ -34,14 +34,14 @@ static ssize_t bad_file_write(struct fil
         return -EIO;
 }
 
-static ssize_t bad_file_aio_read(struct kiocb *iocb, char __user *buf,
-			size_t count, loff_t pos)
+static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	return -EIO;
 }
 
-static ssize_t bad_file_aio_write(struct kiocb *iocb, const char __user *buf,
-			size_t count, loff_t pos)
+static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	return -EIO;
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/binfmt_elf.c kernel-2.6.18-417.el5-028stab121/fs/binfmt_elf.c
--- kernel-2.6.18-417.el5.orig/fs/binfmt_elf.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/binfmt_elf.c	2017-01-13 08:40:27.000000000 -0500
@@ -76,7 +76,7 @@ static int elf_core_dump(long signr, str
 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
 
-static struct linux_binfmt elf_format = {
+struct linux_binfmt elf_format = {
 		.module		= THIS_MODULE,
 		.load_binary	= load_elf_binary,
 		.load_shlib	= load_elf_library,
@@ -157,8 +157,10 @@ create_elf_tables(struct linux_binprm *b
 	elf_addr_t __user *sp;
 	elf_addr_t __user *u_platform;
 	elf_addr_t __user *u_base_platform;
+	elf_addr_t __user *u_rand_bytes;
 	const char *k_platform = ELF_PLATFORM;
         const char *k_base_platform = ELF_BASE_PLATFORM;
+	unsigned char k_rand_bytes[16];
 	int items;
 	elf_addr_t *elf_info;
 	int ei_index = 0;
@@ -201,6 +203,15 @@ create_elf_tables(struct linux_binprm *b
                      return -EFAULT;
         }
 
+	/*
+	 * Generate 16 random bytes for userspace PRNG seeding.
+	 */
+	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+	u_rand_bytes = (elf_addr_t __user *)
+		       STACK_ALLOC(p, sizeof(k_rand_bytes));
+	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
+		return -EFAULT;
+
 	/* Create the ELF interpreter info */
 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
 #define NEW_AUX_ENT(id, val) \
@@ -230,6 +241,7 @@ create_elf_tables(struct linux_binprm *b
 	NEW_AUX_ENT(AT_GID, tsk->gid);
 	NEW_AUX_ENT(AT_EGID, tsk->egid);
  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
 	if (k_platform) {
 		NEW_AUX_ENT(AT_PLATFORM,
 			    (elf_addr_t)(unsigned long)u_platform);
@@ -442,7 +454,7 @@ static unsigned long load_elf_interp(str
 	eppnt = elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
-			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+			int elf_type = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECPRIO;
 			int elf_prot = 0;
 			unsigned long vaddr = 0;
 			unsigned long k, map_addr;
@@ -930,7 +942,8 @@ static int load_elf_binary(struct linux_
 		if (elf_ppnt->p_flags & PF_X)
 			elf_prot |= PROT_EXEC;
 
-		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE | MAP_DENYWRITE |
+				MAP_EXECUTABLE | MAP_EXECPRIO;
 
 		vaddr = elf_ppnt->p_vaddr;
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set)
@@ -946,6 +959,8 @@ static int load_elf_binary(struct linux_
 				elf_prot, elf_flags, 0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
+			retval = IS_ERR((void *)error) ?
+				PTR_ERR((void*)error) : -EINVAL;
 			goto out_free_dentry;
 		}
 
@@ -975,6 +990,7 @@ static int load_elf_binary(struct linux_
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work. Avoid overflows. */
 			send_sig(SIGKILL, current, 0);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 
@@ -1051,7 +1067,7 @@ static int load_elf_binary(struct linux_
 	set_binfmt(&elf_format);
 
 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
-	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
+	retval = arch_setup_additional_pages(bprm, !!elf_interpreter, 0);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
 		goto out_free_fh;
@@ -1079,8 +1095,10 @@ static int load_elf_binary(struct linux_
 	current->mm->start_stack = bprm->p;
 
 #ifdef __HAVE_ARCH_RANDOMIZE_BRK
-	if (current->flags & PF_RANDOMIZE)
+	if (current->flags & PF_RANDOMIZE) {
 		randomize_brk(elf_brk);
+		current->mm->start_brk = current->mm->brk;
+	}
 #endif
 	if (current->personality & MMAP_PAGE_ZERO) {
 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
@@ -1450,10 +1468,10 @@ static void fill_prstatus(struct elf_prs
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_pid = virt_pid(p);
+	prstatus->pr_ppid = virt_pid(p->parent);
+	prstatus->pr_pgrp = virt_pgid(p);
+	prstatus->pr_sid = virt_sid(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1496,10 +1514,10 @@ static int fill_psinfo(struct elf_prpsin
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_pid = virt_pid(p);
+	psinfo->pr_ppid = virt_pid(p->parent);
+	psinfo->pr_pgrp = virt_pgid(p);
+	psinfo->pr_sid = virt_sid(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
@@ -1637,7 +1655,7 @@ static int elf_core_dump(long signr, str
 	if (signr) {
 		struct elf_thread_status *tmp;
 		read_lock(&tasklist_lock);
-		do_each_thread(g,p)
+		do_each_thread_ve(g,p)
 			if (current->mm == p->mm && current != p) {
 				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
@@ -1648,7 +1666,7 @@ static int elf_core_dump(long signr, str
 				tmp->thread = p;
 				list_add(&tmp->list, &thread_list);
 			}
-		while_each_thread(g,p);
+		while_each_thread_ve(g,p);
 		read_unlock(&tasklist_lock);
 		list_for_each(t, &thread_list) {
 			struct elf_thread_status *tmp;
diff -upr kernel-2.6.18-417.el5.orig/fs/binfmt_misc.c kernel-2.6.18-417.el5-028stab121/fs/binfmt_misc.c
--- kernel-2.6.18-417.el5.orig/fs/binfmt_misc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/binfmt_misc.c	2017-01-13 08:40:20.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/syscalls.h>
+#include <linux/ve_proto.h>
 
 #include <asm/uaccess.h>
 
@@ -34,8 +35,15 @@ enum {
 	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
 };
 
+#ifdef CONFIG_VE
+#define bm_entries(ve)		((ve)->bm_entries)
+#define bm_enabled(ve)		((ve)->bm_enabled)
+#else
 static LIST_HEAD(entries);
 static int enabled = 1;
+#define bm_entries(ve)		(entries)
+#define bm_enabled(ve)		(enabled)
+#endif
 
 enum {Enabled, Magic};
 #define MISC_FMT_PRESERVE_ARGV0 (1<<31)
@@ -58,21 +66,30 @@ typedef struct {
 } Node;
 
 static DEFINE_RWLOCK(entries_lock);
+#ifdef CONFIG_VE
+#define bm_fs_type(ve)		(*(ve)->bm_fs_type)
+#define bm_mnt(ve)		((ve)->bm_mnt)
+#define bm_entry_count(ve)	((ve)->bm_entry_count)
+#else
 static struct file_system_type bm_fs_type;
 static struct vfsmount *bm_mnt;
 static int entry_count;
+#define bm_fs_type(ve)		(bm_fs_type)
+#define bm_mnt(ve)		(bm_mnt)
+#define bm_entry_count(ve)	(bm_entry_count)
+#endif
 
 /* 
  * Check if we support the binfmt
  * if we do, return the node, else NULL
  * locking is done in load_misc_binary
  */
-static Node *check_file(struct linux_binprm *bprm)
+static Node *check_file(struct ve_struct *ve, struct linux_binprm *bprm)
 {
 	char *p = strrchr(bprm->interp, '.');
 	struct list_head *l;
 
-	list_for_each(l, &entries) {
+	list_for_each(l, &bm_entries(ve)) {
 		Node *e = list_entry(l, Node, list);
 		char *s;
 		int j;
@@ -114,9 +131,10 @@ static int load_misc_binary(struct linux
 	int retval;
 	int fd_binary = -1;
 	struct files_struct *files = NULL;
+	struct ve_struct *ve = get_exec_env();
 
 	retval = -ENOEXEC;
-	if (!enabled)
+	if (!bm_enabled(ve))
 		goto _ret;
 
 	retval = -ENOEXEC;
@@ -125,7 +143,7 @@ static int load_misc_binary(struct linux
 
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
-	fmt = check_file(bprm);
+	fmt = check_file(ve, bprm);
 	if (fmt)
 		strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
 	read_unlock(&entries_lock);
@@ -528,7 +546,7 @@ static void bm_clear_inode(struct inode 
 	kfree(inode->i_private);
 }
 
-static void kill_node(Node *e)
+static void kill_node(struct ve_struct *ve, Node *e)
 {
 	struct dentry *dentry;
 
@@ -544,7 +562,7 @@ static void kill_node(Node *e)
 		dentry->d_inode->i_nlink--;
 		d_drop(dentry);
 		dput(dentry);
-		simple_release_fs(&bm_mnt, &entry_count);
+		simple_release_fs(&bm_mnt(ve), &bm_entry_count(ve));
 	}
 }
 
@@ -598,7 +616,7 @@ static ssize_t bm_entry_write(struct fil
 		case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
 			mutex_lock(&root->d_inode->i_mutex);
 
-			kill_node(e);
+			kill_node(get_exec_env(), e);
 
 			mutex_unlock(&root->d_inode->i_mutex);
 			dput(root);
@@ -623,6 +641,7 @@ static ssize_t bm_register_write(struct 
 	struct dentry *root, *dentry;
 	struct super_block *sb = file->f_vfsmnt->mnt_sb;
 	int err = 0;
+	struct ve_struct *ve = get_exec_env();
 
 	e = create_entry(buffer, count);
 
@@ -646,7 +665,7 @@ static ssize_t bm_register_write(struct 
 	if (!inode)
 		goto out2;
 
-	err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count);
+	err = simple_pin_fs(&bm_fs_type(ve), &bm_mnt(ve), &bm_entry_count(ve));
 	if (err) {
 		iput(inode);
 		inode = NULL;
@@ -659,7 +678,7 @@ static ssize_t bm_register_write(struct 
 
 	d_instantiate(dentry, inode);
 	write_lock(&entries_lock);
-	list_add(&e->list, &entries);
+	list_add(&e->list, &bm_entries(ve));
 	write_unlock(&entries_lock);
 
 	err = 0;
@@ -685,7 +704,8 @@ static const struct file_operations bm_r
 static ssize_t
 bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
-	char *s = enabled ? "enabled" : "disabled";
+	struct ve_struct *ve = get_exec_env();
+	char *s = bm_enabled(ve) ? "enabled" : "disabled";
 	int len = strlen(s);
 	loff_t pos = *ppos;
 
@@ -701,21 +721,25 @@ bm_status_read(struct file *file, char _
 	return nbytes;
 }
 
+static void dm_genocide(struct ve_struct *ve)
+{
+	while (!list_empty(&bm_entries(ve)))
+		kill_node(ve, list_entry(bm_entries(ve).next, Node, list));
+}
+
 static ssize_t bm_status_write(struct file * file, const char __user * buffer,
 		size_t count, loff_t *ppos)
 {
+	struct ve_struct *ve = get_exec_env();
 	int res = parse_command(buffer, count);
 	struct dentry *root;
 
 	switch (res) {
-		case 1: enabled = 0; break;
-		case 2: enabled = 1; break;
+		case 1: bm_enabled(ve) = 0; break;
+		case 2: bm_enabled(ve) = 1; break;
 		case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
 			mutex_lock(&root->d_inode->i_mutex);
-
-			while (!list_empty(&entries))
-				kill_node(list_entry(entries.next, Node, list));
-
+			dm_genocide(ve);
 			mutex_unlock(&root->d_inode->i_mutex);
 			dput(root);
 		default: return res;
@@ -766,6 +790,52 @@ static struct file_system_type bm_fs_typ
 	.kill_sb	= kill_litter_super,
 };
 
+#ifdef CONFIG_VE
+static void __ve_binfmt_init(struct ve_struct *ve, struct file_system_type *fs)
+{
+	ve->bm_fs_type = fs;
+	INIT_LIST_HEAD(&ve->bm_entries);
+	ve->bm_enabled = 1;
+	ve->bm_mnt = NULL;
+	ve->bm_entry_count = 0;
+}
+
+static int ve_binfmt_init(void *x)
+{
+	struct ve_struct *ve = x;
+	struct file_system_type *fs_type;
+	int err;
+
+	err = register_ve_fs_type(ve, &bm_fs_type, &fs_type, NULL);
+	if (err == 0)
+		__ve_binfmt_init(ve, fs_type);
+
+	return err;
+}
+
+static void ve_binfmt_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	/*
+	 * No locking needed: exec_ve is dead and no one will
+	 * touch the bm_* fields any longer.
+	 */
+	if (!ve->bm_fs_type)
+		return;
+	dm_genocide(ve);
+	unregister_ve_fs_type(ve->bm_fs_type, NULL);
+	/* bm_fs_type is freed in real_put_ve -> free_ve_filesystems */
+}
+
+static struct ve_hook ve_binfmt_hook = {
+	.init		= ve_binfmt_init,
+	.fini		= ve_binfmt_fini,
+	.priority	= HOOK_PRIO_FS,
+	.owner		= THIS_MODULE,
+};
+#endif
+
 static int __init init_misc_binfmt(void)
 {
 	int err = register_filesystem(&bm_fs_type);
@@ -774,11 +844,17 @@ static int __init init_misc_binfmt(void)
 		if (err)
 			unregister_filesystem(&bm_fs_type);
 	}
+
+	if (!err) {
+		__ve_binfmt_init(get_ve0(), &bm_fs_type);
+		ve_hook_register(VE_SS_CHAIN, &ve_binfmt_hook);
+	}
 	return err;
 }
 
 static void __exit exit_misc_binfmt(void)
 {
+	ve_hook_unregister(&ve_binfmt_hook);
 	unregister_binfmt(&misc_format);
 	unregister_filesystem(&bm_fs_type);
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/block_dev.c kernel-2.6.18-417.el5-028stab121/fs/block_dev.c
--- kernel-2.6.18-417.el5.orig/fs/block_dev.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/block_dev.c	2017-01-13 08:40:40.000000000 -0500
@@ -21,6 +21,7 @@
 #include <linux/mount.h>
 #include <linux/uio.h>
 #include <linux/namei.h>
+#include <linux/ve_proto.h>
 #include <asm/uaccess.h>
 
 struct bdev_inode {
@@ -990,9 +991,21 @@ static int do_open(struct block_device *
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
-	int ret = -ENXIO;
+	int ret;
 	int part;
 
+	/*
+	 * Skip the permission check if it has already been done,
+	 * i.e. when we are re-entered via __blkdev_get() (called below).
+	 */
+	if (!for_part) {
+		ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
+				  	file->f_mode & (FMODE_READ | FMODE_WRITE));
+		if (ret)
+	        	return ret;
+	}
+
+	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;
 	lock_kernel();
 	disk = get_gendisk(bdev->bd_dev, &part);
@@ -1225,15 +1238,15 @@ static ssize_t blkdev_file_write(struct 
 	return ret;
 }
 
-static ssize_t blkdev_file_aio_read(struct kiocb *iocb, char __user *buf,
-				    size_t count, loff_t pos)
+static ssize_t blkdev_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+				    unsigned long nr_segs, loff_t pos)
 {
 	ssize_t ret;
 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
 
 	percpu_down_read(&bdev->bd_block_size_semaphore);
 
-	ret = generic_file_aio_read(iocb, buf, count, pos);
+	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
 
 	percpu_up_read(&bdev->bd_block_size_semaphore);
 
@@ -1241,16 +1254,15 @@ static ssize_t blkdev_file_aio_read(stru
 }
 
 
-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
-				   size_t count, loff_t pos)
+static ssize_t blkdev_file_aio_write(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs, loff_t pos)
 {
 	ssize_t ret;
-	struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
 
 	percpu_down_read(&bdev->bd_block_size_semaphore);
 
-	ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+	ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);
 
 	percpu_up_read(&bdev->bd_block_size_semaphore);
 
@@ -1422,7 +1434,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
  * namespace if possible and return it.  Return ERR_PTR(error)
  * otherwise.
  */
-struct block_device *lookup_bdev(const char *path)
+struct block_device *lookup_bdev(const char *path, int mode)
 {
 	struct block_device *bdev;
 	struct inode *inode;
@@ -1440,6 +1452,11 @@ struct block_device *lookup_bdev(const c
 	error = -ENOTBLK;
 	if (!S_ISBLK(inode->i_mode))
 		goto fail;
+
+	error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
+	if (error)
+		goto fail;
+
 	error = -EACCES;
 	if (nd.mnt->mnt_flags & MNT_NODEV)
 		goto fail;
@@ -1471,12 +1488,13 @@ struct block_device *open_bdev_excl(cons
 	mode_t mode = FMODE_READ;
 	int error = 0;
 
-	bdev = lookup_bdev(path);
+	if (!(flags & MS_RDONLY))
+		mode |= FMODE_WRITE;
+
+	bdev = lookup_bdev(path, mode);
 	if (IS_ERR(bdev))
 		return bdev;
 
-	if (!(flags & MS_RDONLY))
-		mode |= FMODE_WRITE;
 	error = blkdev_get(bdev, mode, 0);
 	if (error)
 		return ERR_PTR(error);
diff -upr kernel-2.6.18-417.el5.orig/fs/buffer.c kernel-2.6.18-417.el5-028stab121/fs/buffer.c
--- kernel-2.6.18-417.el5.orig/fs/buffer.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/buffer.c	2017-01-13 08:40:40.000000000 -0500
@@ -43,6 +43,9 @@
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
 
+#include <ub/beancounter.h>
+#include <ub/io_acct.h>
+
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -379,9 +382,43 @@ static void do_sync(unsigned long wait)
 		laptop_sync_completion();
 }
 
+static void do_sync_ub(int wait, struct user_beancounter *ub)
+{
+	unsigned long dirty_pages;
+
+	dirty_pages = ub_dirty_pages(ub);
+	if (dirty_pages)
+		wakeup_pdflush(dirty_pages);
+
+	sync_inodes_ub(0, ub);
+	sync_inodes_ub(wait, ub);
+}
+
 asmlinkage long sys_sync(void)
 {
-	do_sync(1);
+	struct user_beancounter *ub;
+	struct ve_struct *ve;
+
+	ub = get_exec_ub();
+	ub_percpu_inc(ub, sync);
+
+	ve = get_exec_env();
+	if (ve_is_super(ve)) {
+		do_sync(1);
+	} else if (current == ve->init_entry) {
+	/* init can't sync during VE stop. Rationale:
+	 *  - NFS with -o hard will block forever as network is down
+	 *  - no useful job is performed as VE0 will call umount/sync
+	 *    by his own later
+	 *  Den
+	 */
+	} else if (sysctl_fsync_enable == 1) {
+		do_sync(1);
+	} else if (sysctl_fsync_enable == 2) {
+		do_sync_ub(1, get_io_ub());
+	}
+
+	ub_percpu_inc(ub, sync_done);
 	return 0;
 }
 
@@ -424,6 +461,7 @@ long do_fsync(struct file *file, int dat
 	int ret;
 	int err;
 	struct address_space *mapping = file->f_mapping;
+	struct user_beancounter *ub;
 
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why?  We can still call filemap_fdatawrite */
@@ -431,6 +469,12 @@ long do_fsync(struct file *file, int dat
 		goto out;
 	}
 
+	ub = get_exec_ub();
+	if (datasync)
+		ub_percpu_inc(ub, fdsync);
+	else
+		ub_percpu_inc(ub, fsync);
+
 	ret = filemap_fdatawrite(mapping);
 
 	/*
@@ -445,6 +489,11 @@ long do_fsync(struct file *file, int dat
 	err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
+
+	if (datasync)
+		ub_percpu_inc(ub, fdsync_done);
+	else
+		ub_percpu_inc(ub, fsync_done);
 out:
 	return ret;
 }
@@ -455,10 +504,19 @@ static long __do_fsync(unsigned int fd, 
 	int ret = -EBADF;
 
 	file = fget(fd);
-	if (file) {
-		ret = do_fsync(file, datasync);
-		fput(file);
+
+	if (!file) {
+		return ret;
+	}
+
+	if (!sysctl_fsync_enable && !ve_is_super(get_exec_env())) {
+		ret = 0;
+		goto out;
 	}
+
+	ret = do_fsync(file, datasync);
+out:
+	fput(file);
 	return ret;
 }
 
@@ -960,13 +1018,20 @@ int __set_page_dirty_buffers(struct page
 	if (!TestSetPageDirty(page)) {
 		write_lock_irq(&mapping->tree_lock);
 		if (page->mapping) {	/* Race with truncate? */
+			int acct = 0;
+
 			if (mapping_cap_account_dirty(mapping)) {
 				__inc_zone_page_state(page, NR_FILE_DIRTY);
-				task_io_account_write(PAGE_CACHE_SIZE);
+				task_io_account_dirty(PAGE_CACHE_SIZE);
+				acct = 1;
 			}
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+			if (acct && !radix_tree_prev_tag_get(
+						&mapping->page_tree,
+						PAGECACHE_TAG_DIRTY))
+				ub_io_account_dirty(mapping, 1);
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
diff -upr kernel-2.6.18-417.el5.orig/fs/char_dev.c kernel-2.6.18-417.el5-028stab121/fs/char_dev.c
--- kernel-2.6.18-417.el5.orig/fs/char_dev.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/char_dev.c	2017-01-13 08:40:20.000000000 -0500
@@ -20,6 +20,8 @@
 #include <linux/cdev.h>
 #include <linux/mutex.h>
 
+#include <linux/ve_proto.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -296,6 +298,11 @@ int chrdev_open(struct inode * inode, st
 	struct cdev *new = NULL;
 	int ret = 0;
 
+	ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
+				  filp->f_mode & (FMODE_READ | FMODE_WRITE));
+	if (ret)
+		return ret;
+
 	spin_lock(&cdev_lock);
 	p = inode->i_cdev;
 	if (!p) {
diff -upr kernel-2.6.18-417.el5.orig/fs/cifs/cifsfs.c kernel-2.6.18-417.el5-028stab121/fs/cifs/cifsfs.c
--- kernel-2.6.18-417.el5.orig/fs/cifs/cifsfs.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/cifs/cifsfs.c	2017-01-13 08:40:40.000000000 -0500
@@ -711,15 +711,6 @@ cifs_get_sb(struct file_system_type *fs_
 #endif
 }
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				   unsigned long nr_segs, loff_t pos)
-{
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
-	ssize_t written;
-
-	written = generic_file_aio_write(iocb, iov, nr_segs, pos);
-#else
 static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
 				unsigned long nr_segs, loff_t *ppos)
 {
@@ -732,14 +723,13 @@ static ssize_t cifs_file_writev(struct f
 	return written;
 }
 
-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
-				   size_t count, loff_t pos)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+				   unsigned long nr_segs, loff_t pos)
 {
 	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
 	ssize_t written;
 
-	written = generic_file_aio_write(iocb, buf, count, pos);
-#endif
+	written = generic_file_aio_write(iocb, iov, nr_segs, pos);
 	if (!CIFS_I(inode)->clientCanCacheAll)
 		filemap_fdatawrite(inode->i_mapping);
 	return written;
@@ -848,10 +838,8 @@ const
 struct file_operations cifs_file_ops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
 	.readv = generic_file_readv,
 	.writev = cifs_file_writev,
-#endif
 	.aio_read = generic_file_aio_read,
 	.aio_write = cifs_file_aio_write,
 	.open = cifs_open,
@@ -901,10 +889,8 @@ const
 struct file_operations cifs_file_nobrl_ops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
 	.readv = generic_file_readv,
 	.writev = cifs_file_writev,
-#endif
 	.aio_read = generic_file_aio_read,
 	.aio_write = cifs_file_aio_write,
 	.open = cifs_open,
diff -upr kernel-2.6.18-417.el5.orig/fs/cifs/connect.c kernel-2.6.18-417.el5-028stab121/fs/cifs/connect.c
--- kernel-2.6.18-417.el5.orig/fs/cifs/connect.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/cifs/connect.c	2017-01-13 08:40:15.000000000 -0500
@@ -861,11 +861,7 @@ cifs_parse_mount_options(char *options, 
 	if (Local_System_Name[0] != 0)
 		memcpy(vol->source_rfc1001_name, Local_System_Name, 15);
 	else {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
 		char *nodename = utsname()->nodename;
-#else
-		char *nodename = system_utsname.nodename;
-#endif
 		int n = strnlen(nodename, 15);
 		memset(vol->source_rfc1001_name, 0x20, 15);
 		for (i = 0; i < n; i++) {
diff -upr kernel-2.6.18-417.el5.orig/fs/cifs/sess.c kernel-2.6.18-417.el5-028stab121/fs/cifs/sess.c
--- kernel-2.6.18-417.el5.orig/fs/cifs/sess.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/cifs/sess.c	2017-01-13 08:40:15.000000000 -0500
@@ -86,13 +86,8 @@ unicode_oslm_strings(char **pbcc_area, c
 	bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
 				  nls_cp);
 	bcc_ptr += 2 * bytes_ret;
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
 	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
 				  32, nls_cp);
-#else
-	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
-				  32, nls_cp);
-#endif
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* trailing null */
 
@@ -189,13 +184,8 @@ static void ascii_ssetup_strings(char **
 
 	strcpy(bcc_ptr, "Linux version ");
 	bcc_ptr += strlen("Linux version ");
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
 	strcpy(bcc_ptr, init_utsname()->release);
 	bcc_ptr += strlen(init_utsname()->release) + 1;
-#else
-	strcpy(bcc_ptr, system_utsname.release);
-	bcc_ptr += strlen(system_utsname.release) + 1;
-#endif
 	strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
 	bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
 
diff -upr kernel-2.6.18-417.el5.orig/fs/compat.c kernel-2.6.18-417.el5-028stab121/fs/compat.c
--- kernel-2.6.18-417.el5.orig/fs/compat.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/compat.c	2017-01-13 08:40:41.000000000 -0500
@@ -25,6 +25,7 @@
 #include <linux/file.h>
 #include <linux/vfs.h>
 #include <linux/ioctl32.h>
+#include <linux/virtinfo.h>
 #include <linux/ioctl.h>
 #include <linux/init.h>
 #include <linux/sockios.h>	/* for SIOCDEVPRIVATE */
@@ -46,6 +47,9 @@
 #include <linux/rwsem.h>
 #include <linux/tsacct_kern.h>
 #include <linux/mm.h>
+#include <linux/quota.h>
+#include <linux/ve_proto.h>
+#include <linux/grsecurity.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
 
@@ -53,6 +57,10 @@
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
 
+#ifdef CONFIG_QUOTA_COMPAT
+#include <linux/quota-compat.h>
+#endif
+
 extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
 
 int compat_log = 1;
@@ -69,6 +77,18 @@ int compat_printk(const char *fmt, ...)
 	return ret;
 }
 
+int ve_compat_printk(int dst, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+	if (!compat_log)
+		return 0;
+	va_start(ap, fmt);
+	ret = ve_vprintk(dst, fmt, ap);
+	va_end(ap);
+	return ret;
+}
+
 /*
  * Not all architectures have sys_utime, so implement this in terms
  * of sys_utimes.
@@ -84,7 +104,25 @@ asmlinkage long compat_sys_utime(char __
 		tv[0].tv_usec = 0;
 		tv[1].tv_usec = 0;
 	}
-	return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
+	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
+}
+
+asmlinkage long compat_sys_lutime(char __user * filename,
+		struct compat_utimbuf __user *times)
+{
+	struct timeval tv[2];
+
+	if (!lsyscall_enable)
+		return -ENOSYS;
+
+	if (times) {
+		if (get_user(tv[0].tv_sec, &times->actime) ||
+		    get_user(tv[1].tv_sec, &times->modtime))
+			return -EFAULT;
+		tv[0].tv_usec = 0;
+		tv[1].tv_usec = 0;
+	}
+	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, AT_SYMLINK_NOFOLLOW);
 }
 
 asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t)
@@ -98,7 +136,7 @@ asmlinkage long compat_sys_futimesat(uns
 		    get_user(tv[1].tv_usec, &t[1].tv_usec))
 			return -EFAULT;
 	}
-	return do_utimes(dfd, filename, t ? tv : NULL);
+	return do_utimes(dfd, filename, t ? tv : NULL, 0);
 }
 
 asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
@@ -213,6 +251,8 @@ asmlinkage long compat_sys_statfs(const 
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry, &tmp);
 		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
+		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_release(&nd);
 	}
@@ -231,6 +271,8 @@ asmlinkage long compat_sys_fstatfs(unsig
 		goto out;
 	error = vfs_statfs(file->f_dentry, &tmp);
 	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
+	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
 out:
@@ -281,6 +323,8 @@ asmlinkage long compat_sys_statfs64(cons
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry, &tmp);
 		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
+		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_release(&nd);
 	}
@@ -302,6 +346,8 @@ asmlinkage long compat_sys_fstatfs64(uns
 		goto out;
 	error = vfs_statfs(file->f_dentry, &tmp);
 	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
+	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
 out:
@@ -478,8 +524,14 @@ asmlinkage long compat_sys_ustat(unsigne
 	struct compat_ustat tmp;
 	struct kstatfs sbuf;
 	int err;
+	dev_t kdev;
 
-	sb = user_get_super(new_decode_dev(dev));
+	kdev = new_decode_dev(dev);
+	err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
+	if (err)
+		return err;
+
+	sb = user_get_super(kdev);
 	if (!sb)
 		return -EINVAL;
 	err = vfs_statfs(sb->s_root, &sbuf);
@@ -1320,16 +1372,12 @@ out:
 	return ret;
 }
 
-asmlinkage ssize_t
-compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen)
+static size_t compat_readv(struct file *file,
+			   const struct compat_iovec __user *vec,
+			   unsigned long vlen, loff_t *pos)
 {
-	struct file *file;
 	ssize_t ret = -EBADF;
 
-	file = fget(fd);
-	if (!file)
-		return -EBADF;
-
 	if (!(file->f_mode & FMODE_READ))
 		goto out;
 
@@ -1337,22 +1385,51 @@ compat_sys_readv(unsigned long fd, const
 	if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
 		goto out;
 
-	ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
+	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
 
 out:
+	return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
+		 unsigned long vlen)
+{
+	struct file *file;
+	ssize_t ret;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_readv(file, vec, vlen, &file->f_pos);
 	fput(file);
 	return ret;
 }
 
 asmlinkage ssize_t
-compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen)
+compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
+		  unsigned long vlen, u32 pos_low, u32 pos_high)
 {
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 	struct file *file;
-	ssize_t ret = -EBADF;
+	ssize_t ret;
 
+	if (pos < 0)
+		return -EINVAL;
 	file = fget(fd);
 	if (!file)
 		return -EBADF;
+	ret = compat_readv(file, vec, vlen, &pos);
+	fput(file);
+	return ret;
+}
+
+static size_t compat_writev(struct file *file,
+			    const struct compat_iovec __user *vec,
+			    unsigned long vlen, loff_t *pos)
+{
+	ssize_t ret = -EBADF;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		goto out;
 
@@ -1360,9 +1437,41 @@ compat_sys_writev(unsigned long fd, cons
 	if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
 		goto out;
 
-	ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
 
 out:
+	return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
+		  unsigned long vlen)
+{
+	struct file *file;
+	ssize_t ret;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_writev(file, vec, vlen, &file->f_pos);
+	fput(file);
+	return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
+		   unsigned long vlen, u32 pos_low, u32 pos_high)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	struct file *file;
+	ssize_t ret;
+
+	if (pos < 0)
+		return -EINVAL;
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_writev(file, vec, vlen, &pos);
 	fput(file);
 	return ret;
 }
@@ -1528,6 +1637,103 @@ out:
 	return ret;
 }
 
+#define COPY_V2_DQBLK(cdq, idq) do {				\
+		cdq.dqb_ihardlimit = idq.dqb_ihardlimit;	\
+		cdq.dqb_isoftlimit = idq.dqb_isoftlimit;	\
+		cdq.dqb_curinodes = idq.dqb_curinodes;		\
+		cdq.dqb_bhardlimit = idq.dqb_bhardlimit;	\
+		cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit;	\
+		cdq.dqb_curspace = idq.dqb_curspace;		\
+		cdq.dqb_btime = idq.dqb_btime;			\
+		cdq.dqb_itime = idq.dqb_itime;			\
+	} while (0)
+
+asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+		qid_t id, void __user *addr)
+{
+	long ret;
+	unsigned int cmds;
+	mm_segment_t old_fs;
+	struct if_dqblk dqblk;
+	struct if32_dqblk {
+		__u32 dqb_bhardlimit[2];
+		__u32 dqb_bsoftlimit[2];
+		__u32 dqb_curspace[2];
+		__u32 dqb_ihardlimit[2];
+		__u32 dqb_isoftlimit[2];
+		__u32 dqb_curinodes[2];
+		__u32 dqb_btime[2];
+		__u32 dqb_itime[2];
+		__u32 dqb_valid;
+	} dqblk32;
+#ifdef CONFIG_QUOTA_COMPAT
+	struct compat_v2_dqblk cdq;
+	struct compat_v2_dqblk_32 cdq32;
+#endif
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	switch (cmds) {
+		case Q_GETQUOTA:
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			if (ret < 0)
+				break;
+
+			memcpy(&dqblk32, &dqblk, sizeof(dqblk32));
+			dqblk32.dqb_valid = dqblk.dqb_valid;
+			if (copy_to_user(addr, &dqblk32, sizeof(dqblk32)))
+				ret = -EFAULT;
+
+			break;
+		case Q_SETQUOTA:
+			ret = -EFAULT;
+			if (copy_from_user(&dqblk32, addr, sizeof(dqblk32)))
+				break;
+			memcpy(&dqblk, &dqblk32, sizeof(dqblk32));
+			dqblk.dqb_valid = dqblk32.dqb_valid;
+
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			break;
+#ifdef CONFIG_QUOTA_COMPAT
+		case QC_GETQUOTA:
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &cdq);
+			set_fs(old_fs);
+			if (ret < 0)
+				break;
+
+			COPY_V2_DQBLK(cdq32, cdq);
+
+			if (copy_to_user(addr, &cdq32, sizeof(cdq32)))
+				ret = -EFAULT;
+			break;
+		case QC_SETQUOTA:
+			ret = -EFAULT;
+			if (copy_from_user(&cdq32, addr, sizeof(cdq32)))
+				break;
+
+			COPY_V2_DQBLK(cdq, cdq32);
+
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &cdq);
+			set_fs(old_fs);
+			break;
+#endif
+		default:
+			ret = sys_quotactl(cmd, special, id, addr);
+			break;
+	}
+	return ret;
+}
+
 /*
  * compat_do_execve() is mostly a copy of do_execve(), with the exception
  * that it processes 32 bit argv and envp pointers.
@@ -1541,6 +1747,10 @@ int compat_do_execve(char * filename,
 	struct file *file;
 	int retval;
 
+	retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
+	if (retval)
+		return retval;
+
 	retval = -ENOMEM;
 	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
@@ -1590,6 +1800,11 @@ int compat_do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
+	if (!gr_tpe_allow(file)) {
+		retval = -EACCES;
+		goto out;
+	}
+
 	retval = search_binary_handler(bprm, regs);
 	if (retval >= 0) {
 		/* execve success */
@@ -2251,3 +2466,83 @@ long asmlinkage compat_sys_nfsservctl(in
 	return sys_ni_syscall();
 }
 #endif
+
+#ifdef CONFIG_EPOLL
+
+#ifdef TIF_RESTORE_SIGMASK
+asmlinkage long compat_sys_epoll_pwait(int epfd,
+			struct compat_epoll_event __user *events,
+			int maxevents, int timeout,
+			const compat_sigset_t __user *sigmask,
+			compat_size_t sigsetsize)
+{
+	long err;
+	compat_sigset_t csigmask;
+	sigset_t ksigmask, sigsaved;
+
+	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here.
+	 */
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &csigmask);
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	err = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 * In case we've got a signal while waiting, we do not restore the
+	 * signal mask yet, and we allow do_signal() to deliver the signal on
+	 * the way back to userspace, before the signal mask is restored.
+	 */
+	if (sigmask) {
+		if (err == -EINTR) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_thread_flag(TIF_RESTORE_SIGMASK);
+		} else
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return err;
+}
+#endif /* TIF_RESTORE_SIGMASK */
+
+#endif /* CONFIG_EPOLL */
+
+#ifdef CONFIG_SIGNALFD
+
+asmlinkage long compat_sys_signalfd4(int ufd,
+				     const compat_sigset_t __user *sigmask,
+				     compat_size_t sigsetsize, int flags)
+{
+	compat_sigset_t ss32;
+	sigset_t tmp;
+	sigset_t __user *ksigmask;
+
+	if (sigsetsize != sizeof(compat_sigset_t))
+		return -EINVAL;
+	if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+		return -EFAULT;
+	sigset_from_compat(&tmp, &ss32);
+	ksigmask = compat_alloc_user_space(sizeof(sigset_t));
+	if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
+		return -EFAULT;
+
+	return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
+}
+
+asmlinkage long compat_sys_signalfd(int ufd,
+				    const compat_sigset_t __user *sigmask,
+				    compat_size_t sigsetsize)
+{
+	return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+}
+#endif /* CONFIG_SIGNALFD */
diff -upr kernel-2.6.18-417.el5.orig/fs/dcache.c kernel-2.6.18-417.el5-028stab121/fs/dcache.c
--- kernel-2.6.18-417.el5.orig/fs/dcache.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/dcache.c	2017-01-13 08:40:41.000000000 -0500
@@ -27,12 +27,17 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/file.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include <linux/kernel_stat.h>
+#include <net/inet_sock.h>
 
+#include <ub/ub_dcache.h>
+#include <ub/ub_dcache_op.h>
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -42,7 +47,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOC
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache __read_mostly;
+kmem_cache_t *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -116,6 +121,29 @@ static void dentry_iput(struct dentry * 
 	}
 }
 
+/**
+ * d_kill - kill dentry and return parent
+ * @dentry: dentry to kill
+ *
+ * Called with dcache_lock and d_lock, releases both.  The dentry must
+ * already be unhashed and removed from the LRU.
+ *
+ * If this is the root of the dentry tree, return NULL.
+ */
+static struct dentry *d_kill(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	list_del(&dentry->d_u.d_child);
+	dentry_stat.nr_dentry--;	/* For d_free, below */
+	preempt_enable_no_resched();
+	/* drops the locks; at that point nobody can reach this dentry */
+	dentry_iput(dentry);
+	parent = dentry->d_parent;
+	d_free(dentry);
+	return dentry == parent ? NULL : parent;
+}
+
 /* 
  * This is dput
  *
@@ -143,26 +171,45 @@ static void dentry_iput(struct dentry * 
  * they too may now get deleted.
  *
  * no dcache lock, please.
+ * preemption is disabled by the caller.
  */
 
-void dput(struct dentry *dentry)
+static void dput_recursive(struct dentry *dentry)
 {
-	if (!dentry)
-		return;
+	if (list_empty(&dentry->d_lru) || d_unhashed(dentry) ||
+	    ub_dentry_on || (dentry->d_op && dentry->d_op->d_delete))
+		goto repeat;
 
-repeat:
-	if (atomic_read(&dentry->d_count) == 1)
-		might_sleep();
-	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
-		return;
+	if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+		goto out_preempt;
 
-	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count)) {
+	if (d_unhashed(dentry)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
-		return;
+		spin_lock(&dcache_lock);
+		goto do_it;
 	}
 
+	spin_unlock(&dentry->d_lock);
+	goto out_preempt;
+
+repeat:
+	if (unlikely(ub_dentry_on)) {
+		spin_lock(&dcache_lock);
+		if (!atomic_dec_and_test(&dentry->d_count)) {
+			ub_dentry_uncharge_locked(dentry);
+			spin_unlock(&dcache_lock);
+			goto out_preempt;
+		}
+	} else {
+		if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
+			goto out_preempt;
+	}
+
+do_it:
+	spin_lock(&dentry->d_lock);
+	if (atomic_read(&dentry->d_count))
+		goto out_unlock;
+
 	/*
 	 * AV: ->d_delete() is _NOT_ allowed to block now.
 	 */
@@ -178,34 +225,49 @@ repeat:
   		list_add(&dentry->d_lru, &dentry_unused);
   		dentry_stat.nr_unused++;
   	}
+out_unlock:
  	spin_unlock(&dentry->d_lock);
+	ub_dentry_uncharge_locked(dentry);
 	spin_unlock(&dcache_lock);
+out_preempt:
+	preempt_enable();
 	return;
 
 unhash_it:
 	__d_drop(dentry);
+kill_it:
+	/*
+	 * If the dentry is on the d_lru list, remove it from there.
+	 */
+	if (!list_empty(&dentry->d_lru)) {
+		list_del(&dentry->d_lru);
+		dentry_stat.nr_unused--;
+	}
+	if (unlikely(ub_dentry_on)) {
+		struct user_beancounter *ub;
 
-kill_it: {
-		struct dentry *parent;
-
-		/* If dentry was on d_lru list
-		 * delete it from there
-		 */
-  		if (!list_empty(&dentry->d_lru)) {
-  			list_del(&dentry->d_lru);
-  			dentry_stat.nr_unused--;
-  		}
-  		list_del(&dentry->d_u.d_child);
-		dentry_stat.nr_dentry--;	/* For d_free, below */
-		/*drops the locks, at that point nobody can reach this dentry */
-		dentry_iput(dentry);
-		parent = dentry->d_parent;
-		d_free(dentry);
-		if (dentry == parent)
-			return;
-		dentry = parent;
+		ub = dentry->dentry_bc.d_ub;
+		BUG_ON(!ub_dput_testzero(dentry));
+		uncharge_dcache(ub, dentry->dentry_bc.d_ubsize);
+		put_beancounter(ub);
+	}
+	dentry = d_kill(dentry);
+	preempt_disable();
+	if (dentry)
 		goto repeat;
-	}
+	preempt_enable();
+}
+
+void dput(struct dentry *dentry)
+{
+	if (!dentry)
+		return;
+
+	if (atomic_read(&dentry->d_count) == 1)
+		might_sleep();
+
+	preempt_disable();
+	dput_recursive(dentry);
 }
 
 /**
@@ -274,6 +336,8 @@ static inline struct dentry * __dget_loc
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
 	}
+
+	ub_dentry_charge_nofail(dentry);
 	return dentry;
 }
 
@@ -362,22 +426,37 @@ restart:
  * Throw away a dentry - free the inode, dput the parent.  This requires that
  * the LRU list has already been removed.
  *
+ * Try to prune ancestors as well.
+ *
  * Called with dcache_lock, drops it and then regains.
  * Called with dentry->d_lock held, drops it.
  */
 static void prune_one_dentry(struct dentry * dentry)
 {
-	struct dentry * parent;
-
 	__d_drop(dentry);
-	list_del(&dentry->d_u.d_child);
-	dentry_stat.nr_dentry--;	/* For d_free, below */
-	dentry_iput(dentry);
-	parent = dentry->d_parent;
-	d_free(dentry);
-	if (parent != dentry)
-		dput(parent);
+	preempt_disable();
+	dentry = d_kill(dentry);
+
+	/*
+	 * Prune ancestors.  Locking is simpler than in dput(),
+	 * because dcache_lock needs to be taken anyway.
+	 */
 	spin_lock(&dcache_lock);
+	while (dentry) {
+		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+			return;
+
+		if (dentry->d_op && dentry->d_op->d_delete)
+			dentry->d_op->d_delete(dentry);
+		if (!list_empty(&dentry->d_lru)) {
+			list_del(&dentry->d_lru);
+			dentry_stat.nr_unused--;
+		}
+		__d_drop(dentry);
+		preempt_disable();
+		dentry = d_kill(dentry);
+		spin_lock(&dcache_lock);
+	}
 }
 
 /**
@@ -665,6 +744,8 @@ void shrink_dcache_for_umount(struct sup
 
 	dentry = sb->s_root;
 	sb->s_root = NULL;
+	/* "/" was also charged in d_alloc_root() */
+	ub_dentry_uncharge(dentry);
 	atomic_dec(&dentry->d_count);
 	shrink_dcache_for_umount_subtree(dentry);
 
@@ -833,12 +914,18 @@ void shrink_dcache_parent(struct dentry 
  */
 static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 {
+	int res = -1;
+
+	KSTAT_PERF_ENTER(shrink_dcache)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
-			return -1;
+			goto out;
 		prune_dcache(nr, NULL, NULL);
 	}
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+out:
+	KSTAT_PERF_LEAVE(shrink_dcache)
+	return res;
 }
 
 /**
@@ -856,21 +943,26 @@ struct dentry *d_alloc(struct dentry * p
 	struct dentry *dentry;
 	char *dname;
 
+	dname = NULL;
+	if (name->len > DNAME_INLINE_LEN-1) {
+		dname = kmalloc(name->len + 1, GFP_KERNEL);
+		if (!dname)
+			goto err_name;
+	}
+
+	ub_dentry_alloc_start();
 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
 	if (!dentry)
-		return NULL;
+		goto err_alloc;
 
-	if (name->len > DNAME_INLINE_LEN-1) {
-		dname = kmalloc(name->len + 1, GFP_KERNEL);
-		if (!dname) {
-			kmem_cache_free(dentry_cache, dentry); 
-			return NULL;
-		}
-	} else  {
+	preempt_disable();
+	if (dname == NULL)
 		dname = dentry->d_iname;
-	}	
 	dentry->d_name.name = dname;
 
+	if (ub_dentry_alloc(dentry))
+		goto err_charge;
+
 	dentry->d_name.len = name->len;
 	dentry->d_name.hash = name->hash;
 	memcpy(dname, name->name, name->len);
@@ -902,12 +994,29 @@ struct dentry *d_alloc(struct dentry * p
 	}
 
 	spin_lock(&dcache_lock);
-	if (parent)
+	if (parent) {
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+		if (parent->d_flags & DCACHE_VIRTUAL)
+			dentry->d_flags |= DCACHE_VIRTUAL;
+		if (parent->d_flags & DCACHE_LOCALCACHE)
+			dentry->d_flags |= DCACHE_LOCALCACHE;
+	}
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
+	preempt_enable();
+	ub_dentry_alloc_end();
 
 	return dentry;
+
+err_charge:
+	preempt_enable();
+	kmem_cache_free(dentry_cache, dentry);
+err_alloc:
+	if (name->len > DNAME_INLINE_LEN - 1)
+		kfree(dname);
+	ub_dentry_alloc_end();
+err_name:
+	return NULL;
 }
 
 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
@@ -1420,12 +1529,12 @@ struct dentry * __d_lookup(struct dentry
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
 	struct hlist_head *head = d_hash(parent,hash);
-	struct dentry *found = NULL;
 	struct hlist_node *node;
-	struct dentry *dentry;
+	struct dentry *dentry, *found;
 
 	rcu_read_lock();
 	
+	found = NULL;
 	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
 		struct qstr *qstr;
 
@@ -1462,6 +1571,8 @@ struct dentry * __d_lookup(struct dentry
 		if (!d_unhashed(dentry)) {
 			atomic_inc(&dentry->d_count);
 			found = dentry;
+			if (ub_dentry_charge(found))
+				goto charge_failure;
 		}
 		spin_unlock(&dentry->d_lock);
 		break;
@@ -1471,6 +1582,14 @@ next:
  	rcu_read_unlock();
 
  	return found;
+
+charge_failure:
+	spin_unlock(&found->d_lock);
+	rcu_read_unlock();
+	/* dentry is now unhashed, just kill it */
+	dput(found);
+	/* ... and fail lookup */
+	return NULL;
 }
 
 /**
@@ -1948,6 +2067,32 @@ shouldnt_be_hashed:
 }
 
 /**
+ * __d_path_add_deleted - prepend "(deleted) " text
+ * @end: a pointer to the character after free space at the beginning of the
+ *       buffer
+ * @buflen: remaining free space
+ */
+static inline char * __d_path_add_deleted(char * end, int buflen)
+{
+	buflen -= 10;
+	if (buflen < 0)
+		return ERR_PTR(-ENAMETOOLONG);
+	end -= 10;
+	memcpy(end, "(deleted) ", 10);
+	return end;
+}
+
+/**
+ * d_root_check - checks if dentry is accessible from current's fs root
+ * @dentry: dentry to be verified
+ * @vfsmnt: vfsmnt to which the dentry belongs
+ */
+int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
+{
+	return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0));
+}
+
+/**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
  * @vfsmnt: vfsmnt to which the dentry belongs
@@ -1968,36 +2113,35 @@ char * __d_path( struct dentry *dentry, 
 			char *buffer, int buflen)
 {
 	char * end = buffer+buflen;
-	char * retval;
+	char * retval = NULL;
 	int namelen;
+	int deleted;
+	struct vfsmount *oldvfsmnt;
 
-	*--end = '\0';
-	buflen--;
-	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-		buflen -= 10;
-		end -= 10;
-		if (buflen < 0)
+	oldvfsmnt = vfsmnt;
+	deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
+	if (buffer != NULL) {
+		*--end = '\0';
+		buflen--;
+
+		if (buflen < 1)
 			goto Elong;
-		memcpy(end, " (deleted)", 10);
+		/* Get '/' right */
+		retval = end-1;
+		*retval = '/';
 	}
 
-	if (buflen < 1)
-		goto Elong;
-	/* Get '/' right */
-	retval = end-1;
-	*retval = '/';
-
 	for (;;) {
 		struct dentry * parent;
 
 		if (dentry == root && vfsmnt == rootmnt)
 			break;
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-			/* Global root? */
+			/* root of a tree? */
 			spin_lock(&vfsmount_lock);
 			if (vfsmnt->mnt_parent == vfsmnt) {
 				spin_unlock(&vfsmount_lock);
-				goto global_root;
+				goto other_root;
 			}
 			dentry = vfsmnt->mnt_mountpoint;
 			vfsmnt = vfsmnt->mnt_parent;
@@ -2006,32 +2150,56 @@ char * __d_path( struct dentry *dentry, 
 		}
 		parent = dentry->d_parent;
 		prefetch(parent);
+		if (buffer != NULL) {
+			namelen = dentry->d_name.len;
+			buflen -= namelen + 1;
+			if (buflen < 0)
+				goto Elong;
+			end -= namelen;
+			memcpy(end, dentry->d_name.name, namelen);
+			*--end = '/';
+			retval = end;
+		}
+		dentry = parent;
+	}
+	/* the given root point is reached */
+finish:
+	if (buffer != NULL && deleted)
+		retval = __d_path_add_deleted(end, buflen);
+	return retval;
+
+other_root:
+	/*
+	 * We traversed the tree upward and reached a root, but the given
+	 * lookup terminal point wasn't encountered.  It means either that the
+	 * dentry is out of our scope or belongs to an abstract space like
+	 * sock_mnt or pipe_mnt.  Check for it.
+	 *
+	 * There are different ways to check this.
+	 * We may assume that any dentry tree is unreachable unless it's
+	 * connected to `root' (defined as fs root of init aka child reaper)
+	 * and expose all paths that are not connected to it.
+	 * The other option is to allow exposing of known abstract spaces
+	 * explicitly and hide the path information for other cases.
+	 * This approach is safer; let's take it.  2001/04/22  SAW
+	 */
+	if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
+		return ERR_PTR(-EINVAL);
+	if (buffer != NULL) {
 		namelen = dentry->d_name.len;
-		buflen -= namelen + 1;
+		buflen -= namelen;
 		if (buflen < 0)
 			goto Elong;
-		end -= namelen;
-		memcpy(end, dentry->d_name.name, namelen);
-		*--end = '/';
-		retval = end;
-		dentry = parent;
+		retval -= namelen-1;	/* hit the slash */
+		memcpy(retval, dentry->d_name.name, namelen);
 	}
+	goto finish;
 
-	return retval;
-
-global_root:
-	namelen = dentry->d_name.len;
-	buflen -= namelen;
-	if (buflen < 0)
-		goto Elong;
-	retval -= namelen-1;	/* hit the slash */
-	memcpy(retval, dentry->d_name.name, namelen);
-	return retval;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
-EXPORT_SYMBOL_GPL(__d_path);
+EXPORT_SYMBOL(__d_path);
 
 /* write full pathname into buffer and return start of pathname */
 char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
@@ -2053,6 +2221,229 @@ char * d_path(struct dentry *dentry, str
 	return res;
 }
 
+#ifdef CONFIG_VE
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/file.h>
+#include <linux/namespace.h>
+#include <linux/vzratelimit.h>
+
+static void mark_sub_tree_virtual(struct dentry *d)
+{
+	struct dentry *orig_root;
+
+	orig_root = d;
+	while (1) {
+		spin_lock(&d->d_lock);
+		d->d_flags |= DCACHE_VIRTUAL;
+		spin_unlock(&d->d_lock);
+
+		if (!list_empty(&d->d_subdirs)) {
+			d = list_entry(d->d_subdirs.next,
+					struct dentry, d_u.d_child);
+			continue;
+		}
+		if (d == orig_root)
+			break;
+		while (d == list_entry(d->d_parent->d_subdirs.prev,
+					struct dentry, d_u.d_child)) {
+			d = d->d_parent;
+			if (d == orig_root)
+				goto out;
+		}
+		d = list_entry(d->d_u.d_child.next,
+				struct dentry, d_u.d_child);
+	}
+out:
+	return;
+}
+
+void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
+{
+	struct vfsmount *orig_rootmnt;
+
+	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
+	orig_rootmnt = m;
+	while (1) {
+		mark_sub_tree_virtual(d);
+		if (!list_empty(&m->mnt_mounts)) {
+			m = list_entry(m->mnt_mounts.next,
+					struct vfsmount, mnt_child);
+			d = m->mnt_root;
+			continue;
+		}
+		if (m == orig_rootmnt)
+			break;
+		while (m == list_entry(m->mnt_parent->mnt_mounts.prev,
+					struct vfsmount, mnt_child)) {
+			m = m->mnt_parent;
+			if (m == orig_rootmnt)
+				goto out;
+		}
+		m = list_entry(m->mnt_child.next,
+				struct vfsmount, mnt_child);
+		d = m->mnt_root;
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	spin_unlock(&dcache_lock);
+}
+EXPORT_SYMBOL(mark_tree_virtual);
+
+static struct vz_rate_info area_ri = { 20, 10*HZ };
+#define VE_AREA_ACC_CHECK	0x0001
+#define VE_AREA_ACC_DENY	0x0002
+#define VE_AREA_EXEC_CHECK	0x0010
+#define VE_AREA_EXEC_DENY	0x0020
+#define VE0_AREA_ACC_CHECK	0x0100
+#define VE0_AREA_ACC_DENY	0x0200
+#define VE0_AREA_EXEC_CHECK	0x1000
+#define VE0_AREA_EXEC_DENY	0x2000
+int ve_area_access_check = 0;
+
+static void print_connection_info(struct task_struct *tsk)
+{
+	struct files_struct *files;
+	struct fdtable *fdt;
+	int fd;
+
+	files = get_files_struct(tsk);
+	if (!files)
+		return;
+
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	for (fd = 0; fd < fdt->max_fds; fd++) {
+		struct file *file;
+		struct inode *inode;
+		struct socket *socket;
+		struct sock *sk;
+		struct inet_sock *inet;
+
+		file = fdt->fd[fd];
+		if (file == NULL)
+			continue;
+
+		inode = file->f_dentry->d_inode;
+		if (!S_ISSOCK(inode->i_mode))
+			continue;
+
+		socket = SOCKET_I(inode);
+		if (socket == NULL)
+			continue;
+
+		sk = socket->sk;
+		if ((sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+		    || sk->sk_type != SOCK_STREAM)
+			continue;
+
+		inet = inet_sk(sk);
+		printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
+				NIPQUAD(inet->daddr), ntohs(inet->dport),
+				inet->num);
+	}
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+}
+
+static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
+		char *str)
+{
+	struct task_struct *tsk;
+	unsigned long page;
+	struct super_block *sb;
+	char *p;
+
+	if (!vz_ratelimit(&area_ri))
+		return;
+
+	tsk = current;
+	p = ERR_PTR(-ENOMEM);
+	page = __get_free_page(GFP_KERNEL);
+	if (page) {
+		spin_lock(&dcache_lock);
+		p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
+				(char *)page, PAGE_SIZE);
+		spin_unlock(&dcache_lock);
+	}
+	if (IS_ERR(p))
+		p = "(undefined)";
+
+	sb = dentry->d_sb;
+	printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
+			"Task %d/%d[%s] from VE%d, execenv %d\n",
+			str, p,	sb->s_type->owner_env->veid,
+			sb->s_type->name, sb->s_dev,
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid,
+			get_exec_env()->veid);
+
+	free_page(page);
+
+	print_connection_info(tsk);
+
+	read_lock(&tasklist_lock);
+	tsk = tsk->parent;
+	get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+
+	printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid);
+
+	print_connection_info(tsk);
+	put_task_struct(tsk);
+	dump_stack();
+}
+#endif
+
+int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	int check, alert, deny;
+
+	if (ve_is_super(get_exec_env())) {
+		check = ve_area_access_check & VE0_AREA_ACC_CHECK;
+		alert = dentry->d_flags & DCACHE_VIRTUAL;
+		deny = ve_area_access_check & VE0_AREA_ACC_DENY;
+	} else {
+		check = ve_area_access_check & VE_AREA_ACC_CHECK;
+		alert = !(dentry->d_flags & DCACHE_VIRTUAL);
+		deny = ve_area_access_check & VE_AREA_ACC_DENY;
+	}
+
+	if (check && alert)
+		check_alert(mnt, dentry, "Access");
+	if (deny && alert)
+		return -EACCES;
+#endif
+	return 0;
+}
+
+int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	int check, alert, deny;
+
+	if (ve_is_super(get_exec_env())) {
+		check = ve_area_access_check & VE0_AREA_EXEC_CHECK;
+		alert = dentry->d_flags & DCACHE_VIRTUAL;
+		deny = ve_area_access_check & VE0_AREA_EXEC_DENY;
+	} else {
+		check = ve_area_access_check & VE_AREA_EXEC_CHECK;
+		alert = !(dentry->d_flags & DCACHE_VIRTUAL);
+		deny = ve_area_access_check & VE_AREA_EXEC_DENY;
+	}
+
+	if (check && alert)
+		check_alert(mnt, dentry, "Exec");
+	if (deny && alert)
+		return -EACCES;
+#endif
+	return 0;
+}
+
 /*
  * NOTE! The user-level library version returns a
  * character pointer. The kernel system call just
@@ -2088,6 +2479,16 @@ asmlinkage long sys_getcwd(char __user *
 	root = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
 
+	if (pwd->d_inode->i_op && pwd->d_inode->i_op->permission) {
+		struct nameidata nd;
+
+		nd.dentry = pwd;
+		nd.mnt = pwdmnt;
+		error = pwd->d_inode->i_op->permission(pwd->d_inode, 0, &nd);
+		if (error == -ERESTARTSYS)
+			goto out;
+	}
+
 	error = -ENOENT;
 	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
@@ -2096,6 +2497,17 @@ asmlinkage long sys_getcwd(char __user *
 		char * cwd;
 
 		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
+
+		error = PTR_ERR(cwd);
+		if (error == -EINVAL) {
+			struct ve_struct *ve;
+
+			ve = get_exec_env();
+			cwd = __d_path(pwd, pwdmnt,
+					ve->fs_root, ve->fs_rootmnt,
+					page, PAGE_SIZE);
+		}
+
 		spin_unlock(&dcache_lock);
 
 		error = PTR_ERR(cwd);
@@ -2189,10 +2601,12 @@ resume:
 			goto repeat;
 		}
 		atomic_dec(&dentry->d_count);
+		ub_dentry_uncharge_locked(dentry);
 	}
 	if (this_parent != root) {
 		next = this_parent->d_u.d_child.next;
 		atomic_dec(&this_parent->d_count);
+		ub_dentry_uncharge_locked(this_parent);
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -2345,7 +2759,7 @@ void __init vfs_caches_init(unsigned lon
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
 
 	dcache_init(mempages);
 	inode_init(mempages);
diff -upr kernel-2.6.18-417.el5.orig/fs/devpts/inode.c kernel-2.6.18-417.el5-028stab121/fs/devpts/inode.c
--- kernel-2.6.18-417.el5.orig/fs/devpts/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/devpts/inode.c	2017-01-13 08:40:19.000000000 -0500
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
@@ -22,16 +23,17 @@
 
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
+struct devpts_config devpts_config = {.mode = 0600};
+
+#ifndef CONFIG_VE
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
-
-static struct {
-	int setuid;
-	int setgid;
-	uid_t   uid;
-	gid_t   gid;
-	umode_t mode;
-} config = {.mode = 0600};
+#define config	devpts_config
+#else
+#define devpts_mnt	(get_exec_env()->devpts_mnt)
+#define devpts_root	(get_exec_env()->devpts_root)
+#define config		(*(get_exec_env()->devpts_config))
+#endif
 
 enum {
 	Opt_uid, Opt_gid, Opt_mode,
@@ -83,7 +85,8 @@ static int devpts_remount(struct super_b
 			config.mode = option & ~S_IFMT;
 			break;
 		default:
-			printk(KERN_ERR "devpts: called with bogus options\n");
+			ve_printk(VE_LOG, KERN_ERR
+					"devpts: called with bogus options\n");
 			return -EINVAL;
 		}
 	}
@@ -135,13 +138,15 @@ static int devpts_get_sb(struct file_sys
 	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
 }
 
-static struct file_system_type devpts_fs_type = {
+struct file_system_type devpts_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(devpts_fs_type);
+
 /*
  * The normal naming convention is simply /dev/pts/<number>; this conforms
  * to the System V naming convention
@@ -232,6 +237,7 @@ static int __init init_devpts_fs(void)
 
 static void __exit exit_devpts_fs(void)
 {
+	/* the code is never called, the argument is irrelevant */
 	unregister_filesystem(&devpts_fs_type);
 	mntput(devpts_mnt);
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/direct-io.c kernel-2.6.18-417.el5-028stab121/fs/direct-io.c
--- kernel-2.6.18-417.el5.orig/fs/direct-io.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/direct-io.c	2017-01-13 08:40:18.000000000 -0500
@@ -676,6 +676,8 @@ submit_page_section(struct dio *dio, str
 {
 	int ret = 0;
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	if (dio->rw & WRITE) {
 		/*
 		 * Read accounting is performed in submit_bio()
diff -upr kernel-2.6.18-417.el5.orig/fs/dquot.c kernel-2.6.18-417.el5-028stab121/fs/dquot.c
--- kernel-2.6.18-417.el5.orig/fs/dquot.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/dquot.c	2017-01-13 08:40:40.000000000 -0500
@@ -161,7 +161,9 @@ static struct quota_format_type *find_qu
 	struct quota_format_type *actqf;
 
 	spin_lock(&dq_list_lock);
-	for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next);
+	for (actqf = quota_formats;
+		 actqf && (actqf->qf_fmt_id != id || actqf->qf_ops == NULL);
+						 actqf = actqf->qf_next);
 	if (!actqf || !try_module_get(actqf->qf_owner)) {
 		int qm;
 
@@ -713,19 +715,19 @@ static int dqinit_needed(struct inode *i
 /* This routine is guarded by dqonoff_mutex mutex */
 static void add_dquot_ref(struct super_block *sb, int type)
 {
-	struct list_head *p;
+	struct file *filp;
+	int cpu;
 #ifdef __DQUOT_PARANOIA
 	int reserved = 0;
 #endif
 
 restart:
-	file_list_lock();
-	list_for_each(p, &sb->s_files) {
-		struct file *filp = list_entry(p, struct file, f_u.fu_list);
+	file_list_lock_sb(sb);
+	for_each_sb_file(filp, sb, cpu) {
 		struct inode *inode = filp->f_dentry->d_inode;
 		if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
 			struct dentry *dentry = dget(filp->f_dentry);
-			file_list_unlock();
+			file_list_unlock_sb(sb);
 			sb->dq_op->initialize(inode, type);
 			dput(dentry);
 			/* As we may have blocked we had better restart... */
@@ -736,7 +738,7 @@ restart:
 			reserved = 1;
 #endif
 	}
-	file_list_unlock();
+	file_list_unlock_sb(sb);
 
 #ifdef __DQUOT_PARANOIA
 	if (reserved) {
diff -upr kernel-2.6.18-417.el5.orig/fs/ecryptfs/file.c kernel-2.6.18-417.el5-028stab121/fs/ecryptfs/file.c
--- kernel-2.6.18-417.el5.orig/fs/ecryptfs/file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ecryptfs/file.c	2017-01-13 08:40:40.000000000 -0500
@@ -43,15 +43,15 @@
  * The function to be used for directory reads is ecryptfs_read.
  */
 static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
-				char __user *buf,
-				size_t count, loff_t pos)
+				const struct iovec *iov, unsigned long nr_segs,
+				loff_t pos)
 {
 	int rc;
 	struct dentry *lower_dentry;
 	struct vfsmount *lower_vfsmount;
 	struct file *file = iocb->ki_filp;
 
-	rc = generic_file_aio_read(iocb, buf, count, pos);
+	rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
 	/*
 	 * Even though this is a async interface, we need to wait
 	 * for IO to finish to update atime
diff -upr kernel-2.6.18-417.el5.orig/fs/eventpoll.c kernel-2.6.18-417.el5-028stab121/fs/eventpoll.c
--- kernel-2.6.18-417.el5.orig/fs/eventpoll.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/eventpoll.c	2017-01-13 08:40:41.000000000 -0500
@@ -102,12 +102,6 @@
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
 
-struct epoll_filefd {
-	struct file *file;
-	int fd;
-	int added;
-};
-
 /*
  * Structure used to track possible nested calls, for too deep recursions
  * and loop cycles.
@@ -149,42 +143,6 @@ struct poll_safewake {
 	spinlock_t lock;
 };
 
-/*
- * This structure is stored inside the "private_data" member of the file
- * structure and rapresent the main data sructure for the eventpoll
- * interface.
- */
-struct eventpoll {
-	/* Protect the this structure access */
-	rwlock_t lock;
-
-	/*
-	 * This semaphore is used to ensure that files are not removed
-	 * while epoll is using them. This is read-held during the event
-	 * collection loop and it is write-held during the file cleanup
-	 * path, the epoll file exit code and the ctl operations.
-	 */
-	struct rw_semaphore sem;
-
-	/* Wait queue used by sys_epoll_wait() */
-	wait_queue_head_t wq;
-
-	/* Wait queue used by file->poll() */
-	wait_queue_head_t poll_wait;
-
-	/* List of ready file descriptors */
-	struct list_head rdllist;
-
-	/* RB-Tree root used to store monitored fd structs */
-	struct rb_root rbr;
-
-	struct file *file;
-
-	/* used to optimize loop detection check */
-	int visited;
-	struct list_head visitedllink;
-};
-
 /* Wait structure used by the poll hooks */
 struct eppoll_entry {
 	/* List header used to link this structure to the "struct epitem" */
@@ -203,51 +161,6 @@ struct eppoll_entry {
 	wait_queue_head_t *whead;
 };
 
-/*
- * Each file descriptor added to the eventpoll interface will
- * have an entry of this type linked to the hash.
- */
-struct epitem {
-	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
-	struct rb_node rbn;
-
-	/* List header used to link this structure to the eventpoll ready list */
-	struct list_head rdllink;
-
-	/* The file descriptor information this item refers to */
-	struct epoll_filefd ffd;
-
-	/* Number of active wait queue attached to poll operations */
-	int nwait;
-
-	/* List containing poll wait queues */
-	struct list_head pwqlist;
-
-	/* The "container" of this item */
-	struct eventpoll *ep;
-
-	/* The structure that describe the interested events and the source fd */
-	struct epoll_event event;
-
-	/*
-	 * Used to keep track of the usage count of the structure. This avoids
-	 * that the structure will desappear from underneath our processing.
-	 */
-	atomic_t usecnt;
-
-	/* List header used to link this item to the "struct file" items list */
-	struct list_head fllink;
-
-	/* List header used to link the item to the transfer list */
-	struct list_head txlink;
-
-	/*
-	 * This is used during the collection/transfer of events to userspace
-	 * to pin items empty events set.
-	 */
-	unsigned int revents;
-};
-
 /* Wrapper struct used by poll queueing */
 struct ep_pqueue {
 	poll_table pt;
@@ -257,17 +170,13 @@ struct ep_pqueue {
 static void ep_poll_safewake_init(struct poll_safewake *psw);
 static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
 static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-		    struct eventpoll *ep);
+		    struct eventpoll *ep, int flags);
 static int ep_alloc(struct eventpoll **pep);
 static void ep_free(struct eventpoll *ep);
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
 static void ep_use_epitem(struct epitem *epi);
-static void ep_release_epitem(struct epitem *epi);
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt);
 static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi);
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-		     struct file *tfile, int fd);
 static int ep_modify(struct eventpoll *ep, struct epitem *epi,
 		     struct epoll_event *event);
 static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi);
@@ -295,7 +204,8 @@ static int eventpollfs_get_sb(struct fil
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
  */
-static struct mutex epmutex;
+struct mutex epmutex;
+EXPORT_SYMBOL_GPL(epmutex);
 
 /* Used to check for epoll file descriptor inclusion loops */
 static struct nested_calls poll_loop_ncalls;
@@ -361,7 +271,7 @@ static void clear_added_flag(struct tfil
 		tfile_check_iter->tfile_arr[i]->added = 0;
 }
 
-static void clear_tfile_check_list(void)
+void clear_tfile_check_list(void)
 {
 	struct tfile_check *tfile_check_iter, *tmp;
 
@@ -378,12 +288,14 @@ static void clear_tfile_check_list(void)
 	}
 	current_tfile_check = &base_tfile_check;
 }
+EXPORT_SYMBOL_GPL(clear_tfile_check_list);
 
 /* File callbacks that implement the eventpoll file behaviour */
-static const struct file_operations eventpoll_fops = {
+const struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
 	.poll		= ep_eventpoll_poll
 };
+EXPORT_SYMBOL_GPL(eventpoll_fops);
 
 /* Fast test to see if the file is an eventpoll file */
 static inline int is_file_epoll(struct file *f)
@@ -695,6 +607,53 @@ static void ep_poll_safewake(struct poll
 	spin_unlock_irqrestore(&psw->lock, flags);
 }
 
+#ifdef TIF_RESTORE_SIGMASK
+
+/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_pwait(2).
+ */
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+		int maxevents, int timeout, const sigset_t __user *sigmask,
+		size_t sigsetsize)
+{
+	int error;
+	sigset_t ksigmask, sigsaved;
+
+	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here.
+	 */
+	if (sigmask) {
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	error = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 * In case we've got a signal while waiting, we do not restore the
+	 * signal mask yet, and we allow do_signal() to deliver the signal on
+	 * the way back to userspace, before the signal mask is restored.
+	 */
+	if (sigmask) {
+		if (error == -EINTR) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_thread_flag(TIF_RESTORE_SIGMASK);
+		} else
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return error;
+}
+
+#endif /* #ifdef TIF_RESTORE_SIGMASK */
 
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
@@ -739,34 +698,37 @@ void eventpoll_release_file(struct file 
  * file descriptors inside the epoll interface. It is the kernel part of
  * the userspace epoll_create(2).
  */
-asmlinkage long sys_epoll_create(int size)
+asmlinkage long sys_epoll_create1(int flags)
 {
 	int error, fd;
 	struct eventpoll *ep;
 	struct inode *inode;
 	struct file *file;
 
+	/* Check the EPOLL_* constant for consistency.  */
+	if (flags & ~EPOLL_CLOEXEC)
+		return -EINVAL;
+
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
-		     current, size));
+		     current, flags));
 
 	/*
-	 * Sanity check on the size parameter, and create the internal data
-	 * structure ( "struct eventpoll" ).
+	 * Create the internal data structure ( "struct eventpoll" ).
 	 */
-	error = -EINVAL;
-	if (size <= 0 || (error = ep_alloc(&ep)) != 0)
+	error = ep_alloc(&ep);
+	if (error)
 		goto eexit_1;
 
 	/*
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure, and inode and a free file descriptor.
 	 */
-	error = ep_getfd(&fd, &inode, &file, ep);
+	error = ep_getfd(&fd, &inode, &file, ep, flags);
 	if (error)
 		goto eexit_2;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-		     current, size, fd));
+		     current, flags, fd));
 
 	return fd;
 
@@ -775,10 +737,18 @@ eexit_2:
 	kfree(ep);
 eexit_1:
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-		     current, size, error));
+		     current, flags, error));
 	return error;
 }
 
+asmlinkage long sys_epoll_create(int size)
+{
+	if (size < 0)
+		return -EINVAL;
+
+	return sys_epoll_create1(0);
+}
+EXPORT_SYMBOL_GPL(sys_epoll_create);
 
 /*
  * The following function implements the controller interface for
@@ -1076,7 +1046,7 @@ static int reverse_path_check(void)
  * Creates the file descriptor to be used by the epoll interface.
  */
 static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-		    struct eventpoll *ep)
+		    struct eventpoll *ep, int flags)
 {
 	struct qstr this;
 	char name[32];
@@ -1098,7 +1068,7 @@ static int ep_getfd(int *efd, struct ino
 		goto eexit_2;
 
 	/* Allocates a free descriptor to plug the file onto */
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto eexit_3;
 	fd = error;
@@ -1218,7 +1188,7 @@ static void ep_free(struct eventpoll *ep
  * the returned item, so the caller must call ep_release_epitem()
  * after finished using the "struct epitem".
  */
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 {
 	int kcmp;
 	unsigned long flags;
@@ -1248,6 +1218,7 @@ static struct epitem *ep_find(struct eve
 
 	return epir;
 }
+EXPORT_SYMBOL_GPL(ep_find);
 
 
 /*
@@ -1266,13 +1237,13 @@ static void ep_use_epitem(struct epitem 
  * has finished using the structure. It might lead to freeing the
  * structure itself if the count goes to zero.
  */
-static void ep_release_epitem(struct epitem *epi)
+void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
 		kmem_cache_free(epi_cache, epi);
 }
-
+EXPORT_SYMBOL_GPL(ep_release_epitem);
 
 /*
  * This is the callback that is used to add our wait queue to the
@@ -1318,7 +1289,7 @@ static void ep_rbtree_insert(struct even
 }
 
 
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		     struct file *tfile, int fd)
 {
 	int error, revents, pwake = 0;
@@ -1428,7 +1399,7 @@ eexit_2:
 eexit_1:
 	return error;
 }
-
+EXPORT_SYMBOL_GPL(ep_insert);
 
 /*
  * Modify the interest event mask by dropping an event if the new mask
diff -upr kernel-2.6.18-417.el5.orig/fs/exec.c kernel-2.6.18-417.el5-028stab121/fs/exec.c
--- kernel-2.6.18-417.el5.orig/fs/exec.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/exec.c	2017-01-13 08:40:41.000000000 -0500
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/a.out.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -53,6 +54,9 @@
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 #include <trace/signal.h>
+#include <linux/grsecurity.h>
+
+#include <ub/ub_vmpages.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -69,6 +73,8 @@ int suid_dumpable = 0;
 EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
+int sysctl_at_vsyscall;
+
 static struct linux_binfmt *formats;
 static DEFINE_RWLOCK(binfmt_lock);
 
@@ -259,9 +265,13 @@ static int __bprm_mm_init(struct linux_b
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
 
-	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (ub_memory_charge(mm, PAGE_SIZE, VM_STACK_FLAGS, NULL, UB_SOFT))
+		goto err_charge;
+
+	bprm->vma = vma = allocate_vma(mm, GFP_KERNEL);
 	if (!vma)
 		goto err;
+	memset(vma, 0, sizeof(*vma));
 
 	down_write(&mm->mmap_sem);
 	vma->vm_mm = mm;
@@ -300,9 +310,10 @@ static int __bprm_mm_init(struct linux_b
 err:
 	if (vma) {
 		bprm->vma = NULL;
-		kmem_cache_free(vm_area_cachep, vma);
+		free_vma(mm, vma);
 	}
-
+	ub_memory_uncharge(mm, PAGE_SIZE, VM_STACK_FLAGS, NULL);
+err_charge:
 	return err;
 }
 
@@ -561,6 +572,8 @@ static int shift_arg_pages(struct vm_are
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
 	struct mmu_gather *tlb;
+	unsigned long moved;
+	struct vm_area_struct *prev;
 
 	BUG_ON(new_start > new_end);
 
@@ -580,9 +593,9 @@ static int shift_arg_pages(struct vm_are
 	 * move the page tables downwards, on failure we rely on
 	 * process cleanup to remove whatever mess we made.
 	 */
-	if (length != move_page_tables(vma, old_start,
-				       vma, new_start, length))
-		return -ENOMEM;
+	moved = move_page_tables(vma, old_start, vma, new_start, length);
+	if (length != moved)
+		goto undo;
 
 	lru_add_drain();
 	tlb = tlb_gather_mmu(mm, 0);
@@ -610,6 +623,41 @@ static int shift_arg_pages(struct vm_are
 	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
 
 	return 0;
+
+undo:
+	/*
+	 * move pages back.
+	 */
+	length = move_page_tables(vma, new_start, vma, old_start, moved);
+	if (length != moved) {
+		WARN_ON(1);
+		return -EFAULT;
+	}
+
+	/*
+	 * release page tables.
+	 */
+	if (find_vma_prev(mm, vma->vm_start, &prev) != vma) {
+		WARN_ON(1);
+		return -EFAULT;
+	}
+	tlb = tlb_gather_mmu(mm, 0);
+	if (new_end > old_start)
+		free_pgd_range(&tlb, new_start, old_start,
+				prev ? prev->vm_end : FIRST_USER_ADDRESS,
+				old_start);
+	else
+		free_pgd_range(&tlb, new_start, new_end,
+				prev ? prev->vm_end : FIRST_USER_ADDRESS,
+				old_start);
+	tlb_finish_mmu(tlb, new_start, new_end);
+
+	/*
+	 * shrink the vma to the old range.
+	 */
+	vma_adjust(vma, old_start, old_end, vma->vm_pgoff, NULL);
+
+	return -ENOMEM;
 }
 
 #define EXTRA_STACK_VM_PAGES	20	/* random */
@@ -764,10 +812,11 @@ int kernel_read(struct file *file, unsig
 
 EXPORT_SYMBOL(kernel_read);
 
-static int exec_mmap(struct mm_struct *mm)
+static int exec_mmap(struct linux_binprm *bprm)
 {
 	struct task_struct *tsk;
-	struct mm_struct * old_mm, *active_mm;
+	struct mm_struct *old_mm, *active_mm, *mm;
+	int ret;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -789,6 +838,10 @@ static int exec_mmap(struct mm_struct *m
 			return -EINTR;
 		}
 	}
+
+	ret = 0;
+	mm = bprm->mm;
+	mm->vps_dumpable = 1;
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
 	tsk->mm = mm;
@@ -797,14 +850,24 @@ static int exec_mmap(struct mm_struct *m
 	tsk->flags |= PF_NEWMM_OLDCREDS;
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
+	bprm->mm = NULL;		/* We're using it now */
+
+#ifdef CONFIG_VZ_GENCALLS
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXECMMAP,
+				bprm) & NOTIFY_FAIL) {
+		/* similar to binfmt_elf */
+		send_sig(SIGKILL, current, 0);
+		ret = -ENOMEM;
+	}
+#endif
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
 		BUG_ON(active_mm != old_mm);
 		mmput(old_mm);
-		return 0;
+		return ret;
 	}
 	mmdrop(active_mm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -944,7 +1007,16 @@ static int de_thread(struct task_struct 
 		attach_pid(current, PIDTYPE_PID,  current->pid);
 		attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
 		attach_pid(current, PIDTYPE_SID,  current->signal->session);
+		set_virt_tgid(leader, virt_pid(current));
+		set_virt_pid(leader, virt_pid(current));
+		set_virt_pid(current, virt_tgid(current));
 		list_replace_rcu(&leader->tasks, &current->tasks);
+#ifdef CONFIG_VE
+		list_replace_rcu(&leader->ve_task_info.vetask_list,
+				&current->ve_task_info.vetask_list);
+		list_replace(&leader->ve_task_info.aux_list,
+			     &current->ve_task_info.aux_list);
+#endif
 
 		current->group_leader = current;
 		leader->group_leader = current;
@@ -1087,12 +1159,10 @@ int flush_old_exec(struct linux_binprm *
 	 * Release all of the old mmap stuff
 	 */
 	acct_arg_size(bprm, 0);
-	retval = exec_mmap(bprm->mm);
+	retval = exec_mmap(bprm);
 	if (retval)
 		goto mmap_failed;
 
-	bprm->mm = NULL;		/* We're using it now */
-
 	put_files_struct(files);
 
 	current->flags &= ~PF_RANDOMIZE;
@@ -1401,6 +1471,10 @@ int do_execve(char * filename,
 	struct file *file;
 	int retval;
 
+	retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
+	if (retval)
+		return retval;
+
 	retval = -ENOMEM;
 	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
@@ -1450,6 +1524,11 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
+	if (!gr_tpe_allow(file)) {
+		retval = -EACCES;
+		goto out;
+	}
+
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
 		/* execve success */
@@ -1538,7 +1617,7 @@ static int format_corename(char *corenam
 			case 'p':
 				pid_in_pattern = 1;
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->tgid);
+					      "%d", virt_tgid(current));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1582,7 +1661,7 @@ static int format_corename(char *corenam
 			case 'h':
 				down_read(&uts_sem);
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", system_utsname.nodename);
+					      "%s", utsname()->nodename);
 				up_read(&uts_sem);
 				if (rc > out_end - out_ptr)
 					goto out;
@@ -1618,7 +1697,7 @@ static int format_corename(char *corenam
 	if (!ispipe && !pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", current->tgid);
+			      ".%d", virt_tgid(current));
 		if (rc > out_end - out_ptr)
 			goto out;
 		out_ptr += rc;
@@ -1666,7 +1745,7 @@ static inline int zap_threads(struct tas
 		goto done;
 
 	rcu_read_lock();
-	for_each_process(g) {
+	for_each_process_ve(g) {
 		if (g == tsk->group_leader)
 			continue;
 
@@ -1760,7 +1839,7 @@ int do_coredump(long signr, int exit_cod
 	/*
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
-	if (mm->core_waiters || !mm->dumpable) {
+	if (mm->core_waiters || !mm->dumpable || mm->vps_dumpable != 1) {
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
diff -upr kernel-2.6.18-417.el5.orig/fs/ext2/namei.c kernel-2.6.18-417.el5-028stab121/fs/ext2/namei.c
--- kernel-2.6.18-417.el5.orig/fs/ext2/namei.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext2/namei.c	2017-01-13 08:40:24.000000000 -0500
@@ -31,6 +31,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -260,6 +261,8 @@ static int ext2_unlink(struct inode * di
 	struct page * page;
 	int err = -ENOENT;
 
+	DQUOT_INIT(inode);
+
 	de = ext2_find_entry (dir, dentry, &page);
 	if (!de)
 		goto out;
@@ -302,6 +305,9 @@ static int ext2_rename (struct inode * o
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
+	if (new_inode)
+		DQUOT_INIT(new_inode);
+
 	old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
 	if (!old_de)
 		goto out;
diff -upr kernel-2.6.18-417.el5.orig/fs/ext2/super.c kernel-2.6.18-417.el5-028stab121/fs/ext2/super.c
--- kernel-2.6.18-417.el5.orig/fs/ext2/super.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext2/super.c	2017-01-13 08:40:19.000000000 -0500
@@ -365,7 +365,6 @@ static int parse_options (char * options
 {
 	char * p;
 	substring_t args[MAX_OPT_ARGS];
-	unsigned long kind = EXT2_MOUNT_ERRORS_CONT;
 	int option;
 
 	if (!options)
@@ -405,13 +404,19 @@ static int parse_options (char * options
 			/* *sb_block = match_int(&args[0]); */
 			break;
 		case Opt_err_panic:
-			kind = EXT2_MOUNT_ERRORS_PANIC;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
 			break;
 		case Opt_err_ro:
-			kind = EXT2_MOUNT_ERRORS_RO;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_RO);
 			break;
 		case Opt_err_cont:
-			kind = EXT2_MOUNT_ERRORS_CONT;
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_CONT);
 			break;
 		case Opt_nouid32:
 			set_opt (sbi->s_mount_opt, NO_UID32);
@@ -490,7 +495,6 @@ static int parse_options (char * options
 			return 0;
 		}
 	}
-	sbi->s_mount_opt |= kind;
 	return 1;
 }
 
@@ -703,13 +707,20 @@ static int ext2_fill_super(struct super_
 		set_opt(sbi->s_mount_opt, NO_UID32);
 	if (def_mount_opts & EXT2_DEFM_XATTR_USER)
 		set_opt(sbi->s_mount_opt, XATTR_USER);
-	if (def_mount_opts & EXT2_DEFM_ACL)
+	if (def_mount_opts & EXT2_DEFM_ACL) {
 		set_opt(sbi->s_mount_opt, POSIX_ACL);
+#ifndef CONFIG_EXT2_FS_POSIX_ACL
+		ext2_warning(sb, __FUNCTION__,
+			"Kernel is built without ACL support!");
+#endif
+	}
 	
 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -1034,7 +1045,7 @@ static int ext2_remount (struct super_bl
 	es = sbi->s_es;
 	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
 	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
-	    invalidate_inodes(sb))
+	    invalidate_inodes(sb, 0))
 		ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
 			     "xip remain in cache (no functional problem)");
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
@@ -1244,7 +1255,7 @@ static struct file_system_type ext2_fs_t
 	.name		= "ext2",
 	.get_sb		= ext2_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_FIEMAP,
+	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_FIEMAP | FS_VIRTUALIZED,
 };
 
 static int __init init_ext2_fs(void)
diff -upr kernel-2.6.18-417.el5.orig/fs/ext3/file.c kernel-2.6.18-417.el5-028stab121/fs/ext3/file.c
--- kernel-2.6.18-417.el5.orig/fs/ext3/file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext3/file.c	2017-01-13 08:40:40.000000000 -0500
@@ -48,14 +48,15 @@ static int ext3_release_file (struct ino
 }
 
 static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_dentry->d_inode;
 	ssize_t ret;
 	int err;
 
-	ret = generic_file_aio_write(iocb, buf, count, pos);
+	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
 
 	/*
 	 * Skip flushing if there was an error, or if nothing was written.
diff -upr kernel-2.6.18-417.el5.orig/fs/ext3/inode.c kernel-2.6.18-417.el5-028stab121/fs/ext3/inode.c
--- kernel-2.6.18-417.el5.orig/fs/ext3/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext3/inode.c	2017-01-13 08:40:15.000000000 -0500
@@ -2807,8 +2807,10 @@ void ext3_read_inode(struct inode * inod
 		 */
 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
 		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-		    EXT3_INODE_SIZE(inode->i_sb))
+		    EXT3_INODE_SIZE(inode->i_sb)) {
+			brelse(bh);
 			goto bad_inode;
+		}
 		if (ei->i_extra_isize == 0) {
 			/* The extra space is currently unused. Use it. */
 			ei->i_extra_isize = sizeof(struct ext3_inode) -
diff -upr kernel-2.6.18-417.el5.orig/fs/ext3/ioctl.c kernel-2.6.18-417.el5-028stab121/fs/ext3/ioctl.c
--- kernel-2.6.18-417.el5.orig/fs/ext3/ioctl.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext3/ioctl.c	2017-01-13 08:40:16.000000000 -0500
@@ -72,7 +72,7 @@ int ext3_ioctl (struct inode * inode, st
 		 * the relevant capability.
 		 */
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE)) {
+			if (!capable(CAP_SYS_ADMIN)) {
 				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
 			}
diff -upr kernel-2.6.18-417.el5.orig/fs/ext3/namei.c kernel-2.6.18-417.el5-028stab121/fs/ext3/namei.c
--- kernel-2.6.18-417.el5.orig/fs/ext3/namei.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext3/namei.c	2017-01-13 08:40:15.000000000 -0500
@@ -1001,6 +1001,7 @@ static struct buffer_head * ext3_dx_find
 				  (block<<EXT3_BLOCK_SIZE_BITS(sb))
 					  +((char *)de - bh->b_data))) {
 				brelse (bh);
+				*err = ERR_BAD_DX_DIR;
 				goto errout;
 			}
 			*res_dir = de;
@@ -1194,9 +1195,9 @@ static struct ext3_dir_entry_2 *do_split
 	char *data1 = (*bh)->b_data, *data2;
 	unsigned split, move, size, i;
 	struct ext3_dir_entry_2 *de = NULL, *de2;
-	int	err;
+	int	err = 0;
 
-	bh2 = ext3_append (handle, dir, &newblock, error);
+	bh2 = ext3_append (handle, dir, &newblock, &err);
 	if (!(bh2)) {
 		brelse(*bh);
 		*bh = NULL;
@@ -1205,14 +1206,9 @@ static struct ext3_dir_entry_2 *do_split
 
 	BUFFER_TRACE(*bh, "get_write_access");
 	err = ext3_journal_get_write_access(handle, *bh);
-	if (err) {
-	journal_error:
-		brelse(*bh);
-		brelse(bh2);
-		*bh = NULL;
-		ext3_std_error(dir->i_sb, err);
-		goto errout;
-	}
+	if (err)
+		goto journal_error;
+
 	BUFFER_TRACE(frame->bh, "get_write_access");
 	err = ext3_journal_get_write_access(handle, frame->bh);
 	if (err)
@@ -1266,8 +1262,16 @@ static struct ext3_dir_entry_2 *do_split
 		goto journal_error;
 	brelse (bh2);
 	dxtrace(dx_show_index ("frame", frame->entries));
-errout:
 	return de;
+
+journal_error:
+	brelse(*bh);
+	brelse(bh2);
+	*bh = NULL;
+	ext3_std_error(dir->i_sb, err);
+errout:
+	*error = err;
+	return NULL;
 }
 #endif
 
@@ -1364,7 +1368,7 @@ static int add_dirent_to_buf(handle_t *h
 	if (err)
 		ext3_std_error(dir->i_sb, err);
 	brelse(bh);
-	return 0;
+	return err;
 }
 
 #ifdef CONFIG_EXT3_INDEX
diff -upr kernel-2.6.18-417.el5.orig/fs/ext3/super.c kernel-2.6.18-417.el5-028stab121/fs/ext3/super.c
--- kernel-2.6.18-417.el5.orig/fs/ext3/super.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext3/super.c	2017-01-13 08:40:19.000000000 -0500
@@ -159,20 +159,21 @@ static void ext3_handle_error(struct sup
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	if (test_opt (sb, ERRORS_RO)) {
-		printk (KERN_CRIT "Remounting filesystem read-only\n");
-		sb->s_flags |= MS_RDONLY;
-	} else {
+	if (!test_opt (sb, ERRORS_CONT)) {
 		journal_t *journal = EXT3_SB(sb)->s_journal;
 
 		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
 		if (journal)
 			journal_abort(journal, -EIO);
 	}
-	if (test_opt(sb, ERRORS_PANIC))
-		panic("EXT3-fs (device %s): panic forced after error\n",
-			sb->s_id);
+	if (test_opt (sb, ERRORS_RO)) {
+		printk (KERN_CRIT "Remounting filesystem read-only\n");
+		sb->s_flags |= MS_RDONLY;
+	}
 	ext3_commit_super(sb, es, 1);
+	if (test_opt (sb, ERRORS_PANIC))
+		panic ("EXT3-fs (device %s): panic forced after error\n",
+				sb->s_id);
 }
 
 void ext3_error (struct super_block * sb, const char * function,
@@ -1491,8 +1492,13 @@ static int ext3_fill_super (struct super
 		set_opt(sbi->s_mount_opt, NO_UID32);
 	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
 		set_opt(sbi->s_mount_opt, XATTR_USER);
-	if (def_mount_opts & EXT3_DEFM_ACL)
+	if (def_mount_opts & EXT3_DEFM_ACL) {
 		set_opt(sbi->s_mount_opt, POSIX_ACL);
+#ifndef CONFIG_EXT3_FS_POSIX_ACL
+		ext3_warning(sb, __FUNCTION__,
+			"Kernel is built without ACL support!");
+#endif
+	}
 	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
 		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
 	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
@@ -1504,6 +1510,8 @@ static int ext3_fill_super (struct super
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2810,7 +2818,7 @@ static struct file_system_type ext3_fs_t
 	.get_sb		= ext3_get_sb,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV|FS_HAS_FIEMAP|FS_HAS_FREEZE
-			 |FS_HAS_TRYTOFREE,
+			 |FS_HAS_TRYTOFREE|FS_VIRTUALIZED,
 };
 
 static int __init init_ext3_fs(void)
diff -upr kernel-2.6.18-417.el5.orig/fs/ext3/xattr.c kernel-2.6.18-417.el5-028stab121/fs/ext3/xattr.c
--- kernel-2.6.18-417.el5.orig/fs/ext3/xattr.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext3/xattr.c	2017-01-13 08:40:16.000000000 -0500
@@ -478,8 +478,15 @@ ext3_xattr_release_block(handle_t *handl
 			 struct buffer_head *bh)
 {
 	struct mb_cache_entry *ce = NULL;
+	int error = 0;
 
 	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	error = ext3_journal_get_write_access(handle, bh);
+	if (error)
+		 goto out;
+
+	lock_buffer(bh);
+
 	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
 		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
@@ -488,21 +495,21 @@ ext3_xattr_release_block(handle_t *handl
 		get_bh(bh);
 		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
 	} else {
-		if (ext3_journal_get_write_access(handle, bh) == 0) {
-			lock_buffer(bh);
-			BHDR(bh)->h_refcount = cpu_to_le32(
+		BHDR(bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
-			ext3_journal_dirty_metadata(handle, bh);
-			if (IS_SYNC(inode))
-				handle->h_sync = 1;
-			DQUOT_FREE_BLOCK(inode, 1);
-			unlock_buffer(bh);
-			ea_bdebug(bh, "refcount now=%d; releasing",
-				  le32_to_cpu(BHDR(bh)->h_refcount));
-		}
+		error = ext3_journal_dirty_metadata(handle, bh);
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
+		DQUOT_FREE_BLOCK(inode, 1);
+		ea_bdebug(bh, "refcount now=%d; releasing",
+			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
 			mb_cache_entry_release(ce);
 	}
+	unlock_buffer(bh);
+out:
+	ext3_std_error(inode->i_sb, error);
+	return;
 }
 
 struct ext3_xattr_info {
@@ -678,7 +685,7 @@ ext3_xattr_block_set(handle_t *handle, s
 	struct buffer_head *new_bh = NULL;
 	struct ext3_xattr_search *s = &bs->s;
 	struct mb_cache_entry *ce = NULL;
-	int error;
+	int error = 0;
 
 #define header(x) ((struct ext3_xattr_header *)(x))
 
@@ -687,16 +694,17 @@ ext3_xattr_block_set(handle_t *handle, s
 	if (s->base) {
 		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
 					bs->bh->b_blocknr);
+		error = ext3_journal_get_write_access(handle, bs->bh);
+		if (error)
+			goto cleanup;
+		lock_buffer(bs->bh);
+
 		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 			if (ce) {
 				mb_cache_entry_free(ce);
 				ce = NULL;
 			}
 			ea_bdebug(bs->bh, "modifying in-place");
-			error = ext3_journal_get_write_access(handle, bs->bh);
-			if (error)
-				goto cleanup;
-			lock_buffer(bs->bh);
 			error = ext3_xattr_set_entry(i, s);
 			if (!error) {
 				if (!IS_LAST_ENTRY(s->first))
@@ -716,6 +724,9 @@ ext3_xattr_block_set(handle_t *handle, s
 		} else {
 			int offset = (char *)s->here - bs->bh->b_data;
 
+			unlock_buffer(bs->bh);
+			journal_release_buffer(handle, bs->bh);
+
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
diff -upr kernel-2.6.18-417.el5.orig/fs/ext4/file.c kernel-2.6.18-417.el5-028stab121/fs/ext4/file.c
--- kernel-2.6.18-417.el5.orig/fs/ext4/file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext4/file.c	2017-01-13 08:40:40.000000000 -0500
@@ -86,8 +86,8 @@ ext4_unaligned_aio(struct inode *inode, 
 }
 
 static ssize_t
-ext4_file_write(struct kiocb *iocb, const char __user *buf,
-		size_t count, loff_t pos)
+ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_dentry->d_inode;
@@ -106,11 +106,11 @@ ext4_file_write(struct kiocb *iocb, cons
 		if (pos > sbi->s_bitmap_maxbytes)
 			return -EFBIG;
 
-		if (pos + count > sbi->s_bitmap_maxbytes)
-			count = sbi->s_bitmap_maxbytes - pos;
+		if (pos + iov_length(iov, nr_segs) > sbi->s_bitmap_maxbytes)
+			return -EFBIG;
 	} else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
 		            !is_sync_kiocb(iocb)))
-		unaligned_aio = ext4_unaligned_aio(inode, count, pos);
+		unaligned_aio = ext4_unaligned_aio(inode, nr_segs, pos);
 
 	/* Unaligned direct AIO must be serialized; see comment above */
 	if (unaligned_aio) {
@@ -127,7 +127,7 @@ ext4_file_write(struct kiocb *iocb, cons
 		ext4_aiodio_wait(inode);
  	}
 
-	ret = generic_file_aio_write(iocb, buf, count, pos);
+	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
 
 	if (unaligned_aio)
 		mutex_unlock(&EXT4_I(inode)->i_aio_mutex);
diff -upr kernel-2.6.18-417.el5.orig/fs/ext4/inode.c kernel-2.6.18-417.el5-028stab121/fs/ext4/inode.c
--- kernel-2.6.18-417.el5.orig/fs/ext4/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ext4/inode.c	2017-01-13 08:40:16.000000000 -0500
@@ -5926,8 +5926,14 @@ int ext4_page_mkwrite(struct vm_area_str
 	int ret = -EINVAL;
 	void *fsdata;
 	struct file *file = vma->vm_file;
-	struct inode *inode = file->f_dentry->d_inode;
-	struct address_space *mapping = inode->i_mapping;
+	struct inode *inode;
+	struct address_space *mapping;
+
+	if (file->f_op->get_host)
+		file = file->f_op->get_host(file);
+
+	inode = file->f_dentry->d_inode;
+	mapping = inode->i_mapping;
 
 	/*
 	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
diff -upr kernel-2.6.18-417.el5.orig/fs/fcntl.c kernel-2.6.18-417.el5-028stab121/fs/fcntl.c
--- kernel-2.6.18-417.el5.orig/fs/fcntl.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/fcntl.c	2017-01-13 08:40:40.000000000 -0500
@@ -134,9 +134,9 @@ int dupfd(struct file *file, unsigned in
 	return fd;
 }
 
-EXPORT_SYMBOL_GPL(dupfd);
+EXPORT_SYMBOL(dupfd);
 
-asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+static long do_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
 	struct file * file, *tofree;
@@ -174,7 +174,10 @@ asmlinkage long sys_dup2(unsigned int ol
 
 	rcu_assign_pointer(fdt->fd[newfd], file);
 	FD_SET(newfd, fdt->open_fds);
-	FD_CLR(newfd, fdt->close_on_exec);
+	if (flags & O_CLOEXEC)
+		FD_SET(newfd, fdt->close_on_exec);
+	else
+		FD_CLR(newfd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
@@ -192,6 +195,23 @@ out_fput:
 	goto out;
 }
 
+asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+{
+	return do_dup3(oldfd, newfd, 0);
+}
+EXPORT_SYMBOL_GPL(sys_dup2);
+
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
+{
+	if ((flags & ~O_CLOEXEC) != 0)
+		return -EINVAL;
+
+	if (unlikely(oldfd == newfd))
+		return -EINVAL;
+
+	return do_dup3(oldfd, newfd, flags);
+}
+
 asmlinkage long sys_dup(unsigned int fildes)
 {
 	int ret = -EBADF;
@@ -209,6 +229,9 @@ static int setfl(int fd, struct file * f
 	struct inode * inode = filp->f_dentry->d_inode;
 	int error = 0;
 
+	if (!capable(CAP_SYS_RAWIO) && !odirect_enable)
+		arg &= ~O_DIRECT;
+
 	/*
 	 * O_APPEND cannot be cleared if the file is marked as append-only
 	 * and the file is open for write.
@@ -255,6 +278,7 @@ static int setfl(int fd, struct file * f
 static void f_modown(struct file *filp, unsigned long pid,
                      uid_t uid, uid_t euid, int force)
 {
+	pid = comb_vpid_to_pid(pid);
 	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
 		filp->f_owner.pid = pid;
@@ -321,7 +345,7 @@ static long do_fcntl(int fd, unsigned in
 		 * current syscall conventions, the only way
 		 * to fix this will be in libc.
 		 */
-		err = filp->f_owner.pid;
+		err = comb_pid_to_vpid(filp->f_owner.pid);
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
@@ -472,23 +496,29 @@ static void send_sigio_to_task(struct ta
 void send_sigio(struct fown_struct *fown, int fd, int band)
 {
 	struct task_struct *p;
+	struct file *f;
+	struct ve_struct *ve;
 	int pid;
 	
 	read_lock(&fown->lock);
 	pid = fown->pid;
 	if (!pid)
 		goto out_unlock_fown;
+
+	/* hack: fown's are always embedded in struct file */
+	f = container_of(fown, struct file, f_owner);
+	ve = f->owner_env;
 	
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
 			send_sigio_to_task(p, fown, fd, band);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
 			send_sigio_to_task(p, fown, fd, band);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
@@ -505,6 +535,8 @@ static void send_sigurg_to_task(struct t
 int send_sigurg(struct fown_struct *fown)
 {
 	struct task_struct *p;
+	struct file *f;
+	struct ve_struct *ve;
 	int pid, ret = 0;
 	
 	read_lock(&fown->lock);
@@ -513,17 +545,19 @@ int send_sigurg(struct fown_struct *fown
 		goto out_unlock_fown;
 
 	ret = 1;
+	f = container_of(fown, struct file, f_owner);
+	ve = f->owner_env;
 	
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
 			send_sigurg_to_task(p, fown);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
 			send_sigurg_to_task(p, fown);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
diff -upr kernel-2.6.18-417.el5.orig/fs/file.c kernel-2.6.18-417.el5-028stab121/fs/file.c
--- kernel-2.6.18-417.el5.orig/fs/file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/file.c	2017-01-13 08:40:24.000000000 -0500
@@ -8,6 +8,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -18,6 +19,8 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
+#include <ub/ub_mem.h>
+
 struct fdtable_defer {
 	spinlock_t lock;
 	struct work_struct wq;
@@ -45,10 +48,10 @@ struct file ** alloc_fd_array(int num)
 	struct file **new_fds;
 	int size = num * sizeof(struct file *);
 
-	new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+	new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
 	if (new_fds != NULL)
 		return new_fds;
-	new_fds = (struct file **) vmalloc(size);
+	new_fds = (struct file **) ub_vmalloc(size);
 	return new_fds;
 }
 
@@ -216,11 +219,11 @@ fd_set * alloc_fdset(int num)
 	fd_set *new_fdset;
 	int size = num / 8;
 
-	new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
+	new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
 	if (new_fdset != NULL)
 		return new_fdset;
 
-	new_fdset = (fd_set *) vmalloc(size);
+	new_fdset = (fd_set *) ub_vmalloc(size);
 	return new_fdset;
 }
 
@@ -238,7 +241,7 @@ static struct fdtable *alloc_fdtable(int
   	fd_set *new_openset = NULL, *new_execset = NULL;
 	struct file **new_fds;
 
-	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
+	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL_UBC);
 	if (!fdt)
   		goto out;
 
@@ -304,7 +307,7 @@ out:
  * both fd array and fdset. It is expected to be called with the
  * files_lock held.
  */
-static int expand_fdtable(struct files_struct *files, int nr)
+int expand_fdtable(struct files_struct *files, int nr)
 	__releases(files->file_lock)
 	__acquires(files->file_lock)
 {
@@ -349,6 +352,7 @@ static int expand_fdtable(struct files_s
 out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(expand_fdtable);
 
 /*
  * Expand files.
diff -upr kernel-2.6.18-417.el5.orig/fs/filesystems.c kernel-2.6.18-417.el5-028stab121/fs/filesystems.c
--- kernel-2.6.18-417.el5.orig/fs/filesystems.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/filesystems.c	2017-01-13 08:40:28.000000000 -0500
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h>	/* for 'current' */
+#include <linux/mount.h>
 #include <asm/uaccess.h>
 
 /*
@@ -22,8 +23,8 @@
  *	During the unload module must call unregister_filesystem().
  *	We can access the fields of list element if:
  *		1) spinlock is held or
- *		2) we hold the reference to the module.
- *	The latter can be guaranteed by call of try_module_get(); if it
+ *		2) we hold the reference to the element.
+ *	The latter can be guaranteed by call of try_get_filesystem(); if it
  *	returned 0 we must skip the element, otherwise we got the reference.
  *	Once the reference is obtained we can drop the spinlock.
  */
@@ -31,23 +32,45 @@
 static struct file_system_type *file_systems;
 static DEFINE_RWLOCK(file_systems_lock);
 
+int try_get_filesystem(struct file_system_type *fs)
+{
+	if (try_module_get(fs->owner)) {
+		get_ve(fs->owner_env);
+		return 1;
+	}
+	return 0;
+}
+
 /* WARNING: This can be used only if we _already_ own a reference */
 void get_filesystem(struct file_system_type *fs)
 {
+	get_ve(fs->owner_env);
 	__module_get(fs->owner);
 }
 
 void put_filesystem(struct file_system_type *fs)
 {
 	module_put(fs->owner);
+	put_ve(fs->owner_env);
 }
 
-static struct file_system_type **find_filesystem(const char *name)
+static inline int check_ve_fstype(struct file_system_type *p,
+		struct ve_struct *env)
+{
+	return ((p->fs_flags & FS_VIRTUALIZED) ||
+			ve_accessible_strict(p->owner_env, env));
+}
+
+static struct file_system_type **find_filesystem(const char *name,
+		struct ve_struct *env)
 {
 	struct file_system_type **p;
-	for (p=&file_systems; *p; p=&(*p)->next)
+	for (p=&file_systems; *p; p=&(*p)->next) {
+		if (!check_ve_fstype(*p, env))
+			continue;
 		if (strcmp((*p)->name,name) == 0)
 			break;
+	}
 	return p;
 }
 
@@ -74,8 +97,12 @@ int register_filesystem(struct file_syst
 	if (fs->next)
 		return -EBUSY;
 	INIT_LIST_HEAD(&fs->fs_supers);
+	if (fs->owner_env == NULL)
+		fs->owner_env = get_ve0();
+	if (fs->proto == NULL)
+		fs->proto = fs;
 	write_lock(&file_systems_lock);
-	p = find_filesystem(fs->name);
+	p = find_filesystem(fs->name, fs->owner_env);
 	if (*p)
 		res = -EBUSY;
 	else
@@ -119,6 +146,75 @@ int unregister_filesystem(struct file_sy
 
 EXPORT_SYMBOL(unregister_filesystem);
 
+#ifdef CONFIG_VE
+int register_ve_fs_type(struct ve_struct *ve, struct file_system_type *template,
+		struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
+{
+	struct vfsmount *mnt;
+	struct file_system_type *local_fs_type;
+	int ret;
+
+	local_fs_type = kzalloc(sizeof(*local_fs_type) + sizeof(void *),
+					GFP_KERNEL);
+	if (local_fs_type == NULL)
+		return -ENOMEM;
+
+	local_fs_type->name = template->name;
+	local_fs_type->fs_flags = template->fs_flags;
+	local_fs_type->get_sb = template->get_sb;
+	local_fs_type->kill_sb = template->kill_sb;
+	local_fs_type->owner = template->owner;
+	local_fs_type->owner_env = ve;
+	local_fs_type->proto = template;
+
+	get_filesystem(local_fs_type);	/* get_ve() inside */
+
+	ret = register_filesystem(local_fs_type);
+	if (ret)
+		goto reg_err;
+
+	if (p_mnt == NULL)
+		goto done;
+
+	mnt = kern_mount(local_fs_type);
+	if (IS_ERR(mnt))
+		goto mnt_err;
+
+	*p_mnt = mnt;
+done:
+	*p_fs_type = local_fs_type;
+	return 0;
+
+mnt_err:
+	ret = PTR_ERR(mnt);
+	unregister_filesystem(local_fs_type); /* does not put */
+
+reg_err:
+	put_filesystem(local_fs_type);
+	kfree(local_fs_type);
+	printk(KERN_DEBUG
+	       "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
+	return ret;
+}
+
+EXPORT_SYMBOL(register_ve_fs_type);
+
+void unregister_ve_fs_type(struct file_system_type *local_fs_type,
+		struct vfsmount *local_fs_mount)
+{
+	if (local_fs_mount == NULL && local_fs_type == NULL)
+		return;
+
+	unregister_filesystem(local_fs_type);
+	umount_ve_fs_type(local_fs_type, -1);
+	if (local_fs_mount)
+		kern_umount(local_fs_mount); /* alias to mntput, drop our ref */
+	put_filesystem(local_fs_type);
+}
+
+EXPORT_SYMBOL(unregister_ve_fs_type);
+#endif
+
 static int fs_index(const char __user * __name)
 {
 	struct file_system_type * tmp;
@@ -132,26 +228,49 @@ static int fs_index(const char __user * 
 
 	err = -EINVAL;
 	read_lock(&file_systems_lock);
-	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
+	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
 		if (strcmp(tmp->name,name) == 0) {
 			err = index;
 			break;
 		}
+		index++;
 	}
 	read_unlock(&file_systems_lock);
 	putname(name);
 	return err;
 }
 
+int check_fs_presence(const char *name)
+{
+	mm_segment_t curr_fs;
+	int res;
+
+	curr_fs = get_fs();
+	set_fs(KERNEL_DS);
+	res = fs_index(name);
+	set_fs(curr_fs);
+
+	return (res < 0) ? : 0;
+}
+EXPORT_SYMBOL(check_fs_presence);
+
 static int fs_name(unsigned int index, char __user * buf)
 {
 	struct file_system_type * tmp;
 	int len, res;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems; tmp; tmp = tmp->next, index--)
-		if (index <= 0 && try_module_get(tmp->owner))
-			break;
+	for (tmp = file_systems; tmp; tmp = tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
+		if (!index) {
+			if (try_get_filesystem(tmp))
+				break;
+		} else
+			index--;
+	}
 	read_unlock(&file_systems_lock);
 	if (!tmp)
 		return -EINVAL;
@@ -169,8 +288,9 @@ static int fs_maxindex(void)
 	int index;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
-		;
+	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
+		if (check_ve_fstype(tmp, get_exec_env()))
+			index++;
 	read_unlock(&file_systems_lock);
 	return index;
 }
@@ -206,9 +326,10 @@ int get_filesystem_list(char * buf)
 	read_lock(&file_systems_lock);
 	tmp = file_systems;
 	while (tmp && len < PAGE_SIZE - 80) {
-		len += sprintf(buf+len, "%s\t%s\n",
-			(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-			tmp->name);
+		if (check_ve_fstype(tmp, get_exec_env()))
+			len += sprintf(buf+len, "%s\t%s\n",
+				(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
+				tmp->name);
 		tmp = tmp->next;
 	}
 	read_unlock(&file_systems_lock);
@@ -220,14 +341,14 @@ struct file_system_type *get_fs_type(con
 	struct file_system_type *fs;
 
 	read_lock(&file_systems_lock);
-	fs = *(find_filesystem(name));
-	if (fs && !try_module_get(fs->owner))
+	fs = *(find_filesystem(name, get_exec_env()));
+	if (fs && !try_get_filesystem(fs))
 		fs = NULL;
 	read_unlock(&file_systems_lock);
 	if (!fs && (request_module("%s", name) == 0)) {
 		read_lock(&file_systems_lock);
-		fs = *(find_filesystem(name));
-		if (fs && !try_module_get(fs->owner))
+		fs = *(find_filesystem(name, get_exec_env()));
+		if (fs && !try_get_filesystem(fs))
 			fs = NULL;
 		read_unlock(&file_systems_lock);
 	}
@@ -235,3 +356,5 @@ struct file_system_type *get_fs_type(con
 }
 
 EXPORT_SYMBOL(get_fs_type);
+EXPORT_SYMBOL(get_filesystem);
+EXPORT_SYMBOL(put_filesystem);
diff -upr kernel-2.6.18-417.el5.orig/fs/file_table.c kernel-2.6.18-417.el5-028stab121/fs/file_table.c
--- kernel-2.6.18-417.el5.orig/fs/file_table.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/file_table.c	2017-01-13 08:40:40.000000000 -0500
@@ -24,25 +24,29 @@
 
 #include <asm/atomic.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_misc.h>
+
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
-
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
 static inline void file_free_rcu(struct rcu_head *head)
 {
 	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
+	put_ve(f->owner_env);
 	kmem_cache_free(filp_cachep, f);
 }
 
 static inline void file_free(struct file *f)
 {
-	percpu_counter_dec(&nr_files);
+	if (f->f_ub == get_ub0())
+		percpu_counter_dec(&nr_files);
+	ub_file_uncharge(f);
 	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -90,11 +94,14 @@ struct file *get_empty_filp(void)
 	struct task_struct *tsk;
 	static int old_max;
 	struct file * f;
+	int acct;
 
+	acct = (get_exec_ub() == get_ub0());
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
+	if (acct && get_nr_files() >= files_stat.max_files &&
+			!capable(CAP_SYS_ADMIN)) {
 		/*
 		 * percpu_counters are inaccurate.  Do an expensive check before
 		 * we go and fail.
@@ -106,9 +113,16 @@ struct file *get_empty_filp(void)
 	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
 	if (f == NULL)
 		goto fail;
-
-	percpu_counter_inc(&nr_files);
 	memset(f, 0, sizeof(*f));
+
+	if (ub_file_charge(f))
+		goto fail_ch;
+
+	if (acct)
+		percpu_counter_inc(&nr_files);
+
+	f->owner_env = get_ve(get_exec_env());
+
 	if (security_file_alloc(f))
 		goto fail_sec;
 
@@ -135,6 +149,10 @@ fail_sec:
 	file_free(f);
 fail:
 	return NULL;
+
+fail_ch:
+	kmem_cache_free(filp_cachep, f);
+	return NULL;
 }
 
 EXPORT_SYMBOL(get_empty_filp);
@@ -222,7 +240,10 @@ struct file fastcall *fget_light(unsigne
 	*fput_needed = 0;
 	if (likely((atomic_read(&files->count) == 1))) {
 		file = fcheck_files(files, fd);
+		if (unlikely(file && file->f_heavy))
+			goto slow;
 	} else {
+slow:
 		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
@@ -249,32 +270,35 @@ void put_filp(struct file *file)
 	}
 }
 
-void file_move(struct file *file, struct list_head *list)
+void file_move(struct file *file, struct file_list *list)
 {
 	if (!list)
 		return;
-	file_list_lock();
-	list_move(&file->f_u.fu_list, list);
-	file_list_unlock();
+	file_kill(file);
+	file_list_lock(list);
+	file->f_list = list;
+	list_add(&file->f_u.fu_list, &list->fl_list);
+	file_list_unlock(list);
 }
 
 void file_kill(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		file_list_lock();
+		file_list_lock(file->f_list);
 		list_del_init(&file->f_u.fu_list);
-		file_list_unlock();
+		file_list_unlock(file->f_list);
 	}
 }
 
+
 int fs_may_remount_ro(struct super_block *sb)
 {
-	struct list_head *p;
+	struct file *file;
+	int cpu;
 
 	/* Check that no files are currently opened for writing. */
-	file_list_lock();
-	list_for_each(p, &sb->s_files) {
-		struct file *file = list_entry(p, struct file, f_u.fu_list);
+	file_list_lock_sb(sb);
+	for_each_sb_file(file, sb, cpu) {
 		struct inode *inode = file->f_dentry->d_inode;
 
 		/* File with pending delete? */
@@ -285,13 +309,55 @@ int fs_may_remount_ro(struct super_block
 		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
 			goto too_bad;
 	}
-	file_list_unlock();
+	file_list_unlock_sb(sb);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	file_list_unlock();
+	file_list_unlock_sb(sb);
 	return 0;
 }
 
+struct file *get_task_file(pid_t pid, int fd)
+{
+	int err;
+	struct task_struct *tsk;
+	struct files_struct *fs;
+	struct file *file;
+
+	err = -ESRCH;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk == NULL) {
+		read_unlock(&tasklist_lock);
+		goto out;
+	}
+
+	get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+
+	err = -EINVAL;
+	fs = get_files_struct(tsk);
+	if (fs == NULL)
+		goto out_put;
+
+	rcu_read_lock();
+	err = -EBADF;
+	file = fcheck_files(fs, fd);
+	if (file == NULL)
+		goto out_unlock;
+
+	err = 0;
+	get_file(file);
+
+out_unlock:
+	rcu_read_unlock();
+	put_files_struct(fs);
+out_put:
+	put_task_struct(tsk);
+out:
+	return err ? ERR_PTR(err) : file;
+}
+EXPORT_SYMBOL(get_task_file);
+
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
diff -upr kernel-2.6.18-417.el5.orig/fs/fs-writeback.c kernel-2.6.18-417.el5-028stab121/fs/fs-writeback.c
--- kernel-2.6.18-417.el5.orig/fs/fs-writeback.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/fs-writeback.c	2017-01-13 08:40:40.000000000 -0500
@@ -23,6 +23,8 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 
+#include <ub/io_acct.h>
+
 extern struct super_block *blockdev_superblock;
 
 /**
@@ -365,6 +367,11 @@ sync_sb_inodes(struct super_block *sb, s
 			continue;		/* blockdev has wrong queue */
 		}
 
+		if (wbc->only_this_ub && wbc->only_this_ub != mapping->dirtied_ub) {
+			list_move(&inode->i_list, &sb->s_dirty);
+			continue;		/* Skip not our inode */
+		}
+
 		/*
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
@@ -384,7 +391,16 @@ sync_sb_inodes(struct super_block *sb, s
 		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
-		__writeback_single_inode(inode, wbc);
+
+		if (current_is_pdflush()) {
+			struct user_beancounter *ub;
+
+			ub = switch_exec_ub(get_mapping_ub(inode->i_mapping));
+			__writeback_single_inode(inode, wbc);
+			put_beancounter(switch_exec_ub(ub));
+		} else
+			__writeback_single_inode(inode, wbc);
+
 		if (wbc->sync_mode == WB_SYNC_HOLD) {
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_list, &sb->s_dirty);
@@ -475,12 +491,14 @@ restart:
  * We add in the number of potentially dirty inodes, because each inode write
  * can dirty pagecache in the underlying blockdev.
  */
-void sync_inodes_sb(struct super_block *sb, int wait)
+void sync_inodes_sb_ub(struct super_block *sb, int wait, struct user_beancounter *ub)
 {
 	struct writeback_control wbc = {
 		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
 		.range_start	= 0,
 		.range_end	= LLONG_MAX,
+		.only_this_ub	= ub,
+		.force_sync_io	= (wait == 2),
 	};
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
@@ -493,7 +511,12 @@ void sync_inodes_sb(struct super_block *
 	sync_sb_inodes(sb, &wbc);
 	spin_unlock(&inode_lock);
 }
-EXPORT_SYMBOL_GPL(sync_inodes_sb);
+EXPORT_SYMBOL(sync_inodes_sb);
+
+void sync_inodes_sb(struct super_block *sb, int wait)
+{
+	sync_inodes_sb_ub(sb, wait, NULL);
+}
 
 /*
  * Rather lame livelock avoidance.
@@ -528,7 +551,7 @@ static void set_sb_syncing(int val)
  * outstanding dirty inodes, the writeback goes block-at-a-time within the
  * filesystem's write_inode().  This is extremely slow.
  */
-static void __sync_inodes(int wait)
+static void __sync_inodes(int wait, struct user_beancounter *ub)
 {
 	struct super_block *sb;
 
@@ -542,7 +565,7 @@ restart:
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
+			sync_inodes_sb_ub(sb, wait, ub);
 			sync_blockdev(sb->s_bdev);
 		}
 		up_read(&sb->s_umount);
@@ -553,17 +576,22 @@ restart:
 	spin_unlock(&sb_lock);
 }
 
-void sync_inodes(int wait)
+void sync_inodes_ub(int wait, struct user_beancounter *ub)
 {
 	set_sb_syncing(0);
-	__sync_inodes(0);
+	__sync_inodes(0, ub);
 
 	if (wait) {
 		set_sb_syncing(0);
-		__sync_inodes(1);
+		__sync_inodes(1, ub);
 	}
 }
 
+void sync_inodes(int wait)
+{
+	sync_inodes_ub(wait, NULL);
+}
+
 /**
  * write_inode_now	-	write an inode to disk
  * @inode: inode to write to disk
diff -upr kernel-2.6.18-417.el5.orig/fs/fuse/control.c kernel-2.6.18-417.el5-028stab121/fs/fuse/control.c
--- kernel-2.6.18-417.el5.orig/fs/fuse/control.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/fuse/control.c	2017-01-13 08:40:20.000000000 -0500
@@ -10,6 +10,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/ve_proto.h>
 
 #define FUSE_CTL_SUPER_MAGIC 0x65735543
 
@@ -17,7 +18,11 @@
  * This is non-NULL when the single instance of the control filesystem
  * exists.  Protected by fuse_mutex
  */
+#ifdef CONFIG_VE
+#define fuse_control_sb	(get_exec_env()->_fuse_control_sb)
+#else
 static struct super_block *fuse_control_sb;
+#endif
 
 static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
 {
@@ -213,12 +218,51 @@ static struct file_system_type fuse_ctl_
 	.kill_sb	= fuse_ctl_kill_sb,
 };
 
+#ifdef CONFIG_VE
+static int fuse_ctl_start(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	if (ve->fuse_ctl_fs_type != NULL)
+		return -EBUSY;
+
+	return register_ve_fs_type(ve, &fuse_ctl_fs_type,
+			&ve->fuse_ctl_fs_type, NULL);
+}
+
+static void fuse_ctl_stop(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	if (ve->fuse_ctl_fs_type == NULL)
+		return;
+
+	unregister_ve_fs_type(ve->fuse_ctl_fs_type, NULL);
+	/* fuse_ctl_fs_type is freed in real_put_ve -> free_ve_filesystems */
+}
+
+static struct ve_hook fuse_ctl_ve_hook = {
+	.init		= fuse_ctl_start,
+	.fini		= fuse_ctl_stop,
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_FS,
+};
+#endif
+
 int __init fuse_ctl_init(void)
 {
-	return register_filesystem(&fuse_ctl_fs_type);
+	int err;
+	
+	err = register_filesystem(&fuse_ctl_fs_type);
+	if (err == 0)
+		ve_hook_register(VE_SS_CHAIN, &fuse_ctl_ve_hook);
+	return err;
 }
 
 void fuse_ctl_cleanup(void)
 {
+	ve_hook_unregister(&fuse_ctl_ve_hook);
 	unregister_filesystem(&fuse_ctl_fs_type);
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/fuse/fuse_i.h kernel-2.6.18-417.el5-028stab121/fs/fuse/fuse_i.h
--- kernel-2.6.18-417.el5.orig/fs/fuse/fuse_i.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/fuse/fuse_i.h	2017-01-13 08:40:20.000000000 -0500
@@ -41,7 +41,11 @@
 #define FUSE_ALLOW_OTHER         (1 << 1)
 
 /** List of active connections */
+#ifdef CONFIG_VE
+#define fuse_conn_list	(get_exec_env()->_fuse_conn_list)
+#else
 extern struct list_head fuse_conn_list;
+#endif
 
 /** Global mutex protecting fuse_conn_list and the control filesystem */
 extern struct mutex fuse_mutex;
diff -upr kernel-2.6.18-417.el5.orig/fs/fuse/inode.c kernel-2.6.18-417.el5-028stab121/fs/fuse/inode.c
--- kernel-2.6.18-417.el5.orig/fs/fuse/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/fuse/inode.c	2017-01-13 08:40:20.000000000 -0500
@@ -17,13 +17,16 @@
 #include <linux/parser.h>
 #include <linux/statfs.h>
 #include <linux/random.h>
+#include <linux/ve_proto.h>
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
 MODULE_LICENSE("GPL");
 
 static struct kmem_cache *fuse_inode_cachep;
+#ifndef CONFIG_VE
 struct list_head fuse_conn_list;
+#endif
 DEFINE_MUTEX(fuse_mutex);
 
 #define FUSE_SUPER_MAGIC 0x65735546
@@ -799,6 +802,40 @@ static void fuse_sysfs_cleanup(void)
 	subsystem_unregister(&fuse_subsys);
 }
 
+#ifdef CONFIG_VE
+static int fuse_start(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	if (ve->fuse_fs_type != NULL)
+		return -EBUSY;
+
+	INIT_LIST_HEAD(&ve->_fuse_conn_list);
+	return register_ve_fs_type(ve, &fuse_fs_type, &ve->fuse_fs_type, NULL);
+}
+
+static void fuse_stop(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	if (ve->fuse_fs_type == NULL)
+		return;
+
+	unregister_ve_fs_type(ve->fuse_fs_type, NULL);
+	/* fuse_fs_type is freed in real_put_ve -> free_ve_filesystems */
+	BUG_ON(!list_empty(&ve->_fuse_conn_list));
+}
+
+static struct ve_hook fuse_ve_hook = {
+	.init		= fuse_start,
+	.fini		= fuse_stop,
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_FS,
+};
+#endif
+
 static int __init fuse_init(void)
 {
 	int res;
@@ -823,6 +860,7 @@ static int __init fuse_init(void)
 	if (res)
 		goto err_sysfs_cleanup;
 
+	ve_hook_register(VE_SS_CHAIN, &fuse_ve_hook);
 	return 0;
 
  err_sysfs_cleanup:
@@ -839,6 +877,7 @@ static void __exit fuse_exit(void)
 {
 	printk(KERN_DEBUG "fuse exit\n");
 
+	ve_hook_unregister(&fuse_ve_hook);
 	fuse_ctl_cleanup();
 	fuse_sysfs_cleanup();
 	fuse_fs_cleanup();
diff -upr kernel-2.6.18-417.el5.orig/fs/gfs2/glock.c kernel-2.6.18-417.el5-028stab121/fs/gfs2/glock.c
--- kernel-2.6.18-417.el5.orig/fs/gfs2/glock.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/gfs2/glock.c	2017-01-13 08:40:20.000000000 -0500
@@ -1631,7 +1631,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd 
 		}
 
 		down_write(&gfs2_umount_flush_sem);
-		invalidate_inodes(sdp->sd_vfs);
+		invalidate_inodes(sdp->sd_vfs, 0);
 		up_write(&gfs2_umount_flush_sem);
 		msleep(10);
 	}
diff -upr kernel-2.6.18-417.el5.orig/fs/gfs2/ops_fstype.c kernel-2.6.18-417.el5-028stab121/fs/gfs2/ops_fstype.c
--- kernel-2.6.18-417.el5.orig/fs/gfs2/ops_fstype.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/gfs2/ops_fstype.c	2017-01-13 08:40:20.000000000 -0500
@@ -1107,7 +1107,7 @@ fail_locking:
 fail_lm:
 	gfs2_gl_hash_clear(sdp);
 	gfs2_lm_unmount(sdp);
-	while (invalidate_inodes(sb))
+	while (invalidate_inodes(sb, 0))
 		yield();
 fail_debug:
 	gfs2_delete_debugfs_file(sdp);
diff -upr kernel-2.6.18-417.el5.orig/fs/hugetlbfs/inode.c kernel-2.6.18-417.el5-028stab121/fs/hugetlbfs/inode.c
--- kernel-2.6.18-417.el5.orig/fs/hugetlbfs/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/hugetlbfs/inode.c	2017-01-13 08:40:19.000000000 -0500
@@ -743,7 +743,7 @@ struct file *hugetlb_zero_setup(size_t s
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
-	char buf[16];
+	char buf[64];
 	static atomic_t counter;
 
 	if (!can_do_hugetlb_shm())
@@ -753,7 +753,8 @@ struct file *hugetlb_zero_setup(size_t s
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
-	snprintf(buf, 16, "%u", atomic_inc_return(&counter));
+	snprintf(buf, sizeof(buf), "VE%d-%u", VEID(get_exec_env()),
+			atomic_inc_return(&counter));
 	quick_string.name = buf;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
diff -upr kernel-2.6.18-417.el5.orig/fs/inode.c kernel-2.6.18-417.el5-028stab121/fs/inode.c
--- kernel-2.6.18-417.el5.orig/fs/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/inode.c	2017-01-13 08:40:24.000000000 -0500
@@ -8,6 +8,7 @@
 #include <linux/mm.h>
 #include <linux/dcache.h>
 #include <linux/init.h>
+#include <linux/kernel_stat.h>
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/writeback.h>
@@ -22,6 +23,8 @@
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
 #include <linux/mount.h>
+#include <linux/nsproxy.h>
+#include <linux/namespace.h>
 
 /*
  * This is needed for the following functions:
@@ -81,6 +84,7 @@ static struct hlist_head *inode_hashtabl
  * the i_state of an inode while it is in use..
  */
 DEFINE_SPINLOCK(inode_lock);
+EXPORT_SYMBOL(inode_lock);
 
 /*
  * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
@@ -97,13 +101,15 @@ static DEFINE_MUTEX(iprune_mutex);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep __read_mostly;
+kmem_cache_t * inode_cachep __read_mostly;
+
+static struct address_space_operations vfs_empty_aops;
+struct inode_operations vfs_empty_iops;
+static struct file_operations vfs_empty_fops;
+EXPORT_SYMBOL(vfs_empty_iops);
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
-	static const struct address_space_operations empty_aops;
-	static struct inode_operations empty_iops;
-	static const struct file_operations empty_fops;
 	struct inode *inode;
 
 	if (sb->s_op->alloc_inode)
@@ -118,8 +124,8 @@ static struct inode *alloc_inode(struct 
 		inode->i_blkbits = sb->s_blocksize_bits;
 		inode->i_flags = 0;
 		atomic_set(&inode->i_count, 1);
-		inode->i_op = &empty_iops;
-		inode->i_fop = &empty_fops;
+		inode->i_op = &vfs_empty_iops;
+		inode->i_fop = &vfs_empty_fops;
 		inode->i_nlink = 1;
 		atomic_set(&inode->i_writecount, 0);
 		inode->i_size = 0;
@@ -143,12 +149,13 @@ static struct inode *alloc_inode(struct 
 			return NULL;
 		}
 
-		mapping->a_ops = &empty_aops;
+		mapping->a_ops = &vfs_empty_aops;
  		mapping->host = inode;
 		mapping->flags = 0;
 		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
 		mapping->assoc_mapping = NULL;
 		mapping->backing_dev_info = &default_backing_dev_info;
+		mapping->dirtied_ub = NULL;
 
 		/*
 		 * If the block_device provides a backing_dev_info for client
@@ -172,6 +179,7 @@ static struct inode *alloc_inode(struct 
 void destroy_inode(struct inode *inode) 
 {
 	BUG_ON(inode_has_buffers(inode));
+	BUG_ON(inode->i_data.dirtied_ub);
 	security_inode_free(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
@@ -233,6 +241,7 @@ void __iget(struct inode * inode)
 		list_move(&inode->i_list, &inode_in_use);
 	inodes_stat.nr_unused--;
 }
+EXPORT_SYMBOL(__iget);
 
 /**
  * clear_inode - clear an inode
@@ -298,13 +307,76 @@ static void dispose_list(struct list_hea
 	spin_unlock(&inode_lock);
 }
 
+static void show_header(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	printk("VFS: Busy inodes after unmount. "
+			"sb = %p, fs type = %s, sb count = %d, "
+			"sb->s_root = %s\n", sb,
+			(sb->s_type != NULL) ? sb->s_type->name : "",
+			sb->s_count,
+			(sb->s_root != NULL) ?
+			(char *)sb->s_root->d_name.name : "");
+}
+
+static void show_inode(struct inode *inode)
+{
+	struct dentry *d;
+	struct vfsmount *mnt;
+	int i;
+
+	printk("inode = %p, inode->i_count = %d, "
+			"inode->i_nlink = %d, "
+			"inode->i_mode = %d, "
+			"inode->i_state = %ld, "
+			"inode->i_flags = %d, "
+			"inode->i_devices.next = %p, "
+			"inode->i_devices.prev = %p, "
+			"inode->i_ino = %ld\n",
+			inode,
+			atomic_read(&inode->i_count),
+			inode->i_nlink,
+			inode->i_mode,
+			inode->i_state,
+			inode->i_flags,
+			inode->i_devices.next,
+			inode->i_devices.prev,
+			inode->i_ino);
+	printk("inode dump: ");
+	for (i = 0; i < sizeof(*inode); i++)
+		printk("%2.2x ", *((u_char *)inode + i));
+	printk("\n");
+	list_for_each_entry(d, &inode->i_dentry, d_alias) {
+		printk("  d_alias %s d_count=%d d_flags=%x\n",
+			d->d_name.name, atomic_read(&d->d_count), d->d_flags);
+		for (i = 0; i < sizeof(*d); i++)
+			printk("%2.2x ", *((u_char *)d + i));
+		printk("\n");
+	}
+
+	spin_lock(&vfsmount_lock);
+	list_for_each_entry(mnt, &get_task_mnt_ns(current)->list, mnt_list) {
+		if (mnt->mnt_sb != inode->i_sb)
+			continue;
+		printk("mnt=%p count=%d flags=%x exp_mask=%x\n",
+				mnt, atomic_read(&mnt->mnt_count),
+				mnt->mnt_flags,
+				mnt->mnt_expiry_mark);
+		for (i = 0; i < sizeof(*mnt); i++)
+			printk("%2.2x ", *((u_char *)mnt + i));
+		printk("\n");
+	}
+	spin_unlock(&vfsmount_lock);
+}
+
 /*
  * Invalidate all inodes for a device.
  */
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_list(struct list_head *head, struct list_head *dispose, int check)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0, count = 0, once = 1;
 
 	next = head->next;
 	for (;;) {
@@ -331,6 +403,14 @@ static int invalidate_list(struct list_h
 			continue;
 		}
 		busy = 1;
+
+		if (check) {
+			if (once) {
+				once = 0;
+				show_header(inode);
+			}
+			show_inode(inode);
+		}
 	}
 	/* only unused inodes may be cached with i_count zero */
 	inodes_stat.nr_unused -= count;
@@ -345,7 +425,7 @@ static int invalidate_list(struct list_h
  *	fails because there are busy inodes then a non zero value is returned.
  *	If the discard is successful all the inodes have been discarded.
  */
-int invalidate_inodes(struct super_block * sb)
+int invalidate_inodes(struct super_block * sb, int check)
 {
 	int busy;
 	LIST_HEAD(throw_away);
@@ -353,7 +433,7 @@ int invalidate_inodes(struct super_block
 	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
-	busy = invalidate_list(&sb->s_inodes, &throw_away);
+	busy = invalidate_list(&sb->s_inodes, &throw_away, check);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
@@ -377,7 +457,7 @@ int __invalidate_device(struct block_dev
 		 * hold).
 		 */
 		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb);
+		res = invalidate_inodes(sb, 0);
 		drop_super(sb);
 	}
 	invalidate_bdev(bdev, 0);
@@ -472,6 +552,7 @@ static void prune_icache(int nr_to_scan)
  */
 static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 {
+	KSTAT_PERF_ENTER(shrink_icache)
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.  We may hold various FS locks,
@@ -482,6 +563,7 @@ static int shrink_icache_memory(int nr, 
 			return -1;
 		prune_icache(nr);
 	}
+	KSTAT_PERF_LEAVE(shrink_icache)
 	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
@@ -1096,7 +1178,8 @@ static void generic_forget_inode(struct 
 		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
 			list_move(&inode->i_list, &inode_unused);
 		inodes_stat.nr_unused++;
-		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+		if (!(inode->i_flags & S_NOUNUSE) &&
+		    (!sb || (sb->s_flags & MS_ACTIVE))) {
 			spin_unlock(&inode_lock);
 			return;
 		}
@@ -1203,6 +1286,41 @@ sector_t bmap(struct inode * inode, sect
 
 EXPORT_SYMBOL(bmap);
 
+unsigned __read_mostly relatime_interval = 24*60*60; /* one day */
+
+/*
+ * With relative atime, only update atime if the previous atime is
+ * earlier than either the ctime or mtime or if at least a day has
+ * passed since the last atime update.
+ */
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+			     struct timespec now)
+{
+	if (!mnt || !(mnt->mnt_flags & MNT_RELATIME))
+		return 1;
+	/*
+	 * Is mtime younger than atime? If yes, update atime:
+	 */
+	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
+		return 1;
+	/*
+	 * Is ctime younger than atime? If yes, update atime:
+	 */
+	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+		return 1;
+
+	/*
+	 * Is the previous atime value older than an update interval?
+	 * If yes, update atime:
+	 */
+	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= relatime_interval)
+		return 1;
+	/*
+	 * Good, we can skip the atime update:
+	 */
+	return 0;
+}
+
 /**
  *	touch_atime	-	update the access time
  *	@mnt: mount the inode is accessed on
@@ -1237,6 +1355,10 @@ void touch_atime(struct vfsmount *mnt, s
 		return;
 
 	now = current_fs_time(inode->i_sb);
+
+	if (!relatime_need_update(mnt, inode, now))
+		return;
+
 	if (!timespec_equal(&inode->i_atime, &now)) {
 		inode->i_atime = now;
 		mark_inode_dirty_sync(inode);
@@ -1245,21 +1367,8 @@ void touch_atime(struct vfsmount *mnt, s
 
 EXPORT_SYMBOL(touch_atime);
 
-/**
- *	file_update_time	-	update mtime and ctime time
- *	@file: file accessed
- *
- *	Update the mtime and ctime members of an inode and mark the inode
- *	for writeback.  Note that this function is meant exclusively for
- *	usage in the file write path of filesystems, and filesystems may
- *	choose to explicitly ignore update via this function with the
- *	S_NOCTIME inode flag, e.g. for network filesystem where these
- *	timestamps are handled by the server.
- */
-
-void file_update_time(struct file *file)
+void inode_update_time(struct inode *inode)
 {
-	struct inode *inode = file->f_dentry->d_inode;
 	struct timespec now;
 	int sync_it = 0;
 
@@ -1286,8 +1395,27 @@ void file_update_time(struct file *file)
 		mark_inode_dirty_sync(inode);
 }
 
+
+/**
+ *	file_update_time	-	update mtime and ctime time
+ *	@file: file accessed
+ *
+ *	Update the mtime and ctime members of an inode and mark the inode
+ *	for writeback.  Note that this function is meant exclusively for
+ *	usage in the file write path of filesystems, and filesystems may
+ *	choose to explicitly ignore update via this function with the
+ *	S_NOCTIME inode flag, e.g. for network filesystem where these
+ *	timestamps are handled by the server.
+ */
+
+void file_update_time(struct file *file)
+{
+	inode_update_time(file->f_dentry->d_inode);
+}
+
 EXPORT_SYMBOL(file_update_time);
 
+
 int inode_needs_sync(struct inode *inode)
 {
 	if (IS_SYNC(inode))
diff -upr kernel-2.6.18-417.el5.orig/fs/inotify.c kernel-2.6.18-417.el5-028stab121/fs/inotify.c
--- kernel-2.6.18-417.el5.orig/fs/inotify.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/inotify.c	2017-01-13 08:40:26.000000000 -0500
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/mount.h>
 
 static atomic_t inotify_cookie;
 
@@ -68,19 +69,6 @@ static atomic_t inotify_cookie;
  * inotify_add_watch() to the final put_inotify_watch().
  */
 
-/*
- * struct inotify_handle - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_handle {
-	struct idr		idr;		/* idr mapping wd -> watch */
-	struct mutex		mutex;		/* protects this bad boy */
-	struct list_head	watches;	/* list of watches */
-	atomic_t		count;		/* reference count */
-	u32			last_wd;	/* the last wd allocated */
-	const struct inotify_operations *in_ops; /* inotify caller operations */
-};
 
 static inline void get_inotify_handle(struct inotify_handle *ih)
 {
@@ -131,6 +119,12 @@ void put_inotify_watch(struct inotify_wa
 		struct inotify_handle *ih = watch->ih;
 
 		iput(watch->inode);
+		dput(watch->dentry);
+		if (watch->mnt)
+			mnt_unpin(watch->mnt);
+		mntput(watch->mnt);
+		watch->dentry = NULL;
+		watch->mnt = NULL;
 		ih->in_ops->destroy_watch(watch);
 		put_inotify_handle(ih);
 	}
@@ -144,6 +138,24 @@ void unpin_inotify_watch(struct inotify_
 	deactivate_super(sb);
 }
 
+static void put_inotify_watch_delay(struct inotify_watch *watch, struct list_head *delay)
+{
+	/* delay put for removed watches, because it can be final put */
+	if (list_empty(&watch->i_list))
+		list_add(&watch->i_list, delay);
+	else if (atomic_dec_and_test(&watch->count))
+		BUG();
+}
+
+static void put_inotify_watch_list(struct list_head *delay)
+{
+	struct inotify_watch *watch, *next;
+
+	list_for_each_entry_safe(watch, next, delay, i_list)
+		put_inotify_watch(watch);
+	INIT_LIST_HEAD(delay);
+}
+
 /*
  * inotify_handle_get_wd - returns the next WD for use by the given handle
  *
@@ -229,7 +241,7 @@ static struct inotify_watch *inode_find_
 static void remove_watch_no_event(struct inotify_watch *watch,
 				  struct inotify_handle *ih)
 {
-	list_del(&watch->i_list);
+	list_del_init(&watch->i_list);
 	list_del(&watch->h_list);
 
 	if (!inotify_inode_watched(watch->inode))
@@ -300,6 +312,7 @@ void inotify_inode_queue_event(struct in
 			       const char *name, struct inode *n_inode)
 {
 	struct inotify_watch *watch, *next;
+	LIST_HEAD(delay);
 
 	if (!inotify_inode_watched(inode))
 		return;
@@ -309,15 +322,18 @@ void inotify_inode_queue_event(struct in
 		u32 watch_mask = watch->mask;
 		if (watch_mask & mask) {
 			struct inotify_handle *ih= watch->ih;
+			get_inotify_watch(watch);
 			mutex_lock(&ih->mutex);
 			if (watch_mask & IN_ONESHOT)
 				remove_watch_no_event(watch, ih);
 			ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
 						 name, n_inode);
 			mutex_unlock(&ih->mutex);
+			put_inotify_watch_delay(watch, &delay);
 		}
 	}
 	mutex_unlock(&inode->inotify_mutex);
+	put_inotify_watch_list(&delay);
 }
 EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
 
@@ -369,9 +385,10 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie);
  * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
  * We temporarily drop inode_lock, however, and CAN block.
  */
-void inotify_unmount_inodes(struct list_head *list)
+void inotify_unmount_inodes_mnt(struct list_head *list, struct vfsmount *mnt)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
+	LIST_HEAD(delay);
 
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
 		struct inotify_watch *watch, *next_w;
@@ -427,20 +444,39 @@ void inotify_unmount_inodes(struct list_
 		watches = &inode->inotify_watches;
 		list_for_each_entry_safe(watch, next_w, watches, i_list) {
 			struct inotify_handle *ih= watch->ih;
+
+			if (mnt && mnt != watch->mnt)
+				continue;
+
+			get_inotify_watch(watch);
 			mutex_lock(&ih->mutex);
 			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
 						 NULL, NULL);
 			inotify_remove_watch_locked(ih, watch);
 			mutex_unlock(&ih->mutex);
+			put_inotify_watch_delay(watch, &delay);
 		}
 		mutex_unlock(&inode->inotify_mutex);
+		put_inotify_watch_list(&delay);
 		iput(inode);		
 
 		spin_lock(&inode_lock);
 	}
 }
+
+void inotify_unmount_inodes(struct list_head *list)
+{
+	inotify_unmount_inodes_mnt(list, NULL);
+}
 EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
 
+void inotify_unmount_mnt(struct vfsmount *mnt)
+{
+	spin_lock(&inode_lock);
+	inotify_unmount_inodes_mnt(&mnt->mnt_sb->s_inodes, mnt);
+	spin_unlock(&inode_lock);
+}
+
 /**
  * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
  * @inode: inode that is about to be removed
@@ -448,15 +484,19 @@ EXPORT_SYMBOL_GPL(inotify_unmount_inodes
 void inotify_inode_is_dead(struct inode *inode)
 {
 	struct inotify_watch *watch, *next;
+	LIST_HEAD(delay);
 
 	mutex_lock(&inode->inotify_mutex);
 	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
 		struct inotify_handle *ih = watch->ih;
+		get_inotify_watch(watch);
 		mutex_lock(&ih->mutex);
 		inotify_remove_watch_locked(ih, watch);
 		mutex_unlock(&ih->mutex);
+		put_inotify_watch_delay(watch, &delay);
 	}
 	mutex_unlock(&inode->inotify_mutex);
+	put_inotify_watch_list(&delay);
 }
 EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
 
@@ -496,6 +536,8 @@ void inotify_init_watch(struct inotify_w
 	INIT_LIST_HEAD(&watch->i_list);
 	atomic_set(&watch->count, 0);
 	get_inotify_watch(watch); /* initial get */
+	watch->dentry = NULL;
+	watch->mnt = NULL;
 }
 EXPORT_SYMBOL_GPL(inotify_init_watch);
 
@@ -748,8 +790,10 @@ EXPORT_SYMBOL_GPL(inotify_find_update_wa
  * Caller must ensure it only calls inotify_add_watch() once per watch.
  * Calls inotify_handle_get_wd() so may sleep.
  */
-s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
-		      struct inode *inode, u32 mask)
+s32 __inotify_add_watch(struct inotify_handle *ih,
+		        struct inotify_watch *watch,
+			struct dentry *d, struct vfsmount *mnt,
+			struct inode * inode, u32 mask)
 {
 	int ret = 0;
 	int newly_watched;
@@ -776,6 +820,11 @@ s32 inotify_add_watch(struct inotify_han
 	 * Save a reference to the inode and bump the ref count to make it
 	 * official.  We hold a reference to nameidata, which makes this safe.
 	 */
+	if (d) {
+		watch->dentry = dget(d);
+		watch->mnt = mnt;
+		mnt_pin(mnt);
+	}
 	watch->inode = igrab(inode);
 
 	/* Add the watch to the handle's and the inode's list */
@@ -797,6 +846,19 @@ out:
 }
 EXPORT_SYMBOL_GPL(inotify_add_watch);
 
+s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
+		      struct inode *inode, u32 mask)
+{
+	return __inotify_add_watch(ih, watch, NULL, NULL, inode, mask);
+}
+
+s32 inotify_add_watch_dget(struct inotify_handle *ih,
+			   struct inotify_watch *watch, struct dentry *d,
+			   struct vfsmount *mnt, u32 mask)
+{
+	return __inotify_add_watch(ih, watch, d, mnt, d->d_inode, mask);
+}
+
 /**
  * inotify_clone_watch - put the watch next to existing one
  * @old: already installed watch
diff -upr kernel-2.6.18-417.el5.orig/fs/inotify_user.c kernel-2.6.18-417.el5-028stab121/fs/inotify_user.c
--- kernel-2.6.18-417.el5.orig/fs/inotify_user.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/inotify_user.c	2017-01-13 08:40:40.000000000 -0500
@@ -20,6 +20,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -65,46 +66,6 @@ int inotify_max_queued_events __read_mos
  * first event, or to inotify_destroy().
  */
 
-/*
- * struct inotify_device - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
-	struct mutex		ev_mutex;	/* protects event queue */
-	struct mutex		up_mutex;	/* synchronizes watch updates */
-	struct list_head 	events;		/* list of queued events */
-	atomic_t		count;		/* reference count */
-	struct user_struct	*user;		/* user who opened this dev */
-	struct inotify_handle	*ih;		/* inotify handle */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
-};
-
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->ev_mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_user_watch - our version of an inotify_watch, we add
- * a reference to the associated inotify_device.
- */
-struct inotify_user_watch {
-	struct inotify_device	*dev;	/* associated device */
-	struct inotify_watch	wdata;	/* inotify watch data */
-};
 
 #ifdef CONFIG_SYSCTL
 
@@ -373,8 +334,8 @@ static int find_inode(const char __user 
  *
  * Callers must hold dev->up_mutex.
  */
-static int create_watch(struct inotify_device *dev, struct inode *inode,
-			u32 mask)
+int inotify_create_watch(struct inotify_device *dev, struct dentry *d,
+			 struct vfsmount *mnt, u32 mask)
 {
 	struct inotify_user_watch *watch;
 	int ret;
@@ -394,12 +355,13 @@ static int create_watch(struct inotify_d
 	atomic_inc(&dev->user->inotify_watches);
 
 	inotify_init_watch(&watch->wdata);
-	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
+	ret = inotify_add_watch_dget(dev->ih, &watch->wdata, d, mnt, mask);
 	if (ret < 0)
 		free_inotify_user_watch(&watch->wdata);
 
 	return ret;
 }
+EXPORT_SYMBOL(inotify_create_watch);
 
 /* Device Interface */
 
@@ -531,20 +493,21 @@ static long inotify_ioctl(struct file *f
 	return ret;
 }
 
-static const struct file_operations inotify_fops = {
+const struct file_operations inotify_fops = {
 	.poll           = inotify_poll,
 	.read           = inotify_read,
 	.release        = inotify_release,
 	.unlocked_ioctl = inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
 };
+EXPORT_SYMBOL(inotify_fops);
 
 static const struct inotify_operations inotify_user_ops = {
 	.handle_event	= inotify_dev_queue_event,
 	.destroy_watch	= free_inotify_user_watch,
 };
 
-asmlinkage long sys_inotify_init(void)
+asmlinkage long sys_inotify_init1(int flags)
 {
 	struct inotify_device *dev;
 	struct inotify_handle *ih;
@@ -552,7 +515,11 @@ asmlinkage long sys_inotify_init(void)
 	struct file *filp;
 	int fd, ret;
 
-	fd = get_unused_fd();
+	/* Check the IN_* constants for consistency.  */
+	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
+		return -EINVAL;
+
+	fd = get_unused_fd_flags(flags & O_CLOEXEC);
 	if (fd < 0)
 		return fd;
 
@@ -587,7 +554,7 @@ asmlinkage long sys_inotify_init(void)
 	filp->f_dentry = dget(inotify_mnt->mnt_root);
 	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
-	filp->f_flags = O_RDONLY;
+	filp->f_flags = O_RDONLY  | (flags & O_NONBLOCK);
 	filp->private_data = dev;
 
 	INIT_LIST_HEAD(&dev->events);
@@ -615,6 +582,12 @@ out_put_fd:
 	return ret;
 }
 
+asmlinkage long sys_inotify_init(void)
+{
+	return sys_inotify_init1(0);
+}
+EXPORT_SYMBOL(sys_inotify_init);
+
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 {
 	struct inode *inode;
@@ -650,7 +623,7 @@ asmlinkage long sys_inotify_add_watch(in
 	mutex_lock(&dev->up_mutex);
 	ret = inotify_find_update_watch(dev->ih, inode, mask);
 	if (ret == -ENOENT)
-		ret = create_watch(dev, inode, mask);
+		ret = inotify_create_watch(dev, nd.dentry, nd.mnt, mask);
 	mutex_unlock(&dev->up_mutex);
 
 	path_release(&nd);
@@ -689,7 +662,7 @@ static int
 inotify_get_sb(struct file_system_type *fs_type, int flags,
 	       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA, mnt);
+	return get_sb_pseudo(fs_type, "inotify", NULL, 0x2BAD1DEA, mnt);
 }
 
 static struct file_system_type inotify_fs_type = {
diff -upr kernel-2.6.18-417.el5.orig/fs/ioprio.c kernel-2.6.18-417.el5-028stab121/fs/ioprio.c
--- kernel-2.6.18-417.el5.orig/fs/ioprio.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ioprio.c	2017-01-13 08:40:21.000000000 -0500
@@ -25,6 +25,7 @@
 #include <linux/capability.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
+#include <ub/io_prio.h>
 
 int set_task_ioprio(struct task_struct *task, int ioprio)
 {
@@ -66,6 +67,9 @@ asmlinkage long sys_ioprio_set(int which
 	struct user_struct *user;
 	int ret;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	switch (class) {
 		case IOPRIO_CLASS_RT:
 			if (!capable(CAP_SYS_ADMIN))
@@ -91,18 +95,18 @@ asmlinkage long sys_ioprio_set(int which
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_pid_ve(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			do_each_task_pid_all(who, PIDTYPE_PGID, p) {
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					break;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_all(who, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -113,17 +117,25 @@ asmlinkage long sys_ioprio_set(int which
 			if (!user)
 				break;
 
-			do_each_thread(g, p) {
+			do_each_thread_all(g, p) {
 				if (p->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					goto free_uid;
-			} while_each_thread(g, p);
+			} while_each_thread_all(g, p);
 free_uid:
 			if (who)
 				free_uid(user);
 			break;
+		case IOPRIO_WHO_UBC:
+			if (class != IOPRIO_CLASS_BE) {
+				ret = -ERANGE;
+				break;
+			}
+
+			ret = bc_set_ioprio(who, data);
+			break;
 		default:
 			ret = -EINVAL;
 	}
@@ -180,14 +192,14 @@ asmlinkage long sys_ioprio_get(int which
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_pid_ve(who);
 			if (p)
 				ret = get_task_ioprio(p);
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
 					continue;
@@ -195,7 +207,7 @@ asmlinkage long sys_ioprio_get(int which
 					ret = tmpio;
 				else
 					ret = ioprio_best(ret, tmpio);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -206,7 +218,7 @@ asmlinkage long sys_ioprio_get(int which
 			if (!user)
 				break;
 
-			do_each_thread(g, p) {
+			do_each_thread_ve(g, p) {
 				if (p->uid != user->uid)
 					continue;
 				tmpio = get_task_ioprio(p);
@@ -216,7 +228,7 @@ asmlinkage long sys_ioprio_get(int which
 					ret = tmpio;
 				else
 					ret = ioprio_best(ret, tmpio);
-			} while_each_thread(g, p);
+			} while_each_thread_ve(g, p);
 
 			if (who)
 				free_uid(user);
diff -upr kernel-2.6.18-417.el5.orig/fs/jbd/commit.c kernel-2.6.18-417.el5-028stab121/fs/jbd/commit.c
--- kernel-2.6.18-417.el5.orig/fs/jbd/commit.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/jbd/commit.c	2017-01-13 08:40:15.000000000 -0500
@@ -902,7 +902,8 @@ restart_loop:
 	journal->j_committing_transaction = NULL;
 	spin_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL &&
+	    commit_transaction->t_checkpoint_io_list == NULL) {
 		__journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
diff -upr kernel-2.6.18-417.el5.orig/fs/jbd/journal.c kernel-2.6.18-417.el5-028stab121/fs/jbd/journal.c
--- kernel-2.6.18-417.el5.orig/fs/jbd/journal.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/jbd/journal.c	2017-01-13 08:40:15.000000000 -0500
@@ -211,10 +211,16 @@ end_loop:
 	return 0;
 }
 
-static void journal_start_thread(journal_t *journal)
+static int journal_start_thread(journal_t *journal)
 {
-	kthread_run(kjournald, journal, "kjournald");
+	struct task_struct *t;
+
+	t = kthread_run(kjournald, journal, "kjournald");
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+
 	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+	return 0;
 }
 
 static void journal_kill_thread(journal_t *journal)
@@ -864,8 +870,7 @@ static int journal_reset(journal_t *jour
 
 	/* Add the dynamic fields and write it to disk. */
 	journal_update_superblock(journal, 1);
-	journal_start_thread(journal);
-	return 0;
+	return journal_start_thread(journal);
 }
 
 /** 
diff -upr kernel-2.6.18-417.el5.orig/fs/jbd/transaction.c kernel-2.6.18-417.el5-028stab121/fs/jbd/transaction.c
--- kernel-2.6.18-417.el5.orig/fs/jbd/transaction.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/jbd/transaction.c	2017-01-13 08:40:18.000000000 -0500
@@ -26,6 +26,7 @@
 #include <linux/smp_lock.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/virtinfo.h>
 
 /*
  * get_transaction: obtain a new transaction_t object.
@@ -93,6 +94,8 @@ static int start_this_handle(journal_t *
 		goto out;
 	}
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_JOURNAL, NULL);
+
 alloc_transaction:
 	if (!journal->j_running_transaction) {
 		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
diff -upr kernel-2.6.18-417.el5.orig/fs/jbd2/transaction.c kernel-2.6.18-417.el5-028stab121/fs/jbd2/transaction.c
--- kernel-2.6.18-417.el5.orig/fs/jbd2/transaction.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/jbd2/transaction.c	2017-01-13 08:40:18.000000000 -0500
@@ -25,6 +25,7 @@
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/virtinfo.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 
@@ -98,6 +99,8 @@ static int start_this_handle(journal_t *
 		goto out;
 	}
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_JOURNAL, NULL);
+
 alloc_transaction:
 	if (!journal->j_running_transaction) {
 		new_transaction = kzalloc(sizeof(*new_transaction),
diff -upr kernel-2.6.18-417.el5.orig/fs/Kconfig kernel-2.6.18-417.el5-028stab121/fs/Kconfig
--- kernel-2.6.18-417.el5.orig/fs/Kconfig	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/Kconfig	2017-01-13 08:40:24.000000000 -0500
@@ -445,6 +445,15 @@ config QUOTA
 	  with the quota tools. Probably the quota support is only useful for
 	  multi user systems. If unsure, say N.
 
+config QUOTA_COMPAT
+	bool "Compatibility with older quotactl interface"
+	depends on QUOTA
+	help
+	  This option enables compatibility layer for older version
+	  of quotactl interface with byte granularity (QUOTAON at 0x0100,
+	  GETQUOTA at 0x0D00).  Interface versions older than that one and
+	  with block granularity are still not supported.
+
 config QFMT_V1
 	tristate "Old quota format support"
 	depends on QUOTA
@@ -460,6 +469,39 @@ config QFMT_V2
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
 	  need this functionality say Y here.
 
+config SIM_FS
+	tristate "VPS filesystem"
+	depends on VZ_QUOTA
+	default m
+	help
+	  This file system is a part of Virtuozzo. It introduces a fake
+	  superblock and blockdev to VE to hide real device and show
+	  statfs results taken from quota.
+
+config VZ_QUOTA
+	tristate "Virtuozzo Disk Quota support"
+	depends on QUOTA
+	select VZ_DEV
+	default m
+	help
+	  Virtuozzo Disk Quota imposes disk quota on directories with their
+	  files and subdirectories in total.  Such disk quota is used to
+	  account and limit disk usage by Virtuozzo VPS, but also may be used
+	  separately.
+
+config VZ_QUOTA_UNLOAD
+	bool "Unloadable Virtuozzo Disk Quota module"
+	depends on VZ_QUOTA=m
+	default n
+	help
+	  Make Virtuozzo Disk Quota module unloadable.
+	  Doesn't work reliably now.
+
+config VZ_QUOTA_UGID
+	bool "Per-user and per-group quota in Virtuozzo quota partitions"
+	depends on VZ_QUOTA!=n
+	default y
+
 config QUOTACTL
 	bool
 	depends on XFS_QUOTA || QUOTA
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/clntproc.c kernel-2.6.18-417.el5-028stab121/fs/lockd/clntproc.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/clntproc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/clntproc.c	2017-01-13 08:40:27.000000000 -0500
@@ -62,6 +62,68 @@ static void nlm_put_lockowner(struct nlm
 	kfree(lockowner);
 }
 
+static int nlm_walk_reserved(uint32_t pid, int del)
+{
+	struct ve_struct *ve;
+	struct hlist_head *rsv_list;
+
+	ve = get_exec_env();
+	rsv_list = &ve->nlm_reserved_pids;
+	if (!hlist_empty(rsv_list)) {
+		struct nlm_reserved_pid *rp;
+		struct hlist_node *n;
+
+		spin_lock(&ve->nlm_reserved_lock);
+		hlist_for_each_entry(rp, n, rsv_list, list)
+			if (rp->pid == pid)
+				break;
+
+		if (del && n) {
+			hlist_del(&rp->list);
+			kfree(rp);
+		}
+
+		spin_unlock(&ve->nlm_reserved_lock);
+
+		if (n != NULL)
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+static inline int nlm_pid_reserved(uint32_t pid)
+{
+	return nlm_walk_reserved(pid, 0);
+}
+
+static inline void nlm_release_reserved(int pid)
+{
+	if (nlm_walk_reserved(pid, 1) == 0)
+		printk("%s: Recreated lockowner wasn't reserved! pid %d\n",
+				__func__, pid);
+}
+
+int nlmclnt_reserve_pid(int pid)
+{
+	struct ve_struct *ve;
+	struct nlm_reserved_pid *n;
+
+	n = kmalloc(sizeof(*n), GFP_KERNEL);
+	if (n == NULL)
+		return -ENOMEM;
+
+	ve = get_exec_env();
+
+	n->pid = pid;
+	spin_lock(&ve->nlm_reserved_lock);
+	hlist_add_head(&n->list, &ve->nlm_reserved_pids);
+	spin_unlock(&ve->nlm_reserved_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(nlmclnt_reserve_pid);
+
 static inline int nlm_pidbusy(struct nlm_host *host, uint32_t pid)
 {
 	struct nlm_lockowner *lockowner;
@@ -69,7 +131,7 @@ static inline int nlm_pidbusy(struct nlm
 		if (lockowner->pid == pid)
 			return -EBUSY;
 	}
-	return 0;
+	return nlm_pid_reserved(pid);
 }
 
 static inline uint32_t __nlm_alloc_pid(struct nlm_host *host)
@@ -118,6 +180,48 @@ static struct nlm_lockowner *nlm_find_lo
 	return res;
 }
 
+int nlmclnt_set_lockowner(struct inode *inode,
+		struct file_lock *fl, fl_owner_t owner, int svid)
+{
+	int proto, vers;
+	struct nlm_host *host;
+	struct nlm_lockowner *new, *res;
+
+	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
+	proto = NFS_CLIENT(inode)->cl_xprt->prot;
+
+	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	if (host == NULL)
+		return -ENOLCK;
+
+	new = (struct nlm_lockowner *)kmalloc(sizeof(*new), GFP_KERNEL);
+
+	spin_lock(&host->h_lock);
+	res = __nlm_find_lockowner(host, owner);
+	if (res != NULL) {
+		spin_unlock(&host->h_lock);
+		nlm_put_lockowner(res);
+		nlm_release_host(host);
+		kfree(new);
+		return -EBUSY;
+	}
+
+	atomic_set(&new->count, 1);
+	new->owner = owner;
+	new->pid = svid;
+	new->host = host; /* get-ed by nlmclnt_lookup_host */
+	list_add(&new->list, &host->h_lockowners);
+	spin_unlock(&host->h_lock);
+
+	nlmclnt_locks_init_private(fl, host);
+
+	nlm_put_lockowner(new);
+	nlm_release_reserved(svid);
+
+	return 0;
+}
+EXPORT_SYMBOL(nlmclnt_set_lockowner);
+
 /*
  * Initialize arguments for TEST/LOCK/UNLOCK/CANCEL calls
  */
@@ -129,11 +233,11 @@ static void nlmclnt_setlockargs(struct n
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-	lock->caller  = system_utsname.nodename;
+	lock->caller  = utsname()->nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
 				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
-				system_utsname.nodename);
+				utsname()->nodename);
 	lock->svid = fl->fl_u.nfs_fl.owner->pid;
 	lock->fl.fl_start = fl->fl_start;
 	lock->fl.fl_end = fl->fl_end;
@@ -154,6 +258,7 @@ nlmclnt_proc(struct inode *inode, int cm
 	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 	int			status, proto, vers;
+	struct ve_struct	*ve;
 
 	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
 	if (NFS_PROTO(inode)->version > 3) {
@@ -163,14 +268,17 @@ nlmclnt_proc(struct inode *inode, int cm
 
 	/* Retrieve transport protocol from NFS client */
 	proto = NFS_CLIENT(inode)->cl_xprt->prot;
+	ve = set_exec_env(NFS_CLIENT(inode)->cl_xprt->owner_env);
 
 	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	status = -ENOLCK;
 	if (host == NULL)
-		return -ENOLCK;
+		goto fail;
 
 	call = nlm_alloc_call(host);
+	status = -ENOMEM;
 	if (call == NULL)
-		return -ENOMEM;
+		goto fail;
 
 	nlmclnt_locks_init_private(fl, host);
 	/* Set up the argument struct */
@@ -191,6 +299,8 @@ nlmclnt_proc(struct inode *inode, int cm
 	fl->fl_ops = NULL;
 
 	dprintk("lockd: clnt proc returns %d\n", status);
+fail:
+	(void)set_exec_env(ve);
 	return status;
 }
 EXPORT_SYMBOL(nlmclnt_proc);
@@ -481,9 +591,15 @@ static void nlmclnt_locks_release_privat
 	nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
 }
 
+static int nlm_get_lockid(struct file_lock *fl)
+{
+	return fl->fl_u.nfs_fl.owner->pid;
+}
+
 static struct file_lock_operations nlmclnt_lock_ops = {
 	.fl_copy_lock = nlmclnt_locks_copy_lock,
 	.fl_release_private = nlmclnt_locks_release_private,
+	.fl_owner_id = nlm_get_lockid,
 };
 
 static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/host.c kernel-2.6.18-417.el5-028stab121/fs/lockd/host.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/host.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/host.c	2017-01-13 08:40:23.000000000 -0500
@@ -34,7 +34,7 @@ static int			nrhosts;
 static DEFINE_MUTEX(nlm_host_mutex);
 
 
-static void			nlm_gc_hosts(void);
+static int			nlm_gc_hosts(envid_t veid);
 
 /*
  * Find an NLM server handle in the cache. If there is none, create it.
@@ -71,17 +71,19 @@ nlm_lookup_host(int server, struct socka
 	u32		addr;
 	int		hash;
 	int		cmp_src = 1;
+	envid_t		veid;
 
 	dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n",
 			(unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version);
 
 	hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
+	veid = get_exec_env()->veid;
 
 	/* Lock hash table */
 	mutex_lock(&nlm_host_mutex);
 
 	if (time_after_eq(jiffies, next_gc))
-		nlm_gc_hosts();
+		nlm_gc_hosts(veid);
 
 	if (!server || ssin->sin_addr.s_addr == INADDR_ANY)
 		cmp_src = 0;
@@ -97,6 +99,8 @@ nlm_lookup_host(int server, struct socka
 			continue;
 		if (cmp_src && !nlm_cmp_addr(&host->h_saddr, ssin))
 			continue;
+		if (!ve_accessible_strict_veid(host->h_owner_veid, veid))
+			continue;
 
 		if (nlm_cmp_addr(&host->h_addr, sin)) {
 			if (hp != nlm_hosts + hash) {
@@ -141,6 +145,7 @@ nlm_lookup_host(int server, struct socka
 	spin_lock_init(&host->h_lock);
 	INIT_LIST_HEAD(&host->h_granted);
 	INIT_LIST_HEAD(&host->h_reclaim);
+	host->h_owner_veid = veid;
 
 	if (++nrhosts > NLM_HOST_MAX)
 		next_gc = 0;
@@ -157,10 +162,15 @@ nlm_find_client(void)
 	 * and return it
 	 */
 	int hash;
+	envid_t veid;
+
+	veid = get_exec_env()->veid;
 	mutex_lock(&nlm_host_mutex);
 	for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) {
 		struct nlm_host *host, **hp;
 		for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) {
+			if (!ve_accessible_strict_veid(host->h_owner_veid, veid))
+				continue;
 			if (host->h_server &&
 			    host->h_killed == 0) {
 				nlm_get_host(host);
@@ -173,7 +183,6 @@ nlm_find_client(void)
 	return NULL;
 }
 
-				
 /*
  * Create the NLM RPC client for an NLM peer
  */
@@ -262,7 +271,8 @@ struct nlm_host * nlm_get_host(struct nl
 void nlm_release_host(struct nlm_host *host)
 {
 	if (host != NULL) {
-		dprintk("lockd: release host %s\n", host->h_name);
+		dprintk("lockd: release host %s in CT %u\n",
+			 host->h_name, host->h_owner_veid);
 		BUG_ON(atomic_read(&host->h_count) < 0);
 		if (atomic_dec_and_test(&host->h_count)) {
 			BUG_ON(!list_empty(&host->h_lockowners));
@@ -280,7 +290,11 @@ void
 nlm_shutdown_hosts(void)
 {
 	struct nlm_host	*host;
-	int		i;
+	int		i, nr_hosts_local;
+	envid_t		veid;
+
+	veid = get_exec_env()->veid;
+	nr_hosts_local = 0;
 
 	dprintk("lockd: shutting down host module\n");
 	mutex_lock(&nlm_host_mutex);
@@ -289,24 +303,29 @@ nlm_shutdown_hosts(void)
 	dprintk("lockd: nuking all hosts...\n");
 	for (i = 0; i < NLM_HOST_NRHASH; i++) {
 		for (host = nlm_hosts[i]; host; host = host->h_next) {
+			if (!ve_accessible_strict_veid(host->h_owner_veid, veid))
+				continue;
 			host->h_expires = jiffies - 1;
 			if (host->h_rpcclnt) {
 				rpc_shutdown_client(host->h_rpcclnt);
 				host->h_rpcclnt = NULL;
 			}
+			nr_hosts_local++;
 		}
 	}
 
 	/* Then, perform a garbage collection pass */
-	nlm_gc_hosts();
+	nr_hosts_local -= nlm_gc_hosts(veid);
 	mutex_unlock(&nlm_host_mutex);
 
 	/* complain if any hosts are left */
-	if (nrhosts) {
+	if (nr_hosts_local) {
 		printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
-		dprintk("lockd: %d hosts left:\n", nrhosts);
+		dprintk("lockd: %d hosts left:\n", nr_hosts_local);
 		for (i = 0; i < NLM_HOST_NRHASH; i++) {
 			for (host = nlm_hosts[i]; host; host = host->h_next) {
+				if (!ve_accessible_strict_veid(host->h_owner_veid, veid))
+					continue;
 				dprintk("       %s (cnt %d use %d exp %ld)\n",
 					host->h_name, atomic_read(&host->h_count),
 					host->h_inuse, host->h_expires);
@@ -320,17 +339,23 @@ nlm_shutdown_hosts(void)
  * This GC combines reference counting for async operations with
  * mark & sweep for resources held by remote clients.
  */
-static void
-nlm_gc_hosts(void)
+static int
+nlm_gc_hosts(envid_t veid)
 {
 	struct nlm_host	**q, *host;
 	struct rpc_clnt	*clnt;
-	int		i;
+	int		i, freed;
+
+	freed = 0;
 
 	dprintk("lockd: host garbage collection\n");
 	for (i = 0; i < NLM_HOST_NRHASH; i++) {
 		for (host = nlm_hosts[i]; host; host = host->h_next)
+		{
+			if (!ve_accessible_strict_veid(host->h_owner_veid, veid))
+				continue;
 			host->h_inuse = 0;
+		}
 	}
 
 	/* Mark all hosts that hold locks, blocks or shares */
@@ -340,6 +365,7 @@ nlm_gc_hosts(void)
 		q = &nlm_hosts[i];
 		while ((host = *q) != NULL) {
 			if (atomic_read(&host->h_count) || host->h_inuse
+			 || !ve_accessible_strict_veid(host->h_owner_veid, veid)
 			 || time_before(jiffies, host->h_expires)) {
 				dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
 					host->h_name, atomic_read(&host->h_count),
@@ -353,19 +379,122 @@ nlm_gc_hosts(void)
 			if (host->h_monitored && !host->h_killed)
 				nsm_unmonitor(host);
 			if ((clnt = host->h_rpcclnt) != NULL) {
+				spin_lock(&rpc_client_lock);
 				if (atomic_read(&clnt->cl_users)) {
 					printk(KERN_WARNING
 						"lockd: active RPC handle\n");
 					clnt->cl_dead = 1;
+					spin_unlock(&rpc_client_lock);
 				} else {
+					spin_unlock(&rpc_client_lock);
 					rpc_destroy_client(host->h_rpcclnt);
 				}
 			}
 			kfree(host);
 			nrhosts--;
+			freed++;
 		}
 	}
 
 	next_gc = jiffies + NLM_HOST_COLLECT;
+	return freed;
 }
 
+#ifdef CONFIG_VE
+/*
+ * Freeing all NLM hosts: need to kill all related rpc tasks/wait for their
+ * completion because they can have references to the host being killed.
+ */
+void ve_nlm_shutdown_hosts(struct ve_struct *ve)
+{
+	struct nlm_host	**q, *host;
+	int i, nr_hosts_local, cnt;
+	envid_t veid = ve->veid;
+
+	dprintk("lockd: shutting down host module for CT %u\n", veid);
+
+	nr_hosts_local = 0;
+	cnt = 10;
+	mutex_lock(&nlm_host_mutex);
+
+	for (i = 0; i < NLM_HOST_NRHASH; i++) {
+		for (host = nlm_hosts[i]; host; host = host->h_next) {
+			if (!ve_accessible_strict_veid(host->h_owner_veid, veid))
+				continue;
+			host->h_expires = jiffies - 1;
+			nr_hosts_local++;
+		}
+	}
+again:
+	/* Then, perform a garbage collection pass */
+	nr_hosts_local -= nlm_gc_hosts(veid);
+
+	if (nr_hosts_local) {
+		dprintk("lockd: %d hosts left in CT %u\n",
+			 nr_hosts_local, veid);
+		/* Make sure no async RPC task is in progress */
+		down_write(&rpc_async_task_lock);
+
+		for (i = 0; i < NLM_HOST_NRHASH; i++) {
+			q = &nlm_hosts[i];
+			while ((host = *q) != NULL) {
+				struct rpc_clnt	*clnt;
+
+				if (!ve_accessible_strict_veid(host->h_owner_veid, veid)) {
+					q = &host->h_next;
+					continue;
+				}
+				if ((clnt = host->h_rpcclnt) != NULL) {
+					spin_lock(&rpc_client_lock);
+					if (atomic_read(&clnt->cl_users)) {
+						printk(KERN_WARNING "lockd: "
+						  "active RPC clients in host "
+						  "%s (cnt %d use %d) "
+						  "in CT %u\n",
+						  host->h_name,
+						  atomic_read(&host->h_count),
+						  host->h_inuse, veid);
+
+						rpc_kill_client(clnt);
+						if (cnt == 0)
+							clnt->cl_dead = 1;
+						spin_unlock(&rpc_client_lock);
+					} else {
+						printk(KERN_WARNING "lockd: "
+						  "destroying clients in host "
+						  "%s (cnt %d use %d) "
+						  "in CT %u\n",
+						  host->h_name,
+						  atomic_read(&host->h_count),
+						  host->h_inuse, veid);
+						spin_unlock(&rpc_client_lock);
+						rpc_destroy_client(clnt);
+						host->h_rpcclnt = NULL;
+					}
+				}
+				if (cnt == 0) {
+					/* detach remaining hosts */
+					*q = host->h_next;
+					printk(KERN_WARNING "lockd: "
+						"%s (cnt %d use %d) "
+						"detached in CT %u\n",
+						host->h_name,
+						atomic_read(&host->h_count),
+						host->h_inuse, veid);
+				} else {
+					q = &host->h_next;
+				}
+			}
+		}
+
+		up_write(&rpc_async_task_lock);
+
+		if (cnt-- > 0) {
+			schedule_timeout_interruptible(HZ);
+			goto again;
+		}
+	}
+
+	mutex_unlock(&nlm_host_mutex);
+}
+#endif
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/mon.c kernel-2.6.18-417.el5-028stab121/fs/lockd/mon.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/mon.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/mon.c	2017-01-13 08:40:15.000000000 -0500
@@ -152,7 +152,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
 	 */
 	sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
 	if (!(p = xdr_encode_string(p, buffer))
-	 || !(p = xdr_encode_string(p, system_utsname.nodename)))
+	 || !(p = xdr_encode_string(p, utsname()->nodename)))
 		return ERR_PTR(-EIO);
 	*p++ = htonl(argp->prog);
 	*p++ = htonl(argp->vers);
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/svc.c kernel-2.6.18-417.el5-028stab121/fs/lockd/svc.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/svc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/svc.c	2017-01-13 08:40:27.000000000 -0500
@@ -25,6 +25,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/mutex.h>
+#include <linux/ve_proto.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/stats.h>
@@ -50,11 +51,11 @@ EXPORT_SYMBOL(nlmsvc_ops);
 #include <net/sock.h>
 
 static DEFINE_MUTEX(nlmsvc_mutex);
-static unsigned int		nlmsvc_users;
-static pid_t			nlmsvc_pid;
-static struct svc_serv		*nlmsvc_serv;
-int				nlmsvc_grace_period;
-unsigned long			nlmsvc_timeout;
+static unsigned int		_nlmsvc_users;
+static pid_t			_nlmsvc_pid;
+static struct svc_serv		*_nlmsvc_serv;
+int				_nlmsvc_grace_period;
+unsigned long			_nlmsvc_timeout;
 
 static DECLARE_COMPLETION(lockd_start_done);
 static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
@@ -81,8 +82,6 @@ static const int		nlm_port_min = 0, nlm_
 
 static struct ctl_table_header * nlm_sysctl_table;
 
-static struct timer_list	nlm_grace_period_timer;
-
 static unsigned long set_grace_period(void)
 {
 	unsigned long grace_period;
@@ -97,9 +96,13 @@ static unsigned long set_grace_period(vo
 	return grace_period + jiffies;
 }
 
-static inline void clear_grace_period(unsigned long not_used)
+static inline void clear_grace_period(unsigned long exec_env)
 {
+	struct ve_struct *old_env;
+
+	old_env = set_exec_env((struct ve_struct *)exec_env);
 	nlmsvc_grace_period = 0;
+	(void)set_exec_env(old_env);
 }
 
 /*
@@ -111,6 +114,7 @@ lockd(struct svc_rqst *rqstp)
 	struct svc_serv	*serv = rqstp->rq_server;
 	int		err = 0;
 	unsigned long grace_period_expire;
+	struct timer_list nlm_grace_period_timer;
 
 	/* Lock module and set up kernel thread */
 	/* lockd_up is waiting for us to startup, so will
@@ -146,6 +150,7 @@ lockd(struct svc_rqst *rqstp)
 	init_timer(&nlm_grace_period_timer);
 	nlm_grace_period_timer.function = clear_grace_period;
 	nlm_grace_period_timer.expires = grace_period_expire;
+	nlm_grace_period_timer.data = get_exec_env();
 
 	add_timer(&nlm_grace_period_timer);
 
@@ -184,12 +189,17 @@ lockd(struct svc_rqst *rqstp)
 		 * recvfrom routine.
 		 */
 		err = svc_recv(serv, rqstp, timeout);
-		if (err == -EAGAIN || err == -EINTR)
+		if (err == -EAGAIN || err == -EINTR) {
+#ifdef CONFIG_VE
+			if (!get_exec_env()->is_running)
+				break;
+#endif
 			continue;
+		}
 		if (err < 0) {
 			printk(KERN_WARNING
-			       "lockd: terminating on error %d\n",
-			       -err);
+			       "CT#%u: lockd: terminating on error %d\n",
+			       get_exec_env()->veid, -err);
 			break;
 		}
 
@@ -202,7 +212,7 @@ lockd(struct svc_rqst *rqstp)
 
 	flush_signals(current);
 
-	del_timer(&nlm_grace_period_timer);
+	del_timer_sync(&nlm_grace_period_timer);
 
 	if (nlmsvc_ops)
 		nlmsvc_invalidate_all();
@@ -284,22 +294,24 @@ lockd_up_proto(int proto)
 	 * we should be the first user ...
 	 */
 	if (nlmsvc_users > 1)
-		printk(KERN_WARNING
-			"lockd_up: no pid, %d users??\n", nlmsvc_users);
+		printk(KERN_WARNING "CT#%u: lockd_up: no pid, %d users??\n",
+				get_exec_env()->veid, nlmsvc_users);
 
 	error = -ENOMEM;
 	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE);
 	if (!serv) {
-		printk(KERN_WARNING "lockd_up: create service failed\n");
+		printk(KERN_WARNING "CT#%u: lockd_up: create service failed\n",
+				get_exec_env()->veid);
 		goto out;
 	}
 
 	if ((error = make_socks(serv, proto)) < 0) {
-		if (warned++ == 0) 
+		if (warned++ == 0)
 			printk(KERN_WARNING
-				"lockd_up: makesock failed, error=%d\n", error);
+				"CT#%u: lockd_up: makesock failed, error=%d\n",
+				get_exec_env()->veid, error);
 		goto destroy_and_out;
-	} 
+	}
 	warned = 0;
 
 	/*
@@ -308,7 +320,8 @@ lockd_up_proto(int proto)
 	error = svc_create_thread(lockd, serv);
 	if (error) {
 		printk(KERN_WARNING
-			"lockd_up: create thread failed, error=%d\n", error);
+			"CT#%u: lockd_up: create thread failed, error=%d\n",
+			get_exec_env()->veid, error);
 		goto destroy_and_out;
 	}
 	wait_for_completion(&lockd_start_done);
@@ -347,11 +360,14 @@ lockd_down(void)
 		if (--nlmsvc_users)
 			goto out;
 	} else
-		printk(KERN_WARNING "lockd_down: no users! pid=%d\n", nlmsvc_pid);
+		printk(KERN_WARNING "CT#%u: lockd_down: no users! pid=%d\n",
+			get_exec_env()->veid, nlmsvc_pid);
 
 	if (!nlmsvc_pid) {
 		if (warned++ == 0)
-			printk(KERN_WARNING "lockd_down: no lockd running.\n"); 
+			printk(KERN_WARNING
+				"CT#%u: lockd_down: no lockd running.\n",
+				get_exec_env()->veid);
 		goto out;
 	}
 	warned = 0;
@@ -484,6 +500,42 @@ static int lockd_authenticate(struct svc
 	return SVC_DENIED;
 }
 
+#ifdef CONFIG_VE
+extern void ve_nlm_shutdown_hosts(struct ve_struct *ve);
+
+static int ve_lockd_start(void *data)
+{
+	struct ve_struct *ve = (struct ve_struct *)data;
+
+	spin_lock_init(&ve->nlm_reserved_lock);
+	INIT_HLIST_HEAD(&ve->nlm_reserved_pids);
+	return 0;
+}
+
+static void ve_lockd_stop(void *data)
+{
+	struct ve_struct *ve = (struct ve_struct *)data;
+
+	ve_nlm_shutdown_hosts(ve);
+	flush_scheduled_work();
+
+	while (!hlist_empty(&ve->nlm_reserved_pids)) {
+		struct nlm_reserved_pid *p;
+
+		p = hlist_entry(ve->nlm_reserved_pids.first,
+				struct nlm_reserved_pid, list);
+		hlist_del(&p->list);
+		kfree(p);
+	}
+}
+
+static struct ve_hook lockd_hook = {
+	.init	  = ve_lockd_start,
+	.fini	  = ve_lockd_stop,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_FS,
+};
+#endif
 
 param_set_min_max(port, int, simple_strtol, 0, 65535)
 param_set_min_max(grace_period, unsigned long, simple_strtoul,
@@ -512,12 +564,14 @@ module_param(nlm_max_connections, uint, 
 static int __init init_nlm(void)
 {
 	nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root, 0);
+	ve_hook_register(VE_SS_CHAIN, &lockd_hook);
 	return nlm_sysctl_table ? 0 : -ENOMEM;
 }
 
 static void __exit exit_nlm(void)
 {
 	/* FIXME: delete all NLM clients */
+	ve_hook_unregister(&lockd_hook);
 	nlm_shutdown_hosts();
 	unregister_sysctl_table(nlm_sysctl_table);
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/svclock.c kernel-2.6.18-417.el5-028stab121/fs/lockd/svclock.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/svclock.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/svclock.c	2017-01-13 08:40:15.000000000 -0500
@@ -328,7 +328,7 @@ static int nlmsvc_setgrantargs(struct nl
 {
 	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
 	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.caller = utsname()->nodename;
 	call->a_args.lock.oh.len = lock->oh.len;
 
 	/* set default data area */
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/xdr4.c kernel-2.6.18-417.el5-028stab121/fs/lockd/xdr4.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/xdr4.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/xdr4.c	2017-01-13 08:40:20.000000000 -0500
@@ -133,6 +133,7 @@ nlm4_decode_lock(u32 *p, struct nlm_lock
 	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
+	set_flock_notpid(fl);
 	fl->fl_owner = current->files;
 	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
@@ -304,6 +305,7 @@ nlm4svc_decode_shareargs(struct svc_rqst
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
+	set_flock_notpid(&lock->fl);
 	lock->svid = ~(u32) 0;
 	lock->fl.fl_pid = (pid_t)lock->svid;
 
@@ -421,6 +423,7 @@ nlm4clt_decode_testres(struct rpc_rqst *
 
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
+		set_flock_notpid(fl);
 		excl = ntohl(*p++);
 		resp->lock.svid = ntohl(*p++);
 		fl->fl_pid = (pid_t)resp->lock.svid;
diff -upr kernel-2.6.18-417.el5.orig/fs/lockd/xdr.c kernel-2.6.18-417.el5-028stab121/fs/lockd/xdr.c
--- kernel-2.6.18-417.el5.orig/fs/lockd/xdr.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/lockd/xdr.c	2017-01-13 08:40:20.000000000 -0500
@@ -133,6 +133,7 @@ nlm_decode_lock(u32 *p, struct nlm_lock 
 	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
+	set_flock_notpid(fl);
 	fl->fl_owner = current->files;
 	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
@@ -298,6 +299,7 @@ nlmsvc_decode_shareargs(struct svc_rqst 
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
+	set_flock_notpid(&lock->fl);
 	lock->svid = ~(u32) 0;
 	lock->fl.fl_pid = (pid_t)lock->svid;
 
@@ -415,6 +417,7 @@ nlmclt_decode_testres(struct rpc_rqst *r
 
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
+		set_flock_notpid(fl);
 		excl = ntohl(*p++);
 		resp->lock.svid = ntohl(*p++);
 		fl->fl_pid = (pid_t)resp->lock.svid;
@@ -515,7 +518,7 @@ nlmclt_decode_res(struct rpc_rqst *req, 
  */
 #define NLM_void_sz		0
 #define NLM_cookie_sz		1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz		1+XDR_QUADLEN(sizeof(system_utsname.nodename))
+#define NLM_caller_sz		1+XDR_QUADLEN(sizeof(utsname()->nodename))
 #define NLM_netobj_sz		1+XDR_QUADLEN(XDR_MAX_NETOBJ)
 /* #define NLM_owner_sz		1+XDR_QUADLEN(NLM_MAXOWNER) */
 #define NLM_fhandle_sz		1+XDR_QUADLEN(NFS2_FHSIZE)
diff -upr kernel-2.6.18-417.el5.orig/fs/locks.c kernel-2.6.18-417.el5-028stab121/fs/locks.c
--- kernel-2.6.18-417.el5.orig/fs/locks.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/locks.c	2017-01-13 08:40:27.000000000 -0500
@@ -129,6 +129,8 @@
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
+#include <ub/ub_misc.h>
+
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE)
@@ -145,10 +147,30 @@ static LIST_HEAD(blocked_list);
 static kmem_cache_t *filelock_cache __read_mostly;
 
 /* Allocate an empty lock structure. */
-static struct file_lock *locks_alloc_lock(void)
+struct file_lock *locks_alloc_lock(int charge)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	struct file_lock *fl;
+
+	fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	if (fl == NULL)
+		goto out;
+#ifdef CONFIG_VE
+	fl->fl_notpid = 0;
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	fl->fl_charged = 0;
+	if (!charge)
+		goto out;
+	if (!ub_flock_charge(fl, 1))
+		goto out;
+
+	kmem_cache_free(filelock_cache, fl);
+	fl = NULL;
+#endif
+out:
+	return fl;
 }
+EXPORT_SYMBOL(locks_alloc_lock);
 
 static void locks_release_private(struct file_lock *fl)
 {
@@ -169,15 +191,17 @@ static void locks_release_private(struct
 }
 
 /* Free a lock which is not in use. */
-static void locks_free_lock(struct file_lock *fl)
+void locks_free_lock(struct file_lock *fl)
 {
 	BUG_ON(waitqueue_active(&fl->fl_wait));
 	BUG_ON(!list_empty(&fl->fl_block));
 	BUG_ON(!list_empty(&fl->fl_link));
 
+	ub_flock_uncharge(fl);
 	locks_release_private(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
+EXPORT_SYMBOL(locks_free_lock);
 
 void locks_init_lock(struct file_lock *fl)
 {
@@ -237,6 +261,9 @@ static void __locks_copy_lock(struct fil
 {
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
+#ifdef CONFIG_VE
+	new->fl_notpid = fl->fl_notpid;
+#endif
 	new->fl_file = NULL;
 	/* Clear the kABI toggle switch since the fl_lmops is cleared. */
 	new->fl_flags = fl->fl_flags & ~FL_GRANT;
@@ -284,7 +311,7 @@ static int flock_make_lock(struct file *
 	if (type < 0)
 		return type;
 	
-	fl = locks_alloc_lock();
+	fl = locks_alloc_lock(type != F_UNLCK);
 	if (fl == NULL)
 		return -ENOMEM;
 
@@ -471,7 +498,7 @@ static int lease_init(struct file *filp,
 /* Allocate a file_lock initialised to this type of lease */
 static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
 {
-	struct file_lock *fl = locks_alloc_lock();
+	struct file_lock *fl = locks_alloc_lock(1);
 	int error = -ENOMEM;
 
 	if (fl == NULL)
@@ -784,8 +811,15 @@ static int flock_lock_file(struct file *
 		goto out;
 	}
 
+	/*
+	 * Non-F_UNLCK requests must have already been charged in
+	 * flock_make_lock().
+	 *
+	 * Strictly speaking new_fl should be charged, not the request,
+	 * but charging the request lets us fail earlier.
+	 */
 	error = -ENOMEM;
-	new_fl = locks_alloc_lock();
+	new_fl = locks_alloc_lock(0);
 	if (new_fl == NULL)
 		goto out;
 	/*
@@ -811,6 +845,10 @@ find_conflict:
 	}
 	if (request->fl_flags & FL_ACCESS)
 		goto out;
+
+	set_flock_charged(new_fl);
+	unset_flock_charged(request);
+
 	locks_copy_lock(new_fl, request);
 	locks_insert_lock(&inode->i_flock, new_fl);
 	new_fl = NULL;
@@ -842,8 +880,11 @@ static int __posix_lock_file_conf(struct
 	if (!(request->fl_flags & FL_ACCESS) &&
 	    (request->fl_type != F_UNLCK ||
 	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
-		new_fl = locks_alloc_lock();
-		new_fl2 = locks_alloc_lock();
+		if (request->fl_type != F_UNLCK)
+			new_fl = locks_alloc_lock(1);
+		else
+			new_fl = NULL;
+		new_fl2 = locks_alloc_lock(0);
 	}
 
 	lock_kernel();
@@ -977,7 +1018,7 @@ static int __posix_lock_file_conf(struct
 	 * bail out.
 	 */
 	error = -ENOLCK; /* "no luck" */
-	if (right && left == right && !new_fl2)
+	if (right && left == right && !(request->fl_type == F_UNLCK || new_fl2))
 		goto out;
 
 	error = 0;
@@ -988,23 +1029,32 @@ static int __posix_lock_file_conf(struct
 			goto out;
 		}
 
-		if (!new_fl) {
-			error = -ENOLCK;
+		error = -ENOLCK;
+		if (!new_fl)
+			goto out;
+		if (right && (left == right) && ub_flock_charge(new_fl, 1))
 			goto out;
-		}
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
+		error = 0;
 	}
 	if (right) {
 		if (left == right) {
 			/* The new lock breaks the old one in two pieces,
 			 * so we have to use the second new lock.
 			 */
+			error = -ENOLCK;
+			if (added && ub_flock_charge(new_fl2,
+						request->fl_type != F_UNLCK))
+				goto out;
+			/* FIXME move all fl_charged manipulations in ub code */
+			set_flock_charged(new_fl2);
 			left = new_fl2;
 			new_fl2 = NULL;
 			locks_copy_lock(left, right);
 			locks_insert_lock(before, left);
+			error = 0;
 		}
 		right->fl_start = request->fl_end + 1;
 		locks_wake_up_blocks(right);
@@ -1320,8 +1370,6 @@ void lease_get_mtime(struct inode *inode
 	struct file_lock *flock = inode->i_flock;
 	if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK))
 		*time = current_fs_time(inode->i_sb);
-	else
-		*time = inode->i_mtime;
 }
 
 EXPORT_SYMBOL(lease_get_mtime);
@@ -1396,7 +1444,7 @@ int __setlease(struct file *filp, long a
 
 	if (arg != F_UNLCK) {
 		error = -ENOMEM;
-		new_fl = locks_alloc_lock();
+		new_fl = locks_alloc_lock(1);
 		if (new_fl == NULL)
 			goto out;
 
@@ -1590,7 +1638,6 @@ int flock_lock_file_wait(struct file *fi
 	}
 	return error;
 }
-
 EXPORT_SYMBOL(flock_lock_file_wait);
 
 /**
@@ -1656,6 +1703,7 @@ asmlinkage long sys_flock(unsigned int f
  out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_flock);
 
 /**
  * vfs_test_lock - test file byte range lock
@@ -1676,7 +1724,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
 
 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 {
-	flock->l_pid = fl->fl_pid;
+	flock->l_pid = pid_to_vpid(fl->fl_pid);
 #if BITS_PER_LONG == 32
 	/*
 	 * Make sure we can represent the posix lock via
@@ -1698,7 +1746,7 @@ static int posix_lock_to_flock(struct fl
 #if BITS_PER_LONG == 32
 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 {
-	flock->l_pid = fl->fl_pid;
+	flock->l_pid = pid_to_vpid(fl->fl_pid);
 	flock->l_start = fl->fl_start;
 	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
 		fl->fl_end - fl->fl_start + 1;
@@ -1786,7 +1834,7 @@ EXPORT_SYMBOL_GPL(vfs_lock_file);
 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock flock;
 	struct inode *inode;
 	struct file *f;
@@ -1921,7 +1969,7 @@ out:
 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock64 __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock64 flock;
 	struct inode *inode;
 	struct file *f;
@@ -2135,7 +2183,12 @@ static void lock_get_status(struct seq_f
 			    loff_t id, char *pfx)
 {
 	struct inode *inode = NULL;
+	unsigned int fl_pid;
 
+	if (is_flock_notpid(fl))
+		fl_pid = fl->fl_pid;
+	else
+		fl_pid = pid_to_vpid(fl->fl_pid);
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_dentry->d_inode;
 
@@ -2177,16 +2230,16 @@ static void lock_get_status(struct seq_f
 	}
 	if (inode) {
 #ifdef WE_CAN_BREAK_LSLK_NOW
-		seq_printf(f, "%d %s:%ld ", fl->fl_pid,
+		seq_printf(f, "%d %s:%ld ", fl_pid,
 				inode->i_sb->s_id, inode->i_ino);
 #else
 		/* userspace relies on this representation of dev_t ;-( */
-		seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid,
+		seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 #endif
 	} else {
-		seq_printf(f, "%d <none>:0 ", fl->fl_pid);
+		seq_printf(f, "%d <none>:0 ", fl_pid);
 	}
 	if (IS_POSIX(fl)) {
 		if (fl->fl_end == OFFSET_MAX)
@@ -2322,7 +2375,7 @@ EXPORT_SYMBOL(lock_may_write);
 static int __init filelock_init(void)
 {
 	filelock_cache = kmem_cache_create("file_lock_cache",
-			sizeof(struct file_lock), 0, SLAB_PANIC,
+			sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
 			init_once, NULL);
 	return 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/Makefile kernel-2.6.18-417.el5-028stab121/fs/Makefile
--- kernel-2.6.18-417.el5.orig/fs/Makefile	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/Makefile	2017-01-13 08:40:40.000000000 -0500
@@ -11,13 +11,14 @@ obj-y :=	open.o read_write.o file_table.
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
 		ioprio.o pnode.o drop_caches.o splice.o sync.o \
-		stack.o
+		stack.o utimes.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_ANON_INODES)	+= anon_inodes.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
+obj-$(CONFIG_SIGNALFD)		+= signalfd.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
 
 nfsd-$(CONFIG_NFSD)		:= nfsctl.o
@@ -43,9 +44,15 @@ obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
+obj-$(CONFIG_VZ_QUOTA)		+= vzdquota.o
+vzdquota-y			+= vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
 
 obj-$(CONFIG_DNOTIFY)		+= dnotify.o
 
+obj-$(CONFIG_SIM_FS)		+= simfs.o
+
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
 obj-$(CONFIG_SYSFS)		+= sysfs/
diff -upr kernel-2.6.18-417.el5.orig/fs/mpage.c kernel-2.6.18-417.el5-028stab121/fs/mpage.c
--- kernel-2.6.18-417.el5.orig/fs/mpage.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/mpage.c	2017-01-13 08:40:18.000000000 -0500
@@ -26,6 +26,8 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/virtinfo.h>
+#include <ub/io_prio.h>
 
 /*
  * I/O completion handler for multipage BIOs.
@@ -780,6 +782,8 @@ retry:
 
 			done_index = page->index + 1;
 
+			virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 			lock_page(page);
 
 			/*
@@ -958,6 +962,9 @@ retry:
 			 * mapping
 			 */
 
+
+			virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 			lock_page(page);
 
 			if (unlikely(page->mapping != mapping)) {
@@ -995,6 +1002,7 @@ retry:
 						&last_block_in_bio, &ret, wbc,
 						page->mapping->a_ops->writepage);
 			}
+
 			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
 				unlock_page(page);
 			if (ret || (--(wbc->nr_to_write) <= 0))
diff -upr kernel-2.6.18-417.el5.orig/fs/namei.c kernel-2.6.18-417.el5-028stab121/fs/namei.c
--- kernel-2.6.18-417.el5.orig/fs/namei.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/namei.c	2017-01-13 08:40:41.000000000 -0500
@@ -141,6 +141,7 @@ char * getname(const char __user * filen
 {
 	char *tmp, *result;
 
+	ub_dentry_checkup();
 	result = ERR_PTR(-ENOMEM);
 	tmp = __getname();
 	if (tmp)  {
@@ -361,6 +362,24 @@ void path_release_on_umount(struct namei
 }
 
 /**
+ * path_connected - Verify that nd->dentry is below nd->mnt->mnt_root
+ * @nd: nameidata to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct nameidata *nd)
+{
+	struct vfsmount *mnt = nd->mnt;
+
+	/* Only bind mounts can have disconnected paths */
+	if (mnt->mnt_root == mnt->mnt_sb->s_root)
+		return true;
+
+	return is_subdir(nd->dentry, mnt->mnt_root);
+}
+
+/**
  * release_open_intent - free up open intent resources
  * @nd: pointer to nameidata
  */
@@ -409,6 +428,21 @@ static struct dentry * cached_lookup(str
 	if (!dentry)
 		dentry = d_lookup(parent, name);
 
+	/*
+	 * The revalidation rules are simple:
+	 * d_revalidate operation is called when we're about to use a cached
+	 * dentry rather than call d_lookup.
+	 * d_revalidate method may unhash the dentry itself or return FALSE, in
+	 * which case if the dentry can be released d_lookup will be called.
+	 *
+	 * Additionally, by request of NFS people
+	 * (http://linux.bkbits.net:8080/linux-2.4/cset@1.181?nav=index.html|src/|src/fs|related/fs/namei.c)
+	 * d_revalidate is called when `/', `.' or `..' are looked up.
+	 * Since re-lookup is impossible on them, we introduce a hack and
+	 * return an error in this case.
+	 *
+	 *     2003/02/19  SAW
+	 */
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
 		dentry = do_revalidate(dentry, nd);
 
@@ -508,6 +542,7 @@ static struct dentry * real_lookup(struc
 	struct dentry * result;
 	struct inode *dir = parent->d_inode;
 
+repeat:
 	mutex_lock(&dir->i_mutex);
 	/*
 	 * First re-do the cached lookup just in case it was created
@@ -553,7 +588,7 @@ out_unlock:
 	if (result->d_op && result->d_op->d_revalidate) {
 		result = do_revalidate(result, nd);
 		if (!result)
-			result = ERR_PTR(-ENOENT);
+			goto repeat;
 	}
 	return result;
 }
@@ -938,7 +973,7 @@ int __follow_down(struct vfsmount **mnt,
 	return 0;
 }
 
-static __always_inline void follow_dotdot(struct nameidata *nd)
+static __always_inline int follow_dotdot(struct nameidata *nd)
 {
 	while(1) {
 		struct vfsmount *parent;
@@ -950,12 +985,19 @@ static __always_inline void follow_dotdo
                         read_unlock(&current->fs->lock);
 			break;
 		}
-                read_unlock(&current->fs->lock);
+		read_unlock(&current->fs->lock);
+#ifdef CONFIG_VE
+		if (nd->dentry == get_exec_env()->fs_root &&
+		    !ve_is_super(get_exec_env()))
+			break;
+#endif
 		spin_lock(&dcache_lock);
 		if (nd->dentry != nd->mnt->mnt_root) {
 			nd->dentry = dget(nd->dentry->d_parent);
 			spin_unlock(&dcache_lock);
 			dput(old);
+			if (unlikely(!path_connected(nd)))
+				return -ENOENT;
 			break;
 		}
 		spin_unlock(&dcache_lock);
@@ -972,7 +1014,9 @@ static __always_inline void follow_dotdo
 		mntput(nd->mnt);
 		nd->mnt = parent;
 	}
-	follow_mount(&nd->mnt, &nd->dentry);
+	if (!(nd->flags & LOOKUP_DIVE))
+		follow_mount(&nd->mnt, &nd->dentry);
+	return 0;
 }
 
 /*
@@ -986,13 +1030,17 @@ static int do_lookup(struct nameidata *n
 	struct vfsmount *mnt = nd->mnt;
 	struct dentry *dentry = __d_lookup(nd->dentry, name);
 	int flags = nd->flags;
-	int ret;
+	int ret = 0;
 
 	if (!dentry)
 		goto need_lookup;
 	if (dentry->d_op && dentry->d_op->d_revalidate)
 		goto need_revalidate;
 done:
+	if ((nd->flags & LOOKUP_STRICT) && d_mountpoint(dentry)) {
+		dput(dentry);
+		return -ENOENT;
+	}
 	path->mnt = mnt;
 	path->dentry = dentry;
 	/*
@@ -1001,7 +1049,8 @@ done:
 	 */
 	if (!(nd->flags & LOOKUP_CONTINUE) && name->name[name->len] == '/')
 		flags |= LOOKUP_DIRECTORY;
-	ret = managed_dentry(path, flags);
+	if (!(nd->flags & LOOKUP_DIVE))
+		ret = managed_dentry(path, flags);
 	if (unlikely(ret < 0))
 		dput_path(path, nd);
 	return ret;
@@ -1038,6 +1087,7 @@ fail:
 static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
 {
 	return inode && inode->i_op && unlikely(inode->i_op->follow_link) &&
+		!(lookup_flags & LOOKUP_STRICT) &&
 		((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
 }
 
@@ -1053,6 +1103,7 @@ static fastcall int __link_path_walk(con
 {
 	struct vfs_path next;
 	struct inode *inode;
+	int real_components = 0;
 	int err, atomic;
 	unsigned int lookup_flags = nd->flags;
 
@@ -1110,7 +1161,9 @@ static fastcall int __link_path_walk(con
 			case 2:	
 				if (this.name[1] != '.')
 					break;
-				follow_dotdot(nd);
+				err = follow_dotdot(nd);
+				if (err)
+					goto out_pput;
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
@@ -1126,6 +1179,7 @@ static fastcall int __link_path_walk(con
 				break;
 		}
 		/* This does the actual lookups.. */
+		real_components++;
 		err = do_lookup(nd, &this, &next, atomic);
 		if (err)
 			break;
@@ -1139,6 +1193,9 @@ static fastcall int __link_path_walk(con
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			err = -ENOENT;
+			if (lookup_flags & LOOKUP_STRICT)
+				goto out_dput;
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
@@ -1170,7 +1227,9 @@ last_component:
 			case 2:	
 				if (this.name[1] != '.')
 					break;
-				follow_dotdot(nd);
+				err = follow_dotdot(nd);
+				if (err)
+					goto out_pput;
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
@@ -1206,30 +1265,45 @@ lookup_parent:
 		nd->last_type = LAST_NORM;
 		if (this.name[0] != '.')
 			goto return_base;
-		if (this.len == 1)
+		if (this.len == 1) {
 			nd->last_type = LAST_DOT;
-		else if (this.len == 2 && this.name[1] == '.')
+			goto return_reval;
+		} else if (this.len == 2 && this.name[1] == '.') {
 			nd->last_type = LAST_DOTDOT;
-		else
-			goto return_base;
+			goto return_reval;
+		}
+return_base:
+		if (!(nd->flags & LOOKUP_NOAREACHECK)) {
+			err = check_area_access_ve(nd->dentry, nd->mnt);
+			if (err)
+				break;
+		}
+		return 0;
 return_reval:
 		/*
 		 * We bypassed the ordinary revalidation routines.
 		 * We may need to check the cached dentry for staleness.
 		 */
-		if (nd->dentry && nd->dentry->d_sb &&
+		if (!real_components && nd->dentry && nd->dentry->d_sb &&
 		    (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
 			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
 			if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
+				/*
+				 * This lookup is for `/' or `.' or `..'.
+				 * The filesystem unhashed the dentry itself
+				 * inside d_revalidate (otherwise, d_invalidate
+				 * wouldn't succeed).  As a special courtesy to
+				 * NFS we return an error.   2003/02/19  SAW
+				 */
 				break;
 		}
-return_base:
-		return 0;
+		goto return_base;
 out_dput:
 		dput_path(&next, nd);
 		break;
 	}
+out_pput:
 	path_release(nd);
 return_err:
 	return err;
@@ -2150,6 +2224,7 @@ asmlinkage long sys_mknod(const char __u
 {
 	return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
+EXPORT_SYMBOL_GPL(sys_mknod);
 
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -2208,6 +2283,7 @@ asmlinkage long sys_mkdir(const char __u
 {
 	return sys_mkdirat(AT_FDCWD, pathname, mode);
 }
+EXPORT_SYMBOL_GPL(sys_mkdir);
 
 /*
  * We try to drop the dentry early: we should have
@@ -2236,6 +2312,7 @@ void dentry_unhash(struct dentry *dentry
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 }
+EXPORT_SYMBOL(sys_symlink);
 
 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
@@ -2315,6 +2392,7 @@ asmlinkage long sys_rmdir(const char __u
 {
 	return do_rmdir(AT_FDCWD, pathname);
 }
+EXPORT_SYMBOL_GPL(sys_rmdir);
 
 int vfs_unlink(struct inode *dir, struct dentry *dentry)
 {
@@ -2415,6 +2493,7 @@ asmlinkage long sys_unlink(const char __
 {
 	return do_unlinkat(AT_FDCWD, pathname);
 }
+EXPORT_SYMBOL_GPL(sys_unlink);
 
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
 {
@@ -2571,6 +2650,7 @@ asmlinkage long sys_link(const char __us
 {
 	return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
+EXPORT_SYMBOL(sys_rename);
 
 /*
  * The worst of all namespace operations - renaming directory. "Perverted"
@@ -2682,6 +2762,9 @@ int vfs_rename(struct inode *old_dir, st
 	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
 	const char *old_name;
 
+	if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
+		return -EXDEV;
+
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
  
diff -upr kernel-2.6.18-417.el5.orig/fs/namespace.c kernel-2.6.18-417.el5-028stab121/fs/namespace.c
--- kernel-2.6.18-417.el5.orig/fs/namespace.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/namespace.c	2017-01-13 08:40:41.000000000 -0500
@@ -22,6 +22,8 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
+#include <linux/inotify.h>
+#include <linux/ve_proto.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
@@ -39,13 +41,18 @@ static inline int sysfs_init(void)
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
+EXPORT_SYMBOL(vfsmount_lock);
 
 static int event;
 
 static struct list_head *mount_hashtable __read_mostly;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache __read_mostly;
-static struct rw_semaphore namespace_sem;
+struct rw_semaphore namespace_sem;
+EXPORT_SYMBOL(namespace_sem);
+
+unsigned int sysctl_ve_mount_nr = 4096;
+unsigned int sysctl_ve_ifa_nr = 4096;
 
 /* /sys/fs */
 decl_subsys(fs, NULL, NULL);
@@ -59,11 +66,24 @@ static inline unsigned long hash(struct 
 	return tmp & hash_mask;
 }
 
+/*
+ * Operations with a big amount of mount points can require a lot of time.
+ * These operations take the global lock namespace_sem, so they can affect
+ * other containers.
+ */
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
-	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
+	struct vfsmount *mnt = NULL;
+	struct ve_struct *ve = get_exec_env();
+
+	if (atomic_add_return(1, &ve->mnt_nr) > sysctl_ve_mount_nr &&
+							!ve_is_super(ve))
+		goto out_mnt_nr_dec;
+
+	mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
 	if (mnt) {
-		memset(mnt, 0, sizeof(struct vfsmount));
+		/* kmem_cache_zalloc() already zeroed the object */
+		mnt->owner = VEID(ve);
 		atomic_set(&mnt->mnt_count, 1);
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -75,13 +96,15 @@ struct vfsmount *alloc_vfsmnt(const char
 		INIT_LIST_HEAD(&mnt->mnt_slave);
 		if (name) {
 			int size = strlen(name) + 1;
-			char *newname = kmalloc(size, GFP_KERNEL);
+			char *newname = kmalloc(size, GFP_KERNEL_UBC);
 			if (newname) {
 				memcpy(newname, name, size);
 				mnt->mnt_devname = newname;
 			}
 		}
-	}
+	} else
+out_mnt_nr_dec:
+		atomic_dec(&ve->mnt_nr);
 	return mnt;
 }
 
@@ -96,6 +119,13 @@ EXPORT_SYMBOL(simple_set_mnt);
 
 void free_vfsmnt(struct vfsmount *mnt)
 {
+	struct ve_struct *ve = get_ve_by_id(mnt->owner);
+
+	if (ve) {
+		atomic_dec(&ve->mnt_nr);
+		put_ve(ve);
+	}
+
 	kfree(mnt->mnt_devname);
 	kmem_cache_free(mnt_cache, mnt);
 }
@@ -141,7 +171,7 @@ struct vfsmount *lookup_mnt(struct vfsmo
 
 static inline int check_mnt(struct vfsmount *mnt)
 {
-	return mnt->mnt_namespace == current->namespace;
+	return mnt->mnt_namespace == current->nsproxy->namespace;
 }
 
 static void touch_namespace(struct namespace *ns)
@@ -210,7 +240,7 @@ static void commit_tree(struct vfsmount 
 	touch_namespace(n);
 }
 
-static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
+struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
 {
 	struct list_head *next = p->mnt_mounts.next;
 	if (next == &p->mnt_mounts) {
@@ -225,6 +255,7 @@ static struct vfsmount *next_mnt(struct 
 	}
 	return list_entry(next, struct vfsmount, mnt_child);
 }
+EXPORT_SYMBOL(next_mnt);
 
 static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
 {
@@ -276,6 +307,18 @@ static struct vfsmount *clone_mnt(struct
 	return mnt;
 }
 
+struct vfsmount *vfs_bind_mount(struct vfsmount *old, struct dentry *root)
+{
+	struct vfsmount *mnt;
+
+	mnt = clone_mnt(old, root, 0);
+	if (!mnt)
+		return ERR_PTR(-ENOMEM);
+
+	return mnt;
+}
+EXPORT_SYMBOL_GPL(vfs_bind_mount);
+
 static inline void __mntput(struct vfsmount *mnt)
 {
 	struct super_block *sb = mnt->mnt_sb;
@@ -298,6 +341,7 @@ repeat:
 		spin_unlock(&vfsmount_lock);
 		acct_auto_close_mnt(mnt);
 		security_sb_umount_close(mnt);
+		inotify_unmount_mnt(mnt);
 		goto repeat;
 	}
 }
@@ -357,10 +401,41 @@ static inline void mangle(struct seq_fil
 	seq_escape(m, s, " \t\n\\");
 }
 
+static int prepare_mnt_root_mangle(struct vfsmount *mnt,
+		char **path_buf, char **path)
+{
+	/* skip FS_NOMOUNT mounts (rootfs) */
+	if (mnt->mnt_sb->s_flags & MS_NOUSER)
+		return -EACCES;
+
+	*path_buf = (char *)__get_free_page(GFP_KERNEL);
+	if (!*path_buf)
+		return -ENOMEM;
+
+	*path = d_path(mnt->mnt_root, mnt, *path_buf, PAGE_SIZE);
+	if (IS_ERR(*path)) {
+		free_page((unsigned long)*path_buf);
+		/*
+		 * This means that the file position will be incremented, i.e.
+		 * the total number of "invisible" vfsmnt will leak.
+		 */
+		return -EACCES;
+	}
+	return 0;
+}
+
+/* Print the fs type, letting the superblock override the generic name. */
+static void show_type(struct seq_file *m, struct super_block *sb)
+{
+	if (sb->s_op->show_type)
+		sb->s_op->show_type(m, sb);
+	else
+		mangle(m, sb->s_type->name);
+}
+
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
 	struct vfsmount *mnt = v;
-	int err = 0;
+	int err;
 	static struct proc_fs_info {
 		int flag;
 		char *str;
@@ -376,15 +451,28 @@ static int show_vfsmnt(struct seq_file *
 		{ MNT_NOEXEC, ",noexec" },
 		{ MNT_NOATIME, ",noatime" },
 		{ MNT_NODIRATIME, ",nodiratime" },
+		{ MNT_RELATIME, ",relatime" },
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
+	char *path_buf, *path;
+
+	err = prepare_mnt_root_mangle(mnt, &path_buf, &path);
+	if (err < 0)
+		return (err == -EACCES ? 0 : err);
 
-	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	if (ve_is_super(get_exec_env()) ||
+	    !(mnt->mnt_sb->s_type->fs_flags & FS_MANGLE_PROC))
+		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	else {
+		seq_puts(m, "/dev/");
+		mangle(m, mnt->mnt_sb->s_type->name);
+	}
 	seq_putc(m, ' ');
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
+	free_page((unsigned long) path_buf);
 	seq_putc(m, ' ');
-	mangle(m, mnt->mnt_sb->s_type->name);
+	show_type(m, mnt->mnt_sb);
 	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
 	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 		if (mnt->mnt_sb->s_flags & fs_infop->flag)
@@ -410,18 +498,27 @@ struct seq_operations mounts_op = {
 static int show_vfsstat(struct seq_file *m, void *v)
 {
 	struct vfsmount *mnt = v;
-	int err = 0;
+	char *path_buf, *path;
+	int err;
+
+	err = prepare_mnt_root_mangle(mnt, &path_buf, &path);
+	if (err < 0)
+		return (err == -EACCES ? 0 : err);
 
 	/* device */
 	if (mnt->mnt_devname) {
 		seq_puts(m, "device ");
-		mangle(m, mnt->mnt_devname);
+		if (ve_is_super(get_exec_env()))
+			mangle(m, mnt->mnt_devname);
+		else
+			mangle(m, mnt->mnt_sb->s_type->name);
 	} else
 		seq_puts(m, "no device");
 
 	/* mount point */
 	seq_puts(m, " mounted on ");
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
+	free_page((unsigned long)path_buf);
 	seq_putc(m, ' ');
 
 	/* file system type */
@@ -520,6 +617,7 @@ void release_mounts(struct list_head *he
 		mntput(mnt);
 	}
 }
+EXPORT_SYMBOL(release_mounts);
 
 void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 {
@@ -542,6 +640,7 @@ void umount_tree(struct vfsmount *mnt, i
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
 }
+EXPORT_SYMBOL(umount_tree);
 
 static int do_umount(struct vfsmount *mnt, int flags)
 {
@@ -629,6 +728,39 @@ static int do_umount(struct vfsmount *mn
 	return retval;
 }
 
+#ifdef CONFIG_VE
+void umount_ve_fs_type(struct file_system_type *local_fs_type, int veid)
+{
+	struct vfsmount *mnt;
+	struct list_head *p, *q;
+	LIST_HEAD(kill);
+	LIST_HEAD(umount_list);
+
+	down_write(&namespace_sem);
+	spin_lock(&vfsmount_lock);
+	list_for_each_safe(p, q, &current->nsproxy->namespace->list) {
+		mnt = list_entry(p, struct vfsmount, mnt_list);
+		if (mnt->mnt_sb->s_type != local_fs_type)
+			continue;
+		if (veid >= 0 && mnt->owner != veid)
+			continue;
+		list_del(p);
+		list_add(p, &kill);
+	}
+
+	while (!list_empty(&kill)) {
+		LIST_HEAD(kill2);
+		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
+		umount_tree(mnt, 1, &kill2);
+		list_splice(&kill2, &umount_list);
+	}
+	spin_unlock(&vfsmount_lock);
+	up_write(&namespace_sem);
+	release_mounts(&umount_list);
+}
+EXPORT_SYMBOL(umount_ve_fs_type);
+#endif
+
 /*
  * Now umount can handle mount points as well as block devices.
  * This is important for filesystems which use unnamed block devices.
@@ -652,7 +784,7 @@ asmlinkage long sys_umount(char __user *
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN) && !capable(CAP_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(nd.mnt, flags);
@@ -676,7 +808,7 @@ asmlinkage long sys_oldumount(char __use
 
 static int mount_is_safe(struct nameidata *nd)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (capable(CAP_VE_SYS_ADMIN) || capable(CAP_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -858,7 +990,7 @@ static int attach_recursive_mnt(struct v
 	if (parent_nd) {
 		detach_mnt(source_mnt, parent_nd);
 		attach_mnt(source_mnt, nd);
-		touch_namespace(current->namespace);
+		touch_namespace(current->nsproxy->namespace);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
 		commit_tree(source_mnt);
@@ -872,6 +1004,43 @@ static int attach_recursive_mnt(struct v
 	return 0;
 }
 
+/*
+ * Replace dst_mnt in the mount tree with src_mnt, unmounting the tree
+ * rooted at dst_mnt.  If src_mnt is not attached anywhere (it is its
+ * own parent), it is grafted at dst_mnt's old mountpoint instead of
+ * being moved there.
+ */
+void replace_mount(struct vfsmount *src_mnt, struct vfsmount *dst_mnt)
+{
+	struct nameidata src_nd, dst_nd;
+	LIST_HEAD(umount_list);
+
+	down_write(&namespace_sem);
+	spin_lock(&vfsmount_lock);
+
+	detach_mnt(dst_mnt, &dst_nd);
+	umount_tree(dst_mnt, 0, &umount_list);
+
+	if (src_mnt->mnt_parent != src_mnt) {
+		detach_mnt(src_mnt, &src_nd);
+		attach_mnt(src_mnt, &dst_nd);
+	} else {
+		/* zeroed so that path_release(&src_nd) below is a no-op */
+		memset(&src_nd, 0, sizeof(src_nd));
+		mnt_set_mountpoint(dst_nd.mnt, dst_nd.dentry, src_mnt);
+		commit_tree(src_mnt);
+	}
+
+	spin_unlock(&vfsmount_lock);
+	up_write(&namespace_sem);
+
+	path_release(&src_nd);
+	path_release(&dst_nd);
+	release_mounts(&umount_list);
+}
+EXPORT_SYMBOL_GPL(replace_mount);
+
 static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
 {
 	int err;
@@ -915,6 +1078,8 @@ static int do_change_type(struct nameida
 
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
+	if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid))
+		return -EPERM;
 
 	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
@@ -928,7 +1093,8 @@ static int do_change_type(struct nameida
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, int recurse,
+		int mnt_flags)
 {
 	struct nameidata old_nd;
 	struct vfsmount *mnt = NULL;
@@ -958,6 +1124,7 @@ static int do_loopback(struct nameidata 
 	if (!mnt)
 		goto out;
 
+	mnt->mnt_flags |= mnt_flags;
 	err = graft_tree(mnt, nd);
 	if (err) {
 		LIST_HEAD(umount_list);
@@ -983,8 +1150,9 @@ static int do_remount(struct nameidata *
 {
 	int err;
 	struct super_block *sb = nd->mnt->mnt_sb;
+	int bind;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (!check_mnt(nd->mnt))
@@ -993,12 +1161,23 @@ static int do_remount(struct nameidata *
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
 
+	if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid))
+		return -EPERM;
+
+	/* do not allow to remount bind-mounts with another mountpoint flags */
+	bind = 0;
+	if (nd->dentry != sb->s_root) {
+		if ((flags & ~(MS_BIND|MS_POSIXACL|MS_NOUSER)) != 0)
+			return -EINVAL;
+		bind = 1;
+	}
+
 	down_write(&sb->s_umount);
-	err = do_remount_sb(sb, flags, data, 0);
+	err = bind ? 0 : do_remount_sb(sb, flags, data, 0);
 	if (!err)
 		nd->mnt->mnt_flags = mnt_flags;
 	up_write(&sb->s_umount);
-	if (!err)
+	if (!err && !bind)
 		security_sb_post_remount(nd->mnt, flags, data);
 	return err;
 }
@@ -1018,7 +1197,7 @@ static int do_move_mount(struct nameidat
 	struct nameidata old_nd, parent_nd;
 	struct vfsmount *p;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -1026,6 +1205,10 @@ static int do_move_mount(struct nameidat
 	if (err)
 		return err;
 
+	err = -EPERM;
+	if (!ve_accessible_veid(old_nd.mnt->owner, get_exec_env()->veid))
+		goto out_nosem;
+
 	down_write(&namespace_sem);
 	err = __follow_down(&nd->mnt, &nd->dentry, true);
 	if (err < 0)
@@ -1083,6 +1266,7 @@ out:
 	up_write(&namespace_sem);
 	if (!err)
 		path_release(&parent_nd);
+out_nosem:
 	path_release(&old_nd);
 	return err;
 }
@@ -1100,7 +1284,7 @@ static int do_new_mount(struct nameidata
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	mnt = do_kern_mount(type, flags, name, data);
@@ -1131,7 +1315,8 @@ int do_add_mount(struct vfsmount *newmnt
 
 	/* Refuse the same filesystem on the same mount point */
 	err = -EBUSY;
-	if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
+	if (!(mnt_flags & MNT_CPT) &&
+	    nd->mnt->mnt_sb == newmnt->mnt_sb &&
 	    nd->mnt->mnt_root == nd->dentry)
 		goto unlock;
 
@@ -1140,6 +1325,11 @@ int do_add_mount(struct vfsmount *newmnt
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
+
+	/* make this before graft_tree reveals mnt_root to the world... */
+	if (nd->dentry->d_flags & DCACHE_VIRTUAL)
+		newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
+
 	if ((err = graft_tree(newmnt, nd)))
 		goto unlock;
 
@@ -1378,8 +1568,11 @@ int copy_mount_options(const void __user
 	 * gave us is valid.  Just in case, we'll zero
 	 * the remainder of the page.
 	 */
-	/* copy_from_user cannot cross TASK_SIZE ! */
-	size = TASK_SIZE - (unsigned long)data;
+	if (segment_eq(get_fs(), KERNEL_DS))
+		size = PAGE_SIZE;
+	else 
+		/* copy_from_user cannot cross TASK_SIZE ! */
+		size = TASK_SIZE - (unsigned long)data;
 	if (size > PAGE_SIZE)
 		size = PAGE_SIZE;
 
@@ -1429,6 +1622,10 @@ long do_mount(char *dev_name, char *dir_
 	if (data_page)
 		((char *)data_page)[PAGE_SIZE - 1] = 0;
 
+	/* Default to relatime unless overriden */
+	if (!(flags & MS_NOATIME))
+		mnt_flags |= MNT_RELATIME;
+
 	/* Separate the per-mountpoint flags */
 	if (flags & MS_NOSUID)
 		mnt_flags |= MNT_NOSUID;
@@ -1440,9 +1637,11 @@ long do_mount(char *dev_name, char *dir_
 		mnt_flags |= MNT_NOATIME;
 	if (flags & MS_NODIRATIME)
 		mnt_flags |= MNT_NODIRATIME;
+	if (flags & MS_STRICTATIME)
+		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-		   MS_NOATIME | MS_NODIRATIME);
+		   MS_NOATIME | MS_NODIRATIME | MS_STRICTATIME | MS_CPTMOUNT);
 
 	/* ... and get the mountpoint */
 	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1457,7 +1656,7 @@ long do_mount(char *dev_name, char *dir_
 		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
-		retval = do_loopback(&nd, dev_name, flags & MS_REC);
+		retval = do_loopback(&nd, dev_name, flags & MS_REC, mnt_flags);
 	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
 		retval = do_change_type(&nd, flags);
 	else if (flags & MS_MOVE)
@@ -1476,7 +1675,7 @@ dput_out:
  */
 struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 {
-	struct namespace *namespace = tsk->namespace;
+	struct namespace *namespace = tsk->nsproxy->namespace;
 	struct namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
 	struct vfsmount *p, *q;
@@ -1543,7 +1742,7 @@ struct namespace *dup_namespace(struct t
 
 int copy_namespace(int flags, struct task_struct *tsk)
 {
-	struct namespace *namespace = tsk->namespace;
+	struct namespace *namespace = tsk->nsproxy->namespace;
 	struct namespace *new_ns;
 	int err = 0;
 
@@ -1566,7 +1765,7 @@ int copy_namespace(int flags, struct tas
 		goto out;
 	}
 
-	tsk->namespace = new_ns;
+	tsk->nsproxy->namespace = new_ns;
 
 out:
 	put_namespace(namespace);
@@ -1614,6 +1813,7 @@ out1:
 	free_page(type_page);
 	return retval;
 }
+EXPORT_SYMBOL_GPL(sys_mount);
 
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -1660,6 +1860,7 @@ void set_fs_pwd(struct fs_struct *fs, st
 		mntput(old_pwdmnt);
 	}
 }
+EXPORT_SYMBOL_GPL(set_fs_pwd);
 
 static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
 {
@@ -1667,7 +1868,7 @@ static void chroot_fs_refs(struct nameid
 	struct fs_struct *fs;
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
@@ -1682,7 +1883,7 @@ static void chroot_fs_refs(struct nameid
 			put_fs_struct(fs);
 		} else
 			task_unlock(p);
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
 	read_unlock(&tasklist_lock);
 }
 
@@ -1791,7 +1992,7 @@ asmlinkage long sys_pivot_root(const cha
 	detach_mnt(user_nd.mnt, &root_parent);
 	attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
 	attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
-	touch_namespace(current->namespace);
+	touch_namespace(current->nsproxy->namespace);
 	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&user_nd, &new_nd);
 	security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1817,7 +2018,6 @@ static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
 	struct namespace *namespace;
-	struct task_struct *g, *p;
 
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
@@ -1833,13 +2033,8 @@ static void __init init_mount_tree(void)
 	namespace->root = mnt;
 	mnt->mnt_namespace = namespace;
 
-	init_task.namespace = namespace;
-	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
-		get_namespace(namespace);
-		p->namespace = namespace;
-	} while_each_thread(g, p);
-	read_unlock(&tasklist_lock);
+	init_task.nsproxy->namespace = namespace;
+	get_namespace(namespace);
 
 	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
 	set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
@@ -1854,7 +2049,8 @@ void __init mnt_init(unsigned long mempa
 	init_rwsem(&namespace_sem);
 
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
-			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
+			NULL, NULL);
 
 	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
 
@@ -1910,3 +2106,4 @@ void __put_namespace(struct namespace *n
 	release_mounts(&umount_list);
 	kfree(namespace);
 }
+EXPORT_SYMBOL_GPL(__put_namespace);
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/client.c kernel-2.6.18-417.el5-028stab121/fs/nfs/client.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/client.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/client.c	2017-01-13 08:40:27.000000000 -0500
@@ -125,6 +125,7 @@ static struct nfs_client *nfs_alloc_clie
 
 	atomic_set(&clp->cl_count, 1);
 	clp->cl_cons_state = NFS_CS_INITING;
+	clp->owner_env = get_ve(get_exec_env());
 
 	clp->cl_nfsversion = nfsversion;
 	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
@@ -155,6 +156,7 @@ static struct nfs_client *nfs_alloc_clie
 error_3:
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
+	put_ve(clp->owner_env);
 error_2:
 	rpciod_down();
 	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
@@ -195,6 +197,7 @@ static void nfs_free_client(struct nfs_c
 		rpciod_down();
 
 	kfree(clp->cl_hostname);
+	put_ve(clp->owner_env);
 	kfree(clp);
 
 	dprintk("<-- nfs_free_client()\n");
@@ -228,7 +231,9 @@ static struct nfs_client *__nfs_find_cli
 				int proto, int nfsversion, int match_port)
 {
 	struct nfs_client *clp;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
 		/* Don't match clients that failed to initialise properly */
 		if (clp->cl_cons_state < 0)
@@ -238,6 +243,9 @@ static struct nfs_client *__nfs_find_cli
 		if (clp->cl_nfsversion != nfsversion)
 			continue;
 
+		if (!ve_accessible_strict(clp->owner_env, ve))
+			continue;
+
 		if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
 			   sizeof(clp->cl_addr.sin_addr)) != 0)
 			continue;
@@ -744,6 +752,7 @@ static struct nfs_server *nfs_alloc_serv
 		return NULL;
 
 	server->client = server->client_acl = ERR_PTR(-EINVAL);
+	server->owner_env = get_ve(get_exec_env());
 
 	/* Zero out the NFS state stuff */
 	INIT_LIST_HEAD(&server->client_link);
@@ -756,6 +765,7 @@ static struct nfs_server *nfs_alloc_serv
 
 	server->io_stats = nfs_alloc_iostats();
 	if (!server->io_stats) {
+		put_ve(server->owner_env);
 		kfree(server);
 		return NULL;
 	}
@@ -768,6 +778,11 @@ static struct nfs_server *nfs_alloc_serv
  */
 void nfs_free_server(struct nfs_server *server)
 {
+	struct ve_struct *ve, *old_ve;
+ 
+	ve = server->owner_env;
+	old_ve = set_exec_env(ve);
+
 	dprintk("--> nfs_free_server()\n");
 
 	spin_lock(&nfs_client_lock);
@@ -789,7 +804,38 @@ void nfs_free_server(struct nfs_server *
 	kfree(server);
 	nfs_release_automount_timer();
 	dprintk("<-- nfs_free_server()\n");
+
+	(void)set_exec_env(old_ve);
+	put_ve(ve);
+}
+
+#ifdef CONFIG_VE
+void nfs_change_server_params(void *data, int flags, int timeo, int retrans)
+{
+	struct nfs_server *nfs_server = data;
+	struct nfs_client *nfs_client = nfs_server->nfs_client;		
+	struct rpc_clnt *rpc_client = nfs_server->client;
+	struct rpc_xprt *cl_xprt = rpc_client->cl_xprt;	
+	int proto = (nfs_server->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP 
+							: IPPROTO_UDP;
+	struct rpc_timeout timeparams;
+
+	nfs_server->flags = (nfs_server->flags & ~NFS_MOUNT_SOFT) | flags;
+	if (!(nfs_server->flags & NFS_MOUNT_SOFT))
+		rpc_client->cl_softrtry = 0;
+
+	nfs_init_timeout_values(&timeparams, proto, timeo, retrans);
+
+	nfs_client->retrans_timeo = timeparams.to_initval;
+	nfs_client->retrans_count = timeparams.to_retries;
+
+	spin_lock_bh(&cl_xprt->transport_lock);
+	cl_xprt->timeout = timeparams;
+	rpc_init_rtt(&rpc_client->cl_rtt_default, cl_xprt->timeout.to_initval);
+	spin_unlock_bh(&cl_xprt->transport_lock);
 }
+EXPORT_SYMBOL(nfs_change_server_params);
+#endif
 
 /*
  * Create a version 2 or 3 volume record
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/dir.c kernel-2.6.18-417.el5-028stab121/fs/nfs/dir.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/dir.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/dir.c	2017-01-13 08:40:23.000000000 -0500
@@ -33,6 +33,7 @@
 #include <linux/pagevec.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/quotaops.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -792,8 +793,16 @@ static int nfs_lookup_revalidate(struct 
 	inode = dentry->d_inode;
 
 	if (!inode) {
-		if (nfs_neg_need_reval(dir, dentry, nd))
-			goto out_bad;
+		if (dentry->d_flags & DCACHE_LOCALCACHE)
+			goto out_valid;
+
+		if (nfs_neg_need_reval(dir, dentry, nd)) {
+			unsigned long verifier = nfs_save_change_attribute(dir);
+			error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+			if (error != -ENOENT)
+				goto out_bad;
+			nfs_set_verifier(dentry, verifier);
+		}
 		goto out_valid;
 	}
 
@@ -814,6 +823,9 @@ static int nfs_lookup_revalidate(struct 
 	if (NFS_STALE(inode))
 		goto out_bad;
 
+	if (dentry->d_flags & DCACHE_LOCALCACHE)
+		goto out_valid;
+
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error)
 		goto out_bad;
@@ -925,6 +937,7 @@ static struct dentry *nfs_lookup(struct 
 	int error;
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr;
+	struct nfs_dq_info qi;
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
@@ -934,16 +947,22 @@ static struct dentry *nfs_lookup(struct 
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
 		goto out;
 
+	nfs_dq_empty(dir, dentry, &qi);
+
 	res = ERR_PTR(-ENOMEM);
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	lock_kernel();
 
+	/* Hmm. Too hard violation of VFS API. We can sacrifice atomicity
+	 * of O_EXCL in network-wide sense. --ANK
+	 */
 	/*
 	 * If we're doing an exclusive create, optimize away the lookup
 	 * but don't hash the dentry.
 	 */
-	if (nfs_is_exclusive_create(dir, nd)) {
+	if (!(dentry->d_flags & DCACHE_LOCALCACHE) &&
+	    nfs_is_exclusive_create(dir, nd)) {
 		d_instantiate(dentry, NULL);
 		res = NULL;
 		goto out_unlock;
@@ -964,7 +983,7 @@ static struct dentry *nfs_lookup(struct 
 		res = ERR_PTR(error);
 		goto out_unblock_sillyrename;
 	}
-	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
+	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr, &qi);
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
 		goto out_unblock_sillyrename;
@@ -1130,6 +1149,7 @@ static struct dentry *nfs_readdir_lookup
 	};
 	struct inode *inode;
 	unsigned long verf = nfs_save_change_attribute(dir);
+	struct nfs_dq_info qi;
 
 	switch (name.len) {
 		case 2:
@@ -1174,7 +1194,8 @@ static struct dentry *nfs_readdir_lookup
 	if (dentry == NULL)
 		return NULL;
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
-	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+	nfs_dq_empty(dir, dentry, &qi);
+	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, &qi);
 	if (IS_ERR(inode)) {
 		dput(dentry);
 		return NULL;
@@ -1197,7 +1218,7 @@ out_renew:
  * Code common to create, mkdir, and mknod.
  */
 int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
-				struct nfs_fattr *fattr)
+				struct nfs_fattr *fattr, struct nfs_dq_info * qi)
 {
 	struct dentry *parent = dget_parent(dentry);
 	struct inode *dir = parent->d_inode;
@@ -1221,7 +1242,9 @@ int nfs_instantiate(struct dentry *dentr
 		if (error < 0)
 			goto out_error;
 	}
-	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+	if (qi)
+		qi->d = dentry;
+	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, qi);
 	error = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_error;
@@ -1332,6 +1355,8 @@ static int nfs_rmdir(struct inode *dir, 
 	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
+	DQUOT_INIT(dentry->d_inode);
+
 	lock_kernel();
 	error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
 	/* Ensure the VFS deletes this inode */
@@ -1371,8 +1396,11 @@ static int nfs_safe_remove(struct dentry
 		if (error == 0)
 			inode->i_nlink--;
 		nfs_mark_for_revalidate(inode);
-	} else
+	} else {
+		/* How can this happen? --ANK */
+		printk("nfs_safe_remove: remove inode==NULL\n");
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+	}
 out:
 	return error;
 }
@@ -1390,6 +1418,8 @@ static int nfs_unlink(struct inode *dir,
 	dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name);
 
+	DQUOT_INIT(dentry->d_inode);
+
 	lock_kernel();
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
@@ -1502,6 +1532,9 @@ nfs_link(struct dentry *old_dentry, stru
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
+	if (DQUOT_CHECK_SPACE(dir))
+		return -EDQUOT;
+
 	lock_kernel();
 	d_drop(dentry);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
@@ -1545,6 +1578,12 @@ static int nfs_rename(struct inode *old_
 	struct dentry *dentry = NULL, *rehash = NULL;
 	int error = -EBUSY;
 
+	if (DQUOT_CHECK_SPACE(new_dir))
+		return -EDQUOT;
+
+	if (new_inode)
+		DQUOT_INIT(new_inode);
+
 	/*
 	 * To prevent any new references to the target during the rename,
 	 * we unhash the dentry and free the inode in advance.
@@ -1626,6 +1665,15 @@ out:
 		d_rehash(rehash);
 	if (!error) {
 		d_move(old_dentry, new_dentry);
+		if (new_dentry->d_inode) {
+			/* Deleted directory does not pass through
+			 * delete_inode, because i_nlink stays not-zero.
+			 * Here we use the fact, that each directory
+			 * is additionally dget() by caller, so that
+			 * d_delete() does not clear d_inode.
+			 */
+			new_dentry->d_inode->i_nlink = 0;
+		}
 		nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
 	}
 
@@ -1888,6 +1936,9 @@ int nfs_permission(struct inode *inode, 
 	if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
 		goto force_lookup;
 
+	if (test_bit(NFS_DFLAG_LOCAL, &NFS_I(inode)->dflags))
+		return generic_permission(inode, mask, NULL);
+
 	switch (inode->i_mode & S_IFMT) {
 		case S_IFLNK:
 			goto out;
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/direct.c kernel-2.6.18-417.el5-028stab121/fs/nfs/direct.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/direct.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/direct.c	2017-01-13 08:40:40.000000000 -0500
@@ -45,6 +45,7 @@
 #include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/kref.h>
+#include <linux/task_io_accounting_ops.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
@@ -372,6 +373,9 @@ static ssize_t nfs_direct_read(struct ki
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+	task_io_account_read(count);
+
 	dreq = nfs_direct_req_alloc();
 	if (dreq == NULL)
 		goto out;
@@ -730,6 +734,9 @@ static ssize_t nfs_direct_write(struct k
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = 0;
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+	task_io_account_write(count);
+
 	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		goto out;
@@ -762,8 +769,8 @@ out:
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * @count: number of bytes to read
+ * @iov: vector of user buffers into which to read data
+ * @nr_segs: size of iov vector
  * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
@@ -780,17 +787,24 @@ out:
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
+	/* XXX: temporary */
+	const char __user *buf = iov[0].iov_base;
+	size_t count = iov[0].iov_len;
 
 	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
+	if (nr_segs != 1)
+		return -EINVAL;
+
 	if (count < 0)
 		goto out;
 	retval = -EFAULT;
@@ -815,8 +829,8 @@ out:
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * @count: number of bytes to write
+ * @iov: vector of user buffers from which to write data
+ * @nr_segs: size of iov vector
  * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
@@ -837,17 +851,24 @@ out:
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos)
 {
 	ssize_t retval;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
+	/* XXX: temporary */
+	const char __user *buf = iov[0].iov_base;
+	size_t count = iov[0].iov_len;
 
 	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
+	if (nr_segs != 1)
+		return -EINVAL;
+
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
 		goto out;
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/file.c kernel-2.6.18-417.el5-028stab121/fs/nfs/file.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/file.c	2017-01-13 08:40:41.000000000 -0500
@@ -28,6 +28,7 @@
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/swap.h>
+#include <linux/quotaops.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -42,8 +43,10 @@ static int nfs_file_release(struct inode
 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
 static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
-static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos);
 static int  nfs_file_flush(struct file *, fl_owner_t id);
 static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
 static int nfs_check_flags(int flags);
@@ -56,8 +59,8 @@ const struct file_operations nfs_file_op
 	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-	.aio_read		= nfs_file_read,
-	.aio_write		= nfs_file_write,
+	.aio_read	= nfs_file_read,
+	.aio_write	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
 	.open		= nfs_file_open,
 	.flush		= nfs_file_flush,
@@ -212,15 +215,17 @@ nfs_file_flush(struct file *file, fl_own
 }
 
 static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	struct dentry * dentry = iocb->ki_filp->f_dentry;
 	struct inode * inode = dentry->d_inode;
 	ssize_t result;
+	size_t count = iov_length(iov, nr_segs);
 
 #ifdef CONFIG_NFS_DIRECTIO
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, buf, count, pos);
+		return nfs_file_direct_read(iocb, iov, nr_segs, pos);
 #endif
 
 	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
@@ -230,7 +235,7 @@ nfs_file_read(struct kiocb *iocb, char _
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
 	if (!result)
-		result = generic_file_aio_read(iocb, buf, count, pos);
+		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
 	return result;
 }
 
@@ -339,6 +344,10 @@ static int nfs_write_begin(struct file *
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
 	int once_thru = 0;
+	struct inode * inode = mapping->host;
+
+	if (nfs_dq_check_space(inode, index))
+		return -EDQUOT;
 
 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
 		file->f_dentry->d_parent->d_name.name,
@@ -473,6 +482,11 @@ static int nfs_vm_page_mkwrite(struct vm
 	int ret = -EINVAL;
 	struct address_space *mapping;
 
+	if (filp->f_op->get_host) {
+		filp = filp->f_op->get_host(filp);
+		dentry = filp->f_dentry;
+	}
+
 	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		filp->f_mapping->host->i_ino,
@@ -505,15 +519,17 @@ static struct vm_operations_struct nfs_f
 };
 
 static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
+	       unsigned long nr_segs, loff_t pos)
 {
 	struct dentry * dentry = iocb->ki_filp->f_dentry;
 	struct inode * inode = dentry->d_inode;
 	ssize_t result;
+	size_t count = iov_length(iov, nr_segs);
 
 #ifdef CONFIG_NFS_DIRECTIO
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, buf, count, pos);
+		return nfs_file_direct_write(iocb, iov, nr_segs, pos);
 #endif
 
 	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
@@ -537,7 +553,7 @@ nfs_file_write(struct kiocb *iocb, const
 		goto out;
 
 	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
-	result = generic_file_aio_write(iocb, buf, count, pos);
+	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
 	/* Return error values for O_SYNC and IS_SYNC() */
 	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
 		int err = nfs_do_fsync(iocb->ki_filp->private_data, inode);
@@ -610,7 +626,8 @@ static int do_unlk(struct file *filp, in
 	 */
 	lock_kernel();
 	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) &&
+			!(fl->fl_flags & FL_LOCAL))
 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 	else
 		status = do_vfs_lock(filp, fl);
@@ -633,7 +650,8 @@ static int do_setlk(struct file *filp, i
 
 	lock_kernel();
 	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) &&
+			!(fl->fl_flags & FL_LOCAL))
 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 	else
 		status = do_vfs_lock(filp, fl);
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/getroot.c kernel-2.6.18-417.el5-028stab121/fs/nfs/getroot.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/getroot.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/getroot.c	2017-01-13 08:40:22.000000000 -0500
@@ -70,7 +70,7 @@ struct dentry *nfs_get_root(struct super
 		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
 		fattr.nlink = 2;
 
-		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		iroot = nfs_fhget(sb, &dummyfh, &fattr, 0);
 		if (IS_ERR(iroot))
 			return ERR_PTR(PTR_ERR(iroot));
 
@@ -92,7 +92,7 @@ struct dentry *nfs_get_root(struct super
 		return ERR_PTR(error);
 	}
 
-	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+	inode = nfs_fhget(sb, mntfh, fsinfo.fattr, 0);
 	if (IS_ERR(inode)) {
 		dprintk("nfs_get_root: get root inode failed\n");
 		return ERR_PTR(PTR_ERR(inode));
@@ -257,7 +257,7 @@ struct dentry *nfs4_get_root(struct supe
 		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
 		fattr.nlink = 2;
 
-		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		iroot = nfs_fhget(sb, &dummyfh, &fattr, 0);
 		if (IS_ERR(iroot))
 			return ERR_PTR(PTR_ERR(iroot));
 
@@ -285,7 +285,7 @@ struct dentry *nfs4_get_root(struct supe
 		return ERR_PTR(error);
 	}
 
-	inode = nfs_fhget(sb, mntfh, &fattr);
+	inode = nfs_fhget(sb, mntfh, &fattr, 0);
 	if (IS_ERR(inode)) {
 		dprintk("nfs_get_root: get root inode failed\n");
 		return ERR_PTR(PTR_ERR(inode));
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/inode.c kernel-2.6.18-417.el5-028stab121/fs/nfs/inode.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/inode.c	2017-01-13 08:40:24.000000000 -0500
@@ -39,6 +39,8 @@
 #include <linux/nfs_xdr.h>
 #include <linux/compat.h>
 #include <linux/writeback.h>
+#include <linux/quotaops.h>
+#include <linux/vzquota.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -168,6 +170,108 @@ void nfs_clear_inode(struct inode *inode
 	nfs_access_zap_cache(inode);
 }
 
+/* Added only to hook DQUOT_FREE_INODE. --ANK */
+
+void nfs_delete_inode (struct inode * inode)
+{
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (is_bad_inode(inode))
+		goto no_delete;
+
+	if (inode->i_blocks)
+		DQUOT_FREE_SPACE_NODIRTY(inode, (qsize_t)inode->i_blocks<<9);
+	DQUOT_FREE_INODE(inode);
+	DQUOT_DROP(inode);
+	inode->i_flags |= S_NOQUOTA;
+
+no_delete:
+	clear_inode(inode);
+}
+
+int nfs_dq_init(struct inode * dir, struct nfs_dq_info *dqi, int check_space)
+{
+	struct inode * inode;
+	struct nfs_inode *nfsi;
+
+	dqi->inode = NULL;
+
+	/* First, check space in parent directory */
+	if (DQUOT_CHECK_SPACE(dir))
+		return -EDQUOT;
+
+	/* Second, allocate "quota" inode and initialize required fields */
+	inode = new_inode(dir->i_sb);
+	if (inode == NULL)
+		return -ENOMEM;
+
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	/* Is this optional? */
+	if (dir->i_mode & S_ISGID)
+		inode->i_gid = dir->i_gid;
+
+	nfsi = NFS_I(inode);
+	nfsi->access_cache = RB_ROOT;
+#ifdef CONFIG_NFS_FSCACHE
+	nfsi->fscache = NULL;
+#endif
+
+	if (DQUOT_ALLOC_INODE(inode))
+		goto out_drop;
+
+	if (check_space && DQUOT_CHECK_SPACE(inode))
+		goto out_err_noblock;
+
+	dqi->inode = inode;
+	dqi->dir = dir;
+	dqi->d = NULL;
+	return 0;
+
+out_err_noblock:
+	DQUOT_FREE_INODE(inode);
+out_drop:
+	DQUOT_DROP(inode);
+	inode->i_flags |= S_NOQUOTA;
+	iput(inode);
+	return -EDQUOT;
+}
+
+void nfs_dq_destroy(struct nfs_dq_info * dqi)
+{
+	if (dqi->inode) {
+		DQUOT_FREE_INODE(dqi->inode);
+		DQUOT_DROP(dqi->inode);
+		dqi->inode->i_flags |= S_NOQUOTA;
+		iput(dqi->inode);
+	}
+}
+
+void
+nfs_dq_empty(struct inode * dir, struct dentry * de, struct nfs_dq_info *dqi)
+{
+	dqi->inode = NULL;
+	dqi->dir = dir;
+	dqi->d = de;
+}
+
+
+/* If end of page beyond current end of file or file is sparse,
+ * we check quota. Not good, but the best we can do under curcumstances.
+ */
+int nfs_dq_check_space(struct inode *inode, pgoff_t index)
+{
+	loff_t i_size = i_size_read(inode);
+
+	if (index >= (i_size + PAGE_SIZE - 1)/PAGE_SIZE ||
+	    inode->i_blocks < (i_size + 511)/512) {
+		if (DQUOT_CHECK_SPACE(inode))
+			return -EDQUOT;
+	}
+	return 0;
+}
+
+
 /**
  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
  */
@@ -286,6 +390,7 @@ nfs_init_locked(struct inode *inode, voi
 
 	NFS_FILEID(inode) = fattr->fileid;
 	nfs_copy_fh(NFS_FH(inode), desc->fh);
+	NFS_I(inode)->dflags = 0;
 	return 0;
 }
 
@@ -297,7 +402,8 @@ nfs_init_locked(struct inode *inode, voi
  * instead of inode number.
  */
 struct inode *
-nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
+nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr,
+	  struct nfs_dq_info * qi)
 {
 	struct nfs_find_desc desc = {
 		.fh	= fh,
@@ -323,12 +429,29 @@ nfs_fhget(struct super_block *sb, struct
 
 	if (inode->i_state & I_NEW) {
 		struct nfs_inode *nfsi = NFS_I(inode);
+		blkcnt_t blocks;
 
 		/* We set i_ino for the few things that still rely on it, such
 		 * as printing messages; stat and filldir use the fileid
 		 * directly since i_ino may not be large enough */
 		inode->i_ino = fattr->fileid;
 
+		if (qi && qi->inode) {
+			DQUOT_SWAP(inode, qi->inode);
+		}
+
+		if (qi) {
+			int localcache = 0;
+
+			if (qi->d)
+				localcache = qi->d->d_flags & DCACHE_LOCALCACHE;
+			else if (qi->dir)
+				localcache = test_bit(NFS_DFLAG_LOCAL, &NFS_I(qi->dir)->dflags);
+			if (localcache)
+				set_bit(NFS_DFLAG_LOCAL, &NFS_I(inode)->dflags);
+		}
+
+
 		/* We can't support update_atime(), since the server will reset it */
 		inode->i_flags |= S_NOATIME|S_NOCMTIME|S_NOATTRKILL;
 		inode->i_mode = fattr->mode;
@@ -391,19 +514,50 @@ nfs_fhget(struct super_block *sb, struct
 		if (fattr->valid & NFS_ATTR_FATTR_GROUP)
 			inode->i_gid = fattr->gid;
 		if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
-			inode->i_blocks = fattr->du.nfs2.blocks;
+			blocks = fattr->du.nfs2.blocks;
 		if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
 			/*
 			 * report the blocks in 512byte units
 			 */
-			inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+			blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+		}
+		if (qi && qi->inode) {
+			inode->i_blocks = 0;
+			/* Before I forget this _again_: the inode is still
+			 * not attached to dentry. If vzquota is on, it
+			 * is OK, it is initialized already.
+			 * But if it was initialized to be out of quota
+			 * subtree, vzquota tries to reinitialize it.
+			 * And fails.
+			 */
+			DQUOT_SYNC_BLOCKS(inode, blocks);
+		} else {
+			inode->i_blocks = blocks;
 		}
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 		nfsi->access_cache = RB_ROOT;
 		unlock_new_inode(inode);
-	} else
+	} else {
+		int localcache = 0;
+
+		if (qi && qi->inode) {
+			printk("NFS: unexpected inode\n");
+		}
+
+		if (qi) {
+			if (qi->d)
+				localcache = qi->d->d_flags & DCACHE_LOCALCACHE;
+			else if (qi->dir)
+				localcache = test_bit(NFS_DFLAG_LOCAL, &NFS_I(qi->dir)->dflags);
+		}
+		if (localcache)
+			set_bit(NFS_DFLAG_LOCAL, &NFS_I(inode)->dflags);
+		else
+			clear_bit(NFS_DFLAG_LOCAL, &NFS_I(inode)->dflags);
+
 		nfs_refresh_inode(inode, fattr);
+	}
 	dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode),
@@ -431,6 +585,9 @@ nfs_setattr(struct dentry *dentry, struc
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
 			attr->ia_valid &= ~ATTR_SIZE;
+		else if (attr->ia_size > i_size_read(inode) &&
+			 DQUOT_CHECK_SPACE(inode))
+			return -EDQUOT;
 	}
 
 	/* Optimization: if the end result is no change, don't RPC */
@@ -438,6 +595,13 @@ nfs_setattr(struct dentry *dentry, struc
 	if (attr->ia_valid == 0)
 		return 0;
 
+	if (((attr->ia_valid & ATTR_UID) && attr->ia_uid != inode->i_uid) ||
+	    ((attr->ia_valid & ATTR_GID) && attr->ia_gid != inode->i_gid)) {
+		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
+		if (error)
+			return error;
+	}
+
 	lock_kernel();
 	/* Write all dirty data */
 	filemap_write_and_wait(inode->i_mapping);
@@ -589,20 +753,6 @@ int nfs_getattr(struct vfsmount *mnt, st
 	int err;
 
 	/*
-	 * Flush out writes to the server in order to update c/mtime.
-	 *
-	 * Hold the i_mutex to suspend application writes temporarily;
-	 * this prevents long-running writing applications from blocking
-	 * nfs_sync_inode_wait.
-	 */
-	if (S_ISREG(inode->i_mode)) {
-		mutex_lock(&inode->i_mutex);
-		nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
-		mutex_unlock(&inode->i_mutex);
-	}
-
-
-	/*
 	 * We may force a getattr if the user cares about atime.
 	 *
 	 * Note that we only have to check the vfsmount flags here:
@@ -776,6 +926,25 @@ int nfs_release(struct inode *inode, str
 	return 0;
 }
 
+static void nfs_sync_blocks(struct inode * inode, struct nfs_fattr * fattr)
+{
+	blkcnt_t blocks;
+
+	if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0))
+		return;
+
+	if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
+		/*
+		 * report the blocks in 512byte units
+		 */
+		blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+ 	}
+	if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+		blocks = fattr->du.nfs2.blocks;
+
+	DQUOT_SYNC_BLOCKS(inode, blocks);
+}
+
 /*
  * This function is called whenever some part of NFS notices that
  * the cached attributes have to be refreshed.
@@ -826,6 +995,8 @@ __nfs_revalidate_inode(struct nfs_server
 		goto out;
 	}
 
+	nfs_sync_blocks(inode, &fattr);
+
 	if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
 		nfs_zap_acl_cache(inode);
 
@@ -1097,6 +1268,8 @@ int nfs_refresh_inode(struct inode *inod
 	spin_lock(&inode->i_lock);
 	status = nfs_refresh_inode_locked(inode, fattr);
 	spin_unlock(&inode->i_lock);
+	if (status == 0)
+		nfs_sync_blocks(inode, fattr);
 	return status;
 }
 
@@ -1126,7 +1299,12 @@ int nfs_post_op_update_inode(struct inod
 
 	spin_lock(&inode->i_lock);
 	status = nfs_post_op_update_inode_locked(inode, fattr);
-	spin_unlock(&inode->i_lock);
+	if ((fattr->valid & NFS_ATTR_FATTR) != 0) {
+		spin_unlock(&inode->i_lock);
+		if (status == 0)
+			nfs_sync_blocks(inode, fattr);
+	} else
+		spin_unlock(&inode->i_lock);
 	return status;
 }
 
@@ -1318,15 +1496,6 @@ static int nfs_update_inode(struct inode
 		}
 	}
 
-	if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
-		/*
-		 * report the blocks in 512byte units
-		 */
-		inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- 	}
-	if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
-		inode->i_blocks = fattr->du.nfs2.blocks;
-
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
@@ -1505,6 +1674,10 @@ static int __init init_nfs_fs(void)
 
 	err = nfsiod_start();
 	if (err)
+		goto out7;
+
+	err = rpciod_up();
+	if (err)
 		goto out6;
 
 	err = nfs_fs_proc_init();
@@ -1553,13 +1726,17 @@ out3:
 out4:
 	nfs_fs_proc_exit();
 out5:
-	nfsiod_stop();
+	rpciod_down();
 out6:
+	nfsiod_stop();
+out7:
 	return err;
 }
 
 static void __exit exit_nfs_fs(void)
 {
+	rpciod_down();
+
 	nfs_destroy_directcache();
 	nfs_destroy_writepagecache();
 	nfs_destroy_readpagecache();
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/internal.h kernel-2.6.18-417.el5-028stab121/fs/nfs/internal.h
--- kernel-2.6.18-417.el5.orig/fs/nfs/internal.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/internal.h	2017-01-13 08:40:24.000000000 -0500
@@ -126,10 +126,24 @@ extern struct inode *nfs_alloc_inode(str
 extern void nfs_destroy_inode(struct inode *);
 extern int nfs_write_inode(struct inode *,int);
 extern void nfs_clear_inode(struct inode *);
+extern void nfs_delete_inode(struct inode *);
 #ifdef CONFIG_NFS_V4
 extern void nfs4_clear_inode(struct inode *);
 #endif
 
+struct nfs_dq_info
+{
+	struct inode * inode;
+	struct inode * dir;
+	struct dentry * d;
+};
+
+int nfs_dq_init(struct inode * dir, struct nfs_dq_info *, int check_space);
+void nfs_dq_destroy(struct nfs_dq_info * dqi);
+int nfs_dq_check_space(struct inode *inode, pgoff_t index);
+void nfs_dq_empty(struct inode * dir, struct dentry * de, struct nfs_dq_info *dqi);
+
+
 /* super.c */
 extern struct file_system_type nfs_xdev_fs_type;
 #ifdef CONFIG_NFS_V4
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/namespace.c kernel-2.6.18-417.el5-028stab121/fs/nfs/namespace.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/namespace.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/namespace.c	2017-01-13 08:40:23.000000000 -0500
@@ -155,11 +155,22 @@ out:
 	return mnt;
 }
 
+static struct dentry *nfs_fake_lookup(struct inode *vdir, struct dentry *vde,
+		struct nameidata *nd)
+{
+	/* Unfortunately, nothing can be done except this to prevent
+	   an OOPS in the VEFS code when it accesses an NFS submount.
+	*/
+	return ERR_PTR(-ENOENT);
+}
+
 struct inode_operations nfs_mountpoint_inode_operations = {
 	.getattr	= nfs_getattr,
+	.lookup		= nfs_fake_lookup,
 };
 
 struct inode_operations nfs_referral_inode_operations = {
+	.lookup		= nfs_fake_lookup,
 };
 
 static void nfs_expire_automounts(void *data)
@@ -185,7 +196,7 @@ static struct vfsmount *nfs_do_clone_mou
 					   struct nfs_clone_mount *mountdata)
 {
 #ifdef CONFIG_NFS_V4
-	struct vfsmount *mnt = NULL;
+	struct vfsmount *mnt = ERR_PTR(-EINVAL);
 	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
@@ -228,7 +239,7 @@ struct vfsmount *nfs_do_submount(const s
 	if (mountdata == NULL)
 		goto free_page;
 
-	mountdata->sb = mnt_parent->mnt_sb;
+	mountdata->sb = dentry->d_inode->i_sb;
 	mountdata->dentry = dentry;
 	mountdata->fh = fh;
 	mountdata->fattr = fattr;
@@ -237,7 +248,8 @@ struct vfsmount *nfs_do_submount(const s
 	mnt = (struct vfsmount *)devname;
 	if (IS_ERR(devname))
 		goto free_mountdata;
-	mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, mountdata);
+	mnt = nfs_do_clone_mount(NFS_SERVER(dentry->d_inode),
+				devname, mountdata);
 free_mountdata:
 	free_page((unsigned long)mountdata);
 free_page:
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/nfs3proc.c kernel-2.6.18-417.el5-028stab121/fs/nfs/nfs3proc.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/nfs3proc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/nfs3proc.c	2017-01-13 08:40:23.000000000 -0500
@@ -18,6 +18,7 @@
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
 #include <linux/nfs_mount.h>
+#include <linux/quotaops.h>
 
 #include "iostat.h"
 #include "internal.h"
@@ -349,6 +350,7 @@ nfs3_proc_create(struct inode *dir, stru
 	};
 	mode_t mode = sattr->ia_mode;
 	int status;
+	struct nfs_dq_info qi;
 
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	arg.createmode = NFS3_CREATE_UNCHECKED;
@@ -360,6 +362,11 @@ nfs3_proc_create(struct inode *dir, stru
 
 	sattr->ia_mode &= ~current->fs->umask;
 
+	status = nfs_dq_init(dir, &qi, 0);
+	if (status)
+		return status;
+	qi.d = dentry;
+
 again:
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
@@ -385,7 +392,7 @@ again:
 	}
 
 	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, &qi);
 	if (status != 0)
 		goto out;
 
@@ -413,6 +420,7 @@ again:
 	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
 out:
 	dprintk("NFS reply create: %d\n", status);
+	nfs_dq_destroy(&qi);
 	return status;
 }
 
@@ -452,8 +460,10 @@ nfs3_proc_unlink_done(struct rpc_task *t
 	struct nfs_removeres *res;
 	if (nfs3_async_handle_jukebox(task, dir))
 		return 0;
-	res = task->tk_msg.rpc_resp;
-	nfs_post_op_update_inode(dir, &res->dir_attr);
+	if (task->tk_status >= 0) {
+		res = task->tk_msg.rpc_resp;
+		nfs_post_op_update_inode(dir, &res->dir_attr);
+	}
 	return 1;
 }
 
@@ -537,6 +547,8 @@ nfs3_proc_link(struct inode *inode, stru
 	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
+	if (status == 0)
+		inode->i_nlink++;
 	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
@@ -567,21 +579,28 @@ nfs3_proc_symlink(struct inode *dir, str
 		.rpc_resp	= &res,
 	};
 	int			status;
+	struct nfs_dq_info qi;
 
 	if (len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
 	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
 
+	status = nfs_dq_init(dir, &qi, 1);
+	if (status)
+		return status;
+	qi.d = dentry;
+
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
-	status = nfs_instantiate(dentry, &fhandle, &fattr);
+	status = nfs_instantiate(dentry, &fhandle, &fattr, &qi);
 out:
 	dprintk("NFS reply symlink: %d\n", status);
+	nfs_dq_destroy(&qi);
 	return status;
 }
 
@@ -608,9 +627,15 @@ nfs3_proc_mkdir(struct inode *dir, struc
 	};
 	int mode = sattr->ia_mode;
 	int status;
+	struct nfs_dq_info qi;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 
+	status = nfs_dq_init(dir, &qi, 1);
+	if (status)
+		return status;
+	qi.d = dentry;
+
 	sattr->ia_mode &= ~current->fs->umask;
 
 	nfs_fattr_init(&dir_attr);
@@ -619,12 +644,13 @@ nfs3_proc_mkdir(struct inode *dir, struc
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
-	status = nfs_instantiate(dentry, &fhandle, &fattr);
+	status = nfs_instantiate(dentry, &fhandle, &fattr, &qi);
 	if (status != 0)
 		goto out;
 	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
 out:
 	dprintk("NFS reply mkdir: %d\n", status);
+	nfs_dq_destroy(&qi);
 	return status;
 }
 
@@ -733,6 +759,7 @@ nfs3_proc_mknod(struct inode *dir, struc
 	};
 	mode_t mode = sattr->ia_mode;
 	int status;
+	struct nfs_dq_info qi;
 
 	switch (sattr->ia_mode & S_IFMT) {
 	case S_IFBLK:	arg.type = NF3BLK;  break;
@@ -747,18 +774,24 @@ nfs3_proc_mknod(struct inode *dir, struc
 
 	sattr->ia_mode &= ~current->fs->umask;
 
+	status = nfs_dq_init(dir, &qi, 0);
+	if (status)
+		return status;
+	qi.d = dentry;
+
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
-	status = nfs_instantiate(dentry, &fh, &fattr);
+	status = nfs_instantiate(dentry, &fh, &fattr, &qi);
 	if (status != 0)
 		goto out;
 	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
 out:
 	dprintk("NFS reply mknod: %d\n", status);
+	nfs_dq_destroy(&qi);
 	return status;
 }
 
@@ -821,8 +854,10 @@ static int nfs3_read_done(struct rpc_tas
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
 
-	nfs_invalidate_atime(data->inode);
-	nfs_refresh_inode(data->inode, &data->fattr);
+	if (task->tk_status >= 0) {
+		nfs_invalidate_atime(data->inode);
+		nfs_refresh_inode(data->inode, &data->fattr);
+	}
 	return 0;
 }
 
@@ -871,7 +906,8 @@ static int nfs3_commit_done(struct rpc_t
 {
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
-	nfs_refresh_inode(data->inode, data->res.fattr);
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
 	return 0;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/nfs4proc.c kernel-2.6.18-417.el5-028stab121/fs/nfs/nfs4proc.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/nfs4proc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/nfs4proc.c	2017-01-13 08:40:22.000000000 -0500
@@ -358,7 +358,7 @@ static struct nfs4_state *nfs4_opendata_
 
 	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
 		goto out;
-	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, 0);
 	if (IS_ERR(inode))
 		goto out;
 	state = nfs4_get_open_state(inode, data->owner);
@@ -2228,7 +2228,7 @@ static int _nfs4_proc_symlink(struct ino
 	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
 		nfs_post_op_update_inode(dir, res.dir_fattr);
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, NULL);
 	}
 	return status;
 }
@@ -2281,7 +2281,7 @@ static int _nfs4_proc_mkdir(struct inode
 	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
 		nfs_post_op_update_inode(dir, res.dir_fattr);
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, NULL);
 	}
 	return status;
 }
@@ -2402,7 +2402,7 @@ static int _nfs4_proc_mknod(struct inode
 	if (status == 0) {
 		update_changeattr(dir, &res.dir_cinfo);
 		nfs_post_op_update_inode(dir, res.dir_fattr);
-		status = nfs_instantiate(dentry, &fh, &fattr);
+		status = nfs_instantiate(dentry, &fh, &fattr, NULL);
 	}
 	return status;
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/nfsroot.c kernel-2.6.18-417.el5-028stab121/fs/nfs/nfsroot.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/nfsroot.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/nfsroot.c	2017-01-13 08:40:15.000000000 -0500
@@ -327,7 +327,7 @@ static int __init root_nfs_name(char *na
 	/* Override them by options set on kernel command-line */
 	root_nfs_parse(name, buf);
 
-	cp = system_utsname.nodename;
+	cp = utsname()->nodename;
 	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
 		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
 		return -1;
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/proc.c kernel-2.6.18-417.el5-028stab121/fs/nfs/proc.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/proc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/proc.c	2017-01-13 08:40:22.000000000 -0500
@@ -270,7 +270,7 @@ nfs_proc_create(struct inode *dir, struc
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, NULL);
 	dprintk("NFS reply create: %d\n", status);
 	return status;
 }
@@ -322,7 +322,7 @@ nfs_proc_mknod(struct inode *dir, struct
 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	}
 	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, NULL);
 	dprintk("NFS reply mknod: %d\n", status);
 	return status;
 }
@@ -459,7 +459,7 @@ nfs_proc_symlink(struct inode *dir, stru
 	if (status == 0) {
 		nfs_fattr_init(&fattr);
 		fhandle.size = 0;
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, NULL);
 	}
 
 	dprintk("NFS reply symlink: %d\n", status);
@@ -493,7 +493,7 @@ nfs_proc_mkdir(struct inode *dir, struct
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr, NULL);
 	dprintk("NFS reply mkdir: %d\n", status);
 	return status;
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/read.c kernel-2.6.18-417.el5-028stab121/fs/nfs/read.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/read.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/read.c	2017-01-13 08:40:22.000000000 -0500
@@ -639,7 +639,7 @@ int nfs_readpage(struct file *file, stru
 			goto out_error;
 	} else
 		ctx = get_nfs_open_context((struct nfs_open_context *)
-				file->private_data);
+				file_private(file));
 	if (!IS_SYNC(inode)) {
 		error = nfs_readpage_async(ctx, inode, page);
 		goto out;
@@ -709,7 +709,7 @@ int nfs_readpages(struct file *filp, str
 			return -EBADF;
 	} else
 		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
-				filp->private_data);
+				file_private(filp));
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 	if (!list_empty(&head)) {
 		int err = nfs_pagein_list(&head, server->rpages);
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/super.c kernel-2.6.18-417.el5-028stab121/fs/nfs/super.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/super.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/super.c	2017-01-13 08:40:27.000000000 -0500
@@ -44,6 +44,10 @@
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/nfs_xdr.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+#include <linux/ve_nfs.h>
+#include <linux/writeback.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -70,7 +74,8 @@ static struct file_system_type nfs_fs_ty
 	.name		= "nfs",
 	.get_sb		= nfs_get_sb,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|
+			  FS_BINARY_MOUNTDATA|FS_VIRTUALIZED,
 };
 
 struct file_system_type nfs_xdev_fs_type = {
@@ -78,7 +83,8 @@ struct file_system_type nfs_xdev_fs_type
 	.name		= "nfs",
 	.get_sb		= nfs_xdev_get_sb,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|
+			  FS_BINARY_MOUNTDATA|FS_VIRTUALIZED,
 };
 
 static struct super_operations nfs_sops = {
@@ -87,6 +93,7 @@ static struct super_operations nfs_sops 
 	.write_inode	= nfs_write_inode,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs_clear_inode,
+	.delete_inode	= nfs_delete_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
 	.show_stats	= nfs_show_stats,
@@ -139,6 +146,119 @@ static struct super_operations nfs4_sops
 
 static struct shrinker *acl_shrinker;
 
+#ifdef CONFIG_VE
+static int ve_nfs_start(void *data)
+{
+	return 0;
+}
+
+inline int is_nfs_automount(struct vfsmount *mnt)
+{
+	struct vfsmount *submnt;
+
+	spin_lock(&vfsmount_lock);
+	list_for_each_entry(submnt, &nfs_automount_list, mnt_expire) {
+		if (mnt == submnt) {
+			spin_unlock(&vfsmount_lock);
+			return 1;
+		}
+	}
+	spin_unlock(&vfsmount_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(is_nfs_automount);
+
+int ve_nfs_sync(struct ve_struct *env, int wait)
+{
+	struct super_block *sb;
+
+	spin_lock(&sb_lock);
+rescan:
+	list_for_each_entry(sb, &nfs_fs_type.fs_supers, s_instances) {
+		int env_nfs = 0;
+
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+
+		down_read(&sb->s_umount);
+		if (sb->s_root) {
+			struct rpc_clnt *clnt = NFS_SB(sb)->client;
+			struct ve_struct *owner_env = clnt->cl_xprt->owner_env;
+
+			env_nfs = ve_accessible_strict(owner_env, env);
+			if (env_nfs && !clnt->cl_broken) 
+				sync_inodes_sb(sb, wait);
+		}
+		up_read(&sb->s_umount);
+
+		spin_lock(&sb_lock);
+
+		/* This logic is taken from sync_inodes()  */
+		if (__put_super_and_need_restart(sb))
+			goto rescan;
+
+		/* Check for dirty inodes to catch sync_inodes_sb() failure */
+		if (env_nfs && wait && (!list_empty(&sb->s_io) ||
+					!list_empty(&sb->s_dirty))) {
+			spin_unlock(&sb_lock);
+			return -EBUSY;
+		}
+	}
+
+	spin_unlock(&sb_lock);
+	return 0;
+}
+EXPORT_SYMBOL(ve_nfs_sync);
+
+static void ve_nfs_stop(void *data)
+{
+	struct ve_struct *ve;
+	struct super_block *sb;
+
+	flush_scheduled_work();
+
+	ve = (struct ve_struct *)data;
+	/* Basically, on a valid stop we can be here iff NFS was mounted
+	   read-only. In such a case client force-stop is not a problem.
+	   If we are here and NFS is read-write, we are in a FORCE stop, so
+	   force the client to stop.
+	   Lock daemon is already dead.
+	   Only superblock client remains. Den */
+
+	down_write(&rpc_async_task_lock);
+
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &nfs_fs_type.fs_supers, s_instances) {
+		struct nfs_server *srv;
+		struct ve_struct *owner_env;
+
+		srv = NFS_SB(sb);
+		owner_env = srv->client->cl_xprt->owner_env;
+
+		if (ve_accessible_strict(owner_env, ve)) {
+			rpc_kill_client(srv->client);
+			rpc_kill_client(srv->client_acl);
+		}
+	}
+	spin_unlock(&sb_lock);
+
+	/* Make sure no async RPC task is in progress */
+	up_write(&rpc_async_task_lock);
+
+	umount_ve_fs_type(&nfs_fs_type, ve->veid);
+
+	flush_scheduled_work();
+}
+
+static struct ve_hook nfs_hook = {
+	.init	  = ve_nfs_start,
+	.fini	  = ve_nfs_stop,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_NET_POST,
+};
+#endif
+
 /*
  * Register the NFS filesystems
  */
@@ -159,6 +279,7 @@ int __init register_nfs_fs(void)
 		goto error_2;
 #endif
 	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
+	ve_hook_register(VE_INIT_EXIT_CHAIN, &nfs_hook);
 	return 0;
 
 #ifdef CONFIG_NFS_V4
@@ -176,6 +297,7 @@ error_0:
  */
 void __exit unregister_nfs_fs(void)
 {
+	ve_hook_unregister(&nfs_hook);
 	if (acl_shrinker != NULL)
 		remove_shrinker(acl_shrinker);
 #ifdef CONFIG_NFS_V4
@@ -515,6 +637,9 @@ static int nfs_validate_mount_data(struc
 		return -EPROTONOSUPPORT;
 	}
 #endif /* CONFIG_NFS_V3 */
+	/* Set flavor explicitly because of SuSe mount specific -dmon */
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR) && !data->pseudoflavor)
+		data->pseudoflavor = RPC_AUTH_UNIX;
 
 	/* We now require that the mount process passes the remote address */
 	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
@@ -523,6 +648,11 @@ static int nfs_validate_mount_data(struc
 		return -EINVAL;
 	}
 
+	if (!(data->flags & NFS_MOUNT_VER3)) {
+		printk("NFSv2 is broken and not supported\n");
+		return -EPROTONOSUPPORT;
+	}
+
 	/* Prepare the root filehandle */
 	if (data->flags & NFS_MOUNT_VER3)
 		mntfh->size = data->root.size;
@@ -671,6 +801,9 @@ static int nfs_compare_super(struct supe
 	struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
 	int mntflags = sb_mntdata->mntflags;
 
+	if (!ve_accessible_strict(old->client->cl_xprt->owner_env,
+				  get_exec_env()))
+		return 0;
 	if (memcmp(&old->nfs_client->cl_addr,
 				&server->nfs_client->cl_addr,
 				sizeof(old->nfs_client->cl_addr)) != 0)
@@ -697,6 +830,11 @@ static int nfs_get_sb(struct file_system
 		.err_on_noshare = 0,
 	};
 	int error;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	if (!(ve->features & VE_FEATURE_NFS))
+		return -ENODEV;
 
 	/* Validate the mount data */
 	error = nfs_validate_mount_data(data, &mntfh);
@@ -794,11 +932,15 @@ static int nfs_xdev_get_sb(struct file_s
 		.err_on_noshare = 0,
 	};
 	int error;
+	struct ve_struct *old_ve;
 
 	dprintk("--> nfs_xdev_get_sb()\n");
 
+	server = NFS_SB(data->sb);
+	old_ve = set_exec_env(server->owner_env);
+
 	/* create a new volume representation */
-	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	server = nfs_clone_server(server, data->fh, data->fattr);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out_err_noserver;
@@ -847,18 +989,22 @@ static int nfs_xdev_get_sb(struct file_s
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
 
+	(void)set_exec_env(old_ve);
+
 	dprintk("<-- nfs_xdev_get_sb() = 0\n");
 	return 0;
 
 out_err_nosb:
 	nfs_free_server(server);
 out_err_noserver:
+	(void)set_exec_env(old_ve);
 	dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
 	return error;
 
 error_splat_super:
 	up_write(&s->s_umount);
 	deactivate_super(s);
+	(void)set_exec_env(old_ve);
 	dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
 	return error;
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/unlink.c kernel-2.6.18-417.el5-028stab121/fs/nfs/unlink.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/unlink.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/unlink.c	2017-01-13 08:40:41.000000000 -0500
@@ -22,14 +22,6 @@
 #include "iostat.h"
 #include "delegation.h"
 
-struct nfs_unlinkdata {
-	struct hlist_node list;
-	struct nfs_removeargs args;
-	struct nfs_removeres res;
-	struct inode *dir;
-	struct rpc_cred	*cred;
-};
-
 /**
  * nfs_free_unlinkdata - release data from a sillydelete operation.
  * @data: pointer to unlink structure.
@@ -119,10 +111,11 @@ static void nfs_async_unlink_done(struct
 static void nfs_async_unlink_release(void *calldata)
 {
 	struct nfs_unlinkdata	*data = calldata;
+	struct super_block *sb = data->dir->i_sb;
 
 	nfs_dec_sillycount(data->dir);
-	nfs_sb_deactive(NFS_SERVER(data->dir));
 	nfs_free_unlinkdata(data);
+	nfs_sb_deactive(NFS_SB(sb));
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {
diff -upr kernel-2.6.18-417.el5.orig/fs/nfs/write.c kernel-2.6.18-417.el5-028stab121/fs/nfs/write.c
--- kernel-2.6.18-417.el5.orig/fs/nfs/write.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfs/write.c	2017-01-13 08:40:40.000000000 -0500
@@ -61,6 +61,7 @@
 #include <linux/nfs_page.h>
 #include <asm/uaccess.h>
 #include <linux/smp_lock.h>
+#include <linux/task_io_accounting_ops.h>
 
 #include "delegation.h"
 #include "iostat.h"
@@ -265,6 +266,8 @@ static int nfs_writepage_sync(struct nfs
 		return -ENOMEM;
 	}
 
+	task_io_account_write(count);
+
 	wdata->flags = how;
 	wdata->cred = ctx->cred;
 	wdata->inode = inode;
@@ -316,7 +319,7 @@ static int nfs_writepage_sync(struct nfs
 io_error:
 	nfs_end_page_writeback(page);
 	nfs_put_lock_context(wdata->args.lock_context);
-	nfs_writedata_release(wdata);
+	nfs_writedata_free(wdata);
 	return written ? written : result;
 }
 
@@ -389,13 +392,17 @@ int nfs_writepage(struct page *page, str
 	if (page->index >= end_index+1 || !offset)
 		goto out;
 do_it:
+	err = nfs_dq_check_space(inode, page->index);
+	if (err)
+		goto out;
+
 	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
 	if (ctx == NULL) {
 		err = -EBADF;
 		goto out;
 	}
 	lock_kernel();
-	if (!IS_SYNC(inode) && inode_referenced) {
+	if (!IS_SYNC(inode) && inode_referenced && !wbc->force_sync_io) {
 		err = nfs_writepage_async(ctx, inode, page, 0, offset);
 		if (!wbc->for_writepages)
 			nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
@@ -782,6 +789,7 @@ static struct nfs_page * nfs_update_requ
 			}
 			spin_unlock(&nfsi->req_lock);
 			nfs_mark_request_dirty(new);
+			task_io_account_write(bytes);
 			return new;
 		}
 		spin_unlock(&nfsi->req_lock);
@@ -828,13 +836,16 @@ static struct nfs_page * nfs_update_requ
 
 	/* Okay, the request matches. Update the region */
 	if (offset < req->wb_offset) {
+		task_io_account_write(req->wb_offset - offset);
 		req->wb_offset = offset;
 		req->wb_pgbase = offset;
 		req->wb_bytes = rqend - req->wb_offset;
 	}
 
-	if (end > rqend)
+	if (end > rqend) {
+		task_io_account_write(end - rqend);
 		req->wb_bytes = end - req->wb_offset;
+	}
 
 	return req;
 }
@@ -1103,7 +1114,7 @@ out_bad:
 	while (!list_empty(&list)) {
 		data = list_entry(list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
-		nfs_writedata_release(data);
+		nfs_writedata_free(data);
 	}
 	nfs_mark_request_dirty(req);
 	nfs_clear_page_writeback(req);
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/export.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/export.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/export.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/export.c	2017-01-13 08:40:24.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/hash.h>
 #include <linux/module.h>
+#include <linux/quotaops.h>
 
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
@@ -57,7 +58,6 @@ static int		exp_verify_string(char *cp, 
 #define	EXPKEY_HASHBITS		8
 #define	EXPKEY_HASHMAX		(1 << EXPKEY_HASHBITS)
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
-static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
 static void expkey_put(struct kref *ref)
 {
@@ -89,7 +89,11 @@ static void expkey_request(struct cache_
 
 static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
 static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
+#ifdef CONFIG_VE
+#define svc_expkey_cache (*(get_exec_env()->nfsd_data->key_cache))
+#else
 static struct cache_detail svc_expkey_cache;
+#endif
 
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
@@ -259,10 +263,9 @@ static struct cache_head *expkey_alloc(v
 		return NULL;
 }
 
-static struct cache_detail svc_expkey_cache = {
+static struct cache_detail __svc_expkey_cache = {
 	.owner		= THIS_MODULE,
 	.hash_size	= EXPKEY_HASHMAX,
-	.hash_table	= expkey_table,
 	.name		= "nfsd.fh",
 	.cache_put	= expkey_put,
 	.cache_request	= expkey_request,
@@ -319,8 +322,6 @@ svc_expkey_update(struct svc_expkey *new
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
 #define	EXPORT_HASHMASK		(EXPORT_HASHMAX -1)
 
-static struct cache_head *export_table[EXPORT_HASHMAX];
-
 static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
 {
 	int i;
@@ -383,7 +384,8 @@ static int check_export(struct inode *in
 	 * 2:  We must be able to find an inode from a filehandle.
 	 *       This means that s_export_op must be set.
 	 */
-	if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
+	if (!(inode->i_sb->s_type->fs_flags &
+			(FS_REQUIRES_DEV | FS_NFS_EXPORTABLE)) &&
 	    !(flags & NFSEXP_FSID)) {
 		dprintk("exp_export: export of non-dev fs without fsid");
 		return -EINVAL;
@@ -547,6 +549,8 @@ static int svc_export_parse(struct cache
 	err = path_lookup(buf, 0, &nd);
 	if (err) goto out_no_path;
 
+	DQUOT_INIT(nd.dentry->d_inode);
+
 	exp.h.flags = 0;
 	exp.ex_client = dom;
 	exp.ex_mnt = nd.mnt;
@@ -710,10 +714,9 @@ static struct cache_head *svc_export_all
 		return NULL;
 }
 
-struct cache_detail svc_export_cache = {
+struct cache_detail __svc_export_cache = {
 	.owner		= THIS_MODULE,
 	.hash_size	= EXPORT_HASHMAX,
-	.hash_table	= export_table,
 	.name		= "nfsd.export",
 	.cache_put	= svc_export_put,
 	.cache_request	= svc_export_request,
@@ -725,6 +728,26 @@ struct cache_detail svc_export_cache = {
 	.alloc		= svc_export_alloc,
 };
 
+#ifdef CONFIG_VE
+#define svc_export_cache (*(get_exec_env()->nfsd_data->exp_cache))
+void exp_put(struct svc_export *exp)
+{
+	cache_put(&exp->h, &svc_export_cache);
+}
+#else
+struct cache_detail svc_export_cache;
+#endif
+
+dev_t exp_get_dev(struct svc_export *ex)
+{
+	/*
+	 * we should return the device reported by the
+	 * stat syscall inside the container
+	 */
+
+	return ex->ex_mnt->mnt_sb->s_dev;
+}
+
 static struct svc_export *
 svc_export_lookup(struct svc_export *exp)
 {
@@ -946,8 +969,9 @@ static int exp_hash(struct auth_domain *
 {
 	u32 fsid[2];
 	struct inode *inode = exp->ex_dentry->d_inode;
-	dev_t dev = inode->i_sb->s_dev;
+	dev_t dev;
 
+	dev = exp_get_dev(exp);
 	if (old_valid_dev(dev)) {
 		mk_fsid_v0(fsid, dev, inode->i_ino);
 		return exp_set_key(clp, 0, fsid, exp);
@@ -960,8 +984,10 @@ static void exp_unhash(struct svc_export
 {
 	struct svc_expkey *ek;
 	struct inode *inode = exp->ex_dentry->d_inode;
+	dev_t ex_dev;
 
-	ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
+	ex_dev = exp_get_dev(exp);
+	ek = exp_get_key(exp->ex_client, ex_dev, inode->i_ino);
 	if (!IS_ERR(ek)) {
 		sunrpc_invalidate(&ek->h, &svc_expkey_cache);
 		cache_put(&ek->h, &svc_expkey_cache);
@@ -1037,6 +1063,8 @@ exp_export(struct nfsctl_export *nxp)
 
 	dprintk("nfsd: creating export entry %p for client %p\n", exp, clp);
 
+	DQUOT_INIT(nd.dentry->d_inode);
+
 	new.h.expiry_time = NEVER;
 	new.h.flags = 0;
 	new.ex_path = kstrdup(nxp->ex_path, GFP_KERNEL);
@@ -1362,18 +1390,18 @@ static void *e_start(struct seq_file *m,
 	export = n & ((1LL<<32) - 1);
 
 	
-	for (ch=export_table[hash]; ch; ch=ch->next)
+	for (ch=svc_export_cache.hash_table[hash]; ch; ch=ch->next)
 		if (!export--)
 			return ch;
 	n &= ~((1LL<<32) - 1);
 	do {
 		hash++;
 		n += 1LL<<32;
-	} while(hash < EXPORT_HASHMAX && export_table[hash]==NULL);
+	} while(hash < EXPORT_HASHMAX && svc_export_cache.hash_table[hash]==NULL);
 	if (hash >= EXPORT_HASHMAX)
 		return NULL;
 	*pos = n+1;
-	return export_table[hash];
+	return svc_export_cache.hash_table[hash];
 }
 
 static void *e_next(struct seq_file *m, void *p, loff_t *pos)
@@ -1391,14 +1419,14 @@ static void *e_next(struct seq_file *m, 
 		return ch->next;
 	}
 	*pos &= ~((1LL<<32) - 1);
-	while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) {
+	while (hash < EXPORT_HASHMAX && svc_export_cache.hash_table[hash] == NULL) {
 		hash++;
 		*pos += 1LL<<32;
 	}
 	if (hash >= EXPORT_HASHMAX)
 		return NULL;
 	++*pos;
-	return export_table[hash];
+	return svc_export_cache.hash_table[hash];
 }
 
 static void e_stop(struct seq_file *m, void *p)
@@ -1613,14 +1641,30 @@ exp_verify_string(char *cp, int max)
 /*
  * Initialize the exports module.
  */
-void
-nfsd_export_init(void)
+int nfsd_export_init(void)
 {
-	dprintk("nfsd: initializing export module.\n");
+	struct cache_detail *exp, *key;
+
+	exp = cache_alloc(&__svc_export_cache, EXPORT_HASHMAX);
+	if (exp == NULL)
+		goto err_exp;
 
-	cache_register(&svc_export_cache);
-	cache_register(&svc_expkey_cache);
+	cache_register(exp);
 
+	key = cache_alloc(&__svc_expkey_cache, EXPKEY_HASHMAX);
+	if (key == NULL)
+		goto err_key;
+
+	cache_register(key);
+
+	get_exec_env()->nfsd_data->exp_cache = exp;
+	get_exec_env()->nfsd_data->key_cache = key;
+	return 0;
+
+err_key:
+	cache_free(exp);
+err_exp:
+	return -ENOMEM;
 }
 
 /*
@@ -1641,17 +1685,9 @@ nfsd_export_flush(void)
 void
 nfsd_export_shutdown(void)
 {
-
-	dprintk("nfsd: shutting down export module.\n");
-
 	exp_writelock();
-
-	if (cache_unregister(&svc_expkey_cache))
-		printk(KERN_ERR "nfsd: failed to unregister expkey cache\n");
-	if (cache_unregister(&svc_export_cache))
-		printk(KERN_ERR "nfsd: failed to unregister export cache\n");
+	cache_free(get_exec_env()->nfsd_data->exp_cache);
+	cache_free(get_exec_env()->nfsd_data->key_cache);
 	svcauth_unix_purge();
-
 	exp_writeunlock();
-	dprintk("nfsd: export shutdown complete.\n");
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/nfs3xdr.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfs3xdr.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/nfs3xdr.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfs3xdr.c	2017-01-13 08:40:23.000000000 -0500
@@ -179,6 +179,7 @@ encode_fattr3(struct svc_rqst *rqstp, u3
 		p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev));
 	p = xdr_encode_hyper(p, (u64) stat->ino);
 	p = encode_time3(p, &stat->atime);
+	time = stat->mtime;
 	lease_get_mtime(dentry->d_inode, &time); 
 	p = encode_time3(p, &time);
 	p = encode_time3(p, &stat->ctime);
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/nfsctl.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfsctl.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/nfsctl.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfsctl.c	2017-01-13 08:40:23.000000000 -0500
@@ -37,6 +37,9 @@
 #include <linux/nfsd/syscall.h>
 #include <linux/nfsd/interface.h>
 
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+
 #include <asm/uaccess.h>
 
 int nfsd_port = 2049;
@@ -613,42 +616,165 @@ static struct file_system_type nfsd_fs_t
 	.kill_sb	= kill_litter_super,
 };
 
+static int nfsd_init_proc(void)
+{
+	struct proc_dir_entry *de;
+
+	if (!proc_mkdir("fs/nfs", NULL)) {
+		printk("Can't create \"fs/nfs\"\n");
+		goto err_proc_nfs;
+	}
+
+	de = create_proc_entry("fs/nfs/exports", 0, NULL);
+	if (de == NULL) {
+		printk("Can't create exports\n");
+		goto err_proc_exp;
+	}
+
+	de->proc_fops = &exports_operations;
+	return 0;
+
+err_proc_exp:
+	remove_proc_entry("fs/nfs", NULL);
+err_proc_nfs:
+	return -ENOMEM;
+}
+
+static void nfsd_exit_proc(void)
+{
+	remove_proc_entry("fs/nfs/exports", NULL);
+	remove_proc_entry("fs/nfs", NULL);
+}
+
+static int ve_init_nfsctl(void *data)
+{
+	struct ve_struct *ve = data;
+	struct ve_nfsd_data *d;
+	int err = -ENOMEM;
+
+	if (!(ve->features & VE_FEATURE_NFSD))
+		return 0;
+
+	d = kzalloc(sizeof(struct ve_nfsd_data), GFP_KERNEL);
+	if (d == NULL)
+		goto err_data;
+
+	INIT_LIST_HEAD(&d->nfsd_list);
+	ve->nfsd_data = d;
+
+	err = nfsd_init_proc();
+	if (err)
+		goto err_proc;
+
+	err = nfsd_export_init();
+	if (err)
+		goto err_exp;
+
+	err = nfsd_stat_init();
+	if (err)
+		goto err_stat;
+
+	if (!ve_is_super(ve)) {
+		err = register_ve_fs_type(ve, &nfsd_fs_type, &d->nfsd_fs, NULL);
+		if (err) {
+			printk("Can't register nfsdfs\n");
+			goto err_nfsdfs;
+		}
+	}
+
+	return 0;
+
+err_nfsdfs:
+	nfsd_stat_shutdown();
+err_stat:
+	nfsd_export_shutdown();
+err_exp:
+	nfsd_exit_proc();
+err_proc:
+	kfree(d);
+err_data:
+	return err;
+}
+
+static void ve_exit_nfsctl(void *data)
+{
+	struct ve_struct *ve = data;
+	struct ve_nfsd_data *d = ve->nfsd_data;
+
+	if (d == NULL)
+		return;
+
+	nfsd_stat_shutdown();
+
+	if (!ve_is_super(ve))
+		unregister_ve_fs_type(d->nfsd_fs, NULL);
+
+	nfsd_kill_all(&d->nfsd_list);
+	nfsd_export_shutdown();
+	nfsd_exit_proc();
+
+	ve->nfsd_data = NULL;
+	kfree(d);
+}
+
+static struct ve_hook nfsd_ctl_hook = {
+	.init = ve_init_nfsctl,
+	.fini = ve_exit_nfsctl,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_NET_POST,
+};
+
+int report_stale = 0;
+static struct ctl_table_header *nfs_ctl;
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9475,
+		.procname	= "nfs_stale",
+		.data		= &report_stale,
+		.maxlen		= sizeof(report_stale),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{ .ctl_name = 0 }
+};
+
 static int __init init_nfsd(void)
 {
 	int retval;
 	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
 
-	nfsd_stat_init();	/* Statistics */
 	nfsd_cache_init();	/* RPC reply cache */
-	nfsd_export_init();	/* Exports table */
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
 	nfs4_state_init();	/* NFSv4 locking state */
 	nfsd_idmap_init();      /* Name to ID mapping */
-	if (proc_mkdir("fs/nfs", NULL)) {
-		struct proc_dir_entry *entry;
-		entry = create_proc_entry("fs/nfs/exports", 0, NULL);
-		if (entry)
-			entry->proc_fops =  &exports_operations;
-	}
+	ve_hook_register(VE_SS_CHAIN, &nfsd_ctl_hook);
+	ve_init_nfsctl(get_ve0());
 	retval = register_filesystem(&nfsd_fs_type);
 	if (retval) {
-		nfsd_export_shutdown();
+		ve_exit_nfsctl(get_ve0());
 		nfsd_cache_shutdown();
-		remove_proc_entry("fs/nfs/exports", NULL);
-		remove_proc_entry("fs/nfs", NULL);
-		nfsd_stat_shutdown();
 		nfsd_lockd_shutdown();
 	}
+
+	nfs_ctl = register_sysctl_table(root_table, 0);
 	return retval;
 }
 
 static void __exit exit_nfsd(void)
 {
-	nfsd_export_shutdown();
+	unregister_sysctl_table(nfs_ctl);
 	nfsd_cache_shutdown();
-	remove_proc_entry("fs/nfs/exports", NULL);
-	remove_proc_entry("fs/nfs", NULL);
-	nfsd_stat_shutdown();
+	ve_hook_unregister(&nfsd_ctl_hook);
+	ve_exit_nfsctl(get_ve0());
 	nfsd_lockd_shutdown();
 	nfsd_idmap_shutdown();
 	unregister_filesystem(&nfsd_fs_type);
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/nfsfh.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfsfh.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/nfsfh.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfsfh.c	2017-01-13 08:40:23.000000000 -0500
@@ -296,8 +296,16 @@ skip_pseudoflavor_check:
 out:
 	if (exp && !IS_ERR(exp))
 		exp_put(exp);
-	if (error == nfserr_stale)
+	if (error == nfserr_stale) {
+		extern int report_stale;
+
+		if (report_stale) {
+			printk("%s: return STALE in %d\n", __func__, get_exec_env()->veid);
+			dump_stack();
+		}
+
 		nfsdstats.fh_stale++;
+	}
 	return error;
 }
 
@@ -353,7 +361,9 @@ fh_compose(struct svc_fh *fhp, struct sv
 	struct inode * inode = dentry->d_inode;
 	struct dentry *parent = dentry->d_parent;
 	__u32 *datap;
-	dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev;
+	dev_t ex_dev;
+
+	ex_dev = exp_get_dev(exp);
 
 	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
 		MAJOR(ex_dev), MINOR(ex_dev),
@@ -531,7 +541,7 @@ fh_put(struct svc_fh *fhp)
 		nfsd_nr_put++;
 	}
 	if (exp) {
-		cache_put(&exp->h, &svc_export_cache);
+		exp_put(exp);
 		fhp->fh_export = NULL;
 	}
 	return;
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/nfssvc.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfssvc.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/nfssvc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/nfssvc.c	2017-01-13 08:40:23.000000000 -0500
@@ -49,10 +49,8 @@
  */
 #define	SIG_NOCLEAN	SIGHUP
 
-extern struct svc_program	nfsd_program;
 static void			nfsd(struct svc_rqst *rqstp);
 struct timeval			nfssvc_boot;
-       struct svc_serv 		*nfsd_serv;
 static atomic_t			nfsd_busy;
 static unsigned long		nfsd_last_call;
 static DEFINE_SPINLOCK(nfsd_call_lock);
@@ -61,7 +59,8 @@ struct nfsd_list {
 	struct list_head 	list;
 	struct task_struct	*task;
 };
-static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
+
+#define ve_nfsd_list	(get_exec_env()->nfsd_data->nfsd_list)
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
@@ -121,7 +120,6 @@ struct svc_program		nfsd_program = {
 	.pg_vers		= nfsd_versions,	/* version table */
 	.pg_name		= "nfsd",		/* program name */
 	.pg_class		= "nfsd",		/* authentication class */
-	.pg_stats		= &nfsd_svcstats,	/* version table */
 	.pg_authenticate	= &svc_set_client,	/* export authentication */
 
 };
@@ -129,7 +127,7 @@ struct svc_program		nfsd_program = {
 int nfsd_vers(int vers, enum vers_op change)
 {
 	if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
-		return -1;
+		return 0;
 	switch(change) {
 	case NFSD_SET:
 		nfsd_versions[vers] = nfsd_version[vers];
@@ -176,11 +174,8 @@ static void nfsd_last_thread(struct svc_
 	nfsd_racache_shutdown();
 	nfs4_state_shutdown();
 
-	printk(KERN_WARNING "nfsd: last server has exited\n");
-	if (killsig != SIG_NOCLEAN) {
-		printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+	if (killsig != SIG_NOCLEAN)
 		nfsd_export_flush();
-	}
 }
 void nfsd_reset_versions(void)
 {
@@ -230,7 +225,8 @@ int nfsd_create_serv(void)
 	}
 
 	atomic_set(&nfsd_busy, 0);
-	nfsd_serv = svc_create(&nfsd_program, nfsd_max_blksize);
+	nfsd_serv = __svc_create(&nfsd_program, nfsd_max_blksize,
+			get_exec_env()->nfsd_data->svc_stat);
 	if (nfsd_serv == NULL)
 		err = -ENOMEM;
 	svc_shutdown(nfsd_serv, nfsd_last_thread);
@@ -263,11 +259,29 @@ static int nfsd_init_socks(int port)
 	return 0;
 }
 
+static void nfsd_kill(struct list_head *lh, int nrservs)
+{
+	struct list_head *victim;
+
+	victim = lh->next;
+	while (nrservs > 0 && victim != lh) {
+		struct nfsd_list *nl =
+			list_entry(victim, struct nfsd_list, list);
+		victim = victim->next;
+		send_sig(SIG_NOCLEAN, nl->task, 1);
+		nrservs--;
+	}
+}
+
+void nfsd_kill_all(struct list_head *lh)
+{
+	nfsd_kill(lh, INT_MAX);
+}
+
 int
 nfsd_svc(unsigned short port, int nrservs)
 {
 	int	error;
-	struct list_head *victim;
 	
 	lock_kernel();
 	dprintk("nfsd: creating service: port %d tcp %d udp %d\n",
@@ -307,14 +321,8 @@ nfsd_svc(unsigned short port, int nrserv
 			break;
 		}
 	}
-	victim = nfsd_list.next;
-	while (nrservs < 0 && victim != &nfsd_list) {
-		struct nfsd_list *nl =
-			list_entry(victim,struct nfsd_list, list);
-		victim = victim->next;
-		send_sig(SIG_NOCLEAN, nl->task, 1);
-		nrservs++;
-	}
+
+	nfsd_kill(&ve_nfsd_list, -nrservs);
  failure:
 	svc_destroy(nfsd_serv);		/* Release server */
  out:
@@ -382,7 +390,7 @@ nfsd(struct svc_rqst *rqstp)
 	nfsdstats.th_cnt++;
 
 	me.task = current;
-	list_add(&me.list, &nfsd_list);
+	list_add(&me.list, &ve_nfsd_list);
 
 	unlock_kernel();
 
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/stats.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/stats.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/stats.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/stats.c	2017-01-13 08:40:23.000000000 -0500
@@ -35,10 +35,11 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/stats.h>
 
+#ifndef CONFIG_VE
 struct nfsd_stats	nfsdstats;
-struct svc_stat		nfsd_svcstats = {
-	.program	= &nfsd_program,
-};
+#endif
+
+extern struct svc_program	nfsd_program;
 
 static int nfsd_proc_show(struct seq_file *seq, void *v)
 {
@@ -70,7 +71,7 @@ static int nfsd_proc_show(struct seq_fil
 	seq_putc(seq, '\n');
 	
 	/* show my rpc info */
-	svc_seq_show(seq, &nfsd_svcstats);
+	svc_seq_show(seq, get_exec_env()->nfsd_data->svc_stat);
 
 #ifdef CONFIG_NFSD_V4
 	/* Show count for individual nfsv4 operations */
@@ -98,14 +99,27 @@ static const struct file_operations nfsd
 	.release = single_release,
 };
 
-void
+int
 nfsd_stat_init(void)
 {
-	svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops);
+	struct ve_nfsd_data *d;
+
+	d = get_exec_env()->nfsd_data;
+	d->svc_stat = kzalloc(sizeof(struct svc_stat), GFP_KERNEL);
+	if (d->svc_stat == NULL)
+		return -ENOMEM;
+
+	d->svc_stat->program = &nfsd_program;
+	svc_proc_register(d->svc_stat, &nfsd_proc_fops);
+	return 0;
 }
 
 void
 nfsd_stat_shutdown(void)
 {
+	struct ve_nfsd_data *d;
+
+	d = get_exec_env()->nfsd_data;
 	svc_proc_unregister("nfsd");
+	kfree(d->svc_stat);
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/nfsd/vfs.c kernel-2.6.18-417.el5-028stab121/fs/nfsd/vfs.c
--- kernel-2.6.18-417.el5.orig/fs/nfsd/vfs.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/nfsd/vfs.c	2017-01-13 08:40:24.000000000 -0500
@@ -56,6 +56,8 @@
 #endif /* CONFIG_NFSD_V4 */
 #include <linux/jhash.h>
 
+#include <linux/vzquota.h>
+
 #include <asm/uaccess.h>
 
 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
@@ -77,25 +79,10 @@
  * If you increase the number of cached files very much, you'll need to
  * add a hash table here.
  */
-struct raparms {
-	struct raparms		*p_next;
-	unsigned int		p_count;
-	ino_t			p_ino;
-	dev_t			p_dev;
-	int			p_set;
-	struct file_ra_state	p_ra;
-	unsigned int		p_hindex;
-};
 
-struct raparm_hbucket {
-	struct raparms		*pb_head;
-	spinlock_t		pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS	4
-#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
+#ifndef CONFIG_VE
+struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
+#endif
 
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
@@ -354,6 +341,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str
 			put_write_access(inode);
 			goto out_nfserr;
 		}
+		vzquota_cur_qmblk_set(fhp->fh_export->ex_dentry->d_inode);
 		DQUOT_INIT(inode);
 	}
 
@@ -719,6 +707,7 @@ nfsd_open(struct svc_rqst *rqstp, struct
 		else
 			flags = O_WRONLY|O_LARGEFILE;
 
+		vzquota_cur_qmblk_set(fhp->fh_export->ex_dentry->d_inode);
 		DQUOT_INIT(inode);
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
@@ -812,7 +801,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
 		if (ra->p_count == 0)
 			frap = rap;
 	}
-	depth = nfsdstats.ra_size*11/10;
+	depth = nfsdstats.ra_size;
 	if (!frap) {	
 		spin_unlock(&rab->pb_lock);
 		return NULL;
@@ -880,6 +869,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 	struct raparms	*ra;
 	mm_segment_t	oldfs;
 	int		err;
+	dev_t		ex_dev;
 
 	err = nfserr_perm;
 	inode = file->f_dentry->d_inode;
@@ -890,7 +880,9 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 #endif
 
 	/* Get readahead parameters */
-	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+	BUG_ON(file->f_vfsmnt != fhp->fh_export->ex_mnt);
+	ex_dev = exp_get_dev(fhp->fh_export);
+	ra = nfsd_get_raparms(ex_dev, inode->i_ino);
 
 	if (ra && ra->p_set)
 		file->f_ra = ra->p_ra;
@@ -996,7 +988,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s
 	if (err >= 0 && stable) {
 		static ino_t	last_ino;
 		static dev_t	last_dev;
+		dev_t		ex_dev;
 
+		BUG_ON(file->f_vfsmnt != exp->ex_mnt);
+		ex_dev = exp_get_dev(exp);
 		/*
 		 * Gathered writes: If another process is currently
 		 * writing to the file, there's a high chance
@@ -1011,7 +1006,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s
 		 */
 		if (EX_WGATHER(exp)) {
 			if (atomic_read(&inode->i_writecount) > 1
-			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+			    || (last_ino == inode->i_ino && last_dev == ex_dev)) {
 				dprintk("nfsd: write defer %d\n", current->pid);
 				msleep(10);
 				dprintk("nfsd: write resume %d\n", current->pid);
@@ -1026,7 +1021,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s
 #endif
 		}
 		last_ino = inode->i_ino;
-		last_dev = inode->i_sb->s_dev;
+		last_dev = ex_dev;
 	}
 
 	dprintk("nfsd: write complete err=%d\n", err);
@@ -1974,7 +1969,7 @@ nfsd_racache_init(int cache_size)
 
 		raparm = &raparm_hash[i].pb_head;
 		for (j = 0; j < nperbucket; j++) {
-			*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
+			*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL_UBC);
 			if (!*raparm)
 				goto out_nomem;
 			raparm = &(*raparm)->p_next;
diff -upr kernel-2.6.18-417.el5.orig/fs/ntfs/file.c kernel-2.6.18-417.el5-028stab121/fs/ntfs/file.c
--- kernel-2.6.18-417.el5.orig/fs/ntfs/file.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ntfs/file.c	2017-01-13 08:40:40.000000000 -0500
@@ -2176,20 +2176,18 @@ out:
 /**
  * ntfs_file_aio_write -
  */
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
-		size_t count, loff_t pos)
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
-	struct iovec local_iov = { .iov_base = (void __user *)buf,
-				   .iov_len = count };
 
 	BUG_ON(iocb->ki_pos != pos);
 
 	mutex_lock(&inode->i_mutex);
-	ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+	ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		int err = sync_page_range(inode, mapping, pos, ret);
diff -upr kernel-2.6.18-417.el5.orig/fs/ntfs/super.c kernel-2.6.18-417.el5-028stab121/fs/ntfs/super.c
--- kernel-2.6.18-417.el5.orig/fs/ntfs/super.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ntfs/super.c	2017-01-13 08:40:15.000000000 -0500
@@ -3076,7 +3076,7 @@ iput_tmp_ino_err_out_now:
 	 * method again... FIXME: Do we need to do this twice now because of
 	 * attribute inodes? I think not, so leave as is for now... (AIA)
 	 */
-	if (invalidate_inodes(sb)) {
+	if (invalidate_inodes(sb, 0)) {
 		ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
 				"driver bug.");
 		/* Copied from fs/super.c. I just love this message. (-; */
diff -upr kernel-2.6.18-417.el5.orig/fs/ocfs2/file.c kernel-2.6.18-417.el5-028stab121/fs/ocfs2/file.c
--- kernel-2.6.18-417.el5.orig/fs/ocfs2/file.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/ocfs2/file.c	2017-01-13 08:40:40.000000000 -0500
@@ -960,25 +960,23 @@ static inline int ocfs2_write_should_rem
 }
 
 static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
-				    const char __user *buf,
-				    size_t count,
+				    const struct iovec *iov,
+				    unsigned long nr_segs,
 				    loff_t pos)
 {
-	struct iovec local_iov = { .iov_base = (void __user *)buf,
-				   .iov_len = count };
 	int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
 	u32 clusters;
 	struct file *filp = iocb->ki_filp;
 	struct inode *inode = filp->f_dentry->d_inode;
 	loff_t newsize, saved_pos;
 
-	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
-		   (unsigned int)count,
+	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+		   (unsigned int)nr_segs,
 		   filp->f_dentry->d_name.len,
 		   filp->f_dentry->d_name.name);
 
 	/* happy write of zero bytes */
-	if (count == 0)
+	if (iocb->ki_left == 0)
 		return 0;
 
 	if (!inode) {
@@ -1047,7 +1045,7 @@ static ssize_t ocfs2_file_aio_write(stru
 		} else {
 			saved_pos = iocb->ki_pos;
 		}
-		newsize = count + saved_pos;
+		newsize = iocb->ki_left + saved_pos;
 
 		mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
 		     (long long) saved_pos, (long long) newsize,
@@ -1080,7 +1078,7 @@ static ssize_t ocfs2_file_aio_write(stru
 		if (!clusters)
 			break;
 
-		ret = ocfs2_extend_file(inode, NULL, newsize, count);
+		ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left);
 		if (ret < 0) {
 			if (ret != -ENOSPC)
 				mlog_errno(ret);
@@ -1097,7 +1095,7 @@ static ssize_t ocfs2_file_aio_write(stru
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb);
 
-	ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+	ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);
 
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -1131,16 +1129,16 @@ out:
 }
 
 static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
-				   char __user *buf,
-				   size_t count,
+				   const struct iovec *iov,
+				   unsigned long nr_segs,
 				   loff_t pos)
 {
 	int ret = 0, rw_level = -1, have_alloc_sem = 0;
 	struct file *filp = iocb->ki_filp;
 	struct inode *inode = filp->f_dentry->d_inode;
 
-	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
-		   (unsigned int)count,
+	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+		   (unsigned int)nr_segs,
 		   filp->f_dentry->d_name.len,
 		   filp->f_dentry->d_name.name);
 
@@ -1184,7 +1182,7 @@ static ssize_t ocfs2_file_aio_read(struc
 	}
 	ocfs2_meta_unlock(inode, 0);
 
-	ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
+	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
 	if (ret == -EINVAL)
 		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");
 
diff -upr kernel-2.6.18-417.el5.orig/fs/open.c kernel-2.6.18-417.el5-028stab121/fs/open.c
--- kernel-2.6.18-417.el5.orig/fs/open.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/open.c	2017-01-13 08:40:40.000000000 -0500
@@ -25,6 +25,7 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/pagemap.h>
+#include <linux/faudit.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
 #include <linux/audit.h>
@@ -53,7 +54,21 @@ int vfs_statfs(struct dentry *dentry, st
 
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
+int faudit_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct faudit_statfs_arg arg;
+
+	arg.sb = sb;
+	arg.stat = buf;
+
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+	return 0;
+}
+
+static int vfs_statfs_native(struct dentry *dentry, struct vfsmount *mnt,
+		struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -62,6 +77,10 @@ static int vfs_statfs_native(struct dent
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -96,7 +115,8 @@ static int vfs_statfs_native(struct dent
 	return 0;
 }
 
-static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
+static int vfs_statfs64(struct dentry *dentry, struct vfsmount *mnt,
+		struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -105,6 +125,10 @@ static int vfs_statfs64(struct dentry *d
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -131,7 +155,7 @@ asmlinkage long sys_statfs(const char __
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry, &tmp);
+		error = vfs_statfs_native(nd.dentry, nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -150,7 +174,7 @@ asmlinkage long sys_statfs64(const char 
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry, &tmp);
+		error = vfs_statfs64(nd.dentry, nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -169,7 +193,7 @@ asmlinkage long sys_fstatfs(unsigned int
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry, &tmp);
+	error = vfs_statfs_native(file->f_dentry, file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -190,7 +214,7 @@ asmlinkage long sys_fstatfs64(unsigned i
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry, &tmp);
+	error = vfs_statfs64(file->f_dentry, file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -442,52 +466,16 @@ out:
  */
 asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
 {
-	int error;
-	struct nameidata nd;
-	struct inode * inode;
-	struct iattr newattrs;
-
-	error = user_path_walk(filename, &nd);
-	if (error)
-		goto out;
-	inode = nd.dentry->d_inode;
+	struct timeval tv[2];
 
-	error = -EROFS;
-	if (IS_RDONLY(inode))
-		goto dput_and_out;
-
-	/* Don't worry, the checks are done in inode_change_ok() */
-	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 	if (times) {
-		error = -EPERM;
-		if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-			goto dput_and_out;
-
-		error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
-		newattrs.ia_atime.tv_nsec = 0;
-		if (!error)
-			error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
-		newattrs.ia_mtime.tv_nsec = 0;
-		if (error)
-			goto dput_and_out;
-
-		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
-	} else {
-                error = -EACCES;
-                if (IS_IMMUTABLE(inode))
-                        goto dput_and_out;
-
-		if (current->fsuid != inode->i_uid &&
-		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
-			goto dput_and_out;
+		if (get_user(tv[0].tv_sec, &times->actime) ||
+		    get_user(tv[1].tv_sec, &times->modtime))
+			return -EFAULT;
+		tv[0].tv_usec = 0;
+		tv[1].tv_usec = 0;
 	}
-	mutex_lock(&inode->i_mutex);
-	error = notify_change(nd.dentry, &newattrs);
-	mutex_unlock(&inode->i_mutex);
-dput_and_out:
-	path_release(&nd);
-out:
-	return error;
+	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
 }
 
 #endif
@@ -496,14 +484,19 @@ out:
  * must be owner or have write permission.
  * Else, update from *times, must be owner or super user.
  */
-long do_utimes(int dfd, char __user *filename, struct timeval *times)
+long do_utimes(int dfd, char __user *filename, struct timeval *times, int flags)
 {
-	int error;
+	int error = -EINVAL;
 	struct nameidata nd;
 	struct inode * inode;
 	struct iattr newattrs;
+	int follow;
 
-	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+	if ((flags & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	follow = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+	error = __user_walk_fd(dfd, filename, follow, &nd);
 
 	if (error)
 		goto out;
@@ -549,7 +542,7 @@ asmlinkage long sys_futimesat(int dfd, c
 
 	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
 		return -EFAULT;
-	return do_utimes(dfd, filename, utimes ? times : NULL);
+	return do_utimes(dfd, filename, utimes ? times : NULL, 0);
 }
 
 asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes)
@@ -734,15 +727,20 @@ out:
 	return err;
 }
 
-asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
-			     mode_t mode)
+static long do_fchmodat(int dfd, const char __user *filename, mode_t mode,
+			int flags)
 {
 	struct nameidata nd;
 	struct inode * inode;
-	int error;
+	int error = -EINVAL;
 	struct iattr newattrs;
+	int follow;
+
+	if ((flags & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
 
-	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+	follow = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+	error = __user_walk_fd(dfd, filename, follow, &nd);
 	if (error)
 		goto out;
 	inode = nd.dentry->d_inode;
@@ -769,10 +767,17 @@ out:
 	return error;
 }
 
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
+			     mode_t mode)
+{
+	return do_fchmodat(dfd, filename, mode, 0);
+}
+
 asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
 {
 	return sys_fchmodat(AT_FDCWD, filename, mode);
 }
+EXPORT_SYMBOL_GPL(sys_chmod);
 
 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 {
@@ -821,6 +826,7 @@ asmlinkage long sys_chown(const char __u
 	}
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_chown);
 
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag)
@@ -894,7 +900,7 @@ static struct file *__dentry_open(struct
 	f->f_vfsmnt = mnt;
 	f->f_pos = 0;
 	f->f_op = fops_get(inode->i_fop);
-	file_move(f, &inode->i_sb->s_files);
+	file_move(f, per_cpu_ptr(inode->i_sb->s_files, raw_smp_processor_id()));
 
 	if (!open && f->f_op)
 		open = f->f_op->open;
@@ -1031,6 +1037,7 @@ struct file *nameidata_to_filp(struct na
 	return filp;
 }
 
+int odirect_enable = 0;
 /*
  * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
  * error.
@@ -1040,6 +1047,9 @@ struct file *dentry_open(struct dentry *
 	int error;
 	struct file *f;
 
+	if (!capable(CAP_SYS_RAWIO) && !odirect_enable)
+		flags &= ~O_DIRECT;
+
 	error = -ENFILE;
 	f = get_empty_filp();
 	if (f == NULL) {
@@ -1055,7 +1065,7 @@ EXPORT_SYMBOL(dentry_open);
 /*
  * Find an empty file descriptor entry, and mark it busy.
  */
-int get_unused_fd(void)
+int get_unused_fd_flags(int flags)
 {
 	struct files_struct * files = current->files;
 	int fd, error;
@@ -1092,7 +1102,10 @@ repeat:
 	}
 
 	FD_SET(fd, fdt->open_fds);
-	FD_CLR(fd, fdt->close_on_exec);
+	if (flags & O_CLOEXEC)
+		FD_SET(fd, fdt->close_on_exec);
+	else
+		FD_CLR(fd, fdt->close_on_exec);
 	files->next_fd = fd + 1;
 #if 1
 	/* Sanity check */
@@ -1108,6 +1121,11 @@ out:
 	return error;
 }
 
+int get_unused_fd(void)
+{
+	return get_unused_fd_flags(0);
+}
+
 EXPORT_SYMBOL(get_unused_fd);
 
 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
@@ -1160,7 +1178,7 @@ long do_sys_open(int dfd, const char __u
 	int fd = PTR_ERR(tmp);
 
 	if (!IS_ERR(tmp)) {
-		fd = get_unused_fd();
+		fd = get_unused_fd_flags(flags);
 		if (fd >= 0) {
 			struct file *f = do_filp_open(dfd, tmp, flags, mode);
 			if (IS_ERR(f)) {
@@ -1188,6 +1206,7 @@ asmlinkage long sys_open(const char __us
 	prevent_tail_call(ret);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sys_open);
 
 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
 			   int mode)
@@ -1320,3 +1339,28 @@ int nonseekable_open(struct inode *inode
 }
 
 EXPORT_SYMBOL(nonseekable_open);
+
+int lsyscall_enable = 1;
+asmlinkage long sys_lchmod(char __user * filename, mode_t mode)
+{
+	if (!lsyscall_enable)
+		return -ENOSYS;
+	return do_fchmodat(AT_FDCWD, filename, mode, AT_SYMLINK_NOFOLLOW);
+}
+
+asmlinkage long sys_lutime(char __user * filename,
+		struct utimbuf __user * times)
+{
+	struct timeval tv[2];
+
+	if (!lsyscall_enable)
+		return -ENOSYS;
+	if (times) {
+		if (get_user(tv[0].tv_sec, &times->actime) ||
+		    get_user(tv[1].tv_sec, &times->modtime))
+			return -EFAULT;
+		tv[0].tv_usec = 0;
+		tv[1].tv_usec = 0;
+	}
+	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, AT_SYMLINK_NOFOLLOW);
+}
diff -upr kernel-2.6.18-417.el5.orig/fs/partitions/check.c kernel-2.6.18-417.el5-028stab121/fs/partitions/check.c
--- kernel-2.6.18-417.el5.orig/fs/partitions/check.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/partitions/check.c	2017-01-13 08:40:40.000000000 -0500
@@ -127,6 +127,7 @@ char *disk_name(struct gendisk *hd, int 
 
 	return buf;
 }
+EXPORT_SYMBOL(disk_name);
 
 const char *bdevname(struct block_device *bdev, char *buf)
 {
@@ -230,6 +231,11 @@ static struct sysfs_ops part_sysfs_ops =
 	.store	=	part_attr_store,
 };
 
+static ssize_t part_uevent_show(struct hd_struct *p, char *buf)
+{
+	return kobject_uevent_show(&p->kobj, buf);
+}
+
 static ssize_t part_uevent_store(struct hd_struct * p,
 				 const char *page, size_t count)
 {
@@ -275,7 +281,8 @@ static ssize_t part_stats_read(struct hd
 	return res;
 }
 static struct part_attribute part_attr_uevent = {
-	.attr = {.name = "uevent", .mode = S_IWUSR },
+	.attr = {.name = "uevent", .mode = S_IRUGO | S_IWUSR },
+	.show	= part_uevent_show,
 	.store	= part_uevent_store
 };
 static struct part_attribute part_attr_dev = {
diff -upr kernel-2.6.18-417.el5.orig/fs/pipe.c kernel-2.6.18-417.el5-028stab121/fs/pipe.c
--- kernel-2.6.18-417.el5.orig/fs/pipe.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/pipe.c	2017-01-13 08:40:40.000000000 -0500
@@ -22,6 +22,8 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
+#include <ub/ub_mem.h>
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
@@ -433,7 +435,7 @@ redo1:
 			size_t remaining;
 
 			if (!page) {
-				page = alloc_page(GFP_HIGHUSER);
+				page = alloc_page(GFP_HIGHUSER | __GFP_UBC);
 				if (unlikely(!page)) {
 					ret = ret ? : -ENOMEM;
 					break;
@@ -839,7 +841,7 @@ struct pipe_inode_info * alloc_pipe_info
 {
 	struct pipe_inode_info *pipe;
 
-	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_UBC);
 	if (pipe) {
 		init_waitqueue_head(&pipe->wait);
 		pipe->r_counter = pipe->w_counter = 1;
@@ -991,26 +993,32 @@ struct file *create_read_pipe(struct fil
 	return f;
 }
 
-int do_pipe(int *fd)
+static int do_pipe_flags(int *fd, int flags)
 {
 	struct file *fw, *fr;
 	int error;
 	int fdw, fdr;
 
+	if (flags & ~(O_CLOEXEC | O_NONBLOCK))
+		return -EINVAL;
+
 	fw = create_write_pipe();
 	if (IS_ERR(fw))
 		return PTR_ERR(fw);
+	fw->f_flags |= (flags & O_NONBLOCK);
+
 	fr = create_read_pipe(fw);
 	error = PTR_ERR(fr);
 	if (IS_ERR(fr))
 		goto err_write_pipe;
+	fr->f_flags |= (flags & O_NONBLOCK);
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto err_read_pipe;
 	fdr = error;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto err_fdr;
 	fdw = error;
@@ -1039,8 +1047,29 @@ int do_pipe(int *fd)
 	return error;
 }
 
+int do_pipe(int *fd)
+{
+	return do_pipe_flags(fd, 0);
+}
+
 EXPORT_SYMBOL_GPL(do_pipe);
 
+asmlinkage long sys_pipe2(int __user *fildes, int flags)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe_flags(fd, flags);
+	if (!error) {
+		if (copy_to_user(fildes, fd, sizeof(fd))) {
+			sys_close(fd[0]);
+			sys_close(fd[1]);
+			error = -EFAULT;
+		}
+	}
+	return error;
+}
+
 /*
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way Unix traditionally does this, though.
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/array.c kernel-2.6.18-417.el5-028stab121/fs/proc/array.c
--- kernel-2.6.18-417.el5.orig/fs/proc/array.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/array.c	2017-01-13 08:40:28.000000000 -0500
@@ -78,6 +78,9 @@
 #include <linux/delayacct.h>
 #include <linux/resource.h>
 #include <linux/ptrace.h>
+#include <linux/fairsched.h>
+
+#include <ub/beancounter.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -166,10 +169,16 @@ static inline char * task_state(struct t
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
+	pid_t pid, ppid, tgid, vpid;
+
+	pid = get_task_pid(p);
+	tgid = get_task_tgid(p);
 
 	read_lock(&tasklist_lock);
 	tracer = tracehook_tracer_task(p);
-	tracer_pid = tracer == NULL ? 0 : tracer->pid;
+	tracer_pid = tracer == NULL ? 0 : get_task_pid(tracer);
+	ppid = get_task_ppid(p);
+	vpid = (pid_alive(p) ? virt_pid(p) : 0);
 
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
@@ -178,13 +187,19 @@ static inline char * task_state(struct t
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
+#ifdef CONFIG_FAIRSCHED
+		"FNid:\t%d\n"
+#endif
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
 		(p->sleep_avg/1024)*100/(1020000000/1024),
-	       	p->tgid,
-		p->pid, pid_alive(p) ? p->group_leader->parent->tgid : 0,
+	       	tgid,
+		pid, ppid,
 		tracer_pid,
+#ifdef CONFIG_FAIRSCHED
+		task_fairsched_node_id(p),
+#endif
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);
@@ -207,6 +222,16 @@ static inline char * task_state(struct t
 	put_group_info(group_info);
 
 	buffer += sprintf(buffer, "\n");
+
+#ifdef CONFIG_VE
+	buffer += sprintf(buffer,
+			"envID:\t%d\n"
+			"VPid:\t%d\n"
+			"PNState:\t%u\n"
+			"StopState:\t%u\n",
+			VE_TASK_INFO(p)->owner_env->veid,
+			vpid, p->pn_state, p->stopped_state);
+#endif
 	return buffer;
 }
 
@@ -252,7 +277,7 @@ static void collect_sigign_sigcatch(stru
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
-	sigset_t pending, shpending, blocked, ignored, caught;
+	sigset_t pending, shpending, blocked, ignored, caught, saved;
 	int num_threads = 0;
 	unsigned long qsize = 0;
 	unsigned long qlim = 0;
@@ -262,6 +287,7 @@ static inline char * task_sig(struct tas
 	sigemptyset(&blocked);
 	sigemptyset(&ignored);
 	sigemptyset(&caught);
+	sigemptyset(&saved);
 
 	/* Gather all the data with the appropriate locks held */
 	read_lock(&tasklist_lock);
@@ -270,6 +296,7 @@ static inline char * task_sig(struct tas
 		pending = p->pending.signal;
 		shpending = p->signal->shared_pending.signal;
 		blocked = p->blocked;
+		saved = p->saved_sigmask;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
 		qsize = atomic_read(&p->user->sigpending);
@@ -287,6 +314,7 @@ static inline char * task_sig(struct tas
 	buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
 	buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
 	buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
+	buffer = render_sigset_t("SigSvd:\t", &saved, buffer);
 
 	return buffer;
 }
@@ -301,10 +329,27 @@ static inline char *task_cap(struct task
 			    cap_t(p->cap_effective));
 }
 
+#ifdef CONFIG_USER_RESOURCE
+static inline void ub_dump_task_info(struct task_struct *tsk,
+		char *stsk, int ltsk, char *smm, int lmm)
+{
+	print_ub_uid(tsk->task_bc.task_ub, stsk, ltsk);
+	task_lock(tsk);
+	if (tsk->mm)
+		print_ub_uid(tsk->mm->mm_ub, smm, lmm);
+	else
+		strncpy(smm, "N/A", lmm);
+	task_unlock(tsk);
+}
+#endif
+
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
 	char * orig = buffer;
 	struct mm_struct *mm = get_task_mm(task);
+#ifdef CONFIG_USER_RESOURCE
+	char tsk_ub_info[64], mm_ub_info[64];
+#endif
 
 	buffer = task_name(task, buffer);
 	buffer = task_state(task, buffer);
@@ -316,6 +361,14 @@ int proc_pid_status(struct task_struct *
 	buffer = task_sig(task, buffer);
 	buffer = task_cap(task, buffer);
 	buffer = cpuset_task_status_allowed(task, buffer);
+#ifdef CONFIG_USER_RESOURCE
+	ub_dump_task_info(task,
+			tsk_ub_info, sizeof(tsk_ub_info),
+			mm_ub_info, sizeof(mm_ub_info));
+
+	buffer += sprintf(buffer, "TaskUB:\t%s\n", tsk_ub_info);
+	buffer += sprintf(buffer, "MMUB:\t%s\n", mm_ub_info);
+#endif
 	return buffer - orig;
 }
 
@@ -338,6 +391,11 @@ static int do_task_stat(struct task_stru
 	unsigned long rsslim = 0;
 	struct task_struct *t;
 	char tcomm[sizeof(task->comm)];
+#ifdef CONFIG_USER_RESOURCE
+	char ub_task_info[64];
+	char ub_mm_info[64];
+#endif
+	int is_super = ve_is_super(get_exec_env());
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -380,11 +438,11 @@ static int do_task_stat(struct task_stru
 	}
 	if (task->signal) {
 		if (task->signal->tty) {
-			tty_pgrp = task->signal->tty->pgrp;
+			tty_pgrp = pid_to_vpid(task->signal->tty->pgrp);
 			tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
 		}
-		pgid = process_group(task);
-		sid = task->signal->session;
+		pgid = get_task_pgid(task);
+		sid = get_task_sid(task);
 		cmin_flt = task->signal->cmin_flt;
 		cmaj_flt = task->signal->cmaj_flt;
 		cutime = task->signal->cutime;
@@ -397,7 +455,7 @@ static int do_task_stat(struct task_stru
 			stime = cputime_add(stime, task->signal->stime);
 		}
 	}
-	ppid = pid_alive(task) ? task->group_leader->parent->tgid : 0;
+	ppid = get_task_ppid(task);
 	read_unlock(&tasklist_lock);
 	mutex_unlock(&tty_mutex);
 
@@ -419,17 +477,34 @@ static int do_task_stat(struct task_stru
 	priority = task_prio(task);
 	nice = task_nice(task);
 
+#ifndef CONFIG_VE
 	/* Temporary variable needed for gcc-2.96 */
 	/* convert timespec -> nsec*/
 	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
 				+ task->start_time.tv_nsec;
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(start_time);
+#else
+	start_time = ve_relative_clock(&task->start_time);
+#endif
+
+#ifdef CONFIG_USER_RESOURCE
+	ub_dump_task_info(task,
+			ub_task_info, sizeof(ub_task_info),
+			ub_mm_info, sizeof(ub_mm_info));
+#endif
 
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n",
-		task->pid,
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu"
+#ifdef CONFIG_VE
+	" 0 0 0 0 0 0 0 %d %u"
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	" %s %s"
+#endif
+	"\n",
+		get_task_pid(task),
 		tcomm,
 		state,
 		ppid,
@@ -470,10 +545,19 @@ static int do_task_stat(struct task_stru
 		0UL,
 		0UL,
 		task->exit_signal,
-		task_cpu(task),
+		is_super ? task_pcpu(task) : task_cpu(task),
 		task->rt_priority,
 		task->policy,
-		(unsigned long long)delayacct_blkio_ticks(task));
+		(unsigned long long)delayacct_blkio_ticks(task)
+#ifdef CONFIG_VE
+		, virt_pid(task),
+		VEID(VE_TASK_INFO(task)->owner_env)
+#endif
+#ifdef CONFIG_USER_RESOURCE
+		, ub_task_info,
+		ub_mm_info
+#endif
+		);
 	if(mm)
 		mmput(mm);
 	return res;
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/base.c kernel-2.6.18-417.el5-028stab121/fs/proc/base.c
--- kernel-2.6.18-417.el5.orig/fs/proc/base.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/base.c	2017-01-13 08:40:26.000000000 -0500
@@ -49,6 +49,7 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -73,6 +74,7 @@
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/elf.h>
+#include <linux/nsproxy.h>
 #include "internal.h"
 
 /* NOTE:
@@ -204,6 +206,8 @@ enum pid_directory_inos {
 	PROC_TGID_FDINFO,
 	PROC_TID_FDINFO,
 
+	PROC_TGID_STACK,
+	PROC_TID_STACK,
 	/* Add new entries before this */
 	PROC_TID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
 };
@@ -271,6 +275,9 @@ static struct pid_entry tgid_base_stuff[
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	E(PROC_TGID_IO,             "io",  S_IFREG|S_IRUSR),
 #endif
+#ifdef CONFIG_STACKTRACE_PROC
+	E(PROC_TGID_STACK,     "stack",   S_IFREG|S_IRUGO),
+#endif
 
 	{0,0,NULL,0}
 };
@@ -319,6 +326,9 @@ static struct pid_entry tid_base_stuff[]
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	E(PROC_TID_IO,         "io",      S_IFREG|S_IRUSR),
 #endif
+#ifdef CONFIG_STACKTRACE_PROC
+	E(PROC_TID_STACK,      "stack",   S_IFREG|S_IRUGO),
+#endif
 
 	{0,0,NULL,0}
 };
@@ -355,6 +365,7 @@ static int proc_fd_info(struct inode *in
 	struct files_struct *files = NULL;
 	struct file *file;
 	int fd = proc_fd(inode);
+	int err = -ENOENT;
 
 	if (task) {
 		files = get_files_struct(task);
@@ -368,6 +379,9 @@ static int proc_fd_info(struct inode *in
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
+			err = -EACCES;
+			if (d_root_check(file->f_dentry, file->f_vfsmnt))
+				goto out;
 			if (mnt)
 				*mnt = mntget(file->f_vfsmnt);
 			if (dentry)
@@ -378,14 +392,13 @@ static int proc_fd_info(struct inode *in
 					 "flags:\t0%o\n",
 					 (long long) file->f_pos,
 					 file->f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+			err = 0;
 		}
+out:
 		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+	return err;
 }
 
 static int proc_fd_link(struct inode *inode, struct dentry **dentry,
@@ -444,6 +457,8 @@ static int __ptrace_may_attach(struct ta
 	 * or halting the specified task is impossible.
 	 */
 	int dumpable = 0;
+	int vps_dumpable = 1;
+
 	/* Don't let security modules deny introspection */
 	if (task == current)
 		return 0;
@@ -455,10 +470,17 @@ static int __ptrace_may_attach(struct ta
 	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
 	smp_rmb();
-	if (task->mm)
+	if (task->mm) {
 		dumpable = task->mm->dumpable;
+		vps_dumpable = (task->mm->vps_dumpable == 1);
+	}
+
 	if (dumpable != SUID_DUMP_USER && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
+	if (!vps_dumpable && !ve_is_super(get_exec_env()))
+		return -EPERM;
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
+		return -EPERM;
 
 	return security_ptrace(current, task);
 }
@@ -485,10 +507,12 @@ static int proc_cwd_link(struct inode *i
 	}
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->pwdmnt);
-		*dentry = dget(fs->pwd);
+		result = d_root_check(fs->pwd, fs->pwdmnt);
+		if (!result) {
+			*mnt = mntget(fs->pwdmnt);
+			*dentry = dget(fs->pwd);
+		}
 		read_unlock(&fs->lock);
-		result = 0;
 		put_fs_struct(fs);
 	}
 	return result;
@@ -618,6 +642,43 @@ static int proc_pid_wchan(struct task_st
 }
 #endif /* CONFIG_KALLSYMS */
 
+#ifdef CONFIG_STACKTRACE_PROC
+#include <linux/stacktrace.h>
+#define MAX_STACK_TRACE_DEPTH	64
+
+static int proc_pid_stack(struct task_struct *task, char *buffer)
+{
+	struct stack_trace trace;
+	unsigned long *entries;
+	int i, ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	trace.nr_entries	= 0;
+	trace.max_entries	= MAX_STACK_TRACE_DEPTH;
+	trace.entries		= entries;
+	trace.skip		= 0;
+	trace.all_contexts	= 0;
+	save_stack_trace(&trace, task);
+
+	for (i = 0; i < trace.nr_entries; i++) {
+		ret += sprint_symbol(buffer + ret, entries[i]);
+		ret += sprintf(buffer + ret, "\n");
+		if (ret > PAGE_SIZE - KSYM_SYMBOL_LEN - 1)
+			break;
+	}
+
+	kfree(entries);
+
+	return ret;
+}
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
 /*
  * Provides /proc/PID/schedstat
@@ -679,7 +740,7 @@ static int do_io_accounting(struct task_
 			ioac.cancelled_write_bytes +=
 					sigaux->ioac.cancelled_write_bytes;
 
-			while_each_thread(task, t) {
+			while_each_thread_ve(task, t) {
 				rchar += t->rchar;
 				wchar += t->wchar;
 				syscr += t->syscr;
@@ -728,17 +789,31 @@ static int proc_tgid_io_accounting(struc
 static int proc_fd_access_allowed(struct inode *inode)
 {
 	struct task_struct *task;
-	int allowed = 0;
+	int err;
+
 	/* Allow access to a task's file descriptors if it is us or we
 	 * may use ptrace attach to the process and find out that
 	 * information.
 	 */
+	err = -ENOENT;
 	task = get_proc_task(inode);
 	if (task) {
-		allowed = ptrace_may_attach(task);
+		if (ptrace_may_attach(task))
+			err = 0;
+		else
+			/*
+			 * ptrace_may_attach() may play a trick on us here.
+			 * If the task is a zombie it will consider the task
+			 * not dumpable at all and will deny any ptracing in
+			 * a VE. Not a big deal for ptrace(), but following
+			 * the link would then fail with -EACCES. Some
+			 * software cannot stand such a swindle and refuses
+			 * to work :(
+			 */
+			err = (task->mm ? -EACCES : -ENOENT);
 		put_task_struct(task);
 	}
-	return allowed;
+	return err;
 }
 
 static int proc_setattr(struct dentry *dentry, struct iattr *attr)
@@ -818,11 +893,7 @@ static int mounts_open(struct inode *ino
 	int ret = -EINVAL;
 
 	if (task) {
-		task_lock(task);
-		namespace = task->namespace;
-		if (namespace)
-			get_namespace(namespace);
-		task_unlock(task);
+		namespace = get_task_mnt_ns(task);
 		put_task_struct(task);
 	}
 
@@ -889,11 +960,7 @@ static int mountstats_open(struct inode 
 		struct task_struct *task = get_proc_task(inode);
 
 		if (task) {
-			task_lock(task);
-			namespace = task->namespace;
-			if (namespace)
-				get_namespace(namespace);
-			task_unlock(task);
+			namespace = get_task_mnt_ns(task);
 			put_task_struct(task);
 		}
 
@@ -1206,6 +1273,8 @@ static ssize_t oom_adjust_write(struct f
 	oom_adjust = simple_strtol(buffer, &end, 0);
 	if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE)
 		return -EINVAL;
+	if (oom_adjust == OOM_DISABLE && !ve_is_super(get_exec_env()))
+		return -EPERM;
 	if (*end == '\n')
 		end++;
 	task = get_proc_task(file->f_dentry->d_inode);
@@ -1442,13 +1511,14 @@ static struct file_operations proc_secco
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
-	int error = -EACCES;
+	int error;
 
 	/* We don't need a base pointer in the /proc filesystem */
 	path_release(nd);
 
 	/* Are we allowed to snoop on the tasks file descriptors? */
-	if (!proc_fd_access_allowed(inode))
+	error = proc_fd_access_allowed(inode);
+	if (error < 0)
 		goto out;
 
 	error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
@@ -1485,13 +1555,14 @@ static int do_proc_readlink(struct dentr
 
 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
 {
-	int error = -EACCES;
+	int error;
 	struct inode *inode = dentry->d_inode;
 	struct dentry *de;
 	struct vfsmount *mnt = NULL;
 
 	/* Are we allowed to snoop on the tasks file descriptors? */
-	if (!proc_fd_access_allowed(inode))
+	error = proc_fd_access_allowed(inode);
+	if (error < 0)
 		goto out;
 
 	error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
@@ -1515,6 +1586,7 @@ static int proc_readfd_common(struct fil
 			      filldir_t filldir, unsigned int d_type)
 {
 	struct dentry *dentry = filp->f_dentry;
+	struct ve_struct *ve = dentry->d_sb->s_type->owner_env;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *p = get_proc_task(inode);
 	unsigned int fd, tid, ino;
@@ -1527,7 +1599,7 @@ static int proc_readfd_common(struct fil
 	if (!p)
 		goto out_no_task;
 	retval = 0;
-	tid = p->pid;
+	tid = get_task_pid_ve(p, ve);
 
 	fd = filp->f_pos;
 	switch (fd) {
@@ -1598,6 +1670,7 @@ static int proc_pident_readdir(struct fi
 	int pid;
 	struct dentry *dentry = filp->f_dentry;
 	struct inode *inode = dentry->d_inode;
+	struct ve_struct *ve = dentry->d_sb->s_type->owner_env;
 	struct task_struct *task = get_proc_task(inode);
 	struct pid_entry *p;
 	ino_t ino;
@@ -1608,7 +1681,7 @@ static int proc_pident_readdir(struct fi
 		goto out;
 
 	ret = 0;
-	pid = task->pid;
+	pid = get_task_pid_ve(task, ve);
 	put_task_struct(task);
 	i = filp->f_pos;
 	switch (i) {
@@ -1683,6 +1756,10 @@ static struct inode *proc_pid_make_inode
 {
 	struct inode * inode;
 	struct proc_inode *ei;
+	struct ve_struct *ve = sb->s_type->owner_env;
+
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env, ve))
+		return NULL;
 
 	/* We need a new inode */
 	
@@ -1693,7 +1770,7 @@ static struct inode *proc_pid_make_inode
 	/* Common stuff */
 	ei = PROC_I(inode);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_ino = fake_ino(task->pid, ino);
+	inode->i_ino = fake_ino(get_task_pid_ve(task, ve), ino);
 	inode->i_op = &proc_def_inode_operations;
 
 	/*
@@ -2270,6 +2347,13 @@ static struct dentry *proc_pident_lookup
 			ei->op.proc_read = proc_pid_wchan;
 			break;
 #endif
+#ifdef CONFIG_STACKTRACE_PROC
+		case PROC_TID_STACK:
+		case PROC_TGID_STACK:
+			inode->i_fop = &proc_info_file_operations;
+			ei->op.proc_read = proc_pid_stack;
+			break;
+#endif
 #ifdef CONFIG_SCHEDSTATS
 		case PROC_TID_SCHEDSTAT:
 		case PROC_TGID_SCHEDSTAT:
@@ -2418,14 +2502,14 @@ static int proc_self_readlink(struct den
 			      int buflen)
 {
 	char tmp[PROC_NUMBUF];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[PROC_NUMBUF];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return ERR_PTR(vfs_follow_link(nd,tmp));
 }	
 
@@ -2455,15 +2539,16 @@ static struct inode_operations proc_self
  *       that no dcache entries will exist at process exit time it
  *       just makes it very unlikely that any will persist.
  */
-void proc_flush_task(struct task_struct *task)
+static void __proc_flush_task(struct task_struct *task,
+		int pid, int tgid, struct dentry *root)
 {
 	struct dentry *dentry, *leader, *dir;
 	char buf[PROC_NUMBUF];
 	struct qstr name;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
-	dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	dentry = d_hash_and_lookup(root, &name);
 	if (dentry) {
 		shrink_dcache_parent(dentry);
 		d_drop(dentry);
@@ -2474,8 +2559,8 @@ void proc_flush_task(struct task_struct 
 		goto out;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
-	leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+	leader = d_hash_and_lookup(root, &name);
 	if (!leader)
 		goto out;
 
@@ -2486,7 +2571,7 @@ void proc_flush_task(struct task_struct 
 		goto out_put_leader;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+	name.len = snprintf(buf, sizeof(buf), "%d", pid);
 	dentry = d_hash_and_lookup(dir, &name);
 	if (dentry) {
 		shrink_dcache_parent(dentry);
@@ -2501,6 +2586,19 @@ out:
 	return;
 }
 
+void proc_flush_task(struct task_struct *task)
+{
+	__proc_flush_task(task, task->pid, task->tgid,
+			proc_mnt->mnt_root);
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		return;
+
+	__proc_flush_task(task, virt_pid(task), virt_tgid(task),
+			task->ve_task_info.owner_env->proc_mnt->mnt_root);
+#endif
+}
+
 /* SMP-safe */
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
@@ -2530,7 +2628,19 @@ struct dentry *proc_pid_lookup(struct in
 		goto out;
 
 	rcu_read_lock();
-	task = find_task_by_pid(tgid);
+	task = find_task_by_pid_ve(tgid);
+	/* In theory we are allowed to look up both /proc/VIRT_PID and
+	 * /proc/GLOBAL_PID inside VE. However, the current /proc
+	 * implementation cannot maintain two references to one task,
+	 * so we have to prohibit /proc/GLOBAL_PID.
+	 */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
+		/* However, VE_ENTERed tasks are an exception; they use global
+		 * pids.
+		 */
+		if (virt_pid(task) != tgid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	rcu_read_unlock();
@@ -2581,7 +2691,12 @@ static struct dentry *proc_task_lookup(s
 		goto out;
 
 	rcu_read_lock();
-	task = find_task_by_pid(tid);
+	task = find_task_by_pid_ve(tid);
+	/* See comment above in similar place. */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
+		if (virt_pid(task) != tid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	rcu_read_unlock();
@@ -2624,7 +2739,7 @@ out_no_task:
  * Find the first task with tgid >= tgid
  *
  */
-static struct task_struct *next_tgid(unsigned int tgid)
+static struct task_struct *next_tgid(unsigned int tgid, struct ve_struct *ve)
 {
 	struct task_struct *task;
 	struct pid *pid;
@@ -2632,9 +2747,15 @@ static struct task_struct *next_tgid(uns
 	rcu_read_lock();
 retry:
 	task = NULL;
-	pid = find_ge_pid(tgid);
+	pid = find_ge_pid(tgid, ve);
 	if (pid) {
-		tgid = pid->nr + 1;
+		if (ve_is_super(ve))
+			tgid = pid->nr + 1;
+#ifdef CONFIG_VE
+		else
+			tgid = pid->vnr + 1;
+#endif
+
 		task = pid_task(pid, PIDTYPE_PID);
 		/* What we to know is if the pid we have find is the
 		 * pid of a thread_group_leader.  Testing for task
@@ -2671,6 +2792,7 @@ int proc_pid_readdir(struct file * filp,
 	unsigned int nr;
 	struct task_struct *task;
 	int tgid;
+	struct ve_struct *ve;
 	filldir_t __filldir;
 
 	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -2686,9 +2808,10 @@ int proc_pid_readdir(struct file * filp,
 	}
 
 	tgid = filp->f_pos - TGID_OFFSET;
-	for (task = next_tgid(tgid);
+	ve = filp->f_dentry->d_sb->s_type->owner_env;
+	for (task = next_tgid(tgid, ve);
 	     task;
-	     put_task_struct(task), task = next_tgid(tgid + 1)) {
+	     put_task_struct(task), task = next_tgid(tgid + 1, ve)) {
 		int len;
 		ino_t ino;
 
@@ -2697,7 +2820,7 @@ int proc_pid_readdir(struct file * filp,
 		else
 			__filldir = fake_filldir;
 
-		tgid = task->pid;
+		tgid = get_task_pid_ve(task, ve);
 		filp->f_pos = tgid + TGID_OFFSET;
 		len = snprintf(buf, sizeof(buf), "%d", tgid);
 		ino = fake_ino(tgid, PROC_TGID_INO);
@@ -2724,14 +2847,18 @@ out:
  * threads past it.
  */
 static struct task_struct *first_tid(struct task_struct *leader,
-					int tid, int nr)
+					int tid, int nr, struct ve_struct *ve)
 {
 	struct task_struct *pos;
 
 	rcu_read_lock();
 	/* Attempt to start with the pid of a thread */
 	if (tid && (nr > 0)) {
-		pos = find_task_by_pid(tid);
+		struct ve_struct *old_ve;
+
+		old_ve = set_exec_env(ve);
+		pos = find_task_by_pid_ve(tid);
+		(void) set_exec_env(old_ve);
 		if (pos && (pos->group_leader == leader))
 			goto found;
 	}
@@ -2792,6 +2919,7 @@ static int proc_task_readdir(struct file
 	ino_t ino;
 	int tid;
 	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
+	struct ve_struct *ve = dentry->d_sb->s_type->owner_env;
 
 	if (!leader)
 		goto out_no_task;
@@ -2817,11 +2945,11 @@ static int proc_task_readdir(struct file
 	 */
 	tid = filp->f_version;
 	filp->f_version = 0;
-	for (task = first_tid(leader, tid, pos - 2);
+	for (task = first_tid(leader, tid, pos - 2, ve);
 	     task;
 	     task = next_tid(task), pos++) {
 		int len;
-		tid = task->pid;
+		tid = get_task_pid_ve(task, ve);
 		len = snprintf(buf, sizeof(buf), "%d", tid);
 		ino = fake_ino(tid, PROC_TID_INO);
 		if (filldir(dirent, buf, len, pos, ino, DT_DIR) < 0) {
@@ -2854,3 +2982,34 @@ static int proc_task_getattr(struct vfsm
 
 	return 0;
 }
+
+/* Check whether dentry belongs to a task that already died */
+int proc_dentry_of_dead_task(struct dentry *dentry)
+{
+	if (dentry->d_inode->i_fop == &dummy_proc_pid_file_operations)
+		return 1;
+
+	return (dentry->d_op == &pid_dentry_operations &&
+		 proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first == NULL);
+}
+EXPORT_SYMBOL(proc_dentry_of_dead_task);
+
+/* Place it here to avoid using the vzrst module count */
+static ssize_t dummy_proc_pid_read(struct file * file, char __user * buf,
+				 size_t count, loff_t *ppos)
+{
+	return -ESRCH;
+}
+
+static ssize_t dummy_proc_pid_write(struct file * file, const char * buf,
+				  size_t count, loff_t *ppos)
+{
+	return -ESRCH;
+}
+
+struct file_operations dummy_proc_pid_file_operations = {
+	.read		= dummy_proc_pid_read,
+	.write		= dummy_proc_pid_write,
+};
+
+EXPORT_SYMBOL(dummy_proc_pid_file_operations);
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/generic.c kernel-2.6.18-417.el5-028stab121/fs/proc/generic.c
--- kernel-2.6.18-417.el5.orig/fs/proc/generic.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/generic.c	2017-01-13 08:40:21.000000000 -0500
@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/time.h>
+#include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/module.h>
@@ -239,6 +240,10 @@ static int proc_notify_change(struct den
 	struct proc_dir_entry *de = PDE(inode);
 	int error;
 
+	if ((iattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) &&
+	    LPDE(inode) == GPDE(inode))
+		return -EPERM;
+
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		goto out;
@@ -247,9 +252,12 @@ static int proc_notify_change(struct den
 	if (error)
 		goto out;
 	
-	de->uid = inode->i_uid;
-	de->gid = inode->i_gid;
-	de->mode = inode->i_mode;
+	if (iattr->ia_valid & ATTR_UID)
+		de->uid = inode->i_uid;
+	if (iattr->ia_valid & ATTR_GID)
+		de->gid = inode->i_gid;
+	if (iattr->ia_valid & ATTR_MODE)
+		de->mode = inode->i_mode;
 out:
 	return error;
 }
@@ -259,10 +267,21 @@ static int proc_getattr(struct vfsmount 
 {
 	struct inode *inode = dentry->d_inode;
 	struct proc_dir_entry *de = PROC_I(inode)->pde;
-	if (de && de->nlink)
-		inode->i_nlink = de->nlink;
+	struct proc_dir_entry *gde = GPDE(inode);
 
 	generic_fillattr(inode, stat);
+
+	if (de && de->nlink)
+		stat->nlink = de->nlink;
+	/* If the dentry is found in both trees and it is a directory,
+	 * then the inode's nlink count must be altered, because the
+	 * local and global subtrees may differ.
+	 * On the other hand, they may intersect, so the actual nlink
+	 * value is difficult to calculate - an upper estimate is used
+	 * instead.
+	 */
+	if (de && gde && de != gde && gde->nlink > 1)
+		stat->nlink += gde->nlink - 2;
 	return 0;
 }
 
@@ -275,7 +294,7 @@ static struct inode_operations proc_file
  * returns the struct proc_dir_entry for "/proc/tty/driver", and
  * returns "serial" in residual.
  */
-static int xlate_proc_name(const char *name,
+static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
 			   struct proc_dir_entry **ret, const char **residual)
 {
 	const char     		*cp = name, *next;
@@ -283,8 +302,13 @@ static int xlate_proc_name(const char *n
 	int			len;
 	int 			rtn = 0;
 
+	if (*ret) {
+		de_get(*ret);
+		return 0;
+	}
+
 	spin_lock(&proc_subdir_lock);
-	de = &proc_root;
+	de = root;
 	while (1) {
 		next = strchr(cp, '/');
 		if (!next)
@@ -302,12 +326,29 @@ static int xlate_proc_name(const char *n
 		cp += len + 1;
 	}
 	*residual = cp;
-	*ret = de;
+	*ret = de_get(de);
 out:
 	spin_unlock(&proc_subdir_lock);
 	return rtn;
 }
 
+#ifndef CONFIG_VE
+#define xlate_proc_loc_name xlate_proc_name
+#else
+static int xlate_proc_loc_name(const char *name,
+			   struct proc_dir_entry **ret, const char **residual)
+{
+	return __xlate_proc_name(get_exec_env()->proc_root,
+			name, ret, residual);
+}
+#endif
+
+static int xlate_proc_name(const char *name,
+		struct proc_dir_entry **ret, const char **residual)
+{
+	return __xlate_proc_name(&proc_root, name, ret, residual);
+}
+
 static DEFINE_IDR(proc_inum_idr);
 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
@@ -379,6 +420,20 @@ static struct dentry_operations proc_den
 	.d_delete	= proc_delete_dentry,
 };
 
+static struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *dir,
+		struct dentry *d)
+{
+	struct proc_dir_entry *de;
+
+	for (de = dir->subdir; de; de = de->next) {
+		if (de->namelen != d->d_name.len)
+			continue;
+		if (!memcmp(d->d_name.name, de->name, de->namelen))
+			break;
+	}
+	return de_get(de);
+}
+
 /*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
@@ -386,36 +441,111 @@ static struct dentry_operations proc_den
 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = NULL;
-	struct proc_dir_entry * de;
+	struct proc_dir_entry *lde, *gde;
 	int error = -ENOENT;
 
 	lock_kernel();
 	spin_lock(&proc_subdir_lock);
-	de = PDE(dir);
-	if (de) {
-		for (de = de->subdir; de ; de = de->next) {
-			if (de->namelen != dentry->d_name.len)
-				continue;
-			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-				unsigned int ino = de->low_ino;
-
-				spin_unlock(&proc_subdir_lock);
-				error = -EINVAL;
-				inode = proc_get_inode(dir->i_sb, ino, de);
-				spin_lock(&proc_subdir_lock);
-				break;
-			}
-		}
-	}
+	lde = LPDE(dir);
+	if (lde)
+		lde = __proc_lookup(lde, dentry);
+	if (lde && !try_module_get(lde->owner)) {
+		de_put(lde);
+		lde = NULL;
+	}
+#ifdef CONFIG_VE
+	gde = GPDE(dir);
+	if (gde)
+		gde = __proc_lookup(gde, dentry);
+	if (!lde && gde && !try_module_get(gde->owner)) {
+		de_put(gde);
+		gde = NULL;
+	}
+#else
+	gde = NULL;
+#endif
 	spin_unlock(&proc_subdir_lock);
+
+	/*
+	 * There are following possible cases after lookup:
+	 *
+	 * lde		gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * NULL		NULL		ENOENT
+	 * loc		NULL		found in local tree
+	 * loc		glob		found in both trees
+	 * NULL		glob		found in global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 *
+	 * We initialized inode as follows after lookup:
+	 *
+	 * inode->lde	inode->gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * loc		NULL		in local tree
+	 * loc		glob		both trees
+	 * glob		glob		global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * i.e. inode->lde is always initialized
+	 */
+
+	if (lde == NULL && gde == NULL)
+		goto out;
+
+	if (lde != NULL)
+		inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
+	else
+		inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
+
+	/*
+	 * We can sleep in proc_get_inode(), but since we hold i_sem,
+	 * no one can set up GPDE/LPDE on this inode.
+	 */
+	if (!inode)
+		goto out_put;
+
+#ifdef CONFIG_VE
+	GPDE(inode) = de_get(gde);
+	if (gde)
+		__module_get(gde->owner);
+
+	/* dentry found in global tree only must not be writable
+	 * in non-super ve.
+	 */
+	if (lde == NULL && !(gde->mode & S_ISVTX) &&
+			!ve_is_super(dir->i_sb->s_type->owner_env))
+		inode->i_mode &= ~S_IWUGO;
+#endif
+  	unlock_kernel();
+	dentry->d_op = &proc_dentry_operations;
+	d_add(dentry, inode);
+	de_put(lde);
+	de_put(gde);
+	return NULL;
+  
+out_put:
+	if (lde)
+		module_put(lde->owner);
+	else
+		module_put(gde->owner);
+	de_put(lde);
+	de_put(gde);
+out:
 	unlock_kernel();
+ 	return ERR_PTR(error);
+}
 
-	if (inode) {
-		dentry->d_op = &proc_dentry_operations;
-		d_add(dentry, inode);
-		return NULL;
+static inline int in_tree(struct proc_dir_entry *de, struct proc_dir_entry *dir)
+{
+	struct proc_dir_entry *gde;
+
+	for (gde = dir->subdir; gde; gde = gde->next) {
+		if (de->namelen != gde->namelen)
+			continue;
+		if (memcmp(de->name, gde->name, gde->namelen))
+			continue;
+		return 1;
 	}
-	return ERR_PTR(error);
+	return 0;
 }
 
 /*
@@ -430,7 +560,7 @@ struct dentry *proc_lookup(struct inode 
 int proc_readdir(struct file * filp,
 	void * dirent, filldir_t filldir)
 {
-	struct proc_dir_entry * de;
+	struct proc_dir_entry *de, *tmp;
 	unsigned int ino;
 	int i;
 	struct inode *inode = filp->f_dentry->d_inode;
@@ -465,11 +595,8 @@ int proc_readdir(struct file * filp,
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
-				if (!de) {
-					ret = 1;
-					spin_unlock(&proc_subdir_lock);
-					goto out;
-				}
+				if (!de)
+					goto chk_global;
 				if (!i)
 					break;
 				de = de->next;
@@ -478,14 +605,56 @@ int proc_readdir(struct file * filp,
 
 			do {
 				/* filldir passes info to user space */
+				de_get(de);
 				spin_unlock(&proc_subdir_lock);
-				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
-					    de->low_ino, de->mode >> 12) < 0)
+				if (filldir(dirent, de->name, de->namelen,
+						filp->f_pos, de->low_ino,
+						de->mode >> 12) < 0) {
+					de_put(de);
 					goto out;
+				}
 				spin_lock(&proc_subdir_lock);
+				tmp = de->next;
+				de_put(de);
 				filp->f_pos++;
-				de = de->next;
+				de = tmp;
 			} while (de);
+chk_global:
+#ifdef CONFIG_VE
+			de = GPDE(inode);
+			if (de == NULL)
+				goto done;
+
+			de = de->subdir;
+			while (de) {
+				/* skip local names */
+				if (in_tree(de, LPDE(inode))) {
+					de = de->next;
+					continue;
+				}
+
+				if (i > 0) {
+					i--;
+					de = de->next;
+					continue;
+				}
+
+				de_get(de);
+				spin_unlock(&proc_subdir_lock);
+				if (filldir(dirent, de->name, de->namelen,
+						filp->f_pos, de->low_ino,
+						de->mode >> 12) < 0) {
+					de_put(de);
+					goto out;
+				}
+				spin_lock(&proc_subdir_lock);
+				tmp = de->next;
+				de_put(de);
+				filp->f_pos++;
+				de = tmp;
+			}
+done:
+#endif
 			spin_unlock(&proc_subdir_lock);
 	}
 	ret = 1;
@@ -522,8 +691,13 @@ static int proc_register(struct proc_dir
 	dp->low_ino = i;
 
 	spin_lock(&proc_subdir_lock);
+	if (dir->deleted) {
+		spin_unlock(&proc_subdir_lock);
+		return -ENOENT;
+	}
+
 	dp->next = dir->subdir;
-	dp->parent = dir;
+	dp->parent = de_get(dir);
 	dir->subdir = dp;
 	spin_unlock(&proc_subdir_lock);
 
@@ -559,18 +733,18 @@ static struct proc_dir_entry *__proc_cre
 	/* make sure name is valid */
 	if (!name || !strlen(name)) goto out;
 
-	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
+	if (xlate_proc_loc_name(name, parent, &fn) != 0)
 		goto out;
 
 	/* At this point there must not be any '/' characters beyond *fn */
 	if (strchr(fn, '/'))
-		goto out;
+		goto out_put;
 
 	len = strlen(fn);
 
 	aux = kzalloc(sizeof(struct proc_dir_entry_aux) + len + 1, GFP_KERNEL);
 	if (!aux)
-		goto out;
+		goto out_put;
 
 	ent = &aux->pde;
 
@@ -584,8 +758,13 @@ static struct proc_dir_entry *__proc_cre
         aux->pde_users = 0;
 	spin_lock_init(&aux->pde_unload_lock);
 	aux->pde_unload_completion = NULL;
- out:
+	atomic_set(&ent->count, 1);
 	return ent;
+
+out_put:
+	de_put(*parent);
+out:
+	return NULL;
 }
 
 struct proc_dir_entry *proc_symlink(const char *name,
@@ -611,6 +790,7 @@ struct proc_dir_entry *proc_symlink(cons
 			kfree(pdeaux);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -631,6 +811,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 		        kfree(pdeaux);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -672,9 +853,28 @@ struct proc_dir_entry *create_proc_entry
 		        kfree(pdeaux);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
+EXPORT_SYMBOL(remove_proc_glob_entry);
+
+struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
+		struct proc_dir_entry *parent)
+{
+	const char *path;
+	struct proc_dir_entry *ent;
+
+	path = name;
+	if (xlate_proc_name(path, &parent, &name) != 0)
+		return NULL;
+
+	ent = create_proc_entry(name, mode, parent);
+	de_put(parent);
+	return ent;
+}
+
+EXPORT_SYMBOL(create_proc_glob_entry);
 
 struct proc_dir_entry *proc_create(const char *name, mode_t mode,
 				   struct proc_dir_entry *parent,
@@ -732,7 +932,7 @@ void free_proc_entry(struct proc_dir_ent
  * Remove a /proc entry and free it if it's not currently in use.
  * If it is in use, we set the 'deleted' flag.
  */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry **p;
 	struct proc_dir_entry *de;
@@ -740,8 +940,6 @@ void remove_proc_entry(const char *name,
 	const char *fn = name;
 	int len;
 
-	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-		goto out;
 	len = strlen(fn);
 
 	spin_lock(&proc_subdir_lock);
@@ -780,16 +978,43 @@ continue_removing:
 			parent->nlink--;
 		de->nlink = 0;
 		WARN_ON(de->subdir);
-		if (!atomic_read(&de->count))
-			free_proc_entry(de);
-		else {
-			de->deleted = 1;
-			printk("remove_proc_entry: %s/%s busy, count=%d\n",
-				parent->name, de->name, atomic_read(&de->count));
-		}
+		de->deleted = 1;
+		de_put(parent);
+		de_put(de);
 		break;
 	}
 	spin_unlock(&proc_subdir_lock);
-out:
-	return;
+}
+
+void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *path;
+
+	path = name;
+	if (xlate_proc_loc_name(path, &parent, &name) != 0)
+		return;
+
+	__remove_proc_entry(name, parent);
+	de_put(parent);
+}
+
+void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *path;
+
+	path = name;
+	if (xlate_proc_name(path, &parent, &name) != 0)
+		return;
+
+	__remove_proc_entry(name, parent);
+	de_put(parent);
+}
+
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	remove_proc_loc_entry(name, parent);
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		remove_proc_glob_entry(name, parent);
+#endif
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/inode.c kernel-2.6.18-417.el5-028stab121/fs/proc/inode.c
--- kernel-2.6.18-417.el5.orig/fs/proc/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/inode.c	2017-01-13 08:40:30.000000000 -0500
@@ -39,35 +39,27 @@ static match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-{
-	if (de)
-		atomic_inc(&de->count);
-	return de;
-}
-
 /*
  * Decrements the use count and checks for deferred deletion.
  */
-static void de_put(struct proc_dir_entry *de)
+void de_put(struct proc_dir_entry *de)
 {
-	if (de) {	
-		lock_kernel();		
-		if (!atomic_read(&de->count)) {
-			printk("de_put: entry %s already free!\n", de->name);
-			unlock_kernel();
-			return;
-		}
+	if (de) {
+		if (unlikely(!atomic_read(&de->count)))
+			goto out_bad;
 
 		if (atomic_dec_and_test(&de->count)) {
-			if (de->deleted) {
-				printk("de_put: deferred delete of %s\n",
-					de->name);
-				free_proc_entry(de);
-			}
-		}		
-		unlock_kernel();
+			if (unlikely(!de->deleted))
+				goto out_bad;
+
+			free_proc_entry(de);
+		}
 	}
+	return;
+
+out_bad:
+	printk("de_put: bad dentry %s count:%d deleted:%d\n",
+			de->name, atomic_read(&de->count), de->deleted);
 }
 
 /*
@@ -83,12 +75,19 @@ static void proc_delete_inode(struct ino
 	put_pid(PROC_I(inode)->pid);
 
 	/* Let go of any associated proc directory entry */
-	de = PROC_I(inode)->pde;
+	de = LPDE(inode);
 	if (de) {
 		if (de->owner)
 			module_put(de->owner);
 		de_put(de);
 	}
+#ifdef CONFIG_VE
+	de = GPDE(inode);
+	if (de) {
+		module_put(de->owner);
+		de_put(de);
+	}
+#endif
 	clear_inode(inode);
 }
 
@@ -115,6 +114,9 @@ static struct inode *proc_alloc_inode(st
 	ei->pde = NULL;
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+#ifdef CONFIG_VE
+	GPDE(inode) = NULL;
+#endif
 	return inode;
 }
 
@@ -507,19 +509,14 @@ struct inode *proc_get_inode(struct supe
 	 */
 	de_get(de);
 
-	WARN_ON(de && de->deleted);
-
-	if (de != NULL && !try_module_get(de->owner))
-		goto out_mod;
-
 	inode = iget(sb, ino);
 	if (!inode)
-		goto out_ino;
+		goto out_mod;
 
 	PROC_I(inode)->pde = de;
 	if (de) {
 		if (de->mode) {
-			inode->i_mode = de->mode;
+			inode->i_mode = (de->mode & ~S_ISVTX);
 			inode->i_uid = de->uid;
 			inode->i_gid = de->gid;
 		}
@@ -539,9 +536,6 @@ struct inode *proc_get_inode(struct supe
 
 	return inode;
 
-out_ino:
-	if (de != NULL)
-		module_put(de->owner);
 out_mod:
 	de_put(de);
 	return NULL;
@@ -560,7 +554,9 @@ int proc_fill_super(struct super_block *
 	s->s_magic = PROC_SUPER_MAGIC;
 	s->s_op = &proc_sops;
 	s->s_time_gran = 1;
-	
+
+	/* proc_root.owner == NULL, just a formal call */
+	__module_get(proc_root.owner);
 	root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
 	if (!root_inode)
 		goto out_no_root;
@@ -569,6 +565,12 @@ int proc_fill_super(struct super_block *
 	s->s_root = d_alloc_root(root_inode);
 	if (!s->s_root)
 		goto out_no_root;
+#ifdef CONFIG_VE
+	LPDE(root_inode) = de_get(get_exec_env()->proc_root);
+	GPDE(root_inode) = &proc_root;
+#else
+	LPDE(root_inode) = &proc_root;
+#endif
 	return 0;
 
 out_no_root:
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/kcore.c kernel-2.6.18-417.el5-028stab121/fs/proc/kcore.c
--- kernel-2.6.18-417.el5.orig/fs/proc/kcore.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/kcore.c	2017-01-13 08:40:16.000000000 -0500
@@ -87,8 +87,7 @@ static size_t get_kcore_size(int *nphdr,
 			sizeof(struct elf_prpsinfo) +
 			sizeof(struct task_struct);
 	*elf_buflen = PAGE_ALIGN(*elf_buflen);
-	/* Access to kcore is not allowed (except elf headers) */
-	return *elf_buflen;
+	return size + *elf_buflen;
 }
 
 
@@ -304,9 +303,6 @@ read_kcore(struct file *file, char __use
 	} else
 		read_unlock(&kclist_lock);
 
-	/* Access to kcore is not allowed (except elf headers). */
-	return acc;
-
 	/*
 	 * Check to see if our file offset matches with any of
 	 * the addresses in the elf_phdr on our list.
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/kmsg.c kernel-2.6.18-417.el5-028stab121/fs/proc/kmsg.c
--- kernel-2.6.18-417.el5.orig/fs/proc/kmsg.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/kmsg.c	2017-01-13 08:40:21.000000000 -0500
@@ -11,6 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/poll.h>
 #include <linux/fs.h>
+#include <linux/veprintk.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -40,7 +42,7 @@ static ssize_t kmsg_read(struct file *fi
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
-	poll_wait(file, &log_wait, wait);
+	poll_wait(file, &ve_log_wait, wait);
 	if (do_syslog(9, NULL, 0))
 		return POLLIN | POLLRDNORM;
 	return 0;
@@ -53,3 +55,4 @@ const struct file_operations proc_kmsg_o
 	.open		= kmsg_open,
 	.release	= kmsg_release,
 };
+EXPORT_SYMBOL(proc_kmsg_operations);
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/proc_misc.c kernel-2.6.18-417.el5-028stab121/fs/proc/proc_misc.c
--- kernel-2.6.18-417.el5.orig/fs/proc/proc_misc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/proc_misc.c	2017-01-13 08:40:28.000000000 -0500
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/virtinfo.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
@@ -44,7 +45,11 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/compile.h>
 #include <linux/crash_dump.h>
+#include <linux/vmstat.h>
+#include <linux/vsched.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -52,8 +57,10 @@
 #include <asm/div64.h>
 #include "internal.h"
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
 /*
  * Warning: stuff below (imported functions) assumes that its output will fit
  * into one page. For some of those functions it may be wrong. Moreover, we
@@ -82,15 +89,33 @@ static int loadavg_read_proc(char *page,
 {
 	int a, b, c;
 	int len;
-
-	a = avenrun[0] + (FIXED_1/200);
-	b = avenrun[1] + (FIXED_1/200);
-	c = avenrun[2] + (FIXED_1/200);
+	unsigned long __nr_running;
+	int __nr_threads;
+	unsigned long *__avenrun;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+
+	if (ve_is_super(ve)) {
+		__avenrun = &avenrun[0];
+		__nr_running = nr_running();
+		__nr_threads = nr_threads;
+	} 
+#ifdef CONFIG_VE
+	else {
+		__avenrun = &ve->avenrun[0];
+		__nr_running = nr_running_vsched(this_vsched());
+		__nr_threads = atomic_read(&ve->pcounter);
+	}
+#endif
+	a = __avenrun[0] + (FIXED_1/200);
+	b = __avenrun[1] + (FIXED_1/200);
+	c = __avenrun[2] + (FIXED_1/200);
 	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, last_pid);
+		__nr_running, __nr_threads, last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -103,6 +128,13 @@ static int uptime_read_proc(char *page, 
 	cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
 
 	do_posix_clock_monotonic_gettime(&uptime);
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env())) {
+		set_normalized_timespec(&uptime,
+		      uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
+		      uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
+	}
+#endif
 	cputime_to_timespec(idletime, &idle);
 	len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
@@ -113,37 +145,51 @@ static int uptime_read_proc(char *page, 
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-static int meminfo_read_proc(char *page, char **start, off_t off,
-				 int count, int *eof, void *data)
+int meminfo_read_proc_ub(struct user_beancounter *ub, char *page)
 {
-	struct sysinfo i;
+	struct meminfo mi;
 	int len;
-	unsigned long inactive;
-	unsigned long active;
-	unsigned long free;
-	unsigned long committed;
-	unsigned long allowed;
+	unsigned long dummy;
 	struct vmalloc_info vmi;
-	long cached;
 
-	get_zone_counts(&active, &inactive, &free);
+	get_zone_counts(&mi.active, &mi.inactive, &dummy);
 
 /*
  * display in kilobytes.
  */
 #define K(x) ((x) << (PAGE_SHIFT - 10))
-	si_meminfo(&i);
-	si_swapinfo(&i);
-	committed = atomic_read(&vm_committed_space);
-	allowed = ((totalram_pages - hugetlb_total_pages())
+	si_meminfo(&mi.si);
+	si_swapinfo(&mi.si);
+	mi.committed_space = atomic_read(&vm_committed_space);
+	mi.swapcache = total_swapcache_pages;
+	mi.allowed = ((totalram_pages - hugetlb_total_pages())
 		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
-	cached = global_page_state(NR_FILE_PAGES) -
-			total_swapcache_pages - i.bufferram;
-	if (cached < 0)
-		cached = 0;
+	mi.cache = global_page_state(NR_FILE_PAGES) -
+			total_swapcache_pages - mi.si.bufferram;
+	if (mi.cache < 0)
+		mi.cache = 0;
 
 	get_vmalloc_info(&vmi);
+	mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
+	mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
+	mi.vmalloc_total = VMALLOC_TOTAL >> PAGE_SHIFT;
+
+	mi.pi.nr_file_dirty = global_page_state(NR_FILE_DIRTY);
+	mi.pi.nr_writeback = global_page_state(NR_WRITEBACK);
+	mi.pi.nr_anon_pages = global_page_state(NR_ANON_PAGES);
+	mi.pi.nr_file_mapped = global_page_state(NR_FILE_MAPPED);
+	mi.pi.nr_slab = global_page_state(NR_SLAB);
+	mi.pi.nr_pagetable = global_page_state(NR_PAGETABLE);
+	mi.pi.nr_unstable_nfs = global_page_state(NR_UNSTABLE_NFS);
+	mi.pi.nr_bounce = global_page_state(NR_BOUNCE);
+
+	mi.ub = ub;
+#ifdef CONFIG_USER_RESOURCE
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
+#endif
 
 	/*
 	 * Tagged format, for easy grepping and expansion.
@@ -175,40 +221,52 @@ static int meminfo_read_proc(char *page,
 		"VmallocTotal: %8lu kB\n"
 		"VmallocUsed:  %8lu kB\n"
 		"VmallocChunk: %8lu kB\n",
-		K(i.totalram),
-		K(i.freeram),
-		K(i.bufferram),
-		K(cached),
-		K(total_swapcache_pages),
-		K(active),
-		K(inactive),
-		K(i.totalhigh),
-		K(i.freehigh),
-		K(i.totalram-i.totalhigh),
-		K(i.freeram-i.freehigh),
-		K(i.totalswap),
-		K(i.freeswap),
-		K(global_page_state(NR_FILE_DIRTY)),
-		K(global_page_state(NR_WRITEBACK)),
-		K(global_page_state(NR_ANON_PAGES)),
-		K(global_page_state(NR_FILE_MAPPED)),
-		K(global_page_state(NR_SLAB)),
-		K(global_page_state(NR_PAGETABLE)),
-		K(global_page_state(NR_UNSTABLE_NFS)),
-		K(global_page_state(NR_BOUNCE)),
-		K(allowed),
-		K(committed),
-		(unsigned long)VMALLOC_TOTAL >> 10,
-		vmi.used >> 10,
-		vmi.largest_chunk >> 10
+		K(mi.si.totalram),
+		K(mi.si.freeram),
+		K(mi.si.bufferram),
+		K(mi.cache),
+		K(mi.swapcache),
+		K(mi.active),
+		K(mi.inactive),
+		K(mi.si.totalhigh),
+		K(mi.si.freehigh),
+		K(mi.si.totalram - mi.si.totalhigh),
+		K(mi.si.freeram - mi.si.freehigh),
+		K(mi.si.totalswap),
+		K(mi.si.freeswap),
+		K(mi.pi.nr_file_dirty),
+		K(mi.pi.nr_writeback),
+		K(mi.pi.nr_anon_pages),
+		K(mi.pi.nr_file_mapped),
+		K(mi.pi.nr_slab),
+		K(mi.pi.nr_pagetable),
+		K(mi.pi.nr_unstable_nfs),
+		K(mi.pi.nr_bounce),
+		K(mi.allowed),
+		K(mi.committed_space),
+		K(mi.vmalloc_total),
+		K(mi.vmalloc_used),
+		K(mi.vmalloc_largest)
 		);
 
 		len += hugetlb_report_meminfo(page + len);
 
-	return proc_calc_metrics(page, start, off, count, eof, len);
+	return len;
 #undef K
 }
 
+static int meminfo_read_proc(char *page, char **start, off_t off,
+		int count, int *eof, void *data)
+{
+	int err;
+
+	err = meminfo_read_proc_ub(NULL, page);
+	if (err < 0)
+		return err;
+
+	return proc_calc_metrics(page, start, off, count, eof, err);
+}
+
 extern struct seq_operations fragmentation_op;
 static int fragmentation_open(struct inode *inode, struct file *file)
 {
@@ -240,8 +298,17 @@ static int version_read_proc(char *page,
 				 int count, int *eof, void *data)
 {
 	int len;
+	struct new_utsname *utsname;
 
-	strcpy(page, linux_banner);
+	if (ve_is_super(get_exec_env()))
+		strcpy(page, linux_banner);
+	else {
+		utsname = &current->nsproxy->uts_ns->name;
+		sprintf(page, "Linux version %s ("
+		      LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") ("
+		      LINUX_COMPILER ") %s\n",
+		      utsname->release, utsname->version);
+	}
 	len = strlen(page);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
@@ -278,6 +345,9 @@ static int devinfo_show(struct seq_file 
 
 static void *devinfo_start(struct seq_file *f, loff_t *pos)
 {
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
+
 	if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
 		return pos;
 	return NULL;
@@ -433,18 +503,14 @@ static struct file_operations proc_slabs
 #endif
 #endif
 
-static int show_stat(struct seq_file *p, void *v)
+static void show_stat_ve0(struct seq_file *p)
 {
 	int i;
-	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	u64 sum = 0;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	jif = - wall_to_monotonic.tv_sec;
-	if (wall_to_monotonic.tv_nsec)
-		--jif;
 
 	for_each_possible_cpu(i) {
 		int j;
@@ -498,9 +564,91 @@ static int show_stat(struct seq_file *p,
 	for (i = 0; i < NR_IRQS; i++)
 		seq_printf(p, " %u", kstat_irqs(i));
 #endif
+#ifdef CONFIG_VM_EVENT_COUNTERS
+	seq_printf(p, "\nswap %lu %lu\n",
+			vm_events(PSWPIN), vm_events(PSWPOUT));
+#else
+	seq_printf(p, "\nswap 0 0\n");
+#endif
+}
+
+#ifdef CONFIG_VE
+static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
+{
+	int i;
+	u64 user, nice, system;
+	cycles_t idle, iowait;
+	cpumask_t ve_cpus;
+
+	ve_cpu_online_map(env, &ve_cpus);
+
+	user = nice = system = idle = iowait = 0;
+	for_each_cpu_mask(i, ve_cpus) {
+		user += VE_CPU_STATS(env, i)->user;
+		nice += VE_CPU_STATS(env, i)->nice;
+		system += VE_CPU_STATS(env, i)->system;
+
+		idle += ve_sched_get_idle_time(i);
+		iowait += ve_sched_get_iowait_time(i);
+	}
+
+	seq_printf(p, "cpu  %llu %llu %llu %llu %llu 0 0 0\n",
+		(unsigned long long)cputime64_to_clock_t(user),
+		(unsigned long long)cputime64_to_clock_t(nice),
+		(unsigned long long)cputime64_to_clock_t(system),
+		(unsigned long long)cycles_to_clocks(idle),
+		(unsigned long long)cycles_to_clocks(iowait));
+
+	for_each_cpu_mask(i, ve_cpus) {
+		user = VE_CPU_STATS(env, i)->user;
+		nice = VE_CPU_STATS(env, i)->nice;
+		system = VE_CPU_STATS(env, i)->system;
+
+		idle = ve_sched_get_idle_time(i);
+		iowait = ve_sched_get_iowait_time(i);
+		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0 0\n",
+			i,
+			(unsigned long long)cputime64_to_clock_t(user),
+			(unsigned long long)cputime64_to_clock_t(nice),
+			(unsigned long long)cputime64_to_clock_t(system),
+			(unsigned long long)cycles_to_clocks(idle),
+			(unsigned long long)cycles_to_clocks(iowait));
+	}
+	seq_printf(p, "intr 0\nswap 0 0\n");
+}
+#endif
+
+int show_stat(struct seq_file *p, void *v)
+{
+	extern unsigned long total_forks;
+	unsigned long seq, jif;
+	struct ve_struct *env;
+	unsigned long __nr_running, __nr_iowait;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		jif = - wall_to_monotonic.tv_sec;
+		if (wall_to_monotonic.tv_nsec)
+			--jif;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	env = get_exec_env();
+	if (ve_is_super(env)) {
+		show_stat_ve0(p);
+		__nr_running = nr_running();
+		__nr_iowait = nr_iowait();
+	}
+#ifdef CONFIG_VE
+	else {
+		show_stat_ve(p, env);
+		__nr_running = nr_running_vsched(this_vsched());
+		__nr_iowait = nr_iowait_ve();
+		jif += env->start_timespec.tv_sec;
+	}
+#endif
 
 	seq_printf(p,
-		"\nctxt %llu\n"
+		"ctxt %llu\n"
 		"btime %lu\n"
 		"processes %lu\n"
 		"procs_running %lu\n"
@@ -508,8 +656,8 @@ static int show_stat(struct seq_file *p,
 		nr_context_switches(),
 		(unsigned long)jif,
 		total_forks,
-		nr_running(),
-		nr_iowait());
+		__nr_running,
+		__nr_iowait);
 
 	return 0;
 }
@@ -586,6 +734,36 @@ static struct file_operations proc_inter
 	.release	= seq_release,
 };
 
+static int show_softirqs(struct seq_file *p, void *v)
+{
+	int irq, cpu;
+
+	seq_printf(p, "                ");
+	for_each_possible_cpu(cpu)
+		seq_printf(p, "CPU%-8d", cpu);
+	seq_printf(p, "\n");
+
+	for (irq = 0; irq < NR_SOFTIRQS; irq++) {
+		seq_printf(p, "%8s:", softirq_to_name[irq]);
+		for_each_possible_cpu(cpu)
+			seq_printf(p, " %10u", kstat_softirqs_cpu(irq, cpu));
+		seq_printf(p, "\n");
+	}
+	return 0;
+}
+
+static int softirqs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_softirqs, NULL);
+}
+
+static const struct file_operations proc_softirqs_operations = {
+	.open		= softirqs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int filesystems_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -598,7 +776,8 @@ static int cmdline_read_proc(char *page,
 {
 	int len;
 
-	len = sprintf(page, "%s\n", saved_command_line);
+	len = sprintf(page, "%s\n",
+		ve_is_super(get_exec_env()) ? saved_command_line : "quiet");
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -640,12 +819,28 @@ static int ptcache_read_proc(char *page,
 static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
 				   size_t count, loff_t *ppos)
 {
+	struct ve_struct *cur = get_exec_env();
+	static int pnum = 10;
+
 	if (count) {
-		char c;
+		int i, cnt;
+		char c[32];
 
-		if (get_user(c, buf))
+		cnt = min(count, sizeof(c));
+		if (copy_from_user(c, buf, cnt))
 			return -EFAULT;
-		__handle_sysrq(c, NULL, NULL, 0);
+
+		for (i = 0; i < cnt && c[i] != '\n'; i++) {
+			if (!ve_is_super(cur)) {
+				if (!pnum)
+					continue;
+				printk("SysRq: CT#%u sent '%c' magic key.\n",
+					cur->veid, c[i]);
+				pnum--;
+				continue;
+			}
+			__handle_sysrq(c[i], NULL, NULL, 0);
+		}
 	}
 	return count;
 }
@@ -705,6 +900,7 @@ void __init proc_misc_init(void)
 	create_seq_entry("partitions", 0, &proc_partitions_operations);
 	create_seq_entry("stat", 0, &proc_stat_operations);
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
+	create_seq_entry("softirqs", 0, &proc_softirqs_operations);
 #ifdef CONFIG_SLAB
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
 #ifdef CONFIG_DEBUG_SLAB_LEAK
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/proc_tty.c kernel-2.6.18-417.el5-028stab121/fs/proc/proc_tty.c
--- kernel-2.6.18-417.el5.orig/fs/proc/proc_tty.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/proc_tty.c	2017-01-13 08:40:20.000000000 -0500
@@ -106,24 +106,35 @@ static int show_tty_driver(struct seq_fi
 /* iterator */
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct list_head *p;
+	struct tty_driver *drv;
+
 	loff_t l = *pos;
-	list_for_each(p, &tty_drivers)
+	read_lock(&tty_driver_guard);
+	list_for_each_entry(drv, &tty_drivers, tty_drivers) {
+		if (!ve_accessible_strict(drv->owner_env, get_exec_env()))
+			continue;
 		if (!l--)
-			return list_entry(p, struct tty_driver, tty_drivers);
+			return drv;
+	}
 	return NULL;
 }
 
 static void *t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
+	struct tty_driver *drv;
+
 	(*pos)++;
-	return p==&tty_drivers ? NULL :
-			list_entry(p, struct tty_driver, tty_drivers);
+	drv = (struct tty_driver *)v;
+	list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
+		if (ve_accessible_strict(drv->owner_env, get_exec_env()))
+			return drv;
+	}
+	return NULL;
 }
 
 static void t_stop(struct seq_file *m, void *v)
 {
+	read_unlock(&tty_driver_guard);
 }
 
 static struct seq_operations tty_drivers_op = {
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/root.c kernel-2.6.18-417.el5-028stab121/fs/proc/root.c
--- kernel-2.6.18-417.el5.orig/fs/proc/root.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/root.c	2017-01-13 08:40:20.000000000 -0500
@@ -19,7 +19,10 @@
 
 #include "internal.h"
 
-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+#ifndef CONFIG_VE
+struct proc_dir_entry *proc_net, *proc_net_stat;
+#endif
+struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
 
 #ifdef CONFIG_SYSCTL
 struct proc_dir_entry *proc_sys_root;
@@ -31,12 +34,14 @@ static int proc_get_sb(struct file_syste
 	return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
 }
 
-static struct file_system_type proc_fs_type = {
+struct file_system_type proc_fs_type = {
 	.name		= "proc",
 	.get_sb		= proc_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(proc_fs_type);
+
 void __init proc_root_init(void)
 {
 	int err = proc_init_inodecache();
@@ -82,8 +87,19 @@ void __init proc_root_init(void)
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
 )
 {
+	struct ve_struct *ve = get_exec_env();
+
 	generic_fillattr(dentry->d_inode, stat);
-	stat->nlink = proc_root.nlink + nr_processes();
+	stat->nlink = proc_root.nlink;
+	if (ve_is_super(ve))
+		stat->nlink += nr_processes();
+#ifdef CONFIG_VE
+	else
+		/* not really processes count, it's not right, but it's ok */
+		stat->nlink += atomic_read(&ve->pcounter);
+	/* the same logic as in the proc_getattr */
+	stat->nlink += ve->proc_root->nlink - 2;
+#endif
 	return 0;
 }
 
@@ -175,7 +191,9 @@ EXPORT_SYMBOL(proc_create);
 EXPORT_SYMBOL(remove_proc_entry);
 EXPORT_SYMBOL(proc_root);
 EXPORT_SYMBOL(proc_root_fs);
+#ifndef CONFIG_VE
 EXPORT_SYMBOL(proc_net);
 EXPORT_SYMBOL(proc_net_stat);
+#endif
 EXPORT_SYMBOL(proc_bus);
 EXPORT_SYMBOL(proc_root_driver);
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/task_mmu.c kernel-2.6.18-417.el5-028stab121/fs/proc/task_mmu.c
--- kernel-2.6.18-417.el5.orig/fs/proc/task_mmu.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/task_mmu.c	2017-01-13 08:40:19.000000000 -0500
@@ -104,9 +104,12 @@ int proc_exe_link(struct inode *inode, s
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
-		result = 0;
+		result = d_root_check(vma->vm_file->f_dentry,
+				vma->vm_file->f_vfsmnt);
+		if (!result) {
+			*mnt = mntget(vma->vm_file->f_vfsmnt);
+			*dentry = dget(vma->vm_file->f_dentry);
+		}
 	}
 
 	up_read(&mm->mmap_sem);
diff -upr kernel-2.6.18-417.el5.orig/fs/proc/task_nommu.c kernel-2.6.18-417.el5-028stab121/fs/proc/task_nommu.c
--- kernel-2.6.18-417.el5.orig/fs/proc/task_nommu.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/proc/task_nommu.c	2017-01-13 08:40:19.000000000 -0500
@@ -126,9 +126,12 @@ int proc_exe_link(struct inode *inode, s
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
-		result = 0;
+		result = d_root_check(vma->vm_file->f_dentry,
+				vma->vm_file->f_vfsmnt);
+		if (!result) {
+			*mnt = mntget(vma->vm_file->f_vfsmnt);
+			*dentry = dget(vma->vm_file->f_dentry);
+		}
 	}
 
 	up_read(&mm->mmap_sem);
diff -upr kernel-2.6.18-417.el5.orig/fs/quota.c kernel-2.6.18-417.el5-028stab121/fs/quota.c
--- kernel-2.6.18-417.el5.orig/fs/quota.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/quota.c	2017-01-13 08:40:24.000000000 -0500
@@ -18,6 +18,10 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 
+#ifdef CONFIG_QUOTA_COMPAT
+#include <linux/quota-compat.h>
+#endif
+
 /* Check validity of generic quotactl commands */
 static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
 {
@@ -81,11 +85,11 @@ static int generic_quotactl_valid(struct
 	if (cmd == Q_GETQUOTA) {
 		if (((type == USRQUOTA && current->euid != id) ||
 		     (type == GRPQUOTA && !in_egroup_p(id))) &&
-		    !capable(CAP_SYS_ADMIN))
+		    !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 	else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 
 	return 0;
@@ -132,10 +136,10 @@ static int xqm_quotactl_valid(struct sup
 	if (cmd == Q_XGETQUOTA) {
 		if (((type == XQM_USRQUOTA && current->euid != id) ||
 		     (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
-		     !capable(CAP_SYS_ADMIN))
+		     !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	} else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 
@@ -180,7 +184,8 @@ static void quota_sync_sb(struct super_b
 			continue;
 		if (!sb_has_quota_enabled(sb, cnt))
 			continue;
-		discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
+		if (sb_dqopt(sb)->files[cnt])
+			discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
 	}
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -216,7 +221,7 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && sb->s_qcop->quota_sync)
+		if (sb->s_root && sb->s_qcop && sb->s_qcop->quota_sync)
 			quota_sync_sb(sb, type);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
@@ -337,6 +342,208 @@ static int do_quotactl(struct super_bloc
 	return 0;
 }
 
+static struct super_block *quota_get_sb(const char __user *special)
+{
+	struct super_block *sb;
+	struct block_device *bdev;
+	char *tmp;
+
+	tmp = getname(special);
+	if (IS_ERR(tmp))
+		return (struct super_block *)tmp;
+	bdev = lookup_bdev(tmp, FMODE_QUOTACTL);
+	putname(tmp);
+	if (IS_ERR(bdev))
+		return (struct super_block *)bdev;
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (!sb)
+		return ERR_PTR(-ENODEV);
+	return sb;
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+struct compat_dqinfo {
+	unsigned int dqi_bgrace;
+	unsigned int dqi_igrace;
+	unsigned int dqi_flags;
+	unsigned int dqi_blocks;
+	unsigned int dqi_free_blk;
+	unsigned int dqi_free_entry;
+};
+
+struct compat_dqstats {
+	__u32 lookups;
+	__u32 drops;
+	__u32 reads;
+	__u32 writes;
+	__u32 cache_hits;
+	__u32 allocated_dquots;
+	__u32 free_dquots;
+	__u32 syncs;
+	__u32 version;
+};
+
+asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr);
+static long compat_quotactl(unsigned int cmds, unsigned int type,
+		const char __user *special, qid_t id,
+		void __user *addr)
+{
+	struct super_block *sb;
+	long ret;
+
+	sb = NULL;
+	switch (cmds) {
+		case QC_QUOTAON:
+			return sys_quotactl(QCMD(Q_QUOTAON, type),
+					special, id, addr);
+
+		case QC_QUOTAOFF:
+			return sys_quotactl(QCMD(Q_QUOTAOFF, type),
+					special, id, addr);
+
+		case QC_SYNC:
+			return sys_quotactl(QCMD(Q_SYNC, type),
+					special, id, addr);
+
+		case QC_GETQUOTA: {
+			struct if_dqblk idq;
+			struct compat_v2_dqblk cdq;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+			if (ret)
+				break;
+			cdq.dqb_ihardlimit = idq.dqb_ihardlimit;
+			cdq.dqb_isoftlimit = idq.dqb_isoftlimit;
+			cdq.dqb_curinodes = idq.dqb_curinodes;
+			cdq.dqb_bhardlimit = idq.dqb_bhardlimit;
+			cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit;
+			cdq.dqb_curspace = idq.dqb_curspace;
+			cdq.dqb_btime = idq.dqb_btime;
+			cdq.dqb_itime = idq.dqb_itime;
+			ret = 0;
+			if (copy_to_user(addr, &cdq, sizeof(cdq)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_SETQUOTA:
+		case QC_SETUSE:
+		case QC_SETQLIM: {
+			struct if_dqblk idq;
+			struct compat_v2_dqblk cdq;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&cdq, addr, sizeof(cdq)))
+				break;
+			idq.dqb_ihardlimit = cdq.dqb_ihardlimit;
+			idq.dqb_isoftlimit = cdq.dqb_isoftlimit;
+			idq.dqb_curinodes = cdq.dqb_curinodes;
+			idq.dqb_bhardlimit = cdq.dqb_bhardlimit;
+			idq.dqb_bsoftlimit = cdq.dqb_bsoftlimit;
+			idq.dqb_curspace = cdq.dqb_curspace;
+			idq.dqb_valid = 0;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM)
+				idq.dqb_valid |= QIF_LIMITS;
+			if (cmds == QC_SETQUOTA || cmds == QC_SETUSE)
+				idq.dqb_valid |= QIF_USAGE;
+			ret = sb->s_qcop->set_dqblk(sb, type, id, &idq);
+			break;
+		}
+
+		case QC_GETINFO: {
+			struct if_dqinfo iinf;
+			struct compat_dqinfo cinf;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id);
+			if (ret)
+				break;
+			ret = sb->s_qcop->get_info(sb, type, &iinf);
+			if (ret)
+				break;
+			cinf.dqi_bgrace = iinf.dqi_bgrace;
+			cinf.dqi_igrace = iinf.dqi_igrace;
+			cinf.dqi_flags = 0;
+			if (iinf.dqi_flags & DQF_INFO_DIRTY)
+				cinf.dqi_flags |= 0x0010;
+			cinf.dqi_blocks = 0;
+			cinf.dqi_free_blk = 0;
+			cinf.dqi_free_entry = 0;
+			ret = 0;
+			if (copy_to_user(addr, &cinf, sizeof(cinf)))
+				ret = -EFAULT;
+			break;
+		}
+
+		case QC_SETINFO:
+		case QC_SETGRACE:
+		case QC_SETFLAGS: {
+			struct if_dqinfo iinf;
+			struct compat_dqinfo cinf;
+
+			sb = quota_get_sb(special);
+			ret = PTR_ERR(sb);
+			if (IS_ERR(sb))
+				break;
+			ret = check_quotactl_valid(sb, type, Q_SETINFO, id);
+			if (ret)
+				break;
+			ret = -EFAULT;
+			if (copy_from_user(&cinf, addr, sizeof(cinf)))
+				break;
+			iinf.dqi_bgrace = cinf.dqi_bgrace;
+			iinf.dqi_igrace = cinf.dqi_igrace;
+			iinf.dqi_flags = cinf.dqi_flags;
+			iinf.dqi_valid = 0;
+			if (cmds == QC_SETINFO || cmds == QC_SETGRACE)
+				iinf.dqi_valid |= IIF_BGRACE | IIF_IGRACE;
+			if (cmds == QC_SETINFO || cmds == QC_SETFLAGS)
+				iinf.dqi_valid |= IIF_FLAGS;
+			ret = sb->s_qcop->set_info(sb, type, &iinf);
+			break;
+		}
+
+		case QC_GETSTATS: {
+			struct compat_dqstats stat;
+
+			memset(&stat, 0, sizeof(stat));
+			stat.version = 6*10000+5*100+0;
+			ret = 0;
+			if (copy_to_user(addr, &stat, sizeof(stat)))
+				ret = -EFAULT;
+			break;
+		}
+
+		default:
+			ret = -ENOSYS;
+			break;
+	}
+	if (sb && !IS_ERR(sb))
+		drop_super(sb);
+	return ret;
+}
+
+#endif
+
 /*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
@@ -347,25 +554,20 @@ asmlinkage long sys_quotactl(unsigned in
 {
 	uint cmds, type;
 	struct super_block *sb = NULL;
-	struct block_device *bdev;
-	char *tmp;
 	int ret;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
+#ifdef CONFIG_QUOTA_COMPAT
+	if (cmds >= 0x0100 && cmds < 0x3000)
+		return compat_quotactl(cmds, type, special, id, addr);
+#endif
+
 	if (cmds != Q_SYNC || special) {
-		tmp = getname(special);
-		if (IS_ERR(tmp))
-			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
-		putname(tmp);
-		if (IS_ERR(bdev))
-			return PTR_ERR(bdev);
-		sb = get_super(bdev);
-		bdput(bdev);
-		if (!sb)
-			return -ENODEV;
+		sb = quota_get_sb(special);
+		if (IS_ERR(sb))
+			return PTR_ERR(sb);
 	}
 
 	ret = check_quotactl_valid(sb, type, cmds, id);
diff -upr kernel-2.6.18-417.el5.orig/fs/read_write.c kernel-2.6.18-417.el5-028stab121/fs/read_write.c
--- kernel-2.6.18-417.el5.orig/fs/read_write.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/read_write.c	2017-01-13 08:40:40.000000000 -0500
@@ -19,6 +19,8 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
+#include <ub/beancounter.h>
+
 const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
@@ -263,14 +265,20 @@ static void wait_on_retry_sync_kiocb(str
 
 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
+	struct iovec iov = { .iov_base = buf, .iov_len = len };
 	struct kiocb kiocb;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	while (-EIOCBRETRY ==
-		(ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+	kiocb.ki_left = len;
+
+	for (;;) {
+		ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+		if (ret != -EIOCBRETRY)
+			break;
 		wait_on_retry_sync_kiocb(&kiocb);
+	}
 
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
@@ -315,14 +323,20 @@ EXPORT_SYMBOL(vfs_read);
 
 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 {
+	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 	struct kiocb kiocb;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	while (-EIOCBRETRY ==
-	       (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+	kiocb.ki_left = len;
+
+	for (;;) {
+		ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+		if (ret != -EIOCBRETRY)
+			break;
 		wait_on_retry_sync_kiocb(&kiocb);
+	}
 
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
@@ -375,6 +389,29 @@ static inline void file_pos_write(struct
 	file->f_pos = pos;
 }
 
+static inline void bc_acct_write(ssize_t bytes)
+{
+	struct user_beancounter *ub;
+
+	if (bytes > 0) {
+		ub = get_exec_ub();
+		ub_percpu_inc(ub, write);
+		ub_percpu_add(ub, wchar, bytes);
+	}
+}
+
+static inline void bc_acct_read(ssize_t bytes)
+{
+	struct user_beancounter *ub;
+
+	if (bytes > 0) {
+		ub = get_exec_ub();
+		ub_percpu_inc(ub, read);
+		ub_percpu_add(ub, rchar, bytes);
+	}
+}
+
+
 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
 {
 	struct file *file;
@@ -387,6 +424,8 @@ asmlinkage ssize_t sys_read(unsigned int
 		ret = vfs_read(file, buf, count, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_read(ret);
 	}
 
 	return ret;
@@ -405,6 +444,8 @@ asmlinkage ssize_t sys_write(unsigned in
 		ret = vfs_write(file, buf, count, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_write(ret);
 	}
 
 	return ret;
@@ -428,6 +469,8 @@ asmlinkage ssize_t sys_pread64(unsigned 
 		if (file->f_mode & FMODE_PREAD)
 			ret = vfs_read(file, buf, count, &pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_read(ret);
 	}
 
 	return ret;
@@ -449,6 +492,8 @@ asmlinkage ssize_t sys_pwrite64(unsigned
 		if (file->f_mode & FMODE_PWRITE)  
 			ret = vfs_write(file, buf, count, &pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_write(ret);
 	}
 
 	return ret;
@@ -479,6 +524,74 @@ EXPORT_UNUSED_SYMBOL(iov_shorten);  /*  
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 
+ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
+			      unsigned long nr_segs, unsigned long fast_segs,
+			      struct iovec *fast_pointer,
+			      struct iovec **ret_pointer)
+  {
+	unsigned long seg;
+  	ssize_t ret;
+	struct iovec *iov = fast_pointer;
+
+  	/*
+  	 * SuS says "The readv() function *may* fail if the iovcnt argument
+  	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+  	 * traditionally returned zero for zero segments, so...
+  	 */
+	if (nr_segs == 0) {
+		ret = 0;
+  		goto out;
+	}
+
+  	/*
+  	 * First get the "struct iovec" from user memory and
+  	 * verify all the pointers
+  	 */
+	if (nr_segs > UIO_MAXIOV) {
+		ret = -EINVAL;
+  		goto out;
+	}
+	if (nr_segs > fast_segs) {
+  		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL) {
+			ret = -ENOMEM;
+  			goto out;
+		}
+  	}
+	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+		ret = -EFAULT;
+  		goto out;
+	}
+
+  	/*
+	 * According to the Single Unix Specification we should return EINVAL
+	 * if an element length is < 0 when cast to ssize_t or if the
+	 * total length would overflow the ssize_t return value of the
+	 * system call.
+  	 */
+	ret = 0;
+  	for (seg = 0; seg < nr_segs; seg++) {
+  		void __user *buf = iov[seg].iov_base;
+  		ssize_t len = (ssize_t)iov[seg].iov_len;
+
+		/* see if we we're about to use an invalid len or if
+		 * it's about to overflow ssize_t */
+		if (len < 0 || (ret + len < ret)) {
+			ret = -EINVAL;
+  			goto out;
+		}
+		if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+			ret = -EFAULT;
+  			goto out;
+		}
+
+		ret += len;
+  	}
+out:
+	*ret_pointer = iov;
+	return ret;
+}
+
 static ssize_t do_readv_writev(int type, struct file *file,
 			       const struct iovec __user * uvector,
 			       unsigned long nr_segs, loff_t *pos)
@@ -490,64 +603,20 @@ static ssize_t do_readv_writev(int type,
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov=iovstack, *vector;
 	ssize_t ret;
-	int seg;
 	io_fn_t fn;
 	iov_fn_t fnv;
 
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	ret = 0;
-	if (nr_segs == 0)
+	if (!file->f_op) {
+		ret = -EINVAL;
 		goto out;
-
-	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
-	 */
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (!file->f_op)
-		goto out;
-	if (nr_segs > UIO_FASTIOV) {
-		ret = -ENOMEM;
-		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
-		if (!iov)
-			goto out;
 	}
-	ret = -EFAULT;
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector)))
-		goto out;
 
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.  The total length is fitting an ssize_t
-	 *
-	 * Be careful here because iov_len is a size_t not an ssize_t
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
-		ssize_t len = (ssize_t)iov[seg].iov_len;
-
-		if (len < 0)	/* size_t not fitting an ssize_t .. */
-			goto out;
-		if (unlikely(!access_ok(vrfy_dir(type), buf, len)))
-			goto Efault;
-		tot_len += len;
-		if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */
-			goto out;
-	}
-	if (tot_len == 0) {
-		ret = 0;
+	ret = rw_copy_check_uvector(type, uvector, nr_segs,
+			ARRAY_SIZE(iovstack), iovstack, &iov);
+	if (ret <= 0)
 		goto out;
-	}
 
+	tot_len = ret;
 	ret = rw_verify_area(type, file, pos, tot_len);
 	if (ret < 0)
 		goto out;
@@ -601,9 +670,6 @@ out:
 			fsnotify_modify(file->f_dentry);
 	}
 	return ret;
-Efault:
-	ret = -EFAULT;
-	goto out;
 }
 
 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
@@ -645,6 +711,8 @@ sys_readv(unsigned long fd, const struct
 		ret = vfs_readv(file, vec, vlen, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_read(ret);
 	}
 
 	if (ret > 0)
@@ -666,6 +734,8 @@ sys_writev(unsigned long fd, const struc
 		ret = vfs_writev(file, vec, vlen, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_write(ret);
 	}
 
 	if (ret > 0)
@@ -674,6 +744,58 @@ sys_writev(unsigned long fd, const struc
 	return ret;
 }
 
+static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
+{
+#define HALF_LONG_BITS (BITS_PER_LONG / 2)
+	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
+}
+
+asmlinkage ssize_t
+sys_preadv(unsigned long fd, const struct iovec __user *vec,
+	   unsigned long vlen, unsigned long pos_l, unsigned long pos_h)
+{
+	loff_t pos = pos_from_hilo(pos_h, pos_l);
+	struct file *file;
+	ssize_t ret = -EBADF;
+	int fput_needed;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		ret = -ESPIPE;
+		if (file->f_mode & FMODE_PREAD)
+			ret = vfs_readv(file, vec, vlen, &pos);
+		fput_light(file, fput_needed);
+	}
+
+	return ret;
+}
+
+asmlinkage ssize_t
+sys_pwritev(unsigned long fd, const struct iovec __user *vec,
+	   unsigned long vlen, unsigned long pos_l, unsigned long pos_h)
+{
+	loff_t pos = pos_from_hilo(pos_h, pos_l);
+	struct file *file;
+	ssize_t ret = -EBADF;
+	int fput_needed;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		ret = -ESPIPE;
+		if (file->f_mode & FMODE_PWRITE)
+			ret = vfs_writev(file, vec, vlen, &pos);
+		fput_light(file, fput_needed);
+	}
+
+	return ret;
+}
+
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 			   size_t count, loff_t max)
 {
diff -upr kernel-2.6.18-417.el5.orig/fs/reiserfs/file.c kernel-2.6.18-417.el5-028stab121/fs/reiserfs/file.c
--- kernel-2.6.18-417.el5.orig/fs/reiserfs/file.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/reiserfs/file.c	2017-01-13 08:40:40.000000000 -0500
@@ -1333,7 +1333,7 @@ static ssize_t reiserfs_file_write(struc
 			if (err)
 				return err;
 		}
-		result = generic_file_write(file, buf, count, ppos);
+		result = do_sync_write(file, buf, count, ppos);
 
 		if (after_file_end) {	/* Now update i_size and remove the savelink */
 			struct reiserfs_transaction_handle th;
@@ -1565,7 +1565,7 @@ static ssize_t reiserfs_file_write(struc
 }
 
 const struct file_operations reiserfs_file_operations = {
-	.read = generic_file_read,
+	.read = do_sync_read,
 	.write = reiserfs_file_write,
 	.ioctl = reiserfs_ioctl,
 	.mmap = generic_file_mmap,
diff -upr kernel-2.6.18-417.el5.orig/fs/reiserfs/item_ops.c kernel-2.6.18-417.el5-028stab121/fs/reiserfs/item_ops.c
--- kernel-2.6.18-417.el5.orig/fs/reiserfs/item_ops.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/reiserfs/item_ops.c	2017-01-13 08:40:15.000000000 -0500
@@ -23,7 +23,7 @@ static void sd_decrement_key(struct cpu_
 {
 	key->on_disk_key.k_objectid--;
 	set_cpu_key_k_type(key, TYPE_ANY);
-	set_cpu_key_k_offset(key, (loff_t) (-1));
+	set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1));
 }
 
 static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
diff -upr kernel-2.6.18-417.el5.orig/fs/reiserfs/namei.c kernel-2.6.18-417.el5-028stab121/fs/reiserfs/namei.c
--- kernel-2.6.18-417.el5.orig/fs/reiserfs/namei.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/reiserfs/namei.c	2017-01-13 08:40:24.000000000 -0500
@@ -863,6 +863,9 @@ static int reiserfs_rmdir(struct inode *
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
 
+	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
+
 	/* we will be doing 2 balancings and update 2 stat data, we change quotas
 	 * of the owner of the directory and of the owner of the parent directory.
 	 * The quota structure is possibly deleted only on last iput => outside
@@ -887,8 +890,6 @@ static int reiserfs_rmdir(struct inode *
 		goto end_rmdir;
 	}
 
-	inode = dentry->d_inode;
-
 	reiserfs_update_inode_transaction(inode);
 	reiserfs_update_inode_transaction(dir);
 
@@ -951,6 +952,7 @@ static int reiserfs_unlink(struct inode 
 	unsigned long savelink;
 
 	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
 
 	/* in this transaction we can be doing at max two balancings and update
 	 * two stat datas, we change quotas of the owner of the directory and of
@@ -1258,6 +1260,8 @@ static int reiserfs_rename(struct inode 
 
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
+	if (new_dentry_inode)
+		DQUOT_INIT(new_dentry_inode);
 
 	// make sure, that oldname still exists and points to an object we
 	// are going to rename
diff -upr kernel-2.6.18-417.el5.orig/fs/reiserfs/super.c kernel-2.6.18-417.el5-028stab121/fs/reiserfs/super.c
--- kernel-2.6.18-417.el5.orig/fs/reiserfs/super.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/reiserfs/super.c	2017-01-13 08:40:15.000000000 -0500
@@ -445,8 +445,9 @@ static void reiserfs_kill_sb(struct supe
 			dput(REISERFS_SB(s)->priv_root);
 			REISERFS_SB(s)->priv_root = NULL;
 		}
-		kill_block_super(s);
 	}
+
+	kill_block_super(s);
 }
 
 static void reiserfs_put_super(struct super_block *s)
diff -upr kernel-2.6.18-417.el5.orig/fs/select.c kernel-2.6.18-417.el5-028stab121/fs/select.c
--- kernel-2.6.18-417.el5.orig/fs/select.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/select.c	2017-01-13 08:40:18.000000000 -0500
@@ -24,6 +24,8 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 
+#include <ub/ub_mem.h>
+
 #include <asm/uaccess.h>
 
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
@@ -366,7 +368,8 @@ static int core_sys_select(int n, fd_set
 	if (size > sizeof(stack_fds) / 6) {
 		/* Not enough space in on-stack array; must use kmalloc */
 		ret = -ENOMEM;
-		bits = kmalloc(6 * size, GFP_KERNEL);
+		bits = kmalloc(6 * size, size > PAGE_SIZE / 6 ?
+				GFP_KERNEL_UBC : GFP_KERNEL);
 		if (!bits)
 			goto out_nofds;
 	}
@@ -690,6 +693,7 @@ int do_sys_poll(struct pollfd __user *uf
 	   on 64 bit archs to avoid unaligned access */
 	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
 	struct poll_list *stack_pp = NULL;
+	int flags;
 
 	/* Do a sanity check on nfds ... */
 	if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
@@ -701,9 +705,14 @@ int do_sys_poll(struct pollfd __user *uf
 	walk = NULL;
 	i = nfds;
 	err = -ENOMEM;
+
+	flags = GFP_KERNEL_UBC;
 	while(i!=0) {
 		struct poll_list *pp;
 		int num, size;
+		if (i <= POLLFD_PER_PAGE)
+			flags = GFP_KERNEL;
+
 		if (stack_pp == NULL)
 			num = N_STACK_PPS;
 		else
@@ -714,7 +723,7 @@ int do_sys_poll(struct pollfd __user *uf
 		if (!stack_pp)
 			stack_pp = pp = (struct poll_list *)stack_pps;
 		else {
-			pp = kmalloc(size, GFP_KERNEL);
+			pp = kmalloc(size, flags);
 			if (!pp)
 				goto out_fds;
 		}
diff -upr kernel-2.6.18-417.el5.orig/fs/seq_file.c kernel-2.6.18-417.el5-028stab121/fs/seq_file.c
--- kernel-2.6.18-417.el5.orig/fs/seq_file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/seq_file.c	2017-01-13 08:40:19.000000000 -0500
@@ -177,21 +177,23 @@ EXPORT_SYMBOL(seq_read);
 
 static int traverse(struct seq_file *m, loff_t offset)
 {
-	loff_t pos = 0;
+	loff_t pos = 0, index;
 	int error = 0;
 	void *p;
 
 	m->version = 0;
-	m->index = 0;
+	index = 0;
 	m->count = m->from = 0;
-	if (!offset)
+	if (!offset) {
+		m->index = index;
 		return 0;
+	}
 	if (!m->buf) {
 		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
 		if (!m->buf)
 			return -ENOMEM;
 	}
-	p = m->op->start(m, &m->index);
+	p = m->op->start(m, &index);
 	while (p) {
 		error = PTR_ERR(p);
 		if (IS_ERR(p))
@@ -204,15 +206,17 @@ static int traverse(struct seq_file *m, 
 		if (pos + m->count > offset) {
 			m->from = offset - pos;
 			m->count -= m->from;
+			m->index = index;
 			break;
 		}
 		pos += m->count;
 		m->count = 0;
 		if (pos == offset) {
-			m->index++;
+			index++;
+			m->index = index;
 			break;
 		}
-		p = m->op->next(m, p, &m->index);
+		p = m->op->next(m, p, &index);
 	}
 	m->op->stop(m, p);
 	return error;
@@ -345,6 +349,8 @@ int seq_path(struct seq_file *m,
 	if (m->count < m->size) {
 		char *s = m->buf + m->count;
 		char *p = d_path(dentry, mnt, s, m->size - m->count);
+		if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
+			return 0;
 		if (!IS_ERR(p)) {
 			while (s <= p) {
 				char c = *p++;
diff -upr kernel-2.6.18-417.el5.orig/fs/signalfd.c kernel-2.6.18-417.el5-028stab121/fs/signalfd.c
--- kernel-2.6.18-417.el5.orig/fs/signalfd.c	2017-01-13 08:40:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/signalfd.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,269 @@
+/*
+ *  fs/signalfd.c
+ *
+ *  Copyright (C) 2003  Linus Torvalds
+ *
+ *  Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org>
+ *      Changed ->read() to return a siginfo structure instead of signal number.
+ *      Fixed locking in ->poll().
+ *      Added sighand-detach notification.
+ *      Added fd re-use in sys_signalfd() syscall.
+ *      Now using anonymous inode source.
+ *      Thanks to Oleg Nesterov for useful code review and suggestions.
+ *      More comments and suggestions from Arnd Bergmann.
+ *  Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
+ *      Retrieve multiple signals with one read() call
+ *  Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org>
+ *      Attach to the sighand only during read() and poll().
+ */
+
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/list.h>
+#include <linux/anon_inodes.h>
+#include <linux/signalfd.h>
+#include <linux/syscalls.h>
+#include <linux/module.h>
+
+static int signalfd_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+static unsigned int signalfd_poll(struct file *file, poll_table *wait)
+{
+	struct signalfd_ctx *ctx = file->private_data;
+	unsigned int events = 0;
+
+	poll_wait(file, &current->sighand->signalfd_wqh, wait);
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (next_signal(&current->pending, &ctx->sigmask) ||
+	    next_signal(&current->signal->shared_pending,
+			&ctx->sigmask))
+		events |= POLLIN;
+	spin_unlock_irq(&current->sighand->siglock);
+
+	return events;
+}
+
+/*
+ * Copied from copy_siginfo_to_user() in kernel/signal.c
+ */
+static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
+			     siginfo_t const *kinfo)
+{
+	long err;
+
+	BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);
+
+	/*
+	 * Unused members should be zero ...
+	 */
+	err = __clear_user(uinfo, sizeof(*uinfo));
+
+	/*
+	 * If you change siginfo_t structure, please be sure
+	 * this code is fixed accordingly.
+	 */
+	err |= __put_user(kinfo->si_signo, &uinfo->ssi_signo);
+	err |= __put_user(kinfo->si_errno, &uinfo->ssi_errno);
+	err |= __put_user((short) kinfo->si_code, &uinfo->ssi_code);
+	switch (kinfo->si_code & __SI_MASK) {
+	case __SI_KILL:
+		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
+		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+		break;
+	case __SI_TIMER:
+		 err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid);
+		 err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun);
+		 err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
+		break;
+	case __SI_POLL:
+		err |= __put_user(kinfo->si_band, &uinfo->ssi_band);
+		err |= __put_user(kinfo->si_fd, &uinfo->ssi_fd);
+		break;
+	case __SI_FAULT:
+		err |= __put_user((long) kinfo->si_addr, &uinfo->ssi_addr);
+#ifdef __ARCH_SI_TRAPNO
+		err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
+#endif
+		break;
+	case __SI_CHLD:
+		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
+		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+		err |= __put_user(kinfo->si_status, &uinfo->ssi_status);
+		err |= __put_user(kinfo->si_utime, &uinfo->ssi_utime);
+		err |= __put_user(kinfo->si_stime, &uinfo->ssi_stime);
+		break;
+	case __SI_RT: /* This is not generated by the kernel as of now. */
+	case __SI_MESGQ: /* But this is */
+		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
+		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+		err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
+		break;
+	default:
+		/*
+		 * This case catches also the signals queued by sigqueue().
+		 */
+		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
+		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+		err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
+		err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+		break;
+	}
+
+	return err ? -EFAULT: sizeof(*uinfo);
+}
+
+static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
+				int nonblock)
+{
+	ssize_t ret;
+	DECLARE_WAITQUEUE(wait, current);
+
+	spin_lock_irq(&current->sighand->siglock);
+	ret = dequeue_signal(current, &ctx->sigmask, info);
+	switch (ret) {
+	case 0:
+		if (!nonblock)
+			break;
+		ret = -EAGAIN;
+	default:
+		spin_unlock_irq(&current->sighand->siglock);
+		return ret;
+	}
+
+	add_wait_queue(&current->sighand->signalfd_wqh, &wait);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		ret = dequeue_signal(current, &ctx->sigmask, info);
+		if (ret != 0)
+			break;
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		spin_unlock_irq(&current->sighand->siglock);
+		schedule();
+		spin_lock_irq(&current->sighand->siglock);
+	}
+	spin_unlock_irq(&current->sighand->siglock);
+
+	remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
+	__set_current_state(TASK_RUNNING);
+
+	return ret;
+}
+
+/*
+ * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative
+ * error code. The "count" parameter must be at least the size of a
+ * "struct signalfd_siginfo".
+ */
+static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
+			     loff_t *ppos)
+{
+	struct signalfd_ctx *ctx = file->private_data;
+	struct signalfd_siginfo __user *siginfo;
+	int nonblock = file->f_flags & O_NONBLOCK;
+	ssize_t ret, total = 0;
+	siginfo_t info;
+
+	count /= sizeof(struct signalfd_siginfo);
+	if (!count)
+		return -EINVAL;
+
+	siginfo = (struct signalfd_siginfo __user *) buf;
+	do {
+		ret = signalfd_dequeue(ctx, &info, nonblock);
+		if (unlikely(ret <= 0))
+			break;
+		ret = signalfd_copyinfo(siginfo, &info);
+		if (ret < 0)
+			break;
+		siginfo++;
+		total += ret;
+		nonblock = 1;
+	} while (--count);
+
+	return total ? total: ret;
+}
+
+static const struct file_operations signalfd_fops = {
+	.release	= signalfd_release,
+	.poll		= signalfd_poll,
+	.read		= signalfd_read,
+};
+
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user * user_mask,
+				size_t sizemask, int flags)
+{
+	sigset_t sigmask;
+
+	/* Check the SFD_* constants for consistency.  */
+	BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+
+	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+		return -EINVAL;
+
+	if (sizemask != sizeof(sigset_t) ||
+	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
+		return -EINVAL;
+	sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+	signotset(&sigmask);
+
+	return do_signalfd(ufd, &sigmask, flags);
+}
+
+long do_signalfd(int ufd, sigset_t *sigmask, int flags)
+{
+	struct signalfd_ctx *ctx;
+
+	if (ufd == -1) {
+		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx)
+			return -ENOMEM;
+
+		ctx->sigmask = *sigmask;
+
+		/*
+		 * When we call this, the initialization must be complete, since
+		 * anon_inode_getfd() will install the fd.
+		 */
+		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+				       O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
+		if (ufd < 0)
+			kfree(ctx);
+	} else {
+		struct file *file = fget(ufd);
+		if (!file)
+			return -EBADF;
+		ctx = file->private_data;
+		if (file->f_op != &signalfd_fops) {
+			fput(file);
+			return -EINVAL;
+		}
+		spin_lock_irq(&current->sighand->siglock);
+		ctx->sigmask = *sigmask;
+		spin_unlock_irq(&current->sighand->siglock);
+
+		wake_up(&current->sighand->signalfd_wqh);
+		fput(file);
+	}
+
+	return ufd;
+}
+EXPORT_SYMBOL_GPL(do_signalfd);
+
+asmlinkage long sys_signalfd(int ufd, sigset_t __user * user_mask, size_t sizemask)
+{
+	return sys_signalfd4(ufd, user_mask, sizemask, 0);
+}
diff -upr kernel-2.6.18-417.el5.orig/fs/simfs.c kernel-2.6.18-417.el5-028stab121/fs/simfs.c
--- kernel-2.6.18-417.el5.orig/fs/simfs.c	2017-01-13 08:40:23.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/simfs.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,434 @@
+/*
+ *  fs/simfs.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/vzquota.h>
+#include <linux/statfs.h>
+#include <linux/virtinfo.h>
+#include <linux/faudit.h>
+#include <linux/genhd.h>
+#include <linux/reiserfs_fs.h>
+#include <linux/seq_file.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+
+#define SIMFS_GET_LOWER_FS_SB(sb) sb->s_root->d_sb
+
+static struct super_operations sim_super_ops;
+
+static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct super_block *sb;
+	struct inode *inode;
+
+	inode = dentry->d_inode;
+	if (!inode->i_op->getattr) {
+		generic_fillattr(inode, stat);
+		if (!stat->blksize) {
+			unsigned blocks;
+
+			sb = inode->i_sb;
+			blocks = (stat->size + sb->s_blocksize-1) >>
+				sb->s_blocksize_bits;
+			stat->blocks = (sb->s_blocksize / 512) * blocks;
+			stat->blksize = sb->s_blocksize;
+		}
+	} else {
+		int err;
+
+		err = inode->i_op->getattr(mnt, dentry, stat);
+		if (err)
+			return err;
+	}
+
+	sb = mnt->mnt_sb;
+	if (sb->s_op == &sim_super_ops)
+		stat->dev = sb->s_dev;
+	return 0;
+}
+
+static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	struct dq_stat qstat;
+	struct virt_info_quota q;
+	long free_file, adj_file;
+	s64 blk, free_blk, adj_blk;
+	int bsize_bits;
+
+	q.super = sb;
+	q.qstat = &qstat;
+	err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
+	if (err != NOTIFY_OK)
+		return;
+
+	bsize_bits = ffs(buf->f_bsize) - 1;
+	
+	if (qstat.bsoftlimit > qstat.bcurrent)
+		free_blk = (qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
+	else
+		free_blk = 0;
+	/*
+	 * In the regular case, we always set buf->f_bfree and buf->f_blocks to
+	 * the values reported by quota.  In case of real disk space shortage,
+	 * we adjust the values.  We want this adjustment to look as if the
+	 * total disk space were reduced, not as if the usage were increased.
+	 *    -- SAW
+	 */
+	adj_blk = 0;
+	if (buf->f_bfree < free_blk)
+		adj_blk = free_blk - buf->f_bfree;
+	buf->f_bfree = free_blk - adj_blk;
+
+	if (free_blk < buf->f_bavail)
+		buf->f_bavail = free_blk;
+
+	blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
+	buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;
+
+
+	free_file = 0;
+	if (qstat.icurrent < qstat.isoftlimit)
+		free_file = qstat.isoftlimit - qstat.icurrent;
+
+	if (buf->f_type == REISERFS_SUPER_MAGIC)
+		/*
+		 * reiserfs doesn't initialize f_ffree and f_files values of
+		 * kstatfs because it doesn't have an inode limit.
+		 */
+		buf->f_ffree = free_file;
+	adj_file = 0;
+	if (buf->f_ffree < free_file)
+		adj_file = free_file - buf->f_ffree;
+	buf->f_ffree = free_file - adj_file;
+	buf->f_files = qstat.isoftlimit - adj_file;
+}
+
+static int sim_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	struct super_block *lsb;
+	struct kstatfs statbuf;
+
+	err = 0;
+	if (sb->s_op != &sim_super_ops)
+		return 0;
+
+	memset(&statbuf, 0, sizeof(statbuf));
+	lsb = SIMFS_GET_LOWER_FS_SB(sb);
+
+	err = -ENOSYS;
+	if (lsb && lsb->s_op && lsb->s_op->statfs)
+		err = lsb->s_op->statfs(sb->s_root, &statbuf);
+	if (err)
+		return err;
+
+	quota_get_stat(sb, &statbuf);
+
+	buf->f_files    = statbuf.f_files;
+	buf->f_ffree    = statbuf.f_ffree;
+	buf->f_blocks   = statbuf.f_blocks;
+	buf->f_bfree    = statbuf.f_bfree;
+	buf->f_bavail   = statbuf.f_bavail;
+	return 0;
+}
+
+static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
+		void *d, int old_ret)
+{
+	int err;
+
+	switch (n) {
+	case VIRTINFO_FAUDIT_STAT: {
+		struct faudit_stat_arg *arg;
+
+		arg = (struct faudit_stat_arg *)d;
+		err = sim_getattr(arg->mnt, arg->dentry, arg->stat);
+		arg->err = err;
+		}
+		break;
+	case VIRTINFO_FAUDIT_STATFS: {
+		struct faudit_statfs_arg *arg;
+
+		arg = (struct faudit_statfs_arg *)d;
+		err = sim_statfs(arg->sb, arg->stat);
+		arg->err = err;
+		}
+		break;
+	default:
+		return old_ret;
+	}
+	return (err ? NOTIFY_BAD : NOTIFY_OK);
+}
+
+static struct inode *sim_quota_root(struct super_block *sb)
+{
+	return sb->s_root->d_inode;
+}
+
+/*
+ * NOTE: We need to set up the s_bdev field on the super block, since
+ * sys_quotactl() does lookup_bdev() and get_super(), which compare sb->s_bdev.
+ * So this is a MUST if we want an unmodified sys_quotactl to work
+ * correctly on /dev/simfs inside a VE.
+ */
+static int sim_init_blkdev(struct super_block *sb)
+{
+	static struct hd_struct fake_hd;
+	struct block_device *blkdev;
+
+	blkdev = bdget(sb->s_dev);
+	if (blkdev == NULL)
+		return -ENOMEM;
+
+	blkdev->bd_part = &fake_hd;	/* required for bdev_read_only() */
+	sb->s_bdev = blkdev;
+
+	return 0;
+}
+
+static void sim_free_blkdev(struct super_block *sb)
+{
+	/* set bd_part back to NULL */
+	sb->s_bdev->bd_part = NULL;
+	bdput(sb->s_bdev);
+}
+
+static void sim_quota_init(struct super_block *sb)
+{
+	struct virt_info_quota viq;
+
+	viq.super = sb;
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);
+}
+
+static void sim_quota_free(struct super_block *sb)
+{
+	struct virt_info_quota viq;
+
+	viq.super = sb;
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);
+}
+
+static void sim_show_type(struct seq_file *m, struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+	if (vzquota_fake_fstype(current))
+		seq_escape(m, VZQUOTA_FAKE_FSTYPE, " \t\n\\");
+	else
+#endif
+		seq_escape(m, sb->s_type->name, " \t\n\\");
+}
+
+static int sim_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+#ifdef CONFIG_QUOTA
+	if (sb_has_quota_enabled(mnt->mnt_sb, USRQUOTA))
+		seq_puts(m, ",usrquota");
+	if (sb_has_quota_enabled(mnt->mnt_sb, GRPQUOTA))
+		seq_puts(m, ",grpquota");
+#endif
+	return 0;
+}
+
+static struct super_operations sim_super_ops = {
+#ifdef CONFIG_QUOTA
+	.show_type	= &sim_show_type,
+	.show_options	= &sim_show_options,
+#endif
+	.get_quota_root	= &sim_quota_root,
+};
+
+extern struct export_operations export_op_default;
+
+#define SIM_CALL_LOWER(method, sb, args...)		\
+	struct super_block *lsb;			\
+	struct export_operations *lop;			\
+							\
+	lsb = SIMFS_GET_LOWER_FS_SB(sb);		\
+	lop = lsb->s_export_op;				\
+	if (lop->method == NULL)			\
+		lop = &export_op_default;		\
+							\
+	return lop->method(lsb, ## args)
+
+#define SIM_CALL_DENTRY(method, dentry, args...)	\
+	struct super_block *lsb;			\
+	struct export_operations *lop;			\
+							\
+	lsb = (dentry)->d_sb;				\
+	lop = lsb->s_export_op;				\
+	if (lop->method == NULL)			\
+		lop = &export_op_default;		\
+							\
+	return lop->method(dentry, ## args)
+
+static struct dentry *sim_decode_fh(struct super_block *sb,
+		__u32 *fh, int fh_len, int fh_type,
+		int (*acceptable)(void *context, struct dentry *de),
+		void *context)
+{
+	SIM_CALL_LOWER(decode_fh, sb, fh, fh_len, fh_type,
+			acceptable, context);
+}
+
+static int sim_encode_fh(struct dentry *de, __u32 *fh, int *max_len,
+		int connectable)
+{
+	SIM_CALL_DENTRY(encode_fh, de, fh, max_len, connectable);
+}
+
+static int sim_get_name(struct dentry *parent, char *name, struct dentry *child)
+{
+	SIM_CALL_DENTRY(get_name, parent, name, child);
+}
+
+static struct dentry *sim_get_parent(struct dentry *child)
+{
+	SIM_CALL_DENTRY(get_parent, child);
+}
+
+static struct dentry *sim_get_dentry(struct super_block *sb, void *inump)
+{
+	SIM_CALL_LOWER(get_dentry, sb, inump);
+}
+
+static struct dentry * sim_find_dentry(struct super_block *sb, void *obj,
+		void *parent, int (*acceptable)(void *, struct dentry *),
+		void *context)
+{
+	SIM_CALL_LOWER(find_exported_dentry, sb, obj, parent, acceptable, context);
+}
+
+static struct export_operations sim_export_ops = {
+	.decode_fh = sim_decode_fh,
+	.encode_fh = sim_encode_fh,
+	.get_name = sim_get_name,
+	.get_parent = sim_get_parent,
+	.get_dentry = sim_get_dentry,
+	.find_exported_dentry = sim_find_dentry,
+};
+
+static int sim_fill_super(struct super_block *s, void *data)
+{
+	int err;
+	struct nameidata *nd;
+
+	err = set_anon_super(s, NULL);
+	if (err)
+		goto out;
+
+	err = 0;
+	nd = (struct nameidata *)data;
+	s->s_fs_info = mntget(nd->mnt);
+	s->s_root = dget(nd->dentry);
+	s->s_op = &sim_super_ops;
+	s->s_export_op = &sim_export_ops;
+out:
+	return err;
+}
+
+static int sim_get_sb(struct file_system_type *type, int flags,
+		const char *dev_name, void *opt, struct vfsmount *mnt)
+{
+	int err;
+	struct nameidata nd;
+	struct super_block *sb;
+
+	err = -EINVAL;
+	if (opt == NULL)
+		goto out;
+
+	err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	if (err)
+		goto out;
+
+	sb = sget(type, NULL, sim_fill_super, &nd);
+	err = PTR_ERR(sb);
+	if (IS_ERR(sb))
+		goto out_path;
+
+	err = sim_init_blkdev(sb);
+	if (err)
+		goto out_killsb;
+
+	sim_quota_init(sb);
+
+	path_release(&nd);
+	return simple_set_mnt(mnt, sb);
+
+out_killsb:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+out_path:
+	path_release(&nd);
+out:
+	return err;
+}
+
+static void sim_kill_sb(struct super_block *sb)
+{
+	dput(sb->s_root);
+	sb->s_root = NULL;
+	mntput((struct vfsmount *)(sb->s_fs_info));
+
+	sim_quota_free(sb);
+	sim_free_blkdev(sb);
+
+	kill_anon_super(sb);
+}
+
+static struct file_system_type sim_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "simfs",
+	.get_sb		= sim_get_sb,
+	.kill_sb	= sim_kill_sb,
+	.fs_flags	= FS_MANGLE_PROC,
+};
+
+static struct vnotifier_block sim_syscalls = {
+	.notifier_call = sim_systemcall,
+};
+
+static int __init init_simfs(void)
+{
+	int err;
+
+	err = register_filesystem(&sim_fs_type);
+	if (err)
+		return err;
+
+	virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);
+	return 0;
+}
+
+static void __exit exit_simfs(void)
+{
+	virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);
+	unregister_filesystem(&sim_fs_type);
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
+MODULE_LICENSE("GPL v2");
+
+module_init(init_simfs);
+module_exit(exit_simfs);
diff -upr kernel-2.6.18-417.el5.orig/fs/smbfs/inode.c kernel-2.6.18-417.el5-028stab121/fs/smbfs/inode.c
--- kernel-2.6.18-417.el5.orig/fs/smbfs/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/smbfs/inode.c	2017-01-13 08:40:15.000000000 -0500
@@ -231,7 +231,7 @@ smb_invalidate_inodes(struct smb_sb_info
 {
 	VERBOSE("\n");
 	shrink_dcache_sb(SB_of(server));
-	invalidate_inodes(SB_of(server));
+	invalidate_inodes(SB_of(server), 0);
 }
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/fs/smbfs/sock.c kernel-2.6.18-417.el5-028stab121/fs/smbfs/sock.c
--- kernel-2.6.18-417.el5.orig/fs/smbfs/sock.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/smbfs/sock.c	2017-01-13 08:40:16.000000000 -0500
@@ -100,6 +100,7 @@ smb_close_socket(struct smb_sb_info *ser
 
 		VERBOSE("closing socket %p\n", sock);
 		sock->sk->sk_data_ready = server->data_ready;
+		sock->sk->sk_user_data = NULL;
 		server->sock_file = NULL;
 		fput(file);
 	}
diff -upr kernel-2.6.18-417.el5.orig/fs/splice.c kernel-2.6.18-417.el5-028stab121/fs/splice.c
--- kernel-2.6.18-417.el5.orig/fs/splice.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/splice.c	2017-01-13 08:40:18.000000000 -0500
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
+#include <ub/io_acct.h>
 
 struct partial_page {
 	unsigned int offset;
@@ -383,6 +384,8 @@ __generic_file_splice_read(struct file *
 			if (flags & SPLICE_F_NONBLOCK)
 				break;
 
+			virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 			lock_page(page);
 
 			/*
diff -upr kernel-2.6.18-417.el5.orig/fs/stat.c kernel-2.6.18-417.el5-028stab121/fs/stat.c
--- kernel-2.6.18-417.el5.orig/fs/stat.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/stat.c	2017-01-13 08:40:16.000000000 -0500
@@ -15,6 +15,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
+#include <linux/faudit.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -42,11 +43,19 @@ int vfs_getattr(struct vfsmount *mnt, st
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
+	struct faudit_stat_arg arg;
 
 	retval = security_inode_getattr(mnt, dentry);
 	if (retval)
 		return retval;
 
+	arg.mnt = mnt;
+	arg.dentry = dentry;
+	arg.stat = stat;
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
diff -upr kernel-2.6.18-417.el5.orig/fs/super.c kernel-2.6.18-417.el5-028stab121/fs/super.c
--- kernel-2.6.18-417.el5.orig/fs/super.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/super.c	2017-01-13 08:40:40.000000000 -0500
@@ -37,6 +37,7 @@
 #include <linux/idr.h>
 #include <linux/kobject.h>
 #include <linux/mutex.h>
+#include <linux/ve_proto.h>
 #include <asm/uaccess.h>
 
 
@@ -45,7 +46,9 @@ void put_filesystem(struct file_system_t
 struct file_system_type *get_fs_type(const char *name);
 
 LIST_HEAD(super_blocks);
+EXPORT_SYMBOL_GPL(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
+EXPORT_SYMBOL_GPL(sb_lock);
 
 /**
  *	alloc_super	-	create new superblock
@@ -58,6 +61,7 @@ static struct super_block *alloc_super(s
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static struct super_operations default_op;
+	int cpu;
 
 	if (s) {
 		if (security_sb_alloc(s)) {
@@ -65,21 +69,31 @@ static struct super_block *alloc_super(s
 			s = NULL;
 			goto out;
 		}
+		s->s_files = alloc_percpu(struct file_list);
+		if (!s->s_files) {
+			security_sb_free(s);
+			kfree(s);
+			s = NULL;
+			goto out;
+		}
+		for_each_possible_cpu(cpu)
+			file_list_init(per_cpu_ptr(s->s_files, cpu));
 		INIT_LIST_HEAD(&s->s_dirty);
 		INIT_LIST_HEAD(&s->s_io);
-		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
-		lockdep_set_class(&s->s_umount, &type->s_umount_key);
+		lockdep_set_class(&s->s_umount,
+				&type->proto->s_umount_key);
 		/*
 		 * The locking rules for s_lock are up to the
 		 * filesystem. For example ext3fs has different
 		 * lock ordering than usbfs:
 		 */
-		lockdep_set_class(&s->s_lock, &type->s_lock_key);
+		lockdep_set_class(&s->s_lock,
+				&type->proto->s_lock_key);
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
@@ -106,6 +120,7 @@ out:
  */
 static inline void destroy_super(struct super_block *s)
 {
+	free_percpu(s->s_files);
 	security_sb_free(s);
 	kfree(s);
 }
@@ -147,6 +162,7 @@ int __put_super_and_need_restart(struct 
 	BUG_ON(sb->s_count == 0);
 	return 0;
 }
+EXPORT_SYMBOL(__put_super_and_need_restart);
 
 /**
  *	put_super	-	drop a temporary reference to superblock
@@ -244,16 +260,18 @@ void generic_shutdown_super(struct super
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 		/* bad name - it should be evict_inodes() */
-		invalidate_inodes(sb);
+		invalidate_inodes(sb, 0);
 		lock_kernel();
 
 		if (sop->write_super && sb->s_dirt)
 			sop->write_super(sb);
+		if (sb->dq_op && sb->dq_op->shutdown)
+			sb->dq_op->shutdown(sb);
 		if (sop->put_super)
 			sop->put_super(sb);
 
 		/* Forget any remaining inodes */
-		if (invalidate_inodes(sb)) {
+		if (invalidate_inodes(sb, 1)) {
 			printk("VFS: Busy inodes after unmount of %s. "
 			   "Self-destruct in 5 seconds.  Have a nice day...\n",
 			   sb->s_id);
@@ -482,17 +500,26 @@ rescan:
 	spin_unlock(&sb_lock);
 	return NULL;
 }
+EXPORT_SYMBOL(user_get_super);
 
 asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf)
 {
+	dev_t kdev;
         struct super_block *s;
         struct ustat tmp;
         struct kstatfs sbuf;
-	int err = -EINVAL;
+	int err;
+
+	kdev = new_decode_dev(dev);
+	err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
+	if (err)
+		goto out;
+
+	err = -EINVAL;
+	s = user_get_super(kdev);
+	if (s == NULL)
+		goto out;
 
-        s = user_get_super(new_decode_dev(dev));
-        if (s == NULL)
-                goto out;
 	err = vfs_statfs(s->s_root, &sbuf);
 	drop_super(s);
 	if (err)
@@ -518,13 +545,14 @@ out:
 static void mark_files_ro(struct super_block *sb)
 {
 	struct file *f;
+	int cpu;
 
-	file_list_lock();
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+	file_list_lock_sb(sb);
+	for_each_sb_file(f, sb, cpu) {
 		if (S_ISREG(f->f_dentry->d_inode->i_mode) && file_count(f))
 			f->f_mode &= ~FMODE_WRITE;
 	}
-	file_list_unlock();
+	file_list_unlock_sb(sb);
 }
 
 /**
@@ -606,6 +634,13 @@ void emergency_remount(void)
 static struct idr unnamed_dev_idr;
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
 
+/* for compatibility with coreutils still unaware of new minor sizes */
+int unnamed_dev_majors[] = {
+	0, 144, 145, 146, 242, 243, 244, 245,
+	246, 247, 248, 249, 250, 251, 252, 253
+};
+EXPORT_SYMBOL(unnamed_dev_majors);
+
 int set_anon_super(struct super_block *s, void *data)
 {
 	int dev;
@@ -623,13 +658,13 @@ int set_anon_super(struct super_block *s
 	else if (error)
 		return -EAGAIN;
 
-	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
+	if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		idr_remove(&unnamed_dev_idr, dev);
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
-	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_dev = make_unnamed_dev(dev);
 	return 0;
 }
 
@@ -637,8 +672,9 @@ EXPORT_SYMBOL(set_anon_super);
 
 void kill_anon_super(struct super_block *sb)
 {
-	int slot = MINOR(sb->s_dev);
+	int slot;
 
+	slot = unnamed_dev_idx(sb->s_dev);
 	generic_shutdown_super(sb);
 	spin_lock(&unnamed_dev_lock);
 	idr_remove(&unnamed_dev_idr, slot);
diff -upr kernel-2.6.18-417.el5.orig/fs/sync.c kernel-2.6.18-417.el5-028stab121/fs/sync.c
--- kernel-2.6.18-417.el5.orig/fs/sync.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sync.c	2017-01-13 08:40:40.000000000 -0500
@@ -11,6 +11,8 @@
 #include <linux/linkage.h>
 #include <linux/pagemap.h>
 
+#include <ub/beancounter.h>
+
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
@@ -115,6 +117,11 @@ asmlinkage long sys_sync_file_range(int 
 			!S_ISLNK(i_mode))
 		goto out_put;
 
+	if (!sysctl_fsync_enable && !ve_is_super(get_exec_env())) {
+		ret = 0;
+		goto out_put;
+	}
+
 	ret = do_sync_file_range(file, offset, endbyte, flags);
 out_put:
 	fput_light(file, fput_needed);
@@ -130,13 +137,17 @@ int do_sync_file_range(struct file *file
 {
 	int ret;
 	struct address_space *mapping;
+	struct user_beancounter *ub;
 
 	mapping = file->f_mapping;
 	if (!mapping) {
 		ret = -EINVAL;
-		goto out;
+		goto out_noacct;
 	}
 
+	ub = get_exec_ub();
+	ub_percpu_inc(ub, frsync);
+
 	ret = 0;
 	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
 		ret = wait_on_page_writeback_range(mapping,
@@ -159,6 +170,8 @@ int do_sync_file_range(struct file *file
 					endbyte >> PAGE_CACHE_SHIFT);
 	}
 out:
+	ub_percpu_inc(ub, frsync_done);
+out_noacct:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(do_sync_file_range);
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/bin.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/bin.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/bin.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/bin.c	2017-01-13 08:40:19.000000000 -0500
@@ -120,6 +120,9 @@ static int open(struct inode * inode, st
 	struct bin_attribute * attr = to_bin_attr(file->f_dentry);
 	int error = -EINVAL;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	if (!kobj || !attr)
 		goto Done;
 
@@ -196,6 +199,9 @@ int sysfs_create_bin_file(struct kobject
 
 int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
 	return 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/dir.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/dir.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/dir.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/dir.c	2017-01-13 08:40:19.000000000 -0500
@@ -196,6 +196,9 @@ int sysfs_create_dir(struct kobject * ko
 	struct dentry * parent;
 	int error = 0;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj);
 
 	if (kobj->parent)
@@ -339,10 +342,14 @@ void sysfs_remove_subdir(struct dentry *
 
 void sysfs_remove_dir(struct kobject * kobj)
 {
-	struct dentry * dentry = dget(kobj->dentry);
+	struct dentry * dentry;
 	struct sysfs_dirent * parent_sd;
 	struct sysfs_dirent * sd, * tmp;
 
+	if (!ve_sysfs_alowed())
+		return;
+
+	dentry = dget(kobj->dentry);
 	if (!dentry)
 		return;
 
@@ -371,6 +378,9 @@ int sysfs_rename_dir(struct kobject * ko
 	int error = 0;
 	struct dentry * new_dentry, * parent;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	if (!strcmp(kobject_name(kobj), new_name))
 		return -EINVAL;
 
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/file.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/file.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/file.c	2017-01-13 08:40:20.000000000 -0500
@@ -335,7 +335,7 @@ static int sysfs_open_file(struct inode 
 {
 	char *p = d_path(filp->f_dentry, sysfs_mount, last_sysfs_file,
 			sizeof(last_sysfs_file));
-	if (p)
+	if (!IS_ERR(p))
 		memmove(last_sysfs_file, p, strlen(p) + 1);
 	return check_perm(inode,filp);
 }
@@ -465,6 +465,9 @@ int sysfs_add_file(struct dentry * dir, 
 
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry || !attr);
 
 	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
@@ -483,6 +486,9 @@ int sysfs_update_file(struct kobject * k
 	struct dentry * victim;
 	int res = -ENOENT;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	mutex_lock(&dir->d_inode->i_mutex);
 	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
@@ -553,6 +559,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return;
+
 	sysfs_hash_and_remove(kobj->dentry,attr->name);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/group.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/group.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/group.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/group.c	2017-01-13 08:40:19.000000000 -0500
@@ -46,6 +46,9 @@ int sysfs_create_group(struct kobject * 
 	struct dentry * dir;
 	int error;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry);
 
 	if (grp->name) {
@@ -68,6 +71,9 @@ void sysfs_remove_group(struct kobject *
 {
 	struct dentry * dir;
 
+	if (!ve_sysfs_alowed())
+		return;
+
 	if (grp->name)
 		dir = lookup_one_len(grp->name, kobj->dentry,
 				strlen(grp->name));
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/inode.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/inode.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/inode.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/inode.c	2017-01-13 08:40:19.000000000 -0500
@@ -16,8 +16,6 @@
 #include <linux/security.h>
 #include "sysfs.h"
 
-extern struct super_block * sysfs_sb;
-
 static inline void set_inode_attr(struct inode * inode, struct iattr * iattr);
 
 static const struct address_space_operations sysfs_aops = {
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/mount.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/mount.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/mount.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/mount.c	2017-01-13 08:40:22.000000000 -0500
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
+#include <linux/module.h>
 #include <linux/init.h>
 
 #include "sysfs.h"
@@ -14,9 +15,13 @@
 /* Random magic number */
 #define SYSFS_MAGIC 0x62656572
 
+#ifndef CONFIG_VE
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
+#endif
+
 kmem_cache_t *sysfs_dir_cachep;
+EXPORT_SYMBOL(sysfs_dir_cachep);
 
 static struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
@@ -35,6 +40,15 @@ static struct sysfs_dirent sysfs_root = 
 	.s_ino		= 1,
 };
 
+#ifdef CONFIG_VE
+static void init_ve0_sysfs_root(void)
+{
+	get_ve0()->sysfs_root = &sysfs_root;
+}
+
+#define sysfs_root (*(get_exec_env()->sysfs_root))
+#endif
+
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
@@ -76,16 +90,21 @@ static int sysfs_get_sb(struct file_syst
 	return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
 }
 
-static struct file_system_type sysfs_fs_type = {
+struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
 	.get_sb		= sysfs_get_sb,
 	.kill_sb	= kill_litter_super,
 };
 
+EXPORT_SYMBOL(sysfs_fs_type);
+
 int __init sysfs_init(void)
 {
 	int err = -ENOMEM;
 
+#ifdef CONFIG_VE
+	init_ve0_sysfs_root();
+#endif
 	sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
 					      sizeof(struct sysfs_dirent),
 					      0, 0, NULL, NULL);
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/symlink.c kernel-2.6.18-417.el5-028stab121/fs/sysfs/symlink.c
--- kernel-2.6.18-417.el5.orig/fs/sysfs/symlink.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/symlink.c	2017-01-13 08:40:19.000000000 -0500
@@ -85,6 +85,9 @@ int sysfs_create_link(struct kobject * k
 	struct dentry * dentry = kobj->dentry;
 	int error = -EEXIST;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry || !name);
 
 	mutex_lock(&dentry->d_inode->i_mutex);
@@ -103,6 +106,9 @@ int sysfs_create_link(struct kobject * k
 
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
+	if (!ve_sysfs_alowed())	/* VE has no sysfs tree: nothing was ever created here */
+		return;
+
 	sysfs_hash_and_remove(kobj->dentry,name);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/fs/sysfs/sysfs.h kernel-2.6.18-417.el5-028stab121/fs/sysfs/sysfs.h
--- kernel-2.6.18-417.el5.orig/fs/sysfs/sysfs.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/sysfs/sysfs.h	2017-01-13 08:40:19.000000000 -0500
@@ -21,7 +21,16 @@ struct sysfs_dirent {
 	atomic_t		s_event;
 };
 
-extern struct vfsmount * sysfs_mount;
+#ifndef CONFIG_VE
+extern struct vfsmount *sysfs_mount;
+extern struct super_block *sysfs_sb;
+#define ve_sysfs_alowed()	(1)
+#else
+#define sysfs_mount		(get_exec_env()->sysfs_mnt)
+#define sysfs_sb		(get_exec_env()->sysfs_sb)
+#define ve_sysfs_alowed()	(sysfs_sb != NULL)
+#endif
+
 extern kmem_cache_t *sysfs_dir_cachep;
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
@@ -49,7 +58,6 @@ int sysfs_setxattr(struct dentry *dentry
 
 extern spinlock_t sysfs_lock;
 extern struct rw_semaphore sysfs_rename_sem;
-extern struct super_block * sysfs_sb;
 extern const struct file_operations sysfs_dir_operations;
 extern const struct file_operations sysfs_file_operations;
 extern const struct file_operations bin_fops;
diff -upr kernel-2.6.18-417.el5.orig/fs/utimes.c kernel-2.6.18-417.el5-028stab121/fs/utimes.c
--- kernel-2.6.18-417.el5.orig/fs/utimes.c	2017-01-13 08:40:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/utimes.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,184 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/stat.h>
+#include <linux/utime.h>
+#include <linux/compat.h>
+#include <asm/uaccess.h>
+
+static int nsec_valid(long nsec)	/* 1 iff nsec is UTIME_NOW/UTIME_OMIT or in [0, 999999999] */
+{
+	if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
+		return 1;
+
+	return nsec >= 0 && nsec <= 999999999;
+}
+
+static int utimes_common(struct dentry *dentry, struct timespec *times)	/* apply atime/mtime (NULL => "now") via notify_change() */
+{
+	int error;
+	struct iattr newattrs;
+	struct inode *inode = dentry->d_inode;
+
+	error = -EROFS;
+	if (IS_RDONLY(inode))
+		goto out;
+
+	if (times && times[0].tv_nsec == UTIME_NOW &&
+		     times[1].tv_nsec == UTIME_NOW)
+		times = NULL;	/* both "now": identical to times == NULL, take that path */
+
+	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
+	if (times) {
+		if (times[0].tv_nsec == UTIME_OMIT)
+			newattrs.ia_valid &= ~ATTR_ATIME;	/* leave atime untouched */
+		else if (times[0].tv_nsec != UTIME_NOW) {
+			newattrs.ia_atime.tv_sec = times[0].tv_sec;
+			newattrs.ia_atime.tv_nsec = times[0].tv_nsec;
+			newattrs.ia_valid |= ATTR_ATIME_SET;
+		}
+
+		if (times[1].tv_nsec == UTIME_OMIT)
+			newattrs.ia_valid &= ~ATTR_MTIME;	/* leave mtime untouched */
+		else if (times[1].tv_nsec != UTIME_NOW) {
+			newattrs.ia_mtime.tv_sec = times[1].tv_sec;
+			newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
+			newattrs.ia_valid |= ATTR_MTIME_SET;
+		}
+		/*
+		 * if neither ATTR_ATIME_SET nor ATTR_MTIME_SET were used
+		 * we need to check permissions, because
+		 * inode_change_ok() won't do it.
+		 */
+		if (!(newattrs.ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET))) {
+			error = -EPERM;
+			if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+				goto out;
+		}
+	} else {
+		/*
+		 * If times is NULL (or both times are UTIME_NOW),
+		 * then we need to check permissions, because
+		 * inode_change_ok() won't do it.
+		 */
+		error = -EACCES;
+		if (IS_IMMUTABLE(inode))
+			goto out;
+
+		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
+			error = permission(inode, MAY_WRITE, NULL);	/* write access suffices to set times to "now" */
+			if (error)
+				goto out;
+		}
+	}
+	mutex_lock(&inode->i_mutex);	/* notify_change() must run under i_mutex */
+	error = notify_change(dentry, &newattrs);
+	mutex_unlock(&inode->i_mutex);
+
+out:
+	return error;
+}
+
+/*
+ * __do_utimes - change times on filename or file descriptor
+ * @dfd: open file descriptor, -1 or AT_FDCWD
+ * @filename: path name or NULL
+ * @times: new times or NULL
+ * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
+ *
+ * If filename is NULL and dfd refers to an open file, then operate on
+ * the file.  Otherwise look up filename, possibly using dfd as a
+ * starting point.
+ *
+ * If times==NULL, set access and modification to current time,
+ * must be owner or have write permission.
+ * Else, update from *times, must be owner or super user.
+ */
+static long __do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
+{
+	int error = -EINVAL;
+
+	if (times && (!nsec_valid(times[0].tv_nsec) ||
+		      !nsec_valid(times[1].tv_nsec))) {
+		goto out;
+	}
+
+	if (flags & ~AT_SYMLINK_NOFOLLOW)	/* reject unknown flags */
+		goto out;
+
+	if (filename == NULL && dfd != AT_FDCWD) {	/* futimens() case: operate on the open fd itself */
+		struct file *file;
+
+		if (flags & AT_SYMLINK_NOFOLLOW)	/* meaningless for an fd */
+			goto out;
+
+		file = fget(dfd);
+		error = -EBADF;
+		if (!file)
+			goto out;
+
+		error = utimes_common(file->f_dentry, times);
+		fput(file);
+	} else {
+		struct nameidata nd;
+		int lookup_flags = 0;
+
+		if (!(flags & AT_SYMLINK_NOFOLLOW))
+			lookup_flags |= LOOKUP_FOLLOW;
+
+		error = __user_walk_fd(dfd, filename, lookup_flags, &nd);
+		if (error)
+			goto out;
+
+		error = utimes_common(nd.dentry, times);
+		path_release(&nd);
+	}
+
+out:
+	return error;
+}
+
+asmlinkage long sys_utimensat(int dfd, char __user *filename,
+		struct timespec __user *utimes, int flags)
+{
+	struct timespec tstimes[2];	/* [0] = atime, [1] = mtime */
+
+	if (utimes) {
+		if (copy_from_user(&tstimes, utimes, sizeof(tstimes)))
+			return -EFAULT;
+
+		/* Nothing to do, we must not even check the path.  */
+		if (tstimes[0].tv_nsec == UTIME_OMIT &&
+		    tstimes[1].tv_nsec == UTIME_OMIT)
+			return 0;
+	}
+
+	return __do_utimes(dfd, filename,  utimes ? tstimes : NULL, flags);
+}
+
+#ifdef CONFIG_COMPAT
+
+asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename,
+		struct compat_timespec __user *t, int flags)
+{
+	struct timespec tv[2];
+
+	if  (t) {
+		if (get_compat_timespec(&tv[0], &t[0]) ||
+		    get_compat_timespec(&tv[1], &t[1]))
+			return -EFAULT;
+
+		if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW)
+		    && tv[0].tv_sec != 0)	/* NOTE(review): compat path demands tv_sec==0 with NOW/OMIT; native path above has no such check -- confirm intended */
+			return -EINVAL;
+		if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW)
+		    && tv[1].tv_sec != 0)
+			return -EINVAL;
+
+		if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
+			return 0;	/* nothing to do, do not even touch the path */
+	}
+	return __do_utimes(dfd, filename, t ? tv : NULL, flags);
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/fs/vzdq_file.c kernel-2.6.18-417.el5-028stab121/fs/vzdq_file.c
--- kernel-2.6.18-417.el5.orig/fs/vzdq_file.c	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/vzdq_file.c	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,928 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota files as proc entry implementation.
+ * It is required for std quota tools to work correctly as they are expecting
+ * aquota.user and aquota.group files.
+ */
+
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/quotaio_v2.h>
+#include <asm/uaccess.h>
+
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzdq_tree.h>
+#include <linux/vzquota.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * File read operation
+ *
+ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
+ * perhaps) abuse vz_quota_mutex.
+ * Taking a global mutex for lengthy and user-controlled operations inside
+ * VPSs is not a good idea in general.
+ * In this case, the reasons for taking this mutex are completely unclear,
+ * especially taking into account that the only function that has comments
+ * about the necessity to be called under this mutex
+ * (create_proc_quotafile) is actually called OUTSIDE it.
+ *
+ * --------------------------------------------------------------------- */
+
+#define DQBLOCK_SIZE		1024
+#define DQUOTBLKNUM		21U
+#define DQTREE_DEPTH		4
+#define TREENUM_2_BLKNUM(num)	(((num) + 1) << 1)
+#define ISINDBLOCK(num)		((num)%2 != 0)
+#define FIRST_DATABLK	  	2  /* first even number */
+#define LAST_IND_LEVEL		(DQTREE_DEPTH - 1)
+#define CONVERT_LEVEL(level)	((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
+#define GETLEVINDX(ind, lev)	(((ind) >> QUOTAID_BBITS*(lev)) \
+					& QUOTATREE_BMASK)
+
+#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
+#error xBITS and DQTREE_DEPTH does not correspond
+#endif
+
+#define BLOCK_NOT_FOUND	1
+
+/* data for quota file -- one per proc entry */
+struct quotatree_data {
+	struct list_head	list;
+	struct vz_quota_master	*qmblk;
+	int			type;	/* type of the tree: USRQUOTA or GRPQUOTA */
+};
+
+/* serialized by vz_quota_mutex */
+static LIST_HEAD(qf_data_head);
+
+static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
+static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
+static const char aquota_user[] = "aquota.user";	/* names of the emulated proc quota files */
+static const char aquota_group[] = "aquota.group";
+
+
+static inline loff_t get_depoff(int depth)	/* first block number used by index level @depth (block 0 is the header) */
+{
+	loff_t res = 1;
+	while (depth) {
+		res += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
+		depth--;
+	}
+	return res;
+}
+
+static inline loff_t get_blknum(loff_t num, int depth)	/* block number of index node @num at level @depth */
+{
+	loff_t res;
+	res = (num << 1) + get_depoff(depth);	/* get_depoff() is odd, so index blocks land on odd numbers (see ISINDBLOCK) */
+	return res;
+}
+
+static int get_depth(loff_t num)	/* inverse of get_blknum(): index level containing block @num, or -1 */
+{
+	int i;
+	for (i = 0; i < DQTREE_DEPTH; i++) {
+		if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1
+				|| num < get_depoff(i + 1)))
+			return i;
+	}
+	return -1;
+}
+
+static inline loff_t get_offset(loff_t num)	/* node index within its level for block @num, or -1 */
+{
+	loff_t res, tmp;
+
+	tmp = get_depth(num);
+	if (tmp < 0)
+		return -1;
+	num -= get_depoff(tmp);
+	BUG_ON(num < 0);
+	res = num >> 1;
+
+	return res;
+}
+
+static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
+{
+	/* return maximum available block num */
+	return tree->levels[level].freenum;
+}
+
+static inline loff_t get_block_num(struct quotatree_tree *tree)	/* total block count, reported as dqi_blocks in the header */
+{
+	loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
+
+	quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
+	max_quot = TREENUM_2_BLKNUM(quot_blk_num);
+	ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
+	max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
+		: get_blknum(ind_blk_num, 0);
+
+	return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
+}
+
+/* Fill @buf with an emulated v2 quota-file header plus dqinfo record (block 0) */
+static int read_header(void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int type)
+{
+	struct v2_disk_dqheader *dqh;
+	struct v2_disk_dqinfo *dq_disk_info;
+
+	dqh = buf;
+	dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
+
+	dqh->dqh_magic = vzquota_magics[type];
+	dqh->dqh_version = vzquota_versions[type];
+
+	dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
+	dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
+	dq_disk_info->dqi_flags = 0;	/* no flags */
+	dq_disk_info->dqi_blocks = get_block_num(tree);
+	dq_disk_info->dqi_free_blk = 0;	/* first block in the file */
+	dq_disk_info->dqi_free_entry = FIRST_DATABLK;
+
+	return 0;
+}
+
+static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)	/* fill index block @buf with child block numbers of node @p */
+{
+	int i, j, lev_num;
+
+	lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
+	for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) {
+		struct quotatree_node *next, *parent;
+
+		parent = p;
+		next = p;
+		for (j = lev_num; j >= 0; j--) {
+			if (!next->blocks[GETLEVINDX(i,j)]) {
+				buf[i] = 0;	/* no such subtree: sparse entry */
+				goto bad_branch;
+			}
+			parent = next;
+			next = next->blocks[GETLEVINDX(i,j)];
+		}
+		buf[i] = (depth == DQTREE_DEPTH - 1) ?
+			TREENUM_2_BLKNUM(parent->num)
+			: get_blknum(next->num, depth + 1);
+
+	bad_branch:
+		;
+	}
+
+	return 0;
+}
+
+/*
+ * Write index block to disk (or buffer)
+ * @buf has length 256*sizeof(u_int32_t) bytes
+ */
+static int read_index_block(int num, u_int32_t *buf,
+		struct quotatree_tree *tree)
+{
+	struct quotatree_node *p;
+	u_int32_t index;
+	loff_t off;
+	int depth, res;
+
+	res = BLOCK_NOT_FOUND; 
+	index = 0;
+	depth = get_depth(num);
+	off = get_offset(num);
+	if (depth < 0 || off < 0)
+		return -EINVAL;
+
+	list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
+			list) {
+		if (p->num >= off)
+			res = 0;	/* some node at/after @off exists, so we are not past EOF */
+		if (p->num != off)
+			continue;
+		get_block_child(depth, p, buf);
+		break;
+	}
+
+	return res;
+}
+
+static inline void convert_quot_format(struct v2_disk_dqblk *dq,
+		struct vz_quota_ugid *vzq)	/* translate vz per-ugid usage/limits into the v2 on-disk dqblk layout */
+{
+	dq->dqb_id = vzq->qugid_id;
+	dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
+	dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
+	dq->dqb_curinodes = vzq->qugid_stat.icurrent;
+	dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;	/* bytes -> quota blocks */
+	dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
+	dq->dqb_curspace = vzq->qugid_stat.bcurrent;
+	dq->dqb_btime = vzq->qugid_stat.btime;
+	dq->dqb_itime = vzq->qugid_stat.itime;
+}
+
+static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)	/* emulate data block @num: dqdbheader + packed dquot entries from one leaf */
+{
+	int res, i, entries = 0;
+	struct v2_disk_dqdbheader *dq_header;
+	struct quotatree_node *p;
+	struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
+
+	res = BLOCK_NOT_FOUND;
+	dq_header = buf;
+	memset(dq_header, 0, sizeof(*dq_header));
+
+	list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
+			list) {
+		if (TREENUM_2_BLKNUM(p->num) >= num)
+			res = 0;	/* some leaf at/after @num exists, so we are not past EOF */
+		if (TREENUM_2_BLKNUM(p->num) != num)
+			continue;
+
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			if (!p->blocks[i])
+				continue;	/* unused slot in the leaf */
+			convert_quot_format(blk + entries,
+					(struct vz_quota_ugid *)p->blocks[i]);
+			entries++;
+			res = 0;
+		}
+		break;
+	}
+	dq_header->dqdh_entries = entries;
+
+	return res;
+}
+
+static int read_block(int num, void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int magic)	/* dispatch: block 0 = header, odd = index block, even = dquot data */
+{
+	int res;
+
+	memset(buf, 0, DQBLOCK_SIZE);
+	if (!num)
+		res = read_header(buf, tree, dq_ugid_info, magic);
+	else if (ISINDBLOCK(num))
+		res = read_index_block(num, (u_int32_t*)buf, tree);
+	else
+		res = read_dquot(num, buf, tree);
+
+	return res;
+}
+
+/*
+ * FIXME: this function can handle quota files up to 2GB only.
+ */
+static int read_proc_quotafile(char *page, char **start, off_t off, int count,
+		int *eof, void *data)
+{
+	off_t blk_num, blk_off, buf_off;
+	char *tmp;
+	size_t buf_size;
+	struct quotatree_data *qtd;
+	struct quotatree_tree *tree;
+	struct dq_info *dqi;
+	int res;
+
+	*start = NULL;
+	tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);	/* bounce buffer for one 1K emulated block */
+	if (!tmp)
+		return -ENOMEM;
+
+	qtd = data;
+	mutex_lock(&vz_quota_mutex);
+	mutex_lock(&qtd->qmblk->dq_mutex);
+
+	res = 0;
+	tree = QUGID_TREE(qtd->qmblk, qtd->type);
+	if (!tree) {
+		*eof = 1;	/* no tree for this quota type: empty file */
+		goto out_dq;
+	}
+
+	dqi = &qtd->qmblk->dq_ugid_info[qtd->type];
+
+	buf_off = 0;
+	buf_size = count;
+	blk_num = off / DQBLOCK_SIZE;
+	blk_off = off % DQBLOCK_SIZE;
+
+	while (buf_size > 0) {
+		off_t len;
+
+		len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
+		res = read_block(blk_num, tmp, tree, dqi, qtd->type);
+		if (res < 0)
+			goto out_err;
+		if (res == BLOCK_NOT_FOUND) {
+			*eof = 1;
+			break;
+		} 
+		memcpy(page + buf_off, tmp + blk_off, len);
+
+		blk_num++;
+		buf_size -= len;
+		blk_off = 0;	/* only the first block may be partially consumed */
+		buf_off += len;
+	}
+	res = buf_off;
+
+out_err:
+	*start += count;	/* proc "start" convention: caller advances f_pos by this much */
+out_dq:
+	mutex_unlock(&qtd->qmblk->dq_mutex);
+	mutex_unlock(&vz_quota_mutex);
+	kfree(tmp);
+
+	return res;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID/aquota.* files
+ *
+ * FIXME: this code lacks serialization of read/readdir/lseek.
+ * However, this problem should be fixed after the mainstream issue of what
+ * appears to be non-atomic read and update of file position in sys_read.
+ *
+ * --------------------------------------------------------------------- */
+
+static inline unsigned long vzdq_aquot_getino(dev_t dev)	/* synthetic inode number base for entries of device @dev */
+{
+	return 0xec000000UL + dev;
+}
+
+static inline dev_t vzdq_aquot_getidev(struct inode *inode)	/* dev_t previously stashed by vzdq_aquot_setidev() */
+{
+	return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;
+}
+
+static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)	/* reuse the proc_get_link slot as dev_t storage */
+{
+	PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;
+}
+
+static ssize_t vzdq_aquotf_read(struct file *file,
+		char __user *buf, size_t size, loff_t *ppos)	/* read handler for /proc/vz/vzaquota/QID/aquota.{user,group} */
+{
+	char *page;
+	size_t bufsize;
+	ssize_t l, l2, copied;
+	char *start;
+	struct inode *inode;
+	struct block_device *bdev;
+	struct super_block *sb;
+	struct quotatree_data data;
+	int eof, err;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out_err;
+
+	err = -ENODEV;
+	inode = file->f_dentry->d_inode;
+	bdev = bdget(vzdq_aquot_getidev(inode));	/* device was stashed in the inode at lookup time */
+	if (bdev == NULL)
+		goto out_err;
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (sb == NULL)
+		goto out_err;
+	data.qmblk = vzquota_find_qmblk(sb);
+	data.type = PROC_I(inode)->fd - 1;	/* fd slot encodes quota type + 1 */
+	drop_super(sb);
+	if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
+		goto out_err;
+
+	copied = 0;
+	l = l2 = 0;
+	while (1) {
+		bufsize = min(size, (size_t)PAGE_SIZE);	/* one bounce page per iteration */
+		if (bufsize <= 0)
+			break;
+
+		l = read_proc_quotafile(page, &start, *ppos, bufsize,
+				&eof, &data);
+		if (l <= 0)
+			break;
+
+		l2 = copy_to_user(buf, page, l);
+		copied += l - l2;
+		if (l2)
+			break;
+
+		buf += l;
+		size -= l;
+		*ppos += (unsigned long)start;	/* helper reports the f_pos advance via *start */
+		l = l2 = 0;
+	}
+
+	qmblk_put(data.qmblk);
+	free_page((unsigned long)page);
+	if (copied)
+		return copied;
+	else if (l2)		/* last copy_to_user failed */
+		return -EFAULT;
+	else			/* read error or EOF */
+		return l;
+
+out_err:
+	if (page != NULL)
+		free_page((unsigned long)page);
+	return err;
+}
+
+static struct file_operations vzdq_aquotf_file_operations = {
+	.read		= &vzdq_aquotf_read,
+};
+
+static struct inode_operations vzdq_aquotf_inode_operations = {
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID directory
+ *
+ * --------------------------------------------------------------------- */
+
+static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)	/* per-QID dir: ".", "..", aquota.user, aquota.group */
+{
+	loff_t n;
+	int err;
+
+	n = file->f_pos;
+	for (err = 0; !err; n++) {
+		/* ppc32 can't cmp 2 long long's in switch, calls __cmpdi2() */
+		switch ((unsigned long)n) {
+		case 0:
+			err = (*filler)(data, ".", 1, n,
+					file->f_dentry->d_inode->i_ino,
+					DT_DIR);
+			break;
+		case 1:
+			err = (*filler)(data, "..", 2, n,
+					parent_ino(file->f_dentry), DT_DIR);
+			break;
+		case 2:
+			err = (*filler)(data, aquota_user,
+					sizeof(aquota_user)-1, n,
+					file->f_dentry->d_inode->i_ino
+								+ USRQUOTA + 1,
+					DT_REG);	/* ino matches vzdq_aquotq_lookset() numbering */
+			break;
+		case 3:
+			err = (*filler)(data, aquota_group,
+					sizeof(aquota_group)-1, n,
+					file->f_dentry->d_inode->i_ino 
+								+ GRPQUOTA + 1,
+					DT_REG);
+			break;
+		default:
+			goto out;	/* all entries emitted */
+		}
+	}
+out:
+	file->f_pos = n;
+	return err;
+}
+
+struct vzdq_aquotq_lookdata {
+	dev_t dev;
+	int type;	/* USRQUOTA or GRPQUOTA */
+	struct vz_quota_master *qmblk;
+};
+
+static int vzdq_aquotq_looktest(struct inode *inode, void *data)	/* iget5 match: same ops, same device, same quota type */
+{
+	struct vzdq_aquotq_lookdata *d;
+
+	d = data;
+	return inode->i_op == &vzdq_aquotf_inode_operations &&
+	       vzdq_aquot_getidev(inode) == d->dev &&
+	       PROC_I(inode)->fd == d->type + 1;
+}
+
+static int vzdq_aquotq_lookset(struct inode *inode, void *data)	/* iget5 init: set up a fresh aquota.* file inode */
+{
+	struct vzdq_aquotq_lookdata *d;
+	struct quotatree_tree *tree;
+
+	d = data;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1;
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_nlink = 1;
+	inode->i_op = &vzdq_aquotf_inode_operations;
+	inode->i_fop = &vzdq_aquotf_file_operations;
+	PROC_I(inode)->fd = d->type + 1;	/* +1 so the default 0 means "no type set" */
+	vzdq_aquot_setidev(inode, d->dev);
+
+	/* Setting size */
+	tree = QUGID_TREE(d->qmblk, d->type);
+	inode->i_size = get_block_num(tree) * 1024;	/* block count * DQBLOCK_SIZE */
+	return 0;
+}
+
+static int vzdq_aquotq_revalidate(struct dentry *vdentry, struct nameidata *nd)
+{
+	return 0;	/* never valid: force a fresh lookup on every access */
+}
+
+static struct dentry_operations vzdq_aquotq_dentry_operations = {
+	.d_revalidate	= &vzdq_aquotq_revalidate,
+};
+
+static struct vz_quota_master *find_qmblk_by_dev(dev_t dev)	/* qmblk of the sb mounted on @dev, or NULL if absent/bad */
+{
+	struct super_block *sb;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;
+	sb = user_get_super(dev);
+	if (sb != NULL) {
+		qmblk = vzquota_find_qmblk(sb);
+		drop_super(sb);
+
+		if (qmblk == VZ_QUOTA_BAD)
+			qmblk = NULL;	/* treat the "bad" marker as absent */
+	}
+
+	return qmblk;
+}
+
+static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
+		struct dentry *dentry,
+		struct nameidata *nd)	/* resolve aquota.user / aquota.group inside a QID directory */
+{
+	struct inode *inode;
+	struct vzdq_aquotq_lookdata d;
+	int k;
+
+	if (dentry->d_name.len == sizeof(aquota_user)-1) {
+		if (memcmp(dentry->d_name.name, aquota_user,
+					sizeof(aquota_user)-1))
+			goto out;
+		k = USRQUOTA;
+	} else if (dentry->d_name.len == sizeof(aquota_group)-1) {
+		if (memcmp(dentry->d_name.name, aquota_group,
+					sizeof(aquota_group)-1))
+			goto out;
+		k = GRPQUOTA;
+	} else
+		goto out;	/* any other name does not exist here */
+	d.dev = vzdq_aquot_getidev(dir);
+	d.type = k;
+	d.qmblk = find_qmblk_by_dev(d.dev);
+	if (d.qmblk == NULL)
+		goto out;	/* no vz quota active on that device */
+
+	inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
+			vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
+
+	/* qmblk ref is not needed, we used it for i_size calculation only */
+	qmblk_put(d.qmblk);
+	if (inode == NULL)
+		goto out;
+
+	unlock_new_inode(inode);
+	dentry->d_op = &vzdq_aquotq_dentry_operations;
+	d_add(dentry, inode);
+	return NULL;
+
+out:
+	return ERR_PTR(-ENOENT);
+}
+
+static struct file_operations vzdq_aquotq_file_operations = {
+	.read		= &generic_read_dir,
+	.readdir	= &vzdq_aquotq_readdir,
+};
+
+static struct inode_operations vzdq_aquotq_inode_operations = {
+	.lookup		= &vzdq_aquotq_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota directory
+ *
+ * --------------------------------------------------------------------- */
+
+struct vzdq_aquot_de {
+	struct list_head list;
+	struct vfsmount *mnt;	/* one held mount per distinct superblock */
+};
+
+static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
+		struct list_head *head)	/* depth-first walk of the VE's mount tree; collect one mount per sb */
+{
+	struct vfsmount *rmnt, *mnt;
+	struct vzdq_aquot_de *p;
+	int err;
+
+#ifdef CONFIG_VE
+	rmnt = mntget(ve->fs_rootmnt);
+#else
+	read_lock(&current->fs->lock);
+	rmnt = mntget(current->fs->rootmnt);
+	read_unlock(&current->fs->lock);
+#endif
+	mnt = rmnt;
+	spin_lock(&vfsmount_lock);
+	while (1) {
+		list_for_each_entry(p, head, list) {
+			if (p->mnt->mnt_sb == mnt->mnt_sb)
+				goto skip;	/* superblock already listed (e.g. bind mounts) */
+		}
+
+		err = -ENOMEM;
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);	/* atomic: vfsmount_lock is held */
+		if (p == NULL)
+			goto out;
+		p->mnt = mntget(mnt);
+		list_add_tail(&p->list, head);
+
+skip:
+		err = 0;
+		if (list_empty(&mnt->mnt_mounts)) {
+			while (1) {	/* leaf: climb until a sibling exists or we are back at root */
+				if (mnt == rmnt)
+					goto out;
+				if (mnt->mnt_child.next !=
+						&mnt->mnt_parent->mnt_mounts)
+					break;
+				mnt = mnt->mnt_parent;
+			}
+			mnt = list_entry(mnt->mnt_child.next,
+					struct vfsmount, mnt_child);	/* next sibling */
+		} else
+			mnt = list_entry(mnt->mnt_mounts.next,
+					struct vfsmount, mnt_child);	/* descend into first child */
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	mntput(rmnt);
+	return err;
+}
+
+static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
+		struct list_head *head)	/* drop mount refs and free list entries */
+{
+	struct vzdq_aquot_de *p;
+
+	while (!list_empty(head)) {
+		p = list_entry(head->next, typeof(*p), list);
+		mntput(p->mnt);
+		list_del(&p->list);
+		kfree(p);
+	}
+}
+
+static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
+{
+	struct ve_struct *ve, *old_ve;
+	struct list_head mntlist;
+	struct vzdq_aquot_de *de;
+	struct super_block *sb;
+	struct vz_quota_master *qmblk;
+	loff_t i, n;
+	char buf[24];
+	int l, err;
+
+	i = 0;
+	n = file->f_pos;
+	ve = file->f_dentry->d_sb->s_type->owner_env;
+	old_ve = set_exec_env(ve);
+
+	INIT_LIST_HEAD(&mntlist);
+#ifdef CONFIG_VE
+	/*
+	 * The only reason of disabling readdir for the host system is that
+	 * this readdir can be slow and CPU consuming with large number of VPSs
+	 * (or just mount points).
+	 */
+	err = ve_is_super(ve);
+#else
+	err = 0;
+#endif
+	if (!err) {
+		err = vzdq_aquot_buildmntlist(ve, &mntlist);
+		if (err)
+			goto out_err;
+	}
+
+	if (i >= n) {
+		if ((*filler)(data, ".", 1, i,
+					file->f_dentry->d_inode->i_ino, DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	if (i >= n) {
+		if ((*filler)(data, "..", 2, i,
+					parent_ino(file->f_dentry), DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	list_for_each_entry (de, &mntlist, list) {
+		sb = de->mnt->mnt_sb;
+		if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
+			continue;
+
+		qmblk = vzquota_find_qmblk(sb);
+		if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
+			continue;
+
+		qmblk_put(qmblk);
+		i++;
+		if (i <= n)
+			continue;
+
+		l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
+		if ((*filler)(data, buf, l, i - 1,
+					vzdq_aquot_getino(sb->s_dev), DT_DIR))
+			break;
+	}
+
+out_fill:
+	err = 0;
+	file->f_pos = i;
+out_err:
+	vzdq_aquot_releasemntlist(ve, &mntlist);
+	(void)set_exec_env(old_ve);
+	return err;
+}
+
+static int vzdq_aquotd_looktest(struct inode *inode, void *data)
+{
+	return inode->i_op == &vzdq_aquotq_inode_operations &&
+	       vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data;
+}
+
+static int vzdq_aquotd_lookset(struct inode *inode, void *data)
+{
+	dev_t dev;
+
+	dev = (dev_t)(unsigned long)data;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = vzdq_aquot_getino(dev);
+	inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_nlink = 2;
+	inode->i_op = &vzdq_aquotq_inode_operations;
+	inode->i_fop = &vzdq_aquotq_file_operations;
+	vzdq_aquot_setidev(inode, dev);
+	return 0;
+}
+
+static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
+		struct dentry *dentry,
+		struct nameidata *nd)
+{
+	struct ve_struct *ve, *old_ve;
+	const unsigned char *s;
+	int l;
+	dev_t dev;
+	struct inode *inode;
+
+	ve = dir->i_sb->s_type->owner_env;
+	old_ve = set_exec_env(ve);
+#ifdef CONFIG_VE
+	/*
+	 * Lookup is much lighter than readdir, so it can be allowed for the
+	 * host system.  But it would be strange to be able to do lookup only
+	 * without readdir...
+	 */
+	if (ve_is_super(ve))
+		goto out;
+#endif
+
+	dev = 0;
+	l = dentry->d_name.len;
+	if (l <= 0)
+		goto out;
+	for (s = dentry->d_name.name; l > 0; s++, l--) {
+		if (!isxdigit(*s))
+			goto out;
+		if (dev & ~(~0UL >> 4))
+			goto out;
+		dev <<= 4;
+		if (isdigit(*s))
+			dev += *s - '0';
+		else if (islower(*s))
+			dev += *s - 'a' + 10;
+		else
+			dev += *s - 'A' + 10;
+	}
+	dev = new_decode_dev(dev);
+
+	if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
+		goto out;
+
+	inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
+			vzdq_aquotd_looktest, vzdq_aquotd_lookset,
+			(void *)(unsigned long)dev);
+	if (inode == NULL)
+		goto out;
+	unlock_new_inode(inode);
+
+	d_add(dentry, inode);
+	(void)set_exec_env(old_ve);
+	return NULL;
+
+out:
+	(void)set_exec_env(old_ve);
+	return ERR_PTR(-ENOENT);
+}
+
+static int vzdq_aquotd_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct ve_struct *ve, *old_ve;
+	struct list_head mntlist, *pos;
+
+	generic_fillattr(dentry->d_inode, stat);
+	ve = dentry->d_sb->s_type->owner_env;
+#ifdef CONFIG_VE
+	/*
+	 * The only reason of disabling getattr for the host system is that
+	 * this getattr can be slow and CPU consuming with large number of VPSs
+	 * (or just mount points).
+	 */
+	if (ve_is_super(ve))
+		return 0;
+#endif
+	INIT_LIST_HEAD(&mntlist);
+	old_ve = set_exec_env(ve);
+	if (!vzdq_aquot_buildmntlist(ve, &mntlist))
+		list_for_each(pos, &mntlist)
+			stat->nlink++;
+	vzdq_aquot_releasemntlist(ve, &mntlist);
+	(void)set_exec_env(old_ve);
+	return 0;
+}
+
+static struct file_operations vzdq_aquotd_file_operations = {
+	.read		= &generic_read_dir,
+	.readdir	= &vzdq_aquotd_readdir,
+};
+
+static struct inode_operations vzdq_aquotd_inode_operations = {
+	.lookup		= &vzdq_aquotd_lookup,
+	.getattr	= &vzdq_aquotd_getattr,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Initialization and deinitialization
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * FIXME: creation of proc entries here is unsafe with respect to module
+ * unloading.
+ */
+void vzaquota_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry("vz/vzaquota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de != NULL) {
+		de->proc_iops = &vzdq_aquotd_inode_operations;
+		de->proc_fops = &vzdq_aquotd_file_operations;
+	} else
+		printk("VZDQ: vz/vzaquota creation failed\n");
+#if defined(CONFIG_SYSCTL)
+	de = create_proc_glob_entry("sys/fs/quota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de == NULL)
+		printk("VZDQ: sys/fs/quota creation failed\n");
+#endif
+}
+
+void vzaquota_fini(void)
+{
+	remove_proc_entry("vz/vzaquota", NULL);
+}
diff -upr kernel-2.6.18-417.el5.orig/fs/vzdq_mgmt.c kernel-2.6.18-417.el5-028stab121/fs/vzdq_mgmt.c
--- kernel-2.6.18-417.el5.orig/fs/vzdq_mgmt.c	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/vzdq_mgmt.c	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,770 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/quota.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota on.
+ * --------------------------------------------------------------------- */
+
+/*
+ * check limits copied from user
+ */
+int vzquota_check_sane_limits(struct dq_stat *qstat)
+{
+	int err;
+
+	err = -EINVAL;
+
+	/* softlimit must not exceed hardlimit */
+	if (qstat->bsoftlimit > qstat->bhardlimit)
+		goto out;
+
+	if (qstat->isoftlimit > qstat->ihardlimit)
+		goto out;
+
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * check usage values copied from user
+ */
+int vzquota_check_sane_values(struct dq_stat *qstat)
+{
+	int err;
+
+	err = -EINVAL;
+
+	/* expiration time must not be set if softlimit was not exceeded */
+	if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != 0)
+		goto out;
+
+	if (qstat->icurrent < qstat->isoftlimit && qstat->itime != 0)
+		goto out;
+
+	err = vzquota_check_sane_limits(qstat);
+out:
+	return err;
+}
+
+/*
+ * create new quota master block
+ * this function should:
+ *  - copy limits and usage parameters from user buffer;
+ *  - allocate, initialize quota block and insert it into the hash;
+ */
+static int vzquota_create(unsigned int quota_id,
+		struct vz_quota_stat __user *u_qstat, int compat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -EFAULT;
+	if (!compat) {
+		if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
+			goto out;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_stat cqstat;
+		if (copy_from_user(&cqstat, u_qstat, sizeof(cqstat)))
+			goto out;
+		compat_dqstat2dqstat(&cqstat.dq_stat, &qstat.dq_stat);
+		compat_dqinfo2dqinfo(&cqstat.dq_info, &qstat.dq_info);
+#endif
+	}
+
+	err = -EINVAL;
+	if (quota_id == 0)
+		goto out;
+
+	if (vzquota_check_sane_values(&qstat.dq_stat))
+		goto out;
+	err = 0;
+	qmblk = vzquota_alloc_master(quota_id, &qstat);
+
+	if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
+		err = PTR_ERR(qmblk);
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/**
+ * vzquota_on - turn quota on
+ *
+ * This function should:
+ *  - find and get refcnt of directory entry for quota root and corresponding
+ *    mountpoint;
+ *  - find corresponding quota block and mark it with given path;
+ *  - check quota tree;
+ *  - initialize quota for the tree root.
+ */
+static int vzquota_on(unsigned int quota_id, const char __user *quota_root,
+					char __user *buf)
+{
+	int err;
+	struct nameidata nd;
+	struct vz_quota_master *qmblk;
+	struct super_block *dqsb;
+
+	dqsb = NULL;
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out;
+
+	err = user_path_walk(quota_root, &nd);
+	if (err)
+		goto out;
+	/* init path must be a directory */
+	err = -ENOTDIR;
+	if (!S_ISDIR(nd.dentry->d_inode->i_mode))
+		goto out_path;
+
+	qmblk->dq_root_dentry = nd.dentry;
+	qmblk->dq_root_mnt = nd.mnt;
+	qmblk->dq_sb = nd.dentry->d_inode->i_sb;
+	err = vzquota_get_super(qmblk->dq_sb);
+	if (err)
+		goto out_super;
+
+	/*
+	 * Serialization with quota initialization and operations is performed
+	 * through generation check: generation is memorized before qmblk is
+	 * found and compared under inode_qmblk_lock with assignment.
+	 *
+	 * Note that the dentry tree is shrunk only for high-level logical
+	 * serialization, purely as a courtesy to the user: to have consistent
+	 * quota statistics, files should be closed etc. when quota is turned on.
+	 */
+	err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
+			qmblk, buf);
+	if (err)
+		goto out_init;
+	qmblk->dq_state = VZDQ_WORKING;
+
+	mutex_unlock(&vz_quota_mutex);
+	return 0;
+
+out_init:
+	dqsb = qmblk->dq_sb;
+out_super:
+	/* clear for qmblk_put/quota_free_master */
+	qmblk->dq_sb = NULL;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+out_path:
+	path_release(&nd);
+out:
+	if (dqsb)
+		vzquota_put_super(dqsb);
+	mutex_unlock(&vz_quota_mutex);
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota off.
+ * --------------------------------------------------------------------- */
+
+/*
+ * destroy quota block by ID
+ */
+static int vzquota_destroy(unsigned int quota_id)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state == VZDQ_WORKING)
+		goto out; /* quota_off first */
+
+	list_del_init(&qmblk->dq_hash);
+	dentry = qmblk->dq_root_dentry;
+	qmblk->dq_root_dentry = NULL;
+	mnt = qmblk->dq_root_mnt;
+	qmblk->dq_root_mnt = NULL;
+
+	if (qmblk->dq_sb)
+		vzquota_put_super(qmblk->dq_sb);
+	mutex_unlock(&vz_quota_mutex);
+
+	qmblk_put(qmblk);
+	dput(dentry);
+	mntput(mnt);
+	return 0;
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	return err;
+}
+
+/**
+ * vzquota_off - turn quota off
+ */
+
+static int __vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk,
+		enum writeback_sync_modes sync_mode)
+{
+	struct writeback_control wbc;
+	LIST_HEAD(list);
+	struct vz_quota_ilink *qlnk;
+	struct inode *inode;
+	int err, ret;
+
+	memset(&wbc, 0, sizeof(wbc));
+	wbc.sync_mode = sync_mode;
+
+	err = ret = 0;
+	while (!list_empty(lh)) {
+		if (need_resched()) {
+			inode_qmblk_unlock(qmblk->dq_sb);
+			schedule();
+			inode_qmblk_lock(qmblk->dq_sb);
+			continue;
+		}
+
+		qlnk = list_first_entry(lh, struct vz_quota_ilink, list);
+		list_move(&qlnk->list, &list);
+
+		inode = igrab(QLNK_INODE(qlnk));
+		if (!inode)
+			continue;
+
+		inode_qmblk_unlock(qmblk->dq_sb);
+
+		wbc.nr_to_write = LONG_MAX;
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			err = ret;
+		iput(inode);
+
+		inode_qmblk_lock(qmblk->dq_sb);
+	}
+
+	list_splice(&list, lh);
+	return err;
+}
+
+static int vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk)
+{
+	(void)__vzquota_sync_list(lh, qmblk, WB_SYNC_NONE);
+	return __vzquota_sync_list(lh, qmblk, WB_SYNC_ALL);
+}
+
+static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
+{
+	int err;
+	LIST_HEAD(qlnk_list);
+
+	list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);
+	err = vzquota_sync_list(&qlnk_list, qmblk);
+	if (!err && !list_empty(&qmblk->dq_ilink_list))
+		err = -EBUSY;
+	list_splice(&qlnk_list, &qmblk->dq_ilink_list);
+
+	return err;
+}
+
+static int vzquota_off(unsigned int quota_id, char __user *buf, int force)
+{
+	int err, ret;
+	struct vz_quota_master *qmblk;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EALREADY;
+	if (qmblk->dq_state != VZDQ_WORKING)
+		goto out;
+
+	inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */
+	ret = vzquota_sync_inodes(qmblk);
+	inode_qmblk_unlock(qmblk->dq_sb);
+
+	err = vzquota_off_qmblk(qmblk->dq_sb, qmblk, buf, force);
+	if (err)
+		goto out;
+
+	err = ret;
+	/* vzquota_destroy will free resources */
+	qmblk->dq_state = VZDQ_STOPING;
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Other VZQUOTA ioctl's.
+ * --------------------------------------------------------------------- */
+
+/*
+ * this function should:
+ * - set new limits/buffer under quota master block lock
+ * - if the new softlimit is less than the usage, set the expiration time
+ * - no need to alloc ugid hash table - we'll do that on demand
+ */
+int vzquota_update_limit(struct dq_stat *_qstat,
+		struct dq_stat *qstat)
+{
+	int err;
+
+	err = -EINVAL;
+	if (vzquota_check_sane_limits(qstat))
+		goto out;
+
+	err = 0;
+
+	/* limits */
+	_qstat->bsoftlimit = qstat->bsoftlimit;
+	_qstat->bhardlimit = qstat->bhardlimit;
+	/*
+	 * If the soft limit is exceeded, administrator can override the moment
+	 * when the grace period for limit exceeding ends.
+	 * Specifying the moment may be useful if the soft limit is set to be
+	 * lower than the current usage.  In the latter case, if the grace
+	 * period end isn't specified, the grace period will start from the
+	 * moment of the first write operation.
+	 * There is a race with the user level.  Soft limit may be already
+	 * exceeded before the limit change, and grace period end calculated by
+	 * the kernel will be overridden.  User level may check if the limit is
+	 * already exceeded, but check and set calls are not atomic.
+	 * This race isn't dangerous.  Under normal circumstances, the
+	 * difference between the grace period end calculated by the kernel and
+	 * the user level should not be greater than the difference between
+	 * the moments of check and set calls, i.e. not bigger than the quota
+	 * timer resolution - 1 sec.
+	 */
+	if (qstat->btime != (time_t)0 &&
+			_qstat->bcurrent >= _qstat->bsoftlimit)
+		_qstat->btime = qstat->btime;
+
+	_qstat->isoftlimit = qstat->isoftlimit;
+	_qstat->ihardlimit = qstat->ihardlimit;
+	if (qstat->itime != (time_t)0 &&
+			_qstat->icurrent >= _qstat->isoftlimit)
+		_qstat->itime = qstat->itime;
+
+out:
+	return err;
+}
+
+/*
+ * set new quota limits.
+ * this function should:
+ *  copy new limits from user level
+ *  - find quota block
+ *  - set new limits and flags.
+ */
+static int vzquota_setlimit(unsigned int quota_id,
+		struct vz_quota_stat __user *u_qstat, int compat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	mutex_lock(&vz_quota_mutex); /* for hash list protection */
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (!compat) {
+		if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
+			goto out;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_stat cqstat;
+		if (copy_from_user(&cqstat, u_qstat, sizeof(cqstat)))
+			goto out;
+		compat_dqstat2dqstat(&cqstat.dq_stat, &qstat.dq_stat);
+		compat_dqinfo2dqinfo(&cqstat.dq_info, &qstat.dq_info);
+#endif
+	}
+
+	qmblk_data_write_lock(qmblk);
+	err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat);
+	if (err == 0)
+		qmblk->dq_info = qstat.dq_info;
+	qmblk_data_write_unlock(qmblk);
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	return err;
+}
+
+/*
+ * get quota limits.
+ * very simple - just return stat buffer to user
+ */
+static int vzquota_getstat(unsigned int quota_id,
+		struct vz_quota_stat __user *u_qstat, int compat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	qmblk_data_read_lock(qmblk);
+	/* copy whole buffer under lock */
+	memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat));
+	memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info));
+	qmblk_data_read_unlock(qmblk);
+
+	if (!compat)
+		err = copy_to_user(u_qstat, &qstat, sizeof(qstat));
+	else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_stat cqstat;
+		dqstat2compat_dqstat(&qstat.dq_stat, &cqstat.dq_stat);
+		dqinfo2compat_dqinfo(&qstat.dq_info, &cqstat.dq_info);
+		err = copy_to_user(u_qstat, &cqstat, sizeof(cqstat));
+#endif
+	}
+	if (err)
+		err = -EFAULT;
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	return err;
+}
+
+/*
+ * This is a system call to turn per-VE disk quota on.
+ * Note this call is allowed to run ONLY from VE0
+ */
+long do_vzquotactl(int cmd, unsigned int quota_id,
+		struct vz_quota_stat __user *qstat, const char __user *ve_root,
+		int compat)
+{
+	int ret;
+	int force = 0;
+
+	ret = -EPERM;
+	/* access allowed only from root of VE0 */
+	if (!capable(CAP_SYS_RESOURCE) ||
+	    !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	switch (cmd) {
+		case VZ_DQ_CREATE:
+			ret = vzquota_create(quota_id, qstat, compat);
+			break;
+		case VZ_DQ_DESTROY:
+			ret = vzquota_destroy(quota_id);
+			break;
+		case VZ_DQ_ON:
+			/* 
+			 * qstat is just a pointer to userspace buffer to
+			 * store busy files path in case of vzquota_on fail
+			 */
+			ret = vzquota_on(quota_id, ve_root, (char *)qstat);
+			break;
+		case VZ_DQ_OFF_FORCED:
+			force = 1;
+		case VZ_DQ_OFF:
+			/* 
+			 * ve_root is just a pointer to userspace buffer to
+			 * store busy files path in case of vzquota_off fail
+			 */
+			ret = vzquota_off(quota_id, (char *)ve_root, force);
+			break;
+		case VZ_DQ_SETLIMIT:
+			ret = vzquota_setlimit(quota_id, qstat, compat);
+			break;
+		case VZ_DQ_GETSTAT:
+			ret = vzquota_getstat(quota_id, qstat, compat);
+			break;
+
+		default:
+			ret = -EINVAL;
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Proc filesystem routines
+ * ---------------------------------------------------------------------*/
+
+#if defined(CONFIG_PROC_FS)
+
+#define QUOTA_UINT_LEN		15
+#define QUOTA_TIME_LEN_FMT_UINT	"%11u"
+#define QUOTA_NUM_LEN_FMT_UINT	"%15u"
+#define QUOTA_NUM_LEN_FMT_ULL	"%15Lu"
+#define QUOTA_TIME_LEN_FMT_STR	"%11s"
+#define QUOTA_NUM_LEN_FMT_STR	"%15s"
+#define QUOTA_PROC_MAX_LINE_LEN 2048
+
+/*
+ * prints /proc/ve_dq header line
+ */
+static int print_proc_header(char * buffer)
+{
+	return sprintf(buffer,
+		       "%-11s"
+		       QUOTA_NUM_LEN_FMT_STR
+		       QUOTA_NUM_LEN_FMT_STR
+		       QUOTA_NUM_LEN_FMT_STR
+		       QUOTA_TIME_LEN_FMT_STR
+		       QUOTA_TIME_LEN_FMT_STR
+		       "\n",
+		       "qid: path", 
+		       "usage", "softlimit", "hardlimit", "time", "expire");
+}
+
+/*
+ * prints proc master record id, dentry path
+ */
+static int print_proc_master_id(char * buffer, char * path_buf,
+		struct vz_quota_master * qp)
+{
+	char *path;
+	int over;
+
+	path = NULL;
+	switch (qp->dq_state) {
+		case VZDQ_WORKING:
+			if (!path_buf) {
+				path = "";
+				break;
+			}
+			path = d_path(qp->dq_root_dentry,
+				      qp->dq_root_mnt, path_buf, PAGE_SIZE);
+			if (IS_ERR(path)) {
+				path = "";
+				break;
+			}
+			/* path too long: keep the tail, prefix with "..." */
+			over = strlen(path) -
+				(QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
+				 	QUOTA_UINT_LEN);
+			if (over > 0) {
+				path += over - 3;
+				path[0] = path[1] = path[2] = '.';
+			}
+			break;
+		case VZDQ_STARTING:
+			path = "-- started --";
+			break;
+		case VZDQ_STOPING:
+			path = "-- stopped --";
+			break;
+	}
+
+	return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
+}
+
+/*
+ * prints struct vz_quota_stat data
+ */
+static int print_proc_stat(char * buffer, struct dq_stat *qs,
+		struct dq_info *qi)
+{
+	return sprintf(buffer,
+		       "%11s"
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_TIME_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT
+		       "\n"
+		       "%11s"
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT
+		       "\n",
+		       "1k-blocks",
+		       (unsigned long long)qs->bcurrent >> 10,
+		       (unsigned long long)qs->bsoftlimit >> 10,
+		       (unsigned long long)qs->bhardlimit >> 10,
+		       (unsigned int)qs->btime,
+		       (unsigned int)qi->bexpire,
+		       "inodes",
+		       qs->icurrent,
+		       qs->isoftlimit,
+		       qs->ihardlimit,
+		       (unsigned int)qs->itime,
+		       (unsigned int)qi->iexpire);
+}
+
+
+/*
+ * for /proc filesystem output
+ */
+static int vzquota_read_proc(char *page, char **start, off_t off, int count,
+			   int *eof, void *data)
+{
+	int len, i;
+	off_t printed = 0;
+	char *p = page;
+	struct vz_quota_master *qp;
+	struct vz_quota_ilink *ql2;
+	struct list_head *listp;
+	char *path_buf;
+
+	path_buf = (char*)__get_free_page(GFP_KERNEL);
+	if (path_buf == NULL)
+		return -ENOMEM;
+
+	len = print_proc_header(p);
+	printed += len;
+	if (off < printed) /* keep header in output */ {
+		*start = p + off;
+		p += len;
+	}
+
+	mutex_lock(&vz_quota_mutex);
+
+	/* traverse master hash table for all records */
+	for (i = 0; i < vzquota_hash_size; i++) {
+		list_for_each(listp, &vzquota_hash_table[i]) {
+			qp = list_entry(listp,
+					struct vz_quota_master, dq_hash);
+
+			/* Skip other VE's information if not root of VE0 */
+			if ((!capable(CAP_SYS_ADMIN) ||
+			     !capable(CAP_SYS_RESOURCE))) {
+				ql2 = INODE_QLNK(current->fs->root->d_inode);
+				if (ql2 == NULL || qp != ql2->qmblk)
+					continue;
+			}
+			/*
+			 * Now print the next record
+			 */
+			len = 0;
+			/* we print quotaid and path only in VE0 */
+			if (capable(CAP_SYS_ADMIN))
+				len += print_proc_master_id(p+len,path_buf, qp);
+			len += print_proc_stat(p+len, &qp->dq_stat,
+					&qp->dq_info);
+			printed += len;
+			/* skip unnecessary lines */
+			if (printed <= off)
+				continue;
+			p += len;
+			/* provide start offset */
+			if (*start == NULL)
+				*start = p + (off - printed);
+			/* have we printed all requested size? */
+			if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
+			    (p - *start) >= count)
+				goto out;
+		}
+	}
+
+	*eof = 1; /* checked all hash */
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	len = 0;
+	if (*start != NULL) {
+		len = (p - *start);
+		if (len > count)
+			len = count;
+	}
+
+	if (path_buf)
+		free_page((unsigned long) path_buf);
+
+	return len;
+}
+
+/*
+ * Register procfs read callback
+ */
+int vzquota_proc_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_entry_mod("vz/vzquota", S_IFREG|S_IRUSR, NULL,
+			THIS_MODULE);
+	if (de == NULL) {
+		/* create "vz" subdirectory, if not exist */
+		de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		if (de == NULL)
+			goto out_err;
+		de = create_proc_entry_mod("vzquota", S_IFREG|S_IRUSR, de,
+				THIS_MODULE);
+		if (de == NULL)
+			goto out_err;
+	}
+	de->read_proc = vzquota_read_proc;
+	de->data = NULL;
+	return 0;
+out_err:
+	return -EBUSY;
+}
+
+void vzquota_proc_release(void)
+{
+	/* Unregister procfs read callback */
+	remove_proc_entry("vz/vzquota", NULL);
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/fs/vzdq_ops.c kernel-2.6.18-417.el5-028stab121/fs/vzdq_ops.c
--- kernel-2.6.18-417.el5.orig/fs/vzdq_ops.c	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/vzdq_ops.c	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,654 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/vzquota.h>
+#include <linux/vzsnap.h>
+
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations - helper functions.
+ * --------------------------------------------------------------------- */
+
+static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	dqstat->icurrent += number;
+}
+
+static inline void vzquota_incr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	dqstat->bcurrent += number;
+}
+
+static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	if (dqstat->icurrent > number)
+		dqstat->icurrent -= number;
+	else
+		dqstat->icurrent = 0;
+	if (dqstat->icurrent < dqstat->isoftlimit)
+		dqstat->itime = (time_t) 0;
+}
+
+static inline void vzquota_decr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	if (dqstat->bcurrent > number)
+		dqstat->bcurrent -= number;
+	else
+		dqstat->bcurrent = 0;
+	if (dqstat->bcurrent < dqstat->bsoftlimit)
+		dqstat->btime = (time_t) 0;
+}
+
+/*
+ * better printk() message or use /proc/vzquotamsg interface
+ * similar to /proc/kmsg
+ */
+static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
+		const char *fmt)
+{
+	if (dq_info->flags & flag) /* warning already printed for this
+				       masterblock */
+		return;
+	printk(fmt, dq_id);
+	dq_info->flags |= flag;
+}
+
+/*
+ * ignore_hardlimit -
+ *
+ * Intended to allow superuser of VE0 to overwrite hardlimits.
+ *
+ * ignore_hardlimit() has a very bad feature:
+ *
+ *	writepage() operation for writable mapping of a file with holes
+ *	may trigger get_block() with wrong current and as a consequence,
+ *	opens a possibility to overcommit hardlimits
+ */
+/* for the reason above, it is disabled now */
+static inline int ignore_hardlimit(struct dq_info *dqstat)
+{
+#if 0
+	return	ve_is_super(get_exec_env()) &&
+		capable(CAP_SYS_RESOURCE) &&
+		(dqstat->options & VZ_QUOTA_OPT_RSQUASH);
+#else
+	return 0;
+#endif
+}
+
+static int vzquota_check_inodes(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		unsigned long number, int dq_id)
+{
+	if (number == 0)
+		return QUOTA_OK;
+
+	if (dqstat->icurrent + number > dqstat->ihardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+			   "VZ QUOTA: file hardlimit reached for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	if (dqstat->icurrent + number > dqstat->isoftlimit) {
+		if (dqstat->itime == (time_t)0) {
+			vzquota_warn(dq_info, dq_id, 0,
+				"VZ QUOTA: file softlimit exceeded "
+				"for id=%d\n");
+			dqstat->itime = CURRENT_TIME_SECONDS +
+				dq_info->iexpire;
+		} else if (CURRENT_TIME_SECONDS >= dqstat->itime &&
+			   !ignore_hardlimit(dq_info)) {
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+				"VZ QUOTA: file softlimit expired "
+				"for id=%d\n");
+			return NO_QUOTA;
+		}
+	}
+
+	return QUOTA_OK;
+}
+
+static int vzquota_check_space(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		__u64 number, int dq_id, char prealloc)
+{
+	if (number == 0)
+		return QUOTA_OK;
+
+	if (prealloc == DQUOT_CMD_FORCE)
+		return QUOTA_OK;
+
+	if (dqstat->bcurrent + number > dqstat->bhardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		if (!prealloc)
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+				"VZ QUOTA: disk hardlimit reached "
+				"for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	if (dqstat->bcurrent + number > dqstat->bsoftlimit) {
+		if (dqstat->btime == (time_t)0) {
+			if (!prealloc) {
+				vzquota_warn(dq_info, dq_id, 0,
+					"VZ QUOTA: disk softlimit exceeded "
+					"for id=%d\n");
+				dqstat->btime = CURRENT_TIME_SECONDS
+							+ dq_info->bexpire;
+			} else {
+				/*
+				 * Original Linux quota doesn't allow
+				 * preallocation to exceed softlimit so
+				 * exceeding will be always printed
+				 */
+				return NO_QUOTA;
+			}
+		} else if (CURRENT_TIME_SECONDS >= dqstat->btime &&
+			   !ignore_hardlimit(dq_info)) {
+			if (!prealloc)
+				vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+					"VZ QUOTA: disk quota "
+					"softlimit expired "
+					"for id=%d\n");
+			return NO_QUOTA;
+		}
+	}
+
+	return QUOTA_OK;
+}
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, unsigned long number)
+{
+	struct dq_info *dqinfo;
+	struct dq_stat *dqstat;
+
+	if (qugid[type] == NULL)
+		return QUOTA_OK;
+	if (qugid[type] == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
+		return QUOTA_OK;
+	if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
+		return QUOTA_OK;
+	if (number == 0)
+		return QUOTA_OK;
+
+	dqinfo = &qmblk->dq_ugid_info[type];
+	dqstat = &qugid[type]->qugid_stat;
+
+	if (dqstat->ihardlimit != 0 &&
+	    dqstat->icurrent + number > dqstat->ihardlimit)
+		return NO_QUOTA;
+
+	if (dqstat->isoftlimit != 0 &&
+	    dqstat->icurrent + number > dqstat->isoftlimit) {
+		if (dqstat->itime == (time_t)0)
+			dqstat->itime = CURRENT_TIME_SECONDS +
+				dqinfo->iexpire;
+		else if (CURRENT_TIME_SECONDS >= dqstat->itime)
+			return NO_QUOTA;
+	}
+
+	return QUOTA_OK;
+}
+
+static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, __u64 number, char prealloc)
+{
+	struct dq_info *dqinfo;
+	struct dq_stat *dqstat;
+
+	if (prealloc == DQUOT_CMD_FORCE)
+		return QUOTA_OK;
+
+	if (qugid[type] == NULL)
+		return QUOTA_OK;
+	if (qugid[type] == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA))
+		return QUOTA_OK;
+	if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA))
+		return QUOTA_OK;
+	if (number == 0)
+		return QUOTA_OK;
+
+	dqinfo = &qmblk->dq_ugid_info[type];
+	dqstat = &qugid[type]->qugid_stat;
+
+	if (dqstat->bhardlimit != 0 &&
+	    dqstat->bcurrent + number > dqstat->bhardlimit)
+		return NO_QUOTA;
+
+	if (dqstat->bsoftlimit != 0 &&
+	    dqstat->bcurrent + number > dqstat->bsoftlimit) {
+		if (dqstat->btime == (time_t)0) {
+			if (!prealloc)
+				dqstat->btime = CURRENT_TIME_SECONDS
+							+ dqinfo->bexpire;
+			else
+				/*
+				 * Original Linux quota doesn't allow
+				 * preallocation to exceed softlimit so
+				 * exceeding will be always printed
+				 */
+				return NO_QUOTA;
+		} else if (CURRENT_TIME_SECONDS >= dqstat->btime)
+			return NO_QUOTA;
+	}
+
+	return QUOTA_OK;
+}
+#endif
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations
+ * --------------------------------------------------------------------- */
+
+/*
+ * S_NOQUOTA note.
+ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
+ *  - quota file (absent in our case)
+ *  - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
+ *    filesystem-specific new_inode, before the inode gets outside links.
+ * For the latter case, the only quota operation where care about S_NOQUOTA
+ * might be required is vzquota_drop, but there S_NOQUOTA has already been
+ * checked in DQUOT_DROP().
+ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
+ *
+ * The above note is not entirely correct.
+ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
+ * delete_inode if new_inode fails (for example, because of inode quota
+ * limits), so S_NOQUOTA check is needed in free_inode.
+ * This seems to be the dark corner of the current quota API.
+ */
+
+/*
+ * Initialize quota operations for the specified inode.
+ */
+static int vzquota_initialize(struct inode *inode, int type)
+{
+	vzquota_inode_init_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Release quota for the specified inode.
+ */
+static int vzquota_drop(struct inode *inode)
+{
+	vzquota_inode_drop_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Allocate block callback.
+ *
+ * If (prealloc) disk quota exceeding warning is not printed.
+ * See Linux quota to know why.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_space(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+	struct vzsnap_struct *vzs = NULL;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid[MAXQUOTAS];
+#endif
+
+		/* checking first */
+		ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id, prealloc);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_space(qmblk, qugid,
+					cnt, number, prealloc);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_space(&qmblk->dq_stat, number);
+		if (qmblk->dq_snap)
+			vzs = vzsnap_get(qmblk->dq_snap);
+		vzquota_data_unlock(inode, &data);
+	}
+
+	inode_add_bytes(inode, number);
+
+	if (vzs)
+		vzs->ops->addblock(vzs, inode);
+
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock(inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Allocate inodes callback.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid *qugid[MAXQUOTAS];
+#endif
+
+		/* checking first */
+		ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_inodes(qmblk, qugid,
+					cnt, number);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_inodes(&qmblk->dq_stat, number);
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock((struct inode *)inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Free space callback.
+ */
+static int vzquota_free_space(struct inode *inode, qsize_t number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA; /* isn't checked by the caller */
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		vzquota_decr_space(&qmblk->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_space(&qugid->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock(inode, &data);
+	}
+	inode_sub_bytes(inode, number);
+	might_sleep();
+	return QUOTA_OK;
+}
+
+/*
+ * Free inodes callback.
+ */
+static int vzquota_free_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		vzquota_decr_inodes(&qmblk->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_inodes(&qugid->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+	might_sleep();
+	return QUOTA_OK;
+}
+
+void vzquota_inode_off(struct inode * inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* The call is made through virtinfo, it can be an inode
+	 * not controlled by vzquota.
+	 */
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return;
+
+	if (qmblk == NULL) {
+		/* Tricky place. If qmblk == NULL, it means that this inode
+		 * is not in area controlled by vzquota (except for rare
+		 * case of already set S_NOQUOTA). But we have to set
+		 * S_NOQUOTA in any case because vzquota can be turned
+		 * on later, when this inode is invalid from viewpoint
+		 * of vzquota.
+		 *
+		 * To be safe, we reacquire vzquota lock.
+		 * The assumption is that it would not hurt to call
+		 * vzquota_inode_drop() more than once, but it must
+		 * be called at least once after S_NOQUOTA is set.
+		 */
+		inode_qmblk_lock(inode->i_sb);
+		inode->i_flags |= S_NOQUOTA;
+		inode_qmblk_unlock(inode->i_sb);
+	} else {
+		loff_t bytes = inode_get_bytes(inode);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		inode->i_flags |= S_NOQUOTA;
+
+		vzquota_decr_space(&qmblk->dq_stat, bytes);
+		vzquota_decr_inodes(&qmblk->dq_stat, 1);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_space(&qugid->qugid_stat, bytes);
+			vzquota_decr_inodes(&qugid->qugid_stat, 1);
+		}
+#endif
+
+		vzquota_data_unlock(inode, &data);
+	}
+	vzquota_inode_drop_call(inode);
+}
+
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+
+/*
+ * helper function for quota_transfer
+ * check that we can add inode to this quota_id
+ */
+static int vzquota_transfer_check(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		unsigned int type, __u64 size)
+{
+	if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK ||
+	    vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK)
+		return -1;
+	return 0;
+}
+
+int vzquota_transfer_usage(struct inode *inode,
+		int mask,
+		struct vz_quota_ilink *qlnk)
+{
+	struct vz_quota_ugid *qugid_old;
+	__u64 space;
+	int i;
+
+	space = inode_get_bytes(inode);
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+		/*
+		 * Do not permit chown a file if its owner does not have
+		 * ugid record. This might happen if we somehow exceeded
+		 * the UID/GID (e.g. set uglimit less than number of users).
+		 */
+		if (INODE_QLNK(inode)->qugid[i] == VZ_QUOTA_UGBAD)
+			return -1;
+		if (vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space))
+			return -1;
+	}
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+		qugid_old = INODE_QLNK(inode)->qugid[i];
+		vzquota_decr_space(&qugid_old->qugid_stat, space);
+		vzquota_decr_inodes(&qugid_old->qugid_stat, 1);
+		vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space);
+		vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1);
+	}
+	return 0;
+}
+
+/*
+ * Transfer the inode between different user/group quotas.
+ */
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	return vzquota_inode_transfer_call(inode, iattr) ?
+		NO_QUOTA : QUOTA_OK;
+}
+
+static void vzquota_swap_inode(struct inode *inode, struct inode *tmpl)
+{
+	vzquota_inode_swap_call(inode, tmpl);
+}
+
+
+#else /* CONFIG_VZ_QUOTA_UGID */
+
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;
+}
+
+#endif
+
+/*
+ * Called under following semaphores:
+ *	old_d->d_inode->i_sb->s_vfs_rename_sem
+ *	old_d->d_inode->i_sem
+ *	new_d->d_inode->i_sem
+ * [not verified  --SAW]
+ */
+static int vzquota_rename(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	return vzquota_rename_check(inode, old_dir, new_dir) ?
+		NO_QUOTA : QUOTA_OK;
+}
+
+extern void vzquota_shutdown_super(struct super_block *sb);
+
+/*
+ * Structure of superblock diskquota operations.
+ */
+struct dquot_operations vz_quota_operations = {
+	.initialize	= vzquota_initialize,
+	.drop		= vzquota_drop,
+	.alloc_space	= vzquota_alloc_space,
+	.alloc_inode	= vzquota_alloc_inode,
+	.free_space	= vzquota_free_space,
+	.free_inode	= vzquota_free_inode,
+	.transfer	= vzquota_transfer,
+	.rename		= vzquota_rename,
+
+	.swap_inode	= vzquota_swap_inode,
+	.shutdown	= vzquota_shutdown_super,
+};
diff -upr kernel-2.6.18-417.el5.orig/fs/vzdq_tree.c kernel-2.6.18-417.el5-028stab121/fs/vzdq_tree.c
--- kernel-2.6.18-417.el5.orig/fs/vzdq_tree.c	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/vzdq_tree.c	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,286 @@
+/*
+ *
+ * Copyright (C) 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota tree implementation
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vzdq_tree.h>
+
+struct quotatree_tree *quotatree_alloc(void)
+{
+	int l;
+	struct quotatree_tree *tree;
+
+	tree = kmalloc(sizeof(struct quotatree_tree), GFP_KERNEL);
+	if (tree == NULL)
+		goto out;
+
+	for (l = 0; l < QUOTATREE_DEPTH; l++) {
+		INIT_LIST_HEAD(&tree->levels[l].usedlh);
+		INIT_LIST_HEAD(&tree->levels[l].freelh);
+		tree->levels[l].freenum = 0;
+	}
+	tree->root = NULL;
+	tree->leaf_num = 0;
+out:
+	return tree;
+}
+
+static struct quotatree_node *
+quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
+		struct quotatree_find_state *st)
+{
+	void **block;
+	struct quotatree_node *parent;
+	int l, index;
+
+	parent = NULL;
+	block = (void **)&tree->root;
+	l = 0;
+	while (l < level && *block != NULL) {
+		index = (id >>  QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
+		parent = *block;
+		block = parent->blocks + index;
+		l++;
+	}
+	if (st != NULL) {
+		st->block = block;
+		st->level = l;
+	}
+
+	return parent;
+}
+
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st)
+{
+	quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
+	if (st->level == QUOTATREE_DEPTH)
+		return *st->block;
+	else
+		return NULL;
+}
+
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index)
+{
+	int i, count;
+	struct quotatree_node *p;
+	void *leaf;
+
+	if (QTREE_LEAFNUM(tree) <= index)
+		return NULL;
+
+	count = 0;
+	list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {	
+			leaf = p->blocks[i];
+			if (leaf == NULL)
+				continue;
+			if (count == index)
+				return leaf;
+			count++;
+		}
+	}
+	return NULL;
+}
+
+/* returns data leaf (vz_quota_ugid) after _existent_ ugid (@id)
+ * in the tree... */
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
+{
+	int off;
+	struct quotatree_node *parent, *p;
+	struct list_head *lh;
+
+	/* get parent referring to the correct quota tree node of the last level */
+	parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
+	if (!parent)
+		return NULL;
+
+	off = (id & QUOTATREE_BMASK) + 1;	/* next ugid */
+	lh = &parent->list;
+	do {
+		p = list_entry(lh, struct quotatree_node, list);
+		for ( ; off < QUOTATREE_BSIZE; off++)
+			if (p->blocks[off])
+				return p->blocks[off];
+		off = 0;
+		lh = lh->next;
+	} while (lh != &QTREE_LEAFLVL(tree)->usedlh);
+
+	return NULL;
+}
+
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data)
+{
+	struct quotatree_node *p;
+	int l, index;
+
+	while (st->level < QUOTATREE_DEPTH) {
+		l = st->level;
+		if (!list_empty(&tree->levels[l].freelh)) {
+			p = list_entry(tree->levels[l].freelh.next,
+					struct quotatree_node, list);
+			list_del(&p->list);
+		} else {
+			p = kmalloc(sizeof(struct quotatree_node), GFP_NOFS | __GFP_NOFAIL);
+			if (p == NULL)
+				return -ENOMEM;
+			/* save block number in the l-level
+			 * it uses for quota file generation */
+			p->num = tree->levels[l].freenum++;
+		}
+		list_add(&p->list, &tree->levels[l].usedlh);
+		memset(p->blocks, 0, sizeof(p->blocks));
+		*st->block = p;
+
+		index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
+		st->block = p->blocks + index;
+		st->level++;
+	}
+	tree->leaf_num++;
+	*st->block = data;
+
+	return 0;
+}
+
+static struct quotatree_node *
+quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id,
+		int level)
+{
+	struct quotatree_node *parent;
+	struct quotatree_find_state st;
+
+	parent = quotatree_follow(tree, id, level, &st);
+	if (st.level == QUOTATREE_DEPTH)
+		tree->leaf_num--;
+	*st.block = NULL;
+	return parent;
+}
+
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id)
+{
+	struct quotatree_node *p;
+	int level, i;
+
+	p = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH);
+	for (level = QUOTATREE_DEPTH - 1; level >= QUOTATREE_CDEPTH; level--) {
+		for (i = 0; i < QUOTATREE_BSIZE; i++)
+			if (p->blocks[i] != NULL)
+				return;
+		list_move(&p->list, &tree->levels[level].freelh);
+		p = quotatree_remove_ptr(tree, id, level);
+	}
+}
+
+#if 0
+static void quotatree_walk(struct quotatree_tree *tree,
+		struct quotatree_node *node_start,
+		quotaid_t id_start,
+		int level_start, int level_end,
+		int (*callback)(struct quotatree_tree *,
+				quotaid_t id,
+				int level,
+				void *ptr,
+				void *data),
+		void *data)
+{
+	struct quotatree_node *p;
+	int l, shift, index;
+	quotaid_t id;
+	struct quotatree_find_state st;
+
+	p = node_start;
+	l = level_start;
+	shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	id = id_start;
+	index = 0;
+
+	/*
+	 * Invariants:
+	 * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	 * id & ((1 << shift) - 1) == 0
+	 * p is l-level node corresponding to id
+	 */
+	do {
+		if (!p)
+			break;
+
+		if (l < level_end) {
+			for (; index < QUOTATREE_BSIZE; index++)
+				if (p->blocks[index] != NULL)
+					break;
+			if (index < QUOTATREE_BSIZE) {
+				/* descend */
+				p = p->blocks[index];
+				l++;
+				shift -= QUOTAID_BBITS;
+				id += (quotaid_t)index << shift;
+				index = 0;
+				continue;
+			}
+		}
+
+		if ((*callback)(tree, id, l, p, data))
+			break;
+
+		/* ascend and to the next node */
+		p = quotatree_follow(tree, id, l, &st);
+
+		index = ((id >> shift) & QUOTATREE_BMASK) + 1;
+		l--;
+		shift += QUOTAID_BBITS;
+		id &= ~(((quotaid_t)1 << shift) - 1);
+	} while (l >= level_start);
+}
+#endif
+
+static void free_list(struct list_head *node_list)
+{
+	struct quotatree_node *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, node_list, list) {
+		list_del(&p->list);
+		kfree(p);
+	}
+}
+
+static inline void quotatree_free_nodes(struct quotatree_tree *tree)
+{
+	int i;
+
+	for (i = 0; i < QUOTATREE_DEPTH; i++) {
+		free_list(&tree->levels[i].usedlh);
+		free_list(&tree->levels[i].freelh);
+	}
+}
+
+static void quotatree_free_leafs(struct quotatree_tree *tree,
+		void (*dtor)(void *))
+{
+	int i;
+	struct quotatree_node *p;
+
+	list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) {
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			if (p->blocks[i] == NULL)
+				continue;
+
+			dtor(p->blocks[i]);
+		}
+	}
+}
+
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *))
+{
+	quotatree_free_leafs(tree, dtor);
+	quotatree_free_nodes(tree);
+	kfree(tree);
+}
diff -upr kernel-2.6.18-417.el5.orig/fs/vzdq_ugid.c kernel-2.6.18-417.el5-028stab121/fs/vzdq_ugid.c
--- kernel-2.6.18-417.el5.orig/fs/vzdq_ugid.c	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/vzdq_ugid.c	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,1220 @@
+/*
+ * Copyright (C) 2002 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo UID/GID disk quota implementation
+ */
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/quota.h>
+#include <linux/quotaio_v2.h>
+#include <linux/virtinfo.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/vmalloc.h>
+
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+/*
+ * XXX
+ * may be something is needed for sb->s_dquot->info[]?
+ */
+
+#define USRQUOTA_MASK		(1 << USRQUOTA)
+#define GRPQUOTA_MASK		(1 << GRPQUOTA)
+#define QTYPE2MASK(type)	(1 << (type))
+
+static kmem_cache_t *vz_quota_ugid_cachep;
+
+inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
+{
+	if (qugid != VZ_QUOTA_UGBAD)
+		atomic_inc(&qugid->qugid_count);
+	return qugid;
+}
+
+/* we don't limit users with zero limits */
+static inline int vzquota_fake_stat(struct dq_stat *stat)
+{
+	return stat->bhardlimit == 0 && stat->bsoftlimit == 0 &&
+		stat->ihardlimit == 0 && stat->isoftlimit == 0;
+}
+
+/* callback function for quotatree_free() */
+static inline void vzquota_free_qugid(void *ptr)
+{
+	kmem_cache_free(vz_quota_ugid_cachep, ptr);
+}
+
+/*
+ * destroy ugid, if it has zero refcount, limits and usage
+ * must be called under qmblk->dq_mutex
+ */
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid)
+{
+	if (qugid == VZ_QUOTA_UGBAD)
+		return;
+	qmblk_data_read_lock(qmblk);
+	if (atomic_dec_and_test(&qugid->qugid_count) &&
+	    (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&
+	    vzquota_fake_stat(&qugid->qugid_stat) &&
+	    qugid->qugid_stat.bcurrent == 0 &&
+	    qugid->qugid_stat.icurrent == 0) {
+		quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
+				qugid->qugid_id);
+		qmblk->dq_ugid_count--;
+		vzquota_free_qugid(qugid);
+	}
+	qmblk_data_read_unlock(qmblk);
+}
+
+/*
+ * Get ugid block by its index, like it would present in array.
+ * In reality, this is not array - this is leafs chain of the tree.
+ * NULL if index is out of range.
+ * qmblk semaphore is required to protect the tree.
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
+{
+	return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
+}
+
+/*
+ * get next element from ugid "virtual array"
+ * ugid must be in current array and this array may not be changed between
+ * two accesses (guaranteed by "stopped" quota state and quota semaphore)
+ * qmblk semaphore is required to protect the tree
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
+{
+	return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
+			qugid->qugid_id);
+}
+
+/*
+ * requires dq_mutex
+ */
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *qugid;
+	struct quotatree_tree *tree;
+	struct quotatree_find_state st;
+
+	tree = QUGID_TREE(qmblk, type);
+	qugid = quotatree_find(tree, quota_id, &st);
+	if (qugid)
+		goto success;
+
+	/* caller does not want alloc */
+	if (flags & VZDQUG_FIND_DONT_ALLOC)
+		goto fail;
+
+	if (flags & VZDQUG_FIND_FAKE)
+		goto doit;
+
+	/* check limit */
+	if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
+		goto fail;
+
+	/* see comment at VZDQUG_FIXED_SET define */
+	if (qmblk->dq_flags & VZDQUG_FIXED_SET)
+		goto fail;
+
+doit:
+	/* alloc new structure */
+	qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
+			SLAB_NOFS | __GFP_NOFAIL);
+	if (qugid == NULL)
+		goto fail;
+
+	/* initialize new structure */
+	qugid->qugid_id = quota_id;
+	memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
+	qugid->qugid_type = type;
+	atomic_set(&qugid->qugid_count, 0);
+
+	/* insert in tree */
+	if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
+		goto fail_insert;
+	qmblk->dq_ugid_count++;
+
+success:
+	vzquota_get_ugid(qugid);
+	return qugid;
+
+fail_insert:
+	vzquota_free_qugid(qugid);
+fail:
+	return VZ_QUOTA_UGBAD;
+}
+
+/*
+ * takes dq_mutex, may schedule
+ */
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *qugid;
+
+	mutex_lock(&qmblk->dq_mutex);
+	qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
+	mutex_unlock(&qmblk->dq_mutex);
+
+	return qugid;
+}
+
+/*
+ * destroy all ugid records on given quota master
+ */
+void vzquota_kill_ugid(struct vz_quota_master *qmblk)
+{
+	BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) ||
+		(qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL));
+
+	if (qmblk->dq_uid_tree != NULL) {
+		quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
+		quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
+	}
+}
+
+
+/* ----------------------------------------------------------------------
+ * Management interface to ugid quota for (super)users.
+ * --------------------------------------------------------------------- */
+
+static int vzquota_initialize2(struct inode *inode, int type)
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_drop2(struct inode *inode)
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_alloc_space2(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	inode_add_bytes(inode, number);
+	return QUOTA_OK;
+}
+
+static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number)
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_free_space2(struct inode *inode, qsize_t number)
+{
+	inode_sub_bytes(inode, number);
+	return QUOTA_OK;
+}
+
+static int vzquota_free_inode2(const struct inode *inode, unsigned long number)
+{
+	return QUOTA_OK;
+}
+
+static int vzquota_transfer2(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;
+}
+
+struct dquot_operations vz_quota_operations2 = {
+	.initialize	= vzquota_initialize2,
+	.drop		= vzquota_drop2,
+	.alloc_space	= vzquota_alloc_space2,
+	.alloc_inode	= vzquota_alloc_inode2,
+	.free_space	= vzquota_free_space2,
+	.free_inode	= vzquota_free_inode2,
+	.transfer	= vzquota_transfer2,
+};
+
+
+asmlinkage long sys_unlink(const char __user * pathname);
+asmlinkage long sys_rename(const char __user * oldname,
+	       const char __user * newname);
+asmlinkage long sys_symlink(const char __user * oldname,
+	       const char __user * newname);
+
+/* called under sb->s_umount semaphore */
+static int vz_restore_symlink(struct super_block *sb, char *path, int type)
+{
+	mm_segment_t oldfs;
+	char *newpath;
+	char dest[64];
+	const char *names[] = {
+		[USRQUOTA] "aquota.user",
+		[GRPQUOTA] "aquota.group"
+	};
+	int err;
+
+	newpath = kmalloc(strlen(path) + sizeof(".new"), GFP_KERNEL);
+	if (newpath == NULL)
+		return -ENOMEM;
+
+	strcpy(newpath, path);
+	strcat(newpath, ".new");
+
+	sprintf(dest, "/proc/vz/vzaquota/%08x/%s",
+			new_encode_dev(sb->s_dev), names[type]);
+
+	/*
+	 * Lockdep will learn unneeded dependency while unlink(2):
+	 *	->s_umount => ->i_mutex/1 => ->i_mutex
+	 * Reverse dependency is,
+	 *	open_namei() => ->i_mutex => lookup_hash() => __lookup_hash()
+	 *	=> ->lookup() \eq vzdq_aquotq_lookup() => find_qmblk_by_dev()
+	 *	=> user_get_super() => ->s_umount
+	 *
+	 * However, first set of ->i_mutex'es belong to /, second to /proc .
+	 * Right fix is to get rid of vz_restore_symlink(), of course.
+	 */
+	up_read(&sb->s_umount);
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_unlink(newpath);
+	if (err < 0 && err != -ENOENT)
+		goto out_restore;
+	err = sys_symlink(dest, newpath);
+	if (err < 0)
+		goto out_restore;
+	err = sys_rename(newpath, path);
+out_restore:
+	set_fs(oldfs);
+
+	down_read(&sb->s_umount);
+	/* umounted meanwhile? */
+	if (err == 0 && !sb->s_root)
+		err = -ENODEV;
+
+	kfree(newpath);
+	return err;
+}
+
+/* called under sb->s_umount semaphore */
+static int vz_quota_on(struct super_block *sb, int type,
+		int format_id, char *path)
+{
+	struct vz_quota_master *qmblk;
+	int mask, mask2;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = vz_restore_symlink(sb, path, type);
+	if (err < 0)
+		goto out_put;
+
+	mutex_lock(&vz_quota_mutex);
+	mask = 0;
+	mask2 = 0;
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+	if (type == USRQUOTA) {
+		mask = DQUOT_USR_ENABLED;
+		mask2 = VZDQ_USRQUOTA;
+	}
+	if (type == GRPQUOTA) {
+		mask = DQUOT_GRP_ENABLED;
+		mask2 = VZDQ_GRPQUOTA;
+	}
+	err = -EBUSY;
+	if (qmblk->dq_flags & mask2)
+		goto out_sem;
+
+	err = 0;
+	qmblk->dq_flags |= mask2;
+	sb->s_dquot.flags |= mask;
+
+out_sem:
+	mutex_unlock(&vz_quota_mutex);
+out_put:
+	qmblk_put(qmblk);
+out:
+	return err;
+}
+
+static int vz_quota_off(struct super_block *sb, int type)
+{
+	struct vz_quota_master *qmblk;
+	int mask2;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	mutex_lock(&vz_quota_mutex);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	mask2 = 0;
+	if (type == USRQUOTA)
+		mask2 = VZDQ_USRQUOTA;
+	if (type == GRPQUOTA)
+		mask2 = VZDQ_GRPQUOTA;
+	err = -EINVAL;
+	if (!(qmblk->dq_flags & mask2))
+		goto out;
+
+	qmblk->dq_flags &= ~mask2;
+	err = 0;
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_quota_sync(struct super_block *sb, int type)
+{
+	return 0;	/* vz quota is always uptodate */
+}
+
+static int vz_get_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *ugid;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	mutex_lock(&vz_quota_mutex);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = 0;
+	ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);
+	if (ugid != VZ_QUOTA_UGBAD) {
+		qmblk_data_read_lock(qmblk);
+		di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10;
+		di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10;
+		di->dqb_curspace = ugid->qugid_stat.bcurrent;
+		di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
+		di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
+		di->dqb_curinodes = ugid->qugid_stat.icurrent;
+		di->dqb_btime = ugid->qugid_stat.btime;
+		di->dqb_itime = ugid->qugid_stat.itime;
+		qmblk_data_read_unlock(qmblk);
+		di->dqb_valid = QIF_ALL;
+		vzquota_put_ugid(qmblk, ugid);
+	} else {
+		memset(di, 0, sizeof(*di));
+		di->dqb_valid = QIF_ALL;
+	}
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* must be called under vz_quota_mutex */
+static int __vz_set_dqblk(struct vz_quota_master *qmblk,
+		int type, qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_ugid *ugid;
+
+	ugid = vzquota_find_ugid(qmblk, id, type, 0);
+	if (ugid == VZ_QUOTA_UGBAD)
+		return -ESRCH;
+
+	qmblk_data_write_lock(qmblk);
+	/*
+	 * Subtle compatibility breakage.
+	 *
+	 * Some old non-vz kernel quota didn't start grace period
+	 * if the new soft limit happens to be below the usage.
+	 * Non-vz kernel quota in 2.4.20 starts the grace period
+	 * (if it hasn't been started).
+	 * Current non-vz kernel performs even more complicated
+	 * manipulations...
+	 *
+	 * Also, current non-vz kernels have inconsistency related to 
+	 * the grace time start.  In regular operations the grace period
+	 * is started if the usage is greater than the soft limit (and,
+	 * strangely, is cancelled if the usage is less).
+	 * However, set_dqblk starts the grace period if the usage is greater
+	 * or equal to the soft limit.
+	 *
+	 * Here we try to mimic the behavior of the current non-vz kernel.
+	 */
+	if (di->dqb_valid & QIF_BLIMITS) {
+		ugid->qugid_stat.bhardlimit =
+			(__u64)di->dqb_bhardlimit << 10;
+		ugid->qugid_stat.bsoftlimit =
+			(__u64)di->dqb_bsoftlimit << 10;
+		if (di->dqb_bsoftlimit == 0 ||
+		    ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
+			ugid->qugid_stat.btime = 0;
+		else if (!(di->dqb_valid & QIF_BTIME))
+			ugid->qugid_stat.btime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].bexpire;
+		else
+			ugid->qugid_stat.btime = di->dqb_btime;
+	}
+	if (di->dqb_valid & QIF_ILIMITS) {
+		ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
+		ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
+		if (di->dqb_isoftlimit == 0 ||
+		    ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
+			ugid->qugid_stat.itime = 0;
+		else if (!(di->dqb_valid & QIF_ITIME))
+			ugid->qugid_stat.itime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].iexpire;
+		else
+			ugid->qugid_stat.itime = di->dqb_itime;
+	}
+	qmblk_data_write_unlock(qmblk);
+	vzquota_put_ugid(qmblk, ugid);
+
+	return 0;
+}
+
+static int vz_set_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	mutex_lock(&vz_quota_mutex);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	err = __vz_set_dqblk(qmblk, type, id, di);
+out:
+	mutex_unlock(&vz_quota_mutex);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_get_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	mutex_lock(&vz_quota_mutex);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = 0;
+	ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
+	ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
+	ii->dqi_flags = 0;
+	ii->dqi_valid = IIF_ALL;
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* must be called under vz_quota_mutex */
+static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
+		int type, struct if_dqinfo *ii)
+{
+	if (ii->dqi_valid & IIF_FLAGS)
+		if (ii->dqi_flags & DQF_MASK)
+			return -EINVAL;
+
+	if (ii->dqi_valid & IIF_BGRACE)
+		qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
+	if (ii->dqi_valid & IIF_IGRACE)
+		qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
+	return 0;
+}
+
+/*
+ * vz_set_dqinfo - quotactl ->set_info handler.
+ *
+ * Same lookup/locking pattern as vz_set_dqblk; delegates to
+ * __vz_set_dqinfo under vz_quota_mutex.
+ */
+static int vz_set_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	mutex_lock(&vz_quota_mutex);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	err = __vz_set_dqinfo(qmblk, type, ii);
+out:
+	mutex_unlock(&vz_quota_mutex);
+	/* drop the reference obtained via vzquota_find_qmblk */
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+/* max number of v2_disk_dqblk entries returned per vz_get_quoti call */
+#define Q_GETQUOTI_SIZE 1024
+
+/*
+ * Translate in-kernel per-ugid stats into the on-disk v2 dqblk layout.
+ * Block limits are converted from bytes to 1K units; current space
+ * stays in bytes (64 bit field).
+ */
+#define UGID2DQBLK(dst, src)						\
+	do {								\
+		(dst)->dqb_ihardlimit = (src)->qugid_stat.ihardlimit;	\
+		(dst)->dqb_isoftlimit = (src)->qugid_stat.isoftlimit;	\
+		(dst)->dqb_curinodes = (src)->qugid_stat.icurrent;	\
+		/* in 1K blocks */					\
+		(dst)->dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
+		/* in 1K blocks */					\
+		(dst)->dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
+		/* in bytes, 64 bit */					\
+		(dst)->dqb_curspace = (src)->qugid_stat.bcurrent;	\
+		(dst)->dqb_btime = (src)->qugid_stat.btime;		\
+		(dst)->dqb_itime = (src)->qugid_stat.itime;		\
+	} while (0)
+
+/*
+ * vz_get_quoti - compat quotactl handler: dump up to Q_GETQUOTI_SIZE
+ * ugid entries of @type starting at tree index @idx into user space.
+ *
+ * Entries are snapshotted into a vmalloc'ed kernel buffer under
+ * vz_quota_mutex + dq_mutex (data copied under qmblk data read lock),
+ * then copied out in one go.  Returns the number of entries on
+ * success, -ESRCH/-EIO/-ENOMEM/-EFAULT on failure.
+ */
+static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
+		struct v2_disk_dqblk __user *dqblk)
+{
+	struct vz_quota_master *qmblk;
+	struct v2_disk_dqblk *data, *kbuf;
+	struct vz_quota_ugid *ugid;
+	int count;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+
+	err = -ENOMEM;
+	kbuf = vmalloc(Q_GETQUOTI_SIZE * sizeof(*kbuf));
+	if (!kbuf)
+		goto out;
+
+	mutex_lock(&vz_quota_mutex);
+	mutex_lock(&qmblk->dq_mutex);
+	for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
+		ugid != NULL && count < Q_GETQUOTI_SIZE;
+		count++)
+	{
+		data = kbuf + count;
+		qmblk_data_read_lock(qmblk);
+		UGID2DQBLK(data, ugid);
+		qmblk_data_read_unlock(qmblk);
+		data->dqb_id = ugid->qugid_id;
+
+		/* Find next entry */
+		ugid = vzquota_get_next(qmblk, ugid);
+		BUG_ON(ugid != NULL && ugid->qugid_type != type);
+	}
+	mutex_unlock(&qmblk->dq_mutex);
+	mutex_unlock(&vz_quota_mutex);
+
+	/* success value is the entry count; copy may downgrade to -EFAULT */
+	err = count;
+	if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf)))
+		err = -EFAULT;
+
+	vfree(kbuf);
+out:
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+
+	return err;
+}
+
+#endif
+
+/* quotactl entry points installed on superblocks under VZ quota control */
+struct quotactl_ops vz_quotactl_operations = {
+	.quota_on	= vz_quota_on,
+	.quota_off	= vz_quota_off,
+	.quota_sync	= vz_quota_sync,
+	.get_info	= vz_get_dqinfo,
+	.set_info	= vz_set_dqinfo,
+	.get_dqblk	= vz_get_dqblk,
+	.set_dqblk	= vz_set_dqblk,
+#ifdef CONFIG_QUOTA_COMPAT
+	.get_quoti	= vz_get_quoti,
+#endif
+};
+
+
+/* ----------------------------------------------------------------------
+ * Management interface for host system admins.
+ * --------------------------------------------------------------------- */
+
+/*
+ * quota_ugid_addstat - preload ugid records into a quota that is still
+ * in VZDQ_STARTING state (bulk import from user space).
+ *
+ * @compat selects the 32-bit-compat layout of the user buffer.
+ * Returns the number of entries successfully processed (the loop
+ * variable doubles as the return value), or -ENOENT/-EBUSY.
+ * A copy_from_user failure or bad entry just stops the loop early.
+ */
+static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
+		struct vz_quota_iface __user *u_ugid_buf, int compat)
+{
+	struct vz_quota_master *qmblk;
+	int ret;
+
+	mutex_lock(&vz_quota_mutex);
+
+	ret = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	ret = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept new ugids */
+
+	ret = 0;
+	/* start to add ugids; note: 'ret' is also the loop counter */
+	for (ret = 0; ret < ugid_size; ret++) {
+		struct vz_quota_iface ugid_buf;
+		struct vz_quota_ugid *ugid;
+
+		if (!compat) {
+			if (copy_from_user(&ugid_buf, u_ugid_buf,
+							sizeof(ugid_buf)))
+				break;
+			u_ugid_buf++; /* next user buffer */
+		} else {
+#ifdef CONFIG_COMPAT
+			struct compat_vz_quota_iface oqif;
+			if (copy_from_user(&oqif, u_ugid_buf,
+							sizeof(oqif)))
+				break;
+			ugid_buf.qi_id = oqif.qi_id;
+			ugid_buf.qi_type = oqif.qi_type;
+			compat_dqstat2dqstat(&oqif.qi_stat, &ugid_buf.qi_stat);
+			u_ugid_buf = (struct vz_quota_iface __user *)
+					(((void *)u_ugid_buf) + sizeof(oqif));
+#endif
+			/*
+			 * NOTE(review): with compat != 0 on a !CONFIG_COMPAT
+			 * kernel ugid_buf would be used uninitialized here;
+			 * presumably callers only pass compat=1 from the
+			 * compat syscall path -- TODO confirm.
+			 */
+		}
+
+		if (ugid_buf.qi_type >= MAXQUOTAS)
+			break; /* bad quota type - this is the only check */
+
+		ugid = vzquota_find_ugid(qmblk,
+				ugid_buf.qi_id, ugid_buf.qi_type, 0);
+		if (ugid == VZ_QUOTA_UGBAD) {
+			qmblk->dq_flags |= VZDQUG_FIXED_SET;
+			break; /* limit reached */
+		}
+
+		/* update usage/limits
+		 * we can copy the data without the lock, because the data
+		 * cannot be modified in VZDQ_STARTING state */
+		ugid->qugid_stat = ugid_buf.qi_stat;
+
+		vzquota_put_ugid(qmblk, ugid);
+	}
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return ret;
+}
+
+/*
+ * quota_ugid_setgrace - set per-type grace periods on a quota that is
+ * still in VZDQ_STARTING state.  @u_dq_info holds MAXQUOTAS entries;
+ * @compat selects the 32-bit layout.
+ * Returns 0, -ENOENT, -EBUSY or -EFAULT.
+ */
+static int quota_ugid_setgrace(unsigned int quota_id,
+		struct dq_info __user u_dq_info[], int compat)
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept changing options */
+
+	err = -EFAULT;
+	if (!compat) {
+		if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info)))
+			goto out;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_dq_info odqi[MAXQUOTAS];
+		if (copy_from_user(odqi, u_dq_info, sizeof(odqi)))
+			goto out;
+		for (type = 0; type < MAXQUOTAS; type++)
+			compat_dqinfo2dqinfo(&odqi[type], &dq_info[type]);
+#endif
+	}
+
+	err = 0;
+
+	/* update in qmblk */
+	for (type = 0; type < MAXQUOTAS; type++) {
+		target = &qmblk->dq_ugid_info[type];
+		target->bexpire = dq_info[type].bexpire;
+		target->iexpire = dq_info[type].iexpire;
+	}
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/*
+ * do_quota_ugid_getstat - snapshot up to @size ugid records starting at
+ * tree-leaf @index into the KERNEL buffer @u_ugid_buf (despite the
+ * name, this is kernel memory: entries are stored with memcpy; the
+ * caller copies them to user space afterwards).
+ *
+ * @index spans the uid tree first, then continues into the gid tree.
+ * Returns the number of records stored.
+ * Called with qmblk->dq_mutex held (see quota_ugid_getstat).
+ */
+static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
+		struct vz_quota_iface *u_ugid_buf)
+{
+	int type, count;
+	struct vz_quota_ugid *ugid;
+
+	if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
+	    QTREE_LEAFNUM(qmblk->dq_gid_tree)
+	    		<= index)
+		return 0;
+
+	count = 0;
+
+	/* indices past the uid leaves address the gid tree */
+	type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
+	if (type == GRPQUOTA)
+		index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);
+
+	/* loop through ugid and then qgid quota */
+repeat:
+	for (ugid = vzquota_get_byindex(qmblk, index, type);
+		ugid != NULL && count < size;
+		ugid = vzquota_get_next(qmblk, ugid), count++)
+	{
+		struct vz_quota_iface ugid_buf;
+
+		/* form interface buffer and send in to user-level */
+		qmblk_data_read_lock(qmblk);
+		memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
+				sizeof(ugid_buf.qi_stat));
+		qmblk_data_read_unlock(qmblk);
+		ugid_buf.qi_id = ugid->qugid_id;
+		ugid_buf.qi_type = ugid->qugid_type;
+
+		memcpy(u_ugid_buf, &ugid_buf, sizeof(ugid_buf));
+		u_ugid_buf++; /* next portion of user buffer */
+	}
+
+	if (type == USRQUOTA && count < size) {
+		type = GRPQUOTA;
+		index = 0;
+		goto repeat;
+	}
+
+	return count;
+}
+
+/*
+ * quota_ugid_getstat - copy up to @size ugid records of quota
+ * @quota_id, starting at @index, out to user space.
+ *
+ * Snapshots into a vmalloc'ed kernel buffer first (overflow-checked
+ * against INT_MAX), then converts per-entry for compat callers.
+ * Returns the record count or a negative errno.
+ * NOTE(review): size == 0 leads to vmalloc(0) -- verify intended.
+ */
+static int quota_ugid_getstat(unsigned int quota_id,
+		int index, int size, struct vz_quota_iface __user *u_ugid_buf,
+		int compat)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_iface *k_ugid_buf;
+	int err;
+
+	if (index < 0 || size < 0)
+		return -EINVAL;
+
+	/* guard the multiplication below against integer overflow */
+	if (size > INT_MAX / sizeof(struct vz_quota_iface))
+		return -EINVAL;
+
+	k_ugid_buf = vmalloc(size * sizeof(struct vz_quota_iface));
+	if (k_ugid_buf == NULL)
+		return -ENOMEM;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	mutex_lock(&qmblk->dq_mutex);
+	err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf);
+	mutex_unlock(&qmblk->dq_mutex);
+	if (err < 0)
+		goto out;
+
+	if (!compat) {
+		if (copy_to_user(u_ugid_buf, k_ugid_buf,
+					err * sizeof(struct vz_quota_iface)))
+			err = -EFAULT;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_iface oqif;
+		int i;
+		for (i = 0; i < err; i++) {
+			oqif.qi_id = k_ugid_buf[i].qi_id;
+			oqif.qi_type = k_ugid_buf[i].qi_type;
+			dqstat2compat_dqstat(&k_ugid_buf[i].qi_stat,
+					  &oqif.qi_stat);
+			if (copy_to_user(u_ugid_buf, &oqif, sizeof(oqif)))
+				err = -EFAULT;
+			u_ugid_buf = (struct vz_quota_iface __user *)
+					(((void *)u_ugid_buf) + sizeof(oqif));
+		}
+#endif
+	}
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+	vfree(k_ugid_buf);
+	return err;
+}
+
+/*
+ * quota_ugid_getgrace - report per-type grace periods and flags of
+ * quota @quota_id to user space (MAXQUOTAS dq_info entries).
+ * @compat selects the 32-bit layout.  Returns 0, -ENOENT or -EFAULT.
+ */
+static int quota_ugid_getgrace(unsigned int quota_id,
+		struct dq_info __user u_dq_info[], int compat)
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	
+	err = 0;
+	/* update from qmblk */
+	for (type = 0; type < MAXQUOTAS; type ++) {
+		target = &qmblk->dq_ugid_info[type];
+		dq_info[type].bexpire = target->bexpire;
+		dq_info[type].iexpire = target->iexpire;
+		dq_info[type].flags = target->flags;
+	}
+
+	if (!compat) {
+		if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
+			err = -EFAULT;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_dq_info odqi[MAXQUOTAS];
+		for (type = 0; type < MAXQUOTAS; type ++)
+			dqinfo2compat_dqinfo(&dq_info[type], &odqi[type]);
+		if (copy_to_user(u_dq_info, odqi, sizeof(odqi)))
+			err = -EFAULT;
+#endif
+	}
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/*
+ * quota_ugid_getconfig - report ugid limit/count/flags of quota
+ * @quota_id to user space.  Fields are read under vz_quota_mutex.
+ * Returns 0, -ENOENT or -EFAULT.
+ */
+static int quota_ugid_getconfig(unsigned int quota_id, 
+		struct vz_quota_ugid_stat __user *info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	
+	err = 0;
+	kinfo.limit = qmblk->dq_ugid_max;
+	kinfo.count = qmblk->dq_ugid_count;
+	kinfo.flags = qmblk->dq_flags;
+
+	if (copy_to_user(info, &kinfo, sizeof(kinfo)))
+		err = -EFAULT;
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/*
+ * quota_ugid_setconfig - set the ugid limit of quota @quota_id; flags
+ * may only be changed while the quota is still in VZDQ_STARTING state
+ * (VZDQUG_ON implies both user and group quota flags).
+ * Returns 0, -ENOENT or -EFAULT.
+ */
+static int quota_ugid_setconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat __user *info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&kinfo, info, sizeof(kinfo)))
+		goto out;
+
+	err = 0;
+	qmblk->dq_ugid_max = kinfo.limit;
+	if (qmblk->dq_state == VZDQ_STARTING) {
+		qmblk->dq_flags = kinfo.flags;
+		if (qmblk->dq_flags & VZDQUG_ON)
+			qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;
+	}		
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/*
+ * quota_ugid_setlimit - set per-ugid limits (management interface
+ * variant of vz_set_dqblk, addressed by quota id instead of sb).
+ * Returns 0, -ESRCH, -EFAULT, or __vz_set_dqblk's result.
+ */
+static int quota_ugid_setlimit(unsigned int quota_id,
+		struct vz_quota_ugid_setlimit __user *u_lim)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setlimit lim;
+	int err;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&lim, u_lim, sizeof(lim)))
+		goto out;
+
+	err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/*
+ * quota_ugid_setinfo - set grace periods (management interface variant
+ * of vz_set_dqinfo, addressed by quota id instead of sb).
+ * Returns 0, -ESRCH, -EFAULT, or __vz_set_dqinfo's result.
+ */
+static int quota_ugid_setinfo(unsigned int quota_id,
+		struct vz_quota_ugid_setinfo __user *u_info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setinfo info;
+	int err;
+
+	mutex_lock(&vz_quota_mutex);
+
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		goto out;
+
+	err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
+
+out:
+	mutex_unlock(&vz_quota_mutex);
+
+	return err;
+}
+
+/*
+ * do_vzquotaugidctl - dispatcher for the UGID quota maintenance
+ * syscall.  Requires both CAP_SYS_RESOURCE and CAP_SYS_ADMIN
+ * (i.e. root of VE0); @compat flags a 32-bit caller.
+ * Note this call is allowed to run ONLY from VE0.
+ */
+long do_vzquotaugidctl(int cmd, unsigned int quota_id,
+		unsigned int ugid_index, unsigned int ugid_size,
+		void *addr, int compat)
+{
+	int ret;
+
+	ret = -EPERM;
+	/* access allowed only from root of VE0 */
+	if (!capable(CAP_SYS_RESOURCE) ||
+	    !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	switch (cmd) {
+		case VZ_DQ_UGID_GETSTAT:
+			ret = quota_ugid_getstat(quota_id,
+					ugid_index, ugid_size,
+				       	(struct vz_quota_iface __user *)addr,
+					compat);
+			break;
+		case VZ_DQ_UGID_ADDSTAT:
+			ret = quota_ugid_addstat(quota_id, ugid_size,
+					(struct vz_quota_iface __user *) addr,
+					compat);
+			break;
+		case VZ_DQ_UGID_GETGRACE:
+			ret = quota_ugid_getgrace(quota_id,
+					(struct dq_info __user *)addr, compat);
+			break;
+		case VZ_DQ_UGID_SETGRACE:
+			ret = quota_ugid_setgrace(quota_id,
+					(struct dq_info __user *)addr, compat);
+			break;
+		case VZ_DQ_UGID_GETCONFIG:
+			ret = quota_ugid_getconfig(quota_id,
+					(struct vz_quota_ugid_stat __user *)
+								addr);
+			break;
+		case VZ_DQ_UGID_SETCONFIG:
+			ret = quota_ugid_setconfig(quota_id,
+					(struct vz_quota_ugid_stat __user *)
+								addr);
+			break;
+		case VZ_DQ_UGID_SETLIMIT:
+			ret = quota_ugid_setlimit(quota_id,
+					(struct vz_quota_ugid_setlimit __user *)
+								addr);
+			break;
+		case VZ_DQ_UGID_SETINFO:
+			ret = quota_ugid_setinfo(quota_id,
+					(struct vz_quota_ugid_setinfo __user *)
+								addr);
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+	}
+out:
+	return ret;
+}
+
+/*
+ * ugid_quota_on_sb - install VZ quota ops on @sb when the underlying
+ * (real) superblock is managed by vz_quota_operations, and mirror the
+ * qmblk's user/group enable flags into s_dquot.
+ * NOTE(review): assumes ->get_quota_root(sb) never returns NULL --
+ * the inode pointer is dereferenced unchecked; TODO confirm.
+ */
+static void ugid_quota_on_sb(struct super_block *sb)
+{
+	struct super_block *real_sb;
+	struct vz_quota_master *qmblk;
+
+	if (!sb->s_op->get_quota_root)
+		return;
+
+	real_sb = sb->s_op->get_quota_root(sb)->i_sb;
+	if (real_sb->dq_op != &vz_quota_operations)
+		return;
+
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+	INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+	INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+	sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+	sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+
+	qmblk = vzquota_find_qmblk(sb);
+	if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
+		return;
+	mutex_lock(&vz_quota_mutex);
+	if (qmblk->dq_flags & VZDQ_USRQUOTA)
+		sb->s_dquot.flags |= DQUOT_USR_ENABLED;
+	if (qmblk->dq_flags & VZDQ_GRPQUOTA)
+		sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
+	mutex_unlock(&vz_quota_mutex);
+	qmblk_put(qmblk);
+}
+
+/* quota-off notification: must only happen after sb is unmounted */
+static void ugid_quota_off_sb(struct super_block *sb)
+{
+	/* can't make quota off on mounted super block */
+	BUG_ON(sb->s_root != NULL);
+}
+
+/*
+ * ugid_notifier_call - virtinfo notifier hook reacting to quota
+ * on/off events on superblocks; other events fall through to the
+ * previous return value.
+ */
+static int ugid_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int old_ret)
+{
+	struct virt_info_quota *viq;
+
+	viq = (struct virt_info_quota *)data;
+
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		ugid_quota_on_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		ugid_quota_off_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		break;
+	default:
+		return old_ret;
+	}
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block ugid_notifier_block = {
+	.notifier_call = ugid_notifier_call,
+};
+
+/* ----------------------------------------------------------------------
+ * Init/exit.
+ * --------------------------------------------------------------------- */
+
+/*
+ * vzquota_ugid_init - module init for the ugid quota part: create the
+ * ugid slab cache, register the empty v2 quota format, and hook into
+ * the VITYPE_QUOTA notifier chain.  Returns 0 or a negative errno,
+ * unwinding in reverse order on failure.
+ */
+int vzquota_ugid_init(void)
+{
+	int err;
+
+	vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
+				      sizeof(struct vz_quota_ugid),
+				      0, SLAB_HWCACHE_ALIGN,
+				      NULL, NULL);
+	if (vz_quota_ugid_cachep == NULL)
+		goto err_slab;
+
+	err = register_quota_format(&vz_quota_empty_v2_format);
+	if (err)
+		goto err_reg;
+
+	virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
+	return 0;
+
+err_reg:
+	kmem_cache_destroy(vz_quota_ugid_cachep);
+	return err;
+
+err_slab:
+	printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+	return -ENOMEM;
+}
+
+/*
+ * vzquota_ugid_release - teardown counterpart of vzquota_ugid_init,
+ * in reverse registration order.  (kmem_cache_destroy returns a status
+ * on this kernel generation, hence the check.)
+ */
+void vzquota_ugid_release(void)
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
+	unregister_quota_format(&vz_quota_empty_v2_format);
+
+	if (kmem_cache_destroy(vz_quota_ugid_cachep))
+		printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
+}
diff -upr kernel-2.6.18-417.el5.orig/fs/vzdquot.c kernel-2.6.18-417.el5-028stab121/fs/vzdquot.c
--- kernel-2.6.18-417.el5.orig/fs/vzdquot.c	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/vzdquot.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,2069 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains the core of Virtuozzo disk quota implementation:
+ * maintenance of VZDQ information in inodes,
+ * external interfaces,
+ * module entry.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/quota.h>
+#include <linux/rcupdate.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+#include <linux/virtinfo.h>
+#include <linux/vzdq_tree.h>
+#include <linux/vzsnap.h>
+#include <linux/mount.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * Locking
+ *
+ * ---------------------------------------------------------------------- */
+
+/*
+ * Serializes on/off and all other do_vzquotactl operations.
+ * Protects qmblk hash.
+ */
+struct mutex vz_quota_mutex;
+
+/*
+ * Data access locks
+ *  inode_qmblk
+ *	protects qmblk pointers in all inodes and qlnk content in general
+ *	(but not qmblk content);
+ *	also protects related qmblk invalidation procedures;
+ *	can't be per-inode because of vzquota_dtree_qmblk complications
+ *	and problems with serialization with quota_on,
+ *	but can be per-superblock;
+ *  qmblk_data
+ *	protects qmblk fields (such as current usage)
+ *  quota_data
+ *	protects charge/uncharge operations, thus, implies
+ *	qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
+ *	(to protect ugid pointers).
+ *
+ * Lock order:
+ *  inode_qmblk_lock -> dcache_lock
+ *  inode_qmblk_lock -> qmblk_data
+ */
+static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * inode_qmblk lock: currently one global spinlock regardless of @sb
+ * (the sb argument is kept for a possible per-superblock future).
+ */
+inline void inode_qmblk_lock(struct super_block *sb)
+{
+	spin_lock(&vzdq_qmblk_lock);
+}
+
+inline void inode_qmblk_unlock(struct super_block *sb)
+{
+	spin_unlock(&vzdq_qmblk_lock);
+}
+
+/*
+ * qmblk data lock: the read/write variants are currently identical --
+ * both take the plain dq_data_lock spinlock.
+ */
+inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+/* placeholder v2 format: no ops, used only to satisfy the VFS */
+struct quota_format_type vz_quota_empty_v2_format = {
+	.qf_fmt_id	= QFMT_VFS_V0,
+	.qf_ops		= NULL,
+	.qf_owner	= THIS_MODULE,
+};
+
+/* ----------------------------------------------------------------------
+ *
+ * Master hash table handling.
+ *
+ * SMP not safe, serialized by vz_quota_mutex within quota syscalls
+ *
+ * --------------------------------------------------------------------- */
+
+/* slab cache for struct vz_quota_master */
+static kmem_cache_t *vzquota_cachep;
+
+/*
+ * Hash function.
+ */
+#define QHASH_BITS		6
+#define	VZ_QUOTA_HASH_SIZE	(1 << QHASH_BITS)
+#define QHASH_MASK		(VZ_QUOTA_HASH_SIZE - 1)
+
+struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
+int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
+
+/* fold the quota id into a hash bucket index */
+static inline int vzquota_hash_func(unsigned int qid)
+{
+	return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK);
+}
+
+/**
+ * vzquota_alloc_master - alloc and instantiate master quota record
+ *
+ * Allocates a qmblk in VZDQ_STARTING state with refcount 1 and links
+ * it into the hash.  Called under vz_quota_mutex (hash access).
+ *
+ * Returns:
+ *	pointer to newly created record if SUCCESS
+ *	-ENOMEM if out of memory
+ *	-EEXIST if record with given quota_id already exist
+ */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+
+	err = -EEXIST;
+	if (vzquota_find_master(quota_id) != NULL)
+		goto out;
+
+	err = -ENOMEM;
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		goto out;
+#ifdef CONFIG_VZ_QUOTA_UGID
+	qmblk->dq_uid_tree = quotatree_alloc();
+	if (!qmblk->dq_uid_tree)
+		goto out_free;
+
+	qmblk->dq_gid_tree = quotatree_alloc();
+	if (!qmblk->dq_gid_tree)
+		goto out_free_tree;
+#endif
+
+	qmblk->dq_state = VZDQ_STARTING;
+	mutex_init(&qmblk->dq_mutex);
+	spin_lock_init(&qmblk->dq_data_lock);
+
+	/* every field is initialized explicitly (cache alloc does not zero) */
+	qmblk->dq_id = quota_id;
+	qmblk->dq_stat = qstat->dq_stat;
+	qmblk->dq_info = qstat->dq_info;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+	qmblk->dq_sb = NULL;
+	qmblk->dq_snap = NULL;
+	qmblk->dq_ugid_count = 0;
+	qmblk->dq_ugid_max = 0;
+	qmblk->dq_flags = 0;
+	memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+
+	atomic_set(&qmblk->dq_count, 1);
+
+	/* insert in hash chain */
+	list_add(&qmblk->dq_hash,
+		&vzquota_hash_table[vzquota_hash_func(quota_id)]);
+
+	/* success */
+	return qmblk;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+out_free_tree:
+	quotatree_free(qmblk->dq_uid_tree, NULL);
+out_free:
+	kmem_cache_free(vzquota_cachep, qmblk);
+#endif
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * vzquota_alloc_fake - allocate a zeroed "no quota" placeholder qmblk
+ * (VZDQ_STOPING/VZDQ_NOQUOT, refcount 1) that is never hashed.
+ * Returns NULL on allocation failure.
+ */
+static struct vz_quota_master *vzquota_alloc_fake(void)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		return NULL;
+	memset(qmblk, 0, sizeof(*qmblk));
+	qmblk->dq_state = VZDQ_STOPING;
+	qmblk->dq_flags = VZDQ_NOQUOT;
+	spin_lock_init(&qmblk->dq_data_lock);
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+	atomic_set(&qmblk->dq_count, 1);
+	return qmblk;
+}
+
+/**
+ * vzquota_find_master - find master record with given id
+ *
+ * Walks the hash bucket selected by vzquota_hash_func(quota_id).
+ * Returns qmblk without touching its refcounter.
+ * Called under vz_quota_mutex.
+ */
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
+{
+	int i;
+	struct vz_quota_master *qp;
+
+	i = vzquota_hash_func(quota_id);
+	list_for_each_entry(qp, &vzquota_hash_table[i], dq_hash) {
+		if (qp->dq_id == quota_id)
+			return qp;
+	}
+	return NULL;
+}
+
+/**
+ * vzquota_free_master - release resources taken by qmblk, freeing memory
+ *
+ * qmblk is assumed to be already taken out from the hash and to have
+ * no inodes left on its dq_ilink_list (BUG otherwise).
+ * Should be called outside vz_quota_mutex.
+ */
+void vzquota_free_master(struct vz_quota_master *qmblk)
+{
+#ifdef CONFIG_VZ_QUOTA_UGID
+	vzquota_kill_ugid(qmblk);
+#endif
+	BUG_ON(!list_empty(&qmblk->dq_ilink_list));
+	kmem_cache_free(vzquota_cachep, qmblk);
+}
+
+
+/*
+ * Per-task quota hint: current->magic/current->ino carry the inode a
+ * quota operation is being performed for (set elsewhere in VZ code).
+ */
+static inline int vzquota_cur_qmblk_check(void)
+{
+	return current->magic == VZDQ_CUR_MAGIC;
+}
+
+static inline struct inode *vzquota_cur_qmblk_fetch(void)
+{
+	return current->ino;
+}
+
+#if 0
+static inline void vzquota_cur_qmblk_reset(void)
+{
+	current->magic = 0;
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Superblock quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * Kernel structure abuse.
+ * We use files[0] pointer as an int variable:
+ * reference counter of how many quota blocks uses this superblock.
+ * files[1] is used for generations structure which helps us to track
+ * when traversing of dentries is really required.
+ */
+#define __VZ_QUOTA_NOQUOTA(sb)		sb->s_dquot.vzdq_master
+#define __VZ_QUOTA_TSTAMP(sb)		((struct timeval *)\
+						&sb->s_dquot.dqio_mutex)
+
+#if defined(VZ_QUOTA_UNLOAD)
+
+#define __VZ_QUOTA_SBREF(sb)		sb->s_dquot.vzdq_count
+
+struct dquot_operations *orig_dq_op;
+struct quotactl_ops *orig_dq_cop;
+
+/**
+ * quota_get_super - account for a new quoted tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.  We keep a counter of such subtrees and set VZ quota operations or
+ * reset the default ones.
+ *
+ * Called under vz_quota_mutex (from quota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	if (sb->dq_op != &vz_quota_operations) {
+		down(&sb->s_dquot.dqonoff_sem);
+		if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
+			up(&sb->s_dquot.dqonoff_sem);
+			return -EEXIST;
+		}
+		/* remember the original ops so put_super can restore them */
+		if (orig_dq_op == NULL && sb->dq_op != NULL)
+			orig_dq_op = sb->dq_op;
+		sb->dq_op = &vz_quota_operations;
+		if (orig_dq_cop == NULL && sb->s_qcop != NULL)
+			orig_dq_cop = sb->s_qcop;
+		/* XXX this may race with sys_quotactl */
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		/*
+		 * To get quotaops.h to call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_sched();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+		__module_get(THIS_MODULE);
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+	/* protected by vz_quota_mutex */
+	__VZ_QUOTA_SBREF(sb)++;
+	return 0;
+}
+
+/**
+ * quota_put_super - release superblock when one quota tree goes away
+ *
+ * Decrements the per-sb subtree counter; on reaching zero restores the
+ * original dq_op/s_qcop and invalidates all cached quota links.
+ * Called under vz_quota_mutex.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	int count;
+
+	count = --__VZ_QUOTA_SBREF(sb);
+	if (count == 0) {
+		down(&sb->s_dquot.dqonoff_sem);
+		sb->s_dquot.flags = 0;
+		wmb(); synchronize_sched();
+		sema_init(&sb->s_dquot.dqio_sem, 1);
+		sb->s_qcop = orig_dq_cop;
+		sb->dq_op = orig_dq_op;
+		inode_qmblk_lock(sb);
+		quota_gen_put(SB_QGEN(sb));
+		SB_QGEN(sb) = NULL;
+		/* release qlnk's without qmblk */
+		remove_inode_quota_links_list(&non_vzquota_inodes_lh,
+				sb, NULL);
+		/*
+		 * Races with quota initialization:
+		 * after this inode_qmblk_unlock all inode's generations are
+		 * invalidated, quota_inode_qmblk checks superblock operations.
+		 */
+		inode_qmblk_unlock(sb);
+		/*
+		 * Module refcounting: in theory, this is the best place
+		 * to call module_put(THIS_MODULE).
+		 * In reality, it can't be done because we can't be sure that
+		 * other CPUs do not enter our code segment through dq_op
+		 * cached long time ago.  Quotaops interface isn't supposed to
+		 * go into modules currently (that is, into unloadable
+		 * modules).  By omitting module_put, our module isn't
+		 * unloadable.
+		 */
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+}
+
+#else
+
+/**
+ * vzquota_shutdown_super - callback on umount
+ *
+ * Detaches and releases the per-sb "no quota" placeholder qmblk
+ * (see __VZ_QUOTA_NOQUOTA / vzquota_alloc_fake).
+ */
+void vzquota_shutdown_super(struct super_block *sb)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = __VZ_QUOTA_NOQUOTA(sb);
+	__VZ_QUOTA_NOQUOTA(sb) = NULL;
+	if (qmblk != NULL)
+		qmblk_put(qmblk);
+}
+
+/**
+ * vzquota_get_super - account for a new quoted tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.  Installs VZ dq_op/s_qcop on first use and allocates the
+ * per-sb "no quota" placeholder qmblk.
+ *
+ * Called under vz_quota_mutex (from vzquota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	struct vz_quota_master *qnew;
+	int err;
+
+	mutex_lock(&sb->s_dquot.dqonoff_mutex);
+	err = -EEXIST;
+	if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
+	    sb->dq_op != &vz_quota_operations)
+		goto out_up;
+
+	/*
+	 * This allocation code should be under sb->dq_op check below, but
+	 * it doesn't really matter...
+	 */
+	if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
+		qnew = vzquota_alloc_fake();
+		if (qnew == NULL)
+			goto out_up;
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	if (sb->dq_op != &vz_quota_operations) {
+		sb->dq_op = &vz_quota_operations;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		/* these 2 list heads are checked in sync_dquots() */
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format =
+						&vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format =
+						&vz_quota_empty_v2_format;
+
+		/*
+		 * To get quotaops.h to call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_sched();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+	}
+	err = 0;
+
+out_up:
+	mutex_unlock(&sb->s_dquot.dqonoff_mutex);
+	return err;
+}
+
+/**
+ * vzquota_put_super - one quota tree less on this superblock
+ *
+ * Intentionally a no-op in this (non-UNLOAD) configuration.
+ * Called under vz_quota_mutex.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	/*
+	 * Even if this put is the last one,
+	 * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
+	 * won't be called and the remaining qmblk references won't be put.
+	 */
+}
+
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Helpers for inode -> qmblk link maintenance
+ *
+ * --------------------------------------------------------------------- */
+
+#define __VZ_QUOTA_EMPTY		((void *)0xbdbdbdbd)
+#define VZ_QUOTA_IS_NOQUOTA(qm, sb)	((qm)->dq_flags & VZDQ_NOQUOT)
+#define VZ_QUOTA_EMPTY_IOPS		(&vfs_empty_iops)
+extern struct inode_operations vfs_empty_iops;
+
+/*
+ * VZ_QUOTA_IS_ACTUAL - is the inode's cached qmblk link still valid?
+ * BAD links are considered actual (nothing to refresh); EMPTY or
+ * NOACT-flagged (invalidated) qmblks are not.
+ */
+static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk == VZ_QUOTA_BAD)
+		return 1;
+	if (qmblk == __VZ_QUOTA_EMPTY)
+		return 0;
+	if (qmblk->dq_flags & VZDQ_NOACT)
+		/* not actual (invalidated) qmblk */
+		return 0;
+	return 1;
+}
+
+/* an "empty" qlnk points at the __VZ_QUOTA_EMPTY sentinel */
+static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
+{
+	return qlnk->qmblk == __VZ_QUOTA_EMPTY;
+}
+
+/*
+ * Record the last two origins of a qlnk transition (debug trail:
+ * origin[0] keeps the previous value, origin[1] the newest).
+ */
+static inline void set_qlnk_origin(struct vz_quota_ilink *qlnk,
+		unsigned char origin)
+{
+	qlnk->origin[0] = qlnk->origin[1];
+	qlnk->origin[1] = origin;
+}
+
+static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
+{
+	qlnk->qmblk = __VZ_QUOTA_EMPTY;
+	set_qlnk_origin(qlnk, VZ_QUOTAO_SETE);
+}
+
+/* zero a fresh qlnk and mark it empty */
+void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
+{
+	memset(qlnk, 0, sizeof(*qlnk));
+	INIT_LIST_HEAD(&qlnk->list);
+	vzquota_qlnk_set_empty(qlnk);
+	set_qlnk_origin(qlnk, VZ_QUOTAO_INIT);
+}
+
+/*
+ * vzquota_qlnk_destroy - drop every reference held by a qlnk: the
+ * per-uid/gid ugid references (under dq_mutex, hence might_sleep) and
+ * finally the qmblk reference itself.  Empty/BAD links hold nothing.
+ */
+void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
+{
+	might_sleep();
+	if (vzquota_qlnk_is_empty(qlnk))
+		return;
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
+		struct vz_quota_master *qmblk;
+		struct vz_quota_ugid *quid, *qgid;
+		qmblk = qlnk->qmblk;
+		quid = qlnk->qugid[USRQUOTA];
+		qgid = qlnk->qugid[GRPQUOTA];
+		if (quid != NULL || qgid != NULL) {
+			mutex_lock(&qmblk->dq_mutex);
+			if (qgid != NULL)
+				vzquota_put_ugid(qmblk, qgid);
+			if (quid != NULL)
+				vzquota_put_ugid(qmblk, quid);
+			mutex_unlock(&qmblk->dq_mutex);
+		}
+	}
+#endif
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qlnk->qmblk);
+	set_qlnk_origin(qlnk, VZ_QUOTAO_DESTR);
+}
+
+/**
+ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
+ * @qlt: temporary
+ * @qli: inode's
+ *
+ * Locking is provided by the caller (depending on the context).
+ * After swap, @qli is inserted into the corresponding dq_ilink_list,
+ * @qlt list is reinitialized.  The per-type ugid pointers are swapped
+ * as well.
+ */
+static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
+		struct vz_quota_ilink *qli)
+{
+	struct vz_quota_master *qb;
+	struct vz_quota_ugid *qu;
+	int i;
+
+	qb = qlt->qmblk;
+	qlt->qmblk = qli->qmblk;
+	qli->qmblk = qb;
+	list_del_init(&qli->list);
+	/* only real qmblks keep an ilink list; EMPTY/BAD do not */
+	if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD)
+		list_add(&qli->list, &qb->dq_ilink_list);
+	INIT_LIST_HEAD(&qlt->list);
+	set_qlnk_origin(qli, VZ_QUOTAO_SWAP);
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		qu = qlt->qugid[i];
+		qlt->qugid[i] = qli->qugid[i];
+		qli->qugid[i] = qu;
+	}
+}
+
+/**
+ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ *
+ * Empty and BAD qlnks can be reset without sleeping; anything else holds
+ * references that vzquota_qlnk_destroy() must drop, which may sleep, so the
+ * locks are released around it and the caller must recheck its state.
+ */
+static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
+		struct inode *inode)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	if (qlnk->qmblk == VZ_QUOTA_BAD) {
+		vzquota_qlnk_set_empty(qlnk);
+		set_qlnk_origin(qlnk, VZ_QUOTAO_RE_LOCK);
+		return 0;
+	}
+	spin_unlock(&dcache_lock);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	spin_lock(&dcache_lock);
+	return 1;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
+ *
+ * Similar to vzquota_qlnk_reinit_locked, called under different locks
+ * (qmblk data write lock + inode_qmblk instead of dcache_lock).
+ * Always drops and retakes the locks around the sleeping destroy/init pair;
+ * returns 1 in that case, 0 only if @qlnk was already empty.
+ */
+static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	/* may be optimized if qlnk->qugid all NULLs */
+	qmblk_data_write_unlock(qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	qmblk_data_write_lock(qmblk);
+	return 1;
+}
+#endif
+
+/**
+ * vzquota_qlnk_fill - fill vz_quota_ilink content
+ * @qlnk: vz_quota_ilink to fill
+ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
+ * @qmblk: qmblk to which this @qlnk will belong
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ * @qlnk is expected to be empty.
+ *
+ * Takes a reference on @qmblk (unless BAD).  If ugid quota is active on the
+ * master block, also looks up (and references) the per-uid and per-gid
+ * entries for the inode's owner; that lookup needs dq_mutex and therefore
+ * the spinlocks are temporarily released.
+ */
+static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (qmblk != VZ_QUOTA_BAD)
+		qmblk_get(qmblk);
+	qlnk->qmblk = qmblk;
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qmblk != VZ_QUOTA_BAD &&
+	    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+	    (qmblk->dq_flags & VZDQUG_ON)) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		spin_unlock(&dcache_lock);
+		inode_qmblk_unlock(inode->i_sb);
+
+		mutex_lock(&qmblk->dq_mutex);
+		quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
+		qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
+		mutex_unlock(&qmblk->dq_mutex);
+
+		inode_qmblk_lock(inode->i_sb);
+		spin_lock(&dcache_lock);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;
+	}
+#endif
+
+	return 0;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
+ *
+ * This function is a helper for vzquota_transfer, and differs from
+ * vzquota_qlnk_fill only by locking.
+ *
+ * @mask is a bitmask of (1 << USRQUOTA) / (1 << GRPQUOTA) telling which
+ * owners change: changing ids are looked up from @iattr under dq_mutex
+ * (locks dropped, returns 1), unchanged ids just get an extra reference to
+ * the inode's current ugid entries.  Returns 0 when @mask is empty.
+ */
+static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct iattr *iattr,
+		int mask,
+		struct vz_quota_master *qmblk)
+{
+	qmblk_get(qmblk);
+	qlnk->qmblk = qmblk;
+
+	if (mask) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		quid = qgid = NULL; /* to make gcc happy */
+		if (!(mask & (1 << USRQUOTA)))
+			quid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[USRQUOTA]);
+		if (!(mask & (1 << GRPQUOTA)))
+			qgid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[GRPQUOTA]);
+
+		qmblk_data_write_unlock(qmblk);
+		inode_qmblk_unlock(inode->i_sb);
+
+		mutex_lock(&qmblk->dq_mutex);
+		if (mask & (1 << USRQUOTA))
+			quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
+					USRQUOTA, 0);
+		if (mask & (1 << GRPQUOTA))
+			qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
+					GRPQUOTA, 0);
+		mutex_unlock(&qmblk->dq_mutex);
+
+		inode_qmblk_lock(inode->i_sb);
+		qmblk_data_write_lock(qmblk);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;
+	}
+
+	return 0;
+}
+#endif
+
+/**
+ * __vzquota_inode_init - make sure inode's qlnk is initialized
+ *
+ * May be called if qlnk is already initialized, detects this situation itself.
+ * Called under inode_qmblk_lock.
+ *
+ * i_dquot[USRQUOTA] == NODQUOT is (ab)used as the "qlnk not yet initialized"
+ * marker; after initialization it is set to the complemented NODQUOT value so
+ * repeated calls only refresh the origin tag.
+ */
+static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
+{
+	if (inode->i_dquot[USRQUOTA] == NODQUOT) {
+		vzquota_qlnk_init(INODE_QLNK(inode));
+		inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT;
+	}
+	set_qlnk_origin(INODE_QLNK(inode), origin);
+}
+
+/**
+ * vzquota_inode_drop - destroy VZ quota information in the inode
+ *
+ * Inode must not be externally accessible or dirty.
+ *
+ * The inode's qlnk content is swapped into a stack-local qlnk under the
+ * inode_qmblk lock, the "uninitialized" marker is restored, and the actual
+ * (possibly sleeping) release happens outside the lock.
+ */
+static void vzquota_inode_drop(struct inode *inode)
+{
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode));
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DRCAL);
+	inode->i_dquot[USRQUOTA] = NODQUOT;
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+}
+
+/**
+ * vzquota_inode_qmblk_set - initialize inode's qlnk
+ * @inode: inode to be initialized
+ * @qmblk: quota master block to which this inode should belong (may be BAD)
+ * @qlnk: placeholder to store data to resolve locking issues
+ *
+ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
+ * Called under dcache_lock and inode_qmblk locks.
+ * @qlnk will be destroyed in the caller chain.
+ *
+ * It is not mandatory to restart parent checks since quota on/off currently
+ * shrinks dentry tree and checks that there are not outside references.
+ * But if at some time that shink is removed, restarts will be required.
+ * Additionally, the restarts prevent inconsistencies if the dentry tree
+ * changes (inode is moved).  This is not a big deal, but anyway...
+ *
+ * A NULL @qmblk is treated as a bug: loudly reported and downgraded to BAD.
+ */
+static int vzquota_inode_qmblk_set(struct inode *inode,
+		struct vz_quota_master *qmblk,
+		struct vz_quota_ilink *qlnk)
+{
+	if (qmblk == NULL) {
+		printk(KERN_ERR "VZDQ: NULL in set, orig {%u, %u}, "
+				"dev %s, inode %lu, fs %s\n",
+				INODE_QLNK(inode)->origin[0],
+				INODE_QLNK(inode)->origin[1],
+				inode->i_sb->s_id, inode->i_ino,
+				inode->i_sb->s_type->name);
+		printk(KERN_ERR "current %d (%s), VE %d\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()));
+		dump_stack();
+		qmblk = VZ_QUOTA_BAD;
+	}
+	/* loop until @qlnk is filled for @qmblk without dropping the locks */
+	while (1) {
+		if (vzquota_qlnk_is_empty(qlnk) &&
+		    vzquota_qlnk_fill(qlnk, inode, qmblk))
+			return 1;
+		if (qlnk->qmblk == qmblk)
+			break;
+		if (vzquota_qlnk_reinit_locked(qlnk, inode))
+			return 1;
+	}
+	vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_QSET);
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * Sanity-check that @parent is usable as the quota parent of @inode.
+ * Returns NULL if it is, otherwise a static message describing why not
+ * (used verbatim in the callers' warning printks).
+ */
+static char *vzquota_check_parent(struct inode *parent, struct inode *inode)
+{
+	char *msg;
+
+	msg = "uninitialized parent";
+	if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
+		goto out;
+	msg = "parent not in tree";
+	if (list_empty(&parent->i_dentry))
+		goto out;
+	msg = "parent has 0 refcount";
+	if (!atomic_read(&parent->i_count))
+		goto out;
+	msg = "parent has different sb";
+	if (parent->i_sb != inode->i_sb)
+		goto out;
+
+	msg = NULL;
+out:
+	return msg;
+}
+
+/*
+ * Check that @inode is attached to the dentry tree (has at least one alias).
+ * Returns 0 if attached, -1 (with an error printk) otherwise.
+ */
+static int vzquota_dparents_check_attach(struct inode *inode)
+{
+	if (!list_empty(&inode->i_dentry))
+		return 0;
+	printk(KERN_ERR "VZDQ: no parent for "
+			"dev %s, inode %lu, fs %s\n",
+			inode->i_sb->s_id,
+			inode->i_ino,
+			inode->i_sb->s_type->name);
+	return -1;
+}
+
+/*
+ * Walk @inode's dentry aliases and return the first parent inode whose
+ * quota state is not actual (initializing its qlnk on first sight), or
+ * NULL when all parents are actual.  Detached (self-parented) dentries
+ * are skipped.
+ */
+static struct inode *vzquota_dparents_check_actual(struct inode *inode)
+{
+	struct dentry *de;
+
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		/* first access to parent, make sure its qlnk initialized */
+		__vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
+		if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
+			return de->d_parent->d_inode;
+	}
+	return NULL;
+}
+
+/*
+ * Determine the qmblk @inode should inherit from its parents.
+ *
+ * If all (attached) parents agree on one qmblk, return it; disagreeing
+ * parents yield VZ_QUOTA_BAD.  If no parent was found via dentry aliases,
+ * fall back to the "current creation context" parent recorded by
+ * vzquota_cur_qmblk_set(); failures of that path also return VZ_QUOTA_BAD
+ * after a warning.
+ */
+static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
+{
+	struct dentry *de;
+	struct vz_quota_master *qmblk;
+	char *msg = "";
+
+	qmblk = NULL;
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		if (qmblk == NULL) {
+			qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
+			continue;
+		}
+		if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
+			printk(KERN_WARNING "VZDQ: multiple quotas for "
+					"dev %s, inode %lu, fs %s\n",
+					inode->i_sb->s_id,
+					inode->i_ino,
+					inode->i_sb->s_type->name);
+			qmblk = VZ_QUOTA_BAD;
+			break;
+		}
+	}
+
+	if (qmblk != NULL)
+		goto out;
+
+	if (vzquota_cur_qmblk_check()) {
+		struct inode *parent;
+
+		parent = vzquota_cur_qmblk_fetch();
+		msg = vzquota_check_parent(parent, inode);
+		if (msg != NULL)
+			goto fail;
+
+		msg = "parent not actual";
+		if (!VZ_QUOTA_IS_ACTUAL(parent))
+			goto fail;
+
+		qmblk = INODE_QLNK(parent)->qmblk;
+		goto out;
+	}
+fail:
+	printk(KERN_WARNING "VZDQ: not attached to tree, "
+			"dev %s, inode %lu, fs %s. %s\n",
+			inode->i_sb->s_id,
+			inode->i_ino,
+			inode->i_sb->s_type->name, msg);
+	qmblk = VZ_QUOTA_BAD;
+out:
+	return qmblk;
+}
+
+/* NFS root is disconnected dentry. */
+
+/*
+ * Detect the NFS root inode: superblock magic 0x6969 (NFS) and every alias
+ * is a hashed, self-parented, DCACHE_DISCONNECTED dentry.  Such an inode is
+ * treated like a filesystem root by the recalc code below.
+ */
+static int is_nfs_root(struct inode * inode)
+{
+	struct dentry *de;
+
+	if (inode->i_sb->s_magic != 0x6969)
+		return 0;
+
+	if (list_empty(&inode->i_dentry))
+		return 0;
+
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent != de)
+			return 0;
+		if (d_unhashed(de))
+			return 0;
+		if (!(de->d_flags & DCACHE_DISCONNECTED))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Actualize the quota state of @inode (a not-actual ancestor found while
+ * recalculating @refinode), possibly climbing further up to the first
+ * non-actual ancestor.  Called under dcache_lock and refinode's inode_qmblk
+ * lock; both are dropped and retaken around the final iput().  The extra
+ * i_count reference keeps @inode alive while locks may be dropped inside
+ * vzquota_inode_qmblk_set().
+ */
+static void vzquota_dbranch_actualize(struct inode *inode,
+		struct inode *refinode)
+{
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+
+start:
+	if (inode == inode->i_sb->s_root->d_inode || is_nfs_root(inode)) {
+		/* filesystem root */
+		atomic_inc(&inode->i_count);
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
+		goto out;
+	}
+
+	/* climb to the topmost non-actual ancestor first */
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			inode = pinode;
+			goto start;
+		}
+	}
+
+	atomic_inc(&inode->i_count);
+	while (1) {
+		if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
+			break;
+		/*
+		 * Need to check parents again if we have slept inside
+		 * vzquota_inode_qmblk_set() in the loop.
+		 * If the state of parents is different, just return and repeat
+		 * the actualizing process again from the inode passed to
+		 * vzquota_inode_qmblk_recalc().
+		 */
+		if (!vzquota_dparents_check_attach(inode)) {
+			if (vzquota_dparents_check_actual(inode) != NULL)
+				break;
+			qmblk = vzquota_dparents_check_same(inode);
+		} else
+			qmblk = VZ_QUOTA_BAD;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
+			set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_ACT);
+			break;
+		}
+	}
+
+out:
+	/* drop locks for the sleeping qlnk destroy and iput */
+	spin_unlock(&dcache_lock);
+	inode_qmblk_unlock(refinode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	iput(inode);
+	inode_qmblk_lock(refinode->i_sb);
+	spin_lock(&dcache_lock);
+}
+
+/*
+ * Recalculate the qmblk of an inode that is attached to the dentry tree.
+ * Roots get the superblock's NOQUOTA qmblk; other inodes inherit from their
+ * (first actualized) parents.  Restarts from @inode whenever locks were
+ * dropped inside the helpers.  Called under dcache_lock and inode_qmblk.
+ */
+static void vzquota_dtree_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+
+	if (inode == inode->i_sb->s_root->d_inode || is_nfs_root(inode)) {
+		/* filesystem root */
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
+		return;
+	}
+
+start:
+	if (VZ_QUOTA_IS_ACTUAL(inode))
+		return;
+	/*
+	 * Here qmblk is (re-)initialized for all ancestors.
+	 * This is not a very efficient procedure, but it guarantees that
+	 * the quota tree is consistent (that is, the inode doesn't have two
+	 * ancestors with different qmblk).
+	 */
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			vzquota_dbranch_actualize(pinode, inode);
+			goto start;
+		}
+		qmblk = vzquota_dparents_check_same(inode);
+	} else
+		qmblk = VZ_QUOTA_BAD;
+
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DTREE);
+}
+
+/*
+ * Recalculate the qmblk of a detached inode (no dentry aliases).
+ *
+ * Legitimate cases get the superblock's NOQUOTA qmblk (inode being freed)
+ * or the creation-context parent's qmblk; everything else is flagged BAD
+ * with rate-limited diagnostics (suppressed during the first seconds after
+ * quota start, see the timeout logic at "fail").
+ */
+static void vzquota_det_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	struct inode *parent;
+	struct vz_quota_master *qmblk;
+	char *msg;
+	int cnt;
+	time_t timeout;
+
+	cnt = 0;
+	parent = NULL;
+start:
+	/*
+	 * qmblk of detached inodes shouldn't be considered as not actual.
+	 * They are not in any dentry tree, so quota on/off shouldn't affect
+	 * them.
+	 */
+	if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
+		return;
+
+	timeout = 3;
+	qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+	/*
+	 * Scenario:
+	 *	open
+	 *	unlink
+	 * 	quotaon
+	 *	generic_delete_inode
+	 *
+	 * This is the first time vzquota sees inode. inode is outside of
+	 * vzquota area of interest, otherwise quotaon would have got -EBUSY
+	 * due to shrink_dcache_parent().
+	 * inode is almost completely destroyed, so don't intervene.
+	 * 
+	 * dev@:
+	 * However, there is a small race here...
+	 * dput() first removes itself from all the lists,
+	 * so shrink_dcache_parent() can succeed while dentry_iput is not
+	 * done yet.
+	 */
+	if (inode->i_state & I_FREEING)
+		goto set;
+
+	msg = "detached inode not in creation";
+	if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
+		goto fail;
+	qmblk = VZ_QUOTA_BAD;
+	msg = "unexpected creation context";
+	if (!vzquota_cur_qmblk_check())
+		goto fail;
+	timeout = 0;
+	parent = vzquota_cur_qmblk_fetch();
+	msg = vzquota_check_parent(parent, inode);
+	if (msg != NULL)
+		goto fail;
+
+	if (!VZ_QUOTA_IS_ACTUAL(parent)) {
+		vzquota_dbranch_actualize(parent, inode);
+		goto start;
+	}
+
+	qmblk = INODE_QLNK(parent)->qmblk;
+set:
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DET);
+	return;
+
+fail:
+	{
+		/* complain only after `timeout' seconds since quota start */
+		struct timeval tv, tvo;
+		do_gettimeofday(&tv);
+		memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
+		tv.tv_sec -= tvo.tv_sec;
+		if (tv.tv_usec < tvo.tv_usec) {
+			tv.tv_sec--;
+			tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
+		} else
+			tv.tv_usec -= tvo.tv_usec;
+		if (tv.tv_sec < timeout)
+			goto set;
+		printk(KERN_ERR "VZDQ: %s, orig {%u, %u},"
+			" dev %s, inode %lu, fs %s\n",
+			msg,
+			INODE_QLNK(inode)->origin[0],
+			INODE_QLNK(inode)->origin[1],
+			inode->i_sb->s_id, inode->i_ino,
+			inode->i_sb->s_type->name);
+		printk(KERN_ERR "i_count %u, ", atomic_read(&inode->i_count));
+		printk(KERN_ERR "i_mode %o, ", inode->i_mode);
+		printk(KERN_ERR "i_state %lx, ", inode->i_state);
+		printk(KERN_ERR "i_flags %x\n", inode->i_flags);
+		printk(KERN_ERR "i_op %p, vfs_empty_iops %p, "
+				"i_fop %p, i_mapping %p\n",
+				inode->i_op, &vfs_empty_iops,
+				inode->i_fop, inode->i_mapping);
+		if (!cnt++) {
+			/* dump the stack only once per call */
+			printk(KERN_ERR "current %d (%s), VE %d,"
+				" time %ld.%06ld\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()),
+				tv.tv_sec, (long)tv.tv_usec);
+			dump_stack();
+		}
+		if (parent != NULL)
+			printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
+				inode->i_ino, parent->i_ino);
+	}
+	goto set;
+}
+
+/*
+ * Dispatch qmblk recalculation: dentry-attached inodes go through the
+ * dentry-tree walk, detached ones through the detached-inode path.
+ * Takes dcache_lock around the check and the walk itself.
+ */
+static void vzquota_inode_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_dtree_qmblk_recalc(inode, qlnk);
+	else
+		vzquota_det_qmblk_recalc(inode, qlnk);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_qmblk - obtain inode's qmblk
+ *
+ * Returns qmblk with refcounter taken, %NULL if not under
+ * VZ quota or %VZ_QUOTA_BAD.
+ *
+ * Initializes and, if needed, recalculates the inode's qlnk first.
+ * May sleep (qlnk destroy); only valid on superblocks using
+ * vz_quota_operations.
+ *
+ * FIXME: This function should be removed when vzquota_find_qmblk /
+ * get_quota_root / vzquota_dstat code is cleaned up.
+ */
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	might_sleep();
+
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return NULL;
+#if defined(VZ_QUOTA_UNLOAD)
+#error Make sure qmblk does not disappear
+#endif
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		/* the per-sb NOQUOTA qmblk is reported as "no quota" */
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
+			qmblk_get(qmblk);
+		else
+			qmblk = NULL;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	return qmblk;
+}
+
+/**
+ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
+ *
+ * This function finds a quota master block corresponding to the root of
+ * a virtual filesystem.
+ * Returns a quota master block with reference taken, or %NULL if not under
+ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
+ * operations will fail).
+ *
+ * Note: this function uses vzquota_inode_qmblk().
+ * The latter is a rather confusing function: it returns qmblk that used to be
+ * on the inode some time ago (without guarantee that it still has any
+ * relations to the inode).  So, vzquota_find_qmblk() leaves it up to the
+ * caller to think whether the inode could have changed its qmblk and what to
+ * do in that case.
+ * Currently, the callers appear to not care :(
+ */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
+{
+	struct inode *qrinode;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;
+	qrinode = NULL;
+	/* the filesystem exposes its quota root via this optional hook */
+	if (sb->s_op->get_quota_root != NULL)
+		qrinode = sb->s_op->get_quota_root(sb);
+	if (qrinode != NULL)
+		qmblk = vzquota_inode_qmblk(qrinode);
+	return qmblk;
+}
+
+/* ----------------------------------------------------------------------
+ *
+ * Calls from quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_inode_init_call - call from DQUOT_INIT
+ *
+ * Forces initialization of the inode's quota data (dropping the locks
+ * vzquota_inode_data() took on success), then remembers the inode as the
+ * "current creation context" so children created next can inherit its qmblk.
+ */
+void vzquota_inode_init_call(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* initializes inode's quota inside */
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		vzquota_data_unlock(inode, &data);
+
+	/*
+	 * The check is needed for repeated new_inode() calls from a single
+	 * ext3 call like create or mkdir in case of -ENOSPC.
+	 */
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_cur_qmblk_set(inode);
+	spin_unlock(&dcache_lock);
+}
+
+/*
+ * Move quota accounting state from template inode @tmpl to @inode:
+ * the qlnk contents and the i_dquot[USRQUOTA] marker are exchanged,
+ * @tmpl is flagged S_NOQUOTA and its (now old) quota state is dropped.
+ * No-op if @tmpl is already S_NOQUOTA or its qmblk is BAD.
+ */
+void vzquota_inode_swap_call(struct inode *inode, struct inode *tmpl)
+{
+	struct vz_quota_master *qmblk;
+
+	__vzquota_inode_init(inode, VZ_QUOTAO_INIT);
+
+	might_sleep();
+
+	inode_qmblk_lock(tmpl->i_sb);
+	if (unlikely(tmpl->i_flags & S_NOQUOTA)) {
+		inode_qmblk_unlock(tmpl->i_sb);
+		return;
+	}
+	__vzquota_inode_init(tmpl, VZ_QUOTAO_INICAL);
+
+	qmblk = INODE_QLNK(tmpl)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		void * uq;
+		list_del_init(&INODE_QLNK(tmpl)->list);
+		vzquota_qlnk_swap(INODE_QLNK(tmpl), INODE_QLNK(inode));
+		uq = inode->i_dquot[USRQUOTA];
+		inode->i_dquot[USRQUOTA] = tmpl->i_dquot[USRQUOTA];
+		tmpl->i_dquot[USRQUOTA] = uq;
+		tmpl->i_flags |= S_NOQUOTA;
+		/* same superblock as tmpl's; unlock before sleeping drop */
+		inode_qmblk_unlock(inode->i_sb);
+
+		vzquota_inode_drop(tmpl);
+	} else {
+		inode_qmblk_unlock(tmpl->i_sb);
+	}
+}
+
+
+/**
+ * vzquota_inode_drop_call - call from DQUOT_DROP
+ *
+ * Thin wrapper releasing all VZ quota state attached to @inode.
+ */
+void vzquota_inode_drop_call(struct inode *inode)
+{
+	vzquota_inode_drop(inode);
+}
+
+/**
+ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs
+ * @inode: the inode
+ * @data: storage space
+ *
+ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
+ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
+ *   qmblk in inode's qlnk is the same as returned,
+ *   ugid pointers inside inode's qlnk are valid,
+ *   some locks are taken (and should be released by vzquota_data_unlock).
+ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
+ *
+ * S_NOQUOTA inodes short-circuit to NULL.  May sleep during recalc.
+ */
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	struct vz_quota_master *qmblk;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&data->qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	if (unlikely(inode->i_flags & S_NOQUOTA)) {
+		inode_qmblk_unlock(inode->i_sb);
+		return NULL;
+	}
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &data->qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
+			/*
+			 * Note that in the current implementation,
+			 * inode_qmblk_lock can theoretically be dropped here.
+			 * This place is serialized with quota_off because
+			 * quota_off fails when there are extra dentry
+			 * references and syncs inodes before removing quota
+			 * information from them.
+			 * However, quota usage information should stop being
+			 * updated immediately after vzquota_off.
+			 */
+			qmblk_data_write_lock(qmblk);
+		} else {
+			inode_qmblk_unlock(inode->i_sb);
+			qmblk = NULL;
+		}
+	} else {
+		inode_qmblk_unlock(inode->i_sb);
+	}
+	return qmblk;
+}
+
+/*
+ * Counterpart of a successful vzquota_inode_data(): releases the qmblk data
+ * write lock and inode_qmblk lock, then destroys the scratch qlnk (which may
+ * sleep, hence it comes after the unlocks).
+ */
+void vzquota_data_unlock(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&data->qlnk);
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_inode_transfer_call - call from vzquota_transfer
+ *
+ * Handles chown/chgrp-style ownership transfer for ugid quotas: builds a
+ * fresh qlnk for the new owner(s), charges usage via
+ * vzquota_transfer_usage(), and on success swaps the new qlnk into the
+ * inode.  Restarts from "start" whenever the lock-dropping helpers report
+ * that the state may have changed.  Returns QUOTA_OK or NO_QUOTA.
+ */
+int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	struct vz_quota_ilink qlnew;
+	int mask;
+	int ret;
+
+	might_sleep();
+	vzquota_qlnk_init(&qlnew);
+start:
+	qmblk = vzquota_inode_data(inode, &data);
+	ret = NO_QUOTA;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out_destr;
+	ret = QUOTA_OK;
+	if (qmblk == NULL)
+		goto out_destr;
+	qmblk_get(qmblk);
+
+	ret = QUOTA_OK;
+	if (!(qmblk->dq_flags & VZDQUG_ON))
+		/* no ugid quotas */
+		goto out_unlock;
+
+	/* which of uid/gid actually change? */
+	mask = 0;
+	if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
+		mask |= 1 << USRQUOTA;
+	if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
+		mask |= 1 << GRPQUOTA;
+	while (1) {
+		if (vzquota_qlnk_is_empty(&qlnew) &&
+		    vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
+			break;
+		if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
+		    qlnew.qmblk == qmblk)
+			goto finish;
+		if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
+			break;
+	}
+
+	/* prepare for restart */
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+	goto start;
+
+finish:
+	/* all references obtained successfully */
+	ret = vzquota_transfer_usage(inode, mask, &qlnew);
+	if (!ret) {
+		vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
+		set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_TRANS);
+	}
+out_unlock:
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+out_destr:
+	vzquota_qlnk_destroy(&qlnew);
+	return ret;
+}
+#endif
+
+/*
+ * Decide whether renaming @inode from @old_dir to @new_dir is allowed from
+ * the quota point of view.  Returns 0 if allowed, -1 otherwise.
+ *
+ * Cross-superblock renames are rejected outright.  A qmblk mismatch between
+ * the inode and the new directory is rejected too, except for the special
+ * case of moving a quota root between two directories that are both outside
+ * any quota (NOQUOTA).
+ */
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk1, qlnk2, qlnk3;
+	int c, ret;
+
+	if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
+		return -1;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&qlnk1);
+	vzquota_qlnk_init(&qlnk2);
+	vzquota_qlnk_init(&qlnk3);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
+
+	/* loop until both inode and new_dir are actual in one pass
+	 * (recalc may drop locks, invalidating the other one) */
+	do {
+		c = 0;
+		if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+		    !VZ_QUOTA_IS_ACTUAL(inode)) {
+			vzquota_inode_qmblk_recalc(inode, &qlnk1);
+			c++;
+		}
+		if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
+		    !VZ_QUOTA_IS_ACTUAL(new_dir)) {
+			vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
+			c++;
+		}
+	} while (c);
+
+	ret = 0;
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != INODE_QLNK(new_dir)->qmblk) {
+		ret = -1;
+			/* old_dir is only needed on this path */
+			while (vzquota_qlnk_is_empty(INODE_QLNK(old_dir)) ||
+			       !VZ_QUOTA_IS_ACTUAL(old_dir)) {
+				vzquota_inode_qmblk_recalc(old_dir, &qlnk3);
+			}
+		if (qmblk != VZ_QUOTA_BAD &&
+		    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+		    qmblk->dq_root_dentry->d_inode == inode &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk,
+			    				inode->i_sb) &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk,
+			    				inode->i_sb))
+			/* quota root rename is allowed */
+			ret = 0;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk3);
+	vzquota_qlnk_destroy(&qlnk2);
+	vzquota_qlnk_destroy(&qlnk1);
+	return ret;
+}
+
+/*
+ * Scan parent subdirs and find busy dentries names/path
+ * @parent: parent dentry
+ * @buf: buffer to store path.
+ *
+ * Depth-first walk over @parent's subtree (same traversal shape as
+ * shrink_dcache_parent); for every leaf dentry still referenced, its path is
+ * resolved with __d_path and appended to @buf, entries separated by '\n'.
+ * Silently stops when the buffer is exhausted or a path doesn't fit.
+ */
+static void vzdquota_read_busy_dentries(struct dentry * parent,
+			struct vfsmount *vfsmnt, char *buf, int buflen)
+{
+	struct dentry *this_parent = parent;
+	struct list_head *next;
+	char *res, *end, *start;
+	struct vfsmount *rootmnt;
+	struct dentry *root;
+	int len;
+
+	if (!buf || buflen <= 0)
+		return;
+
+	/* From d_path() ... */
+	read_lock(&current->fs->lock);
+	rootmnt = mntget(current->fs->rootmnt);
+	root = dget(current->fs->root);
+	read_unlock(&current->fs->lock);
+
+	spin_lock(&dcache_lock);
+
+	end = buf + buflen;
+	start = buf;
+repeat:
+	next = this_parent->d_subdirs.next;
+resume:
+	while (next != &this_parent->d_subdirs) {
+		struct list_head *tmp = next;
+		struct dentry *dentry;
+		int subdirs;
+
+		dentry = list_entry(tmp, struct dentry, d_u.d_child);
+		next = tmp->next;
+		subdirs = !list_empty(&dentry->d_subdirs); 
+
+		if (atomic_read(&dentry->d_count) && !subdirs) {
+			if (!buflen)
+				goto out;
+			/*
+			 * Note: __d_path will store filename at the
+			 * end of buf.
+			 */
+			res = __d_path(dentry, vfsmnt, root, rootmnt,
+							buf, buflen);
+			/* Exit if name is too long */
+			if (IS_ERR(res))
+				goto out;
+
+			/*
+			 * Move the string obtained by __d_path,
+			 * behind the last dentry path in buf.
+			 */
+			len = end - res;
+			BUG_ON(len <= 0);
+
+			memmove(buf, res, len);
+
+			/* Trick: replace \0 by \n */
+			if (buf != start)
+				*(char *)(buf - 1) = '\n';
+
+			buf += len;
+			buflen -= len;
+		}
+
+		/*
+		 * Descend a level if the d_subdirs list is non-empty.
+		 */
+		if (subdirs) {
+			this_parent = dentry;
+			goto repeat;
+		}
+	}
+	/*
+	 * All done at this level ... ascend and resume the search.
+	 */
+	if (this_parent != parent) {
+		next = this_parent->d_u.d_child.next;
+		this_parent = this_parent->d_parent;
+		goto resume;
+	}
+out:
+	/* From d_path() ... */
+	spin_unlock(&dcache_lock);
+	dput(root);
+	mntput(rootmnt);
+}
+
+/* ----------------------------------------------------------------------
+ *
+ * qmblk-related parts of on/off operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_check_dtree - check dentry tree if quota on/off is allowed
+ *
+ * This function doesn't allow quota to be turned on/off if some dentries in
+ * the tree have external references.
+ * In addition to technical reasons, it enforces user-space correctness:
+ * current usage (taken from or reported to the user space) can be meaningful
+ * and accurate only if the tree is not being modified.
+ * Side effect: additional vfsmount structures referencing the tree (bind
+ * mounts of tree nodes to some other places) are not allowed at on/off time.
+ *
+ * Store busy dentries path to the buf (if passed) in case of vzquota_off
+ * ioctl fail.
+ *
+ * Called under dcache_lock and inode_qmblk locks (both may be dropped and
+ * retaken around shrink_dcache_parent and the busy-path scan).
+ * Returns 0 if on/off is allowed, -EBUSY otherwise.
+ */
+int vzquota_check_dtree(struct vz_quota_master *qmblk, int off,
+						char *buf, int buflen)
+{
+	struct dentry *dentry;
+	int err, count;
+
+	err = -EBUSY;
+	dentry = qmblk->dq_root_dentry;
+
+	if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root)
+		goto unhashed;
+
+	/* attempt to shrink */
+  	if (!list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dcache_lock);
+		inode_qmblk_unlock(dentry->d_sb);
+		shrink_dcache_parent(dentry);
+		inode_qmblk_lock(dentry->d_sb);
+		spin_lock(&dcache_lock);
+		if (!list_empty(&dentry->d_subdirs)) {
+			/* busy children remain: report their paths */
+        		spin_unlock(&dcache_lock);
+			vzdquota_read_busy_dentries(dentry, qmblk->dq_root_mnt,
+								buf, buflen);
+			spin_lock(&dcache_lock);
+			goto out;
+		}
+
+		/* the root dentry itself must hold exactly the expected refs */
+		count = 1;
+		if (dentry == dentry->d_sb->s_root)
+			count += 2;	/* sb and mnt refs */
+		if (atomic_read(&dentry->d_count) < count) {
+			printk(KERN_ERR "%s: too small count %d vs %d.\n",
+					__FUNCTION__,
+					atomic_read(&dentry->d_count), count);
+			goto out;
+		}
+		if (atomic_read(&dentry->d_count) > count)
+			goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+
+unhashed:
+	/*
+	 * Quota root is removed.
+	 * Allow to turn quota off, but not on.
+	 */
+	if (off)
+		err = 0;
+	goto out;
+}
+
+/*
+ * Turn quota on for @qmblk rooted at @inode on superblock @sb.
+ *
+ * Verifies the dentry tree is quiescent, binds @inode's qlnk to @qmblk,
+ * marks the quota root dentry DCACHE_LOCALCACHE, and replaces the per-sb
+ * NOQUOTA placeholder with a fresh fake qmblk (the old one is flagged
+ * VZDQ_NOACT and released).  On -EBUSY the list of busy dentry paths is
+ * copied to @ubuf, if provided.  Returns 0 or a negative errno.
+ */
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk, char __user *ubuf)
+{
+	struct vz_quota_ilink qlnk;
+	struct vz_quota_master *qold, *qnew;
+	int err;
+	char *buf;
+
+	/* scratch page for busy-dentry paths; only when caller wants them */
+	buf = (ubuf != NULL) ? (char *)__get_free_page(GFP_KERNEL) : NULL;
+
+	might_sleep();
+
+	qold = NULL;
+	qnew = vzquota_alloc_fake();
+	if (qnew == NULL) {
+		free_page((unsigned long)buf);
+		return -ENOMEM;
+	}
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	spin_lock(&dcache_lock);
+	while (1) {
+		err = vzquota_check_dtree(qmblk, 0, buf, PAGE_SIZE);
+		if (err)
+			break;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
+			break;
+	}
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_ON);
+	if (!err) {
+		struct dentry * dentry = qmblk->dq_root_dentry;
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags |= DCACHE_LOCALCACHE;
+		spin_unlock(&dentry->d_lock);
+	}
+	spin_unlock(&dcache_lock);
+
+	if (!err) {
+		qold = __VZ_QUOTA_NOQUOTA(sb);
+		qold->dq_flags |= VZDQ_NOACT;
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	inode_qmblk_unlock(sb);
+	vzquota_qlnk_destroy(&qlnk);
+	if (qold != NULL)
+		qmblk_put(qold);
+
+	if (buf) {
+		/* best effort: result is reported via err anyway */
+		(void)copy_to_user(ubuf, buf, PAGE_SIZE);
+		free_page((unsigned long)buf);
+	}
+	return err;
+}
+
+/*
+ * Turn quota off for @qmblk on superblock @sb.
+ *
+ * Unless @force, refuses with -EBUSY while the tree still has busy dentries
+ * (their paths are copied to @ubuf, if provided).  On success clears the
+ * DCACHE_LOCALCACHE flag on the quota root and marks the master block
+ * inactive (VZDQ_NOACT | VZDQ_NOQUOT).
+ */
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk,
+						char __user *ubuf, int force)
+{
+	int ret;
+	char *buf;
+
+	buf = (ubuf != NULL) ? (char *)__get_free_page(GFP_KERNEL) : NULL;
+
+	ret = 0;
+	inode_qmblk_lock(sb);
+
+	spin_lock(&dcache_lock);
+	if (vzquota_check_dtree(qmblk, 1, buf, PAGE_SIZE) && !force)
+		ret = -EBUSY;
+	if (!ret) {
+		struct dentry * dentry = qmblk->dq_root_dentry;
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_LOCALCACHE;
+		spin_unlock(&dentry->d_lock);
+	}
+	spin_unlock(&dcache_lock);
+
+	if (!ret)
+		qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
+	inode_qmblk_unlock(sb);
+
+	if (buf) {
+		/* best effort: result is reported via ret anyway */
+		(void)copy_to_user(ubuf, buf, PAGE_SIZE);
+		free_page((unsigned long)buf);
+	}
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * External interfaces
+ *
+ * ---------------------------------------------------------------------*/
+
+/*
+ * VZDQCTLTYPE ioctl entry point: copies the request struct from user space
+ * and dispatches to do_vzquotactl()/do_vzquotaugidctl() (last argument 0 =
+ * native, not compat, layout).  Unknown commands yield -ENOTTY.
+ */
+static int vzquota_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	switch (cmd) {
+	case VZCTL_QUOTA_NEW_CTL: {
+		struct vzctl_quotactl qb;
+
+		err = -EFAULT;
+		if (copy_from_user(&qb, (void __user *)arg, sizeof(qb)))
+			break;
+		err = do_vzquotactl(qb.cmd, qb.quota_id,
+				qb.qstat, qb.ve_root, 0);
+		break;
+	}
+#ifdef CONFIG_VZ_QUOTA_UGID
+	case VZCTL_QUOTA_UGID_CTL: {
+		struct vzctl_quotaugidctl qub;
+
+		err = -EFAULT;
+		if (copy_from_user(&qub, (void __user *)arg, sizeof(qub)))
+			break;
+		err = do_vzquotaugidctl(qub.cmd, qub.quota_id,
+				qub.ugid_index, qub.ugid_size, qub.addr, 0);
+		break;
+	}
+#endif
+	default:
+		err = -ENOTTY;
+	}
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * 32-bit compat counterpart of vzquota_ioctl(): reads the compat request
+ * layouts and converts user pointers with compat_ptr() before dispatching
+ * (last argument 1 = compat layout).  Unknown commands yield -ENOIOCTLCMD.
+ */
+static int compat_vzquota_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	switch (cmd) {
+	case VZCTL_COMPAT_QUOTA_CTL: {
+		struct compat_vzctl_quotactl cs;
+
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+		err = do_vzquotactl(cs.cmd, cs.quota_id,
+				compat_ptr(cs.qstat),
+				compat_ptr(cs.ve_root), 1);
+		break;
+	}
+#ifdef CONFIG_VZ_QUOTA_UGID
+	case VZCTL_COMPAT_QUOTA_UGID_CTL: {
+		struct compat_vzctl_quotaugidctl cs;
+
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+
+		err = do_vzquotaugidctl(cs.cmd, cs.quota_id, cs.ugid_index,
+				cs.ugid_size, compat_ptr(cs.addr), 1);
+		break;
+	}
+#endif
+	default:
+		err = -ENOIOCTLCMD;
+	}
+	return err;
+}
+#endif
+
+static struct vzioctlinfo vzdqcalls = {
+	.type		= VZDQCTLTYPE,
+	.ioctl		= vzquota_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_vzquota_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+};
+
+/**
+ * vzquota_dstat - get quota usage info for virtual superblock
+ */
+static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = vzquota_find_qmblk(super);
+	if (qmblk == NULL)
+		return -ENOENT;
+	if (qmblk == VZ_QUOTA_BAD) {
+		memset(qstat, 0, sizeof(*qstat));
+		return 0;
+	}
+
+	qmblk_data_read_lock(qmblk);
+	memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
+	qmblk_data_read_unlock(qmblk);
+	qmblk_put(qmblk);
+	return 0;
+}
+
+int
+vzquota_snap_init(struct super_block *vsuper, struct vzsnap_struct *vzs)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+
+	qmblk = vzquota_find_qmblk(vsuper);
+	if (qmblk == NULL)
+		return -ENOENT;
+	if (qmblk == VZ_QUOTA_BAD)
+		return -ENOENT;
+
+	err = -EBUSY;
+	qmblk_data_write_lock(qmblk);
+	if (!qmblk->dq_snap && qmblk->dq_root_mnt && qmblk->dq_root_dentry &&
+			qmblk->dq_root_mnt->mnt_sb->s_bdev) {
+		qmblk->dq_snap = vzsnap_get(vzs);
+		vzs->vzdq_mnt = mntget(qmblk->dq_root_mnt);
+		vzs->vzdq_root = dget(qmblk->dq_root_dentry);
+		err = 0;
+	}
+	qmblk_data_write_unlock(qmblk);
+
+	qmblk_put(qmblk);
+	return err;
+}
+EXPORT_SYMBOL(vzquota_snap_init);
+
+int vzquota_snap_stop(struct super_block *super, struct vzsnap_struct *vzs)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+
+	qmblk = vzquota_find_qmblk(super);
+	if (qmblk == NULL)
+		return -ENOENT;
+	if (qmblk == VZ_QUOTA_BAD)
+		return -ENOENT;
+
+	err = -ENOENT;
+	qmblk_data_write_lock(qmblk);
+	if (qmblk->dq_snap == vzs) {
+		err = 0;
+		qmblk->dq_snap = NULL;
+	}
+	qmblk_data_write_unlock(qmblk);
+
+	qmblk_put(qmblk);
+	return err;
+}
+EXPORT_SYMBOL(vzquota_snap_stop);
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit helpers
+ *
+ * ---------------------------------------------------------------------*/
+
+static int vzquota_cache_init(void)
+{
+	int i;
+
+	vzquota_cachep = kmem_cache_create("vz_quota_master",
+					 sizeof(struct vz_quota_master),
+					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (vzquota_cachep == NULL) {
+		printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+		goto nomem2;
+	}
+	for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&vzquota_hash_table[i]);
+
+	return 0;
+
+nomem2:
+	return -ENOMEM;
+}
+
+static void vzquota_cache_release(void)
+{
+	int i;
+
+	/* sanity check */
+	for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
+		if (!list_empty(&vzquota_hash_table[i]))
+			BUG();
+
+	/* release caches */
+	if (kmem_cache_destroy(vzquota_cachep))
+		printk(KERN_ERR
+			"VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
+	vzquota_cachep = NULL;
+}
+
+static int quota_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int err)
+{
+	struct virt_info_quota *viq;
+	struct super_block *sb;
+
+	viq = (struct virt_info_quota *)data;
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		err = NOTIFY_BAD;
+		if (!try_module_get(THIS_MODULE))
+			break;
+		sb = viq->super;
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		module_put(THIS_MODULE);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		err = NOTIFY_BAD;
+		if (vzquota_dstat(viq->super, viq->qstat))
+			break;
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_DISABLE:
+		err = NOTIFY_OK;
+		vzquota_inode_off((struct inode *)data);
+		break;
+	}
+	return err;
+}
+
+struct vnotifier_block quota_notifier_block = {
+	.notifier_call = quota_notifier_call,
+	.priority = INT_MAX,
+};
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit procedures
+ *
+ * ---------------------------------------------------------------------*/
+
+static int __init vzquota_init(void)
+{
+	int err;
+
+	if ((err = vzquota_cache_init()) != 0)
+		goto out_cache;
+
+	if ((err = vzquota_proc_init()) != 0)
+		goto out_proc;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+	if ((err = vzquota_ugid_init()) != 0)
+		goto out_ugid;
+#endif
+
+	mutex_init(&vz_quota_mutex);
+	vzioctl_register(&vzdqcalls);
+	virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
+#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
+	vzaquota_init();
+#endif
+
+	return 0;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+out_ugid:
+	vzquota_proc_release();
+#endif
+out_proc:
+	vzquota_cache_release();
+out_cache:
+	return err;
+}
+
+#if defined(VZ_QUOTA_UNLOAD)
+static void __exit vzquota_release(void)
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
+	vzioctl_unregister(&vzdqcalls);
+#ifdef CONFIG_VZ_QUOTA_UGID
+#ifdef CONFIG_PROC_FS
+	vzaquota_fini();
+#endif
+	vzquota_ugid_release();
+#endif
+	vzquota_proc_release();
+	vzquota_cache_release();
+}
+#endif
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Disk Quota");
+MODULE_LICENSE("GPL v2");
+
+module_init(vzquota_init)
+#if defined(VZ_QUOTA_UNLOAD)
+module_exit(vzquota_release)
+#endif
diff -upr kernel-2.6.18-417.el5.orig/fs/xattr.c kernel-2.6.18-417.el5-028stab121/fs/xattr.c
--- kernel-2.6.18-417.el5.orig/fs/xattr.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/xattr.c	2017-01-13 08:40:23.000000000 -0500
@@ -111,6 +111,15 @@ vfs_setxattr(struct dentry *dentry, char
 	struct inode *inode = dentry->d_inode;
 	int error;
 
+#if defined(CONFIG_VE) && defined(CONFIG_SYSCTL)
+	if (!ve_is_super(get_exec_env())) {
+		if (ve_xattr_policy == VE_XATTR_POLICY_IGNORE)
+			return 0;
+		else if (ve_xattr_policy == VE_XATTR_POLICY_REJECT)
+			return -EPERM;
+	}
+#endif
+
 	error = xattr_permission(inode, name, MAY_WRITE);
 	if (error)
 		return error;
@@ -126,7 +135,7 @@ out:
 	mutex_unlock(&inode->i_mutex);
 	return error;
 }
-EXPORT_SYMBOL_GPL(vfs_setxattr);
+EXPORT_SYMBOL(vfs_setxattr);
 
 ssize_t
 vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
@@ -162,7 +171,7 @@ vfs_getxattr(struct dentry *dentry, char
 
 	return error;
 }
-EXPORT_SYMBOL_GPL(vfs_getxattr);
+EXPORT_SYMBOL(vfs_getxattr);
 
 int
 vfs_removexattr(struct dentry *dentry, char *name)
@@ -189,7 +198,7 @@ vfs_removexattr(struct dentry *dentry, c
 		fsnotify_xattr(dentry);
 	return error;
 }
-EXPORT_SYMBOL_GPL(vfs_removexattr);
+EXPORT_SYMBOL(vfs_removexattr);
 
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/fs/xfs/linux-2.6/xfs_file.c kernel-2.6.18-417.el5-028stab121/fs/xfs/linux-2.6/xfs_file.c
--- kernel-2.6.18-417.el5.orig/fs/xfs/linux-2.6/xfs_file.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/fs/xfs/linux-2.6/xfs_file.c	2017-01-13 08:40:40.000000000 -0500
@@ -49,76 +49,74 @@ static struct vm_operations_struct xfs_f
 STATIC_INLINE ssize_t
 __xfs_file_read(
 	struct kiocb		*iocb,
-	char			__user *buf,
+	const struct iovec	*iov,
+	unsigned long		nr_segs,
 	int			ioflags,
-	size_t			count,
 	loff_t			pos)
 {
-	struct iovec		iov = {buf, count};
 	struct file		*file = iocb->ki_filp;
 
 	BUG_ON(iocb->ki_pos != pos);
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-	return xfs_read(XFS_I(file->f_dentry->d_inode), iocb, &iov, 1, &iocb->ki_pos, ioflags);
+	return xfs_read(XFS_I(file->f_dentry->d_inode), iocb, iov, nr_segs, &iocb->ki_pos, ioflags);
 }
 
 STATIC ssize_t
 xfs_file_aio_read(
 	struct kiocb		*iocb,
-	char			__user *buf,
-	size_t			count,
+	const struct iovec	*iov,
+	unsigned long		nr_segs,
 	loff_t			pos)
 {
-	return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
+	return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
 }
 
 STATIC ssize_t
 xfs_file_aio_read_invis(
 	struct kiocb		*iocb,
-	char			__user *buf,
-	size_t			count,
+	const struct iovec	*iov,
+	unsigned long		nr_segs,
 	loff_t			pos)
 {
-	return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+	return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
 }
 
 STATIC_INLINE ssize_t
 __xfs_file_write(
 	struct kiocb		*iocb,
-	const char		__user *buf,
+	const struct iovec	*iov,
+	unsigned long		nr_segs,
 	int			ioflags,
-	size_t			count,
 	loff_t			pos)
 {
-	struct iovec	iov = {(void __user *)buf, count};
 	struct file	*file = iocb->ki_filp;
 
 	BUG_ON(iocb->ki_pos != pos);
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-	return xfs_write(XFS_I(file->f_mapping->host), iocb, &iov, 1,
+	return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
 				&iocb->ki_pos, ioflags);
 }
 
 STATIC ssize_t
 xfs_file_aio_write(
 	struct kiocb		*iocb,
-	const char		__user *buf,
-	size_t			count,
+	const struct iovec	*iov,
+	unsigned long		nr_segs,
 	loff_t			pos)
 {
-	return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
+	return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
 }
 
 STATIC ssize_t
 xfs_file_aio_write_invis(
 	struct kiocb		*iocb,
-	const char		__user *buf,
-	size_t			count,
+	const struct iovec	*iov,
+	unsigned long		nr_segs,
 	loff_t			pos)
 {
-	return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+	return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
 }
 
 STATIC_INLINE ssize_t
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/gracl.c kernel-2.6.18-417.el5-028stab121/grsecurity/gracl.c
--- kernel-2.6.18-417.el5.orig/grsecurity/gracl.c	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/gracl.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,137 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/sysctl.h>
+#include <linux/netdevice.h>
+#include <linux/ptrace.h>
+#include <linux/grsecurity.h>
+#include <linux/grinternal.h>
+#include <linux/percpu.h>
+
+#include <asm/uaccess.h>
+#include <asm/errno.h>
+#include <asm/mman.h>
+
+extern char *gr_shared_page[4];
+	
+static char *
+gen_full_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+              struct dentry *root, struct vfsmount *rootmnt, char *buf, int buflen)
+{
+	char *end = buf + buflen;
+	char *retval;
+	int namelen = 0;
+
+	*--end = '\0';
+
+	retval = end - 1;
+	*retval = '/';
+
+	if (dentry == root && vfsmnt == rootmnt)
+		return retval;
+	if (dentry != vfsmnt->mnt_root && !IS_ROOT(dentry)) {
+		namelen = strlen(dentry->d_name.name);
+		buflen -= namelen;
+		if (buflen < 2)
+			goto err;
+		if (dentry->d_parent != root || vfsmnt != rootmnt)
+			buflen--;
+	}
+
+	retval = __d_path(dentry->d_parent, vfsmnt, root, rootmnt, buf, buflen);
+	if (unlikely(IS_ERR(retval)))
+err:
+		retval = strcpy(buf, "<path too long>");
+	else if (namelen != 0) {
+		end = buf + buflen - 1; // accounts for null termination
+		if (dentry->d_parent != root || vfsmnt != rootmnt)
+			*end++ = '/'; // accounted for above with buflen--
+		memcpy(end, dentry->d_name.name, namelen);
+	}
+
+	return retval;
+}
+
+static char *
+d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt,
+	    char *buf, int buflen)
+{
+	char *res;
+	struct dentry *root;
+	struct vfsmount *rootmnt;
+
+	/* we can't use real_root, real_root_mnt, because they belong only to the RBAC system */
+#ifdef CONFIG_VE
+	/* Don't use child_reaper, because it's VE0 process */ 
+	root = dget(get_exec_env()->fs_root);
+	rootmnt = mntget(get_exec_env()->fs_rootmnt);
+#else
+	read_lock(&child_reaper->fs->lock);
+	root = dget(child_reaper->fs->root);
+	rootmnt = mntget(child_reaper->fs->rootmnt);
+	read_unlock(&child_reaper->fs->lock);
+#endif
+
+	spin_lock(&dcache_lock);
+	res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, root, rootmnt, buf, buflen);
+	spin_unlock(&dcache_lock);
+
+	dput(root);
+	mntput(rootmnt);
+	return res;
+}
+
+char *
+gr_to_filename(const struct dentry *dentry, const struct vfsmount *mnt)
+{
+	return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()),
+			   PAGE_SIZE);
+}
+
+char *
+gr_to_filename2(const struct dentry *dentry, const struct vfsmount *mnt)
+{
+	return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[2], smp_processor_id()),
+			   PAGE_SIZE);
+}
+
+char *
+gr_to_filename3(const struct dentry *dentry, const struct vfsmount *mnt)
+{
+	return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[3], smp_processor_id()),
+			   PAGE_SIZE);
+}
+
+int
+gr_acl_handle_mmap(const struct file *file, const unsigned long prot)
+{
+	if (unlikely(!file || !(prot & PROT_EXEC)))
+		return 1;
+
+	if (!gr_tpe_allow(file))
+		return 0;
+	return 1;
+}
+
+int
+gr_acl_handle_mprotect(const struct file *file, const unsigned long prot)
+{
+	if (unlikely(!file || !(prot & PROT_EXEC)))
+		return 1;
+
+	if (!gr_tpe_allow(file))
+		return 0;
+	return 1;
+}
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/grsec_disabled.c kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_disabled.c
--- kernel-2.6.18-417.el5.orig/grsecurity/grsec_disabled.c	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_disabled.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,39 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+void
+gr_copy_label(struct task_struct *tsk)
+{
+	return;
+}
+
+int
+gr_acl_handle_mmap(const struct file *file, const unsigned long prot,
+		   unsigned int *vm_flags)
+{
+	return 1;
+}
+
+void
+grsecurity_init(void)
+{
+	return;
+}
+
+void
+gr_acl_handle_exit(void)
+{
+	return;
+}
+
+int
+gr_acl_handle_mprotect(const struct file *file, const unsigned long prot)
+{
+	return 1;
+}
+
+void grsecurity_setup(void)
+{
+}
+EXPORT_SYMBOL(grsecurity_setup);
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/grsec_init.c kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_init.c
--- kernel-2.6.18-417.el5.orig/grsecurity/grsec_init.c	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_init.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,89 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/percpu.h>
+#include <linux/module.h>
+
+#ifdef CONFIG_VE
+#include <linux/grinternal.h>
+#else
+int grsec_enable_tpe;
+int grsec_tpe_gid;
+int grsec_enable_tpe_all;
+int grsec_lock;
+#endif
+
+spinlock_t grsec_alert_lock = SPIN_LOCK_UNLOCKED;
+
+unsigned long grsec_alert_wtime = 0;
+unsigned long grsec_alert_fyet = 0;
+
+spinlock_t grsec_audit_lock = SPIN_LOCK_UNLOCKED;
+
+char *gr_shared_page[4];
+
+char *gr_alert_log_fmt;
+char *gr_audit_log_fmt;
+
+char *gr_alert_log_buf;
+char *gr_audit_log_buf;
+
+void grsecurity_setup(void)
+{
+#if !defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_SYSCTL_ON)
+#ifndef CONFIG_GRKERNSEC_SYSCTL
+	grsec_lock = 1;
+#endif
+#ifdef CONFIG_GRKERNSEC_TPE
+	grsec_enable_tpe = 1;
+	grsec_tpe_gid = CONFIG_GRKERNSEC_TPE_GID;
+#ifdef CONFIG_GRKERNSEC_TPE_ALL
+	grsec_enable_tpe_all = 1;
+#endif
+#endif
+#endif
+}
+EXPORT_SYMBOL(grsecurity_setup);
+
+void
+grsecurity_init(void)
+{
+	int j;
+	/* create the per-cpu shared pages */
+
+	for (j = 0; j < 4; j++) {
+		gr_shared_page[j] = (char *)__alloc_percpu(PAGE_SIZE);
+		if (gr_shared_page[j] == NULL) {
+			panic("Unable to allocate grsecurity shared page");
+			return;
+		}
+	}
+
+	/* allocate log buffers */
+	gr_alert_log_fmt = kmalloc(512, GFP_KERNEL);
+	if (!gr_alert_log_fmt) {
+		panic("Unable to allocate grsecurity alert log format buffer");
+		return;
+	}
+	gr_audit_log_fmt = kmalloc(512, GFP_KERNEL);
+	if (!gr_audit_log_fmt) {
+		panic("Unable to allocate grsecurity audit log format buffer");
+		return;
+	}
+	gr_alert_log_buf = (char *) get_zeroed_page(GFP_KERNEL);
+	if (!gr_alert_log_buf) {
+		panic("Unable to allocate grsecurity alert log buffer");
+		return;
+	}
+	gr_audit_log_buf = (char *) get_zeroed_page(GFP_KERNEL);
+	if (!gr_audit_log_buf) {
+		panic("Unable to allocate grsecurity audit log buffer");
+		return;
+	}
+	grsecurity_setup();
+
+	return;
+}
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/grsec_log.c kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_log.c
--- kernel-2.6.18-417.el5.orig/grsecurity/grsec_log.c	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_log.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,122 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/tty.h>
+#include <linux/fs.h>
+#include <linux/grinternal.h>
+
+#define BEGIN_LOCKS(x) \
+	if (x != GR_DO_AUDIT) \
+		spin_lock(&grsec_alert_lock); \
+	else \
+		spin_lock(&grsec_audit_lock)
+
+#define END_LOCKS(x) \
+	if (x != GR_DO_AUDIT) \
+		spin_unlock(&grsec_alert_lock); \
+	else \
+		spin_unlock(&grsec_audit_lock);
+
+enum {
+	FLOODING,
+	NO_FLOODING
+};
+
+extern char *gr_alert_log_fmt;
+extern char *gr_audit_log_fmt;
+extern char *gr_alert_log_buf;
+extern char *gr_audit_log_buf;
+
+static int gr_log_start(int audit)
+{
+	char *loglevel = (audit == GR_DO_AUDIT) ? KERN_INFO : KERN_ALERT;
+	char *fmt = (audit == GR_DO_AUDIT) ? gr_audit_log_fmt : gr_alert_log_fmt;
+	char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf;
+
+	if (audit == GR_DO_AUDIT)
+		goto set_fmt;
+
+	if (!grsec_alert_wtime || jiffies - grsec_alert_wtime > CONFIG_GRKERNSEC_FLOODTIME * HZ) {
+		grsec_alert_wtime = jiffies;
+		grsec_alert_fyet = 0;
+	} else if ((jiffies - grsec_alert_wtime < CONFIG_GRKERNSEC_FLOODTIME * HZ) && (grsec_alert_fyet < CONFIG_GRKERNSEC_FLOODBURST)) {
+		grsec_alert_fyet++;
+	} else if (grsec_alert_fyet == CONFIG_GRKERNSEC_FLOODBURST) {
+		grsec_alert_wtime = jiffies;
+		grsec_alert_fyet++;
+		ve_printk(VE_LOG, KERN_ALERT "grsec: more alerts, logging disabled for %d seconds\n", CONFIG_GRKERNSEC_FLOODTIME);
+		return FLOODING;
+	} else return FLOODING;
+
+set_fmt:
+	memset(buf, 0, PAGE_SIZE);
+	sprintf(fmt, "%s%s", loglevel, "grsec: ");
+	strcpy(buf, fmt);
+
+	return NO_FLOODING;
+}
+
+static void gr_log_middle(int audit, const char *msg, va_list ap)
+{
+	char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf;
+	unsigned int len = strlen(buf);
+
+	vsnprintf(buf + len, PAGE_SIZE - len - 1, msg, ap);
+
+	return;
+}
+
+static void gr_log_middle_varargs(int audit, const char *msg, ...)
+{
+	char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf;
+	unsigned int len = strlen(buf);
+	va_list ap;
+
+	va_start(ap, msg);
+	vsnprintf(buf + len, PAGE_SIZE - len - 1, msg, ap);
+	va_end(ap);
+
+	return;
+}
+
+static void gr_log_end(int audit)
+{
+	char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf;
+	unsigned int len = strlen(buf);
+
+	snprintf(buf + len, PAGE_SIZE - len - 1, DEFAULTSECMSG, DEFAULTSECARGS(current));
+	ve_printk(VE_LOG, "%s\n", buf);
+
+	return;
+}
+
+void gr_log_varargs(int audit, const char *msg, int argtypes, ...)
+{
+	int logtype;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+	va_list ap;
+
+	BEGIN_LOCKS(audit);
+	logtype = gr_log_start(audit);
+	if (logtype == FLOODING) {
+		END_LOCKS(audit);
+		return;
+	}
+	va_start(ap, argtypes);
+	switch (argtypes) {
+	/* 
+	 * Only GR_FILENAME is now supported in VZ
+	 */
+	case GR_FILENAME:
+		dentry = va_arg(ap, struct dentry *);
+		mnt = va_arg(ap, struct vfsmount *);
+		gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt));
+		break;
+	default:
+		gr_log_middle(audit, msg, ap);
+	}
+	va_end(ap);
+	gr_log_end(audit);
+	END_LOCKS(audit);
+}
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/grsec_sysctl.c kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_sysctl.c
--- kernel-2.6.18-417.el5.orig/grsecurity/grsec_sysctl.c	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_sysctl.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,108 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/grsecurity.h>
+#include <linux/grinternal.h>
+
+int
+gr_handle_sysctl_mod(const char *dirname, const char *name, const int op)
+{
+#ifdef CONFIG_GRKERNSEC_SYSCTL
+	if (!strcmp(dirname, "grsecurity") && grsec_lock && (op & 002)) {
+		gr_log_str(GR_DONT_AUDIT, GR_SYSCTL_MSG, name);
+		return -EACCES;
+	}
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_GRKERNSEC_SYSCTL
+static int grsec_proc_dointvec(ctl_table *ctl, int write, struct file * filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+#ifdef CONFIG_VE
+	struct ctl_table fake_table;
+	struct ve_struct *env = get_exec_env();
+
+	if (!ve_is_super(env)) {
+		memcpy(&fake_table, ctl, sizeof(struct ctl_table));
+		fake_table.data = (char *)((unsigned long)&env->grsec +
+			(unsigned long)ctl->data -
+			(unsigned long)&get_ve0()->grsec);
+		ctl = &fake_table;
+	}
+#endif
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	return ret;
+}
+
+enum {GS_TPE = 1, GS_TPE_GID, GS_TPE_ALL, GS_LOCK};
+
+static ctl_table grsecurity_table[] = {
+	{
+		.ctl_name	= CTL_KERN,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= grsecurity_table + 2,
+	},
+	{	.ctl_name	= 0 },
+	{
+		.ctl_name	= KERN_GRSECURITY,
+		.procname	= "grsecurity",
+		.mode		= 0500,
+		.child		= grsecurity_table + 4,
+	},
+	{	.ctl_name	= 0 },
+#ifdef CONFIG_GRKERNSEC_TPE
+	{
+		.ctl_name	= GS_TPE,
+		.procname	= "tpe",
+		.data		= &ve0.grsec.enable_tpe,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= &grsec_proc_dointvec,
+		.virt_handler	= 1,
+	},
+	{
+		.ctl_name	= GS_TPE_GID,
+		.procname	= "tpe_gid",
+		.data		= &ve0.grsec.tpe_gid,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= &grsec_proc_dointvec,
+		.virt_handler	= 1,
+	},
+#endif
+#ifdef CONFIG_GRKERNSEC_TPE_ALL
+	{
+		.ctl_name	= GS_TPE_ALL,
+		.procname	= "tpe_restrict_all",
+		.data		= &ve0.grsec.enable_tpe_all,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= &grsec_proc_dointvec,
+		.virt_handler	= 1,
+	},
+#endif
+	{
+		.ctl_name	= GS_LOCK,
+		.procname	= "grsec_lock",
+		.data		= &ve0.grsec.lock,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= &grsec_proc_dointvec,
+		.virt_handler	= 1,
+	},
+	{ .ctl_name = 0 }
+};
+
+static int grsec_sysctl_init(void)
+{
+	register_sysctl_table(grsecurity_table, 0);
+	return 0;
+}
+
+late_initcall(grsec_sysctl_init);
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/grsec_tpe.c kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_tpe.c
--- kernel-2.6.18-417.el5.orig/grsecurity/grsec_tpe.c	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/grsec_tpe.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,37 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/grinternal.h>
+
+extern int gr_acl_tpe_check(void);
+
+int
+gr_tpe_allow(const struct file *file)
+{
+#ifdef CONFIG_GRKERNSEC_TPE
+	struct inode *inode = file->f_dentry->d_parent->d_inode;
+
+	if (current->uid && ((grsec_enable_tpe &&
+#ifdef CONFIG_GRKERNSEC_TPE_INVERT
+	    !in_group_p(grsec_tpe_gid)
+#else
+	    in_group_p(grsec_tpe_gid)
+#endif
+	    )) &&
+	    (inode->i_uid || (!inode->i_uid && ((inode->i_mode & S_IWGRP) ||
+						(inode->i_mode & S_IWOTH))))) {
+		gr_log_fs_generic(GR_DONT_AUDIT, GR_EXEC_TPE_MSG, file->f_dentry, file->f_vfsmnt);
+		return 0;
+	}
+#ifdef CONFIG_GRKERNSEC_TPE_ALL
+	if (current->uid && grsec_enable_tpe && grsec_enable_tpe_all &&
+	    ((inode->i_uid && (inode->i_uid != current->uid)) ||
+	     (inode->i_mode & S_IWGRP) || (inode->i_mode & S_IWOTH))) {
+		gr_log_fs_generic(GR_DONT_AUDIT, GR_EXEC_TPE_MSG, file->f_dentry, file->f_vfsmnt);
+		return 0;
+	}
+#endif
+#endif
+	return 1;
+}
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/Kconfig kernel-2.6.18-417.el5-028stab121/grsecurity/Kconfig
--- kernel-2.6.18-417.el5.orig/grsecurity/Kconfig	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/Kconfig	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,129 @@
+#
+# grsecurity configuration
+#
+
+menu "Grsecurity"
+
+config GRKERNSEC
+	bool "Grsecurity"
+	help
+	  If you say Y here, you will be able to configure many features
+	  that will enhance the security of your system.  It is highly
+	  recommended that you say Y here and read through the help
+	  for each option so that you fully understand the features and
+	  can evaluate their usefulness for your machine.
+
+menu "Executable Protections"
+depends on GRKERNSEC
+
+config GRKERNSEC_TPE
+	bool "Trusted Path Execution (TPE)"
+	help
+	  If you say Y here, you will be able to choose a gid to add to the
+	  supplementary groups of users you want to mark as "untrusted."
+	  These users will not be able to execute any files that are not in
+	  root-owned directories writable only by root.  If the sysctl option
+	  is enabled, a sysctl option with name "tpe" is created.
+
+config GRKERNSEC_TPE_ALL
+	bool "Partially restrict non-root users"
+	depends on GRKERNSEC_TPE
+	help
+	  If you say Y here, all non-root users other than the ones in the
+	  group specified in the main TPE option will only be allowed to
+	  execute files in directories they own that are not group or
+	  world-writable, or in directories owned by root and writable only by
+	  root.  If the sysctl option is enabled, a sysctl option with name
+	  "tpe_restrict_all" is created.
+
+config GRKERNSEC_TPE_INVERT
+	bool "Invert GID option"
+	depends on GRKERNSEC_TPE
+	help
+	  If you say Y here, the group you specify in the TPE configuration will
+	  decide what group TPE restrictions will be *disabled* for.  This
+	  option is useful if you want TPE restrictions to be applied to most
+	  users on the system.
+
+config GRKERNSEC_TPE_GID
+	int "GID for untrusted users"
+	depends on GRKERNSEC_TPE && !GRKERNSEC_TPE_INVERT
+	default 1005
+	help
+	  If you have selected the "Invert GID option" above, setting this
+	  GID determines what group TPE restrictions will be *disabled* for.
+	  If you have not selected the "Invert GID option" above, setting this
+	  GID determines what group TPE restrictions will be *enabled* for.
+	  If the sysctl option is enabled, a sysctl option with name "tpe_gid"
+	  is created.
+
+config GRKERNSEC_TPE_GID
+	int "GID for trusted users"
+	depends on GRKERNSEC_TPE && GRKERNSEC_TPE_INVERT
+	default 1005
+	help
+	  If you have selected the "Invert GID option" above, setting this
+	  GID determines what group TPE restrictions will be *disabled* for.
+	  If you have not selected the "Invert GID option" above, setting this
+	  GID determines what group TPE restrictions will be *enabled* for.
+	  If the sysctl option is enabled, a sysctl option with name "tpe_gid"
+	  is created.
+
+endmenu
+menu "Sysctl support"
+depends on GRKERNSEC && SYSCTL
+
+config GRKERNSEC_SYSCTL
+	bool "Sysctl support"
+	help
+	  If you say Y here, you will be able to change the options that
+	  grsecurity runs with at bootup, without having to recompile your
+	  kernel.  You can echo values to files in /proc/sys/kernel/grsecurity
+	  to enable (1) or disable (0) various features.  All the sysctl entries
+	  are mutable until the "grsec_lock" entry is set to a non-zero value.
+	  All features enabled in the kernel configuration are disabled at boot
+	  if you do not say Y to the "Turn on features by default" option.
+	  All options should be set at startup, and the grsec_lock entry should
+	  be set to a non-zero value after all the options are set.
+	  *THIS IS EXTREMELY IMPORTANT*
+
+config GRKERNSEC_SYSCTL_ON
+	bool "Turn on features by default"
+	depends on GRKERNSEC_SYSCTL
+	help
+	  If you say Y here, instead of having all features that are enabled
+	  in the kernel configuration disabled at boot time, the features will be
+	  enabled at boot time.  It is recommended you say Y here unless
+	  there is some reason you would want all sysctl-tunable features to
+	  be disabled by default.  As mentioned elsewhere, it is important
+	  to enable the grsec_lock entry once you have finished modifying
+	  the sysctl entries.
+
+endmenu
+
+menu "Logging Options"
+depends on GRKERNSEC
+
+config GRKERNSEC_FLOODTIME
+	int "Seconds in between log messages (minimum)"
+	default 10
+	help
+	  This option allows you to enforce the number of seconds between
+	  grsecurity log messages.  The default should be suitable for most
+	  people, however, if you choose to change it, choose a value small enough
+	  to allow informative logs to be produced, but large enough to
+	  prevent flooding.
+
+config GRKERNSEC_FLOODBURST
+	int "Number of messages in a burst (maximum)"
+	default 4
+	help
+	  This option allows you to choose the maximum number of messages allowed
+	  within the flood time interval you chose in a separate option.  The
+	  default should be suitable for most people, however if you find that
+	  many of your logs are being interpreted as flooding, you may want to
+	  raise this value.
+
+endmenu
+
+endmenu
diff -upr kernel-2.6.18-417.el5.orig/grsecurity/Makefile kernel-2.6.18-417.el5-028stab121/grsecurity/Makefile
--- kernel-2.6.18-417.el5.orig/grsecurity/Makefile	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/grsecurity/Makefile	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,15 @@
+# grsecurity's ACL system was originally written in 2001 by Michael Dalton
+# during 2001-2005 it has been completely redesigned by Brad Spengler
+# into an RBAC system
+#
+# All code in this directory and various hooks inserted throughout the kernel
+# are copyright Brad Spengler, and released under the GPL v2 or higher
+
+obj-y = grsec_tpe.o grsec_sysctl.o
+
+obj-$(CONFIG_GRKERNSEC) += grsec_init.o gracl.o grsec_log.o
+
+ifndef CONFIG_GRKERNSEC
+obj-y += grsec_disabled.o
+endif
+
diff -upr kernel-2.6.18-417.el5.orig/include/acpi/acpi_numa.h kernel-2.6.18-417.el5-028stab121/include/acpi/acpi_numa.h
--- kernel-2.6.18-417.el5.orig/include/acpi/acpi_numa.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/acpi/acpi_numa.h	2017-01-13 08:40:15.000000000 -0500
@@ -11,11 +11,8 @@
 #define MAX_PXM_DOMAINS (256) /* Old pxm spec is defined 8 bit */
 #endif
 
-extern int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS];
-extern int __cpuinitdata node_to_pxm_map[MAX_NUMNODES];
-
-extern int __cpuinit pxm_to_node(int);
-extern int __cpuinit node_to_pxm(int);
+extern int pxm_to_node(int);
+extern int node_to_pxm(int);
 extern int __cpuinit acpi_map_pxm_to_node(int);
 extern void __cpuinit acpi_unmap_pxm_to_node(int);
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-arm26/tlbflush.h kernel-2.6.18-417.el5-028stab121/include/asm-arm26/tlbflush.h
--- kernel-2.6.18-417.el5.orig/include/asm-arm26/tlbflush.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-arm26/tlbflush.h	2017-01-13 08:40:19.000000000 -0500
@@ -25,7 +25,7 @@ static inline void memc_update_all(void)
 {
 	struct task_struct *p;
 	cpu_memc_update_all(init_mm.pgd);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (!p->mm)
 			continue;
 		cpu_memc_update_all(p->mm->pgd);
diff -upr kernel-2.6.18-417.el5.orig/include/asm-generic/fcntl.h kernel-2.6.18-417.el5-028stab121/include/asm-generic/fcntl.h
--- kernel-2.6.18-417.el5.orig/include/asm-generic/fcntl.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-generic/fcntl.h	2017-01-13 08:40:40.000000000 -0500
@@ -48,6 +48,9 @@
 #ifndef O_NOATIME
 #define O_NOATIME	01000000
 #endif
+#ifndef O_CLOEXEC
+#define O_CLOEXEC	02000000	/* set close_on_exec */
+#endif
 
 #ifndef O_NDELAY
 #define O_NDELAY	O_NONBLOCK
diff -upr kernel-2.6.18-417.el5.orig/include/asm-generic/mman.h kernel-2.6.18-417.el5-028stab121/include/asm-generic/mman.h
--- kernel-2.6.18-417.el5.orig/include/asm-generic/mman.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-generic/mman.h	2017-01-13 08:40:16.000000000 -0500
@@ -35,6 +35,8 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_DEACTIVATE	32		/* deactivate page */
+
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
 #define MAP_FILE	0
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/a.out.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/a.out.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/a.out.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/a.out.h	2017-01-13 08:40:26.000000000 -0500
@@ -19,7 +19,7 @@ struct exec
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	(TASK_SIZE - PAGE_SIZE)  /* 1 page for vdso */
+#define STACK_TOP	(TASK_SIZE - PAGE_SIZE * 2)  /* 2 pages for vdso */
 #define STACK_TOP_MAX	STACK_TOP
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/atomic_kmap.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/atomic_kmap.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/atomic_kmap.h	2017-01-13 08:40:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/atomic_kmap.h	2017-01-13 08:40:14.000000000 -0500
@@ -0,0 +1,94 @@
+/*
+ * atomic_kmap.h: temporary virtual kernel memory mappings
+ *
+ * Copyright (C) 2003 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_ATOMIC_KMAP_H
+#define _ASM_ATOMIC_KMAP_H
+
+#ifdef __KERNEL__
+
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define HIGHMEM_DEBUG 1
+#else
+#define HIGHMEM_DEBUG 0
+#endif
+
+extern pte_t *kmap_pte;
+#define kmap_prot PAGE_KERNEL
+#define kmap_prot_nocache PAGE_KERNEL_NOCACHE
+
+#define NR_SHARED_PMDS ((0xffffffff-PKMAP_BASE+1)/PMD_SIZE)
+
+static inline unsigned long __kmap_atomic_vaddr(enum km_type type)
+{
+	enum fixed_addresses idx;
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	return __fix_to_virt(FIX_KMAP_BEGIN + idx);
+}
+
+static inline void *__kmap_atomic_noflush(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	/*
+	 * NOTE: entries that rely on some secondary TLB-flush
+	 * effect must not be global:
+	 */
+	set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL));
+
+	return (void*) vaddr;
+}
+
+static inline void *__kmap_atomic(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#if HIGHMEM_DEBUG
+	BUG_ON(!pte_none(*(kmap_pte-idx)));
+#else
+	/*
+	 * Performance optimization - do not flush if the new
+	 * pte is the same as the old one:
+	 */
+	if (pte_val(*(kmap_pte-idx)) == pte_val(mk_pte(page, kmap_prot)))
+		return (void *) vaddr;
+#endif
+	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+	__flush_tlb_one(vaddr);
+
+	return (void*) vaddr;
+}
+
+static inline void __kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx));
+	/*
+	 * force other mappings to Oops if they'll try to access
+	 * this pte without first remap it
+	 */
+	set_pte(kmap_pte - idx, __pte(0));	/* pte_clear(kmap_pte-idx); */
+	__flush_tlb_one(vaddr);
+#endif
+}
+
+#define __kunmap_atomic_type(type) \
+		__kunmap_atomic((void *)__kmap_atomic_vaddr(type), (type))
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_ATOMIC_KMAP_H */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/bug.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/bug.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/bug.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/bug.h	2017-01-13 08:40:15.000000000 -0500
@@ -14,7 +14,10 @@
 #define BUG()								\
 do {									\
 	__asm__ __volatile__("ud2\n"					\
+			     "\t.byte 0x66\n"				\
+			     "\t.byte 0xb8\n" /* mov $xxx, %ax */	\
 			     "\t.word %c0\n"				\
+			     "\t.byte 0xb8\n" /* mov $xxx, %eax */	\
 			     "\t.long %c1\n"				\
 			      : : "i" (__LINE__), "i" (__FILE__));	\
 	unreachable();							\
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/bugs.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/bugs.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/bugs.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/bugs.h	2017-01-13 08:40:15.000000000 -0500
@@ -193,6 +193,6 @@ static void __init check_bugs(void)
 	check_fpu();
 	check_hlt();
 	check_popad();
-	system_utsname.machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+	init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions(); 
 }
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/cacheflush.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/cacheflush.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/cacheflush.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/cacheflush.h	2017-01-13 08:40:16.000000000 -0500
@@ -1,9 +1,6 @@
 #ifndef _I386_CACHEFLUSH_H
 #define _I386_CACHEFLUSH_H
 
-/* Keep includes the same across arches.  */
-#include <linux/mm.h>
-
 /* Caches aren't brain-dead on the intel. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
@@ -18,6 +15,9 @@
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+/* Keep includes the same across arches. */
+#include <linux/mm.h>
+
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/checksum.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/checksum.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/checksum.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/checksum.h	2017-01-13 08:40:14.000000000 -0500
@@ -27,7 +27,7 @@ asmlinkage unsigned int csum_partial(con
  * better 64-bit) boundary
  */
 
-asmlinkage unsigned int csum_partial_copy_generic(const unsigned char *src, unsigned char *dst,
+asmlinkage unsigned int direct_csum_partial_copy_generic( const char *src, char *dst,
 						  int len, int sum, int *src_err_ptr, int *dst_err_ptr);
 
 /*
@@ -41,7 +41,10 @@ static __inline__
 unsigned int csum_partial_copy_nocheck (const unsigned char *src, unsigned char *dst,
 					int len, int sum)
 {
-	return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
+	/*
+	 * The direct function is OK for kernel-space => kernel-space copies:
+	 */
+	return direct_csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
 }
 
 static __inline__
@@ -49,8 +52,9 @@ unsigned int csum_partial_copy_from_user
 						int len, int sum, int *err_ptr)
 {
 	might_sleep();
-	return csum_partial_copy_generic((__force unsigned char *)src, dst,
-					len, sum, err_ptr, NULL);
+	if (copy_from_user(dst, src, len))
+		*err_ptr = -EFAULT;
+	return csum_partial(dst, len, sum);
 }
 
 /*
@@ -176,14 +180,29 @@ static __inline__ unsigned short int csu
  *	Copy and checksum to user
  */
 #define HAVE_CSUM_COPY_USER
-static __inline__ unsigned int csum_and_copy_to_user(const unsigned char *src,
+static __inline__ unsigned int direct_csum_and_copy_to_user(const char *src, 
 						     unsigned char __user *dst,
 						     int len, int sum, 
 						     int *err_ptr)
 {
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, dst, len))
-		return csum_partial_copy_generic(src, (__force unsigned char *)dst, len, sum, NULL, err_ptr);
+		return direct_csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr);
+
+	if (len)
+		*err_ptr = -EFAULT;
+
+	return -1; /* invalid checksum */
+}
+
+static __inline__ unsigned int csum_and_copy_to_user(const char *src, char __user *dst,
+				    int len, int sum, int *err_ptr)
+{
+	if (access_ok(VERIFY_WRITE, dst, len)) {
+		if (copy_to_user(dst, src, len))
+			*err_ptr = -EFAULT;
+		return csum_partial(src, len, sum);
+	}
 
 	if (len)
 		*err_ptr = -EFAULT;
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/desc.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/desc.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/desc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/desc.h	2017-01-13 08:40:14.000000000 -0500
@@ -14,9 +14,9 @@
 
 #include <asm/mmu.h>
 
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
+extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
-DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+extern unsigned char cpu_16bit_stack[NR_CPUS][CPU_16BIT_STACK_SIZE];
 
 struct Xgt_desc_struct {
 	unsigned short size;
@@ -33,6 +33,11 @@ static inline struct desc_struct *get_cp
 	return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
 }
 
+extern void trap_init_virtual_IDT(void);
+extern void trap_init_virtual_GDT(void);
+
+asmlinkage int system_call(void);
+
 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
 #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
 
@@ -120,31 +125,8 @@ static inline void load_TLS(struct threa
 #undef C
 }
 
-static inline void clear_LDT(void)
-{
-	int cpu = get_cpu();
-
-	set_ldt_desc(cpu, &default_ldt[0], 5);
-	load_LDT_desc();
-	put_cpu();
-}
-
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
-{
-	void *segments = pc->ldt;
-	int count = pc->size;
-
-	if (likely(!count)) {
-		segments = &default_ldt[0];
-		count = 5;
-	}
-		
-	set_ldt_desc(cpu, segments, count);
-	load_LDT_desc();
-}
+extern struct page *default_ldt_page;
+extern void load_LDT_nolock(mm_context_t *pc, int cpu);
 
 static inline void load_LDT(mm_context_t *pc)
 {
@@ -176,6 +158,6 @@ extern void arch_add_exec_range(struct m
 extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
 extern void arch_flush_exec_range(struct mm_struct *mm);
 
-#endif /* !__ASSEMBLY__ */
 
+#endif /* !__ASSEMBLY__ */
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/elf.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/elf.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/elf.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/elf.h	2017-01-13 08:40:40.000000000 -0500
@@ -112,7 +112,7 @@ typedef struct user_fxsr_struct elf_fpxr
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  (system_utsname.machine)
+#define ELF_PLATFORM  (utsname()->machine)
 
 #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
 
@@ -159,12 +159,12 @@ extern void __kernel_vsyscall;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-				       int uses_interp);
+				int uses_interp, unsigned long map_address);
 
-extern unsigned int vdso_enabled;
+extern int vdso_enabled;
 
 #define ARCH_DLINFO						\
-do if (vdso_enabled) {						\
+do if (vdso_enabled && sysctl_at_vsyscall) {			\
 		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY);		\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE);	\
 } while (0)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/fixmap.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/fixmap.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/fixmap.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/fixmap.h	2017-01-13 08:40:14.000000000 -0500
@@ -27,17 +27,15 @@
 #include <asm/acpi.h>
 #include <asm/apicdef.h>
 #include <asm/page.h>
-#ifdef CONFIG_HIGHMEM
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
-#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
  * compile time, but to set the physical address only
- * in the boot process. We allocate these special addresses
- * from the end of virtual memory (0xfffff000) backwards.
+ * in the boot process. We allocate these special  addresses
+ * from the end of virtual memory (0xffffe000) backwards.
  * Also this lets us do fail-safe vmalloc(), we
  * can guarantee that these special addresses and
  * vmalloc()-ed addresses never overlap.
@@ -50,11 +48,24 @@
  * TLB entries of such buffers will not be flushed across
  * task switches.
  */
+
+/*
+ * on UP currently we will have no trace of the fixmap mechanizm,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+
+#define TSS_SIZE	sizeof(struct tss_struct)
+#define FIX_TSS_COUNT	((TSS_SIZE * NR_CPUS + PAGE_SIZE - 1)/ PAGE_SIZE)
+
 enum fixed_addresses {
 	FIX_HOLE,
 	FIX_VDSO,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
+#else
+	FIX_VSTACK_HOLE_1,
 #endif
 #ifdef CONFIG_X86_IO_APIC
 	FIX_IO_APIC_BASE_0,
@@ -66,16 +77,26 @@ enum fixed_addresses {
 	FIX_LI_PCIA,	/* Lithium PCI Bridge A */
 	FIX_LI_PCIB,	/* Lithium PCI Bridge B */
 #endif
-#ifdef CONFIG_X86_F00F_BUG
-	FIX_F00F_IDT,	/* Virtual mapping for IDT */
+	FIX_IDT,
+#ifdef CONFIG_X86_HIGH_ENTRY
+	FIX_GDT_1,
+	FIX_GDT_0,
+	FIX_TSS_N,
+	FIX_TSS_0 = FIX_TSS_N + FIX_TSS_COUNT - 1,
+	FIX_ENTRY_TRAMPOLINE_N,
+	FIX_ENTRY_TRAMPOLINE_0 = FIX_ENTRY_TRAMPOLINE_N + 8,
+	FIX_16BIT_STACK_N,
+	FIX_16BIT_STACK_0 = FIX_16BIT_STACK_N + (NR_CPUS * 1024) / PAGE_SIZE,
 #endif
 #ifdef CONFIG_X86_CYCLONE_TIMER
 	FIX_CYCLONE_TIMER, /*cyclone timer register*/
+	FIX_VSTACK_HOLE_2,
 #endif 
-#ifdef CONFIG_HIGHMEM
-	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	/* reserved pte's for temporary kernel mappings */
+	__FIX_KMAP_BEGIN,
+	FIX_KMAP_BEGIN = __FIX_KMAP_BEGIN + (__FIX_KMAP_BEGIN & 1) +
+		((__FIXADDR_TOP >> PAGE_SHIFT) & 1),
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
 #ifdef CONFIG_ACPI
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
@@ -106,7 +127,9 @@ extern void __set_fixmap (enum fixed_add
 #define clear_fixmap(idx) \
 		__set_fixmap(idx, 0, __pgprot(0))
 
-#define FIXADDR_TOP	((unsigned long)__FIXADDR_TOP)
+ /* IMPORTANT: we have to align FIXADDR_TOP so that the virtual stack */
+ /* is THREAD_SIZE aligned. */
+#define FIXADDR_TOP	__FIXADDR_TOP
 
 #define __FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define __FIXADDR_BOOT_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/futex.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/futex.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/futex.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/futex.h	2017-01-13 08:40:14.000000000 -0500
@@ -43,7 +43,7 @@
 	: "r" (oparg), "i" (-EFAULT), "1" (0))
 
 static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+__futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
@@ -53,8 +53,10 @@ futex_atomic_op_inuser (int encoded_op, 
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
+#endif
 
 	inc_preempt_count();
 
@@ -104,8 +106,44 @@ futex_atomic_op_inuser (int encoded_op, 
 	return ret;
 }
 
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+//struct page;
+void* __kmap_atomic_user_page(unsigned long addr, struct page** p,
+			spinlock_t **ptlp, int write);
+void __kunmap_atomic_user_page(void * addr, struct page* p, spinlock_t *ptlp);
+#endif
+
+
 static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+{
+	int ret;
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+	void *maddr;
+	struct page *page = NULL;
+	spinlock_t *ptlp;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	inc_preempt_count();
+	maddr = __kmap_atomic_user_page((unsigned long)uaddr, &page, &ptlp, 1);
+	dec_preempt_count();
+	uaddr = (int __user *)(maddr + ((unsigned long)uaddr & (PAGE_SIZE-1)));
+	if (!maddr)
+		return -EFAULT;
+#endif
+
+	ret = __futex_atomic_op_inuser(encoded_op, uaddr);
+
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+	__kunmap_atomic_user_page(maddr, page, ptlp);
+#endif
+	return ret;
+}
+
+static inline int
+__futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
@@ -131,5 +169,28 @@ futex_atomic_cmpxchg_inatomic(int __user
 	return oldval;
 }
 
+static inline int
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+	void *maddr;
+	struct page *page = NULL;
+	spinlock_t *ptlp;
+
+	maddr = __kmap_atomic_user_page((unsigned long)uaddr, &page, &ptlp, 1);
+	uaddr = (int __user *)(maddr + ((unsigned long)uaddr & (PAGE_SIZE-1)));
+	if (!maddr)
+		return -EFAULT;
+#endif
+
+	ret = __futex_atomic_cmpxchg_inatomic(uaddr, oldval, newval);
+
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+	__kunmap_atomic_user_page(maddr, page, ptlp);
+#endif
+	return ret;
+}
+
 #endif
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/hardirq.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/hardirq.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/hardirq.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/hardirq.h	2017-01-13 08:40:16.000000000 -0500
@@ -8,6 +8,7 @@ typedef struct {
 	unsigned int __softirq_pending;
 	unsigned long idle_timestamp;
 	unsigned int __nmi_count;	/* arch dependent */
+	unsigned int __reschedule_count;
 	unsigned int apic_timer_irqs;	/* arch dependent */
 } ____cacheline_aligned irq_cpustat_t;
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/highmem.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/highmem.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/highmem.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/highmem.h	2017-01-13 08:40:14.000000000 -0500
@@ -24,12 +24,11 @@
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
 #include <asm/tlbflush.h>
+#include <asm/atomic_kmap.h>
 
 /* declarations for highmem.c */
 extern unsigned long highstart_pfn, highend_pfn;
 
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
 extern pte_t *pkmap_page_table;
 
 /*
@@ -62,6 +61,7 @@ extern void FASTCALL(kunmap_high(struct 
 void *kmap(struct page *page);
 void kunmap(struct page *page);
 void *kmap_atomic(struct page *page, enum km_type type);
+void *kmap_atomic_pte(pte_t *pte, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 struct page *kmap_atomic_to_page(void *ptr);
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/kmap_types.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/kmap_types.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/kmap_types.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/kmap_types.h	2017-01-13 08:40:14.000000000 -0500
@@ -1,30 +1,36 @@
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
-#endif
+#include <linux/thread_info.h>
 
 enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+	/*
+	 * IMPORTANT: don't move these 3 entries, be wary when adding entries,
+	 * the 4G/4G virtual stack must be THREAD_SIZE aligned on each cpu.
+	 */
+	KM_BOUNCE_READ,
+	KM_VSTACK_BASE,
+	__KM_VSTACK_TOP = KM_VSTACK_BASE + STACK_PAGE_COUNT-1,
+	KM_VSTACK_TOP = __KM_VSTACK_TOP + (__KM_VSTACK_TOP % 2),
 
-#undef D
+	KM_LDT_PAGE15,
+	KM_LDT_PAGE0 = KM_LDT_PAGE15 + 16-1,
+	KM_USER_COPY,
+	KM_VSTACK_HOLE,
+	KM_SKB_SUNRPC_DATA,
+	KM_SKB_DATA_SOFTIRQ,
+	KM_USER0,
+	KM_USER1,
+	KM_BIO_SRC_IRQ,
+	KM_BIO_DST_IRQ,
+	KM_PTE0,
+	KM_PTE1,
+	KM_IRQ0,
+	KM_IRQ1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+	__KM_TYPE_NR,
+	KM_TYPE_NR=__KM_TYPE_NR + (__KM_TYPE_NR % 2)
+};
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/mach-xen/asm/processor.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/mach-xen/asm/processor.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/mach-xen/asm/processor.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/mach-xen/asm/processor.h	2017-01-13 08:40:40.000000000 -0500
@@ -227,6 +227,21 @@ static inline unsigned int cpuid_edx(uns
 	return edx;
 }
 
+#ifdef CONFIG_SMP
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op);
+#else
+static inline void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	cpuid(op, eax, ebx, ecx, edx);
+}
+
+static inline u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	return cpuid_eax(op);
+}
+#endif
+
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
@@ -530,6 +545,9 @@ static inline void __load_esp0(struct ts
 	HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)
 #endif
 
+#define load_virtual_esp0(tss, task) load_esp0(tss, &(task)->thread) 
+#define __get_cpu_tss(cpu) (&per_cpu(init_tss, cpu))
+
 #define start_thread(regs, new_eip, new_esp) do {		\
 	__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));	\
 	set_fs(USER_DS);					\
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/mach-xen/asm/system.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/mach-xen/asm/system.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/mach-xen/asm/system.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/mach-xen/asm/system.h	2017-01-13 08:40:40.000000000 -0500
@@ -462,6 +462,8 @@ static inline unsigned long long __cmpxc
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 #endif
 
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
 #include <linux/irqflags.h>
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/mman.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/mman.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/mman.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/mman.h	2017-01-13 08:40:16.000000000 -0500
@@ -11,6 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_EXECPRIO	0x40000		/* do soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/mmu_context.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/mmu_context.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/mmu_context.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/mmu_context.h	2017-01-13 08:40:14.000000000 -0500
@@ -28,6 +28,10 @@ static inline void switch_mm(struct mm_s
 {
 	int cpu = smp_processor_id();
 
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+	if (tsk->mm)
+		tsk->thread_info->user_pgd = (void *)__pa(tsk->mm->pgd);
+#endif
 	if (likely(prev != next)) {
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
@@ -38,12 +42,14 @@ static inline void switch_mm(struct mm_s
 		cpu_set(cpu, next->cpu_vm_mask);
 
 		/* Re-load page tables */
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
 		load_cr3(next->pgd);
+#endif
 
 		/*
 		 * load the LDT, if the LDT is different:
 		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
+		if (unlikely(prev->context.size + next->context.size))
 			load_LDT_nolock(&next->context, cpu);
 	}
 #ifdef CONFIG_SMP
@@ -55,7 +61,9 @@ static inline void switch_mm(struct mm_s
 			/* We were in lazy tlb mode and leave_mm disabled 
 			 * tlb flush IPI delivery. We must reload %cr3.
 			 */
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
 			load_cr3(next->pgd);
+#endif
 			load_LDT_nolock(&next->context, cpu);
 		}
 	}
@@ -66,6 +74,6 @@ static inline void switch_mm(struct mm_s
 	asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
 
 #define activate_mm(prev, next) \
-	switch_mm((prev),(next),NULL)
+	switch_mm((prev),(next),current)
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/mmu.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/mmu.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/mmu.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/mmu.h	2017-01-13 08:40:14.000000000 -0500
@@ -10,10 +10,13 @@
  * exec_limit is used to track the range PROT_EXEC
  * mappings span.
  */
+
+#define MAX_LDT_PAGES 16
+
 typedef struct { 
 	int size;
 	struct semaphore sem;
-	void *ldt;
+	struct page *ldt_pages[MAX_LDT_PAGES];
 	struct desc_struct user_cs;
 	unsigned long exec_limit;
 	void *vdso;
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/page.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/page.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/page.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/page.h	2017-01-13 08:40:14.000000000 -0500
@@ -11,8 +11,6 @@
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
-
-
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
@@ -92,8 +90,23 @@ typedef struct { unsigned long pgprot; }
  *
  * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
  * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ *
+ * Note: on PAE the kernel must never go below 32 MB, we use the
+ * first 8 entries of the 2-level boot pgd for PAE magic.
  */
 
+#ifdef __ASSEMBLY__
+#define __PAGE_OFFSET		CONFIG_PAGE_OFFSET
+#else
+#define __PAGE_OFFSET		((unsigned long)CONFIG_PAGE_OFFSET)
+#endif
+
+#ifdef CONFIG_X86_4G_VM_LAYOUT
+#define TASK_SIZE		(0xc0000000)
+#else
+#define TASK_SIZE		(__PAGE_OFFSET)
+#endif
+
 #ifndef __ASSEMBLY__
 
 struct vm_area_struct;
@@ -112,12 +125,6 @@ extern int devmem_is_allowed(unsigned lo
 
 #endif /* __ASSEMBLY__ */
 
-#ifdef __ASSEMBLY__
-#define __PAGE_OFFSET		CONFIG_PAGE_OFFSET
-#else
-#define __PAGE_OFFSET		((unsigned long)CONFIG_PAGE_OFFSET)
-#endif
-
 /*
  * Under exec-shield we don't use the generic fixmap gate area.
  * The vDSO ("gate area") has a normal vma found the normal ways.
@@ -126,7 +133,8 @@ extern int devmem_is_allowed(unsigned lo
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define __MAXMEM		(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM			((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
 #define __pa_symbol(x)		__pa(x)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/pgtable.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/pgtable.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/pgtable.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/pgtable.h	2017-01-13 08:40:14.000000000 -0500
@@ -15,15 +15,12 @@
 #include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <linux/threads.h>
+#include <linux/slab.h>
 
 #ifndef _I386_BITOPS_H
 #include <asm/bitops.h>
 #endif
 
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
 struct mm_struct;
 struct vm_area_struct;
 
@@ -36,20 +33,27 @@ extern unsigned long empty_zero_page[102
 extern pgd_t swapper_pg_dir[1024];
 extern kmem_cache_t *pgd_cache;
 extern kmem_cache_t *pmd_cache;
+extern kmem_cache_t *kpmd_cache;
 extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
 
 void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void kpmd_ctor(void *, kmem_cache_t *, unsigned long);
 void pgd_ctor(void *, kmem_cache_t *, unsigned long);
 void pgd_dtor(void *, kmem_cache_t *, unsigned long);
 void pgtable_cache_init(void);
-void paging_init(void);
+extern void paging_init(void);
+void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end);
 
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
  * implements both the traditional 2-level x86 page tables and the
  * newer 3-level PAE-mode page tables.
  */
+
+extern void init_entry_mappings(void);
+extern void entry_trampoline_setup(void);
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level-defs.h>
 # define PMD_SIZE	(1UL << PMD_SHIFT)
@@ -61,7 +65,12 @@ void paging_init(void);
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
+#if defined(CONFIG_X86_PAE) && defined(CONFIG_X86_4G_VM_LAYOUT)
+# define USER_PTRS_PER_PGD	4
+#else
+# define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE) + ((TASK_SIZE % PGDIR_SIZE) + PGDIR_SIZE-1)/PGDIR_SIZE)
+#endif
+
 #define FIRST_USER_ADDRESS	0
 
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
@@ -315,6 +324,7 @@ static inline void clone_pgd_range(pgd_t
  */
 
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte_phys(physpage, pgprot) pfn_pte((physpage) >> PAGE_SHIFT, pgprot)
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
@@ -459,4 +469,11 @@ extern void noexec_setup(const char *str
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
 
+/*
+ * The size of the low 1:1 mappings we use during bootup,
+ * SMP-boot and ACPI-sleep:
+ */
+#define LOW_MAPPINGS_SIZE (16*1024*1024)
+
+
 #endif /* _I386_PGTABLE_H */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/processor.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/processor.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/processor.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/processor.h	2017-01-13 08:40:15.000000000 -0500
@@ -18,7 +18,6 @@
 #include <asm/system.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
-#include <asm/percpu.h>
 #include <linux/cpumask.h>
 
 /* flag for disabling the tsc */
@@ -110,8 +109,6 @@ struct cpuinfo_x86 {
 
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
-DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 #ifdef CONFIG_SMP
 extern struct cpuinfo_x86 cpu_data[];
@@ -128,6 +125,7 @@ extern void identify_cpu(struct cpuinfo_
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void early_init_intel(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
 
 #ifdef CONFIG_X86_HT
@@ -228,6 +226,21 @@ static inline unsigned int cpuid_edx(uns
 	return edx;
 }
 
+#ifdef CONFIG_SMP
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op);
+#else
+static inline void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	cpuid(op, eax, ebx, ecx, edx);
+}
+
+static inline u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	return cpuid_eax(op);
+}
+#endif
+
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
@@ -338,11 +351,6 @@ extern unsigned int mca_pentium_flag;
 /* Boot loader type from the setup header */
 extern int bootloader_type;
 
-/*
- * User space process size: 3GB (default).
- */
-#define TASK_SIZE	(PAGE_OFFSET)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
@@ -357,6 +365,7 @@ extern unsigned long arch_align_stack(un
  * Size of io_bitmap.
  */
 #define IO_BITMAP_BITS  65536
+ 
 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
 #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
@@ -463,11 +472,24 @@ struct tss_struct {
 	unsigned long stack[64];
 } __attribute__((packed));
 
+extern struct tss_struct init_tss[NR_CPUS];
+extern struct tss_struct doublefault_tss;
+
 #define ARCH_MIN_TASKALIGN	16
 
+
+#ifdef CONFIG_4KSTACKS
+#define STACK_PAGE_COUNT	(4096/PAGE_SIZE)
+#else
+#define STACK_PAGE_COUNT	(8192/PAGE_SIZE)
+#endif
+
+
+
 struct thread_struct {
 /* cached TLS descriptors. */
 	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+	void *stack_page[STACK_PAGE_COUNT];
 	unsigned long	esp0;
 	unsigned long	sysenter_cs;
 	unsigned long	eip;
@@ -507,12 +529,14 @@ struct thread_struct {
 #define INIT_TSS  {							\
 	.esp0		= sizeof(init_stack) + (long)&init_stack,	\
 	.ss0		= __KERNEL_DS,					\
+	.esp1		= sizeof(init_tss[0]) + (long)&init_tss[0],	\
 	.ss1		= __KERNEL_CS,					\
 	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,			\
 	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
 }
 
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+static inline void
+load_esp0(struct tss_struct *tss, struct thread_struct *thread)
 {
 	tss->esp0 = thread->esp0;
 	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
@@ -578,6 +602,33 @@ extern void prepare_to_copy(struct task_
  */
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+
+#define virtual_esp0(task) \
+	((unsigned long)((task)->thread_info->virtual_stack + ((task)->thread.esp0 - (unsigned long)(task)->thread_info->real_stack)))
+#define load_virtual_esp0(tss, task)					\
+	do {								\
+		tss->esp0 = virtual_esp0(task);				\
+		if (likely(cpu_has_sep) && unlikely(tss->ss1 != (task)->thread.sysenter_cs)) {	\
+			tss->ss1 = (task)->thread.sysenter_cs;		\
+			wrmsr(MSR_IA32_SYSENTER_CS,			\
+				(task)->thread.sysenter_cs, 0);		\
+		}							\
+	} while (0)
+
+#else
+
+#define virtual_esp0(task) ((task)->thread.esp0)
+#define load_virtual_esp0(tss, task) load_esp0(tss, &(task)->thread) 
+
+#endif
+
+#ifndef CONFIG_XEN
+#define __get_cpu_tss(cpu) (init_tss + (cpu))
+#else
+#define __get_cpu_tss(cpu) (&per_cpu(init_tss, cpu))
+#endif
+
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack);
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/string.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/string.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/string.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/string.h	2017-01-13 08:40:14.000000000 -0500
@@ -58,6 +58,29 @@ __asm__ __volatile__(
 return dest;
 }
 
+/*
+ * This is a more generic variant of strncpy_count() suitable for
+ * implementing string-access routines with all sorts of return
+ * code semantics. It's used by mm/usercopy.c.
+ */
+static inline size_t strncpy_count(char * dest,const char *src,size_t count)
+{
+	__asm__ __volatile__(
+
+	"1:\tdecl %0\n\t"
+	"js 2f\n\t"
+	"lodsb\n\t"
+	"stosb\n\t"
+	"testb %%al,%%al\n\t"
+	"jne 1b\n\t"
+	"2:"
+	"incl %0"
+	: "=c" (count)
+	:"S" (src),"D" (dest),"0" (count) : "memory");
+
+	return count;
+}
+
 #define __HAVE_ARCH_STRCAT
 static inline char * strcat(char * dest,const char * src)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/system.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/system.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/system.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/system.h	2017-01-13 08:40:16.000000000 -0500
@@ -110,7 +110,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 
 #define read_cr3() ({ \
 	unsigned int __dummy; \
-	__asm__ ( \
+	__asm__ __volatile__( \
 		"movl %%cr3,%0\n\t" \
 		:"=r" (__dummy)); \
 	__dummy; \
@@ -120,7 +120,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 
 #define read_cr4() ({ \
 	unsigned int __dummy; \
-	__asm__( \
+	__asm__ __volatile__( \
 		"movl %%cr4,%0\n\t" \
 		:"=r" (__dummy)); \
 	__dummy; \
@@ -128,7 +128,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 #define read_cr4_safe() ({			      \
 	unsigned int __dummy;			      \
 	/* This could fault if %cr4 does not exist */ \
-	__asm__("1: movl %%cr4, %0		\n"   \
+	__asm__ __volatile__("1: movl %%cr4, %0		\n" \
 		"2:				\n"   \
 		".section __ex_table,\"a\"	\n"   \
 		".long 1b,2b			\n"   \
@@ -464,6 +464,8 @@ static inline unsigned long long __cmpxc
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 #endif
 
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
 #include <linux/irqflags.h>
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/thread_info.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/thread_info.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/thread_info.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/thread_info.h	2017-01-13 08:40:24.000000000 -0500
@@ -38,6 +38,7 @@ struct thread_info {
 						   0-0xFFFFFFFF for kernel-thread
 						*/
 	void			*sysenter_return;
+	void			*real_stack, *virtual_stack, *user_pgd;
 	struct restart_block    restart_block;
 
 	unsigned long           previous_esp;   /* ESP of the previous stack in case
@@ -67,7 +68,7 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-#define INIT_THREAD_INFO(tsk)			\
+#define INIT_THREAD_INFO(tsk, thread_info)	\
 {						\
 	.task		= &tsk,			\
 	.exec_domain	= &default_exec_domain,	\
@@ -78,6 +79,7 @@ struct thread_info {
 	.restart_block = {			\
 		.fn = do_no_restart_syscall,	\
 	},					\
+	.real_stack	= &thread_info,		\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
@@ -99,13 +101,13 @@ static inline struct thread_info *curren
 	({							\
 		struct thread_info *ret;			\
 								\
-		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
+		ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC);	\
 		if (ret)					\
 			memset(ret, 0, THREAD_SIZE);		\
 		ret;						\
 	})
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC)
 #endif
 
 #define free_thread_info(info)	kfree(info)
@@ -135,6 +137,7 @@ static inline struct thread_info *curren
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_IRET		5	/* return with iret */
+#define TIF_DB7			6	/* has debug registers */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
@@ -142,6 +145,7 @@ static inline struct thread_info *curren
 #define TIF_DEBUG		17	/* uses debug registers */
 #define TIF_IO_BITMAP		18	/* uses I/O bitmap */
 #define TIF_FORCED_TF		19	/* true if TF in eflags artificially */
+#define TIF_FREEZE		20	/* Freeze request (atomic PF_FREEZE) */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -149,6 +153,7 @@ static inline struct thread_info *curren
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_IRET		(1<<TIF_IRET)
+#define _TIF_DB7		(1<<TIF_DB7)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
@@ -158,9 +163,9 @@ static inline struct thread_info *curren
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
-  (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP))
+  (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_DB7))
 /* work to do on any return to u-space */
-#define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
+#define _TIF_ALLWORK_MASK	(0x0000FFFF & ~(_TIF_SECCOMP | _TIF_DB7))
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/tlbflush.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/tlbflush.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/tlbflush.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/tlbflush.h	2017-01-13 08:40:40.000000000 -0500
@@ -1,7 +1,6 @@
 #ifndef _I386_TLBFLUSH_H
 #define _I386_TLBFLUSH_H
 
-#include <linux/mm.h>
 #include <asm/processor.h>
 
 #define __flush_tlb()							\
@@ -63,6 +62,7 @@ extern unsigned long pgkern_mask;
 	} while (0)
 #endif
 
+#include <linux/mm.h>
 /*
  * TLB flushing:
  *
@@ -86,22 +86,28 @@ extern unsigned long pgkern_mask;
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 	if (mm == current->active_mm)
 		__flush_tlb();
+#endif
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 	unsigned long addr)
 {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb_one(addr);
+#endif
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end)
 {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb();
+#endif
 }
 
 #else
@@ -112,11 +118,10 @@ static inline void flush_tlb_range(struc
 	__flush_tlb()
 
 extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
 extern void flush_tlb_mm(struct mm_struct *);
 extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 
-#define flush_tlb()	flush_tlb_current_task()
+#define flush_tlb()	flush_tlb_all()
 
 static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/tsc.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/tsc.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/tsc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/tsc.h	2017-01-13 08:40:19.000000000 -0500
@@ -31,13 +31,17 @@ static inline cycles_t get_cycles(void)
 {
 	unsigned long long ret = 0;
 
-#ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
-		return 0;
-#endif
-
 #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
 	rdtscll(ret);
+#elif defined(CONFIG_VE)
+	/*
+	 * get_cycles is used in the following calculations:
+	 * - VPS idle and iowait times in kernel/shced.h
+	 * - task's sleep time to be shown with SyRq-t
+	 * - kstat latencies in linux/vzstat.h
+	 * - sched latency via wakeup_stamp in linux/ve_task.h
+	 */
+#warning "some of VPS statistics won't be correct without get_cycles() (kstat_lat, ve_idle, etc)"
 #endif
 	return ret;
 }
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/uaccess.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/uaccess.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/uaccess.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/uaccess.h	2017-01-13 08:40:14.000000000 -0500
@@ -25,7 +25,7 @@
 
 
 #define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFFUL)
-#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+#define USER_DS		MAKE_MM_SEG(TASK_SIZE)
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
@@ -126,6 +126,56 @@ extern void __get_user_4(void);
 		:"=a" (ret),"=d" (x) \
 		:"0" (ptr))
 
+extern int get_user_size(unsigned int size, void *val, const void *ptr);
+extern int put_user_size(unsigned int size, const void *val, void *ptr);
+extern int zero_user_size(unsigned int size, void *ptr);
+extern int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr);
+extern int strlen_fromuser_size(unsigned int size, const void *ptr);
+
+
+/*
+ * GCC 2.96 has stupid bug which forces us to use volatile or barrier below.
+ * without volatile or barrier compiler generates ABSOLUTELY wrong code which
+ * igonores XXX_size function return code, but generates EFAULT :)))
+ * the bug was found in sys_utime()
+ */
+# define indirect_get_user(x,ptr)					\
+({	int __ret_gu,__val_gu;						\
+	__typeof__(ptr) __ptr_gu = (ptr);				\
+	__ret_gu = get_user_size(sizeof(*__ptr_gu), &__val_gu,__ptr_gu) ? -EFAULT : 0;\
+	barrier();							\
+	(x) = (__typeof__(*__ptr_gu))__val_gu;				\
+	__ret_gu;							\
+})
+#define indirect_put_user(x,ptr)					\
+({									\
+	int __ret_pu;							\
+	__typeof__(*(ptr)) *__ptr_pu = (ptr), __x_pu = (x);		\
+	__ret_pu = put_user_size(sizeof(*__ptr_pu),			\
+		&__x_pu, __ptr_pu) ? -EFAULT : 0;			\
+	barrier();							\
+	__ret_pu;							\
+})
+#define __indirect_put_user indirect_put_user
+#define __indirect_get_user indirect_get_user
+
+#define indirect_copy_from_user(to,from,n) get_user_size(n,to,from)
+#define indirect_copy_to_user(to,from,n) put_user_size(n,from,to)
+
+#define __indirect_copy_from_user indirect_copy_from_user
+#define __indirect_copy_to_user indirect_copy_to_user
+
+#define indirect_strncpy_from_user(dst, src, count) \
+		copy_str_fromuser_size(count, dst, src)
+
+extern int strlen_fromuser_size(unsigned int size, const void *ptr);
+#define indirect_strnlen_user(str, n) strlen_fromuser_size(n, str)
+#define indirect_strlen_user(str) indirect_strnlen_user(str, ~0UL >> 1)
+
+extern int zero_user_size(unsigned int size, void *ptr);
+
+#define indirect_clear_user(mem, len) zero_user_size(len, mem)
+#define __indirect_clear_user clear_user
 
 /* Careful: we have to cast the result to the type of the pointer for sign reasons */
 /**
@@ -145,7 +195,7 @@ extern void __get_user_4(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define get_user(x,ptr)							\
+#define direct_get_user(x,ptr)						\
 ({	int __ret_gu;							\
 	unsigned long __val_gu;						\
 	__chk_user_ptr(ptr);						\
@@ -194,7 +244,7 @@ extern void __put_user_8(void);
  */
 #ifdef CONFIG_X86_WP_WORKS_OK
 
-#define put_user(x,ptr)						\
+#define direct_put_user(x,ptr)					\
 ({	int __ret_pu;						\
 	__typeof__(*(ptr)) __pu_val;				\
 	__chk_user_ptr(ptr);					\
@@ -210,7 +260,7 @@ extern void __put_user_8(void);
 })
 
 #else
-#define put_user(x,ptr)						\
+#define direct_put_user(x,ptr)					\
 ({								\
  	int __ret_pu;						\
 	__typeof__(*(ptr)) __pus_tmp = x;			\
@@ -244,7 +294,7 @@ extern void __put_user_8(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define __get_user(x,ptr) \
+#define __direct_get_user(x,ptr) \
   __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
 
 
@@ -267,7 +317,7 @@ extern void __put_user_8(void);
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define __put_user(x,ptr) \
+#define __direct_put_user(x,ptr) \
   __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
 
 #define __put_user_nocheck(x,ptr,size)				\
@@ -419,7 +469,7 @@ unsigned long __must_check __copy_from_u
  * On success, this will be zero.
  */
 static __always_inline unsigned long __must_check
-__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
+__direct_copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
@@ -440,10 +490,32 @@ __copy_to_user_inatomic(void __user *to,
 }
 
 static __always_inline unsigned long __must_check
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+__direct_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_sleep();
+	return __direct_copy_to_user_inatomic(to, from, n);
+}
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+static inline unsigned long
+direct_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       might_sleep();
-       return __copy_to_user_inatomic(to, from, n);
+	might_sleep();
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __direct_copy_to_user_inatomic(to, from, n);
+	return n;
 }
 
 /**
@@ -469,7 +541,7 @@ __copy_to_user(void __user *to, const vo
  * for explanation of why this is needed.
  */
 static __always_inline unsigned long
-__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
+__direct_copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
 	/* Avoid zeroing the tail if the copy fails..
 	 * If 'n' is constant and 1, 2, or 4, we do still zero on a failure,
@@ -494,7 +566,7 @@ __copy_from_user_inatomic(void *to, cons
 	return __copy_from_user_ll_nozero(to, from, n);
 }
 static __always_inline unsigned long
-__copy_from_user(void *to, const void __user *from, unsigned long n)
+__direct_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
 	if (__builtin_constant_p(n)) {
@@ -515,6 +587,35 @@ __copy_from_user(void *to, const void __
 	return __copy_from_user_ll(to, from, n);
 }
 
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+static inline unsigned long
+direct_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	might_sleep();
+	if (access_ok(VERIFY_READ, from, n))
+		n = __direct_copy_from_user_inatomic(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+
+
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 #define ARCH_HAS_NOCACHE_UACCESS
 
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
@@ -544,15 +645,7 @@ __copy_from_user_inatomic_nocache(void *
 {
        return __copy_from_user_ll_nocache_nozero(to, from, n);
 }
-
-unsigned long __must_check copy_to_user(void __user *to,
-				const void *from, unsigned long n);
-unsigned long __must_check copy_from_user(void *to,
-				const void __user *from, unsigned long n);
-long __must_check strncpy_from_user(char *dst, const char __user *src,
-				long count);
-long __must_check __strncpy_from_user(char *dst,
-				const char __user *src, long count);
+#endif
 
 /**
  * strlen_user: - Get the size of a string in user space.
@@ -568,10 +661,74 @@ long __must_check __strncpy_from_user(ch
  * If there is a limit on the length of a valid string, you may wish to
  * consider using strnlen_user() instead.
  */
-#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
 
-long strnlen_user(const char __user *str, long n);
-unsigned long __must_check clear_user(void __user *mem, unsigned long len);
-unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
+long direct_strncpy_from_user(char *dst, const char *src, long count);
+long __direct_strncpy_from_user(char *dst, const char *src, long count);
+#define direct_strlen_user(str) direct_strnlen_user(str, ~0UL >> 1)
+long direct_strnlen_user(const char *str, long n);
+unsigned long direct_clear_user(void *mem, unsigned long len);
+unsigned long __direct_clear_user(void *mem, unsigned long len);
+
+extern int indirect_uaccess;
+
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+
+/*
+ * Return code and zeroing semantics:
+
+ __clear_user          0                      <-> bytes not done
+ clear_user            0                      <-> bytes not done
+ __copy_to_user        0                      <-> bytes not done
+ copy_to_user          0                      <-> bytes not done
+ __copy_from_user      0                      <-> bytes not done, zero rest
+ copy_from_user        0                      <-> bytes not done, zero rest
+ __get_user            0                      <-> -EFAULT
+ get_user              0                      <-> -EFAULT
+ __put_user            0                      <-> -EFAULT
+ put_user              0                      <-> -EFAULT
+ strlen_user           strlen + 1             <-> 0
+ strnlen_user          strlen + 1 (or n+1)    <-> 0
+ strncpy_from_user     strlen (or n)          <-> -EFAULT
+
+ */
+
+#define __clear_user(mem,len) __indirect_clear_user(mem,len)
+#define clear_user(mem,len) indirect_clear_user(mem,len)
+#define __copy_to_user(to,from,n) __indirect_copy_to_user(to,from,n)
+#define __copy_to_user_inatomic(to,from,n) __indirect_copy_to_user(to,from,n)
+#define copy_to_user(to,from,n) indirect_copy_to_user(to,from,n)
+#define __copy_from_user(to,from,n) __indirect_copy_from_user(to,from,n)
+#define __copy_from_user_inatomic(to,from,n) __indirect_copy_from_user(to,from,n)
+#define copy_from_user(to,from,n) indirect_copy_from_user(to,from,n)
+#define __get_user(val,ptr) __indirect_get_user(val,ptr)
+#define get_user(val,ptr) indirect_get_user(val,ptr)
+#define __put_user(val,ptr) __indirect_put_user(val,ptr)
+#define put_user(val,ptr) indirect_put_user(val,ptr)
+#define strlen_user(str) indirect_strlen_user(str)
+#define strnlen_user(src,count) indirect_strnlen_user(src,count)
+#define strncpy_from_user(dst,src,count) \
+			indirect_strncpy_from_user(dst,src,count)
+
+#else
+
+#define __clear_user __direct_clear_user
+#define clear_user direct_clear_user
+#define __copy_to_user __direct_copy_to_user_inatomic
+#define __copy_to_user_inatomic __direct_copy_to_user_inatomic
+#define __direct_copy_to_user __direct_copy_to_user_inatomic
+#define copy_to_user direct_copy_to_user
+#define __copy_from_user __direct_copy_from_user_inatomic
+#define __copy_from_user_inatomic __direct_copy_from_user_inatomic
+#define __direct_copy_from_user __direct_copy_from_user_inatomic
+#define copy_from_user direct_copy_from_user
+#define __get_user __direct_get_user
+#define get_user direct_get_user
+#define __put_user __direct_put_user
+#define put_user direct_put_user
+#define strlen_user direct_strlen_user
+#define strnlen_user direct_strnlen_user
+#define strncpy_from_user direct_strncpy_from_user
+
+#endif /* CONFIG_X86_UACCESS_INDIRECT */
 
 #endif /* __i386_UACCESS_H */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-i386/unistd.h kernel-2.6.18-417.el5-028stab121/include/asm-i386/unistd.h
--- kernel-2.6.18-417.el5.orig/include/asm-i386/unistd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-i386/unistd.h	2017-01-13 08:40:40.000000000 -0500
@@ -324,30 +324,43 @@
 #define __NR_vmsplice		316
 #define __NR_move_pages		317
 #define __NR_getcpu		318
-/* #define __NR_epoll_pwait	319 */
-/* #define __NR_utimensat	320 */
-/* #define __NR_signalfd	321 */
+#define __NR_epoll_pwait	319
+#define __NR_utimensat		320
+#define __NR_signalfd		321
 /* #define __NR_timerfd_create	322 */
 #define __NR_eventfd		323
 #define __NR_fallocate		324
 /* #define __NR_timerfd_settime	325 */
 /* #define __NR_timerfd_gettime	326 */
-/* #define __NR_signalfd4	327 */
+#define __NR_signalfd4		327
 /* #define __NR_eventfd2	328 */
-/* #define __NR_epoll_create1	329 */
-/* #define __NR_dup3		330 */
-/* #define __NR_pipe2		331 */
-/* #define __NR_inotify_init1	332 */
-/* #define __NR_preadv		333 */
-/* #define __NR_pwritev		334 */
+#define __NR_epoll_create1	329
+#define __NR_dup3		330
+#define __NR_pipe2		331
+#define __NR_inotify_init1	332
+#define __NR_preadv		333
+#define __NR_pwritev		334
 /* #define __NR_rt_tgsigqueueinfo	335 */
 /* #define __NR_perf_event_open	336 */
 #define __NR_recvmmsg		337
 #define __NR_prlimit64		338
+#define __NR_fairsched_mknod	500     /* FairScheduler syscalls */
+#define __NR_fairsched_rmnod	501
+#define __NR_fairsched_chwt	502
+#define __NR_fairsched_mvpr	503
+#define __NR_fairsched_rate	504
+#define __NR_fairsched_vcpus	505
+#define __NR_fairsched_cpumask	506
+#define __NR_getluid		510
+#define __NR_setluid		511
+#define __NR_setublimit		512
+#define __NR_ubstat		513
+#define __NR_lchmod		516
+#define __NR_lutime		517
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 339
+#define NR_syscalls 518
 
 #ifndef __KERNEL_SYSCALLS_NO_ERRNO__
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/mman.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/mman.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/mman.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/mman.h	2017-01-13 08:40:16.000000000 -0500
@@ -18,6 +18,7 @@
 #define MAP_NORESERVE	0x04000		/* don't check for reservations */
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x40000		/* soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/pgalloc.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/pgalloc.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/pgalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/pgalloc.h	2017-01-13 08:40:17.000000000 -0500
@@ -23,6 +23,8 @@
 #include <linux/page-flags.h>
 #include <linux/threads.h>
 
+#include <ub/ub_mem.h>
+
 #include <asm/mmu_context.h>
 
 DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -41,7 +43,7 @@ static inline long pgtable_quicklist_tot
 	return ql_size;
 }
 
-static inline void *pgtable_quicklist_alloc(void)
+static inline void *pgtable_quicklist_alloc(int charge)
 {
 	unsigned long *ret = NULL;
 
@@ -49,13 +51,21 @@ static inline void *pgtable_quicklist_al
 
 	ret = pgtable_quicklist;
 	if (likely(ret != NULL)) {
+		if (ub_page_charge(virt_to_page(ret), 0, 
+					charge ? __GFP_UBC|__GFP_SOFT_UBC : 0)) {
+			ret = NULL;
+			goto out;
+		}
+
 		pgtable_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		--pgtable_quicklist_size;
+out:
 		preempt_enable();
 	} else {
 		preempt_enable();
-		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO |
+				(charge ? __GFP_UBC | __GFP_SOFT_UBC : 0));
 	}
 
 	return ret;
@@ -73,6 +83,7 @@ static inline void pgtable_quicklist_fre
 #endif
 
 	preempt_disable();
+	ub_page_uncharge(virt_to_page(pgtable_entry), 0);
 	*(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
 	pgtable_quicklist = (unsigned long *)pgtable_entry;
 	++pgtable_quicklist_size;
@@ -81,7 +92,7 @@ static inline void pgtable_quicklist_fre
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pgd_free(pgd_t * pgd)
@@ -98,7 +109,7 @@ pgd_populate(struct mm_struct *mm, pgd_t
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pud_free(pud_t * pud)
@@ -116,7 +127,7 @@ pud_populate(struct mm_struct *mm, pud_t
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pmd_free(pmd_t * pmd)
@@ -145,13 +156,14 @@ pmd_populate_kernel(struct mm_struct *mm
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long addr)
 {
-	return virt_to_page(pgtable_quicklist_alloc());
+	void * pg = pgtable_quicklist_alloc(1);
+	return pg ? virt_to_page(pg) : NULL;
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(0);
 }
 
 static inline void pte_free(struct page *pte)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/processor.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/processor.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/processor.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/processor.h	2017-01-13 08:40:19.000000000 -0500
@@ -306,7 +306,7 @@ struct thread_struct {
 	regs->loadrs = 0;									\
 	regs->r8 = current->mm->dumpable;	/* set "don't zap registers" flag */		\
 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
-	if (unlikely(current->mm->dumpable != SUID_DUMP_USER)) {				\
+	if (unlikely(current->mm->dumpable != SUID_DUMP_USER || !current->mm->vps_dumpable)) {	\
 		/*										\
 		 * Zap scratch regs to avoid leaking bits between processes with different	\
 		 * uid/privileges.								\
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/system.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/system.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/system.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/system.h	2017-01-13 08:40:16.000000000 -0500
@@ -103,6 +103,7 @@ extern struct ia64_boot_param {
  * Linus just yet.  Grrr...
  */
 #define set_mb(var, value)	do { (var) = (value); mb(); } while (0)
+#define set_wmb(var, value)     do { (var) = (value); mb(); } while (0)
 
 #define safe_halt()         ia64_pal_halt_light()    /* PAL_HALT_LIGHT */
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/thread_info.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/thread_info.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/thread_info.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/thread_info.h	2017-01-13 08:40:16.000000000 -0500
@@ -90,6 +90,7 @@ struct thread_info {
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_FREEZE		20	/* Freeze request, atomic version of PF_FREEZE */
 #define TIF_RESTORE_RSE		21	/* user RBS is newer than kernel RBS */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/timex.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/timex.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/timex.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/timex.h	2017-01-13 08:40:16.000000000 -0500
@@ -10,6 +10,7 @@
  *			Also removed cacheflush_time as it's entirely unused.
  */
 
+#ifdef __KERNEL__
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
 
@@ -39,4 +40,8 @@ get_cycles (void)
 	return ret;
 }
 
+extern unsigned int cpu_khz;
+
+#endif
+
 #endif /* _ASM_IA64_TIMEX_H */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-ia64/unistd.h kernel-2.6.18-417.el5-028stab121/include/asm-ia64/unistd.h
--- kernel-2.6.18-417.el5.orig/include/asm-ia64/unistd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-ia64/unistd.h	2017-01-13 08:40:16.000000000 -0500
@@ -312,11 +312,13 @@
 /* #define __NR_pwritev			1320 */
 /* #define __NR_rt_tgsigqueueinfo	1321 */
 #define __NR_recvmmsg			1322
+#define __NR_lchmod			1509
+#define __NR_lutime			1510
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls			299 /* length of syscall table */
+#define NR_syscalls			487 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/current.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/current.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/current.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/current.h	2017-01-13 08:40:15.000000000 -0500
@@ -13,6 +13,7 @@ struct task_struct;
 
 #ifdef __powerpc64__
 #include <asm/paca.h>
+#include <linux/stddef.h>
 
 #define current		(get_paca()->__current)
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/elf.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/elf.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/elf.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/elf.h	2017-01-13 08:40:26.000000000 -0500
@@ -275,7 +275,7 @@ extern int ucache_bsize;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-				       int uses_interp);
+				int uses_interp, unsigned long map_address);
 #define VDSO_AUX_ENT(a,b) NEW_AUX_ENT(a,b);
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/mman.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/mman.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/mman.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/mman.h	2017-01-13 08:40:17.000000000 -0500
@@ -23,5 +23,6 @@
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO   0x20000         /* do soft ubc charge */
 
 #endif	/* _ASM_POWERPC_MMAN_H */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/pgalloc.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/pgalloc.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/pgalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/pgalloc.h	2017-01-13 08:40:18.000000000 -0500
@@ -40,7 +40,8 @@ extern kmem_cache_t *pgtable_cache[];
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
+	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM],
+			GFP_KERNEL_UBC | __GFP_SOFT_UBC);
 }
 
 static inline void pgd_free(pgd_t *pgd)
@@ -56,7 +57,7 @@ static inline void pgd_free(pgd_t *pgd)
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
 }
 
 static inline void pud_free(pud_t *pud)
@@ -92,7 +93,7 @@ static inline void pmd_populate_kernel(s
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
 }
 
 static inline void pmd_free(pmd_t *pmd)
@@ -100,17 +101,27 @@ static inline void pmd_free(pmd_t *pmd)
 	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
 }
 
+static inline pte_t *do_pte_alloc(gfp_t flags)
+{
+	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], flags);
+}
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+	return do_pte_alloc(GFP_KERNEL | __GFP_REPEAT);
 }
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long address)
 {
-	return virt_to_page(pte_alloc_one_kernel(mm, address));
+	pte_t *pte;
+
+	pte = do_pte_alloc(GFP_KERNEL_UBC | __GFP_SOFT_UBC);
+	if (pte == NULL)
+		return NULL;
+	else
+		return virt_to_page(pte);
 }
 		
 static inline void pte_free_kernel(pte_t *pte)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/systbl.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/systbl.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/systbl.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/systbl.h	2017-01-13 08:40:40.000000000 -0500
@@ -313,4 +313,21 @@ SYSCALL(ni_syscall)
 SYSCALL_SPU(eventfd)
 SYSCALL(ni_syscall)
 COMPAT_SYS(fallocate)
-SYSCALL(subpage_prot)
+SYSCALL(subpage_prot) /* 310 */
+SYS_SKIP(311, 400)
+SYSCALL(ni_syscall)
+SYS_SKIP_END()
+SYSCALL(fairsched_mknod) /* 400 */
+SYSCALL(fairsched_rmnod)
+SYSCALL(fairsched_chwt)
+SYSCALL(fairsched_mvpr)
+SYSCALL(fairsched_rate)
+SYSCALL(fairsched_vcpus)
+SYS_SKIP(406, 410)
+SYSCALL(ni_syscall)
+SYS_SKIP_END()
+SYSCALL(getluid) /* 410 */
+SYSCALL(setluid)
+SYSCALL(setublimit)
+SYSCALL(ubstat)
+COMPAT_SYS_SPU(utimensat)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/thread_info.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/thread_info.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/thread_info.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/thread_info.h	2017-01-13 08:40:16.000000000 -0500
@@ -127,6 +127,8 @@ static inline struct thread_info *curren
 #define TIF_RESTOREALL		12	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		14	/* Force successful syscall return */
 #define TIF_RESTORE_SIGMASK	15	/* Restore signal mask in do_signal */
+#define TIF_FREEZE		16	/* Freeze request, atomic version
+					   of PF_FREEZE */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-powerpc/unistd.h kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/unistd.h
--- kernel-2.6.18-417.el5.orig/include/asm-powerpc/unistd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-powerpc/unistd.h	2017-01-13 08:40:40.000000000 -0500
@@ -335,9 +335,23 @@
 
 #define __NR_subpage_prot	310
 
+#define __NR_utimensat		304
+
+#define __NR_fairsched_mknod	400
+#define __NR_fairsched_rmnod	401
+#define __NR_fairsched_chwt	402
+#define __NR_fairsched_mvpr	403
+#define __NR_fairsched_rate	404
+#define __NR_fairsched_vcpus	405
+
+#define __NR_getluid            410
+#define __NR_setluid            411
+#define __NR_setublimit         412
+#define __NR_ubstat             413
+
 #ifdef __KERNEL__
 
-#define __NR_syscalls		311
+#define __NR_syscalls		414
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff -upr kernel-2.6.18-417.el5.orig/include/asm-s390/pgalloc.h kernel-2.6.18-417.el5-028stab121/include/asm-s390/pgalloc.h
--- kernel-2.6.18-417.el5.orig/include/asm-s390/pgalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-s390/pgalloc.h	2017-01-13 08:40:16.000000000 -0500
@@ -43,7 +43,8 @@ extern void diag10(unsigned long addr);
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
+	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC,
+						PGD_ALLOC_ORDER);
 	int i;
 
 	if (!pgd)
@@ -74,7 +75,8 @@ static inline void pgd_free(pgd_t *pgd)
 #else /* __s390x__ */
 static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
+	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC,
+						PMD_ALLOC_ORDER);
 	int i;
 
 	if (!pmd)
@@ -116,13 +118,10 @@ pmd_populate(struct mm_struct *mm, pmd_t
 	pmd_populate_kernel(mm, pmd, (pte_t *)((page-mem_map) << PAGE_SHIFT));
 }
 
-/*
- * page table entry allocation/free routines.
- */
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
+static inline pte_t *pte_alloc(struct mm_struct *mm, unsigned long vmaddr,
+		gfp_t mask)
 {
-	pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte_t *pte = (pte_t *) __get_free_page(mask);
 	int i;
 
 	if (!pte)
@@ -134,10 +133,20 @@ pte_alloc_one_kernel(struct mm_struct *m
 	return pte;
 }
 
+/*
+ * page table entry allocation/free routines.
+ */
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
+{
+	return pte_alloc(mm, vmaddr, GFP_KERNEL | __GFP_REPEAT);
+}
+
 static inline struct page *
 pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, vmaddr);
+	pte_t *pte = pte_alloc(mm, vmaddr, GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_REPEAT);
 	if (pte)
 		return virt_to_page(pte);
 	return NULL;
diff -upr kernel-2.6.18-417.el5.orig/include/asm-sh/bugs.h kernel-2.6.18-417.el5-028stab121/include/asm-sh/bugs.h
--- kernel-2.6.18-417.el5.orig/include/asm-sh/bugs.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-sh/bugs.h	2017-01-13 08:40:15.000000000 -0500
@@ -18,7 +18,7 @@ static void __init check_bugs(void)
 {
 	extern char *get_cpu_subtype(void);
 	extern unsigned long loops_per_jiffy;
-	char *p= &system_utsname.machine[2]; /* "sh" */
+	char *p= &init_utsname()->machine[2]; /* "sh" */
 
 	cpu_data->loops_per_jiffy = loops_per_jiffy;
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-sparc/unistd.h kernel-2.6.18-417.el5-028stab121/include/asm-sparc/unistd.h
--- kernel-2.6.18-417.el5.orig/include/asm-sparc/unistd.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-sparc/unistd.h	2017-01-13 08:40:40.000000000 -0500
@@ -318,8 +318,9 @@
 #define __NR_unshare		299
 #define __NR_set_robust_list	300
 #define __NR_get_robust_list	301
+#define __NR_utimensat		310
 
-#define NR_SYSCALLS		302
+#define NR_SYSCALLS		311
 
 #ifdef __KERNEL__
 /* WARNING: You MAY NOT add syscall numbers larger than 301, since
diff -upr kernel-2.6.18-417.el5.orig/include/asm-sparc64/mman.h kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/mman.h
--- kernel-2.6.18-417.el5.orig/include/asm-sparc64/mman.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/mman.h	2017-01-13 08:40:17.000000000 -0500
@@ -21,6 +21,7 @@
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* do soft ubc charge */
 
 /* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
  * XXX calls.
diff -upr kernel-2.6.18-417.el5.orig/include/asm-sparc64/pgalloc.h kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/pgalloc.h
--- kernel-2.6.18-417.el5.orig/include/asm-sparc64/pgalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/pgalloc.h	2017-01-13 08:40:17.000000000 -0500
@@ -17,7 +17,7 @@ extern kmem_cache_t *pgtable_cache;
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL_UBC);
 }
 
 static inline void pgd_free(pgd_t *pgd)
@@ -30,7 +30,7 @@ static inline void pgd_free(pgd_t *pgd)
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache,
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_REPEAT);
 }
 
 static inline void pmd_free(pmd_t *pmd)
@@ -48,7 +48,8 @@ static inline pte_t *pte_alloc_one_kerne
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long address)
 {
-	return virt_to_page(pte_alloc_one_kernel(mm, address));
+	return virt_to_page(kmem_cache_alloc(pgtable_cache,
+                                GFP_KERNEL_UBC|__GFP_REPEAT));
 }
 		
 static inline void pte_free_kernel(pte_t *pte)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-sparc64/thread_info.h kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/thread_info.h
--- kernel-2.6.18-417.el5.orig/include/asm-sparc64/thread_info.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/thread_info.h	2017-01-13 08:40:24.000000000 -0500
@@ -162,14 +162,14 @@ register struct thread_info *current_thr
 	struct thread_info *ret;				\
 								\
 	ret = (struct thread_info *)				\
-	  __get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER);	\
+	  __get_free_pages(GFP_KERNEL_UBC, __THREAD_INFO_ORDER);\
 	if (ret)						\
 		memset(ret, 0, PAGE_SIZE<<__THREAD_INFO_ORDER);	\
 	ret;							\
 })
 #else
 #define alloc_thread_info(tsk) \
-	((struct thread_info *)__get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER))
+	((struct thread_info *)__get_free_pages(GFP_KERNEL_UBC, __THREAD_INFO_ORDER))
 #endif
 
 #define free_thread_info(ti) \
@@ -236,6 +236,7 @@ register struct thread_info *current_thr
 #define TIF_ABI_PENDING		12
 #define TIF_MEMDIE		13
 #define TIF_POLLING_NRFLAG	14
+#define TIF_FREEZE		15	/* Freeze request (atomic PF_FREEZE) */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-sparc64/unistd.h kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/unistd.h
--- kernel-2.6.18-417.el5.orig/include/asm-sparc64/unistd.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-sparc64/unistd.h	2017-01-13 08:40:40.000000000 -0500
@@ -320,8 +320,18 @@
 #define __NR_unshare		299
 #define __NR_set_robust_list	300
 #define __NR_get_robust_list	301
+#define __NR_utimensat		310
+#define __NR_fairsched_mknod	500     /* FairScheduler syscalls */
+#define __NR_fairsched_rmnod	501
+#define __NR_fairsched_chwt	502
+#define __NR_fairsched_mvpr	503
+#define __NR_fairsched_rate	504
+#define __NR_getluid		510
+#define __NR_setluid		511
+#define __NR_setublimit		512
+#define __NR_ubstat		513
 
-#define NR_SYSCALLS		302
+#define NR_SYSCALLS		514
 
 #ifdef __KERNEL__
 /* WARNING: You MAY NOT add syscall numbers larger than 301, since
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/cacheflush.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/cacheflush.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/cacheflush.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/cacheflush.h	2017-01-13 08:40:40.000000000 -0500
@@ -1,9 +1,6 @@
 #ifndef _X8664_CACHEFLUSH_H
 #define _X8664_CACHEFLUSH_H
 
-/* Keep includes the same across arches.  */
-#include <linux/mm.h>
-
 /* Caches aren't brain-dead on the intel. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
@@ -18,6 +15,9 @@
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/compat.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/compat.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/compat.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/compat.h	2017-01-13 08:40:40.000000000 -0500
@@ -33,8 +33,10 @@ typedef s32		compat_key_t;
 
 typedef s32		compat_int_t;
 typedef s32		compat_long_t;
+typedef s64 __attribute__((aligned(4))) compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
+typedef u64 __attribute__((aligned(4))) compat_u64;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/elf.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/elf.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/elf.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/elf.h	2017-01-13 08:40:40.000000000 -0500
@@ -166,9 +166,9 @@ extern int dump_task_fpu (struct task_st
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-				       int uses_interp);
+				       int uses_interp, unsigned long map_add);
 
-extern unsigned int vdso_enabled;
+extern int vdso_enabled;
 
 #define ARCH_DLINFO                                            \
 do if (vdso_enabled) {                                         \
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/ia32.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/ia32.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/ia32.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/ia32.h	2017-01-13 08:40:26.000000000 -0500
@@ -149,7 +149,7 @@ struct rt_sigframe32
         struct _fpstate_ia32 fpstate;
 };
 
-#define IA32_STACK_TOP IA32_PAGE_OFFSET
+#define IA32_STACK_TOP (IA32_PAGE_OFFSET - PAGE_SIZE * 2)
 
 #ifdef __KERNEL__
 struct user_desc;
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/msr.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/msr.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/msr.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/msr.h	2017-01-13 08:40:40.000000000 -0500
@@ -1,6 +1,8 @@
 #ifndef X86_64_MSR_H
 #define X86_64_MSR_H 1
 
+#include <xen/interface/arch-x86_64.h>
+
 #ifndef __ASSEMBLY__
 /*
  * Access to machine-specific registers (available on 586 and better only)
@@ -152,6 +154,33 @@ static inline unsigned int cpuid_edx(uns
 #define MSR_IA32_UCODE_WRITE		0x79
 #define MSR_IA32_UCODE_REV		0x8b
 
+#ifdef CONFIG_SMP
+void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
+void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op);
+#else
+static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	rdmsr(msr_no, *l, *h);
+}
+
+static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	wrmsr(msr_no, l, h);
+}
+
+static inline void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	cpuid(op, eax, ebx, ecx, edx);
+}
+
+static inline u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	return cpuid_eax(op);
+}
+#endif
 
 #endif
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/pgalloc.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/pgalloc.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/pgalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/pgalloc.h	2017-01-13 08:40:40.000000000 -0500
@@ -87,14 +87,12 @@ static inline void pmd_free(pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-        pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        return pmd;
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-        pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        return pud;
+	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pud_free(pud_t *pud)
@@ -142,8 +140,8 @@ static inline pgd_t *pgd_alloc(struct mm
          * We allocate two contiguous pages for kernel and user.
          */
         unsigned boundary;
-	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
-
+	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC, 1);
 	if (!pgd)
 		return NULL;
 	pgd_list_add(pgd);
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/processor.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/processor.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/processor.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/processor.h	2017-01-13 08:40:40.000000000 -0500
@@ -192,7 +192,7 @@ static inline void clear_in_cr4 (unsigne
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+#define IA32_PAGE_OFFSET 0xc0000000
 
 #define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/system.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/system.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/mach-xen/asm/system.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mach-xen/asm/system.h	2017-01-13 08:40:40.000000000 -0500
@@ -249,6 +249,7 @@ static inline unsigned long __cmpxchg(vo
 #endif
 #define read_barrier_depends()	do {} while(0)
 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/mman.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mman.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/mman.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/mman.h	2017-01-13 08:40:16.000000000 -0500
@@ -13,6 +13,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_EXECPRIO	0x40000		/* soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/msr.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/msr.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/msr.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/msr.h	2017-01-13 08:40:15.000000000 -0500
@@ -13,6 +13,20 @@
 			    : "=a" (val1), "=d" (val2) \
 			    : "c" (msr))
 
+#ifdef CONFIG_SMP
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op);
+#else
+static inline void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	cpuid(op, eax, ebx, ecx, edx);
+}
+
+static inline u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	return cpuid_eax(op);
+}
+#endif
 
 #define rdmsrl(msr,val) do { unsigned long a__,b__; \
        __asm__ __volatile__("rdmsr" \
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/pda.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/pda.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/pda.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/pda.h	2017-01-13 08:40:16.000000000 -0500
@@ -22,6 +22,7 @@ struct x8664_pda {
 	int nodenumber;		    /* number of current node */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
+	unsigned int __reschedule_count;
 	int mmu_state;     
 	struct mm_struct *active_mm;
 	unsigned apic_timer_irqs;
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/pgalloc.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/pgalloc.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/pgalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/pgalloc.h	2017-01-13 08:40:19.000000000 -0500
@@ -81,7 +81,8 @@ static inline void pgd_list_del(pgd_t *p
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	unsigned boundary;
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 	if (!pgd)
 		return NULL;
 	pgd_list_add(pgd);
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/processor.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/processor.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/processor.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/processor.h	2017-01-13 08:40:24.000000000 -0500
@@ -113,6 +113,7 @@ extern void identify_cpu(struct cpuinfo_
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void early_init_intel(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
 
 /*
@@ -190,7 +191,7 @@ static inline void clear_in_cr4 (unsigne
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+#define IA32_PAGE_OFFSET 0xc0000000
 
 #define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/segment.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/segment.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/segment.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/segment.h	2017-01-13 08:40:24.000000000 -0500
@@ -3,10 +3,11 @@
 
 #include <asm/cache.h>
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
-
-#define __KERNEL32_CS   0x08
+#define GDT_ENTRY_BOOT_CS		2
+#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS		3
+#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
+#define GDT_ENTRY_TSS 4	/* needs two entries */
 
 /* 
  * we cannot use the same code segment descriptor for user and kernel
@@ -15,20 +16,21 @@
  * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) 
  */
 
-#define __USER32_CS   0x23   /* 4*8+3 */ 
-#define __USER_DS     0x2b   /* 5*8+3 */ 
-#define __USER_CS     0x33   /* 6*8+3 */ 
-#define __USER32_DS	__USER_DS 
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX 8
 
-#define GDT_ENTRY_TLS 7
-#define GDT_ENTRY_TSS 8	/* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
+#define GDT_ENTRY_LDT 9 /* needs two entries */
+#define __KERNEL32_CS   0x8	/* 1*8 */
+#define __KERNEL_CS	0x60	/* 12*8 */
+#define __KERNEL_DS	0x68	/* 13*8 */
+#define __USER32_CS   0x73   /* 14*8+3 */ 
+#define __USER_DS     0x7b   /* 15*8+3 */ 
+#define __USER32_DS	__USER_DS 
+#define __USER_CS     0x83   /* 16*8+3 */ 
 
 #define GDT_ENTRY_TLS_ENTRIES 3
 
-#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU 17	/* Abused to load per CPU data from limit */
 #define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
 
 /* TLS indexes for 64bit - hardcoded in arch_prctl */
@@ -39,7 +41,7 @@
 #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #define IDT_ENTRIES 256
-#define GDT_ENTRIES 16
+#define GDT_ENTRIES 32
 #define GDT_SIZE (GDT_ENTRIES * 8)
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/system.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/system.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/system.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/system.h	2017-01-13 08:40:16.000000000 -0500
@@ -246,6 +246,7 @@ static inline unsigned long __cmpxchg(vo
 #endif
 #define read_barrier_depends()	do {} while(0)
 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/thread_info.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/thread_info.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/thread_info.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/thread_info.h	2017-01-13 08:40:26.000000000 -0500
@@ -33,6 +33,7 @@ struct thread_info {
 
 	mm_segment_t		addr_limit;	
 	struct restart_block    restart_block;
+	void			*sysenter_return;
 };
 #endif
 
@@ -78,14 +79,15 @@ static inline struct thread_info *stack_
     ({								\
 	struct thread_info *ret;				\
 								\
-	ret = ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)); \
+	ret = ((struct thread_info *) __get_free_pages(GFP_KERNEL_UBC,\
+						THREAD_ORDER)); \
 	if (ret)						\
 		memset(ret, 0, THREAD_SIZE);			\
 	ret;							\
     })
 #else
 #define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
+	((struct thread_info *) __get_free_pages(GFP_KERNEL_UBC,THREAD_ORDER))
 #endif
 
 #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
@@ -121,6 +123,8 @@ static inline struct thread_info *stack_
 #define TIF_MEMDIE		20
 #define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
 #define TIF_SYSCALL_TRACEPOINT	22	/* syscall tracepoint instrumentation */
+#define TIF_FREEZE		23
+#define TIF_RESUME		24
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -134,6 +138,7 @@ static inline struct thread_info *stack_
 #define _TIF_IA32		(1<<TIF_IA32)
 #define _TIF_FORK		(1<<TIF_FORK)
 #define _TIF_FORCED_TF		(1<<TIF_FORCED_TF)
+#define _TIF_RESUME		(1<<TIF_RESUME)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 
 /* work to do in syscall_trace_enter() */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/unistd.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/unistd.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/unistd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/unistd.h	2017-01-13 08:40:40.000000000 -0500
@@ -628,11 +628,11 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages		279
 __SYSCALL(__NR_move_pages, sys_move_pages)
 #define __NR_utimensat		280
-__SYSCALL(__NR_utimensat, sys_ni_syscall)
+__SYSCALL(__NR_utimensat, sys_utimensat)
 #define __NR_epoll_pwait	281
-__SYSCALL(__NR_epoll_pwait, sys_ni_syscall)
+__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
 #define __NR_signalfd		282
-__SYSCALL(__NR_signalfd, sys_ni_syscall)
+__SYSCALL(__NR_signalfd, sys_signalfd)
 #define __NR_timerfd_create	283
 __SYSCALL(__NR_timerfd_create, sys_ni_syscall)
 #define __NR_eventfd		284
@@ -644,23 +644,23 @@ __SYSCALL(__NR_timerfd_settime, sys_ni_s
 #define __NR_timerfd_gettime	287
 __SYSCALL(__NR_timerfd_gettime, sys_ni_syscall)
 #define __NR_accept4		288
-__SYSCALL(__NR_accept4, sys_ni_syscall)
+__SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_signalfd4		289
-__SYSCALL(__NR_signalfd4, sys_ni_syscall)
+__SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2		290
 __SYSCALL(__NR_eventfd2, sys_ni_syscall)
 #define __NR_epoll_create1	291
-__SYSCALL(__NR_epoll_create1, sys_ni_syscall)
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
 #define __NR_dup3		292
-__SYSCALL(__NR_dup3, sys_ni_syscall)
+__SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2		293
-__SYSCALL(__NR_pipe2, sys_ni_syscall)
+__SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1	294
-__SYSCALL(__NR_inotify_init1, sys_ni_syscall)
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 #define __NR_preadv		295
-__SYSCALL(__NR_preadv, sys_ni_syscall)
+__SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev		296
-__SYSCALL(__NR_pwritev, sys_ni_syscall)
+__SYSCALL(__NR_pwritev, sys_pwritev)
 #define __NR_rt_tgsigqueueinfo	297
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_ni_syscall)
 #define __NR_perf_event_open	298
@@ -669,10 +669,39 @@ __SYSCALL(__NR_perf_event_open, sys_ni_s
 __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
 #define __NR_prlimit64		300
 __SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_fairsched_cpumask	498
+__SYSCALL(__NR_fairsched_cpumask, sys_fairsched_cpumask)
+#define __NR_fairsched_vcpus	499
+__SYSCALL(__NR_fairsched_vcpus, sys_fairsched_vcpus)
+#define __NR_getluid		500
+__SYSCALL(__NR_getluid, sys_getluid)
+#define __NR_setluid		501
+__SYSCALL(__NR_setluid, sys_setluid)
+#define __NR_setublimit		502
+__SYSCALL(__NR_setublimit, sys_setublimit)
+#define __NR_ubstat		503
+__SYSCALL(__NR_ubstat, sys_ubstat)
+#define __NR_fairsched_mknod	504 /* FairScheduler syscalls */
+__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod)
+#define __NR_fairsched_rmnod	505
+__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod)
+#define __NR_fairsched_chwt	506
+__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt)
+#define __NR_fairsched_mvpr	507
+__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
+#define __NR_fairsched_rate	508
+__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
+#define __NR_lchmod		509
+__SYSCALL(__NR_lchmod, sys_lchmod)
+#define __NR_lutime		510
+__SYSCALL(__NR_lutime, sys_lutime)
+#define __NR_getcpu		511
+__SYSCALL(__NR_getcpu, sys_getcpu)
+
 
 #ifdef __KERNEL__
 
-#define __NR_syscall_max __NR_prlimit64
+#define __NR_syscall_max __NR_getcpu
 
 #ifndef __NO_STUBS
 
@@ -711,6 +740,7 @@ do { \
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 
 #ifndef __KERNEL_SYSCALLS__
 
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/vdso.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/vdso.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/vdso.h	2017-01-13 08:40:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/vdso.h	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,18 @@
+#ifndef _ASM_X86_VDSO_H
+#define _ASM_X86_VDSO_H
+
+#ifdef CONFIG_X86_64
+extern const char VDSO64_PRELINK[];
+
+/*
+ * Given a pointer to the vDSO image, find the pointer to VDSO64_name
+ * as that symbol is defined in the vDSO sources or linker script.
+ */
+#define VDSO64_SYMBOL(base, name)					\
+({									\
+	extern const char VDSO64_##name[];				\
+	(void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \
+})
+#endif
+
+#endif /* _ASM_X86_VDSO_H */
diff -upr kernel-2.6.18-417.el5.orig/include/asm-x86_64/vsyscall32.h kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/vsyscall32.h
--- kernel-2.6.18-417.el5.orig/include/asm-x86_64/vsyscall32.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/asm-x86_64/vsyscall32.h	2017-01-13 08:40:26.000000000 -0500
@@ -4,14 +4,20 @@
 /* Values need to match arch/x86_64/ia32/vsyscall.lds */
 
 #ifdef __ASSEMBLY__
-#define VSYSCALL32_BASE 0xffffe000
-#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
+#define __IA32_PAGE_OFFSET 0xc0000000
+#define VSYSCALL32_BASE (__IA32_PAGE_OFFSET - PAGE_SIZE)
+/* For CPT: VSYSCALL32_SYSEXIT value must match SYSENTER_RETURN value
+   to be able to migrate vsyscall-sysenter page from x86_64 to i386 */
+#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x420)
 #else
-#define VSYSCALL32_BASE 0xffffe000UL
+#define VSYSCALL32_BASE ((unsigned long)current->mm->context.vdso)
 #define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
 
+#define __VSYSCALL32_BASE ((unsigned long)(IA32_PAGE_OFFSET - PAGE_SIZE))
+#define __VSYSCALL32_END (__VSYSCALL32_BASE + PAGE_SIZE)
+
 #define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) 
-#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410)
+#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x420)
 #define VSYSCALL32_SIGRETURN ((void __user *)VSYSCALL32_BASE + 0x500) 
 #define VSYSCALL32_RTSIGRETURN ((void __user *)VSYSCALL32_BASE + 0x600) 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/acct.h kernel-2.6.18-417.el5-028stab121/include/linux/acct.h
--- kernel-2.6.18-417.el5.orig/include/linux/acct.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/acct.h	2017-01-13 08:40:21.000000000 -0500
@@ -124,12 +124,14 @@ extern void acct_auto_close(struct super
 extern void acct_init_pacct(struct pacct_struct *pacct);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
+extern void acct_exit_ve(struct bsd_acct_struct *acct);
 #else
 #define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
 #define acct_init_pacct(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
+#define acct_exit_ve(acct) do { } while (0)
 #endif
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/linux/aio_abi.h kernel-2.6.18-417.el5-028stab121/include/linux/aio_abi.h
--- kernel-2.6.18-417.el5.orig/include/linux/aio_abi.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/aio_abi.h	2017-01-13 08:40:40.000000000 -0500
@@ -41,6 +41,8 @@ enum {
 	 * IOCB_CMD_POLL = 5,
 	 */
 	IOCB_CMD_NOOP = 6,
+	IOCB_CMD_PREADV = 7,
+	IOCB_CMD_PWRITEV = 8,
 };
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/linux/aio.h kernel-2.6.18-417.el5-028stab121/include/linux/aio.h
--- kernel-2.6.18-417.el5.orig/include/linux/aio.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/aio.h	2017-01-13 08:40:40.000000000 -0500
@@ -4,6 +4,8 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/aio_abi.h>
+#include <linux/uio.h>
+
 #ifndef __GENKSYMS__
 #include <linux/rcupdate.h>
 #endif
@@ -120,6 +122,10 @@ struct kiocb {
 	long			ki_retried; 	/* just for testing */
 	long			ki_kicked; 	/* just for testing */
 	long			ki_queued; 	/* just for testing */
+	struct iovec		ki_inline_vec;	/* inline vector */
+ 	struct iovec		*ki_iovec;
+ 	unsigned long		ki_nr_segs;
+ 	unsigned long		ki_cur_seg;
 
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
@@ -260,4 +266,8 @@ static inline struct kiocb *list_kiocb(s
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
 
+void wait_for_all_aios(struct kioctx *ctx);
+extern kmem_cache_t	*kioctx_cachep;
+extern void aio_kick_handler(void *);
+
 #endif /* __LINUX__AIO_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/auxvec.h kernel-2.6.18-417.el5-028stab121/include/linux/auxvec.h
--- kernel-2.6.18-417.el5.orig/include/linux/auxvec.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/auxvec.h	2017-01-13 08:40:16.000000000 -0500
@@ -27,6 +27,7 @@
 #define AT_SECURE 23   /* secure mode boolean */
 #define AT_BASE_PLATFORM 24     /* string identifying real platform, may
                                   * differ from AT_PLATFORM. */
+#define AT_RANDOM 25   /* address of 16 random bytes */
 
 #define AT_VECTOR_SIZE  44 /* Size of auxiliary table.  */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/capability.h kernel-2.6.18-417.el5-028stab121/include/linux/capability.h
--- kernel-2.6.18-417.el5.orig/include/linux/capability.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/capability.h	2017-01-13 08:40:40.000000000 -0500
@@ -142,12 +142,9 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_NET_BROADCAST    11
 
-/* Allow interface configuration */
 /* Allow administration of IP firewall, masquerading and accounting */
 /* Allow setting debug option on sockets */
 /* Allow modification of routing tables */
-/* Allow setting arbitrary process / process group ownership on
-   sockets */
 /* Allow binding to any address for transparent proxying */
 /* Allow setting TOS (type of service) */
 /* Allow setting promiscuous mode */
@@ -178,6 +175,7 @@ typedef __u32 kernel_cap_t;
 #define CAP_SYS_MODULE       16
 
 /* Allow ioperm/iopl access */
+/* Allow O_DIRECT access */
 /* Allow sending USB messages to any device via /proc/bus/usb */
 
 #define CAP_SYS_RAWIO        17
@@ -196,24 +194,19 @@ typedef __u32 kernel_cap_t;
 
 /* Allow configuration of the secure attention key */
 /* Allow administration of the random device */
-/* Allow examination and configuration of disk quotas */
 /* Allow configuring the kernel's syslog (printk behaviour) */
 /* Allow setting the domainname */
 /* Allow setting the hostname */
 /* Allow calling bdflush() */
-/* Allow mount() and umount(), setting up new smb connection */
+/* Allow setting up new smb connection */
 /* Allow some autofs root ioctls */
 /* Allow nfsservctl */
 /* Allow VM86_REQUEST_IRQ */
 /* Allow to read/write pci config on alpha */
 /* Allow irix_prctl on mips (setstacksize) */
 /* Allow flushing all cache on m68k (sys_cacheflush) */
-/* Allow removing semaphores */
-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
-   and shared memory */
 /* Allow locking/unlocking of shared memory segment */
 /* Allow turning swap on/off */
-/* Allow forged pids on socket credentials passing */
 /* Allow setting readahead and flushing buffers on block devices */
 /* Allow setting geometry in floppy driver */
 /* Allow turning DMA on/off in xd driver */
@@ -231,6 +224,8 @@ typedef __u32 kernel_cap_t;
    arbitrary SCSI commands */
 /* Allow setting encryption key on loopback filesystem */
 /* Allow setting zone reclaim policy */
+/* Modify data journaling mode on ext3 filesystem (uses journaling
+   resources) */
 
 #define CAP_SYS_ADMIN        21
 
@@ -250,8 +245,6 @@ typedef __u32 kernel_cap_t;
 /* Override resource limits. Set resource limits. */
 /* Override quota limits. */
 /* Override reserved space on ext2 filesystem */
-/* Modify data journaling mode on ext3 filesystem (uses journaling
-   resources) */
 /* NOTE: ext2 honors fsuid when checking for resource overrides, so 
    you can override using fsuid too */
 /* Override size restrictions on IPC message queues */
@@ -284,11 +277,71 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_AUDIT_CONTROL    30
 
+/*
+ * Important note: VZ capabilities do intersect with CAP_AUDIT;
+ * this is due to compatibility reasons. Nothing bad.
+ * Both VZ and Audit/SELinux caps are disabled in VPSs.
+ */
+
+/* Allow access to all information. Otherwise some structures will be
+ * hidden to ensure non-interaction of different Virtual Environments
+ * on the same node (NOW OBSOLETED)
+ */
+#define CAP_SETVEID	     29
+
+#define capable_setveid()	({			\
+		ve_is_super(get_exec_env()) &&		\
+			(capable(CAP_SYS_ADMIN) ||	\
+			 capable(CAP_VE_ADMIN));	\
+	})
+
+/*
+ * coincides with CAP_AUDIT_CONTROL but we don't care, since
+ * audit is disabled in Virtuozzo
+ */
+#define CAP_VE_ADMIN	     30
+
 #ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifdef CONFIG_VE
+
+/* Replacement for CAP_NET_ADMIN:
+   delegated rights to the Virtual environment of its network administration.
+   For now the following rights have been delegated:
+
+   Allow setting arbitrary process / process group ownership on sockets
+   Allow interface configuration
+ */
+#define CAP_VE_NET_ADMIN     CAP_VE_ADMIN
+
+/* Replacement for CAP_SYS_ADMIN:
+   delegated rights to the Virtual environment of its administration.
+   For now the following rights have been delegated:
+ */
+/* Allow mount/umount/remount */
+/* Allow examination and configuration of disk quotas */
+/* Allow removing semaphores */
+/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
+   and shared memory */
+/* Allow locking/unlocking of shared memory segment */
+/* Allow forged pids on socket credentials passing */
+
+#define CAP_VE_SYS_ADMIN     CAP_VE_ADMIN
+#else
+#define CAP_VE_NET_ADMIN     CAP_NET_ADMIN
+#define CAP_VE_SYS_ADMIN     CAP_SYS_ADMIN
+#endif
+
 /* 
  * Bounding set
  */
+#ifndef CONFIG_VE
 extern kernel_cap_t cap_bset;
+#else
+#define cap_bset get_exec_env()->ve_cap_bset
+#endif
 
 /*
  * Internal kernel functions only
@@ -358,13 +411,19 @@ static inline kernel_cap_t cap_invert(ke
 #define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
 
 #define cap_clear(c)         do { cap_t(c) =  0; } while(0)
+#ifndef CONFIG_VE
 #define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
+#else
+#define cap_set_full(c) \
+        do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 :		\
+					cap_bset; } while(0)
+#endif
 #define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
-
 #define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
 
 int capable(int cap);
 int __capable(struct task_struct *t, int cap);
+extern spinlock_t task_capability_lock;
 
 #endif /* __KERNEL__ */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/cfq-iosched.h kernel-2.6.18-417.el5-028stab121/include/linux/cfq-iosched.h
--- kernel-2.6.18-417.el5.orig/include/linux/cfq-iosched.h	2017-01-13 08:40:18.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/cfq-iosched.h	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,238 @@
+#ifndef _LINUX_CFQ_IOSCHED_H
+#define _LINUX_CFQ_IOSCHED_H
+
+#include <linux/ioprio.h>
+#include <linux/mempool.h>
+#include <linux/rbtree.h>
+
+/*
+ * Each block device managed by CFQ I/O scheduler is represented
+ * by cfq_data structure. Certain members of this structure are
+ * distinguished to cfq_bc_data on per-UBC basis. Thus cfq_bc_data
+ * structure is per (Device, UBC) pair.
+ *
+ * BC holds a list head of all cfq_bc_data, that belong to UBC,
+ * and cfq_data holds a list head of all active cfq_bc_data
+ * for the device (active means that there are requests in-flight).
+ * cfq_bc_data has a pointers to owning UBC and cfq_data.
+ *
+ * For example, if there are two devices and three beancounters:
+ *
+ *	         cfq_data 1          cfq_data 2
+ *	             |                   |
+ *	             |                   |
+ *	UB1 --- cfq_bc_data ------- cfq_bc_data
+ *	             |                   |
+ *	             |                   |
+ *	UB2 --- cfq_bc_data ------- cfq_bc_data
+ *	             |                   |
+ *	             |                   |
+ *	UB3 --- cfq_bc_data ------- cfq_bc_data
+ *
+ * One more basic structure in CFQ scheduler is cfq_queue,
+ * which is a queue of requests. For sync queues it's a per-process
+ * structure. While creating new cfq_queue we store cfq_bc_data
+ * it belongs to, and later use this information in order to add
+ * queue in proper lists.
+ *
+ */
+
+extern kmem_cache_t *cfq_pool;
+
+#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
+
+/*
+ * Per (Device, UBC) queue data
+ */
+struct cfq_bc_data {
+	/* for ub.iopriv->cfq_bc_head */
+	struct list_head	cfq_bc_list;
+#ifdef CONFIG_UBC_IO_PRIO
+	/* for cfqd->cfq_bc_queue */
+	struct rb_node		cfq_bc_node;
+#endif
+
+	struct cfq_data		*cfqd;
+	struct ub_iopriv	*ub_iopriv;
+
+	/*
+	 * rr list of queues with requests
+	 */
+	struct list_head	rr_list[CFQ_PRIO_LISTS];
+	struct list_head	cur_rr;
+	struct list_head	idle_rr;
+	struct list_head	busy_rr;
+	/*
+	 * non-ordered list of empty cfqq's
+	 */
+	struct list_head	empty_list;
+
+	int			cur_prio;
+	int			cur_end_prio;
+
+	unsigned long		rqnum;
+	unsigned long		on_dispatch;
+	u64			iotime;
+
+	/*
+	 * async queue for each priority case
+	 */
+	struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
+	struct cfq_queue *async_idle_cfqq;
+
+	/* write under cfqd->queue->request_queue_lock */
+	seqcount_t		stat_lock;
+	/* summarize delays between enqueue and activation. */
+	unsigned long		wait_time;
+	unsigned long		wait_start;
+	unsigned long		used_time;
+	unsigned long		activations_count;
+	unsigned long		requests_dispatched;
+	unsigned long		sectors_dispatched;
+};
+
+/*
+ * Per block device queue structure
+ */
+struct cfq_data {
+	struct request_queue *queue;
+
+#ifndef CONFIG_UBC_IO_PRIO
+	struct cfq_bc_data cfq_bc;
+#endif
+
+	/*
+	 * Each priority tree is sorted by next_request position.  These
+	 * trees are used when determining if two or more queues are
+	 * interleaving requests (see cfq_close_cooperator).
+	 */
+	struct rb_root prio_trees[CFQ_PRIO_LISTS];
+
+	unsigned int busy_queues;
+
+	/*
+	 * global crq hash for all queues
+	 */
+	struct hlist_head *crq_hash;
+
+	mempool_t *crq_pool;
+
+	int rq_in_driver;
+	int hw_tag;
+
+	/*
+	 * schedule slice state info
+	 */
+	/*
+	 * idle window management
+	 */
+	struct timer_list idle_slice_timer;
+	struct work_struct unplug_work;
+
+	struct cfq_queue *active_queue;
+	struct cfq_io_context *active_cic;
+	unsigned int dispatch_slice;
+
+	struct timer_list idle_class_timer;
+
+	sector_t last_position;
+	unsigned long last_end_request;
+
+	unsigned int rq_starved;
+
+	/*
+	 * tunables, see top of file
+	 */
+	unsigned int cfq_quantum;
+	unsigned int cfq_queued;
+	unsigned int cfq_fifo_expire[2];
+	unsigned int cfq_back_penalty;
+	unsigned int cfq_back_max;
+	unsigned int cfq_slice[2];
+	unsigned int cfq_slice_async_rq;
+	unsigned int cfq_slice_idle;
+
+	struct list_head cic_list;
+
+#ifdef CONFIG_UBC_IO_PRIO
+	/* bc priority queue */
+	struct rb_root cfq_bc_queue;
+#endif
+	/* ub that owns a timeslice at the moment */
+	struct cfq_bc_data *active_cfq_bc;
+	unsigned int cfq_ub_slice;
+	unsigned long slice_begin;
+	unsigned long slice_end;
+	u64 max_iotime;
+	int virt_mode;
+	int write_virt_mode;
+	int cfq_ub_isolate;
+};
+
+/*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+	/* reference count */
+	atomic_t ref;
+	/* parent cfq_data */
+	struct cfq_data *cfqd;
+	/* on either rr or empty list of cfq_bc_data, or empty for dead bc */
+	struct list_head cfq_list;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct cfq_rq *next_crq;
+	/* requests queued in sort_list */
+	int queued[2];
+	/* currently allocated requests */
+	int allocated[2];
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	unsigned long slice_start;
+	unsigned long slice_end;
+	unsigned long slice_left;
+	unsigned long service_last;
+
+	/* number of requests that are on the dispatch list */
+	int on_dispatch[2];
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
+
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+	sector_t last_request_pos;
+	unsigned long seeky_start;
+
+	/* various state flags, see below */
+	unsigned int flags;
+
+	struct cfq_queue *new_cfqq;
+
+	struct cfq_bc_data *cfq_bc;
+};
+
+static void inline cfq_init_cfq_bc(struct cfq_bc_data *cfq_bc)
+{
+	int i;
+
+	for (i = 0; i < CFQ_PRIO_LISTS; i++)
+		INIT_LIST_HEAD(&cfq_bc->rr_list[i]);
+
+	INIT_LIST_HEAD(&cfq_bc->cur_rr);
+	INIT_LIST_HEAD(&cfq_bc->idle_rr);
+	INIT_LIST_HEAD(&cfq_bc->busy_rr);
+	INIT_LIST_HEAD(&cfq_bc->empty_list);
+}
+
+extern void __cfq_put_async_queues(struct cfq_bc_data *cfq_bc);
+
+#endif /* _LINUX_CFQ_IOSCHED_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/compat.h kernel-2.6.18-417.el5-028stab121/include/linux/compat.h
--- kernel-2.6.18-417.el5.orig/include/linux/compat.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/compat.h	2017-01-13 08:40:40.000000000 -0500
@@ -190,6 +190,12 @@ asmlinkage ssize_t compat_sys_readv(unsi
 		const struct compat_iovec __user *vec, unsigned long vlen);
 asmlinkage ssize_t compat_sys_writev(unsigned long fd,
 		const struct compat_iovec __user *vec, unsigned long vlen);
+asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
+		const struct compat_iovec __user *vec,
+		unsigned long vlen, u32 pos_low, u32 pos_high);
+asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
+		const struct compat_iovec __user *vec,
+		unsigned long vlen, u32 pos_low, u32 pos_high);
 
 int compat_do_execve(char * filename, compat_uptr_t __user *argv,
 	        compat_uptr_t __user *envp, struct pt_regs * regs);
@@ -235,8 +241,27 @@ static inline int compat_timespec_compar
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
 extern int compat_printk(const char *fmt, ...);
+extern int ve_compat_printk(int dst, const char *fmt, ...);
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 
+extern long compat_nanosleep_restart(struct restart_block *restart);
+
+asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename,
+				struct compat_timespec __user *t, int flags);
+
+/*
+ * epoll (fs/eventpoll.c) compat bits follow ...
+ */
+struct epoll_event;
+#define compat_epoll_event      epoll_event
+asmlinkage long compat_sys_epoll_pwait(int epfd,
+                        struct compat_epoll_event __user *events,
+                        int maxevents, int timeout,
+                        const compat_sigset_t __user *sigmask,
+                        compat_size_t sigsetsize);
+asmlinkage long compat_sys_signalfd(int ufd,
+				const compat_sigset_t __user *sigmask,
+				compat_size_t sigsetsize);
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/config.h kernel-2.6.18-417.el5-028stab121/include/linux/config.h
--- kernel-2.6.18-417.el5.orig/include/linux/config.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/config.h	2017-01-13 08:40:20.000000000 -0500
@@ -3,7 +3,7 @@
 /* This file is no longer in use and kept only for backward compatibility.
  * autoconf.h is now included via -imacros on the commandline
  */
-#warning Including config.h is deprecated.
+/* #warning Including config.h is deprecated. */
 #include <linux/autoconf.h>
 #if !defined (__KERNEL__) && !defined(__KERNGLUE__)
 #error including kernel header in userspace; use the glibc headers instead!
diff -upr kernel-2.6.18-417.el5.orig/include/linux/console.h kernel-2.6.18-417.el5-028stab121/include/linux/console.h
--- kernel-2.6.18-417.el5.orig/include/linux/console.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/console.h	2017-01-13 08:40:16.000000000 -0500
@@ -140,4 +140,22 @@ void vcs_remove_sysfs(int index);
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
+
+#include <linux/preempt.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+
+struct printk_aligned {
+	int v;
+} ____cacheline_aligned;
+extern struct printk_aligned printk_no_wake_var[NR_CPUS];
+#define __printk_no_wake (printk_no_wake_var[smp_processor_id()].v)
+#define printk_no_wake ({ \
+			int v; \
+			preempt_disable(); \
+			v = __printk_no_wake; \
+			preempt_enable_no_resched(); \
+			v; \
+			})
+
 #endif /* _LINUX_CONSOLE_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/cpt_exports.h kernel-2.6.18-417.el5-028stab121/include/linux/cpt_exports.h
--- kernel-2.6.18-417.el5.orig/include/linux/cpt_exports.h	2017-01-13 08:40:26.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/cpt_exports.h	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,31 @@
+/*
+ *
+ *  include/linux/cpt_exports.h
+ *
+ *  Copyright (C) 2008  Parallels
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __CPT_EXPORTS_H__
+#define __CPT_EXPORTS_H__
+
+struct cpt_context;
+
+struct cpt_ops {
+	void		(*write)(const void *addr, size_t count,
+					struct cpt_context *ctx);
+	int		(*get_object)(int type, loff_t pos, void *tmp,
+					int size, struct cpt_context *ctx);
+	loff_t		(*lookup_object)(enum _cpt_object_type type,
+					void *p, struct cpt_context *ctx);
+	struct file *	(*rst_file)(loff_t pos, int fd,
+					struct cpt_context *ctx);
+};
+
+extern struct cpt_ops cpt_ops;
+extern struct cpt_ops rst_ops;
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/cpt_image.h kernel-2.6.18-417.el5-028stab121/include/linux/cpt_image.h
--- kernel-2.6.18-417.el5.orig/include/linux/cpt_image.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/cpt_image.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,1862 @@
+/*
+ *
+ *  include/linux/cpt_image.h
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __CPT_IMAGE_H_
+#define __CPT_IMAGE_H_ 1
+
+#define CPT_NULL (~0ULL)
+#define CPT_NOINDEX (~0U)
+
+/**
+ * WARNING!!! For "expandable" objects at restore state
+ * _always_ use obj->cpt_hdrlen instead of sizeof()
+ */
+#define cpt_object_has(obj, field)	((obj)->cpt_hdrlen >= \
+		offsetof(typeof(*(obj)), field) + sizeof((obj)->field))
+
+/*
+ * Image file layout.
+ *
+ * - major header
+ * - sections[]
+ *
+ *	Each section is:
+ *	- section header
+ *	- array of objects
+ *
+ * All data records are arch independent, 64 bit aligned.
+ */
+
+enum _cpt_object_type
+{
+	CPT_OBJ_TASK = 0,
+	CPT_OBJ_MM,
+	CPT_OBJ_FS,
+	CPT_OBJ_FILES,
+	CPT_OBJ_FILE,
+	CPT_OBJ_SIGHAND_STRUCT,
+	CPT_OBJ_SIGNAL_STRUCT,
+	CPT_OBJ_TTY,
+	CPT_OBJ_SOCKET,
+	CPT_OBJ_SYSVSEM_UNDO,
+	CPT_OBJ_NAMESPACE,
+	CPT_OBJ_SYSV_SHM,
+	CPT_OBJ_INODE,
+	CPT_OBJ_UBC,
+	CPT_OBJ_SLM_SGREG,
+	CPT_OBJ_SLM_REGOBJ,
+	CPT_OBJ_SLM_MM,
+	CPT_OBJ_VFSMOUNT_REF,
+	CPT_OBJ_MAX,
+	/* The objects above are stored in memory while checkpointing */
+
+	CPT_OBJ_VMA = 1024,
+	CPT_OBJ_FILEDESC,
+	CPT_OBJ_SIGHANDLER,
+	CPT_OBJ_SIGINFO,
+	CPT_OBJ_LASTSIGINFO,
+	CPT_OBJ_SYSV_SEM,
+	CPT_OBJ_SKB,
+	CPT_OBJ_FLOCK,
+	CPT_OBJ_OPENREQ,
+	CPT_OBJ_VFSMOUNT,
+	CPT_OBJ_TRAILER,
+	CPT_OBJ_SYSVSEM_UNDO_REC,
+	CPT_OBJ_NET_DEVICE,
+	CPT_OBJ_NET_IFADDR,
+	CPT_OBJ_NET_ROUTE,
+	CPT_OBJ_NET_CONNTRACK,
+	CPT_OBJ_NET_CONNTRACK_EXPECT,
+	CPT_OBJ_AIO_CONTEXT,
+	CPT_OBJ_VEINFO,
+	CPT_OBJ_EPOLL,
+	CPT_OBJ_EPOLL_FILE,
+	CPT_OBJ_SKFILTER,
+	CPT_OBJ_SIGALTSTACK,
+  	CPT_OBJ_SOCK_MCADDR,
+	CPT_OBJ_BIND_MNT,
+	CPT_OBJ_SYSVMSG,
+	CPT_OBJ_SYSVMSG_MSG,
+
+	CPT_OBJ_X86_REGS = 4096,
+	CPT_OBJ_X86_64_REGS,
+	CPT_OBJ_PAGES,
+	CPT_OBJ_COPYPAGES,
+	CPT_OBJ_REMAPPAGES,
+	CPT_OBJ_LAZYPAGES,
+	CPT_OBJ_NAME,
+	CPT_OBJ_BITS,
+	CPT_OBJ_REF,
+	CPT_OBJ_ITERPAGES,
+	CPT_OBJ_ITERYOUNGPAGES,
+	CPT_OBJ_VSYSCALL,
+	CPT_OBJ_IA64_REGS,
+	CPT_OBJ_INOTIFY,
+	CPT_OBJ_INOTIFY_WATCH,
+	CPT_OBJ_INOTIFY_EVENT,
+	CPT_OBJ_TASK_AUX,
+	CPT_OBJ_NET_TUNTAP,
+	CPT_OBJ_NET_HWADDR,
+	CPT_OBJ_NET_VETH,
+	CPT_OBJ_NET_STATS,
+	CPT_OBJ_NET_IPIP_TUNNEL,
+};
+
+#define CPT_ALIGN(n) (((n)+7)&~7)
+
+struct cpt_major_hdr
+{
+	__u8	cpt_signature[4];	/* Magic number */
+	__u16	cpt_hdrlen;		/* Length of this header */
+	__u16	cpt_image_version;	/* Format of this file */
+#define CPT_VERSION_MINOR(a)	((a) & 0xf)
+#define CPT_VERSION_8		0
+#define CPT_VERSION_9		0x100
+#define CPT_VERSION_9_1		0x101
+#define CPT_VERSION_9_2		0x102
+#define CPT_VERSION_16		0x200
+#define CPT_VERSION_18		0x300
+#define CPT_VERSION_18_1	0x301
+#define CPT_VERSION_18_2	0x302
+#define CPT_VERSION_18_3	0x303
+#define CPT_VERSION_18_4	0x304
+#define CPT_CURRENT_VERSION	CPT_VERSION_18_4
+	__u16	cpt_os_arch;		/* Architecture */
+#define CPT_OS_ARCH_I386	0
+#define CPT_OS_ARCH_EMT64	1
+#define CPT_OS_ARCH_IA64	2
+	__u16	__cpt_pad1;
+	__u32	cpt_ve_features;	/* VE features */
+	__u32	cpt_ve_features2;	/* VE features */
+	__u16	cpt_pagesize;		/* Page size used by OS */
+	__u16	cpt_hz;			/* HZ used by OS */
+	__u64	cpt_start_jiffies64;	/* Jiffies */
+	__u32	cpt_start_sec;		/* Seconds */
+	__u32	cpt_start_nsec;		/* Nanoseconds */
+	__u32	cpt_cpu_caps[4];	/* CPU capabilities */
+	__u32	cpt_kernel_config[4];	/* Kernel config */
+	__u64	cpt_iptables_mask;	/* Used netfilter modules */
+} __attribute__ ((aligned (8)));
+
+#define CPT_SIGNATURE0 0x79
+#define CPT_SIGNATURE1 0x1c
+#define CPT_SIGNATURE2 0x01
+#define CPT_SIGNATURE3 0x63
+
+/* CPU capabilities */
+#define CPT_CPU_X86_CMOV	0
+#define CPT_CPU_X86_FXSR	1
+#define CPT_CPU_X86_SSE		2
+#define CPT_CPU_X86_SSE2	3
+#define CPT_CPU_X86_MMX		4
+#define CPT_CPU_X86_3DNOW	5
+#define CPT_CPU_X86_3DNOW2	6
+#define CPT_CPU_X86_SEP		7
+#define CPT_CPU_X86_EMT64	8
+#define CPT_CPU_X86_IA64	9
+#define CPT_CPU_X86_SYSCALL	10
+#define CPT_CPU_X86_SYSCALL32	11
+#define CPT_CPU_X86_SEP32	12
+#define CPT_NO_IPV6		13
+
+/* Unsupported features */
+#define CPT_EXTERNAL_PROCESS	16
+#define CPT_NAMESPACES		17
+#define CPT_SCHEDULER_POLICY	18
+#define CPT_PTRACED_FROM_VE0	19
+#define CPT_UNSUPPORTED_FSTYPE	20
+#define CPT_BIND_MOUNT		21
+#define CPT_UNSUPPORTED_NETDEV	22
+#define CPT_UNSUPPORTED_MISC	23
+#define CPT_SLM_DMPRST		24
+
+/* This mask is used to determine whether VE
+   has some unsupported features or not */
+#define CPT_UNSUPPORTED_MASK	0xffff0000UL
+
+#define CPT_KERNEL_CONFIG_PAE	0
+
+struct cpt_section_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_section;
+	__u16	cpt_hdrlen;
+	__u16	cpt_align;
+} __attribute__ ((aligned (8)));
+
+enum
+{
+	CPT_SECT_ERROR,			/* Error section, content is string */
+	CPT_SECT_VEINFO,
+	CPT_SECT_FILES,			/* Files. Content is array of file objects */
+	CPT_SECT_TASKS,
+	CPT_SECT_MM,
+	CPT_SECT_FILES_STRUCT,
+	CPT_SECT_FS,
+	CPT_SECT_SIGHAND_STRUCT,
+	CPT_SECT_TTY,
+	CPT_SECT_SOCKET,
+	CPT_SECT_NAMESPACE,
+	CPT_SECT_SYSVSEM_UNDO,
+	CPT_SECT_INODE,			/* Inodes with i->i_nlink==0 and
+					 * deleted dentries with inodes not
+					 * referenced inside dumped process.
+					 */
+	CPT_SECT_SYSV_SHM,
+	CPT_SECT_SYSV_SEM,
+	CPT_SECT_ORPHANS,
+	CPT_SECT_NET_DEVICE,
+	CPT_SECT_NET_IFADDR,
+	CPT_SECT_NET_ROUTE,
+	CPT_SECT_NET_IPTABLES,
+	CPT_SECT_NET_CONNTRACK,
+	CPT_SECT_NET_CONNTRACK_VE0,
+	CPT_SECT_UTSNAME,
+	CPT_SECT_TRAILER,
+	CPT_SECT_UBC,
+	CPT_SECT_SLM_SGREGS,
+	CPT_SECT_SLM_REGOBJS,
+/* Due to silly mistake we cannot index sections beyond this value */
+#define	CPT_SECT_MAX_INDEX	(CPT_SECT_SLM_REGOBJS+1)
+	CPT_SECT_EPOLL,
+	CPT_SECT_VSYSCALL,
+	CPT_SECT_INOTIFY,
+	CPT_SECT_SYSV_MSG,
+	CPT_SECT_SNMP_STATS,
+	CPT_SECT_MAX
+};
+
+struct cpt_major_tail
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_lazypages;
+	__u32	cpt_64bit;
+	__u64	cpt_sections[CPT_SECT_MAX_INDEX];
+	__u32	cpt_nsect;
+	__u8	cpt_signature[4];	/* Magic number */
+} __attribute__ ((aligned (8)));
+
+
+/* Common object header. */
+struct cpt_object_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+} __attribute__ ((aligned (8)));
+
+enum _cpt_content_type {
+	CPT_CONTENT_VOID,
+	CPT_CONTENT_ARRAY,
+	CPT_CONTENT_DATA,
+	CPT_CONTENT_NAME,
+
+	CPT_CONTENT_STACK,
+	CPT_CONTENT_X86_FPUSTATE_OLD,
+	CPT_CONTENT_X86_FPUSTATE,
+	CPT_CONTENT_MM_CONTEXT,
+	CPT_CONTENT_SEMARRAY,
+	CPT_CONTENT_SEMUNDO,
+	CPT_CONTENT_NLMARRAY,
+	CPT_CONTENT_MAX
+};
+
+/* CPT_OBJ_BITS: encode array of bytes */ 
+struct cpt_obj_bits
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_size;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_REF: a reference to another object */ 
+struct cpt_obj_ref
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_pos;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VEINFO: various ve specific data */
+struct cpt_veinfo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	/* ipc ctls */
+	__u32	shm_ctl_max;
+	__u32	shm_ctl_all;
+	__u32	shm_ctl_mni;
+	__u32	msg_ctl_max;
+	__u32	msg_ctl_mni;
+	__u32	msg_ctl_mnb;
+	__u32	sem_ctl_arr[4];
+
+	/* start time */
+	__u64	start_timespec_delta;
+	__u64	start_jiffies_delta;
+
+	/* later extension */
+	__u32	last_pid;
+	__u32	rnd_va_space;
+	__u32	vpid_max;
+	__u32	__cpt_pad1;
+	__u64	reserved[7];
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILE: one struct file */ 
+struct cpt_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_flags;
+	__u32	cpt_mode;
+	__u64	cpt_pos;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+
+	__u32	cpt_i_mode;
+	__u32	cpt_lflags;
+#define CPT_DENTRY_DELETED	1
+#define CPT_DENTRY_ROOT		2
+#define CPT_DENTRY_CLONING	4
+#define CPT_DENTRY_PROC		8
+#define CPT_DENTRY_EPOLL	0x10
+#define CPT_DENTRY_REPLACED	0x20
+#define CPT_DENTRY_INOTIFY	0x40
+#define CPT_DENTRY_FUTEX	0x80
+#define CPT_DENTRY_TUNTAP	0x100
+#define CPT_DENTRY_PROCPID_DEAD 0x200
+#define CPT_DENTRY_HARDLINKED	0x400
+#define CPT_DENTRY_SIGNALFD	0x800
+#define CPT_DENTRY_SILLYRENAME	0x20000
+	__u64	cpt_inode;
+	__u64	cpt_priv;
+
+	__u32	cpt_fown_fd;
+	__u32	cpt_fown_pid;
+	__u32	cpt_fown_uid;
+	__u32	cpt_fown_euid;
+	__u32	cpt_fown_signo;
+	__u32	__cpt_pad1;
+	__u64	cpt_vfsmount;
+} __attribute__ ((aligned (8)));
+/* Followed by file name, encoded as CPT_OBJ_NAME */
+
+struct cpt_epoll_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+/* Followed by array of struct cpt_epoll_file */
+
+struct cpt_epoll_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_fd;
+	__u32	cpt_events;
+	__u64	cpt_data;
+	__u32	cpt_revents;
+	__u32	cpt_ready;
+} __attribute__ ((aligned (8)));
+
+struct cpt_inotify_wd_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_wd;
+	__u32	cpt_mask;
+} __attribute__ ((aligned (8)));
+/* Followed by cpt_file_image of inode to watch */
+
+struct cpt_inotify_ev_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_wd;
+	__u32	cpt_mask;
+	__u32	cpt_cookie;
+	__u32	cpt_namelen;
+} __attribute__ ((aligned (8)));
+/* Followed by name */
+
+struct cpt_inotify_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_user;
+	__u32	cpt_max_events;
+	__u32	cpt_last_wd;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by array of struct cpt_inotify_wd_image and cpt_inotify_ev_image */
+
+
+/* CPT_OBJ_FILEDESC: one file descriptor */
+struct cpt_fd_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_fd;
+	__u32	cpt_flags;
+#define CPT_FD_FLAG_CLOSEEXEC	1
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILES: one files_struct */
+struct cpt_files_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_max_fds;
+	__u32	cpt_next_fd;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by array of cpt_fd_image */
+
+/* CPT_OBJ_FS: one fs_struct */
+struct cpt_fs_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_umask;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by two/three CPT_OBJ_FILENAME for root, pwd and, optionally, altroot */
+
+/* CPT_OBJ_INODE: one struct inode */
+struct cpt_inode_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_dev;
+	__u64	cpt_ino;
+	__u32	cpt_mode;
+	__u32	cpt_nlink;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+	__u64	cpt_rdev;
+	__u64	cpt_size;
+	__u64	cpt_blksize;
+	__u64	cpt_atime;
+	__u64	cpt_mtime;
+	__u64	cpt_ctime;
+	__u64	cpt_blocks;
+	__u32	cpt_sb;
+	__u32	__cpt_pad1;
+	__u64	cpt_vfsmount;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VFSMOUNT: one vfsmount */
+struct cpt_vfsmount_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_mntflags;
+#define CPT_MNT_BIND	0x80000000
+#define CPT_MNT_EXT	0x40000000
+#define CPT_MNT_DELAYFS	0x20000000
+	__u32	cpt_flags;
+	__u64	cpt_mnt_bind;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_flock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_pid;
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u32	cpt_flags;
+#define CPT_FLOCK_DELAYED	0x00010000
+	__u32	cpt_type;
+	__u32	cpt_svid;
+} __attribute__ ((aligned (8)));
+
+struct cpt_tty_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_flags;
+	__u32	cpt_link;
+	__u32	cpt_index;
+	__u32	cpt_drv_type;
+	__u32	cpt_drv_subtype;
+	__u32	cpt_drv_flags;
+	__u8	cpt_packet;
+	__u8	cpt_stopped;
+	__u8	cpt_hw_stopped;
+	__u8	cpt_flow_stopped;
+
+	__u32	cpt_canon_data;
+	__u32	cpt_canon_head;
+	__u32	cpt_canon_column;
+	__u32	cpt_column;
+	__u8	cpt_ctrl_status;
+	__u8	cpt_erasing;
+	__u8	cpt_lnext;
+	__u8	cpt_icanon;
+	__u8	cpt_raw;
+	__u8	cpt_real_raw;
+	__u8	cpt_closing;
+	__u8	__cpt_pad1;
+	__u16	cpt_minimum_to_wake;
+	__u16	__cpt_pad2;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_c_line;
+	__u8	cpt_name[64];	
+	__u16	cpt_ws_row;
+	__u16	cpt_ws_col;
+	__u16	cpt_ws_prow;
+	__u16	cpt_ws_pcol;
+	__u8	cpt_c_cc[32];
+	__u32	cpt_c_iflag;
+	__u32	cpt_c_oflag;
+	__u32	cpt_c_cflag;
+	__u32	cpt_c_lflag;
+	__u32	cpt_read_flags[4096/32];
+} __attribute__ ((aligned (8)));
+
+struct cpt_sock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_parent;
+	__u32	cpt_index;
+
+	__u64	cpt_ssflags;
+	__u16	cpt_type;
+	__u16	cpt_family;
+	__u8	cpt_sstate;
+	__u8	cpt_passcred;
+	__u8	cpt_state;
+	__u8	cpt_reuse;
+
+	__u8	cpt_zapped;
+	__u8	cpt_shutdown;
+	__u8	cpt_userlocks;
+	__u8	cpt_no_check;
+	__u8	cpt_debug;
+	__u8	cpt_rcvtstamp;
+	__u8	cpt_localroute;
+	__u8	cpt_protocol;
+
+	__u32	cpt_err;
+	__u32	cpt_err_soft;
+
+	__u16	cpt_max_ack_backlog;
+	__u16   __cpt_pad1;
+	__u32	cpt_priority;
+
+	__u32	cpt_rcvlowat;
+	__u32	cpt_bound_dev_if;
+
+	__u64	cpt_rcvtimeo;
+	__u64	cpt_sndtimeo;
+	__u32	cpt_rcvbuf;
+	__u32	cpt_sndbuf;
+	__u64	cpt_flags;
+	__u64	cpt_lingertime;
+	__u32	cpt_peer_pid;
+	__u32	cpt_peer_uid;
+
+	__u32	cpt_peer_gid;
+	__u32	cpt_laddrlen;
+	__u32	cpt_laddr[128/4];
+	__u32	cpt_raddrlen;
+	__u32	cpt_raddr[128/4];
+	/* AF_UNIX */
+	__u32	cpt_peer;
+
+	__u8	cpt_socketpair;
+	__u8	cpt_sockflags;
+#define CPT_SOCK_DELETED	0x1
+#define CPT_SOCK_DELAYED	0x2
+
+	__u16	__cpt_pad4;
+	__u32	__cpt_pad5;
+/*
+	struct sk_filter      	*sk_filter;
+ */
+
+	__u64			cpt_stamp;
+	__u32			cpt_daddr;
+	__u16			cpt_dport;
+	__u16			cpt_sport;
+
+	union {
+		struct {
+			__u32	cpt_saddr;
+			__u32	cpt_rcv_saddr;
+		};
+
+		__u64		cpt_vfsmount_ref;
+	};
+
+
+	__u32			cpt_uc_ttl;
+	__u32			cpt_tos;
+
+	__u32			cpt_cmsg_flags;
+	__u32			cpt_mc_index;
+
+	__u32			cpt_mc_addr;
+/*
+	struct ip_options	*opt;
+ */
+	__u8			cpt_hdrincl;
+	__u8			cpt_mc_ttl;
+	__u8			cpt_mc_loop;
+	__u8			cpt_pmtudisc;
+
+	__u8			cpt_recverr;
+	__u8			cpt_freebind;
+	__u16			cpt_idcounter;
+	__u32			cpt_cork_flags;
+
+	__u32			cpt_cork_fragsize;
+	__u32			cpt_cork_length;
+	__u32			cpt_cork_addr;
+	__u32			cpt_cork_saddr;
+	__u32			cpt_cork_daddr;
+	__u32			cpt_cork_oif;
+
+	__u32			cpt_udp_pending;
+	__u32			cpt_udp_corkflag;
+	__u16			cpt_udp_encap;
+	__u16			cpt_udp_len;
+	__u32			__cpt_pad7;
+
+	__u64			cpt_saddr6[2];
+	__u64			cpt_rcv_saddr6[2];
+	__u64			cpt_daddr6[2];
+	__u32			cpt_flow_label6;
+	__u32			cpt_frag_size6;
+	__u32			cpt_hop_limit6;
+	__u32			cpt_mcast_hops6;
+
+	__u32			cpt_mcast_oif6;
+	__u8			cpt_rxopt6;
+	__u8			cpt_mc_loop6;
+	__u8			cpt_recverr6;
+	__u8			cpt_sndflow6;
+
+	__u8			cpt_pmtudisc6;
+	__u8			cpt_ipv6only6;
+	__u8			cpt_mapped;
+	__u8			__cpt_pad8;
+	__u32	cpt_pred_flags;
+
+	__u32	cpt_rcv_nxt;
+	__u32	cpt_snd_nxt;
+
+	__u32	cpt_snd_una;
+	__u32	cpt_snd_sml;
+
+	__u32	cpt_rcv_tstamp;
+	__u32	cpt_lsndtime;
+
+	__u8	cpt_tcp_header_len;
+	__u8	cpt_ack_pending;
+	__u8	cpt_quick;
+	__u8	cpt_pingpong;
+	__u8	cpt_blocked;
+	__u8	__cpt_pad9;
+	__u16	__cpt_pad10;
+
+	__u32	cpt_ato;
+	__u32	cpt_ack_timeout;
+
+	__u32	cpt_lrcvtime;
+	__u16	cpt_last_seg_size;
+	__u16	cpt_rcv_mss;
+
+	__u32	cpt_snd_wl1;
+	__u32	cpt_snd_wnd;
+
+	__u32	cpt_max_window;
+	__u32	cpt_pmtu_cookie;
+
+	__u32	cpt_mss_cache;
+	__u16	cpt_mss_cache_std;
+	__u16	cpt_mss_clamp;
+
+	__u16	cpt_ext_header_len;
+	__u16	cpt_ext2_header_len;
+	__u8	cpt_ca_state;
+	__u8	cpt_retransmits;
+	__u8	cpt_reordering;
+	__u8	cpt_frto_counter;
+
+	__u32	cpt_frto_highmark;
+	__u8	cpt_adv_cong;
+	__u8	cpt_defer_accept;
+	__u8	cpt_backoff;
+	__u8	__cpt_pad11;
+
+	__u32	cpt_srtt;
+	__u32	cpt_mdev;
+
+	__u32	cpt_mdev_max;
+	__u32	cpt_rttvar;
+
+	__u32	cpt_rtt_seq;
+	__u32	cpt_rto;
+
+	__u32	cpt_packets_out;
+	__u32	cpt_left_out;
+
+	__u32	cpt_retrans_out;
+ 	__u32	cpt_snd_ssthresh;
+
+ 	__u32	cpt_snd_cwnd;
+ 	__u16	cpt_snd_cwnd_cnt;
+	__u16	cpt_snd_cwnd_clamp;
+
+	__u32	cpt_snd_cwnd_used;
+	__u32	cpt_snd_cwnd_stamp;
+
+	__u32	cpt_timeout;
+	__u32	cpt_ka_timeout;
+
+ 	__u32	cpt_rcv_wnd;
+	__u32	cpt_rcv_wup;
+
+	__u32	cpt_write_seq;
+	__u32	cpt_pushed_seq;
+
+	__u32	cpt_copied_seq;
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_saw_tstamp;
+
+        __u8	cpt_snd_wscale;
+        __u8	cpt_rcv_wscale;
+	__u8	cpt_nonagle;
+	__u8	cpt_keepalive_probes;
+        __u32	cpt_rcv_tsval;
+
+        __u32	cpt_rcv_tsecr;
+        __u32	cpt_ts_recent;
+
+	__u64	cpt_ts_recent_stamp;
+	__u16	cpt_user_mss;
+	__u8	cpt_dsack;
+	__u8	cpt_eff_sacks;
+	__u32	cpt_sack_array[2*5];
+	__u32	cpt_window_clamp;
+
+	__u32	cpt_rcv_ssthresh;
+	__u8	cpt_probes_out;
+	__u8	cpt_num_sacks;
+	__u16	cpt_advmss;
+
+	__u8	cpt_syn_retries;
+	__u8	cpt_ecn_flags;
+	__u16	cpt_prior_ssthresh;
+	__u32	cpt_lost_out;
+
+	__u32   cpt_sacked_out;
+	__u32   cpt_fackets_out;
+
+	__u32   cpt_high_seq;
+	__u32	cpt_retrans_stamp;
+
+	__u32	cpt_undo_marker;
+	__u32	cpt_undo_retrans;
+
+	__u32	cpt_urg_seq;
+	__u16	cpt_urg_data;
+	__u8	cpt_pending;
+	__u8	cpt_urg_mode;
+
+	__u32	cpt_snd_up;
+	__u32	cpt_keepalive_time;
+
+	__u32   cpt_keepalive_intvl;
+	__u32   cpt_linger2;
+
+	__u32	cpt_rcvrtt_rtt;
+	__u32	cpt_rcvrtt_seq;
+
+	__u32	cpt_rcvrtt_time;
+	__u32	__cpt_pad12;
+
+	__u16	cpt_i_mode;
+	__u16	__cpt_pad13;
+	__u32	__cpt_pad14;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sockmc_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u16	cpt_family;
+	__u16	cpt_mode;
+	__u32	cpt_ifindex;
+	__u32	cpt_mcaddr[4];
+} __attribute__ ((aligned (8)));
+/* Followed by array of source addresses, each zero padded to 16 bytes */
+
+struct cpt_openreq_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_rcv_isn;
+	__u32	cpt_snt_isn;
+
+	__u16	cpt_rmt_port;
+	__u16	cpt_mss;
+	__u8	cpt_family;
+	__u8	cpt_retrans;
+	__u8	cpt_snd_wscale;
+	__u8	cpt_rcv_wscale;
+
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_ecn_ok;
+	__u8	cpt_acked;
+	__u8	__cpt_pad1;
+	__u16	__cpt_pad2;
+
+	__u32	cpt_window_clamp;
+	__u32	cpt_rcv_wnd;
+	__u32	cpt_ts_recent;
+	__u32	cpt_iif;
+	__u64	cpt_expires;
+
+	__u64	cpt_loc_addr[2];
+	__u64	cpt_rmt_addr[2];
+/*
+	struct ip_options	*opt;
+ */
+	
+} __attribute__ ((aligned (8)));
+
+struct cpt_skb_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_queue;
+#define CPT_SKB_NQ	0
+#define CPT_SKB_RQ	1
+#define CPT_SKB_WQ	2
+#define CPT_SKB_OFOQ	3
+
+	__u64	cpt_stamp;
+	__u32	cpt_len;
+	__u32	cpt_hspace;
+	__u32	cpt_tspace;
+	__u32	cpt_h;
+	__u32	cpt_nh;
+	__u32	cpt_mac;
+	
+	__u64	cpt_cb[5];
+	__u32	cpt_mac_len;
+	__u32	cpt_csum;
+	__u8	cpt_local_df;
+	__u8	cpt_pkt_type;
+	__u8	cpt_ip_summed;
+	__u8	__cpt_pad1;
+	__u32	cpt_priority;
+	__u16	cpt_protocol;
+	__u16	cpt_security;
+	__u16	cpt_gso_segs;
+	__u16	cpt_gso_size;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvshm_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+
+	__u32	cpt_id;
+	__u32	cpt_mlockuser;
+	__u64	cpt_segsz;
+	__u64	cpt_atime;
+	__u64	cpt_ctime;
+	__u64	cpt_dtime;
+	__u64	cpt_creator;
+	__u64	cpt_last;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvsem_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+	__u32	cpt_id;
+	__u32	__cpt_pad1;
+
+	__u64	cpt_otime;
+	__u64	cpt_ctime;
+} __attribute__ ((aligned (8)));
+/* Content is array of pairs semval/sempid */
+
+struct cpt_sysvsem_undo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_id;
+	__u32	cpt_nsem;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sysvmsg_msg_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_type;
+	__u64	cpt_size;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvmsg_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+	__u32	cpt_id;
+	__u32	__cpt_pad1;
+
+	__u64	cpt_stime;
+	__u64	cpt_rtime;
+	__u64	cpt_ctime;
+	__u64	cpt_last_sender;
+	__u64	cpt_last_receiver;
+	__u64	cpt_qbytes;
+} __attribute__ ((aligned (8)));
+/* Content is array of sysv msg */
+
+
+struct cpt_mm_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start_code;
+	__u64	cpt_end_code;
+	__u64	cpt_start_data;
+	__u64	cpt_end_data;
+	__u64	cpt_start_brk;
+	__u64	cpt_brk;
+	__u64	cpt_start_stack;
+	__u64	cpt_start_arg;
+	__u64	cpt_end_arg;
+	__u64	cpt_start_env;
+	__u64	cpt_end_env;
+	__u64	cpt_def_flags;
+	__u64	cpt_mmub;
+	__u8	cpt_dumpable;
+	__u8	cpt_vps_dumpable;
+	__u8	cpt_used_hugetlb;
+	__u8	__cpt_pad;
+	__u32	cpt_vdso;
+} __attribute__ ((aligned (8)));
+
+struct cpt_page_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+
+struct cpt_remappage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_copypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_source;
+} __attribute__ ((aligned (8)));
+
+struct cpt_lazypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_index;
+} __attribute__ ((aligned (8)));
+
+struct cpt_iterpage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+/* Followed by array of PFNs */
+
+struct cpt_vma_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_type;
+#define CPT_VMA_TYPE_0		0
+#define CPT_VMA_TYPE_SHM	1
+#define CPT_VMA_VDSO		2
+	__u32	cpt_anonvma;
+	__u64	cpt_anonvmaid;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_flags;
+	__u64	cpt_pgprot;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_aio_ctx_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_max_reqs;
+	__u32	cpt_ring_pages;
+	__u32	cpt_tail;
+	__u32	cpt_nr;
+	__u64	cpt_mmap_base;
+	/* Data (io_event's) and struct aio_ring are stored in user space VM */
+} __attribute__ ((aligned (8)));
+
+
+/* Format of MM section.
+ *
+ * It is array of MM objects (mm_struct). Each MM object is
+ * header, encoding mm_struct, followed by array of VMA objects.
+ * Each VMA consists of VMA header, encoding vm_area_struct, and
+ * if the VMA contains copied pages, the header is followed by
+ * array of tuples start-end each followed by data.
+ *
+ * ATTN: no block/page alignment. Only 64bit alignment. This might be not good?
+ */
+
+struct cpt_restart_block {
+	__u64	fn;
+#define CPT_RBL_0			0
+#define CPT_RBL_NANOSLEEP		1
+#define CPT_RBL_COMPAT_NANOSLEEP	2
+	__u64	arg0;
+	__u64	arg1;
+	__u64	arg2;
+	__u64	arg3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_siginfo_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_qflags;
+	__u32	cpt_signo;
+	__u32	cpt_errno;
+	__u32	cpt_code;
+
+	__u64	cpt_sigval;
+	__u32	cpt_pid;
+	__u32	cpt_uid;
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+
+	__u64	cpt_user;
+} __attribute__ ((aligned (8)));
+
+/* Portable presentations for segment registers */
+
+#define CPT_SEG_ZERO		0
+#define CPT_SEG_TLS1		1
+#define CPT_SEG_TLS2		2
+#define CPT_SEG_TLS3		3
+#define CPT_SEG_USER32_DS	4
+#define CPT_SEG_USER32_CS	5
+#define CPT_SEG_USER64_DS	6
+#define CPT_SEG_USER64_CS	7
+#define CPT_SEG_LDT		256
+
+struct cpt_x86_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_debugreg[8];
+	__u32	cpt_fs;
+	__u32	cpt_gs;
+
+	__u32	cpt_ebx;
+	__u32	cpt_ecx;
+	__u32	cpt_edx;
+	__u32	cpt_esi;
+	__u32	cpt_edi;
+	__u32	cpt_ebp;
+	__u32	cpt_eax;
+	__u32	cpt_xds;
+	__u32	cpt_xes;
+	__u32	cpt_orig_eax;
+	__u32	cpt_eip;
+	__u32	cpt_xcs;
+	__u32	cpt_eflags;
+	__u32	cpt_esp;
+	__u32	cpt_xss;
+	__u32	cpt_pad;
+};
+
+struct cpt_x86_64_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_debugreg[8];
+
+	__u64	cpt_fsbase;
+	__u64	cpt_gsbase;
+	__u32	cpt_fsindex;
+	__u32	cpt_gsindex;
+	__u32	cpt_ds;
+	__u32	cpt_es;
+
+	__u64	cpt_r15;
+	__u64	cpt_r14;
+	__u64	cpt_r13;
+	__u64	cpt_r12;
+	__u64	cpt_rbp;
+	__u64	cpt_rbx;
+	__u64	cpt_r11;
+	__u64	cpt_r10;	
+	__u64	cpt_r9;
+	__u64	cpt_r8;
+	__u64	cpt_rax;
+	__u64	cpt_rcx;
+	__u64	cpt_rdx;
+	__u64	cpt_rsi;
+	__u64	cpt_rdi;
+	__u64	cpt_orig_rax;
+	__u64	cpt_rip;
+	__u64	cpt_cs;
+	__u64	cpt_eflags;
+	__u64	cpt_rsp;
+	__u64	cpt_ss;
+};
+
+struct cpt_ia64_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	gr[128];
+	__u64	fr[256];
+	__u64	br[8];
+	__u64	nat[2];
+
+	__u64	ar_bspstore;
+	__u64	num_regs;
+	__u64	loadrs;
+	__u64	ar_bsp;
+	__u64	ar_unat;
+	__u64	ar_pfs;
+	__u64	ar_ccv;
+	__u64	ar_fpsr;
+	__u64	ar_csd;
+	__u64	ar_ssd;
+	__u64	ar_ec;
+	__u64	ar_lc;
+	__u64	ar_rsc;
+	__u64	ar_rnat;
+
+	__u64	cr_iip;
+	__u64	cr_ipsr;
+
+	__u64	cfm;
+	__u64	pr;
+
+	__u64	ibr[8];
+	__u64	dbr[8];
+};
+
+
+struct cpt_task_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_state;
+	__u64	cpt_flags;
+#define CPT_TASK_FLAGS_MASK	(PF_EXITING | PF_DEAD | PF_FORKNOEXEC | \
+				 PF_SUPERPRIV | PF_DUMPCORE | PF_SIGNALED)
+	__u64	cpt_ptrace;
+#define CPT_PT_PTRACED		0x00000001
+#define CPT_PT_DTRACE		0x00000002	/* not used? */
+#define CPT_PT_TRACESYSGOOD	0x00000004
+#define CPT_PT_PTRACE_CAP	0x00000008
+#define CPT_PT_TRACE_FORK	0x00000010
+#define CPT_PT_TRACE_VFORK	0x00000020
+#define CPT_PT_TRACE_CLONE	0x00000040
+#define CPT_PT_TRACE_EXEC	0x00000080
+#define CPT_PT_TRACE_VFORK_DONE	0x00000100
+#define CPT_PT_TRACE_EXIT	0x00000200
+#define CPT_PT_ATTACHED		0x00000400
+	__u32	cpt_prio;
+	__u32	cpt_static_prio;
+	__u32	cpt_policy;
+	__u32	cpt_rt_priority;
+
+	/* struct thread_info */
+	__u64	cpt_exec_domain;
+	__u64	cpt_thrflags;
+	__u64	cpt_thrstatus;
+	__u64	cpt_addr_limit;
+
+	__u64	cpt_personality;
+
+	__u64	cpt_mm;
+	__u64	cpt_files;
+	__u64	cpt_fs;
+	__u64	cpt_signal;
+	__u64	cpt_sighand;
+	__u64	cpt_sigblocked;
+	__u64	cpt_sigrblocked;
+	__u64	cpt_sigpending;
+	__u64	cpt_namespace;
+	__u64	cpt_sysvsem_undo;
+	__u32	cpt_pid;
+	__u32	cpt_tgid;
+	__u32	cpt_ppid;
+	__u32	cpt_rppid;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_old_pgrp;
+	__u32	__cpt_pad;
+	__u32	cpt_leader;
+	__u8	cpt_pn_state;
+	__u8	cpt_stopped_state;
+	__u8	cpt_sigsuspend_state;
+	__u8	cpt_64bit;
+	__u64	cpt_set_tid;
+	__u64	cpt_clear_tid;
+	__u32	cpt_exit_code;
+	__u32	cpt_exit_signal;
+	__u32	cpt_pdeath_signal;
+	__u32	cpt_user;
+	__u32	cpt_uid;
+	__u32	cpt_euid;
+	__u32	cpt_suid;
+	__u32	cpt_fsuid;
+	__u32	cpt_gid;
+	__u32	cpt_egid;
+	__u32	cpt_sgid;
+	__u32	cpt_fsgid;
+	__u32	cpt_ngids;
+	__u32	cpt_gids[32];
+	__u8	cpt_prctl_uac;
+	__u8	cpt_prctl_fpemu;
+	__u16	__cpt_pad1;
+	__u64	cpt_ecap;
+	__u64	cpt_icap;
+	__u64	cpt_pcap;
+	__u8	cpt_comm[16];
+	__u64	cpt_tls[3];
+	struct cpt_restart_block cpt_restart;
+	__u64	cpt_it_real_value;	/* V8: jiffies, V9..: nsec */
+	__u64	cpt_it_real_incr;	/* V8: jiffies, V9..: nsec */
+	__u64	cpt_it_prof_value;
+	__u64	cpt_it_prof_incr;
+	__u64	cpt_it_virt_value;
+	__u64	cpt_it_virt_incr;
+
+	__u16	cpt_used_math;
+	__u8	cpt_keepcap;
+	__u8	cpt_did_exec;
+	__u32	cpt_ptrace_message;
+
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+	__u64	cpt_starttime;		/* V8: jiffies, V9...: timespec */
+	__u64	cpt_nvcsw;
+	__u64	cpt_nivcsw;
+	__u64	cpt_min_flt;
+	__u64	cpt_maj_flt;
+
+	__u64	cpt_sigsuspend_blocked;
+	__u64	cpt_cutime, cpt_cstime;
+	__u64	cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_cmin_flt, cpt_cmaj_flt;
+
+#define CPT_RLIM_NLIMITS 16
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+
+	__u64	cpt_task_ub;
+	__u64	cpt_exec_ub;
+	__u64	cpt_mm_ub;
+	__u64	cpt_fork_sub;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sigaltstack_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_stack;
+	__u32	cpt_stacksize;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_task_aux_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_robust_list;
+	__u64	__cpt_future[16];
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_signal_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_leader;
+	__u8	cpt_pgrp_type;
+	__u8	cpt_old_pgrp_type;
+	__u8	cpt_session_type;
+#define CPT_PGRP_NORMAL		0
+#define CPT_PGRP_ORPHAN		1
+#define CPT_PGRP_STRAY		2
+	__u8	__cpt_pad1;
+	__u64	cpt_pgrp;
+	__u64	cpt_old_pgrp;
+	__u64	cpt_session;
+	__u64	cpt_sigpending;
+	__u64	cpt_ctty;
+
+	__u32	cpt_curr_target;
+	__u32	cpt_group_exit;
+	__u32	cpt_group_exit_code;
+	__u32	cpt_group_exit_task;
+	__u32	cpt_notify_count;
+	__u32	cpt_group_stop_count;
+	__u32	cpt_stop_state;
+	__u32	__cpt_pad2;
+
+	__u64	cpt_utime, cpt_stime, cpt_cutime, cpt_cstime;
+	__u64	cpt_nvcsw, cpt_nivcsw, cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_min_flt, cpt_maj_flt, cpt_cmin_flt, cpt_cmaj_flt;
+
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+} __attribute__ ((aligned (8)));
+/* Followed by list of posix timers. */
+
+struct cpt_sighand_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+} __attribute__ ((aligned (8)));
+/* Followed by list of sighandlers. */
+
+struct cpt_sighandler_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+	
+	__u32	cpt_signo;
+	__u32	__cpt_pad1;
+	__u64	cpt_handler;
+	__u64	cpt_restorer;
+	__u64	cpt_flags;
+	__u64	cpt_mask;
+} __attribute__ ((aligned (8)));
+
+struct cpt_netdev_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_flags;
+	__u8	cpt_name[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_tuntap_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_attached;
+	__u64	cpt_flags;
+	__u64	cpt_bindfile;
+	__u64	cpt_if_flags;
+	__u8	cpt_dev_addr[6];
+	__u16	cpt_pad;
+	__u32	cpt_chr_filter[2];
+	__u32	cpt_net_filter[2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_veth_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_allow_mac_change;
+	__u32	__cpt_pad;
+} __attribute__ ((aligned (8)));
+
+struct cpt_tunnel_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_tnl_flags;
+#define CPT_TUNNEL_FBDEV	0x1
+#define CPT_TUNNEL_SIT		0x2
+#define CPT_TUNNEL_GRE		0x4
+	__u16	cpt_i_flags;
+	__u16	cpt_o_flags;
+	__u32	cpt_i_key;
+	__u32	cpt_o_key;
+	__u32	cpt_iphdr[5];
+	__u32	cpt_i_seqno;
+	__u32	cpt_o_seqno;
+} __attribute__ ((aligned (8)));
+
+struct cpt_hwaddr_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u8	cpt_dev_addr[32];
+} __attribute__ ((aligned (8)));
+
+struct cpt_netstats_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_rx_packets;
+	__u64	cpt_tx_packets;
+	__u64	cpt_rx_bytes;
+	__u64	cpt_tx_bytes;
+	__u64	cpt_rx_errors;
+	__u64	cpt_tx_errors;
+	__u64	cpt_rx_dropped;
+	__u64	cpt_tx_dropped;
+	__u64	cpt_multicast;
+	__u64	cpt_collisions;
+	__u64	cpt_rx_length_errors;
+	__u64	cpt_rx_over_errors;
+	__u64	cpt_rx_crc_errors;
+	__u64	cpt_rx_frame_errors;
+	__u64	cpt_rx_fifo_errors;
+	__u64	cpt_rx_missed_errors;
+	__u64	cpt_tx_aborted_errors;
+	__u64	cpt_tx_carrier_errors;
+	__u64	cpt_tx_fifo_errors;
+	__u64	cpt_tx_heartbeat_errors;
+	__u64	cpt_tx_window_errors;
+	__u64	cpt_rx_compressed;
+	__u64	cpt_tx_compressed;
+	__u64	pad[4];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ifaddr_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u8	cpt_family;
+	__u8	cpt_masklen;
+	__u8	cpt_flags;
+	__u8	cpt_scope;
+	__u32	cpt_address[4];
+	__u32	cpt_peer[4];
+	__u32	cpt_broadcast[4];
+	__u8	cpt_label[16];
+	__u32	cpt_valid_lft;
+	__u32	cpt_prefered_lft;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ipct_tuple
+{
+	__u32	cpt_src;
+	__u16	cpt_srcport;
+	__u16	__cpt_pad1;
+
+	__u32	cpt_dst;
+	__u16	cpt_dstport;
+	__u8	cpt_protonum;
+	__u8	cpt_dir;	/* TEMPORARY HACK TO VALIDATE CODE */
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_manip
+{
+	__u8	cpt_direction;
+	__u8	cpt_hooknum;
+	__u8	cpt_maniptype;
+	__u8	__cpt_pad1;
+
+	__u32	cpt_manip_addr;
+	__u16	cpt_manip_port;
+	__u16	__cpt_pad2;
+	__u32	__cpt_pad3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_seq
+{
+	__u32	cpt_correction_pos;
+	__u32	cpt_offset_before;
+	__u32	cpt_offset_after;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_connexpect_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_timeout;
+	__u32	cpt_sibling_conntrack;	/* Index of child conntrack */
+	__u32	cpt_seq;		/* id in 2.6.15 */
+
+	struct cpt_ipct_tuple	cpt_ct_tuple;	/* NU 2.6.15 */
+	struct cpt_ipct_tuple	cpt_tuple;
+	struct cpt_ipct_tuple	cpt_mask;
+
+	/* union ip_conntrack_expect_help. Used by ftp, irc, amanda */
+	__u32	cpt_help[3];			/* NU 2.6.15 */
+	__u16	cpt_manip_proto;
+	__u8	cpt_dir;
+	__u8	cpt_flags;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_conntrack_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	struct cpt_ipct_tuple cpt_tuple[2];
+	__u64	cpt_status;
+	__u64	cpt_timeout;
+	__u32	cpt_index;
+	__u8	cpt_ct_helper;
+	__u8	cpt_nat_helper;
+	__u16	cpt_pad1;
+
+	/* union ip_conntrack_proto. Used by tcp and icmp. */
+	__u32	cpt_proto_data[12];
+
+	/* union ip_conntrack_help. Used by ftp and pptp helper.
+	 * We do not support pptp...
+	 */
+	__u32	cpt_help_data[6];
+
+	/* nat info */
+	__u32	cpt_initialized;	/* NU 2.6.15 */
+	__u32	cpt_num_manips;		/* NU 2.6.15 */
+	struct  cpt_nat_manip	cpt_nat_manips[6];	/* NU 2.6.15 */
+
+	struct	cpt_nat_seq	cpt_nat_seq[2];
+
+	__u32	cpt_masq_index;
+	__u32	cpt_id;
+	__u32	cpt_mark;
+} __attribute__ ((aligned (8)));
+
+/* cpt_ip_conntrack_image struct from 2.6.9 kernel */
+struct cpt_ip_conntrack_image_compat
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	struct cpt_ipct_tuple cpt_tuple[2];
+	__u64	cpt_status;
+	__u64	cpt_timeout;
+	__u32	cpt_index;
+	__u8	cpt_ct_helper;
+	__u8	cpt_nat_helper;
+	__u16	__cpt_pad1;
+
+	/* union ip_conntrack_proto. Used by tcp and icmp. */
+	__u32	cpt_proto_data[12];
+
+	/* union ip_conntrack_help. Used only by ftp helper. */
+	__u32	cpt_help_data[4];
+
+	/* nat info */
+	__u32	cpt_initialized;
+	__u32	cpt_num_manips;
+	struct  cpt_nat_manip	cpt_nat_manips[6];
+
+	struct	cpt_nat_seq	cpt_nat_seq[2];
+
+	__u32	cpt_masq_index;
+	__u32	__cpt_pad2;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ubparm
+{
+	__u64	barrier;
+	__u64	limit;
+	__u64	held;
+	__u64	maxheld;
+	__u64	minheld;
+	__u64	failcnt;
+} __attribute__ ((aligned (8)));
+
+struct cpt_beancounter_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_parent;
+	__u32	cpt_id;
+	__u32   cpt_ub_resources;
+	struct	cpt_ubparm	cpt_parms[32 * 2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_slm_sgreg_image {
+	__u64   cpt_next;
+	__u32   cpt_object;
+	__u16   cpt_hdrlen;
+	__u16   cpt_content;
+
+	__u32   cpt_size;
+	__u32   __cpt_pad1;
+	__u32   cpt_id;
+	__u16   cpt_resource;
+	__u8    cpt_regname[32];
+	__u8	__cpt_pad2[2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_slm_obj_image {      
+	__u64   cpt_next;
+	__u32   cpt_object;
+	__u16   cpt_hdrlen;
+	__u16   cpt_content;
+
+	__u32   cpt_size;
+	__u32   __cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+#ifdef __KERNEL__
+
+static inline void __user * cpt_ptr_import(__u64 ptr)
+{
+	return (void*)(unsigned long)ptr;
+}
+
+static inline __u64 cpt_ptr_export(void __user *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static inline void cpt_sigset_import(sigset_t *sig, __u64 ptr)
+{
+	memcpy(sig, &ptr, sizeof(*sig));
+}
+
+static inline __u64 cpt_sigset_export(sigset_t *sig)
+{
+	return *(__u64*)sig;
+}
+
+static inline __u64 cpt_timespec_export(struct timespec *tv)
+{
+	return (((u64)tv->tv_sec) << 32) + tv->tv_nsec;
+}
+
+static inline void cpt_timespec_import(struct timespec *tv, __u64 val)
+{
+	tv->tv_sec = val>>32;
+	tv->tv_nsec = (val&0xFFFFFFFF);
+}
+
+static inline __u64 cpt_timeval_export(struct timeval *tv)
+{
+	return (((u64)tv->tv_sec) << 32) + tv->tv_usec;
+}
+
+static inline void cpt_timeval_import(struct timeval *tv, __u64 val)
+{
+	tv->tv_sec = val>>32;
+	tv->tv_usec = (val&0xFFFFFFFF);
+}
+
+#endif
+
+#endif /* __CPT_IMAGE_H_ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/cpt_ioctl.h kernel-2.6.18-417.el5-028stab121/include/linux/cpt_ioctl.h
--- kernel-2.6.18-417.el5.orig/include/linux/cpt_ioctl.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/cpt_ioctl.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,52 @@
+/*
+ *
+ *  include/linux/cpt_ioctl.h
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _CPT_IOCTL_H_
+#define _CPT_IOCTL_H_ 1
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define CPTCTLTYPE '-'
+#define CPT_SET_DUMPFD	_IOW(CPTCTLTYPE, 1, int)
+#define CPT_SET_STATUSFD _IOW(CPTCTLTYPE, 2, int)
+#define CPT_SET_LOCKFD	_IOW(CPTCTLTYPE, 3, int)
+#define CPT_SET_VEID	_IOW(CPTCTLTYPE, 4, int)
+#define CPT_SUSPEND	_IO(CPTCTLTYPE, 5)
+#define CPT_DUMP	_IO(CPTCTLTYPE, 6)
+#define CPT_UNDUMP	_IO(CPTCTLTYPE, 7)
+#define CPT_RESUME	_IO(CPTCTLTYPE, 8)
+#define CPT_KILL	_IO(CPTCTLTYPE, 9)
+#define CPT_JOIN_CONTEXT _IO(CPTCTLTYPE, 10)
+#define CPT_GET_CONTEXT _IOW(CPTCTLTYPE, 11, unsigned int)
+#define CPT_PUT_CONTEXT _IO(CPTCTLTYPE, 12)
+#define CPT_SET_PAGEINFDIN _IOW(CPTCTLTYPE, 13, int)
+#define CPT_SET_PAGEINFDOUT _IOW(CPTCTLTYPE, 14, int)
+#define CPT_PAGEIND	_IO(CPTCTLTYPE, 15)
+#define CPT_VMPREP	_IOW(CPTCTLTYPE, 16, int)
+#define CPT_SET_LAZY	_IOW(CPTCTLTYPE, 17, int)
+#define CPT_SET_CPU_FLAGS _IOW(CPTCTLTYPE, 18, unsigned int)
+#define CPT_TEST_CAPS	_IOW(CPTCTLTYPE, 19, unsigned int)
+#define CPT_TEST_VECAPS	_IOW(CPTCTLTYPE, 20, unsigned int)
+#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
+
+#define CPT_ITER	_IOW(CPTCTLTYPE, 23, int)
+#define CPT_LINKDIR_ADD	_IOW(CPTCTLTYPE, 24, int)
+#define CPT_HARDLNK_ON	_IOW(CPTCTLTYPE, 25, int)
+
+#define CPT_TEST_VERSION _IOW(CPTCTLTYPE, 26, int)
+
+#ifdef __KERNEL__
+extern void cpt_wake_ve(void);
+extern int  cpt_freeze_ve(struct ve_struct *);
+#endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/cpu.h kernel-2.6.18-417.el5-028stab121/include/linux/cpu.h
--- kernel-2.6.18-417.el5.orig/include/linux/cpu.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/cpu.h	2017-01-13 08:40:15.000000000 -0500
@@ -44,10 +44,20 @@ struct notifier_block;
 
 #ifdef CONFIG_SMP
 /* Need to know about CPUs going up/down? */
-extern int register_cpu_notifier(struct notifier_block *nb);
 #ifdef CONFIG_HOTPLUG_CPU
+extern int register_cpu_notifier(struct notifier_block *nb);
 extern void unregister_cpu_notifier(struct notifier_block *nb);
 #else
+
+#ifndef MODULE
+extern int register_cpu_notifier(struct notifier_block *nb);
+#else
+static inline int register_cpu_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+#endif
+
 static inline void unregister_cpu_notifier(struct notifier_block *nb)
 {
 }
@@ -80,17 +90,13 @@ extern void unlock_cpu_hotplug(void);
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
 int cpu_down(unsigned int cpu);
-#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 #else
 #define lock_cpu_hotplug()	do { } while (0)
 #define unlock_cpu_hotplug()	do { } while (0)
 #define lock_cpu_hotplug_interruptible() 0
 #define hotcpu_notifier(fn, pri)	do { } while (0)
-#define register_hotcpu_notifier(nb)	do { } while (0)
+#define register_hotcpu_notifier(nb)	(0)
 #define unregister_hotcpu_notifier(nb)	do { } while (0)
-
-/* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */
-static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
 #ifdef CONFIG_SUSPEND_SMP
diff -upr kernel-2.6.18-417.el5.orig/include/linux/cpumask.h kernel-2.6.18-417.el5-028stab121/include/linux/cpumask.h
--- kernel-2.6.18-417.el5.orig/include/linux/cpumask.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/cpumask.h	2017-01-13 08:40:15.000000000 -0500
@@ -397,6 +397,8 @@ extern cpumask_t cpu_present_map;
 #define cpu_present(cpu)	((cpu) == 0)
 #endif
 
+#define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
+
 #ifdef CONFIG_SMP
 int highest_possible_processor_id(void);
 #define any_online_cpu(mask) __any_online_cpu(&(mask))
diff -upr kernel-2.6.18-417.el5.orig/include/linux/dcache.h kernel-2.6.18-417.el5-028stab121/include/linux/dcache.h
--- kernel-2.6.18-417.el5.orig/include/linux/dcache.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/dcache.h	2017-01-13 08:40:23.000000000 -0500
@@ -9,6 +9,8 @@
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
 
+#include <ub/ub_dcache.h>
+
 struct nameidata;
 struct vfsmount;
 
@@ -116,6 +118,9 @@ struct dentry {
 #define DMANAGED_MOUNTPOINT	0x0fffffff /* mountpoint count */
 #define DMANAGED_AUTOMOUNT	0x10000000 /* handle automount flag */
 #define DMANAGED_TRANSIT	0x20000000 /* managed transit */
+#ifdef CONFIG_USER_RESOURCE
+	struct dentry_beancounter dentry_bc;
+#endif
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
@@ -189,9 +194,15 @@ d_manage:	no		no		no	 yes
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
+#define DCACHE_VIRTUAL		0x0100	/* ve accessible */
+#define DCACHE_LOCALCACHE	0x0200	/* when FS is distributed, this dentry
+					 * is local only */
+
+extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
 
 #define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
 
+extern kmem_cache_t *dentry_cache;
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
@@ -317,6 +328,7 @@ char * __d_path( struct dentry *dentry, 
 		 struct dentry *root, struct vfsmount *rootmnt,
 		 char *buffer, int buflen);
 
+extern int d_root_check(struct dentry *, struct vfsmount *);
 extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
 
 /* Allocation counts.. */
@@ -337,6 +349,12 @@ extern char * d_path(struct dentry *, st
 static inline struct dentry *dget(struct dentry *dentry)
 {
 	if (dentry) {
+#ifdef CONFIG_USER_RESOURCE
+		preempt_disable();
+		if (ub_dentry_on && ub_dget_testone(dentry))
+			BUG();
+		preempt_enable_no_resched();
+#endif
 		BUG_ON(!atomic_read(&dentry->d_count));
 		atomic_inc(&dentry->d_count);
 	}
@@ -385,6 +403,8 @@ extern struct dentry *lookup_create(stru
 
 extern int sysctl_vfs_cache_pressure;
 
+extern int check_area_access_ve(struct dentry *, struct vfsmount *);
+extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_DCACHE_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/device.h kernel-2.6.18-417.el5-028stab121/include/linux/device.h
--- kernel-2.6.18-417.el5.orig/include/linux/device.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/device.h	2017-01-13 08:40:22.000000000 -0500
@@ -269,6 +269,7 @@ class_set_devdata (struct class_device *
 
 extern int class_device_register(struct class_device *);
 extern void class_device_unregister(struct class_device *);
+extern void class_device_virtualize(struct class_device *);
 extern void class_device_initialize(struct class_device *);
 extern int class_device_add(struct class_device *);
 extern void class_device_del(struct class_device *);
@@ -308,6 +309,8 @@ extern struct class_device *class_device
 					__attribute__((format(printf,5,6)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
+extern struct class net_class;
+
 
 /* interface for exporting device attributes */
 struct device_attribute {
diff -upr kernel-2.6.18-417.el5.orig/include/linux/devpts_fs.h kernel-2.6.18-417.el5-028stab121/include/linux/devpts_fs.h
--- kernel-2.6.18-417.el5.orig/include/linux/devpts_fs.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/devpts_fs.h	2017-01-13 08:40:20.000000000 -0500
@@ -21,6 +21,16 @@ int devpts_pty_new(struct tty_struct *tt
 struct tty_struct *devpts_get_tty(int number);	 /* get tty structure */
 void devpts_pty_kill(int number);		 /* unlink */
 
+struct devpts_config {
+	int setuid;
+	int setgid;
+	uid_t   uid;
+	gid_t   gid;
+	umode_t mode;
+};
+
+extern struct devpts_config devpts_config;
+extern struct file_system_type devpts_fs_type;
 #else
 
 /* Dummy stubs in the no-pty case */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/elevator.h kernel-2.6.18-417.el5-028stab121/include/linux/elevator.h
--- kernel-2.6.18-417.el5.orig/include/linux/elevator.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/elevator.h	2017-01-13 08:40:18.000000000 -0500
@@ -8,6 +8,8 @@ typedef void (elevator_merge_req_fn) (re
 
 typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
 
+typedef int (elevator_allow_merge_fn) (request_queue_t *, struct request *, struct bio *);
+
 typedef int (elevator_dispatch_fn) (request_queue_t *, int);
 
 typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
@@ -29,6 +31,7 @@ struct elevator_ops
 	elevator_merge_fn *elevator_merge_fn;
 	elevator_merged_fn *elevator_merged_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
+	elevator_allow_merge_fn *elevator_allow_merge_fn;
 
 	elevator_dispatch_fn *elevator_dispatch_fn;
 	elevator_add_req_fn *elevator_add_req_fn;
diff -upr kernel-2.6.18-417.el5.orig/include/linux/elfcore.h kernel-2.6.18-417.el5-028stab121/include/linux/elfcore.h
--- kernel-2.6.18-417.el5.orig/include/linux/elfcore.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/elfcore.h	2017-01-13 08:40:24.000000000 -0500
@@ -7,6 +7,8 @@
 #include <linux/user.h>
 #include <linux/ptrace.h>
 
+extern int sysctl_at_vsyscall;
+
 struct elf_siginfo
 {
 	int	si_signo;			/* signal number */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/eventpoll.h kernel-2.6.18-417.el5-028stab121/include/linux/eventpoll.h
--- kernel-2.6.18-417.el5.orig/include/linux/eventpoll.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/eventpoll.h	2017-01-13 08:40:40.000000000 -0500
@@ -14,8 +14,12 @@
 #ifndef _LINUX_EVENTPOLL_H
 #define _LINUX_EVENTPOLL_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 #include <linux/types.h>
 
+/* Flags for epoll_create1.  */
+#define EPOLL_CLOEXEC O_CLOEXEC
 
 /* Valid opcodes to issue to sys_epoll_ctl() */
 #define EPOLL_CTL_ADD 1
@@ -58,6 +62,98 @@ static inline void eventpoll_init_file(s
 	spin_lock_init(&file->f_ep_lock);
 }
 
+struct epoll_filefd {
+	struct file *file;
+	int fd;
+	int added;
+};
+
+/*
+ * This structure is stored inside the "private_data" member of the file
+ * structure and represents the main data structure for the eventpoll
+ * interface.
+ */
+struct eventpoll {
+	/* Protects access to this structure */
+	rwlock_t lock;
+
+	/*
+	 * This semaphore is used to ensure that files are not removed
+	 * while epoll is using them. This is read-held during the event
+	 * collection loop and it is write-held during the file cleanup
+	 * path, the epoll file exit code and the ctl operations.
+	 */
+	struct rw_semaphore sem;
+
+	/* Wait queue used by sys_epoll_wait() */
+	wait_queue_head_t wq;
+
+	/* Wait queue used by file->poll() */
+	wait_queue_head_t poll_wait;
+
+	/* List of ready file descriptors */
+	struct list_head rdllist;
+
+	/* RB-Tree root used to store monitored fd structs */
+	struct rb_root rbr;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visitedllink;
+};
+
+/*
+ * Each file descriptor added to the eventpoll interface will
+ * have an entry of this type linked to the hash.
+ */
+struct epitem {
+	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
+	struct rb_node rbn;
+
+	/* List header used to link this structure to the eventpoll ready list */
+	struct list_head rdllink;
+
+	/* The file descriptor information this item refers to */
+	struct epoll_filefd ffd;
+
+	/* Number of active wait queues attached to poll operations */
+	int nwait;
+
+	/* List containing poll wait queues */
+	struct list_head pwqlist;
+
+	/* The "container" of this item */
+	struct eventpoll *ep;
+
+	/* The structure that describes the interested events and the source fd */
+	struct epoll_event event;
+
+	/*
+	 * Used to keep track of the usage count of the structure. This prevents
+	 * the structure from disappearing from underneath our processing.
+	 */
+	atomic_t usecnt;
+
+	/* List header used to link this item to the "struct file" items list */
+	struct list_head fllink;
+
+	/* List header used to link the item to the transfer list */
+	struct list_head txlink;
+
+	/*
+	 * This is used during the collection/transfer of events to userspace
+	 * to pin items empty events set.
+	 */
+	unsigned int revents;
+};
+
+extern struct semaphore epsem;
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+void ep_release_epitem(struct epitem *epi);
 
 /* Used to release the epoll bits inside the "struct file" */
 void eventpoll_release_file(struct file *file);
@@ -90,6 +186,8 @@ static inline void eventpoll_release(str
 	eventpoll_release_file(file);
 }
 
+extern struct mutex epmutex;
+
 #else
 
 static inline void eventpoll_init_file(struct file *file) {}
diff -upr kernel-2.6.18-417.el5.orig/include/linux/fairsched.h kernel-2.6.18-417.el5-028stab121/include/linux/fairsched.h
--- kernel-2.6.18-417.el5.orig/include/linux/fairsched.h	2017-01-13 08:40:28.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/fairsched.h	2017-01-13 08:40:28.000000000 -0500
@@ -0,0 +1,141 @@
+#ifndef __LINUX_FAIRSCHED_H__
+#define __LINUX_FAIRSCHED_H__
+
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#define FAIRSCHED_SET_RATE	0
+#define FAIRSCHED_DROP_RATE	1
+#define FAIRSCHED_GET_RATE	2
+
+#ifdef __KERNEL__
+#include <linux/cache.h>
+#include <asm/timex.h>
+
+#define FAIRSCHED_HAS_CPU_BINDING	0
+
+typedef struct { cycles_t t; } fschtag_t;
+typedef struct { unsigned long d; } fschdur_t;
+typedef struct { cycles_t v; } fschvalue_t;
+
+struct vcpu_scheduler;
+
+struct fairsched_node {
+	struct list_head runlist;
+
+	/*
+	 * Fair Scheduler fields
+	 *
+	 * nr_running >= nr_ready (!= if delayed)
+	 */
+	fschtag_t start_tag;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int vcpus;
+
+	/*
+	 * Rate limiter fields
+	 */
+	cycles_t last_updated_at;
+	fschvalue_t value;	/* leaky function value */
+	cycles_t delay;		/* removed from schedule till */
+	unsigned char delayed;
+
+	/*
+	 * Configuration
+	 *
+	 * Read-only most of the time.
+	 */
+	unsigned weight ____cacheline_aligned_in_smp;
+				/* fairness weight */
+	unsigned char rate_limited;
+	unsigned rate;		/* max CPU share */
+	fschtag_t max_latency;
+	unsigned min_weight;
+
+	struct list_head nodelist;
+	int id;
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
+	struct vcpu_scheduler *vsched;
+};
+
+#define for_each_fairsched_node(n)	\
+	list_for_each_entry((n), &fairsched_node_head, nodelist)
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FSCHWEIGHT_MAX			((1 << 16) - 1)
+#define FSCHRATE_SHIFT			10
+/* 
+ * Fairsched timeslice value (in msecs) specifies the maximum possible time a
+ * node can run continuously without rescheduling; in other words, the
+ * main linux scheduler must call fairsched_scheduler() within
+ * FSCH_TIMESLICE msecs or the fairscheduler logic will be broken.
+ *
+ * Should be bigger for better performance, and smaller for good interactivity.
+ */
+#define FSCH_TIMESLICE			16
+
+/*
+ * Fairsched nodes used in boot process.
+ */
+extern struct fairsched_node fairsched_init_node;
+extern struct fairsched_node fairsched_idle_node;
+
+/*
+ * For proc output.
+ */
+extern unsigned fairsched_nr_cpus;
+extern void fairsched_cpu_online_map(int id, cpumask_t *mask);
+
+/* I hope vsched_id is always equal to fairsched node id  --SAW */
+#define task_fairsched_node_id(p)	task_vsched_id(p)
+
+/*
+ * Core functions.
+ */
+extern void fairsched_incrun(struct fairsched_node *node);
+extern void fairsched_decrun(struct fairsched_node *node);
+extern void fairsched_inccpu(struct fairsched_node *node);
+extern void fairsched_deccpu(struct fairsched_node *node);
+
+extern struct fairsched_node *fairsched_first(struct fairsched_node *cur_node,
+		cycles_t time);
+extern struct fairsched_node *fairsched_next(struct fairsched_node *cur_node,
+		struct fairsched_node *node);
+extern void fairsched_switch(struct fairsched_node *prev_node,
+		struct fairsched_node *next_node, cycles_t time);
+
+/*
+ * Management functions.
+ */
+void fairsched_init_early(void);
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid);
+int fairsched_new_node(int id, unsigned int vcpus);
+void fairsched_drop_node(int id);
+
+#else /* CONFIG_FAIRSCHED */
+
+#define task_fairsched_node_id(p)	0
+#define fairsched_incrun(p)		do { } while (0)
+#define fairsched_decrun(p)		do { } while (0)
+#define fairsched_inccpu(p)		do { } while (0)
+#define fairsched_deccpu(p)		do { } while (0)
+#define fairsched_cpu_online_map(id, mask)      do { *(mask) = cpu_online_map; } while (0)
+#define fairsched_new_node(id, vcpud)	0
+#define fairsched_drop_node(id)		do { } while (0)
+
+#endif /* CONFIG_FAIRSCHED */
+#endif /* __KERNEL__ */
+
+#endif /* __LINUX_FAIRSCHED_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/faudit.h kernel-2.6.18-417.el5-028stab121/include/linux/faudit.h
--- kernel-2.6.18-417.el5.orig/include/linux/faudit.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/faudit.h	2017-01-13 08:40:16.000000000 -0500
@@ -0,0 +1,46 @@
+/*
+ *  include/linux/faudit.h
+ *
+ *  Copyright (C) 2005  SWSoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __FAUDIT_H_
+#define __FAUDIT_H_
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+
+struct vfsmount;
+struct dentry;
+struct super_block;
+struct kstatfs;
+struct kstat;
+struct pt_regs;
+
+struct faudit_regs_arg {
+	int err;
+	struct pt_regs *regs;
+};
+
+struct faudit_stat_arg {
+	int err;
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+	struct kstat *stat;
+};
+
+struct faudit_statfs_arg {
+	int err;
+	struct super_block *sb;
+	struct kstatfs *stat;
+};
+
+#define VIRTINFO_FAUDIT			(0)
+#define VIRTINFO_FAUDIT_STAT		(VIRTINFO_FAUDIT + 0)
+#define VIRTINFO_FAUDIT_STATFS		(VIRTINFO_FAUDIT + 1)
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/file.h kernel-2.6.18-417.el5-028stab121/include/linux/file.h
--- kernel-2.6.18-417.el5.orig/include/linux/file.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/file.h	2017-01-13 08:40:40.000000000 -0500
@@ -78,7 +78,9 @@ extern struct file * FASTCALL(fget_light
 extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag));
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
+extern int get_unused_fd_flags(int flags);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
+struct file *get_task_file(pid_t pid, int fd);
 struct kmem_cache;
 
 extern struct file ** alloc_fd_array(int);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/fs.h kernel-2.6.18-417.el5-028stab121/include/linux/fs.h
--- kernel-2.6.18-417.el5.orig/include/linux/fs.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/fs.h	2017-01-13 08:40:40.000000000 -0500
@@ -48,6 +48,9 @@ struct inodes_stat_t {
 extern struct inodes_stat_t inodes_stat;
 
 extern int leases_enable, lease_break_time;
+extern int odirect_enable;
+extern int snapapi_enable;
+extern int lsyscall_enable;
 
 #ifdef CONFIG_DNOTIFY
 extern int dir_notify_enable;
@@ -67,6 +70,7 @@ extern int dir_notify_enable;
 #define FMODE_LSEEK	4
 #define FMODE_PREAD	8
 #define FMODE_PWRITE	FMODE_PREAD	/* These go hand in hand */
+#define FMODE_QUOTACTL	4
 
 /* File is being opened for execution. Primary users of this flag are
    distributed filesystems that can use it to achieve correct ETXTBUSY
@@ -102,6 +106,9 @@ extern int dir_notify_enable;
 #define FS_HAS_TRYTOFREE 32   /* Safe to check for ->bdev_try_to_free... */
 #define FS_HAS_GETRESV 64     /* Safe to check for ->get_reserved_space */
 #define FS_HAS_IODONE2 128    /* dio->io_done is type dio_iodone2_t */
+#define FS_VIRTUALIZED	256	/* Can mount this fstype inside ve */
+#define FS_NFS_EXPORTABLE 512
+#define FS_MANGLE_PROC	1024	/* hide some /proc/mounts info inside VE */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
 					 * during rename() internally.
@@ -118,6 +125,7 @@ extern int dir_notify_enable;
 #define MS_REMOUNT	32	/* Alter flags of a mounted FS */
 #define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
 #define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_CPTMOUNT	256
 #define MS_NOATIME	1024	/* Do not update access times. */
 #define MS_NODIRATIME	2048	/* Do not update directory access times */
 #define MS_BIND		4096
@@ -134,8 +142,9 @@ extern int dir_notify_enable;
 #define MS_NO_LEASES	(1<<21)	/* fs does not support leases */
 #define MS_HAS_SETLEASE        (1<<22) /* fs supports setlease fop */
 #define MS_I_VERSION	(1<<23)	/* Update inode I_version field */
-#define MS_HAS_NEW_AOPS	(1<<24) /* fs supports new aops */
+#define MS_STRICTATIME	(1<<24)	/* Always perform atime updates */
 #define MS_HAS_LAUNDER_PAGE	(1<<25) /* fs supports launder_page */
+#define MS_HAS_NEW_AOPS	(1<<26) /* fs supports new aops */
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
 
@@ -167,6 +176,9 @@ extern int dir_notify_enable;
 /* RHEL only flags -- These are not upstream! */
 #define S_NOATTRKILL	65536	/* don't convert ATTR_KILL_* to mode change */
 
+/* VZ flags -- These are not upstream! */
+#define S_NOUNUSE	(1 << 17) /* just destroy inode in cleanup */
+
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
  * flags just means all the inodes inherit those flags by default. It might be
@@ -389,6 +401,9 @@ struct iattr {
  * Includes for diskquotas.
  */
 #include <linux/quota.h>
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+#include <linux/vzquota_qlnk.h>
+#endif
 
 /** 
  * enum positive_aop_returns - aop return codes with specific semantics
@@ -543,6 +558,7 @@ struct address_space {
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
+	struct user_beancounter *dirtied_ub;
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
@@ -663,6 +679,9 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_ilink	i_qlnk;
+#endif
 	struct list_head	i_devices;
 	union {
 		struct pipe_inode_info	*i_pipe;
@@ -716,6 +735,8 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_QUOTA
 };
 
+extern kmem_cache_t *inode_cachep;
+
 /*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic
@@ -803,6 +824,11 @@ struct file_ra_state {
 #define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
 #define RA_FLAG_INCACHE 0x02	/* file is already in cache */
 
+struct file_list {
+	spinlock_t		fl_lock;
+	struct list_head	fl_list;
+};
+
 struct file {
 	/*
 	 * fu_list becomes invalid after file_free is called and queued via
@@ -812,16 +838,19 @@ struct file {
 		struct list_head	fu_list;
 		struct rcu_head 	fu_rcuhead;
 	} f_u;
+	struct file_list	*f_list;
 	struct dentry		*f_dentry;
 	struct vfsmount         *f_vfsmnt;
 	const struct file_operations	*f_op;
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
+	char			f_heavy;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
 	struct file_ra_state	f_ra;
+	struct user_beancounter	*f_ub;
 
 	unsigned long		f_version;
 	void			*f_security;
@@ -835,10 +864,8 @@ struct file {
 	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	struct ve_struct	*owner_env;
 };
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);
 
 #define get_file(x)	atomic_inc(&(x)->f_count)
 #define file_count(x)	atomic_read(&(x)->f_count)
@@ -862,6 +889,7 @@ extern spinlock_t files_lock;
 #define FL_LEASE	32	/* lease held on this file */
 #define FL_CLOSE	64	/* unlock on close */
 #define FL_SLEEP	128	/* A blocking lock */
+#define FL_LOCAL	256	/* A local lock */
 
 /*
  * The POSIX file lock owner is determined by
@@ -877,6 +905,7 @@ struct file_lock_operations {
 	void (*fl_remove)(struct file_lock *);	/* lock removal callback */
 	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
 	void (*fl_release_private)(struct file_lock *);
+	int (*fl_owner_id)(struct file_lock *);
 };
 
 struct lock_manager_operations {
@@ -903,8 +932,14 @@ struct file_lock {
 	unsigned int fl_pid;
 	wait_queue_head_t fl_wait;
 	struct file *fl_file;
-	unsigned char fl_flags;
+	unsigned short fl_flags;
 	unsigned char fl_type;
+#ifdef CONFIG_USER_RESOURCE
+	unsigned char fl_charged;
+#endif
+#ifdef CONFIG_VE
+	unsigned char fl_notpid;
+#endif
 	loff_t fl_start;
 	loff_t fl_end;
 
@@ -919,6 +954,14 @@ struct file_lock {
 	} fl_u;
 };
 
+#ifdef CONFIG_VE
+#define set_flock_notpid(fl)	do { (fl)->fl_notpid = 1; } while (0)
+#define is_flock_notpid(fl)	((fl)->fl_notpid == 1)
+#else
+#define set_flock_notpid(fl)	do { } while (0)
+#define is_flock_notpid(fl)	0
+#endif
+
 /* The following constant reflects the upper bound of the file/locking space */
 #ifndef OFFSET_MAX
 #define INT_LIMIT(x)	(~((x)1 << (sizeof(x)*8 - 1)))
@@ -926,6 +969,9 @@ struct file_lock {
 #define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
 #endif
 
+struct file_lock *locks_alloc_lock(int);
+void locks_free_lock(struct file_lock *);
+
 #include <linux/fcntl.h>
 
 extern int fcntl_getlk(struct file *, struct flock __user *);
@@ -1031,7 +1077,7 @@ struct super_block {
 	struct list_head	s_dirty;	/* dirty inodes */
 	struct list_head	s_io;		/* parked for writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
-	struct list_head	s_files;
+	struct file_list	*s_files;	/* percpu */
 
 	struct block_device	*s_bdev;
 	struct list_head	s_instances;
@@ -1058,6 +1104,47 @@ struct super_block {
 extern struct timespec current_fs_time(struct super_block *sb);
 
 /*
+ * File lists
+ */
+
+static inline void file_list_init(struct file_list *list)
+{
+	spin_lock_init(&list->fl_lock);
+	INIT_LIST_HEAD(&list->fl_list);
+}
+
+static inline void file_list_lock(struct file_list *list)
+{
+	spin_lock(&list->fl_lock);
+}
+
+static inline void file_list_unlock(struct file_list *list)
+{
+	spin_unlock(&list->fl_lock);
+}
+
+static inline void file_list_lock_sb(struct super_block *sb)
+{
+	int cpu;
+	for_each_possible_cpu(cpu)
+		spin_lock_nested(&per_cpu_ptr(sb->s_files, cpu)->fl_lock, cpu);
+}
+
+static inline void file_list_unlock_sb(struct super_block *sb)
+{
+	int cpu;
+	for_each_possible_cpu(cpu)
+		spin_unlock(&per_cpu_ptr(sb->s_files, cpu)->fl_lock);
+}
+
+#define for_each_fl_file(file, list) \
+		list_for_each_entry(file, &(list)->fl_list, f_u.fu_list)
+
+#define for_each_sb_file(file, sb, cpu)	\
+		for_each_possible_cpu(cpu) \
+		for_each_fl_file(file, per_cpu_ptr((sb)->s_files, cpu))
+
+/*
  * Snapshotting support.
  */
 enum {
@@ -1218,9 +1305,9 @@ struct file_operations {
 	struct module *owner;
 	loff_t (*llseek) (struct file *, loff_t, int);
 	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
-	ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	int (*readdir) (struct file *, void *, filldir_t);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1244,6 +1331,7 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+	struct file * (*get_host)(struct file*);
 };
 
 struct file_operations_ext {
@@ -1286,6 +1374,11 @@ struct inode_operations {
 
 struct seq_file;
 
+ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
+				unsigned long nr_segs, unsigned long fast_segs,
+				struct iovec *fast_pointer,
+				struct iovec **ret_pointer);
+
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@ -1318,11 +1411,14 @@ struct super_operations {
 	void (*clear_inode) (struct inode *);
 	void (*umount_begin) (struct vfsmount *, int);
 
+	void (*show_type)(struct seq_file *, struct super_block *sb);
 	int (*show_options)(struct seq_file *, struct vfsmount *);
 	int (*show_stats)(struct seq_file *, struct vfsmount *);
 
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+
+	struct inode *(*get_quota_root)(struct super_block *);
 #ifndef __GENKSYMS__
 	int (*freeze_fs) (struct super_block *);
 	int (*unfreeze_fs) (struct super_block *);
@@ -1558,8 +1654,13 @@ struct file_system_type {
 	struct list_head fs_supers;
 	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
+	struct file_system_type *proto;
+	struct ve_struct *owner_env;
 };
 
+void get_filesystem(struct file_system_type *fs);
+void put_filesystem(struct file_system_type *fs);
+
 extern int get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int),
@@ -1598,10 +1699,16 @@ void unnamed_dev_init(void);
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
+extern int register_ve_fs_type(struct ve_struct *, struct file_system_type *,
+		struct file_system_type **, struct vfsmount **);
+extern void unregister_ve_fs_type(struct file_system_type *, struct vfsmount *);
+extern void umount_ve_fs_type(struct file_system_type *local_fs_type, int veid);
 extern struct vfsmount *kern_mount(struct file_system_type *);
 extern int may_umount_tree(struct vfsmount *);
+extern struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root);
 extern int may_umount(struct vfsmount *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
+#define kern_umount mntput
 extern void release_mounts(struct list_head *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
@@ -1611,6 +1718,7 @@ extern struct vfsmount *collect_mounts(s
 extern void drop_collected_mounts(struct vfsmount *);
 
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
+extern int faudit_statfs(struct super_block *, struct kstatfs *);
 
 /* /sys/fs */
 extern struct subsystem fs_subsys;
@@ -1727,7 +1835,7 @@ extern void chrdev_show(struct seq_file 
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
+extern struct block_device *lookup_bdev(const char *, int mode);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 extern void blkdev_show(struct seq_file *,off_t);
@@ -1758,7 +1866,7 @@ extern void check_disk_size_change(struc
 				   struct block_device *bdev);
 extern int revalidate_disk(struct gendisk *);
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, int);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
 unsigned long __invalidate_mapping_pages(struct address_space *mapping,
@@ -1888,7 +1996,7 @@ static inline void insert_inode_hash(str
 }
 
 extern struct file * get_empty_filp(void);
-extern void file_move(struct file *f, struct list_head *list);
+extern void file_move(struct file *f, struct file_list *list);
 extern void file_kill(struct file *f);
 struct bio;
 extern void submit_bio(int, struct bio *);
@@ -1904,11 +2012,11 @@ extern int file_send_actor(read_descript
 extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
-		unsigned long, loff_t *);
+		unsigned long, loff_t);
 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
 		unsigned long *, loff_t, loff_t *, size_t, size_t);
 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
@@ -2057,6 +2165,7 @@ extern int generic_block_fiemap(struct i
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
 
+extern int check_fs_presence(const char *name);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
 extern struct super_block *user_get_super(dev_t);
@@ -2112,6 +2221,7 @@ extern int inode_newsize_ok(const struct
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
 extern void file_update_time(struct file *file);
+extern void inode_update_time(struct inode *inode);
 
 static inline ino_t parent_ino(struct dentry *dentry)
 {
@@ -2225,5 +2335,16 @@ static inline void free_secdata(void *se
 { }
 #endif	/* CONFIG_SECURITY */
 
+static inline void *file_private(struct file *file)
+{
+	struct file *host = file;
+
+	while (host->f_op->get_host) {
+		host = host->f_op->get_host(host);
+		BUG_ON(host->f_mapping != file->f_mapping);
+	}
+	return host->private_data;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/futex.h kernel-2.6.18-417.el5-028stab121/include/linux/futex.h
--- kernel-2.6.18-417.el5.orig/include/linux/futex.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/futex.h	2017-01-13 08:40:40.000000000 -0500
@@ -15,9 +15,12 @@
 #define FUTEX_LOCK_PI		6
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
+#define FUTEX_WAIT_BITSET	9
+#define FUTEX_WAKE_BITSET	10
 
 #define FUTEX_PRIVATE_FLAG	128
-#define FUTEX_CMD_MASK		~FUTEX_PRIVATE_FLAG
+#define FUTEX_CLOCK_REALTIME	256
+#define FUTEX_CMD_MASK		~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
 
 #define FUTEX_WAIT_PRIVATE	(FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_PRIVATE	(FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
@@ -27,6 +30,8 @@
 #define FUTEX_LOCK_PI_PRIVATE	(FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_UNLOCK_PI_PRIVATE	(FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAIT_BITSET_PRIVATE	(FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAKE_BITSET_PRIVATE	(FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG)
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
@@ -105,6 +110,12 @@ struct robust_list_head {
  */
 #define ROBUST_LIST_LIMIT	2048
 
+/*
+ * bitset with all bits set for the FUTEX_xxx_BITSET OPs to request a
+ * match of any bit.
+ */
+#define FUTEX_BITSET_MATCH_ANY	0xffffffff
+
 #ifdef __KERNEL__
 long do_futex(u32 __user *uaddr, int op, u32 val, void *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/genhd.h kernel-2.6.18-417.el5-028stab121/include/linux/genhd.h
--- kernel-2.6.18-417.el5.orig/include/linux/genhd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/genhd.h	2017-01-13 08:40:19.000000000 -0500
@@ -640,6 +640,7 @@ static inline struct block_device *bdget
 	return bdget(MKDEV(disk->major, disk->first_minor) + index);
 }
 
+extern struct subsystem block_subsys;
 #endif
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/gfp.h kernel-2.6.18-417.el5-028stab121/include/linux/gfp.h
--- kernel-2.6.18-417.el5.orig/include/linux/gfp.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/gfp.h	2017-01-13 08:40:41.000000000 -0500
@@ -49,24 +49,30 @@ extern void kfree(const void *);
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_NO_OOM     ((__force gfp_t)0x40000u) /* Don't OOM on alloc fail - hugepages */
+#define __GFP_UBC	((__force gfp_t)0x80000u)/* charge kmem in buddy and slab */
+#define __GFP_SOFT_UBC	((__force gfp_t)0x100000u)/* use soft charging */
 
-#define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 22	/* Room for 22 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_NO_OOM)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_NO_OOM| \
+			__GFP_UBC|__GFP_SOFT_UBC)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
 /* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
 #define GFP_ATOMIC	(__GFP_HIGH | __GFP_NOWARN)
+#define GFP_ATOMIC_UBC	(__GFP_HIGH | __GFP_NOWARN | __GFP_UBC)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_USER_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_UBC)
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/grinternal.h kernel-2.6.18-417.el5-028stab121/include/linux/grinternal.h
--- kernel-2.6.18-417.el5.orig/include/linux/grinternal.h	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/grinternal.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,91 @@
+#ifndef __GRINTERNAL_H
+#define __GRINTERNAL_H
+
+#ifdef CONFIG_GRKERNSEC
+
+#include <linux/grmsg.h>
+
+extern char *gr_to_filename(const struct dentry *dentry,
+			    const struct vfsmount *mnt);
+extern char *gr_to_filename2(const struct dentry *dentry,
+			    const struct vfsmount *mnt);
+extern char *gr_to_filename3(const struct dentry *dentry,
+			    const struct vfsmount *mnt);
+
+#ifdef CONFIG_VE
+#include <linux/ve_task.h>
+#define grsec_enable_tpe		(get_exec_env()->grsec.enable_tpe)
+#define grsec_tpe_gid			(get_exec_env()->grsec.tpe_gid)
+#define grsec_enable_tpe_all		(get_exec_env()->grsec.enable_tpe_all)
+#define grsec_lock			(get_exec_env()->grsec.lock)
+#else
+extern int grsec_enable_tpe;
+extern int grsec_tpe_gid;
+extern int grsec_enable_tpe_all;
+extern int grsec_lock;
+#endif
+
+extern spinlock_t grsec_alert_lock;
+extern unsigned long grsec_alert_wtime;
+extern unsigned long grsec_alert_fyet;
+
+extern spinlock_t grsec_audit_lock;
+
+#define gr_task_fullpath(tsk) ("")
+
+#define gr_parent_task_fullpath(tsk) ("")
+
+#define DEFAULTSECARGS(task) gr_task_fullpath(task), task->comm, \
+		       task->pid, task->uid, \
+		       task->euid, task->gid, task->egid, \
+		       gr_parent_task_fullpath(task), \
+		       task->parent->comm, task->parent->pid, \
+		       task->parent->uid, task->parent->euid, \
+		       task->parent->gid, task->parent->egid
+
+enum {
+	GR_DO_AUDIT,
+	GR_DONT_AUDIT,
+	GR_DONT_AUDIT_GOOD
+};
+
+enum {
+	GR_TTYSNIFF,
+	GR_RBAC,
+	GR_RBAC_STR,
+	GR_STR_RBAC,
+	GR_RBAC_MODE2,
+	GR_RBAC_MODE3,
+	GR_FILENAME,
+	GR_NOARGS,
+	GR_ONE_INT,
+	GR_ONE_INT_TWO_STR,
+	GR_ONE_STR,
+	GR_STR_INT,
+	GR_TWO_INT,
+	GR_THREE_INT,
+	GR_FIVE_INT_TWO_STR,
+	GR_TWO_STR,
+	GR_THREE_STR,
+	GR_FOUR_STR,
+	GR_STR_FILENAME,
+	GR_FILENAME_STR,
+	GR_FILENAME_TWO_INT,
+	GR_FILENAME_TWO_INT_STR,
+	GR_TEXTREL,
+	GR_PTRACE,
+	GR_RESOURCE,
+	GR_CAP,
+	GR_SIG,
+	GR_CRASH1,
+	GR_CRASH2,
+	GR_PSACCT
+};
+
+#define gr_log_fs_generic(audit, msg, dentry, mnt) gr_log_varargs(audit, msg, GR_FILENAME, dentry, mnt)
+#define gr_log_str(audit, msg, str) gr_log_varargs(audit, msg, GR_ONE_STR, str)
+
+extern void gr_log_varargs(int audit, const char *msg, int argtypes, ...);
+
+#endif
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/grmsg.h kernel-2.6.18-417.el5-028stab121/include/linux/grmsg.h
--- kernel-2.6.18-417.el5.orig/include/linux/grmsg.h	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/grmsg.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,3 @@
+#define DEFAULTSECMSG "%.256s[%.16s:%d] uid/euid:%u/%u gid/egid:%u/%u, parent %.256s[%.16s:%d] uid/euid:%u/%u gid/egid:%u/%u"
+#define GR_EXEC_TPE_MSG "denied untrusted exec of %.950s by "
+#define GR_SYSCTL_MSG "denied modification of grsecurity sysctl value : %.32s by "
diff -upr kernel-2.6.18-417.el5.orig/include/linux/grsecurity.h kernel-2.6.18-417.el5-028stab121/include/linux/grsecurity.h
--- kernel-2.6.18-417.el5.orig/include/linux/grsecurity.h	2017-01-13 08:40:41.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/grsecurity.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,13 @@
+#ifndef GR_SECURITY_H
+#define GR_SECURITY_H
+#include <linux/fs.h>
+
+extern int gr_tpe_allow(const struct file *file);
+extern void gr_copy_label(struct task_struct *tsk);
+extern int gr_acl_handle_mmap(const struct file *file,
+			      const unsigned long prot);
+extern int gr_acl_handle_mprotect(const struct file *file,
+				  const unsigned long prot);
+extern void gr_acl_handle_exit(void);
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/hardirq.h kernel-2.6.18-417.el5-028stab121/include/linux/hardirq.h
--- kernel-2.6.18-417.el5.orig/include/linux/hardirq.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/hardirq.h	2017-01-13 08:40:20.000000000 -0500
@@ -7,6 +7,9 @@
 #include <asm/hardirq.h>
 #include <asm/system.h>
 
+#include <ub/ub_task.h>
+#include <linux/ve_task.h>
+
 /*
  * We put the hardirq and softirq counter into the preemption
  * counter. The bitmask has the following meaning:
@@ -95,6 +98,24 @@ static inline void account_system_vtime(
 }
 #endif
 
+#define save_context()		do {				\
+		struct task_struct *tsk;			\
+		if (hardirq_count() == HARDIRQ_OFFSET) {	\
+			tsk = current;				\
+			ve_save_context(tsk);			\
+			ub_save_context(tsk);			\
+		}						\
+	} while (0)
+
+#define restore_context()		do {			\
+		struct task_struct *tsk;			\
+		if (hardirq_count() == HARDIRQ_OFFSET) {	\
+			tsk = current;				\
+			ve_restore_context(tsk);		\
+			ub_restore_context(tsk);		\
+		}						\
+	} while (0)
+
 /*
  * It is safe to do non-atomic ops on ->hardirq_context,
  * because NMI handlers may not preempt and the ops are
@@ -105,6 +126,7 @@ static inline void account_system_vtime(
 	do {						\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
+		save_context();				\
 		trace_hardirq_enter();			\
 	} while (0)
 
@@ -115,6 +137,7 @@ static inline void account_system_vtime(
 	do {						\
 		trace_hardirq_exit();			\
 		account_system_vtime(current);		\
+		restore_context();			\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/highmem.h kernel-2.6.18-417.el5-028stab121/include/linux/highmem.h
--- kernel-2.6.18-417.el5.orig/include/linux/highmem.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/highmem.h	2017-01-13 08:40:14.000000000 -0500
@@ -43,10 +43,11 @@ static inline void *kmap(struct page *pa
 
 #define kunmap(page) do { (void) (page); } while (0)
 
-#define kmap_atomic(page, idx)		page_address(page)
-#define kunmap_atomic(addr, idx)	do { } while (0)
-#define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
-#define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
+#define kmap_atomic(page, idx)			page_address(page)
+#define kmap_atomic_nocache_pfn(pfn, idx)	pfn_to_kaddr(pfn)
+#define kunmap_atomic(addr, idx)		do { } while (0)
+#define kmap_atomic_pfn(pfn, idx)		page_address(pfn_to_page(pfn))
+#define kmap_atomic_to_page(ptr)		virt_to_page(ptr)
 
 #endif /* CONFIG_HIGHMEM */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/hrtimer.h kernel-2.6.18-417.el5-028stab121/include/linux/hrtimer.h
--- kernel-2.6.18-417.el5.orig/include/linux/hrtimer.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/hrtimer.h	2017-01-13 08:40:24.000000000 -0500
@@ -148,4 +148,9 @@ extern void hrtimer_run_queues(void);
 /* Bootup initialization: */
 extern void __init hrtimers_init(void);
 
+extern long nanosleep_restart(struct restart_block *restart);
+
+extern ktime_t schedule_hrtimer(struct hrtimer *timer,
+				const enum hrtimer_mode mode);
+
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/if_bridge.h kernel-2.6.18-417.el5-028stab121/include/linux/if_bridge.h
--- kernel-2.6.18-417.el5.orig/include/linux/if_bridge.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/if_bridge.h	2017-01-13 08:40:23.000000000 -0500
@@ -44,6 +44,7 @@
 #define BRCTL_SET_PORT_PRIORITY 16
 #define BRCTL_SET_PATH_COST 17
 #define BRCTL_GET_FDB_ENTRIES 18
+#define BRCTL_SET_VIA_ORIG_DEV 19
 
 #define BR_STATE_DISABLED 0
 #define BR_STATE_LISTENING 1
@@ -72,6 +73,7 @@ struct __bridge_info
 	__u32 tcn_timer_value;
 	__u32 topology_change_timer_value;
 	__u32 gc_timer_value;
+	__u8 via_phys_dev;
 };
 
 struct __port_info
@@ -104,6 +106,8 @@ struct __fdb_entry
 
 #include <linux/netdevice.h>
 
+#define BR_ALREADY_SEEN 1
+
 extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *));
 extern int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
 extern int (*br_should_route_hook)(struct sk_buff **pskb);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/if_packet.h kernel-2.6.18-417.el5-028stab121/include/linux/if_packet.h
--- kernel-2.6.18-417.el5.orig/include/linux/if_packet.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/if_packet.h	2017-01-13 08:40:16.000000000 -0500
@@ -43,6 +43,9 @@ struct sockaddr_ll
 #define PACKET_COPY_THRESH		7
 #define PACKET_AUXDATA			8
 #define PACKET_ORIGDEV			9
+#define PACKET_VERSION			10
+#define PACKET_HDRLEN			11
+#define PACKET_RESERVE			12
 
 struct tpacket_stats
 {
@@ -79,6 +82,26 @@ struct tpacket_hdr
 #define TPACKET_ALIGN(x)	(((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
 #define TPACKET_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
 
+struct tpacket2_hdr
+{
+	__u32		tp_status;
+	__u32		tp_len;
+	__u32		tp_snaplen;
+	__u16		tp_mac;
+	__u16		tp_net;
+	__u32		tp_sec;
+	__u32		tp_nsec;
+};
+
+#define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
+
+enum tpacket_versions
+{
+	TPACKET_V1,
+	TPACKET_V2,
+	TPACKET_V1_COMPAT,
+};
+
 /*
    Frame structure:
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/if_tun.h kernel-2.6.18-417.el5-028stab121/include/linux/if_tun.h
--- kernel-2.6.18-417.el5.orig/include/linux/if_tun.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/if_tun.h	2017-01-13 08:40:26.000000000 -0500
@@ -37,6 +37,7 @@ struct tun_struct {
 	struct tun_file		*tfile;
 	unsigned long 		flags;
 	int			attached;
+	void			*bind_file;
 	uid_t			owner;
 	gid_t			group;
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/if_vlan.h kernel-2.6.18-417.el5-028stab121/include/linux/if_vlan.h
--- kernel-2.6.18-417.el5.orig/include/linux/if_vlan.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/if_vlan.h	2017-01-13 08:40:41.000000000 -0500
@@ -81,6 +81,9 @@ struct vlan_group {
 	struct hlist_node	hlist;	/* linked list */
 	struct net_device *vlan_devices[VLAN_GROUP_ARRAY_LEN];
 	struct rcu_head		rcu;
+#ifdef CONFIG_VE
+	struct ve_struct	*owner;
+#endif
 };
 
 struct vlan_priority_tci_mapping {
@@ -141,12 +144,6 @@ static inline __u32 vlan_get_ingress_pri
 	return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7];
 }
 
-/* VLAN tx hw acceleration helpers. */
-struct vlan_skb_tx_cookie {
-	u32	magic;
-	u32	vlan_tag;
-};
-
 static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
 						       int vlan_id)
 {
@@ -168,11 +165,8 @@ static inline int is_vlan_dev(struct net
 
 #define vlan_dev_real_dev(netdev) (VLAN_DEV_INFO(netdev)->real_dev)
 
-#define VLAN_TX_COOKIE_MAGIC	0x564c414e	/* "VLAN" in ascii. */
-#define VLAN_TX_SKB_CB(__skb)	((struct vlan_skb_tx_cookie *)&((__skb)->cb[0]))
-#define vlan_tx_tag_present(__skb) \
-	(VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC)
-#define vlan_tx_tag_get(__skb)	(VLAN_TX_SKB_CB(__skb)->vlan_tag)
+#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci)
+#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci)
 
 /* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
 static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
@@ -323,16 +317,11 @@ static inline struct sk_buff *__vlan_put
  * @skb: skbuff to tag
  * @tag: VLAN tag to insert
  *
- * Puts the VLAN tag in @skb->cb[] and lets the device do the rest
+ * Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest
  */
 static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, unsigned short tag)
 {
-	struct vlan_skb_tx_cookie *cookie;
-
-	cookie = VLAN_TX_SKB_CB(skb);
-	cookie->magic = VLAN_TX_COOKIE_MAGIC;
-	cookie->vlan_tag = tag;
-
+	skb->vlan_tci = tag;
 	return skb;
 }
 
@@ -378,17 +367,14 @@ static inline int __vlan_get_tag(struct 
 /**
  * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[]
  * @skb: skbuff to query
- * @tag: buffer to store vlaue
- * 
- * Returns error if @skb->cb[] is not set correctly
+ * @vlan_tci: buffer to store value
+ *
+ * Returns error if @skb->vlan_tci is not set correctly
  */
 static inline int __vlan_hwaccel_get_tag(struct sk_buff *skb, unsigned short *tag)
 {
-	struct vlan_skb_tx_cookie *cookie;
-
-	cookie = VLAN_TX_SKB_CB(skb);
-	if (cookie->magic == VLAN_TX_COOKIE_MAGIC) {
-		*tag = cookie->vlan_tag;
+	if (vlan_tx_tag_present(skb)) {
+		*tag = skb->vlan_tci;
 		return 0;
 	} else {
 		*tag = 0;
diff -upr kernel-2.6.18-417.el5.orig/include/linux/inetdevice.h kernel-2.6.18-417.el5-028stab121/include/linux/inetdevice.h
--- kernel-2.6.18-417.el5.orig/include/linux/inetdevice.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/inetdevice.h	2017-01-13 08:40:21.000000000 -0500
@@ -35,6 +35,12 @@ struct ipv4_devconf
 };
 
 extern struct ipv4_devconf ipv4_devconf;
+extern struct ipv4_devconf ipv4_devconf_dflt;
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_ipv4_devconf		(*(get_exec_env()->_ipv4_devconf))
+#else
+#define ve_ipv4_devconf		ipv4_devconf
+#endif
 
 struct in_device
 {
@@ -61,32 +67,32 @@ struct in_device
 };
 
 #define IN_DEV_FORWARD(in_dev)		((in_dev)->cnf.forwarding)
-#define IN_DEV_MFORWARD(in_dev)		(ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
-#define IN_DEV_RPFILTER(in_dev)		(ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
-#define IN_DEV_SOURCE_ROUTE(in_dev)	(ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
+#define IN_DEV_MFORWARD(in_dev)		(ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
+#define IN_DEV_RPFILTER(in_dev)		(ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
+#define IN_DEV_SOURCE_ROUTE(in_dev)	(ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
 #define IN_DEV_ACCEPT_LOCAL(in_dev)	(ipv4_devconf_ext.accept_local || \
 					 (dev_extended(in_dev->dev) ? \
 					 dev_extended(in_dev->dev)->ipv4_devconf_ext.accept_local : 0))
-#define IN_DEV_BOOTP_RELAY(in_dev)	(ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
+#define IN_DEV_BOOTP_RELAY(in_dev)	(ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
 
-#define IN_DEV_LOG_MARTIANS(in_dev)	(ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
-#define IN_DEV_PROXY_ARP(in_dev)	(ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
-#define IN_DEV_SHARED_MEDIA(in_dev)	(ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
-#define IN_DEV_TX_REDIRECTS(in_dev)	(ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
-#define IN_DEV_SEC_REDIRECTS(in_dev)	(ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
+#define IN_DEV_LOG_MARTIANS(in_dev)	(ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
+#define IN_DEV_PROXY_ARP(in_dev)	(ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
+#define IN_DEV_SHARED_MEDIA(in_dev)	(ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
+#define IN_DEV_TX_REDIRECTS(in_dev)	(ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
+#define IN_DEV_SEC_REDIRECTS(in_dev)	(ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
 #define IN_DEV_IDTAG(in_dev)		((in_dev)->cnf.tag)
 #define IN_DEV_MEDIUM_ID(in_dev)	((in_dev)->cnf.medium_id)
 #define IN_DEV_PROMOTE_SECONDARIES(in_dev)	(ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+	  (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
 	 || (!IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+ 	  (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
 
-#define IN_DEV_ARPFILTER(in_dev)	(ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
-#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
-#define IN_DEV_ARP_IGNORE(in_dev)	(max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+#define IN_DEV_ARPFILTER(in_dev)	(ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
+#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
+#define IN_DEV_ARP_IGNORE(in_dev)	(max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
 
 struct in_ifaddr
 {
@@ -117,6 +123,7 @@ extern u32		inet_select_addr(const struc
 extern u32		inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
 extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
 extern void		inet_forward_change(void);
+extern void		inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
 
 static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
 {
@@ -184,6 +191,16 @@ static inline void in_dev_put(struct in_
 #define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
 #define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
 
+struct ve_struct;
+#ifdef CONFIG_INET
+extern int devinet_sysctl_init(struct ve_struct *);
+extern void devinet_sysctl_fini(struct ve_struct *);
+extern void devinet_sysctl_free(struct ve_struct *);
+#else
+static inline int devinet_sysctl_init(struct ve_struct *ve) { return 0; }
+static inline void devinet_sysctl_fini(struct ve_struct *ve) { ; }
+static inline void devinet_sysctl_free(struct ve_struct *ve) { ; }
+#endif
 #endif /* __KERNEL__ */
 
 static __inline__ __u32 inet_make_mask(int logmask)
diff -upr kernel-2.6.18-417.el5.orig/include/linux/init_task.h kernel-2.6.18-417.el5-028stab121/include/linux/init_task.h
--- kernel-2.6.18-417.el5.orig/include/linux/init_task.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/init_task.h	2017-01-13 08:40:40.000000000 -0500
@@ -4,7 +4,9 @@
 #include <linux/file.h>
 #include <linux/rcupdate.h>
 #include <linux/irqflags.h>
+#include <linux/utsname.h>
 #include <linux/lockdep.h>
+#include <linux/ipc.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -68,10 +70,27 @@
 	.session	= 1,						\
 }
 
+#ifdef CONFIG_VE
+/* one for ve0, one for init_task */
+#define INIT_NSPROXY_COUNT	ATOMIC_INIT(2)
+#else
+#define INIT_NSPROXY_COUNT	ATOMIC_INIT(1)
+#endif
+
+extern struct nsproxy init_nsproxy;
+#define INIT_NSPROXY(nsproxy) {						\
+	.count		= INIT_NSPROXY_COUNT,				\
+	.nslock		= SPIN_LOCK_UNLOCKED,				\
+	.uts_ns		= &init_uts_ns,					\
+	.namespace	= NULL,						\
+	INIT_IPC_NS(ipc_ns)						\
+}
+
 #define INIT_SIGHAND(sighand) {						\
 	.count		= ATOMIC_INIT(1), 				\
 	.action		= { { { .sa_handler = NULL, } }, },		\
 	.siglock	= __SPIN_LOCK_UNLOCKED(sighand.siglock),	\
+	.signalfd_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh),	\
 }
 
 extern struct group_info init_groups;
@@ -114,6 +133,7 @@ extern struct group_info init_groups;
 	.files		= &init_files,					\
 	.signal		= &init_signals,				\
 	.sighand	= &init_sighand,				\
+	.nsproxy	= &init_nsproxy,				\
 	.pending	= {						\
 		.list = LIST_HEAD_INIT(tsk.pending.list),		\
 		.signal = {{0}}},					\
diff -upr kernel-2.6.18-417.el5.orig/include/linux/inotify.h kernel-2.6.18-417.el5-028stab121/include/linux/inotify.h
--- kernel-2.6.18-417.el5.orig/include/linux/inotify.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/inotify.h	2017-01-13 08:40:40.000000000 -0500
@@ -7,6 +7,7 @@
 #ifndef _LINUX_INOTIFY_H
 #define _LINUX_INOTIFY_H
 
+#include <linux/fcntl.h>
 #include <linux/types.h>
 
 /*
@@ -63,10 +64,15 @@ struct inotify_event {
 			 IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \
 			 IN_MOVE_SELF)
 
+/* Flags for sys_inotify_init1.  */
+#define IN_CLOEXEC O_CLOEXEC
+#define IN_NONBLOCK O_NONBLOCK
+
 #ifdef __KERNEL__
 
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/idr.h>
 
 /*
  * struct inotify_watch - represents a watch request on a specific inode
@@ -84,6 +90,8 @@ struct inotify_watch {
 	struct list_head	i_list;	/* entry in inode's list */
 	atomic_t		count;	/* reference count */
 	struct inotify_handle	*ih;	/* associated inotify handle */
+	struct dentry		*dentry;
+	struct vfsmount		*mnt;
 	struct inode		*inode;	/* associated inode */
 	__s32			wd;	/* watch descriptor */
 	__u32			mask;	/* event mask for this watch */
@@ -106,6 +114,7 @@ extern void inotify_inode_queue_event(st
 extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
 					      const char *);
 extern void inotify_unmount_inodes(struct list_head *);
+extern void inotify_unmount_mnt(struct vfsmount *);
 extern void inotify_inode_is_dead(struct inode *);
 extern u32 inotify_get_cookie(void);
 
@@ -120,6 +129,9 @@ extern __s32 inotify_find_update_watch(s
 				       u32);
 extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *,
 			       struct inode *, __u32);
+extern __s32 inotify_add_watch_dget(struct inotify_handle *ih,
+				    struct inotify_watch *watch, struct dentry *d,
+				    struct vfsmount *mnt, u32 mask);
 extern __s32 inotify_clone_watch(struct inotify_watch *, struct inotify_watch *);
 extern void inotify_evict_watch(struct inotify_watch *);
 extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *);
@@ -131,6 +143,66 @@ extern void put_inotify_watch(struct ino
 extern int pin_inotify_watch(struct inotify_watch *);
 extern void unpin_inotify_watch(struct inotify_watch *);
 
+/*
+ * struct inotify_handle - represents an inotify instance
+ *
+ * This structure is protected by the mutex 'mutex'.
+ */
+struct inotify_handle {
+	struct idr		idr;		/* idr mapping wd -> watch */
+	struct mutex		mutex;		/* protects this bad boy */
+	struct list_head	watches;	/* list of watches */
+	atomic_t		count;		/* reference count */
+	u32			last_wd;	/* the last wd allocated */
+	const struct inotify_operations *in_ops; /* inotify caller operations */
+};
+
+
+/*
+ * struct inotify_device - represents an inotify instance
+ *
+ * This structure is protected by the mutex 'mutex'.
+ */
+struct inotify_device {
+	wait_queue_head_t 	wq;		/* wait queue for i/o */
+	struct mutex		ev_mutex;	/* protects event queue */
+	struct mutex		up_mutex;	/* synchronizes watch updates */
+	struct list_head 	events;		/* list of queued events */
+	atomic_t		count;		/* reference count */
+	struct user_struct	*user;		/* user who opened this dev */
+	struct inotify_handle	*ih;		/* inotify handle */
+	unsigned int		queue_size;	/* size of the queue (bytes) */
+	unsigned int		event_count;	/* number of pending events */
+	unsigned int		max_events;	/* maximum number of events */
+};
+
+/*
+ * struct inotify_kernel_event - An inotify event, originating from a watch and
+ * queued for user-space.  A list of these is attached to each instance of the
+ * device.  In read(), this list is walked and all events that can fit in the
+ * buffer are returned.
+ *
+ * Protected by dev->ev_mutex of the device in which we are queued.
+ */
+struct inotify_kernel_event {
+	struct inotify_event	event;	/* the user-space event */
+	struct list_head        list;	/* entry in inotify_device's list */
+	char			*name;	/* filename, if any */
+};
+
+/*
+ * struct inotify_user_watch - our version of an inotify_watch, we add
+ * a reference to the associated inotify_device.
+ */
+struct inotify_user_watch {
+	struct inotify_device	*dev;	/* associated device */
+	struct inotify_watch	wdata;	/* inotify watch data */
+};
+
+int inotify_create_watch(struct inotify_device *dev, struct dentry *d,
+			 struct vfsmount *mnt, u32 mask);
+
+
 #else
 
 static inline void inotify_d_instantiate(struct dentry *dentry,
@@ -159,6 +231,10 @@ static inline void inotify_unmount_inode
 {
 }
 
+/* !CONFIG_INOTIFY stub; parameter must be named in a definition (C89/C99) */
+static inline void inotify_unmount_mnt(struct vfsmount *mnt)
+{
+}
+
 static inline void inotify_inode_is_dead(struct inode *inode)
 {
 }
diff -upr kernel-2.6.18-417.el5.orig/include/linux/interrupt.h kernel-2.6.18-417.el5-028stab121/include/linux/interrupt.h
--- kernel-2.6.18-417.el5.orig/include/linux/interrupt.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/interrupt.h	2017-01-13 08:40:16.000000000 -0500
@@ -236,9 +236,12 @@ enum
 	NET_RX_SOFTIRQ,
 	BLOCK_SOFTIRQ,
 	BLOCK_IOPOLL_SOFTIRQ,
-	TASKLET_SOFTIRQ
+	TASKLET_SOFTIRQ,
+	NR_SOFTIRQS
 };
 
+extern char *softirq_to_name[NR_SOFTIRQS];
+
 /* softirq mask and active fields moved to irq_cpustat_t in
  * asm/hardirq.h to get better cache usage.  KAO
  */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ioprio.h kernel-2.6.18-417.el5-028stab121/include/linux/ioprio.h
--- kernel-2.6.18-417.el5.orig/include/linux/ioprio.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ioprio.h	2017-01-13 08:40:18.000000000 -0500
@@ -38,6 +38,7 @@ enum {
 	IOPRIO_WHO_PROCESS = 1,
 	IOPRIO_WHO_PGRP,
 	IOPRIO_WHO_USER,
+	IOPRIO_WHO_UBC = 1000,
 };
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ipc.h kernel-2.6.18-417.el5-028stab121/include/linux/ipc.h
--- kernel-2.6.18-417.el5.orig/include/linux/ipc.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ipc.h	2017-01-13 08:40:15.000000000 -0500
@@ -2,6 +2,7 @@
 #define _LINUX_IPC_H
 
 #include <linux/types.h>
+#include <linux/kref.h>
 
 #define IPC_PRIVATE ((__kernel_key_t) 0)  
 
@@ -68,6 +69,61 @@ struct kern_ipc_perm
 	void		*security;
 };
 
+struct ipc_ids;
+struct ipc_namespace {
+	struct kref	kref;
+	struct ipc_ids	*ids[3];
+
+	int		sem_ctls[4];
+	int		used_sems;
+
+	int		msg_ctlmax;
+	int		msg_ctlmnb;
+	int		msg_ctlmni;
+	atomic_t	msg_bytes;
+	atomic_t	msg_hdrs;
+
+	size_t		shm_ctlmax;
+	size_t		shm_ctlall;
+	int		shm_ctlmni;
+	int		shm_tot;
+};
+
+extern struct ipc_namespace init_ipc_ns;
+
+#ifdef CONFIG_SYSVIPC
+#define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
+#else
+#define INIT_IPC_NS(ns)
+#endif
+
+#ifdef CONFIG_IPC_NS
+extern void free_ipc_ns(struct kref *kref);
+extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
+extern int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns);
+#else
+static inline int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+	return 0;
+}
+#endif
+
+static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+{
+#ifdef CONFIG_IPC_NS
+	if (ns)
+		kref_get(&ns->kref);
+#endif
+	return ns;
+}
+
+static inline void put_ipc_ns(struct ipc_namespace *ns)
+{
+#ifdef CONFIG_IPC_NS
+	kref_put(&ns->kref, free_ipc_ns);
+#endif
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_IPC_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ipv6.h kernel-2.6.18-417.el5-028stab121/include/linux/ipv6.h
--- kernel-2.6.18-417.el5.orig/include/linux/ipv6.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ipv6.h	2017-01-13 08:40:26.000000000 -0500
@@ -352,6 +352,8 @@ static inline u32 inet6_rsk_offset(struc
 	return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock);
 }
 
+struct request_sock *__inet6_reqsk_alloc(void);
+
 static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops)
 {
 	struct request_sock *req = reqsk_alloc(ops);
@@ -445,12 +447,13 @@ static inline struct raw6_sock *raw6_sk(
 #define inet_v6_ipv6only(__sk)		0
 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 
-#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\
+#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif,__ve)\
 	(((__sk)->sk_hash == (__hash))				&& \
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
 	 ((__sk)->sk_family		== AF_INET6)		&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
+	 ve_accessible_strict((__sk)->owner_env, (__ve))	&& \
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 
 #endif /* __KERNEL__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/irq.h kernel-2.6.18-417.el5-028stab121/include/linux/irq.h
--- kernel-2.6.18-417.el5.orig/include/linux/irq.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/irq.h	2017-01-13 08:40:16.000000000 -0500
@@ -176,6 +176,9 @@ typedef struct irq_chip		hw_irq_controll
 #define no_irq_type		no_irq_chip
 typedef struct irq_desc		irq_desc_t;
 
+extern int __irq_to_vector(int irq);
+#define irq_to_vector(irq)	__irq_to_vector(irq)
+
 /*
  * Pick up the arch-dependent methods:
  */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/jbd.h kernel-2.6.18-417.el5-028stab121/include/linux/jbd.h
--- kernel-2.6.18-417.el5.orig/include/linux/jbd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/jbd.h	2017-01-13 08:40:15.000000000 -0500
@@ -251,10 +251,15 @@ typedef struct journal_superblock_s
 #define J_ASSERT(assert)						\
 do {									\
 	if (!(assert)) {						\
+		unsigned long stack;					\
 		printk (KERN_EMERG					\
 			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
 			__FUNCTION__, __FILE__, __LINE__, # assert);	\
-		BUG();							\
+		printk("Stack=%p current=%p pid=%d ve=%d comm='%s'\n",	\
+				&stack, current, current->pid,		\
+				get_exec_env()->veid,			\
+				current->comm);				\
+		dump_stack();						\
 	}								\
 } while (0)
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/jiffies.h kernel-2.6.18-417.el5-028stab121/include/linux/jiffies.h
--- kernel-2.6.18-417.el5.orig/include/linux/jiffies.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/jiffies.h	2017-01-13 08:40:28.000000000 -0500
@@ -92,6 +92,7 @@ extern unsigned int tick_divider;
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
+extern unsigned long cycles_per_jiffy, cycles_per_clock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
@@ -453,12 +454,14 @@ static inline clock_t jiffies_to_clock_t
 static inline unsigned long clock_t_to_jiffies(unsigned long x)
 {
 #if (HZ % USER_HZ)==0
+	WARN_ON((long)x < 0);
 	if (x >= ~0UL / (HZ / USER_HZ))
 		return ~0UL;
 	return x * (HZ / USER_HZ);
 #else
 	u64 jif;
 
+	WARN_ON((long)x < 0);
 	/* Don't worry about loss of precision here .. */
 	if (x >= ~0UL / HZ * USER_HZ)
 		return ~0UL;
@@ -472,6 +475,7 @@ static inline unsigned long clock_t_to_j
 
 static inline u64 jiffies_64_to_clock_t(u64 x)
 {
+	WARN_ON((s64)x < 0);
 #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
 	do_div(x, HZ / USER_HZ);
 #else
@@ -488,6 +492,7 @@ static inline u64 jiffies_64_to_clock_t(
 
 static inline u64 nsec_to_clock_t(u64 x)
 {
+	WARN_ON((s64)x < 0);
 #if (NSEC_PER_SEC % USER_HZ) == 0
 	do_div(x, (NSEC_PER_SEC / USER_HZ));
 #elif (USER_HZ % 512) == 0
diff -upr kernel-2.6.18-417.el5.orig/include/linux/Kbuild kernel-2.6.18-417.el5-028stab121/include/linux/Kbuild
--- kernel-2.6.18-417.el5.orig/include/linux/Kbuild	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/Kbuild	2017-01-13 08:40:28.000000000 -0500
@@ -210,6 +210,7 @@ unifdef-y += ethtool.h
 unifdef-y += eventpoll.h
 unifdef-y += ext2_fs.h
 unifdef-y += ext3_fs.h
+unifdef-y += fairsched.h
 unifdef-y += fb.h
 unifdef-y += fcntl.h
 unifdef-y += filter.h
diff -upr kernel-2.6.18-417.el5.orig/include/linux/kdev_t.h kernel-2.6.18-417.el5-028stab121/include/linux/kdev_t.h
--- kernel-2.6.18-417.el5.orig/include/linux/kdev_t.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/kdev_t.h	2017-01-13 08:40:19.000000000 -0500
@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
 	return dev & 0x3ffff;
 }
 
+#define UNNAMED_MAJOR_COUNT	16
+
+#if UNNAMED_MAJOR_COUNT > 1
+
+extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	/*
+	 * Here we transfer bits from 8 to 8+log2(UNNAMED_MAJOR_COUNT) of the
+	 * unnamed device index into major number.
+	 */
+	return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
+		     idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	int i;
+	for (i = 0; i < UNNAMED_MAJOR_COUNT &&
+				MAJOR(dev) != unnamed_dev_majors[i]; i++);
+	return MINOR(dev) | (i << 8);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	int i;
+	for (i = 0; i < UNNAMED_MAJOR_COUNT &&
+				MAJOR(dev) != unnamed_dev_majors[i]; i++);
+	return i < UNNAMED_MAJOR_COUNT;
+}
+
+#else /* UNNAMED_MAJOR_COUNT */
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	return MKDEV(0, idx);
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	return MINOR(dev);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	return MAJOR(dev) == 0;
+}
+
+#endif /* UNNAMED_MAJOR_COUNT */
+
 
 #else /* __KERNEL__ */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/kernel.h kernel-2.6.18-417.el5-028stab121/include/linux/kernel.h
--- kernel-2.6.18-417.el5.orig/include/linux/kernel.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/kernel.h	2017-01-13 08:40:22.000000000 -0500
@@ -178,6 +178,11 @@ asmlinkage int vprintk(const char *fmt, 
 	__attribute__ ((format (printf, 1, 0)));
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+asmlinkage int ve_vprintk(int dst, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
+asmlinkage int ve_printk(int, const char * fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+void prepare_printk(void);
 extern void log_buf_kexec_setup(void);
 extern int dmesg_restrict;
 #else
@@ -187,8 +192,16 @@ static inline int vprintk(const char *s,
 static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
 static inline int printk(const char *s, ...) { return 0; }
+static inline int ve_printk(int d, const char *s, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int ve_printk(int d, const char *s, ...) {return 0; }
+#define prepare_printk()	do { } while (0)
 #endif
 
+#define VE0_LOG		1
+#define VE_LOG		2
+#define VE_LOG_BOTH	(VE0_LOG | VE_LOG)
+
 unsigned long int_sqrt(unsigned long);
 
 static inline int __attribute_pure__ long_log2(unsigned long x)
@@ -204,9 +217,14 @@ extern int __printk_ratelimit(int rateli
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				unsigned int interval_msec);
 
+extern int console_silence_loglevel;
+
 static inline void console_silent(void)
 {
-	console_loglevel = 0;
+	if (console_loglevel > console_silence_loglevel) {
+		printk(KERN_EMERG "console shuts up ...\n");
+		console_loglevel = 0;
+	}
 }
 
 static inline void console_verbose(void)
@@ -216,12 +234,15 @@ static inline void console_verbose(void)
 }
 
 extern void bust_spinlocks(int yes);
+extern void wake_up_klogd(void);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern int panic_timeout;
 extern int panic_on_oops;
+extern int decode_call_traces;
 extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
 extern int tainted;
+extern int kernel_text_csum_broken;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/kernel_stat.h kernel-2.6.18-417.el5-028stab121/include/linux/kernel_stat.h
--- kernel-2.6.18-417.el5.orig/include/linux/kernel_stat.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/kernel_stat.h	2017-01-13 08:40:16.000000000 -0500
@@ -6,6 +6,7 @@
 #include <linux/threads.h>
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
+#include <linux/interrupt.h>
 #include <asm/cputime.h>
 
 /*
@@ -28,6 +29,7 @@ struct cpu_usage_stat {
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
 	unsigned int irqs[NR_IRQS];
+	unsigned int softirqs[NR_SOFTIRQS];
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -36,6 +38,16 @@ DECLARE_PER_CPU(struct kernel_stat, ksta
 /* Must have preemption disabled for this to be meaningful. */
 #define kstat_this_cpu	__get_cpu_var(kstat)
 
+static inline void kstat_inc_softirqs(unsigned int irq)
+{
+	kstat_this_cpu.softirqs[irq]++;
+}
+
+static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
+{
+	return kstat_cpu(cpu).softirqs[irq];
+}
+
 extern unsigned long long nr_context_switches(void);
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/linux/key.h kernel-2.6.18-417.el5-028stab121/include/linux/key.h
--- kernel-2.6.18-417.el5.orig/include/linux/key.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/key.h	2017-01-13 08:40:16.000000000 -0500
@@ -375,6 +375,7 @@ extern void key_init(void);
 #define key_get(k) 			({ NULL; })
 #define key_revoke(k)			do { } while(0)
 #define key_put(k)			do { } while(0)
+#define key_revoke(k)			do { } while(0)
 #define key_ref_put(k)			do { } while(0)
 #define make_key_ref(k)			({ NULL; })
 #define key_ref_to_ptr(k)		({ NULL; })
diff -upr kernel-2.6.18-417.el5.orig/include/linux/kmem_cache.h kernel-2.6.18-417.el5-028stab121/include/linux/kmem_cache.h
--- kernel-2.6.18-417.el5.orig/include/linux/kmem_cache.h	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/kmem_cache.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,222 @@
+#ifndef __KMEM_CACHE_H__
+#define __KMEM_CACHE_H__
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+
+/*
+ * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+ *		  SLAB_RED_ZONE & SLAB_POISON.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * STATS	- 1 to collect stats for /proc/slabinfo.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#ifdef CONFIG_DEBUG_SLAB
+#define	SLAB_DEBUG		1
+#define	SLAB_STATS		1
+#define	SLAB_FORCED_DEBUG	1
+#else
+#define	SLAB_DEBUG		0
+#define	SLAB_STATS		0
+#define	SLAB_FORCED_DEBUG	0
+#endif
+
+/*
+ * struct array_cache
+ *
+ * Purpose:
+ * - LIFO ordering, to hand out cache-warm objects from _alloc
+ * - reduce the number of linked list operations
+ * - reduce spinlock operations
+ *
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ *
+ */
+struct array_cache {
+	unsigned int avail;
+	unsigned int limit;
+	unsigned int batchcount;
+	unsigned int touched;
+	spinlock_t lock;
+	void *entry[0];		/*
+				 * Must have this definition in here for the proper
+				 * alignment of array_cache. Also simplifies accessing
+				 * the entries.
+				 * [0] is for gcc 2.95. It should really be [].
+				 */
+};
+
+/* bootstrap: The caches do not work without cpuarrays anymore,
+ * but the cpuarrays are allocated from the generic caches...
+ */
+#define BOOT_CPUCACHE_ENTRIES	1
+struct arraycache_init {
+	struct array_cache cache;
+	void *entries[BOOT_CPUCACHE_ENTRIES];
+};
+
+/*
+ * The slab lists for all objects.
+ */
+struct kmem_list3 {
+	struct list_head slabs_partial;	/* partial list first, better asm code */
+	struct list_head slabs_full;
+	struct list_head slabs_free;
+	unsigned long free_objects;
+	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
+	spinlock_t list_lock;
+	struct array_cache *shared;	/* shared per node */
+	struct array_cache **alien;	/* on other nodes */
+	unsigned long next_reap;	/* updated without locking */
+	int free_touched;		/* updated without locking */
+};
+
+/*
+ * struct kmem_cache
+ *
+ * manages a cache.
+ */
+
+struct kmem_cache {
+/* 1) per-cpu data, touched during every alloc/free */
+	struct array_cache *array[NR_CPUS];
+/* 2) Cache tunables. Protected by cache_chain_mutex */
+	unsigned int batchcount;
+	unsigned int limit;
+	unsigned int shared;
+
+	unsigned int buffer_size;
+/* 3) touched by every alloc & free from the backend */
+	struct kmem_list3 *nodelists[MAX_NUMNODES];
+
+	unsigned int flags;		/* constant flags */
+	unsigned int num;		/* # of objs per slab */
+
+/* 4) cache_grow/shrink */
+	/* order of pgs per slab (2^n) */
+	unsigned int gfporder;
+
+	/* force GFP flags, e.g. GFP_DMA */
+	gfp_t gfpflags;
+
+	size_t colour;			/* cache colouring range */
+	unsigned int colour_off;	/* colour offset */
+	struct kmem_cache *slabp_cache;
+	unsigned int slab_size;
+	unsigned int dflags;		/* dynamic flags */
+
+	/* constructor func */
+	void (*ctor) (void *, struct kmem_cache *, unsigned long);
+
+	/* de-constructor func */
+	void (*dtor) (void *, struct kmem_cache *, unsigned long);
+
+/* 5) cache creation/removal */
+	const char *name;
+	struct list_head next;
+
+/* 6) statistics */
+	unsigned long grown;
+	unsigned long reaped;
+	unsigned long shrunk;
+#if SLAB_STATS
+	unsigned long num_active;
+	unsigned long num_allocations;
+	unsigned long high_mark;
+	unsigned long errors;
+	unsigned long max_freeable;
+	unsigned long node_allocs;
+	unsigned long node_frees;
+	unsigned long node_overflow;
+	atomic_t allochit;
+	atomic_t allocmiss;
+	atomic_t freehit;
+	atomic_t freemiss;
+#endif
+#if SLAB_DEBUG
+	/*
+	 * If debugging is enabled, then the allocator can add additional
+	 * fields and/or padding to every object. buffer_size contains the total
+	 * object size including these internal fields, the following two
+	 * variables contain the offset to the user object and its size.
+	 */
+	int obj_offset;
+	int obj_size;
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int		objuse;
+#endif
+};
+
+#define CFLGS_OFF_SLAB		(0x80000000UL)
+#define CFLGS_ENVIDS		(0x04000000UL)
+#define OFF_SLAB(x)		((x)->flags & CFLGS_OFF_SLAB)
+#define ENVIDS(x)		((x)->flags & CFLGS_ENVIDS)
+#define kmem_mark_nocharge(c)	do { (c)->flags |= SLAB_NO_CHARGE; } while (0)
+
+struct slab;
+/*
+ * Functions for storing/retrieving the cachep and or slab from the page
+ * allocator.  These are used to find the slab an obj belongs to.  With kfree(),
+ * these are used to find the cache which an obj belongs to.
+ */
+static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+{
+	page->lru.next = (struct list_head *)cache;
+}
+
+static inline struct kmem_cache *page_get_cache(struct page *page)
+{
+	page = compound_head(page);
+	BUG_ON(!PageSlab(page));
+	return (struct kmem_cache *)page->lru.next;
+}
+
+static inline void page_set_slab(struct page *page, struct slab *slab)
+{
+	page->lru.prev = (struct list_head *)slab;
+}
+
+static inline struct slab *page_get_slab(struct page *page)
+{
+	page = compound_head(page);
+	BUG_ON(!PageSlab(page));
+	return (struct slab *)page->lru.prev;
+}
+
+static inline struct kmem_cache *virt_to_cache(const void *obj)
+{
+	struct page *page = virt_to_page(obj);
+	return page_get_cache(page);
+}
+
+static inline struct slab *virt_to_slab(const void *obj)
+{
+	struct page *page = virt_to_page(obj);
+	return page_get_slab(page);
+}
+
+#include <linux/kmem_slab.h>
+
+static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+				 unsigned int idx)
+{
+	return slab->s_mem + cache->buffer_size * idx;
+}
+
+static inline unsigned int obj_to_index(struct kmem_cache *cache,
+					struct slab *slab, void *obj)
+{
+	return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/kmem_slab.h kernel-2.6.18-417.el5-028stab121/include/linux/kmem_slab.h
--- kernel-2.6.18-417.el5.orig/include/linux/kmem_slab.h	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/kmem_slab.h	2017-01-13 08:40:15.000000000 -0500
@@ -0,0 +1,72 @@
+#ifndef __KMEM_SLAB_H__
+#define __KMEM_SLAB_H__
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+ */
+
+typedef unsigned int kmem_bufctl_t;
+#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
+#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
+#define SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)
+
+/*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from an general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+struct slab {
+	struct list_head list;
+	unsigned long colouroff;
+	void *s_mem;		/* including colour offset */
+	unsigned int inuse;	/* num of objs active in slab */
+	kmem_bufctl_t free;
+	unsigned short nodeid;
+};
+
+/*
+ * struct slab_rcu
+ *
+ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
+ * arrange for kmem_freepages to be called via RCU.  This is useful if
+ * we need to approach a kernel structure obliquely, from its address
+ * obtained without the usual locking.  We can lock the structure to
+ * stabilize it and check it's still at the given address, only if we
+ * can be sure that the memory has not been meanwhile reused for some
+ * other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
+ *
+ * We assume struct slab_rcu can overlay struct slab when destroying.
+ */
+struct slab_rcu {
+	struct rcu_head head;
+	struct kmem_cache *cachep;
+	void *addr;
+};
+
+static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+{
+	return (kmem_bufctl_t *) (slabp + 1);
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/kobject.h kernel-2.6.18-417.el5-028stab121/include/linux/kobject.h
--- kernel-2.6.18-417.el5.orig/include/linux/kobject.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/kobject.h	2017-01-13 08:40:40.000000000 -0500
@@ -36,7 +36,12 @@
 extern char uevent_helper[];
 
 /* counter to tag the uevent, read only except for the kobject core */
+#ifdef CONFIG_VE
+#define ve_uevent_seqnum	(get_exec_env()->_uevent_seqnum)
+#else
+#define ve_uevent_seqnum uevent_seqnum
 extern u64 uevent_seqnum;
+#endif
 
 /* the actions here must match the proper string in lib/kobject_uevent.c */
 typedef int __bitwise kobject_action_t;
@@ -48,6 +53,9 @@ enum kobject_action {
 	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices (broken) */
 	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* device offline */
 	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* device online */
+	KOBJ_START	= (__force kobject_action_t) 0x08,	/* start subsystem */
+	KOBJ_STOP	= (__force kobject_action_t) 0x09,	/* stop subsystem */
+	KOBJ_REBOOT	= (__force kobject_action_t) 0x0a,	/* reboot subsystem */
 };
 
 struct kobject {
@@ -201,6 +209,9 @@ extern struct subsystem kernel_subsys;
 /* The global /sys/hypervisor/ subsystem  */
 extern struct subsystem hypervisor_subsys;
 
+extern struct subsystem class_obj_subsys;
+extern struct subsystem class_subsys;
+
 /**
  * Helpers for setting the kset of registered objects.
  * Often, a registered object belongs to a kset embedded in a 
@@ -270,6 +281,7 @@ extern int subsys_create_file(struct sub
 
 #if defined(CONFIG_HOTPLUG)
 void kobject_uevent(struct kobject *kobj, enum kobject_action action);
+ssize_t kobject_uevent_show(struct kobject *kobj, char *buf);
 int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 			char *envp[]);
 
@@ -281,6 +293,9 @@ int add_uevent_var_env(struct kobj_ueven
 	__attribute__((format (printf, 2, 3)));
 #else
 static inline void kobject_uevent(struct kobject *kobj, enum kobject_action action) { }
+static inline int kobject_uevent_env(struct kobject *kobj,
+		enum kobject_action action, char *envp[])
+{ return 0; }
 
 static inline int add_uevent_var(char **envp, int num_envp, int *cur_index,
 				      char *buffer, int buffer_size, int *cur_len, 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/lockd/bind.h kernel-2.6.18-417.el5-028stab121/include/linux/lockd/bind.h
--- kernel-2.6.18-417.el5.orig/include/linux/lockd/bind.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/lockd/bind.h	2017-01-13 08:40:27.000000000 -0500
@@ -39,4 +39,13 @@ extern int	lockd_up(void);
 extern int	lockd_up_proto(int proto);
 extern void	lockd_down(void);
 
+extern int	nlmclnt_set_lockowner(struct inode *,
+		struct file_lock *, fl_owner_t, int);
+extern int	nlmclnt_reserve_pid(int pid);
+
+struct nlm_reserved_pid {
+	int pid;
+	struct hlist_node list;
+};
+
 #endif /* LINUX_LOCKD_BIND_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/lockd/lockd.h kernel-2.6.18-417.el5-028stab121/include/linux/lockd/lockd.h
--- kernel-2.6.18-417.el5.orig/include/linux/lockd/lockd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/lockd/lockd.h	2017-01-13 08:40:23.000000000 -0500
@@ -64,6 +64,7 @@ struct nlm_host {
 #ifndef __GENKSYMS__
 	struct sockaddr_in	h_saddr;	/* our address (optional) */
 #endif
+	envid_t			h_owner_veid;	/* VEID owning the host */
 };
 
 /*
@@ -83,7 +84,7 @@ struct nlm_wait;
 /*
  * Memory chunk for NLM client RPC request.
  */
-#define NLMCLNT_OHSIZE		(sizeof(system_utsname.nodename)+10)
+#define NLMCLNT_OHSIZE		(sizeof(utsname()->nodename)+10)
 struct nlm_rqst {
 	atomic_t		a_count;
 	unsigned int		a_flags;	/* initial RPC task flags */
@@ -155,8 +156,11 @@ extern struct svc_procedure	nlmsvc_proce
 #ifdef CONFIG_LOCKD_V4
 extern struct svc_procedure	nlmsvc_procedures4[];
 #endif
-extern int			nlmsvc_grace_period;
-extern unsigned long		nlmsvc_timeout;
+
+#include <linux/ve_nfs.h>
+extern int			_nlmsvc_grace_period;
+extern unsigned long		_nlmsvc_timeout;
+
 
 /*
  * Lockd client functions
@@ -242,6 +246,7 @@ nlm_compare_locks(const struct file_lock
 }
 
 extern struct lock_manager_operations nlmsvc_lock_operations;
+extern spinlock_t rpc_client_lock;
 
 #endif /* __KERNEL__ */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/major.h kernel-2.6.18-417.el5-028stab121/include/linux/major.h
--- kernel-2.6.18-417.el5.orig/include/linux/major.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/major.h	2017-01-13 08:40:19.000000000 -0500
@@ -166,4 +166,7 @@
 
 #define VIOTAPE_MAJOR		230
 
+#define UNNAMED_EXTRA_MAJOR		130
+#define UNNAMED_EXTRA_MAJOR_COUNT	120
+
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/mman.h kernel-2.6.18-417.el5-028stab121/include/linux/mman.h
--- kernel-2.6.18-417.el5.orig/include/linux/mman.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/mman.h	2017-01-13 08:40:15.000000000 -0500
@@ -61,6 +61,9 @@ static inline unsigned long
 calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
+#ifdef MAP_GROWSUP
+	       _calc_vm_trans(flags, MAP_GROWSUP,    VM_GROWSUP ) |
+#endif
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
diff -upr kernel-2.6.18-417.el5.orig/include/linux/mm.h kernel-2.6.18-417.el5-028stab121/include/linux/mm.h
--- kernel-2.6.18-417.el5.orig/include/linux/mm.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/mm.h	2017-01-13 08:40:27.000000000 -0500
@@ -194,6 +194,16 @@ extern unsigned int kobjsize(const void 
 #define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
+
+/*
+ * Bits in mapping->flags.  The lower __GFP_BITS_SHIFT bits are the page
+ * allocation mode flags.
+ */
+#define AS_EIO		(__GFP_BITS_SHIFT + 0)  /* IO error on async write */
+#define AS_ENOSPC       (__GFP_BITS_SHIFT + 1)  /* ENOSPC on async write */
+#define AS_MM_ALL_LOCKS	(__GFP_BITS_SHIFT + 2)	/* under mm_take_all_locks() */
+#define AS_CHECKPOINT   (__GFP_BITS_SHIFT + 3)  /* mapping is checkpointed */
+
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
@@ -281,6 +291,12 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_USER_RESOURCE
+	union {
+		struct user_beancounter *page_ub;
+		struct page_beancounter *page_pb;
+	} bc;
+#endif
 };
 
 #define page_private(page)		((page)->private)
@@ -676,6 +692,7 @@ static inline int page_mapped(struct pag
  */
 #define NOPAGE_SIGBUS	(NULL)
 #define NOPAGE_OOM	((struct page *) (-1))
+#define NOPAGE_RESTART	((struct page *) (-2))
 
 /*
  * Error return values for the *_nopfn functions
@@ -710,16 +727,9 @@ struct page *shmem_nopage(struct vm_area
 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
 struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 					unsigned long addr);
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 #define shmem_nopage filemap_nopage
 
-static inline int shmem_lock(struct file *file, int lock,
-			     struct user_struct *user)
-{
-	return 0;
-}
-
 static inline int shmem_set_policy(struct vm_area_struct *vma,
 				   struct mempolicy *new)
 {
@@ -782,7 +792,9 @@ void free_pgd_range(struct mmu_gather **
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma);
+		struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
+		      unsigned long addr, size_t size);
 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
 			unsigned long size, pgprot_t prot);
 void unmap_mapping_range(struct address_space *mapping,
@@ -834,6 +846,7 @@ int __set_page_dirty_nobuffers(struct pa
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
 int FASTCALL(set_page_dirty(struct page *page));
+int FASTCALL(set_page_dirty_mm(struct page *page, struct mm_struct *mm));
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
@@ -933,7 +946,8 @@ static inline int vma_wants_writenotify(
 
 	/* Can the mapping track the dirty pages? */
 	return vma->vm_file && vma->vm_file->f_mapping &&
-		mapping_cap_account_dirty(vma->vm_file->f_mapping);
+		(mapping_cap_account_dirty(vma->vm_file->f_mapping) ||
+		 test_bit(AS_CHECKPOINT, &vma->vm_file->f_mapping->flags));
 }
 
 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
@@ -1061,6 +1075,7 @@ extern int insert_vm_struct(struct mm_st
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
 extern void unlink_file_vma(struct vm_area_struct *);
+extern void __vma_link_file(struct vm_area_struct *vma);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
@@ -1170,6 +1185,9 @@ pgprot_t vm_get_page_prot(unsigned long 
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 struct page *vmalloc_to_page(void *addr);
 unsigned long vmalloc_to_pfn(void *addr);
+extern struct page * follow_page_pte(struct mm_struct *mm,
+		unsigned long address, int write,
+		pte_t *page_pte, spinlock_t **ptlp);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
@@ -1237,7 +1255,12 @@ void drop_slab(void);
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
 #else
-extern int randomize_va_space;
+extern int _randomize_va_space;
+#ifndef CONFIG_VE
+#define randomize_va_space _randomize_va_space
+#else
+#define randomize_va_space (get_exec_env()->_randomize_va_space)
+#endif
 #endif
 
 const char *arch_vma_name(struct vm_area_struct *vma);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/mm_inline.h kernel-2.6.18-417.el5-028stab121/include/linux/mm_inline.h
--- kernel-2.6.18-417.el5.orig/include/linux/mm_inline.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/mm_inline.h	2017-01-13 08:40:15.000000000 -0500
@@ -1,30 +1,29 @@
-
 static inline void
 add_page_to_active_list(struct zone *zone, struct page *page)
 {
 	list_add(&page->lru, &zone->active_list);
-	zone->nr_active++;
+	__inc_zone_state(zone, NR_ACTIVE);
 }
 
 static inline void
 add_page_to_inactive_list(struct zone *zone, struct page *page)
 {
 	list_add(&page->lru, &zone->inactive_list);
-	zone->nr_inactive++;
+	__inc_zone_state(zone, NR_INACTIVE);
 }
 
 static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
 	list_del(&page->lru);
-	zone->nr_active--;
+	__dec_zone_state(zone, NR_ACTIVE);
 }
 
 static inline void
 del_page_from_inactive_list(struct zone *zone, struct page *page)
 {
 	list_del(&page->lru);
-	zone->nr_inactive--;
+	__dec_zone_state(zone, NR_INACTIVE);
 }
 
 static inline void
@@ -33,9 +32,9 @@ del_page_from_lru(struct zone *zone, str
 	list_del(&page->lru);
 	if (PageActive(page)) {
 		__ClearPageActive(page);
-		zone->nr_active--;
+		__dec_zone_state(zone, NR_ACTIVE);
 	} else {
-		zone->nr_inactive--;
+		__dec_zone_state(zone, NR_INACTIVE);
 	}
 }
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/mmzone.h kernel-2.6.18-417.el5-028stab121/include/linux/mmzone.h
--- kernel-2.6.18-417.el5.orig/include/linux/mmzone.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/mmzone.h	2017-01-13 08:40:15.000000000 -0500
@@ -47,6 +47,8 @@ struct zone_padding {
 #endif
 
 enum zone_stat_item {
+	NR_INACTIVE,
+	NR_ACTIVE,
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
@@ -179,8 +181,6 @@ struct zone {
 	struct list_head	inactive_list;
 	unsigned long		nr_scan_active;
 	unsigned long		nr_scan_inactive;
-	unsigned long		nr_active;
-	unsigned long		nr_inactive;
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/module.h kernel-2.6.18-417.el5-028stab121/include/linux/module.h
--- kernel-2.6.18-417.el5.orig/include/linux/module.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/module.h	2017-01-13 08:40:40.000000000 -0500
@@ -390,6 +390,8 @@ void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
 
+struct module *find_module_by_name(const char *name);
+
 /* Sometimes we know we already have a refcount, and it's easier not
    to handle the error case (which only happens with rmmod --wait). */
 static inline void __module_get(struct module *module)
diff -upr kernel-2.6.18-417.el5.orig/include/linux/mount.h kernel-2.6.18-417.el5-028stab121/include/linux/mount.h
--- kernel-2.6.18-417.el5.orig/include/linux/mount.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/mount.h	2017-01-13 08:40:27.000000000 -0500
@@ -27,6 +27,7 @@ struct namespace;
 #define MNT_NOEXEC	0x04
 #define MNT_NOATIME	0x08
 #define MNT_NODIRATIME	0x10
+#define MNT_RELATIME	0x20
 
 #define MNT_SHRINKABLE	0x100
 
@@ -34,6 +35,8 @@ struct namespace;
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
 #define MNT_PNODE_MASK	0x3000	/* propogation flag mask */
 
+#define MNT_CPT		0x1000000
+
 struct vfsmount {
 	struct list_head mnt_hash;
 	struct vfsmount *mnt_parent;	/* fs we are mounted on */
@@ -54,6 +57,7 @@ struct vfsmount {
 	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct namespace *mnt_namespace; /* containing namespace */
 	int mnt_pinned;
+	unsigned owner;
 };
 
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -84,6 +88,7 @@ struct file_system_type;
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
 				      void *data);
+extern struct vfsmount *vfs_bind_mount(struct vfsmount *, struct dentry *);
 
 struct nameidata;
 
@@ -92,6 +97,7 @@ extern int do_add_mount(struct vfsmount 
 
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 extern void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts);
+extern void replace_mount(struct vfsmount *src_mnt, struct vfsmount *dst_mnt);
 
 extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/msg.h kernel-2.6.18-417.el5-028stab121/include/linux/msg.h
--- kernel-2.6.18-417.el5.orig/include/linux/msg.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/msg.h	2017-01-13 08:40:24.000000000 -0500
@@ -98,6 +98,14 @@ extern long do_msgsnd(int msqid, long mt
 extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			size_t msgsz, long msgtyp, int msgflg);
 
+int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg);
+int sysvipc_setup_msg(key_t key, int msqid, int msgflg);
+int sysv_msg_store(struct msg_msg *msg,
+		   int (*store)(void * src, int len, int offset, void * data),
+		   int len, void * data);
+struct msg_msg *sysv_msg_load(int (*load)(void * dst, int len, int offset,
+					  void * data), int len, void * data);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_MSG_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/namei.h kernel-2.6.18-417.el5-028stab121/include/linux/namei.h
--- kernel-2.6.18-417.el5.orig/include/linux/namei.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/namei.h	2017-01-13 08:40:27.000000000 -0500
@@ -50,7 +50,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
 #define LOOKUP_PARENT		16
 #define LOOKUP_NOALT		32
 #define LOOKUP_REVAL		64
-#define LOOKUP_ATOMIC		128
+/* VvS: to fix O_ATOMICLOOKUP incompatibility with mainstream
+ * #define LOOKUP_ATOMIC		128
+ */
+#define LOOKUP_ATOMIC		0
 
 /*
  * Intent data
@@ -59,6 +62,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
 #define LOOKUP_CREATE		(0x0200)
 #define LOOKUP_ACCESS		(0x0400)
 #define LOOKUP_CHDIR		(0x0800)
+#define LOOKUP_NOAREACHECK	(0x2000)	/* no area check on lookup */
+#define LOOKUP_STRICT		(0x4000)	/* no symlinks or other filesystems */
+#define LOOKUP_DIVE		(0x8000)	/* no follow mount */
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
 extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
diff -upr kernel-2.6.18-417.el5.orig/include/linux/namespace.h kernel-2.6.18-417.el5-028stab121/include/linux/namespace.h
--- kernel-2.6.18-417.el5.orig/include/linux/namespace.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/namespace.h	2017-01-13 08:40:19.000000000 -0500
@@ -4,6 +4,7 @@
 
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/nsproxy.h>
 
 struct namespace {
 	atomic_t		count;
@@ -13,6 +14,8 @@ struct namespace {
 	int event;
 };
 
+extern struct rw_semaphore namespace_sem;
+
 extern int copy_namespace(int, struct task_struct *);
 extern void __put_namespace(struct namespace *namespace);
 extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
@@ -26,11 +29,8 @@ static inline void put_namespace(struct 
 
 static inline void exit_namespace(struct task_struct *p)
 {
-	struct namespace *namespace = p->namespace;
+	struct namespace *namespace = p->nsproxy->namespace;
 	if (namespace) {
-		task_lock(p);
-		p->namespace = NULL;
-		task_unlock(p);
 		put_namespace(namespace);
 	}
 }
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netdevice.h kernel-2.6.18-417.el5-028stab121/include/linux/netdevice.h
--- kernel-2.6.18-417.el5.orig/include/linux/netdevice.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netdevice.h	2017-01-13 08:40:41.000000000 -0500
@@ -37,6 +37,7 @@
 #include <linux/device.h>
 #include <linux/percpu.h>
 #include <linux/dmaengine.h>
+#include <linux/ctype.h>
 
 struct divert_blk;
 struct vlan_group;
@@ -140,6 +141,10 @@ struct net_device_stats
 	unsigned long	tx_compressed;
 };
 
+struct pcpu_lstats {
+	unsigned long packets;
+	unsigned long bytes;
+};
 
 /* Media selection options. */
 enum {
@@ -243,6 +248,35 @@ enum netdev_state_t
 	__LINK_STATE_NETPOLL
 };
 
+struct netdev_bc {
+	struct user_beancounter *exec_ub, *owner_ub;
+};
+
+#define netdev_bc(dev)		(&(dev)->dev_bc)
+
+/* this one is to get merged with rtnl_link_ops */
+struct cpt_context;
+struct cpt_ops;
+struct cpt_netdev_image;
+
+struct dev_cpt_ops {
+	int			cpt_object;
+	char			*name;
+	struct list_head	list;
+
+	void (*dump)(struct net_device *,
+			struct cpt_ops *, struct cpt_context *);
+	int (*restore)(loff_t pos, struct cpt_netdev_image *,
+			struct cpt_ops *, struct cpt_context *);
+
+	struct net_device_stats *(*stats)(struct net_device *);
+	void (*post_restore_netstats)(struct net_device *);
+};
+
+extern struct dev_cpt_ops *dev_cpt_ops_get(int cpt_object,
+						struct dev_cpt_ops *ops);
+extern void register_dev_cpt_ops(struct dev_cpt_ops *);
+extern void unregister_dev_cpt_ops(struct dev_cpt_ops *);
 
 /*
  * This structure holds at boot time configured netdevice settings. They
@@ -343,6 +377,7 @@ struct net_device
 #define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
 #define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
 #define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
+#define NETIF_F_VIRTUAL		16	/* Can be registered inside CT */
 #define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
 #define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
 #define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
@@ -351,6 +386,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_GSO		2048	/* Enable software GSO. */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_VENET		8192	/* Device is venet device */
 #define NETIF_F_GRO		16384	/* Generic receive offload */
 #define NETIF_F_LRO		32768	/* large receive offload */
 
@@ -591,10 +627,21 @@ struct net_device
 	struct divert_blk	*divert;
 #endif /* CONFIG_NET_DIVERT */
 
+	struct ve_struct	*owner_env; /* Owner VE of the interface */
+	struct netdev_bc	dev_bc;
+
 	/* class/net/name entry */
 	struct class_device	class_dev;
 	/* space for optional statistics and wireless sysfs groups */
 	struct attribute_group  *sysfs_groups[3];
+
+#ifdef CONFIG_VE
+	/* List entry in global devices list to keep track of their names
+	 * assignment */
+	struct list_head	dev_global_list_entry;
+
+	struct dev_cpt_ops	*cpt_ops;
+#endif
 #ifndef __GENKSYMS__
 	/*
 	 * Private data size is limited to 64kB
@@ -625,7 +672,6 @@ struct net_device_extended {
 
 extern struct ipv4_devconf_extensions ipv4_devconf_ext;
 extern struct ipv6_devconf_extensions ipv6_devconf_extensions_dflt;
-extern struct ipv6_devconf ipv6_devconf_dflt;
 
 #define	NETDEV_ALIGN		32
 #define	NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)
@@ -704,10 +750,25 @@ typedef int (*lro_func_t)(struct net_dev
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 
+extern struct net_device		templ_loopback_dev;
 extern struct net_device		loopback_dev;		/* The loopback */
+#if defined(CONFIG_VE) && defined(CONFIG_NET)
+#define loopback_dev	(*get_exec_env()->_loopback_dev)
+#define ve0_loopback	(*get_ve0()->_loopback_dev)
+#define dev_base	(get_exec_env()->_net_dev_base)
+#define visible_dev_head(x)	(&(x)->_net_dev_head)
+#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
+#else
 extern struct net_device		*dev_base;		/* All devices */
+#define ve0_loopback	loopback_dev
+#define visible_dev_head(x)	NULL
+#define visible_dev_index_head(x) NULL
+#endif
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
+
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
 extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
@@ -1264,6 +1325,18 @@ unsigned long netdev_increment_features(
 					unsigned long mask);
 unsigned long netdev_fix_features(unsigned long features, const char *name);
 
+#if defined(CONFIG_VE) && defined(CONFIG_NET)
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return !(dev->features & NETIF_F_VIRTUAL);
+}
+#else
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return 0;
+}
+#endif
+
 static inline int net_gso_ok(int features, int gso_type)
 {
 	int feature = gso_type << NETIF_F_GSO_SHIFT;
@@ -1453,6 +1526,16 @@ do {								\
 })
 #endif
 
+#ifdef CONFIG_SYSFS
+extern int netdev_sysfs_init(void);
+extern int netdev_register_sysfs(struct net_device *, int);
+extern void netdev_unregister_sysfs(struct net_device *, int);
+#else
+#define netdev_sysfs_init()	 	(0)
+#define netdev_register_sysfs(dev, i)	(0)
+#define	netdev_unregister_sysfs(dev, i)	do { } while(0)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/nf_conntrack_ftp.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/nf_conntrack_ftp.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/nf_conntrack_ftp.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/nf_conntrack_ftp.h	2017-01-13 08:40:23.000000000 -0500
@@ -32,13 +32,22 @@ struct ip_conntrack_expect;
 
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+typedef unsigned int (*ip_nat_helper_ftp_hook)(struct sk_buff **pskb,
 				       enum ip_conntrack_info ctinfo,
 				       enum ip_ct_ftp_type type,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
 				       struct ip_conntrack_expect *exp,
 				       u32 *seq);
+extern ip_nat_helper_ftp_hook ip_nat_ftp_hook;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_ftp_hook \
+	((ip_nat_helper_ftp_hook) \
+		(get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook))
+#else
+#define ve_ip_nat_ftp_hook	ip_nat_ftp_hook
+#endif
 #endif /* __KERNEL__ */
 
 #endif /* _NF_CONNTRACK_FTP_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/x_tables.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/x_tables.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/x_tables.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/x_tables.h	2017-01-13 08:40:40.000000000 -0500
@@ -275,6 +275,7 @@ struct xt_table_info
 {
 	/* Size per table */
 	unsigned int size;
+	unsigned int alloc_size;
 	/* Number of entries: FIXME. --RR */
 	unsigned int number;
 	/* Initial number of entries. Needed for module usage count */
@@ -304,6 +305,10 @@ extern int xt_register_table(struct xt_t
 			     struct xt_table_info *bootstrap,
 			     struct xt_table_info *newinfo);
 extern void *xt_unregister_table(struct xt_table *table);
+extern struct xt_table *virt_xt_register_table(struct xt_table *table,
+			     struct xt_table_info *bootstrap,
+			     struct xt_table_info *newinfo);
+extern void *virt_xt_unregister_table(struct xt_table *table);
 
 extern struct xt_table_info *xt_replace_table(struct xt_table *table,
 					      unsigned int num_counters,
@@ -369,11 +374,7 @@ struct compat_xt_entry_target
 
 struct compat_xt_counters
 {
-#if defined(CONFIG_X86_64) || defined(CONFIG_IA64)
-	u_int32_t cnt[4];
-#else
-	u_int64_t cnt[2];
-#endif
+	compat_u64 pcnt, bcnt;			/* Packet and byte counters */
 };
 
 struct compat_xt_counters_info
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_connmark.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_connmark.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_connmark.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_connmark.h	2017-01-13 08:40:15.000000000 -0500
@@ -1,6 +1,8 @@
 #ifndef _XT_CONNMARK_H
 #define _XT_CONNMARK_H
 
+#include <net/compat.h>
+
 /* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  * by Henrik Nordstrom <hno@marasystems.com>
  *
@@ -15,4 +17,10 @@ struct xt_connmark_info {
 	u_int8_t invert;
 };
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_info {
+	compat_ulong_t mark, mask;
+	u_int8_t invert;
+};
+#endif /*CONFIG_COMPAT*/
 #endif /*_XT_CONNMARK_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_CONNMARK.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_CONNMARK.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_CONNMARK.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_CONNMARK.h	2017-01-13 08:40:15.000000000 -0500
@@ -1,6 +1,8 @@
 #ifndef _XT_CONNMARK_H_target
 #define _XT_CONNMARK_H_target
 
+#include <net/compat.h>
+
 /* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  * by Henrik Nordstrom <hno@marasystems.com>
  *
@@ -22,4 +24,11 @@ struct xt_connmark_target_info {
 	u_int8_t mode;
 };
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_target_info {
+	compat_ulong_t mark;
+	compat_ulong_t mask;
+	u_int8_t mode;
+};
+#endif /*CONFIG_COMPAT*/
 #endif /*_XT_CONNMARK_H_target*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_conntrack.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_conntrack.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_conntrack.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_conntrack.h	2017-01-13 08:40:15.000000000 -0500
@@ -8,6 +8,8 @@
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <linux/in.h>
 
+#include <net/compat.h>
+
 #define XT_CONNTRACK_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
 #define XT_CONNTRACK_STATE_INVALID (1 << 0)
 
@@ -60,4 +62,21 @@ struct xt_conntrack_info
 	/* Inverse flags */
 	u_int8_t invflags;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_conntrack_info
+{
+	unsigned int statemask, statusmask;
+
+	struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX];
+	struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX];
+
+	compat_ulong_t expires_min, expires_max;
+
+	/* Flags word */
+	u_int8_t flags;
+	/* Inverse flags */
+	u_int8_t invflags;
+};
+#endif /*CONFIG_COMPAT*/
 #endif /*_XT_CONNTRACK_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_limit.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_limit.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_limit.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_limit.h	2017-01-13 08:40:15.000000000 -0500
@@ -18,4 +18,20 @@ struct xt_rateinfo {
 	/* Ugly, ugly fucker. */
 	struct xt_rateinfo *master;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_rateinfo {
+	u_int32_t avg;    /* Average secs between packets * scale */
+	u_int32_t burst;  /* Period multiplier for upper limit. */
+
+	/* Used internally by the kernel */
+	compat_ulong_t prev;
+	u_int32_t credit;
+	u_int32_t credit_cap, cost;
+
+	/* Ugly, ugly fucker. */
+	compat_uptr_t master;
+};
+#endif
+
 #endif /*_XT_RATE_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_mark.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_mark.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_mark.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_mark.h	2017-01-13 08:40:15.000000000 -0500
@@ -1,9 +1,17 @@
 #ifndef _XT_MARK_H
 #define _XT_MARK_H
 
+#include <net/compat.h>
+
 struct xt_mark_info {
     unsigned long mark, mask;
     u_int8_t invert;
 };
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_info {
+    compat_ulong_t mark, mask;
+    u_int8_t invert;
+};
+#endif /*CONFIG_COMPAT*/
 #endif /*_XT_MARK_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_MARK.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_MARK.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter/xt_MARK.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter/xt_MARK.h	2017-01-13 08:40:15.000000000 -0500
@@ -1,6 +1,8 @@
 #ifndef _XT_MARK_H_target
 #define _XT_MARK_H_target
 
+#include <net/compat.h>
+
 /* Version 0 */
 struct xt_mark_target_info {
 	unsigned long mark;
@@ -18,4 +20,10 @@ struct xt_mark_target_info_v1 {
 	u_int8_t mode;
 };
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_target_info_v1 {
+	compat_ulong_t mark;
+	u_int8_t mode;
+};
+#endif /*CONFIG_COMPAT*/
 #endif /*_XT_MARK_H_target */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter.h	2017-01-13 08:40:40.000000000 -0500
@@ -117,11 +117,17 @@ void nf_unregister_hooks(struct nf_hook_
 int nf_register_sockopt(struct nf_sockopt_ops *reg);
 void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nf_hooks \
+       ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
+#else
 #ifdef __GENKSYMS__
 extern struct list_head nf_hooks[32][NF_MAX_HOOKS];
 #else
 extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
 #endif
+#define ve_nf_hooks nf_hooks
+#endif
 
 /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
  * disappear once iptables is replaced with pkttables.  Please DO NOT use them
@@ -199,7 +205,7 @@ static inline int nf_hook_thresh(int pf,
 	if (!cond)
 		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
-	if (list_empty(&nf_hooks[pf][hook]))
+	if (list_empty(&ve_nf_hooks[pf][hook]))
 		return 1;
 #endif
 	return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
@@ -286,6 +292,12 @@ extern void nf_invalidate_cache(int pf);
    Returns true or false. */
 extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
 
+extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval,
+				u_int32_t csum);
+extern u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+				      u_int32_t oldval, u_int32_t newval,
+				      u_int16_t csum, int pseudohdr);
+
 struct nf_afinfo {
 	unsigned short	family;
 	unsigned int	(*checksum)(struct sk_buff *skb, unsigned int hook,
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_core.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_core.h	2017-01-13 08:40:23.000000000 -0500
@@ -3,7 +3,6 @@
 #include <linux/netfilter.h>
 
 #define MAX_IP_CT_PROTO 256
-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
@@ -59,8 +58,29 @@ static inline int ip_conntrack_confirm(s
 
 extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_ct_initialized() \
+	(get_exec_env()->_ip_conntrack != NULL)
+#define ve_ip_ct_protos \
+	(get_exec_env()->_ip_conntrack->_ip_ct_protos)
+#define ve_ip_conntrack_hash	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
+#define ve_ip_conntrack_expect_list \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
+#define ve_ip_conntrack_vmalloc \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc)
+#else
+extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 extern struct list_head *ip_conntrack_hash;
 extern struct list_head ip_conntrack_expect_list;
+#define ve_ip_ct_initialized()		1
+#define ve_ip_ct_protos			ip_ct_protos
+#define ve_ip_conntrack_hash		ip_conntrack_hash
+#define ve_ip_conntrack_expect_list	ip_conntrack_expect_list
+#define ve_ip_conntrack_vmalloc		ip_conntrack_vmalloc
+#endif /* CONFIG_VE_IPTABLES */
+
 extern rwlock_t ip_conntrack_lock;
 #endif /* _IP_CONNTRACK_CORE_H */
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack.h	2017-01-13 08:40:24.000000000 -0500
@@ -72,6 +72,10 @@ do {									\
 
 struct ip_conntrack_helper;
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/ve.h>
+#endif
+
 struct ip_conntrack
 {
 	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
@@ -127,6 +131,9 @@ struct ip_conntrack
 	/* Traversed often, so hopefully in different cacheline to top */
 	/* These are my tuples; original and reply */
 	struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
+#ifdef CONFIG_VE_IPTABLES
+        struct ve_struct *ct_owner_env;
+#endif
 };
 
 struct ip_conntrack_expect
@@ -241,7 +248,15 @@ extern s16 (*ip_ct_nat_offset)(const str
 			       u32 seq);
 
 /* Call me when a conntrack is destroyed. */
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_destroyed	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
+#else
 extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#define ve_ip_conntrack_destroyed	ip_conntrack_destroyed
+#endif
+
 
 /* Fake conntrack entry for untracked connections */
 extern struct ip_conntrack ip_conntrack_untracked;
@@ -270,7 +285,7 @@ extern void ip_conntrack_proto_put(struc
 extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
 
 extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
-					       struct ip_conntrack_tuple *);
+		struct ip_conntrack_tuple *, struct user_beancounter *);
 
 extern void ip_conntrack_free(struct ip_conntrack *ct);
 
@@ -279,6 +294,8 @@ extern void ip_conntrack_hash_insert(str
 extern struct ip_conntrack_expect *
 __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
+extern void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp);
+
 extern struct ip_conntrack_expect *
 ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
@@ -301,6 +318,7 @@ static inline int is_dying(struct ip_con
 
 extern unsigned int ip_conntrack_htable_size;
 extern int ip_conntrack_checksum;
+extern int ip_conntrack_disable_ve0;
  
 #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
 
@@ -352,6 +370,9 @@ ip_conntrack_event_cache(enum ip_conntra
 	struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
 	struct ip_conntrack_ecache *ecache;
 	
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	local_bh_disable();
 	ecache = &__get_cpu_var(ip_conntrack_ecache);
 	if (ct != ecache->ct)
@@ -363,7 +384,7 @@ ip_conntrack_event_cache(enum ip_conntra
 static inline void ip_conntrack_event(enum ip_conntrack_events event,
 				      struct ip_conntrack *ct)
 {
-	if (is_confirmed(ct) && !is_dying(ct))
+	if (is_confirmed(ct) && !is_dying(ct) && ve_is_super(get_exec_env()))
 		atomic_notifier_call_chain(&ip_conntrack_chain, event, ct);
 }
 
@@ -371,7 +392,9 @@ static inline void 
 ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct ip_conntrack_expect *exp)
 {
-	atomic_notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
+	if (ve_is_super(get_exec_env()))
+		atomic_notifier_call_chain(&ip_conntrack_expect_chain, event,
+									exp);
 }
 #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
 static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_helper.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2017-01-13 08:40:23.000000000 -0500
@@ -31,6 +31,9 @@ struct ip_conntrack_helper
 extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
 extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
 
+extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *);
+extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
+
 /* Allocate space for an expectation: this is mandatory before calling 
    ip_conntrack_expect_related.  You will have to call put afterwards. */
 extern struct ip_conntrack_expect *
@@ -41,4 +44,7 @@ extern void ip_conntrack_expect_put(stru
 extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
 extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
 
+#ifdef CONFIG_VE_IPTABLES
+extern struct list_head helpers;
+#endif
 #endif /*_IP_CONNTRACK_HELPER_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_irc.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_irc.h	2017-01-13 08:40:23.000000000 -0500
@@ -14,16 +14,26 @@
 #ifndef _IP_CONNTRACK_IRC_H
 #define _IP_CONNTRACK_IRC_H
 
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+
 /* This structure exists only once per master */
 struct ip_ct_irc_master {
 };
 
 #ifdef __KERNEL__
-extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct ip_conntrack_expect *exp);
+typedef unsigned int (*ip_nat_helper_irc_hook)(struct sk_buff **,
+		enum ip_conntrack_info, unsigned int, unsigned int,
+		struct ip_conntrack_expect *);
+
+extern ip_nat_helper_irc_hook ip_nat_irc_hook;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_irc_hook \
+	((ip_nat_helper_irc_hook) \
+		(get_exec_env()->_ip_conntrack->_ip_nat_irc_hook))
+#else
+#define ve_ip_nat_irc_hook	ip_nat_irc_hook
+#endif
 
 #define IRC_PORT	6667
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2017-01-13 08:40:23.000000000 -0500
@@ -67,6 +67,7 @@ struct ip_conntrack_protocol
 /* Protocol registration. */
 extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
 extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
+
 /* Existing built-in protocols */
 extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
 extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
@@ -74,6 +75,41 @@ extern struct ip_conntrack_protocol ip_c
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 extern int ip_conntrack_protocol_tcp_init(void);
 
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL)
+#include <linux/sched.h>
+#define ve_ip_ct_tcp_timeouts \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
+#define ve_ip_ct_udp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
+#define ve_ip_ct_udp_timeout_stream \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
+#define ve_ip_ct_icmp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
+#define ve_ip_ct_generic_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
+#define ve_ip_ct_log_invalid	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_log_invalid)
+#define ve_ip_ct_tcp_timeout_max_retrans \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans)
+#define ve_ip_ct_tcp_loose	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose)
+#define ve_ip_ct_tcp_be_liberal	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal)
+#define ve_ip_ct_tcp_max_retrans	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans)
+#else
+#define ve_ip_ct_tcp_timeouts		*tcp_timeouts
+#define ve_ip_ct_udp_timeout		ip_ct_udp_timeout
+#define ve_ip_ct_udp_timeout_stream	ip_ct_udp_timeout_stream
+#define ve_ip_ct_icmp_timeout		ip_ct_icmp_timeout
+#define ve_ip_ct_generic_timeout	ip_ct_generic_timeout
+#define ve_ip_ct_log_invalid		ip_ct_log_invalid
+#define ve_ip_ct_tcp_timeout_max_retrans ip_ct_tcp_timeout_max_retrans
+#define ve_ip_ct_tcp_loose		ip_ct_tcp_loose
+#define ve_ip_ct_tcp_be_liberal		ip_ct_tcp_be_liberal
+#define ve_ip_ct_tcp_max_retrans	ip_ct_tcp_max_retrans
+#endif
+
 /* Log invalid packets */
 extern unsigned int ip_ct_log_invalid;
 
@@ -85,10 +121,10 @@ extern int ip_ct_port_nfattr_to_tuple(st
 #ifdef CONFIG_SYSCTL
 #ifdef DEBUG_INVALID_PACKETS
 #define LOG_INVALID(proto) \
-	(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
+	(ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW)
 #else
 #define LOG_INVALID(proto) \
-	((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
+	((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \
 	 && net_ratelimit())
 #endif
 #else
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_nat_core.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_nat_core.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_nat_core.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_nat_core.h	2017-01-13 08:40:40.000000000 -0500
@@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct
 			       unsigned int hooknum,
 			       struct sk_buff **pskb);
 
-extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-					 struct ip_conntrack *ct,
-					 enum ip_nat_manip_type manip,
-					 enum ip_conntrack_dir dir);
+extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+					 enum ip_conntrack_info ctinfo,
+					 unsigned int hooknum,
+					 struct sk_buff **pskb);
 #endif /* _IP_NAT_CORE_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_nat.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_nat.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_nat.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_nat.h	2017-01-13 08:40:40.000000000 -0500
@@ -73,10 +73,20 @@ extern unsigned int ip_nat_setup_info(st
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
-/* Calculate relative checksum. */
-extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
-				    u_int32_t newval,
-				    u_int16_t oldcheck);
+extern void ip_nat_hash_conntrack(struct ip_conntrack *conntrack);
+
+#ifdef CONFIG_VE_IPTABLES
+static inline int ve_ip_nat_initialized(void)
+{
+	struct ve_struct *ve = get_exec_env();
+	return ve->_ip_conntrack != NULL &&
+		ve->_ip_conntrack->_ip_nat_protos != NULL &&
+		ve->_ip_conntrack->_ip_nat_bysource != NULL;
+}
+#else
+#define ve_ip_nat_initialized()   1
+#endif
+
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define ip_nat_multi_range ip_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_nat_rule.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_nat_rule.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_nat_rule.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_nat_rule.h	2017-01-13 08:40:23.000000000 -0500
@@ -6,7 +6,7 @@
 
 #ifdef __KERNEL__
 
-extern int ip_nat_rule_init(void) __init;
+extern int ip_nat_rule_init(void);
 extern void ip_nat_rule_cleanup(void);
 extern int ip_nat_rule_find(struct sk_buff **pskb,
 			    unsigned int hooknum,
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_tables.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_tables.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ip_tables.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ip_tables.h	2017-01-13 08:40:40.000000000 -0500
@@ -28,7 +28,7 @@
 #include <linux/netfilter/x_tables.h>
 
 #define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN
-#define IPT_TABLE_MAXNAMELEN XT_FUNCTION_MAXNAMELEN
+#define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN
 #define ipt_match xt_match
 #define ipt_target xt_target
 #define ipt_table xt_table
@@ -261,6 +261,26 @@ ipt_get_target(struct ipt_entry *e)
 	__ret;							\
 })
 
+/* fn returns 0 to continue iteration */
+#define IPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \
+({								\
+	unsigned int __i, __n;					\
+	int __ret = 0;						\
+	struct ipt_entry *__entry;				\
+								\
+	for (__i = 0, __n = 0; __i < (size);			\
+	     __i += __entry->next_offset, __n++) {		\
+		__entry = (void *)(entries) + __i;		\
+		if (__n < n)					\
+			continue;				\
+								\
+		__ret = fn(__entry , ## args);			\
+		if (__ret != 0)					\
+			break;					\
+	}							\
+	__ret;							\
+})
+
 /*
  *	Main firewall chains definitions and global var's definitions.
  */
@@ -282,7 +302,7 @@ extern void ipt_init(void) __init;
 //#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl)
 //#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl)
 
-extern int ipt_register_table(struct ipt_table *table,
+extern struct ipt_table *ipt_register_table(struct ipt_table *table,
 			      const struct ipt_replace *repl);
 extern void ipt_unregister_table(struct ipt_table *table);
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ipt_hashlimit.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ipt_hashlimit.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ipt_hashlimit.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ipt_hashlimit.h	2017-01-13 08:40:23.000000000 -0500
@@ -1,6 +1,8 @@
 #ifndef _IPT_HASHLIMIT_H
 #define _IPT_HASHLIMIT_H
 
+#include <net/compat.h>
+
 /* timings are in milliseconds. */
 #define IPT_HASHLIMIT_SCALE 10000
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
@@ -37,4 +39,23 @@ struct ipt_hashlimit_info {
 		struct ipt_hashlimit_info *master;
 	} u;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_hashlimit_info {
+	char name [IFNAMSIZ];		/* name */
+	struct hashlimit_cfg cfg;
+	compat_uptr_t hinfo;
+
+	/* Used internally by the kernel */
+	union {
+		compat_uptr_t ptr;
+		compat_uptr_t master;
+	} u;
+};
+#endif /*CONFIG_COMPAT*/
+
+struct ve_ipt_hashlimit {
+	struct hlist_head	hashlimit_htables;
+	struct proc_dir_entry	*hashlimit_procdir;
+};
 #endif /*_IPT_HASHLIMIT_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ipt_recent.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ipt_recent.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/ipt_recent.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/ipt_recent.h	2017-01-13 08:40:23.000000000 -0500
@@ -24,4 +24,10 @@ struct ipt_recent_info {
 	u_int8_t    side;
 };
 
+struct ve_ipt_recent {
+	struct list_head	tables;
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*proc_dir;
+#endif
+};
 #endif /*_IPT_RECENT_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/listhelp.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/listhelp.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv4/listhelp.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv4/listhelp.h	2017-01-13 08:40:23.000000000 -0500
@@ -105,6 +105,13 @@ static inline int __list_cmp_name(const 
 	return strcmp(name, i+sizeof(struct list_head)) == 0;
 }
 
+/* Like __list_cmp_name() but the field after the list_head
+   is a pointer to nul-terminated string. */
+static inline int __list_cmp_name2(const void *i, const char *name)
+{
+	return strcmp(name, *(char **)(i+sizeof(struct list_head))) == 0;
+}
+
 /* Returns false if same name already in list, otherwise does insert. */
 static inline int
 list_named_insert(struct list_head *head, void *new)
@@ -120,4 +127,7 @@ list_named_insert(struct list_head *head
 #define list_named_find(head, name)			\
 LIST_FIND(head, __list_cmp_name, void *, name)
 
+#define list_named_find2(head, name)			\
+LIST_FIND(head, __list_cmp_name2, void *, name)
+
 #endif /*_LISTHELP_H*/
diff -upr kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv6/ip6_tables.h kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv6/ip6_tables.h
--- kernel-2.6.18-417.el5.orig/include/linux/netfilter_ipv6/ip6_tables.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/netfilter_ipv6/ip6_tables.h	2017-01-13 08:40:40.000000000 -0500
@@ -226,6 +226,27 @@ struct ip6t_get_entries
 	struct ip6t_entry entrytable[0];
 };
 
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ip6t_entry {
+	struct ip6t_ip6			ipv6;
+	compat_uint_t			nfcache;
+	u_int16_t			target_offset;
+	u_int16_t			next_offset;
+	compat_uint_t			comefrom;
+	struct compat_xt_counters	counters;
+	unsigned char			elems[0];
+};
+
+static __inline__ struct ip6t_entry_target *
+compat_ip6t_get_target(struct compat_ip6t_entry *e)
+{
+	return (void *)e + e->target_offset;
+}
+
+#endif /* CONFIG_COMPAT */
+
 /* Standard return verdict, or do jump. */
 #define IP6T_STANDARD_TARGET XT_STANDARD_TARGET
 /* Error verdict. */
@@ -233,7 +254,7 @@ struct ip6t_get_entries
 
 /* Helper functions */
 static __inline__ struct ip6t_entry_target *
-ip6t_get_target(struct ip6t_entry *e)
+ip6t_get_target(const struct ip6t_entry *e)
 {
 	return (void *)e + e->target_offset;
 }
@@ -257,6 +278,25 @@ ip6t_get_target(struct ip6t_entry *e)
 	__ret;					\
 })
 
+ /* fn returns 0 to continue iteration */
+#define COMPAT_IP6T_MATCH_ITERATE(e, fn, args...)	\
+({							\
+	unsigned int __i;				\
+	int __ret = 0;					\
+	struct ip6t_entry_match *__m;			\
+							\
+	for (__i = sizeof(struct compat_ip6t_entry);	\
+	     __i < (e)->target_offset;			\
+	     __i += __m->u.match_size) {		\
+		__m = (void *)(e) + __i;		\
+							\
+		__ret = fn(__m , ## args);		\
+		if (__ret != 0)				\
+			break;				\
+	}						\
+	__ret;						\
+})
+
 /* fn returns 0 to continue iteration */
 #define IP6T_ENTRY_ITERATE(entries, size, fn, args...)		\
 ({								\
@@ -274,6 +314,43 @@ ip6t_get_target(struct ip6t_entry *e)
 	__ret;							\
 })
 
+/* fn returns 0 to continue iteration */
+#define COMPAT_IP6T_ENTRY_ITERATE(entries, size, fn, args...)	\
+({								\
+	unsigned int __i;					\
+	int __ret = 0;						\
+	struct compat_ip6t_entry *__e;				\
+								\
+	for (__i = 0; __i < (size); __i += __e->next_offset) {	\
+		__e = (void *)(entries) + __i;			\
+								\
+		__ret = fn(__e , ## args);			\
+		if (__ret != 0)					\
+			break;					\
+	}							\
+	__ret;							\
+})
+
+/* fn returns 0 to continue iteration */
+#define IP6T_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \
+({								\
+	unsigned int __i, __n;					\
+	int __ret = 0;						\
+	struct ip6t_entry *__entry;				\
+								\
+	for (__i = 0, __n = 0; __i < (size);			\
+	     __i += __entry->next_offset, __n++) {		\
+		__entry = (void *)(entries) + __i;		\
+		if (__n < n)					\
+			continue;				\
+								\
+		__ret = fn(__entry , ## args);			\
+		if (__ret != 0)					\
+			break;					\
+	}							\
+	__ret;							\
+})
+
 /*
  *	Main firewall chains definitions and global var's definitions.
  */
@@ -293,7 +370,7 @@ extern void ip6t_init(void) __init;
 	xt_register_match(match); })
 #define ip6t_unregister_match(match) xt_unregister_match(match)
 
-extern int ip6t_register_table(struct ip6t_table *table,
+extern struct ip6t_table *ip6t_register_table(struct ip6t_table *table,
 			       const struct ip6t_replace *repl);
 extern void ip6t_unregister_table(struct ip6t_table *table);
 extern unsigned int ip6t_do_table(struct sk_buff **pskb,
diff -upr kernel-2.6.18-417.el5.orig/include/linux/net.h kernel-2.6.18-417.el5-028stab121/include/linux/net.h
--- kernel-2.6.18-417.el5.orig/include/linux/net.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/net.h	2017-01-13 08:40:40.000000000 -0500
@@ -43,7 +43,7 @@ struct inode;
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
-#define SYS_ACCEPT4	18		/* sys_accept4(2)   *unused*	*/
+#define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
 #define SYS_RECVMMSG	19		/* sys_recvmmsg(2)		*/
 
 typedef enum {
@@ -92,6 +92,13 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MAX-1.  The
+ * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, accept4 */
+#define SOCK_CLOEXEC	O_CLOEXEC
+#define SOCK_NONBLOCK	O_NONBLOCK
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
@@ -198,6 +205,7 @@ extern int   	     sock_sendmsg(struct s
 extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
 				  size_t size, int flags);
 extern int 	     sock_map_fd(struct socket *sock);
+extern int 	     sock_map_fd_flags(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nfcalls.h kernel-2.6.18-417.el5-028stab121/include/linux/nfcalls.h
--- kernel-2.6.18-417.el5.orig/include/linux/nfcalls.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nfcalls.h	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,181 @@
+/*
+ *  include/linux/nfcalls.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_NFCALLS_H
+#define _LINUX_NFCALLS_H
+
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_MODULES
+extern struct module no_module;
+
+#define DECL_KSYM_MODULE(name)				\
+	extern struct module *vz_mod_##name
+
+#define INIT_KSYM_MODULE(name)				\
+	struct module *vz_mod_##name = &no_module;	\
+	EXPORT_SYMBOL(vz_mod_##name)
+
+static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
+{
+	/*
+	 * we want to be sure, that pointer updates are visible first:
+	 * 1. wmb() is here only to be on the safe side
+	 *    (note, no rmb() in KSYMSAFECALL)
+	 * 2. synchronize_sched() guarantees that updates are visible
+	 *    on all cpus and allows us to remove rmb() in KSYMSAFECALL
+	 */
+	wmb(); synchronize_sched();
+	*modp = mod;
+	/* just to be sure, our changes are visible as soon as possible */
+	wmb(); synchronize_sched();
+}
+
+static inline void __vzksym_modunresolve(struct module **modp)
+{
+	/*
+	 * try_module_get() in KSYMSAFECALL should fail at this moment since
+	 * THIS_MODULE is in unloading state (we should be called from fini),
+	 * no need to synchronize pointers/ve_module updates.
+	 */
+	*modp = &no_module;
+	/*
+	 * synchronize_sched() guarantees here that we see
+	 * updated module pointer before the module really gets away
+	 */
+	synchronize_sched();
+}
+
+static inline int __vzksym_module_get(struct module *mod)
+{
+	/*
+	 * we want to avoid rmb(), so use synchronize_sched() in KSYMUNRESOLVE
+	 * and smp_read_barrier_depends() here...
+	 */
+	smp_read_barrier_depends(); /* for module loading */
+	if (!try_module_get(mod))
+		return -EBUSY;
+
+	return 0;
+}
+
+static inline void __vzksym_module_put(struct module *mod)
+{
+	module_put(mod);
+}
+#else
+#define DECL_KSYM_MODULE(name)
+#define INIT_KSYM_MODULE(name)
+#define __vzksym_modresolve(modp, mod)
+#define __vzksym_modunresolve(modp)
+#define __vzksym_module_get(mod)			(0)
+#define __vzksym_module_put(mod)
+#endif
+
+#define __KSYMERRCALL(err, type, mod, name, args)	\
+({							\
+	type ret = (type)err;				\
+	if (!__vzksym_module_get(vz_mod_##mod))	{	\
+		if (vz_##name)				\
+			ret = ((*vz_##name)args); 	\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+	ret;						\
+})
+#define __KSYMSAFECALL_VOID(mod, name, args)		\
+do {							\
+	if (!__vzksym_module_get(vz_mod_##mod)) {	\
+		if (vz_##name)				\
+			((*vz_##name)args); 		\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+} while (0)
+
+#define DECL_KSYM_CALL(type, name, args)		\
+	extern type (*vz_##name) args
+#define INIT_KSYM_CALL(type, name, args)		\
+	type (*vz_##name) args;				\
+	EXPORT_SYMBOL(vz_##name)
+
+#define KSYMERRCALL(err, mod, name, args)		\
+	__KSYMERRCALL(err, int, mod, name, args)
+#define KSYMSAFECALL(type, mod, name, args)		\
+	__KSYMERRCALL(0, type, mod, name, args)
+#define KSYMSAFECALL_VOID(mod, name, args)		\
+	__KSYMSAFECALL_VOID(mod, name, args)
+#define KSYMREF(name)					vz_##name
+
+/* should be called _after_ KSYMRESOLVE's */
+#define KSYMMODRESOLVE(name)				\
+	__vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
+#define KSYMMODUNRESOLVE(name)				\
+	__vzksym_modunresolve(&vz_mod_##name)
+
+#define KSYMRESOLVE(name)				\
+	vz_##name = &name
+#define KSYMUNRESOLVE(name)				\
+	vz_##name = NULL
+
+#if defined(CONFIG_VE)
+DECL_KSYM_MODULE(ip_tables);
+DECL_KSYM_MODULE(ip6_tables);
+DECL_KSYM_MODULE(iptable_filter);
+DECL_KSYM_MODULE(ip6table_filter);
+DECL_KSYM_MODULE(iptable_mangle);
+DECL_KSYM_MODULE(ip6table_mangle);
+DECL_KSYM_MODULE(ip_conntrack);
+DECL_KSYM_MODULE(ip_conntrack_ftp);
+DECL_KSYM_MODULE(ip_conntrack_irc);
+DECL_KSYM_MODULE(xt_conntrack);
+DECL_KSYM_MODULE(ip_nat);
+DECL_KSYM_MODULE(iptable_nat);
+DECL_KSYM_MODULE(ip_nat_ftp);
+DECL_KSYM_MODULE(ip_nat_irc);
+
+struct sk_buff;
+
+DECL_KSYM_CALL(int, init_netfilter, (void));
+DECL_KSYM_CALL(int, init_iptables, (void));
+DECL_KSYM_CALL(int, init_ip6tables, (void));
+DECL_KSYM_CALL(int, init_iptable_filter, (void));
+DECL_KSYM_CALL(int, init_ip6table_filter, (void));
+DECL_KSYM_CALL(int, init_iptable_mangle, (void));
+DECL_KSYM_CALL(int, init_ip6table_mangle, (void));
+DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
+DECL_KSYM_CALL(int, init_ip_ct_ftp, (void));
+DECL_KSYM_CALL(int, init_ip_ct_irc, (void));
+DECL_KSYM_CALL(int, ip_nat_init, (void));
+DECL_KSYM_CALL(int, init_iptable_nat, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat, (void));
+DECL_KSYM_CALL(void, ip_nat_cleanup, (void));
+DECL_KSYM_CALL(void, fini_ip_ct_irc, (void));
+DECL_KSYM_CALL(void, fini_ip_ct_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
+DECL_KSYM_CALL(void, fini_ip6table_filter, (void));
+DECL_KSYM_CALL(void, fini_iptable_filter, (void));
+DECL_KSYM_CALL(void, fini_ip6table_mangle, (void));
+DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
+DECL_KSYM_CALL(void, fini_ip6tables, (void));
+DECL_KSYM_CALL(void, fini_iptables, (void));
+DECL_KSYM_CALL(void, fini_netfilter, (void));
+
+#include <linux/netfilter/x_tables.h>
+#endif /* CONFIG_VE */
+
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+DECL_KSYM_MODULE(vzmon);
+DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+#endif
+
+#endif /* _LINUX_NFCALLS_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nfsd/export.h kernel-2.6.18-417.el5-028stab121/include/linux/nfsd/export.h
--- kernel-2.6.18-417.el5.orig/include/linux/nfsd/export.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nfsd/export.h	2017-01-13 08:40:23.000000000 -0500
@@ -132,7 +132,7 @@ __be32 check_nfsd_access(struct svc_expo
 /*
  * Function declarations
  */
-void			nfsd_export_init(void);
+int			nfsd_export_init(void);
 void			nfsd_export_shutdown(void);
 void			nfsd_export_flush(void);
 void			exp_readlock(void);
@@ -156,12 +156,9 @@ int			exp_rootfh(struct auth_domain *, 
 int			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 int			nfserrno(int errno);
 
-extern struct cache_detail svc_export_cache;
+dev_t exp_get_dev(struct svc_export *ex);
 
-static inline void exp_put(struct svc_export *exp)
-{
-	cache_put(&exp->h, &svc_export_cache);
-}
+extern void exp_put(struct svc_export *exp);
 
 static inline void exp_get(struct svc_export *exp)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nfsd/nfsd.h kernel-2.6.18-417.el5-028stab121/include/linux/nfsd/nfsd.h
--- kernel-2.6.18-417.el5.orig/include/linux/nfsd/nfsd.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nfsd/nfsd.h	2017-01-13 08:40:23.000000000 -0500
@@ -50,6 +50,9 @@
 #define MAY_CREATE		(MAY_EXEC|MAY_WRITE)
 #define MAY_REMOVE		(MAY_EXEC|MAY_WRITE|MAY_TRUNC)
 
+#define RAPARM_HASH_BITS	4
+#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
+#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
 /*
  * Callback function for readdir
  */
@@ -63,7 +66,42 @@ typedef int (*nfsd_dirop_t)(struct inode
 extern struct svc_program	nfsd_program;
 extern struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
-extern struct svc_serv		*nfsd_serv;
+
+struct raparms {
+	struct raparms		*p_next;
+	unsigned int		p_count;
+	ino_t			p_ino;
+	dev_t			p_dev;
+	int			p_set;
+	struct file_ra_state	p_ra;
+	unsigned int		p_hindex;
+};
+
+struct raparm_hbucket {
+	struct raparms		*pb_head;
+	spinlock_t		pb_lock;
+} ____cacheline_aligned_in_smp;
+
+struct ve_nfsd_data {
+	struct file_system_type *nfsd_fs;
+	struct cache_detail *exp_cache;
+	struct cache_detail *key_cache;
+	struct list_head nfsd_list;
+	struct svc_serv *_nfsd_serv;
+	struct nfsd_stats stats;
+	struct svc_stat *svc_stat;
+	struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
+};
+
+#define nfsd_serv	(get_exec_env()->nfsd_data->_nfsd_serv)
+
+#ifndef CONFIG_VE
+extern struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
+#else
+#define raparm_hash (get_exec_env()->nfsd_data->raparm_hash)
+#endif
+
+void nfsd_kill_all(struct list_head *);
 /*
  * Function prototypes.
  */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nfsd/stats.h kernel-2.6.18-417.el5-028stab121/include/linux/nfsd/stats.h
--- kernel-2.6.18-417.el5.orig/include/linux/nfsd/stats.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nfsd/stats.h	2017-01-13 08:40:23.000000000 -0500
@@ -40,10 +40,13 @@ struct nfsd_stats {
 
 #ifdef __KERNEL__
 
+#ifndef CONFIG_VE
 extern struct nfsd_stats	nfsdstats;
-extern struct svc_stat		nfsd_svcstats;
+#else
+#define nfsdstats		(get_exec_env()->nfsd_data->stats)
+#endif
 
-void	nfsd_stat_init(void);
+int	nfsd_stat_init(void);
 void	nfsd_stat_shutdown(void);
 
 #endif /* __KERNEL__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nfs_fs.h kernel-2.6.18-417.el5-028stab121/include/linux/nfs_fs.h
--- kernel-2.6.18-417.el5.orig/include/linux/nfs_fs.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nfs_fs.h	2017-01-13 08:40:41.000000000 -0500
@@ -201,6 +201,8 @@ struct nfs_inode {
 	int			 delegation_state;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
+	unsigned long		dflags;
+#define NFS_DFLAG_LOCAL		0
 	struct inode		vfs_inode;
 };
 
@@ -271,7 +273,11 @@ static inline int nfs_server_capable(str
 
 static inline int NFS_USE_READDIRPLUS(struct inode *inode)
 {
+#ifdef NFS_ALLOW_READDIRPLUS
 	return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+#else
+	return 0;
+#endif
 }
 
 static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
@@ -310,8 +316,9 @@ extern int nfs_sync_mapping(struct addre
 extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping);
 extern void nfs_zap_caches(struct inode *);
 extern void nfs_invalidate_atime(struct inode *);
+struct nfs_dq_info;
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
-				struct nfs_fattr *);
+				struct nfs_fattr *, struct nfs_dq_info *qi);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
@@ -390,10 +397,12 @@ extern int nfs3_removexattr (struct dent
  */
 extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
 			unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
-			size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
-			size_t count, loff_t pos);
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
+			const struct iovec *iov, unsigned long nr_segs,
+			loff_t pos);
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
+			const struct iovec *iov, unsigned long nr_segs,
+			loff_t pos);
 
 /*
  * linux/fs/nfs/dir.c
@@ -406,7 +415,7 @@ extern const struct file_operations nfs_
 extern struct dentry_operations nfs_dentry_operations;
 
 extern void nfs_force_lookup_revalidate(struct inode *dir);
-extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
+extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs_dq_info *qi);
 
 /*
  * linux/fs/nfs/symlink.c
@@ -441,6 +450,15 @@ extern void nfs_block_sillyrename(struct
 extern void nfs_unblock_sillyrename(struct dentry *dentry);
 extern int  nfs_sillyrename(struct inode *dir, struct dentry *dentry);
 
+struct nfs_unlinkdata {
+	struct hlist_node list;
+	struct nfs_removeargs args;
+	struct nfs_removeres res;
+	struct inode *dir;
+	struct rpc_cred	*cred;
+	struct nfs_fattr dir_attr;
+};
+
 /*
  * linux/fs/nfs/write.c
  */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nfs_fs_sb.h kernel-2.6.18-417.el5-028stab121/include/linux/nfs_fs_sb.h
--- kernel-2.6.18-417.el5.orig/include/linux/nfs_fs_sb.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nfs_fs_sb.h	2017-01-13 08:40:23.000000000 -0500
@@ -69,6 +69,7 @@ struct nfs_client {
 	char			cl_ipaddr[16];
 	unsigned char		cl_id_uniquifier;
 #endif
+	struct ve_struct	*owner_env;
 };
 
 /*
@@ -122,6 +123,7 @@ struct nfs_server {
 
 	atomic_t active; /* Keep trace of any activity to this server */
 	wait_queue_head_t active_wq;  /* Wait for any activity to stop  */
+	struct ve_struct	*owner_env;
 };
 
 /* Server capabilities */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nmi.h kernel-2.6.18-417.el5-028stab121/include/linux/nmi.h
--- kernel-2.6.18-417.el5.orig/include/linux/nmi.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nmi.h	2017-01-13 08:40:15.000000000 -0500
@@ -20,4 +20,6 @@ extern void touch_nmi_watchdog(void);
 # define touch_nmi_watchdog() touch_softlockup_watchdog()
 #endif
 
+extern void nmi_show_regs(struct pt_regs *regs, int in_nmi);
+extern int do_nmi_show_regs(struct pt_regs *regs, int cpu);
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/notifier.h kernel-2.6.18-417.el5-028stab121/include/linux/notifier.h
--- kernel-2.6.18-417.el5.orig/include/linux/notifier.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/notifier.h	2017-01-13 08:40:16.000000000 -0500
@@ -108,8 +108,9 @@ extern int raw_notifier_call_chain(struc
 
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_FAIL		0x0002		/* Reject */
 #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|NOTIFY_FAIL)
 						/* Bad/Veto action */
 /*
  * Clean way to return from the notifier and stop further calls.
diff -upr kernel-2.6.18-417.el5.orig/include/linux/nsproxy.h kernel-2.6.18-417.el5-028stab121/include/linux/nsproxy.h
--- kernel-2.6.18-417.el5.orig/include/linux/nsproxy.h	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/nsproxy.h	2017-01-13 08:40:15.000000000 -0500
@@ -0,0 +1,52 @@
+#ifndef _LINUX_NSPROXY_H
+#define _LINUX_NSPROXY_H
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+
+struct namespace;
+struct uts_namespace;
+struct ipc_namespace;
+
+/*
+ * A structure to contain pointers to all per-process
+ * namespaces - fs (mount), uts, network, sysvipc, etc.
+ *
+ * 'count' is the number of tasks holding a reference.
+ * The count for each namespace, then, will be the number
+ * of nsproxies pointing to it, not the number of tasks.
+ *
+ * The nsproxy is shared by tasks which share all namespaces.
+ * As soon as a single namespace is cloned or unshared, the
+ * nsproxy is copied.
+ */
+struct nsproxy {
+	atomic_t count;
+	spinlock_t nslock;
+	struct uts_namespace *uts_ns;
+	struct ipc_namespace *ipc_ns;
+	struct namespace *namespace;
+};
+extern struct nsproxy init_nsproxy;
+
+struct nsproxy *dup_namespaces(struct nsproxy *orig);
+int copy_namespaces(int flags, struct task_struct *tsk);
+void get_task_namespaces(struct task_struct *tsk);
+void free_nsproxy(struct nsproxy *ns);
+
+static inline struct nsproxy *get_nsproxy(struct nsproxy *n)
+{
+	atomic_inc(&n->count);
+	return n;
+}
+
+static inline void put_nsproxy(struct nsproxy *ns)
+{
+	if (atomic_dec_and_test(&ns->count)) {
+		free_nsproxy(ns);
+	}
+}
+
+extern void exit_task_namespaces(struct task_struct *);
+struct namespace * get_task_mnt_ns(struct task_struct *);
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/page-flags.h kernel-2.6.18-417.el5-028stab121/include/linux/page-flags.h
--- kernel-2.6.18-417.el5.orig/include/linux/page-flags.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/page-flags.h	2017-01-13 08:40:24.000000000 -0500
@@ -88,6 +88,8 @@
 #define PG_buddy		19	/* Page is free, on buddy lists */
 #define PG_xpmem		27	/* Testing for xpmem. */
 
+#define PG_checkpointed		21	/* Page transferred */
+
 /* PG_owner_priv_1 users should have descriptive aliases */
 #define PG_checked              PG_owner_priv_1 /* Used by some filesystems */
 
@@ -282,6 +284,8 @@
 #define PageXpmem(page)	0
 #endif
 
+#define ClearPageCheckpointed(page) clear_bit(PG_checkpointed, &(page)->flags)
+
 struct page;	/* forward declaration */
 
 int test_clear_page_dirty(struct page *page);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/pagemap.h kernel-2.6.18-417.el5-028stab121/include/linux/pagemap.h
--- kernel-2.6.18-417.el5.orig/include/linux/pagemap.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/pagemap.h	2017-01-13 08:40:25.000000000 -0500
@@ -4,6 +4,7 @@
 /*
  * Copyright 1995 Linus Torvalds
  */
+
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/list.h>
@@ -12,13 +13,6 @@
 #include <asm/uaccess.h>
 #include <linux/gfp.h>
 
-/*
- * Bits in mapping->flags.  The lower __GFP_BITS_SHIFT bits are the page
- * allocation mode flags.
- */
-#define	AS_EIO		(__GFP_BITS_SHIFT + 0)	/* IO error on async write */
-#define AS_ENOSPC	(__GFP_BITS_SHIFT + 1)	/* ENOSPC on async write */
-#define AS_MM_ALL_LOCKS	(__GFP_BITS_SHIFT + 2)	/* under mm_take_all_locks() */
 
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/linux/percpu.h kernel-2.6.18-417.el5-028stab121/include/linux/percpu.h
--- kernel-2.6.18-417.el5.orig/include/linux/percpu.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/percpu.h	2017-01-13 08:40:16.000000000 -0500
@@ -39,16 +39,25 @@ struct percpu_data {
         (__typeof__(ptr))__p->ptrs[(cpu)];	\
 })
 
-extern void *__alloc_percpu(size_t size);
+#define static_percpu_ptr(sptr, sptrs) ({		\
+		int i;					\
+		for (i = 0; i < NR_CPUS; i++)		\
+			(sptr)->ptrs[i] = &(sptrs)[i];	\
+		((void *)(~(unsigned long)(sptr)));	\
+	})
+
+extern void *__alloc_percpu_mask(size_t size, gfp_t gfp);
 extern void free_percpu(const void *);
 
 #else /* CONFIG_SMP */
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
-static inline void *__alloc_percpu(size_t size)
+#define static_percpu_ptr(sptr, sptrs)	(&sptrs[0])
+
+static inline void *__alloc_percpu_mask(size_t size, gfp_t gfp)
 {
-	void *ret = kmalloc(size, GFP_KERNEL);
+	void *ret = kmalloc(size, gfp);
 	if (ret)
 		memset(ret, 0, size);
 	return ret;
@@ -61,6 +70,13 @@ static inline void free_percpu(const voi
 #endif /* CONFIG_SMP */
 
 /* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type)	((type *)(__alloc_percpu(sizeof(type))))
+static inline void *__alloc_percpu(size_t size)
+{
+	return __alloc_percpu_mask(size, GFP_KERNEL);
+}
+#define alloc_percpu(type)		\
+	((type *)(__alloc_percpu_mask(sizeof(type), GFP_KERNEL)))
+#define alloc_percpu_atomic(type)	\
+	((type *)(__alloc_percpu_mask(sizeof(type), GFP_ATOMIC)))
 
 #endif /* __LINUX_PERCPU_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/pid.h kernel-2.6.18-417.el5-028stab121/include/linux/pid.h
--- kernel-2.6.18-417.el5.orig/include/linux/pid.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/pid.h	2017-01-13 08:40:31.000000000 -0500
@@ -3,6 +3,18 @@
 
 #include <linux/rcupdate.h>
 
+#define VPID_BIT	10
+#define VPID_DIV	(1<<VPID_BIT)
+
+#ifdef CONFIG_VE
+#define __is_virtual_pid(pid)	((pid) & VPID_DIV)
+#define is_virtual_pid(pid)	\
+   (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
+#else
+#define __is_virtual_pid(pid)	0
+#define is_virtual_pid(pid)	0
+#endif
+
 enum pid_type
 {
 	PIDTYPE_PID,
@@ -47,9 +59,38 @@ struct pid
 	struct hlist_node pid_chain;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
+#ifdef CONFIG_VE
+	int vnr;
+	int veid;
+	struct hlist_node vpid_chain;
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+#endif
 	struct rcu_head rcu;
 };
 
+/*
+ * PID-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way a low pid_max
+ * value does not cause lots of bitmaps to be allocated, but
+ * the scheme scales to up to 4 million PIDs, runtime.
+ */
+typedef struct pidmap {
+	atomic_t nr_free;
+	void *page;
+} pidmap_t;
+
+#define PIDMAP_ENTRIES		((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+
+#define BITS_PER_PAGE		(PAGE_SIZE*8)
+
+#ifdef CONFIG_VE
+#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
+#else
+#define PIDMAP_NRFREE BITS_PER_PAGE
+#endif
+
 struct pid_link
 {
 	struct hlist_node node;
@@ -82,39 +123,119 @@ extern void FASTCALL(detach_pid(struct t
  * or rcu_read_lock() held.
  */
 extern struct pid *FASTCALL(find_pid(int nr));
+extern struct pid *FASTCALL(find_vpid(int nr));
 
+struct ve_struct;
 /*
  * Lookup a PID in the hash table, and return with it's count elevated.
  */
 extern struct pid *find_get_pid(int nr);
-extern struct pid *find_ge_pid(int nr);
+extern struct pid *find_ge_pid(int nr, struct ve_struct *ve);
 
 extern struct pid *alloc_pid(void);
 extern void FASTCALL(free_pid(struct pid *pid));
 
-#define pid_next(task, type)					\
-	((task)->pids[(type)].node.next)
+extern int alloc_pidmap(void);
+extern fastcall void free_pidmap(int pid);
+
+#ifndef CONFIG_VE
+
+#define vpid_to_pid(pid)	(pid)
+#define __vpid_to_pid(pid)	(pid)
+#define vpid_to_pid_ve(pid, ve)	(pid)
+#define pid_to_vpid(pid)	(pid)
+#define _pid_to_vpid(pid)	(pid)
+
+#define comb_vpid_to_pid(pid)	(pid)
+#define comb_pid_to_vpid(pid)	(pid)
+
+#else
+
+extern void free_vpid(struct pid *pid);
+extern pid_t alloc_vpid(struct pid *pid, pid_t vpid);
+extern pid_t vpid_to_pid(pid_t pid);
+extern pid_t __vpid_to_pid(pid_t pid);
+extern pid_t vpid_to_pid_ve(pid_t pid, struct ve_struct *env);
+extern pid_t pid_to_vpid(pid_t pid);
+extern pid_t _pid_to_vpid(pid_t pid);
+
+static inline int comb_vpid_to_pid(int vpid)
+{
+	int pid = vpid;
+
+	if (vpid > 0) {
+		pid = vpid_to_pid(vpid);
+		if (unlikely(pid < 0))
+			return 0;
+	} else if (vpid < 0) {
+		pid = vpid_to_pid(-vpid);
+		if (unlikely(pid < 0))
+			return 0;
+		pid = -pid;
+	}
+	return pid;
+}
 
-#define pid_next_task(task, type) 				\
-	hlist_entry(pid_next(task, type), struct task_struct,	\
-			pids[(type)].node)
+static inline int comb_pid_to_vpid(int pid)
+{
+	int vpid = pid;
 
+	if (pid > 0) {
+		vpid = pid_to_vpid(pid);
+		if (unlikely(vpid < 0))
+			return 0;
+	} else if (pid < 0) {
+		vpid = pid_to_vpid(-pid);
+		if (unlikely(vpid < 0))
+			return 0;
+		vpid = -vpid;
+	}
+	return vpid;
+}
+
+extern int glob_virt_pids;
+#endif
+
+#define pid_next_all(task, type)				\
+	((task)->pids[(type)].node.next)
+
+#define pid_next_task_all(task, type) 				\
+	hlist_entry(pid_next_all(task, type),			\
+			struct task_struct, pids[(type)].node)
 
 /* We could use hlist_for_each_entry_rcu here but it takes more arguments
  * than the do_each_task_pid/while_each_task_pid.  So we roll our own
  * to preserve the existing interface.
  */
-#define do_each_task_pid(who, type, task)				\
-	if ((task = find_task_by_pid_type(type, who))) {		\
-		prefetch(pid_next(task, type));				\
+#define do_each_task_pid_all(who, type, task)				\
+	if ((task = find_task_by_pid_type_all(type, who))) {		\
+		prefetch(pid_next_all(task, type));			\
 		do {
 
-#define while_each_task_pid(who, type, task)				\
-		} while (pid_next(task, type) &&  ({			\
-				task = pid_next_task(task, type);	\
+#define while_each_task_pid_all(who, type, task)			\
+		} while (pid_next_all(task, type) &&  ({		\
+				task = pid_next_task_all(task, type);	\
 				rcu_dereference(task);			\
-				prefetch(pid_next(task, type));		\
+				prefetch(pid_next_all(task, type));	\
 				1; }) );				\
 	}
 
+#ifndef CONFIG_VE
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#else /* CONFIG_VE */
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)			\
+			if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#endif /* CONFIG_VE */
+
+#define do_each_task_pid_ve(who, type, task)				\
+		__do_each_task_pid_ve(who, type, task, get_exec_env());
+#define while_each_task_pid_ve(who, type, task)				\
+		__while_each_task_pid_ve(who, type, task, get_exec_env());
+
 #endif /* _LINUX_PID_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/posix-timers.h kernel-2.6.18-417.el5-028stab121/include/linux/posix-timers.h
--- kernel-2.6.18-417.el5.orig/include/linux/posix-timers.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/posix-timers.h	2017-01-13 08:40:15.000000000 -0500
@@ -75,6 +75,7 @@ struct k_clock {
 	int (*timer_set) (struct k_itimer * timr, int flags,
 			  struct itimerspec * new_setting,
 			  struct itimerspec * old_setting);
+	void (*timer_cleanup) (struct k_itimer * timr);
 	int (*timer_del) (struct k_itimer * timr);
 #define TIMER_RETRY 1
 	void (*timer_get) (struct k_itimer * timr,
@@ -99,6 +100,7 @@ int posix_cpu_nsleep(const clockid_t whi
 		     struct timespec *rqtp, struct timespec __user *rmtp);
 int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 			struct itimerspec *new, struct itimerspec *old);
+void posix_cpu_timer_cleanup(struct k_itimer *timer);
 int posix_cpu_timer_del(struct k_itimer *timer);
 void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp);
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ppp_channel.h kernel-2.6.18-417.el5-028stab121/include/linux/ppp_channel.h
--- kernel-2.6.18-417.el5.orig/include/linux/ppp_channel.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ppp_channel.h	2017-01-13 08:40:23.000000000 -0500
@@ -23,6 +23,8 @@
 #include <linux/skbuff.h>
 #include <linux/poll.h>
 
+#include <linux/ve.h>
+
 struct ppp_channel;
 
 struct ppp_channel_ops {
@@ -56,6 +58,9 @@ extern void ppp_input(struct ppp_channel
    that we may have missed a packet. */
 extern void ppp_input_error(struct ppp_channel *, int code);
 
+/* Attach a channel to a given PPP unit in specified VE. */
+extern int ppp_register_ve_channel(struct ve_struct *, struct ppp_channel *);
+
 /* Attach a channel to a given PPP unit. */
 extern int ppp_register_channel(struct ppp_channel *);
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/proc_fs.h kernel-2.6.18-417.el5-028stab121/include/linux/proc_fs.h
--- kernel-2.6.18-417.el5.orig/include/linux/proc_fs.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/proc_fs.h	2017-01-13 08:40:26.000000000 -0500
@@ -4,6 +4,7 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/spinlock.h>
+#include <linux/smp_lock.h>
 #include <asm/atomic.h>
 
 /*
@@ -102,8 +103,16 @@ struct vmcore {
 
 extern struct proc_dir_entry proc_root;
 extern struct proc_dir_entry *proc_root_fs;
+extern struct file_system_type proc_fs_type;
+
+#ifdef CONFIG_VE
+#include <linux/sched.h>
+#define proc_net	(get_exec_env()->_proc_net)
+#define proc_net_stat	(get_exec_env()->_proc_net_stat)
+#else
 extern struct proc_dir_entry *proc_net;
 extern struct proc_dir_entry *proc_net_stat;
+#endif
 extern struct proc_dir_entry *proc_bus;
 extern struct proc_dir_entry *proc_root_driver;
 extern struct proc_dir_entry *proc_root_kcore;
@@ -122,12 +131,19 @@ unsigned long task_vsize(struct mm_struc
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
 char *task_mem(struct mm_struct *, char *);
 
+extern int proc_dentry_of_dead_task(struct dentry *dentry);
+extern struct file_operations dummy_proc_pid_file_operations;
+
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
+extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
+						mode_t mode,
+						struct proc_dir_entry *parent);
 struct proc_dir_entry *proc_create(const char *name, mode_t mode,
 				struct proc_dir_entry *parent,
 				const struct file_operations *proc_fops);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
+extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
 extern int proc_fill_super(struct super_block *,void *,int);
@@ -214,6 +230,15 @@ static inline struct proc_dir_entry *pro
 	return res;
 }
 
+static inline struct proc_dir_entry *proc_glob_fops_create(const char *name,
+	mode_t mode, struct file_operations *fops)
+{
+	struct proc_dir_entry *res = create_proc_glob_entry(name, mode, NULL);
+	if (res)
+		res->proc_fops = fops;
+	return res;
+}
+
 static inline void proc_net_remove(const char *name)
 {
 	remove_proc_entry(name,proc_net);
@@ -226,13 +251,18 @@ static inline void proc_net_remove(const
 #define proc_bus NULL
 
 #define proc_net_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
+#define proc_glob_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
 #define proc_net_create(name, mode, info)	({ (void)(mode), NULL; })
 static inline void proc_net_remove(const char *name) {}
 
+static inline int proc_dentry_of_dead_task(struct dentry *dentry) { return 0; }
+
 static inline void proc_flush_task(struct task_struct *task) { }
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
+	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
 static inline struct proc_dir_entry *proc_create(const char *name,
 	mode_t mode, struct proc_dir_entry *parent,
 	const struct file_operations *proc_fops)
@@ -261,6 +291,48 @@ extern struct proc_dir_entry proc_root;
 
 #endif /* CONFIG_PROC_FS */
 
+static inline struct proc_dir_entry *create_proc_entry_mod(const char *name,
+					mode_t mode,
+					struct proc_dir_entry *parent,
+					struct module *owner)
+{
+	struct proc_dir_entry *ent;
+
+	/*
+	 * lock_kernel() here protects against proc_lookup()
+	 * which can find this freshly created entry w/o owner being set.
+	 * this can lead to the module being put more times than it was got.
+	 */
+	lock_kernel();
+	ent = create_proc_entry(name, mode, parent);
+	if (ent)
+		ent->owner = owner;
+	unlock_kernel();
+
+	return ent;
+}
+
+static inline struct proc_dir_entry *create_proc_glob_entry_mod(const char *name, 
+					mode_t mode,
+					struct proc_dir_entry *parent,
+					struct module *owner)
+{
+	struct proc_dir_entry *ent;
+
+	/*
+	 * lock_kernel() here protects against proc_lookup()
+	 * which can find this freshly created entry w/o owner being set.
+	 * this can lead to the module being put more times than it was got.
+	 */
+	lock_kernel();
+	ent = create_proc_glob_entry(name, mode, parent);
+	if (ent)
+		ent->owner = owner;
+	unlock_kernel();
+
+	return ent;
+}
+
 #if !defined(CONFIG_PROC_KCORE)
 static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
 {
@@ -290,10 +362,24 @@ static inline struct proc_dir_entry *PDE
 	return PROC_I(inode)->pde;
 }
 
+static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
+{
+	if (de)
+		atomic_inc(&de->count);
+	return de;
+}
+
+void de_put(struct proc_dir_entry *de);
+
 struct proc_maps_private {
 	struct pid *pid;
 	struct task_struct *task;
 	struct vm_area_struct *tail_vma;
 };
 
+#define LPDE(inode)	(PROC_I((inode))->pde)
+#ifdef CONFIG_VE
+#define GPDE(inode)	(*(struct proc_dir_entry **)(&(inode)->i_pipe))
+#endif
+
 #endif /* _LINUX_PROC_FS_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ptrace.h kernel-2.6.18-417.el5-028stab121/include/linux/ptrace.h
--- kernel-2.6.18-417.el5.orig/include/linux/ptrace.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ptrace.h	2017-01-13 08:40:24.000000000 -0500
@@ -66,6 +66,43 @@ extern int ptrace_may_attach(struct task
 #include <linux/tracehook.h>
 #endif
 
+#define PTRACE_DEBUG 1
+
+struct ptrace_state
+{
+	struct rcu_head rcu;
+	atomic_t refcnt;
+#ifdef PTRACE_DEBUG
+	atomic_t check_dead;
+#endif
+
+	/*
+	 * These elements are always available, even when the struct is
+	 * awaiting destruction at the next RCU callback point.
+	 */
+	struct utrace_attached_engine *engine;
+	struct task_struct *task; /* Target task.  */
+	struct task_struct *parent; /* Whom we report to.  */
+	struct list_head entry;	/* Entry on parent->ptracees list.  */
+
+	u8 options;		/* PTRACE_SETOPTIONS bits.  */
+	unsigned int syscall:1;	/* Reporting for syscall.  */
+#ifdef PTRACE_SYSEMU
+	unsigned int sysemu:1;	/* PTRACE_SYSEMU in progress. */
+#endif
+	unsigned int have_eventmsg:1; /* u.eventmsg valid. */
+	unsigned int cap_sys_ptrace:1; /* Tracer capable.  */
+
+	union
+	{
+		unsigned long eventmsg;
+		siginfo_t *siginfo;
+	} u;
+};
+
+extern struct utrace_engine_ops ptrace_utrace_ops;
+
+
 /*
  * These must be defined by arch code to handle machine-specific ptrace
  * requests such as PTRACE_PEEKUSR and PTRACE_GETREGS.  Returns -ENOSYS for
diff -upr kernel-2.6.18-417.el5.orig/include/linux/quota-compat.h kernel-2.6.18-417.el5-028stab121/include/linux/quota-compat.h
--- kernel-2.6.18-417.el5.orig/include/linux/quota-compat.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/quota-compat.h	2017-01-13 08:40:16.000000000 -0500
@@ -0,0 +1,60 @@
+/*
+ *  include/linux/quota-compat.h
+ *
+ *  Copyright (C) 2008  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_QUOTA_COMPAT_
+#define _LINUX_QUOTA_COMPAT_
+
+#include <linux/compat.h>
+
+#define QC_QUOTAON  0x0100	/* enable quotas */
+#define QC_QUOTAOFF 0x0200	/* disable quotas */
+
+/* GETQUOTA, SETQUOTA and SETUSE, which were at 0x0300-0x0500, now
+ * have other parameters
+ */
+#define QC_SYNC     0x0600	/* sync disk copy of a filesystems quotas */
+#define QC_SETQLIM  0x0700	/* set limits */
+/* GETSTATS at 0x0800 is now longer... */
+#define QC_GETINFO  0x0900	/* get info about quotas - graces, flags... */
+#define QC_SETINFO  0x0A00	/* set info about quotas */
+#define QC_SETGRACE 0x0B00	/* set inode and block grace */
+#define QC_SETFLAGS 0x0C00	/* set flags for quota */
+#define QC_GETQUOTA 0x0D00	/* get limits and usage */
+#define QC_SETQUOTA 0x0E00	/* set limits and usage */
+#define QC_SETUSE   0x0F00	/* set usage */
+/* 0x1000 used by old RSQUASH */
+#define QC_GETSTATS 0x1100	/* get collected stats */
+
+struct compat_v2_dqblk {
+	unsigned int dqb_ihardlimit;
+	unsigned int dqb_isoftlimit;
+	unsigned int dqb_curinodes;
+	unsigned int dqb_bhardlimit;
+	unsigned int dqb_bsoftlimit;
+	qsize_t dqb_curspace;
+	__kernel_time_t dqb_btime;
+	__kernel_time_t dqb_itime;
+};
+
+#ifdef CONFIG_COMPAT
+struct compat_v2_dqblk_32 {
+	unsigned int dqb_ihardlimit;
+	unsigned int dqb_isoftlimit;
+	unsigned int dqb_curinodes;
+	unsigned int dqb_bhardlimit;
+	unsigned int dqb_bsoftlimit;
+	qsize_t dqb_curspace;
+	compat_time_t dqb_btime;
+	compat_time_t dqb_itime;
+} __attribute__ ((packed));
+#endif
+
+#endif /* _LINUX_QUOTA_COMPAT_ */
+
diff -upr kernel-2.6.18-417.el5.orig/include/linux/quota.h kernel-2.6.18-417.el5-028stab121/include/linux/quota.h
--- kernel-2.6.18-417.el5.orig/include/linux/quota.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/quota.h	2017-01-13 08:40:24.000000000 -0500
@@ -44,8 +44,6 @@
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
 typedef __u64 qsize_t;          /* Type in which we store sizes */
 
-extern spinlock_t dq_data_lock;
-
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -134,6 +132,10 @@ struct if_dqinfo {
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 
+#include <linux/spinlock.h>
+
+extern spinlock_t dq_data_lock;
+
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
 #include <linux/dqblk_v2.h>
@@ -249,6 +251,8 @@ struct quota_format_ops {
 	int (*release_dqblk)(struct dquot *dquot);	/* Called when last reference to dquot is being dropped */
 };
 
+struct inode;
+struct iattr;
 /* Operations working with dquots */
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
@@ -263,6 +267,10 @@ struct dquot_operations {
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
+	int (*rename) (struct inode *, struct inode *, struct inode *);
+
+	void (*swap_inode) (struct inode *, struct inode *);
+	void (*shutdown) (struct super_block *);
 #ifndef __GENKSYMS__
 	/* reserve quota for delayed block allocation */
 	int (*reserve_space) (struct inode *, qsize_t, int);
@@ -277,6 +285,7 @@ struct dquot_operations {
 };
 
 /* Operations handling requests from userspace */
+struct v2_disk_dqblk;
 struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, char *);
 	int (*quota_off)(struct super_block *, int);
@@ -289,6 +298,10 @@ struct quotactl_ops {
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
+#ifdef CONFIG_QUOTA_COMPAT
+	int (*get_quoti)(struct super_block *, int, unsigned int,
+			struct v2_disk_dqblk __user *);
+#endif
 };
 
 struct quota_format_type {
@@ -309,6 +322,10 @@ struct quota_info {
 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
 	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_master *vzdq_master;
+	int vzdq_count;
+#endif
 };
 
 /* Inline would be better but we need to dereference super_block which is not defined yet */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/quotaops.h kernel-2.6.18-417.el5-028stab121/include/linux/quotaops.h
--- kernel-2.6.18-417.el5.orig/include/linux/quotaops.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/quotaops.h	2017-01-13 08:40:24.000000000 -0500
@@ -231,6 +231,19 @@ static __inline__ int DQUOT_TRANSFER(str
 	return 0;
 }
 
+static __inline__ int DQUOT_RENAME(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	struct dquot_operations *q_op;
+
+	q_op = inode->i_sb->dq_op;
+	if (q_op && q_op->rename) {
+		if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
+			return 1;
+	}
+	return 0;
+}
+
 /* The following two functions cannot be called inside a transaction */
 #define DQUOT_SYNC(sb)	sync_dquots(sb, -1)
 
@@ -243,6 +256,35 @@ static __inline__ int DQUOT_OFF(struct s
 	return ret;
 }
 
+static __inline__ void DQUOT_SWAP(struct inode *inode, struct inode *tmpl)
+{
+	if (sb_any_quota_enabled(tmpl->i_sb) &&
+	    tmpl->i_sb->dq_op->swap_inode)
+		tmpl->i_sb->dq_op->swap_inode(inode, tmpl);
+}
+
+static __inline__ int DQUOT_CHECK_SPACE(struct inode *inode)
+{
+	if (DQUOT_ALLOC_SPACE_NODIRTY(inode, 512))
+		return -EDQUOT;
+	DQUOT_FREE_SPACE_NODIRTY(inode, 512);
+	return 0;
+}
+
+static __inline__ void DQUOT_SYNC_BLOCKS(struct inode *inode, blkcnt_t blocks)
+{
+	if (sb_any_quota_enabled(inode->i_sb)) {
+		if (blocks > inode->i_blocks)
+			inode->i_sb->dq_op->alloc_space(inode,
+							(qsize_t)(blocks-inode->i_blocks)*512,
+							13 /*DQUOT_CMD_FORCE*/);
+		else if (blocks < inode->i_blocks)
+			inode->i_sb->dq_op->free_space(inode, (qsize_t)(inode->i_blocks-blocks)*512);
+	} else
+		inode->i_blocks = blocks;
+}
+
+
 #else
 
 /*
@@ -260,6 +302,7 @@ static __inline__ int DQUOT_OFF(struct s
 #define DQUOT_SYNC(sb)				do { } while(0)
 #define DQUOT_OFF(sb)				do { } while(0)
 #define DQUOT_TRANSFER(inode, iattr)		(0)
+#define DQUOT_RENAME(inode, old_dir, new_dir)	(0)
 static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
@@ -303,6 +346,15 @@ static inline void DQUOT_FREE_SPACE(stru
 	mark_inode_dirty(inode);
 }	
 
+static inline void DQUOT_SWAP(struct inode *inode, struct inode *tmpl)
+{
+}
+
+static inline void DQUOT_SYNC_BLOCKS(struct inode *inode, blkcnt_t blocks)
+{
+	inode->i_blocks = blocks;
+}
+
 #endif /* CONFIG_QUOTA */
 
 #define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr)	DQUOT_PREALLOC_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits)
diff -upr kernel-2.6.18-417.el5.orig/include/linux/radix-tree.h kernel-2.6.18-417.el5-028stab121/include/linux/radix-tree.h
--- kernel-2.6.18-417.el5.orig/include/linux/radix-tree.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/radix-tree.h	2017-01-13 08:40:18.000000000 -0500
@@ -63,6 +63,7 @@ void *radix_tree_tag_clear(struct radix_
 			unsigned long index, unsigned int tag);
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
+int radix_tree_prev_tag_get(struct radix_tree_root *root, unsigned int tag);
 unsigned int
 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 		unsigned long first_index, unsigned int max_items,
diff -upr kernel-2.6.18-417.el5.orig/include/linux/rmap.h kernel-2.6.18-417.el5-028stab121/include/linux/rmap.h
--- kernel-2.6.18-417.el5.orig/include/linux/rmap.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/rmap.h	2017-01-13 08:40:24.000000000 -0500
@@ -34,21 +34,13 @@ struct anon_vma {
 	 * mm_take_all_locks() (mm_all_locks_mutex).
 	 */
 	struct list_head head;	/* List of private "related" vmas */
+
+	struct user_beancounter *anon_vma_ub;
 };
 
 #ifdef CONFIG_MMU
 
-extern kmem_cache_t *anon_vma_cachep;
-
-static inline struct anon_vma *anon_vma_alloc(void)
-{
-	return kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL);
-}
 
-static inline void anon_vma_free(struct anon_vma *anon_vma)
-{
-	kmem_cache_free(anon_vma_cachep, anon_vma);
-}
 
 static inline void anon_vma_lock(struct vm_area_struct *vma)
 {
@@ -81,6 +73,7 @@ void page_add_anon_rmap(struct page *, s
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
+struct anon_vma *page_lock_anon_vma(struct page *page);
 
 /**
  * page_dup_rmap - duplicate pte mapping to a page
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sched.h kernel-2.6.18-417.el5-028stab121/include/linux/sched.h
--- kernel-2.6.18-417.el5.orig/include/linux/sched.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sched.h	2017-01-13 08:40:40.000000000 -0500
@@ -24,6 +24,11 @@
 #define CLONE_UNTRACED		0x00800000	/* set if the tracing process can't force CLONE_PTRACE on this clone */
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
 #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
+#define CLONE_NEWUTS		0x04000000	/* New utsname group? */
+#define CLONE_NEWIPC		0x08000000	/* New ipcs */
+
+/* mask of clones which are disabled in OpenVZ VEs */
+#define CLONE_NAMESPACES_MASK	(CLONE_NEWUTS | CLONE_NEWIPC)
 
 /*
  * Scheduling policies
@@ -85,8 +90,11 @@ struct sched_param {
 
 #include <asm/processor.h>
 
+#include <ub/ub_task.h>
+
 struct exec_domain;
 struct futex_pi_state;
+struct ve_struct;
 extern int exec_shield;
 extern int print_fatal_signals;
 
@@ -120,6 +128,9 @@ extern unsigned long avenrun[];		/* Load
 	load += n*(FIXED_1-exp); \
 	load >>= FSHIFT;
 
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
 extern int sched_interactive;
 extern int sched_interactive_min;
 extern int sched_interactive_max;
@@ -128,12 +139,27 @@ extern int nr_threads;
 extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
+
+extern unsigned long nr_sleeping(void);
+extern unsigned long nr_stopped(void);
+extern unsigned long nr_zombie;
+extern atomic_t nr_dead;
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
-extern unsigned long weighted_cpuload(const int cpu);
 
+#ifdef CONFIG_VE
+struct ve_struct;
+struct vcpu_scheduler;
+extern unsigned long nr_running_vsched(struct vcpu_scheduler *);
+extern unsigned long nr_iowait_ve(void);
+extern unsigned int nr_unint_vsched(struct vcpu_scheduler *);
+#else
+#define nr_running_vsched(vsched)	0
+#define nr_iowait_ve()			0
+#define nr_unint_vsched(vsched)		0
+#endif
 
 /*
  * Task state bitmask. NOTE! These bits are also
@@ -191,6 +217,9 @@ extern unsigned long weighted_cpuload(co
 extern rwlock_t tasklist_lock;
 extern spinlock_t mmlist_lock;
 
+extern struct list_head vsched_list;
+extern spinlock_t vsched_list_lock;
+
 struct task_struct;
 
 extern void sched_init(void);
@@ -201,6 +230,7 @@ extern cpumask_t nohz_cpu_mask;
 
 extern void show_state(void);
 extern void show_regs(struct pt_regs *);
+extern void show_vsched(void);
 
 /*
  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
@@ -266,7 +296,7 @@ extern signed long schedule_timeout_inte
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
 
-struct namespace;
+struct nsproxy;
 
 /* Maximum number of active map areas.. This is a random (large) number */
 #define DEFAULT_MAX_MAP_COUNT	65536
@@ -395,6 +425,8 @@ struct mm_struct {
 	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
 	unsigned dumpable:2;
+	unsigned vps_dumpable:2;
+	unsigned oom_killed:1;
 #ifndef __GENKSYMS__
 	unsigned ia32_compat:1;
 #endif
@@ -420,6 +452,12 @@ struct mm_struct {
 #ifndef __GENKSYMS__
 	unsigned long shlib_base;	/* base of lib map area (ASCII armour)*/
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *mm_ub;
+#endif
+	/* protected by mmap_sem write held or read with page_table_lock */
+	long page_table_precharge;
+	long page_table_charged;
 };
 
 static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
@@ -440,6 +478,7 @@ struct sighand_struct {
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
+	wait_queue_head_t	signalfd_wqh;
 };
 
 struct pacct_struct {
@@ -450,6 +489,8 @@ struct pacct_struct {
 	unsigned long		ac_minflt, ac_majflt;
 };
 
+#include <linux/ve_task.h>
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -501,6 +542,10 @@ struct signal_struct {
 	pid_t session;
 	/* boolean value for session group leader */
 	int leader;
+#ifdef CONFIG_VE
+	pid_t vpgrp;
+	pid_t vsession;
+#endif
 
 	struct tty_struct *tty; /* NULL if no tty */
 
@@ -877,6 +922,9 @@ static inline void prefetch_stack(struct
 struct audit_context;		/* See audit.c */
 struct mempolicy;
 struct pipe_inode_info;
+struct uts_namespace;
+struct vcpu_scheduler;
+struct vcpu_struct;
 
 enum sleep_type {
 	SLEEP_NORMAL,
@@ -912,6 +960,14 @@ struct task_struct {
 	int oncpu;
 #endif
 #endif
+#ifdef CONFIG_SCHED_VCPU
+	struct vcpu_scheduler *vsched;
+	struct vcpu_struct *vcpu;
+
+	/* id's are saved to avoid locking (e.g. on vsched->id access) */
+	int vsched_id;
+	int vcpu_id;
+#endif
 	int load_weight;	/* for niceness load balancing purposes */
 	int prio, static_prio, normal_prio;
 	struct list_head run_list;
@@ -947,6 +1003,10 @@ struct task_struct {
 	unsigned did_exec:1;
 	pid_t pid;
 	pid_t tgid;
+#ifdef CONFIG_VE
+	pid_t vpid;
+	pid_t vtgid;
+#endif
 	/* 
 	 * pointers to parent process, youngest child, younger sibling,
 	 * older sibling, respectively.  (p->father can be replaced with 
@@ -1009,8 +1069,8 @@ struct task_struct {
 	struct fs_struct *fs;
 /* open file information */
 	struct files_struct *files;
-/* namespace */
-	struct namespace *namespace;
+/* namespaces */
+	struct nsproxy *nsproxy;
 /* signal handlers */
 	struct signal_struct *signal;
 	struct sighand_struct *sighand;
@@ -1131,13 +1191,30 @@ struct task_struct {
 #endif
 
 	/*
+	 * state tracking for suspend
+	 * FIXME - ptrace is completely rewritten in this kernel
+	 * so set_pn_state() is not called correctly in many places
+	 */
+	__u8	 pn_state;
+	__u8	 stopped_state:1;
+
+	/*
 	 * cache last used pipe for splice
 	 */
 	struct pipe_inode_info *splice_pipe;
 #ifdef	CONFIG_TASK_DELAY_ACCT
 	struct task_delay_info *delays;
 #endif
-
+#ifdef CONFIG_USER_RESOURCE
+	struct task_beancounter task_bc;
+#endif
+#ifdef CONFIG_VE
+	struct ve_task_info ve_task_info;
+#endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	unsigned long	magic;
+	struct inode	*ino;
+#endif
 #ifdef CONFIG_X86
 	/*
 	 * This will break KABI on ia64 as smp_processor_id() is a
@@ -1186,6 +1263,43 @@ static inline void put_task_struct(struc
 		__put_task_struct(t);
 }
 
+#ifndef CONFIG_VE
+#define set_pn_state(tsk, state)	do { } while(0)
+#define clear_pn_state(tsk)		do { } while(0)
+#define set_stop_state(tsk)		do { } while(0)
+#define clear_stop_state(tsk)		do { } while(0)
+#else
+#define PN_STOP_TF	1	/* was not in 2.6.8 */
+#define PN_STOP_TF_RT	2	/* was not in 2.6.8 */ 
+#define PN_STOP_ENTRY	3
+#define PN_STOP_FORK	4
+#define PN_STOP_VFORK	5
+#define PN_STOP_SIGNAL	6
+#define PN_STOP_EXIT	7
+#define PN_STOP_EXEC	8
+#define PN_STOP_LEAVE	9
+
+static inline void set_pn_state(struct task_struct *tsk, int state)
+{
+	tsk->pn_state = state;
+}
+
+static inline void clear_pn_state(struct task_struct *tsk)
+{
+	tsk->pn_state = 0;
+}
+
+static inline void set_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 1;
+}
+
+static inline void clear_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 0;
+}
+#endif
+
 /*
  * Per process flags
  */
@@ -1202,7 +1316,7 @@ static inline void put_task_struct(struc
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_FLUSHER	0x00001000	/* responsible for disk writeback */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
-#define PF_FREEZE	0x00004000	/* this task is being frozen for suspend now */
+#define PF_EXIT_RESTART	0x00004000	/* do_exit() restarted, see do_exit() */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
@@ -1214,6 +1328,7 @@ static inline void put_task_struct(struc
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
+#define PF_THREAD_BOUND	0x04000000	/* Thread bound to specific cpu */
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_PREEMPT_NOTIFIER 0x40000000  /* preempt notifier attached to the task */
@@ -1273,6 +1388,21 @@ extern unsigned long long sched_clock(vo
 extern unsigned long long
 current_sched_time(const struct task_struct *current_task);
 
+static inline unsigned long cycles_to_clocks(cycles_t cycles)
+{
+	extern unsigned long cycles_per_clock;
+	do_div(cycles, cycles_per_clock);
+	return cycles;
+}
+
+static inline u64 cycles_to_jiffies(cycles_t cycles)
+{
+	extern unsigned long cycles_per_jiffy;
+	do_div(cycles, cycles_per_jiffy);
+	return cycles;
+}
+
+
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
@@ -1340,10 +1470,226 @@ extern struct   mm_struct init_mm;
 
 #define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
 extern struct task_struct *find_task_by_pid_type(int type, int pid);
+
+#define find_task_by_pid_all(nr)	\
+		find_task_by_pid_type_all(PIDTYPE_PID, nr)
+extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
 extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
+#ifndef CONFIG_VE
+#define find_task_by_pid_ve find_task_by_pid_all
+
+#define ve_is_super(env)			1
+#define ve_accessible(target, owner)		1
+#define ve_accessible_strict(target, owner)	1
+#define ve_accessible_veid(target, owner)		1
+#define ve_accessible_strict_veid(target, owner)	1
+
+#define VEID(envid)				0
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+#define get_task_pid_ve(tsk, ve)	get_task_pid(tsk)
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	return pid_alive(p) ? p->group_leader->parent->tgid : 0;
+}
+
+static inline struct task_struct *find_task_by_pid_type_ve(int type, int pid) {
+	return find_task_by_pid_type_all(type, pid);
+}
+
+#else	/* CONFIG_VE */
+
+#include <asm/current.h>
+#include <linux/ve.h>
+
+#define find_task_by_pid_ve(nr)	\
+		find_task_by_pid_type_ve(PIDTYPE_PID, nr)
+
+extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
+
+#define VEID(envid)	((envid)->veid)
+
+#define ve_is_super(env) ((env) == get_ve0())
+#define ve_accessible_strict(target, owner)	((target) == (owner))
+static inline int ve_accessible(struct ve_struct *target,
+				struct ve_struct *owner) {
+	return ve_is_super(owner) || ve_accessible_strict(target, owner);
+}
+
+#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
+static inline int ve_accessible_veid(envid_t target, envid_t owner)
+{
+	return get_ve0()->veid == owner ||
+	       ve_accessible_strict_veid(target, owner);
+}
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->vpid;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->vtgid;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->vpgrp;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->signal->vsession;
+}
+
+static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
+{
+	return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
+}
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return get_task_pid_ve(tsk, get_exec_env());
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->vpid = pid;
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->vtgid = pid;
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->signal->vpgrp = pid;
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->signal->vsession = pid;
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	struct task_struct *parent;
+	struct ve_struct *env;
+
+	if (!pid_alive(p))
+		return 0;
+	env = get_exec_env();
+	if (get_task_pid_ve(p, env) == 1)
+		return 0;
+	parent = p->group_leader->parent;
+	return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ?
+		get_task_tgid(parent) : 1;
+}
+
+void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
+				cycles_t *strv, unsigned int cpu);
+void ve_sched_attach(struct ve_struct *envid);
+
+#endif	/* CONFIG_VE */
+
+
+#ifdef CONFIG_VE
+extern cycles_t ve_sched_get_idle_time(int cpu);
+extern cycles_t ve_sched_get_idle_time_total(struct ve_struct *ve);
+extern cycles_t ve_sched_get_iowait_time(int cpu);
+#else
+#define ve_sched_get_idle_time(cpu)		0
+#define ve_sched_get_idle_time_total(ve)	0
+#define ve_sched_get_iowait_time(cpu)		0
+#endif
+
+#ifdef CONFIG_SCHED_VCPU
+struct vcpu_scheduler;
+extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched,
+		cpumask_t *mask);
+#else
+#define vsched_cpu_online_map(vsched, mask)     do {    \
+			*mask = cpu_online_map;         \
+	} while (0)
+#endif
+
 /* per-UID process charging. */
+extern int set_user(uid_t new_ruid, int dumpclear);
 extern struct user_struct * alloc_uid(uid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
@@ -1361,7 +1707,7 @@ extern int FASTCALL(wake_up_state(struct
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
 extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 						unsigned long clone_flags));
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU)
  extern void kick_process(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
@@ -1480,12 +1826,19 @@ extern struct task_struct *child_reaper;
 
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern long do_fork_pid(unsigned long clone_flags,
+			unsigned long stack_start,
+			struct pt_regs *regs,
+			unsigned long stack_size,
+			int __user *parent_tidptr,
+			int __user *child_tidptr,
+			long pid0);
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern void get_task_comm(char *to, struct task_struct *tsk);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU)
 extern void wait_task_inactive(struct task_struct * p);
 #else
 #define wait_task_inactive(p)	do { } while (0)
@@ -1494,21 +1847,85 @@ extern void wait_task_inactive(struct ta
 #define remove_parent(p)	list_del_init(&(p)->sibling)
 #define add_parent(p)		list_add_tail(&(p)->sibling,&(p)->parent->children)
 
-#define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task_all(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
 
-#define for_each_process(p) \
-	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+#define for_each_process_all(p) \
+	for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
  *          'break' will not work as expected - use goto instead.
  */
-#define do_each_thread(g, t) \
-	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+#define do_each_thread_all(g, t) \
+	for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
+
+#define while_each_thread_all(g, t) \
+	while ((t = next_thread(t)) != g)
+
+#ifndef CONFIG_VE
+
+#define for_each_process_ve(p)		for_each_process_all(p)
+#define do_each_thread_ve(g, t)		do_each_thread_all(g, t)
+#define while_each_thread_ve(g, t)	while_each_thread_all(g, t)
+#define first_task_ve()			next_task_ve(&init_task)
+#define __first_task_ve(owner)		next_task_ve(&init_task)
+#define __next_task_ve(owner, p)	next_task_ve(p)
+#define next_task_ve(p)			\
+		(next_task_all(p) != &init_task ? next_task_all(p) : NULL)
+
+#else	/* CONFIG_VE */
+
+static inline struct task_struct *ve_lh2task(struct ve_struct *ve,
+		struct list_head *lh)
+{
+	return lh == &ve->vetask_lh ? NULL :
+		list_entry(lh, struct task_struct, ve_task_info.vetask_list);
+}
+
+static inline struct task_struct *__first_task_ve(struct ve_struct *ve)
+{
+	struct task_struct *tsk;
+
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(&init_task);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		tsk = ve_lh2task(ve, rcu_dereference(ve->vetask_lh.next));
+	}
+	return tsk;
+}
+
+static inline struct task_struct *__next_task_ve(struct ve_struct *ve,
+		struct task_struct *tsk)
+{
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(tsk);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		BUG_ON(tsk->ve_task_info.owner_env != ve);
+		tsk = ve_lh2task(ve, rcu_dereference(tsk->
+					ve_task_info.vetask_list.next));
+	}
+	return tsk;
+}
+
+#define first_task_ve()	__first_task_ve(get_exec_env())
+#define next_task_ve(p)	__next_task_ve(get_exec_env(), p)
+/* no one uses prev_task_ve(), copy next_task_ve() if needed */
+
+#define for_each_process_ve(p) \
+	for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
 
-#define while_each_thread(g, t) \
+#define do_each_thread_ve(g, t) \
+	for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
+
+#define while_each_thread_ve(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#endif	/* CONFIG_VE */
+
 /* de_thread depends on thread_group_leader not being a pid based check */
 #define thread_group_leader(p)	(p == p->group_leader)
 
@@ -1530,8 +1947,15 @@ static inline struct pid *task_pid(struc
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-	return list_entry(rcu_dereference(p->thread_group.next),
+	struct task_struct *tsk;
+
+	tsk = list_entry(rcu_dereference(p->thread_group.next),
 			  struct task_struct, thread_group);
+#ifdef CONFIG_VE
+	/* all threads should belong to ONE ve! */
+	BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
+#endif
+	return tsk;
 }
 
 static inline int thread_group_empty(struct task_struct *p)
@@ -1689,7 +2113,8 @@ static inline int lock_need_resched(spin
  */
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
-extern int  fork_recalc_sigpending(void);
+extern int  recalc_sigpending_tsk(struct task_struct *t);
+extern int  fork_recalc_sigpending(int pid0);
 
 extern void signal_wake_up(struct task_struct *t, int resume_stopped);
 
@@ -1698,28 +2123,63 @@ extern void signal_wake_up(struct task_s
  */
 #ifdef CONFIG_SMP
 
-static inline unsigned int task_cpu(const struct task_struct *p)
+static inline unsigned int task_pcpu(const struct task_struct *p)
 {
 	return task_thread_info(p)->cpu;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
 {
 	task_thread_info(p)->cpu = cpu;
 }
 
 #else
 
+static inline unsigned int task_pcpu(const struct task_struct *p)
+{
+	return 0;
+}
+
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
+	return p->vsched_id;
+}
+
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
+	return p->vcpu_id;
+}
+
+extern void set_task_cpu(struct task_struct *p, unsigned int vcpu);
+extern int vcpu_online(int cpu);
+
+#else
+
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
 	return 0;
 }
 
+static inline unsigned int task_cpu(const struct task_struct *p)
+{
+	return task_pcpu(p);
+}
+
 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
+	set_task_pcpu(p, cpu);
 }
 
-#endif /* CONFIG_SMP */
+#define vcpu_online(cpu)	cpu_online(cpu)
+#endif /* CONFIG_SCHED_VCPU */
 
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
@@ -1732,6 +2192,8 @@ static inline void arch_pick_mmap_layout
 }
 #endif
 
+int get_user_cpu_mask(unsigned long __user *user_mask_ptr,
+		unsigned len, cpumask_t *new_mask);
 extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
 extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
 
@@ -1752,20 +2214,12 @@ static inline int frozen(struct task_str
 }
 
 /*
- * Check if there is a request to freeze a process
- */
-static inline int freezing(struct task_struct *p)
-{
-	return p->flags & PF_FREEZE;
-}
-
-/*
  * Request that a process be frozen
  * FIXME: SMP problem. We may not modify other process' flags!
  */
 static inline void freeze(struct task_struct *p)
 {
-	p->flags |= PF_FREEZE;
+	set_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -1773,7 +2227,7 @@ static inline void freeze(struct task_st
  */
 static inline void do_not_freeze(struct task_struct *p)
 {
-	p->flags &= ~PF_FREEZE;
+	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -1794,35 +2248,44 @@ static inline int thaw_process(struct ta
  */
 static inline void frozen_process(struct task_struct *p)
 {
-	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+	clear_tsk_thread_flag(p, TIF_FREEZE);
+	p->flags |= PF_FROZEN;
 }
 
-extern void refrigerator(void);
 extern int freeze_processes(void);
 extern void thaw_processes(void);
 
-static inline int try_to_freeze(void)
-{
-	if (freezing(current)) {
-		refrigerator();
-		return 1;
-	} else
-		return 0;
-}
 #else
 static inline int frozen(struct task_struct *p) { return 0; }
-static inline int freezing(struct task_struct *p) { return 0; }
 static inline void freeze(struct task_struct *p) { BUG(); }
 static inline int thaw_process(struct task_struct *p) { return 1; }
 static inline void frozen_process(struct task_struct *p) { BUG(); }
 
-static inline void refrigerator(void) {}
 static inline int freeze_processes(void) { BUG(); return 0; }
 static inline void thaw_processes(void) {}
 
-static inline int try_to_freeze(void) { return 0; }
-
 #endif /* CONFIG_PM */
+
+extern void refrigerator(void);
+
+/*
+ * Check if there is a request to freeze a process
+ */
+static inline int freezing(struct task_struct *p)
+{
+	return test_tsk_thread_flag(p, TIF_FREEZE);
+}
+
+static inline int try_to_freeze(void)
+{
+	if (freezing(current)) {
+		refrigerator();
+		return 1;
+	} else
+		return 0;
+}
+
+
 #endif /* __KERNEL__ */
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/selinux.h kernel-2.6.18-417.el5-028stab121/include/linux/selinux.h
--- kernel-2.6.18-417.el5.orig/include/linux/selinux.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/selinux.h	2017-01-13 08:40:40.000000000 -0500
@@ -90,7 +90,6 @@ void selinux_task_ctxid(struct task_stru
  *     kfree() on it after use.
  */
 int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
-#define selinux_sid_to_string(sid,ctx,ctxlen) selinux_ctxid_to_string((sid),(ctx),(ctxlen))
 
 /**
  *     selinux_get_inode_sid - get the inode's security context ID
@@ -207,5 +206,6 @@ static inline int selinux_relabel_packet
 }
 
 #endif	/* CONFIG_SECURITY_SELINUX */
+#define selinux_sid_to_string(sid,ctx,ctxlen) selinux_ctxid_to_string((sid),(ctx),(ctxlen))
 
 #endif /* _LINUX_SELINUX_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sem.h kernel-2.6.18-417.el5-028stab121/include/linux/sem.h
--- kernel-2.6.18-417.el5.orig/include/linux/sem.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sem.h	2017-01-13 08:40:24.000000000 -0500
@@ -155,6 +155,9 @@ static inline void exit_sem(struct task_
 }
 #endif
 
+int sysvipc_walk_sem(int (*func)(int, struct sem_array*, void *), void *arg);
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SEM_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/shmem_fs.h kernel-2.6.18-417.el5-028stab121/include/linux/shmem_fs.h
--- kernel-2.6.18-417.el5.orig/include/linux/shmem_fs.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/shmem_fs.h	2017-01-13 08:40:24.000000000 -0500
@@ -19,6 +19,9 @@ struct shmem_inode_info {
 	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct inode		vfs_inode;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter	*shmi_ub;
+#endif
 };
 
 struct shmem_sb_info {
@@ -36,4 +39,9 @@ static inline struct shmem_inode_info *S
 	return container_of(inode, struct shmem_inode_info, vfs_inode);
 }
 
+extern struct file_system_type tmpfs_fs_type;
+
+int shmem_insertpage(struct inode * inode, unsigned long index,
+		     swp_entry_t swap);
+
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/shm.h kernel-2.6.18-417.el5-028stab121/include/linux/shm.h
--- kernel-2.6.18-417.el5.orig/include/linux/shm.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/shm.h	2017-01-13 08:40:24.000000000 -0500
@@ -104,6 +104,11 @@ static inline long do_shmat(int shmid, c
 }
 #endif
 
+void shm_clean_ns(struct ipc_namespace *ns);
+
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg);
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SHM_H_ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/signalfd.h kernel-2.6.18-417.el5-028stab121/include/linux/signalfd.h
--- kernel-2.6.18-417.el5.orig/include/linux/signalfd.h	2017-01-13 08:40:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/signalfd.h	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,78 @@
+/*
+ *  include/linux/signalfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#ifndef _LINUX_SIGNALFD_H
+#define _LINUX_SIGNALFD_H
+
+#include <linux/types.h>
+/* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/fcntl.h>
+
+/* Flags for signalfd4.  */
+#define SFD_CLOEXEC O_CLOEXEC
+#define SFD_NONBLOCK O_NONBLOCK
+
+struct signalfd_siginfo {
+	__u32 ssi_signo;
+	__s32 ssi_errno;
+	__s32 ssi_code;
+	__u32 ssi_pid;
+	__u32 ssi_uid;
+	__s32 ssi_fd;
+	__u32 ssi_tid;
+	__u32 ssi_band;
+	__u32 ssi_overrun;
+	__u32 ssi_trapno;
+	__s32 ssi_status;
+	__s32 ssi_int;
+	__u64 ssi_ptr;
+	__u64 ssi_utime;
+	__u64 ssi_stime;
+	__u64 ssi_addr;
+
+	/*
+	 * Pad structure to 128 bytes. Remember to update the
+	 * pad size when you add new members. We use a fixed
+	 * size structure to avoid compatibility problems with
+	 * future versions, and we leave extra space for additional
+	 * members. We use fixed size members because this structure
+	 * comes out of a read(2) and we really don't want to have
+	 * a compat on read(2).
+	 */
+	__u8 __pad[48];
+};
+
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SIGNALFD
+
+/*
+ * Deliver the signal to listening signalfd.
+ */
+static inline void signalfd_notify(struct task_struct *tsk, int sig)
+{
+	if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh)))
+		wake_up(&tsk->sighand->signalfd_wqh);
+}
+
+struct signalfd_ctx {
+	sigset_t sigmask;
+};
+
+extern long do_signalfd(int ufd, sigset_t *sigmask, int flags);
+
+#else /* CONFIG_SIGNALFD */
+
+static inline void signalfd_notify(struct task_struct *tsk, int sig) { }
+
+#endif /* CONFIG_SIGNALFD */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SIGNALFD_H */
+
diff -upr kernel-2.6.18-417.el5.orig/include/linux/signal.h kernel-2.6.18-417.el5-028stab121/include/linux/signal.h
--- kernel-2.6.18-417.el5.orig/include/linux/signal.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/signal.h	2017-01-13 08:40:40.000000000 -0500
@@ -7,6 +7,7 @@
 #ifdef __KERNEL__
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 
 /*
  * Real Time signals may be queued.
@@ -17,6 +18,9 @@ struct sigqueue {
 	int flags;
 	siginfo_t info;
 	struct user_struct *user;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *sig_ub;
+#endif
 };
 
 /* flags values. */
@@ -233,6 +237,7 @@ static inline int valid_signal(unsigned 
 	return sig <= _NSIG ? 1 : 0;
 }
 
+extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_sigpending(void __user *, unsigned long);
@@ -241,6 +246,8 @@ extern int sigprocmask(int, sigset_t *, 
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 
+extern kmem_cache_t *sigqueue_cachep;
+
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
  * or to the process as a whole (Linux thread group).  How the signal
diff -upr kernel-2.6.18-417.el5.orig/include/linux/skbuff.h kernel-2.6.18-417.el5-028stab121/include/linux/skbuff.h
--- kernel-2.6.18-417.el5.orig/include/linux/skbuff.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/skbuff.h	2017-01-13 08:40:41.000000000 -0500
@@ -256,8 +256,11 @@ enum {
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
+ *	@vlan_tci: vlan tag control information
  */
 
+#include <ub/ub_sk.h>
+
 struct sk_buff {
 	/* These two members must be first. */
 	struct sk_buff		*next;
@@ -312,12 +315,12 @@ struct sk_buff {
 				nfctinfo:3;
 	__u8			pkt_type:3,
 				fclone:2,
-#ifndef CONFIG_XEN
-				ipvs_property:1;
-#else
 				ipvs_property:1,
 				proto_data_valid:1,
 				proto_csum_blank:1;
+	__u8			redirected:1;
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+	__u8			brmark;
 #endif
 	__be16			protocol;
 
@@ -345,6 +348,7 @@ struct sk_buff {
 	__u32			secmark;
 #endif
 
+	__u16			vlan_tci;
 
 	/* These elements must be at the end, see alloc_skb() for details.  */
 	unsigned int		truesize;
@@ -353,6 +357,8 @@ struct sk_buff {
 				*data,
 				*tail,
 				*end;
+	struct skb_beancounter	skb_bc;
+	struct ve_struct	*owner_env;
 	/* Extra stuff at the end to avoid breaking abi */
 #ifndef __GENKSYMS__
 	int			 peeked;
@@ -364,6 +370,7 @@ struct sk_buff {
  *	Handling routines are only of interest to the kernel
  */
 #include <linux/slab.h>
+#include <ub/ub_net.h>
 
 #include <asm/system.h>
 
@@ -676,6 +683,13 @@ static inline void skb_queue_head_init(s
 	__skb_queue_head_init(list);
 }
 
+static inline void skb_queue_head_init_class(struct sk_buff_head *list,
+		struct lock_class_key *class)
+{
+	skb_queue_head_init(list);
+	lockdep_set_class(&list->lock, class);
+}
+
 /*
  *	Insert an sk_buff at the start of a list.
  *
@@ -1294,6 +1308,8 @@ static inline void pskb_trim_unique(stru
  */
 static inline void skb_orphan(struct sk_buff *skb)
 {
+	ub_skb_uncharge(skb);
+
 	if (skb->destructor)
 		skb->destructor(skb);
 	skb->destructor = NULL;
@@ -1727,6 +1743,14 @@ static inline unsigned int skb_checksum_
 		__skb_checksum_complete(skb);
 }
 
+#if defined(CONFIG_VE) || defined(CONFIG_XEN)
+#define skb_partial_checksummed(x) ((x)->proto_csum_blank)
+#define skb_reset_proto_csum(x) ((x)->proto_csum_blank = 0)
+#else
+#define skb_partial_checksummed(x) (0)
+#define skb_reset_proto_csum(x)
+#endif
+
 struct tux_req_struct;
 
 #ifdef CONFIG_NETFILTER
@@ -1819,6 +1843,24 @@ static inline void skb_init_secmark(stru
 { }
 #endif
 
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+static inline void skb_copy_brmark(struct sk_buff *to, const struct sk_buff *from)
+{
+	to->brmark = from->brmark;
+}
+
+static inline void skb_init_brmark(struct sk_buff *skb)
+{
+	skb->brmark = 0;
+}
+#else
+static inline void skb_copy_brmark(struct sk_buff *to, const struct sk_buff *from)
+{ }
+
+static inline void skb_init_brmark(struct sk_buff *skb)
+{ }
+#endif
+
 static inline int skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff -upr kernel-2.6.18-417.el5.orig/include/linux/slab.h kernel-2.6.18-417.el5-028stab121/include/linux/slab.h
--- kernel-2.6.18-417.el5.orig/include/linux/slab.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/slab.h	2017-01-13 08:40:19.000000000 -0500
@@ -47,6 +47,26 @@ typedef struct kmem_cache kmem_cache_t;
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* defer freeing pages to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 
+/*
+ * allocation rules:                            __GFP_UBC       0
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  cache (SLAB_UBC)				charge		charge
+ *				      (usual caches: mm, vma, task_struct, ...)
+ *
+ *  cache (SLAB_UBC | SLAB_NO_CHARGE)		charge		---
+ *					     (ub_kmalloc)    (kmalloc)
+ *
+ *  cache (no UB flags)				BUG()		---
+ *							(nonub caches, mempools)
+ *
+ *  pages					charge		---
+ *					   (ub_vmalloc,	      (vmalloc,
+ *				        poll, fdsets, ...)  non-ub allocs)
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#define SLAB_UBC		0x20000000UL	/* alloc space for ubs ... */
+#define SLAB_NO_CHARGE		0x40000000UL	/* ... but don't charge */
+
 /* flags passed to a constructor func */
 #define	SLAB_CTOR_CONSTRUCTOR	0x001UL		/* if not set, then deconstructor */
 #define SLAB_CTOR_ATOMIC	0x002UL		/* tell constructor it can't sleep */
@@ -68,6 +88,11 @@ extern void kmem_cache_free(kmem_cache_t
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 extern const char *kmem_cache_name(kmem_cache_t *);
 extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
+extern void show_slab_info(void);
+
+struct user_beancounter;
+extern void slab_walk_ub(struct user_beancounter *ub,
+		void (*show)(const char *name, int count, void *v), void *v);
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -76,6 +101,7 @@ struct cache_sizes {
 	kmem_cache_t	*cs_dmacachep;
 };
 extern struct cache_sizes malloc_sizes[];
+extern int malloc_cache_num;
 
 extern void *__kmalloc(size_t, gfp_t);
 #ifndef CONFIG_DEBUG_SLAB
@@ -133,7 +159,7 @@ extern void *__kmalloc_track_caller(size
  */
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
-	if (__builtin_constant_p(size)) {
+	if (__builtin_constant_p(size) && __builtin_constant_p(flags)) {
 		int i = 0;
 #define CACHE(x) \
 		if (size <= x) \
@@ -147,6 +173,8 @@ static inline void *kmalloc(size_t size,
 			__you_cannot_kmalloc_that_much();
 		}
 found:
+		if (flags & __GFP_UBC)
+			i += malloc_cache_num;
 		return kmem_cache_alloc((flags & GFP_DMA) ?
 			malloc_sizes[i].cs_dmacachep :
 			malloc_sizes[i].cs_cachep, flags);
@@ -154,6 +182,7 @@ found:
 	return __kmalloc(size, flags);
 }
 
+#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
 extern void *__kzalloc(size_t, gfp_t);
 
 /**
@@ -177,12 +206,15 @@ static inline void *kzalloc(size_t size,
 			__you_cannot_kzalloc_that_much();
 		}
 found:
+		if (flags & __GFP_UBC)
+			i += malloc_cache_num;
 		return kmem_cache_zalloc((flags & GFP_DMA) ?
 			malloc_sizes[i].cs_dmacachep :
 			malloc_sizes[i].cs_cachep, flags);
 	}
 	return __kzalloc(size, flags);
 }
+#define ub_kzalloc(size, flags) kzalloc(size, (flags) | __GFP_UBC)
 
 /**
  * kcalloc - allocate memory for an array. The memory is set to zero.
@@ -273,8 +305,11 @@ static inline void *kcalloc(size_t n, si
 
 #endif /* CONFIG_SLOB */
 
+extern struct vm_area_struct *allocate_vma(struct mm_struct *mm, gfp_t gfp_flags);
+extern void free_vma(struct mm_struct *mm, struct vm_area_struct *vma);
+
 /* System wide caches */
-extern kmem_cache_t	*vm_area_cachep;
+extern kmem_cache_t	*__vm_area_cachep;
 extern kmem_cache_t	*names_cachep;
 extern kmem_cache_t	*files_cachep;
 extern kmem_cache_t	*filp_cachep;
diff -upr kernel-2.6.18-417.el5.orig/include/linux/socket.h kernel-2.6.18-417.el5-028stab121/include/linux/socket.h
--- kernel-2.6.18-417.el5.orig/include/linux/socket.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/socket.h	2017-01-13 08:40:21.000000000 -0500
@@ -296,6 +296,16 @@ struct ucred {
 #define IPX_TYPE	1
 
 #ifdef __KERNEL__
+
+#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
+					   16 for IP, 16 for IPX,
+					   24 for IPv6,
+					   about 80 for AX.25
+					   must be at least one bigger than
+					   the AF_UNIX size (see net/unix/af_unix.c
+					   :unix_mkname()).
+					 */
+
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 extern int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, 
 				int offset, int len);
@@ -309,6 +319,8 @@ extern int memcpy_toiovec(struct iovec *
 extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int vz_security_family_check(int family);
+extern int vz_security_protocol_check(int protocol);
 
 struct timespec;
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/stat.h kernel-2.6.18-417.el5-028stab121/include/linux/stat.h
--- kernel-2.6.18-417.el5.orig/include/linux/stat.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/stat.h	2017-01-13 08:40:40.000000000 -0500
@@ -53,6 +53,9 @@
 #define S_IWUGO		(S_IWUSR|S_IWGRP|S_IWOTH)
 #define S_IXUGO		(S_IXUSR|S_IXGRP|S_IXOTH)
 
+#define UTIME_NOW	((1l << 30) - 1l)
+#define UTIME_OMIT	((1l << 30) - 2l)
+
 #include <linux/types.h>
 #include <linux/time.h>
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/cache.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/cache.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/cache.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/cache.h	2017-01-13 08:40:23.000000000 -0500
@@ -170,6 +170,8 @@ extern void cache_purge(struct cache_det
 #define NEVER (0x7FFFFFFF)
 extern void cache_register(struct cache_detail *cd);
 extern int cache_unregister(struct cache_detail *cd);
+extern struct cache_detail *cache_alloc(struct cache_detail *, int);
+extern void cache_free(struct cache_detail *);
 
 extern void qword_add(char **bpp, int *lp, char *str);
 extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/clnt.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/clnt.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/clnt.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/clnt.h	2017-01-13 08:40:22.000000000 -0500
@@ -53,7 +53,8 @@ struct rpc_clnt {
 				cl_intr     : 1,/* interruptible */
 				cl_autobind : 1,/* use getport() */
 				cl_oneshot  : 1,/* dispose after use */
-				cl_dead     : 1;/* abandoned */
+				cl_dead     : 1,/* abandoned */
+				cl_broken   : 1;/* no response for too long */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
 	struct rpc_portmap *	cl_pmap;	/* port mapping */
@@ -68,6 +69,7 @@ struct rpc_clnt {
 	struct rpc_portmap	cl_pmap_default;
 	char			cl_inline_name[32];
 	struct rpc_program *	cl_program;
+	unsigned long		cl_pr_time;
 #ifndef __GENKSYMS__
 	struct rpc_pmap_result	*cl_pmap_result;
 #endif /* !__GENKSYMS__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/debug.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/debug.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/debug.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/debug.h	2017-01-13 08:40:22.000000000 -0500
@@ -95,6 +95,7 @@ enum {
 	CTL_SLOTTABLE_TCP,
 	CTL_MIN_RESVPORT,
 	CTL_MAX_RESVPORT,
+	CTL_ABORT_TIMEOUT,
 };
 
 #endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/sched.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/sched.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/sched.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/sched.h	2017-01-13 08:40:23.000000000 -0500
@@ -270,6 +270,7 @@ void		rpc_put_task_async(struct rpc_task
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_release_calldata(const struct rpc_call_ops *, void *);
 void		rpc_killall_tasks(struct rpc_clnt *);
+void		rpc_kill_client(struct rpc_clnt *clnt);
 int		rpc_execute(struct rpc_task *);
 void		rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
 					rpc_action action);
@@ -293,6 +294,7 @@ void		rpc_show_tasks(void);
 int		rpc_init_mempool(void);
 void		rpc_destroy_mempool(void);
 extern struct workqueue_struct *rpciod_workqueue;
+extern struct rw_semaphore rpc_async_task_lock;
 
 static inline void rpc_exit(struct rpc_task *task, int status)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/stats.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/stats.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/stats.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/stats.h	2017-01-13 08:40:23.000000000 -0500
@@ -55,7 +55,11 @@ void			svc_proc_unregister(const char *)
 void			svc_seq_show(struct seq_file *,
 				     const struct svc_stat *);
 
+#ifdef CONFIG_VE
+#define proc_net_rpc	(get_exec_env()->_proc_net_rpc)
+#else
 extern struct proc_dir_entry	*proc_net_rpc;
+#endif
 
 #else
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/svc.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/svc.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/svc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/svc.h	2017-01-13 08:40:23.000000000 -0500
@@ -321,6 +321,7 @@ typedef void		(*svc_thread_fn)(struct sv
 /*
  * Function prototypes.
  */
+struct svc_serv *  __svc_create(struct svc_program *, unsigned int, struct svc_stat *);
 struct svc_serv *  svc_create(struct svc_program *, unsigned int);
 int		   svc_create_thread(svc_thread_fn, struct svc_serv *);
 void		   svc_exit_thread(struct svc_rqst *);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sunrpc/xprt.h kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/xprt.h
--- kernel-2.6.18-417.el5.orig/include/linux/sunrpc/xprt.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sunrpc/xprt.h	2017-01-13 08:40:22.000000000 -0500
@@ -36,6 +36,14 @@ extern unsigned int xprt_max_resvport;
 #define RPC_DEF_MAX_RESVPORT	(1023U)
 
 /*
+ * Grand abort timeout (stop the client if occurs)
+ */
+extern int xprt_abort_timeout;
+
+#define RPC_MIN_ABORT_TIMEOUT	300
+#define RPC_MAX_ABORT_TIMEOUT	INT_MAX
+
+/*
  * This describes a timeout strategy
  */
 struct rpc_timeout {
@@ -118,6 +126,7 @@ struct rpc_xprt {
 	struct rpc_xprt_ops *	ops;		/* transport methods */
 	struct socket *		sock;		/* BSD socket layer */
 	struct sock *		inet;		/* INET layer */
+	struct ve_struct *	owner_env;	/* VE owner of mount */
 
 	struct rpc_timeout	timeout;	/* timeout parms */
 	struct sockaddr_in	addr;		/* server address */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/swap.h kernel-2.6.18-417.el5-028stab121/include/linux/swap.h
--- kernel-2.6.18-417.el5.orig/include/linux/swap.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/swap.h	2017-01-13 08:40:24.000000000 -0500
@@ -17,6 +17,7 @@ struct notifier_block;
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
+#define SWAP_FLAG_READONLY	0x40000000      /* set if swap is read-only */
 
 static inline int current_is_kswapd(void)
 {
@@ -92,6 +93,7 @@ struct address_space;
 struct sysinfo;
 struct writeback_control;
 struct zone;
+struct user_beancounter;
 
 /*
  * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
@@ -121,6 +123,7 @@ enum {
 	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
+	SWP_READONLY	= (1 << 2),
 };
 
 #define SWAP_CLUSTER_MAX 32
@@ -131,6 +134,7 @@ enum {
 /*
  * The in-memory structure used to track swap areas.
  */
+struct user_beancounter;
 struct swap_info_struct {
 	unsigned int flags;
 	int prio;			/* swap priority */
@@ -148,6 +152,9 @@ struct swap_info_struct {
 	unsigned int max;
 	unsigned int inuse_pages;
 	int next;			/* next entry on swap list */
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+	struct user_beancounter **swap_ubs;
+#endif
 };
 
 struct swap_list_t {
@@ -155,6 +162,9 @@ struct swap_list_t {
 	int next;	/* swapfile to be used next */
 };
 
+extern struct swap_list_t swap_list;
+extern struct swap_info_struct swap_info[MAX_SWAPFILES];
+
 /* Swap 50% full? Release swapcache more aggressively.. */
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
@@ -162,6 +172,8 @@ struct swap_list_t {
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, int force);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
+extern int oom_kill_process(struct task_struct *p, const char *message);
+extern struct task_struct *oom_select_bad_process(struct user_beancounter *ub);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
@@ -184,6 +196,7 @@ extern int pagecache_maxpercent;
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
 extern void FASTCALL(activate_page(struct page *));
+extern void deactivate_page(struct page *);
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void FASTCALL(deactivate_unmapped_page(struct page *));
 extern void lru_add_drain(void);
@@ -236,6 +249,9 @@ extern struct address_space swapper_spac
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
+extern int add_to_swap_cache(struct page *page, swp_entry_t entry);
+extern int __add_to_swap_cache(struct page *page,
+			       swp_entry_t entry, gfp_t gfp_mask);
 extern void delete_from_swap_cache(struct page *);
 extern int move_to_swap_cache(struct page *, swp_entry_t);
 extern int move_from_swap_cache(struct page *, unsigned long,
@@ -249,7 +265,7 @@ extern struct page * read_swap_cache_asy
 extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct user_beancounter *);
 extern swp_entry_t get_swap_page_of_type(int);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
@@ -261,6 +277,7 @@ extern sector_t map_swap_page(struct swa
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+extern int try_to_remove_exclusive_swap_page(struct page *);
 struct backing_dev_info;
 
 extern spinlock_t swap_lock;
@@ -362,7 +379,7 @@ static inline int remove_exclusive_swap_
 	return 0;
 }
 
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	swp_entry_t entry;
 	entry.val = 0;
diff -upr kernel-2.6.18-417.el5.orig/include/linux/syscalls.h kernel-2.6.18-417.el5-028stab121/include/linux/syscalls.h
--- kernel-2.6.18-417.el5.orig/include/linux/syscalls.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/syscalls.h	2017-01-13 08:40:40.000000000 -0500
@@ -389,6 +389,10 @@ asmlinkage ssize_t sys_pread64(unsigned 
 				size_t count, loff_t pos);
 asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf,
 				size_t count, loff_t pos);
+asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
+			   unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
+asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
+			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
 asmlinkage long sys_mkdir(const char __user *pathname, int mode);
 asmlinkage long sys_chdir(const char __user *filename);
@@ -411,6 +415,7 @@ asmlinkage long sys_getsockopt(int fd, i
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
+asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
@@ -433,10 +438,15 @@ asmlinkage long sys_poll(struct pollfd _
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_epoll_create(int size);
+asmlinkage long sys_epoll_create1(int flags);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 				int maxevents, int timeout);
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+                                int maxevents, int timeout,
+                                const sigset_t __user *sigmask,
+                                size_t sigsetsize);
 asmlinkage long sys_gethostname(char __user *name, int len);
 asmlinkage long sys_sethostname(char __user *name, int len);
 asmlinkage long sys_setdomainname(char __user *name, int len);
@@ -547,6 +557,7 @@ asmlinkage long sys_get_mempolicy(int __
 				unsigned long addr, unsigned long flags);
 
 asmlinkage long sys_inotify_init(void);
+asmlinkage long sys_inotify_init1(int flags);
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
 					u32 mask);
 asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
@@ -582,6 +593,8 @@ asmlinkage long sys_fstatat64(int dfd, c
 			       struct stat64 __user *statbuf, int flag);
 asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf,
 			       int bufsiz);
+asmlinkage long sys_utimensat(int dfd, char __user *filename,
+				struct timespec __user *utimes, int flags);
 asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename,
 				     struct compat_timeval __user *t);
 asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
@@ -608,6 +621,8 @@ asmlinkage long sys_get_robust_list(int 
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sysctl.h kernel-2.6.18-417.el5-028stab121/include/linux/sysctl.h
--- kernel-2.6.18-417.el5.orig/include/linux/sysctl.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sysctl.h	2017-01-13 08:40:41.000000000 -0500
@@ -167,6 +167,21 @@ enum
 	KERN_HUNG_TASK_TIMEOUT_SECS=84,
 	KERN_HUNG_TASK_WARNINGS=85,
 	KERN_NMI_WATCHDOG=86,
+#ifdef CONFIG_GRKERNSEC_SYSCTL
+	KERN_GRSECURITY=98,	/* grsecurity */
+#endif
+	KERN_SILENCE_LEVEL=200, /* int: Console silence loglevel */
+	KERN_ALLOC_FAIL_WARN=201, /* int: whether we'll print "alloc failure" */
+	KERN_VIRT_PIDS=202,	/* int: VE pids virtualization */
+	KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */
+	KERN_FAIRSCHED_MAX_LATENCY=211, /* int: Max start_tag delta */
+	KERN_VCPU_SCHED_TIMESLICE=212,
+	KERN_VCPU_TIMESLICE=213,
+	KERN_SCALE_VCPU_FREQUENCY=214,	/* Scale cpu frequency inside VE */
+	KERN_VCPU_HOT_TIMESLICE=215,
+	KERN_VE_ALLOW_KTHREADS=207,
+	KERN_VE_MEMINFO=208,    /* int: use privvmpages(0) or oomguarpages(1) */
+	KERN_SYSRQ_KEY_SCANCODE=216, /* int: sysrq key */
 };
 
 
@@ -444,6 +459,8 @@ enum
 	NET_TCP_CONG_CONTROL=110,
 	NET_TCP_ABC=111,
 	NET_IPV4_IPFRAG_MAX_DIST=112,
+	NET_TCP_MAX_TW_BUCKETS_UB=151,
+	NET_TCP_MAX_TW_KMEM_FRACTION=152,
  	NET_TCP_MTU_PROBING=113,
 	NET_TCP_BASE_MSS=114,
 	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
@@ -457,10 +474,13 @@ enum
 	NET_UDP_RMEM_MIN=123,
 	NET_UDP_WMEM_MIN=124,
 	NET_IPV4_LOCAL_RESERVED_PORTS=125,
+	NET_TCP_PORT_FORWARD_RANGE=150,
+	NET_TCP_USE_SG=245,
 };
 
 enum {
 	NET_IPV4_ROUTE_FLUSH=1,
+	NET_IPV4_ROUTE_SRC_CHECK=188,
 	NET_IPV4_ROUTE_MIN_DELAY=2,
 	NET_IPV4_ROUTE_MAX_DELAY=3,
 	NET_IPV4_ROUTE_GC_THRESH=4,
@@ -897,6 +917,15 @@ enum
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
 	FS_INOTIFY=20,	/* inotify submenu */
+ 	FS_AT_VSYSCALL=21,	/* int: to announce vsyscall data */
+	FS_ODIRECT=50,	/* enable O_DIRECT for all users */
+	FS_SNAPAPI=51,  /* enable snapapi char device */
+	FS_LSYSCALL=52, /* enable symlink lutime/lchmod syscalls */
+};
+
+/* /proc/sys/debug */
+enum {
+	DBG_DECODE_CALLTRACES = 1,	/* int: decode call traces on oops */
 };
 
 /* /proc/sys/fs/quota/ */
@@ -1010,6 +1039,8 @@ enum
 #ifdef __KERNEL__
 #include <linux/list.h>
 
+extern int ve_allow_kthreads;
+
 extern void sysctl_init(void);
 
 typedef struct ctl_table ctl_table;
@@ -1056,7 +1087,11 @@ extern ctl_handler sysctl_string;
 extern ctl_handler sysctl_intvec;
 extern ctl_handler sysctl_jiffies;
 extern ctl_handler sysctl_ms_jiffies;
+extern ctl_handler sysctl_strategy_bset;
 
+extern proc_handler proc_dostring_ve_immutable;
+extern ctl_handler sysctl_string_ve_immutable;
+extern proc_handler proc_dointvec_ve_immutable;
 
 /*
  * Register a set of sysctl names by calling register_sysctl_table
@@ -1096,6 +1131,8 @@ extern ctl_handler sysctl_ms_jiffies;
  */
 
 /* A sysctl table is an array of struct ctl_table: */
+struct ve_struct;
+
 struct ctl_table 
 {
 	int ctl_name;			/* Binary ID */
@@ -1109,6 +1146,8 @@ struct ctl_table 
 	struct proc_dir_entry *de;	/* /proc control block */
 	void *extra1;
 	void *extra2;
+	struct ve_struct *owner_env;
+	int virt_handler;
 };
 
 /* struct ctl_table_header is used to maintain dynamic lists of
@@ -1125,6 +1164,9 @@ struct ctl_table_header * register_sysct
 						int insert_at_head);
 void unregister_sysctl_table(struct ctl_table_header * table);
 
+ctl_table *clone_sysctl_template(ctl_table *tmpl);
+void free_sysctl_clone(ctl_table *clone);
+
 #else /* __KERNEL__ */
 
 #endif /* __KERNEL__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/sysfs.h kernel-2.6.18-417.el5-028stab121/include/linux/sysfs.h
--- kernel-2.6.18-417.el5.orig/include/linux/sysfs.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/sysfs.h	2017-01-13 08:40:20.000000000 -0500
@@ -111,6 +111,8 @@ int sysfs_create_group(struct kobject *,
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
 void sysfs_notify(struct kobject * k, char *dir, char *attr);
 
+extern struct file_system_type sysfs_fs_type;
+
 #else /* CONFIG_SYSFS */
 
 static inline int sysfs_schedule_callback(struct kobject *kobj,
diff -upr kernel-2.6.18-417.el5.orig/include/linux/task_io_accounting.h kernel-2.6.18-417.el5-028stab121/include/linux/task_io_accounting.h
--- kernel-2.6.18-417.el5.orig/include/linux/task_io_accounting.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/task_io_accounting.h	2017-01-13 08:40:15.000000000 -0500
@@ -8,6 +8,9 @@
  * Blame akpm@osdl.org for all this.
  */
 
+#ifndef __TASK_IO_ACCOUNTING_H_
+#define __TASK_IO_ACCOUNTING_H_
+
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 struct task_io_accounting {
 	/*
@@ -35,3 +38,5 @@ struct task_io_accounting {
 struct task_io_accounting {
 };
 #endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/task_io_accounting_ops.h kernel-2.6.18-417.el5-028stab121/include/linux/task_io_accounting_ops.h
--- kernel-2.6.18-417.el5.orig/include/linux/task_io_accounting_ops.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/task_io_accounting_ops.h	2017-01-13 08:40:18.000000000 -0500
@@ -4,14 +4,23 @@
 #ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED
 #define __TASK_IO_ACCOUNTING_OPS_INCLUDED
 
+#include <ub/io_acct.h>
+
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 static inline void task_io_account_read(size_t bytes)
 {
+	ub_io_account_read(bytes);
 	task_aux(current)->ioac.read_bytes += bytes;
 }
 
 static inline void task_io_account_write(size_t bytes)
 {
+	ub_io_account_write(bytes);
+	task_aux(current)->ioac.write_bytes += bytes;
+}
+
+static inline void task_io_account_dirty(size_t bytes)
+{
 	task_aux(current)->ioac.write_bytes += bytes;
 }
 
@@ -35,6 +44,10 @@ static inline void task_io_account_write
 {
 }
 
+static inline void task_io_account_dirty(size_t bytes)
+{
+}
+
 static inline void task_io_account_cancelled_write(size_t bytes)
 {
 }
diff -upr kernel-2.6.18-417.el5.orig/include/linux/threads.h kernel-2.6.18-417.el5-028stab121/include/linux/threads.h
--- kernel-2.6.18-417.el5.orig/include/linux/threads.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/threads.h	2017-01-13 08:40:22.000000000 -0500
@@ -24,13 +24,15 @@
 /*
  * This controls the default maximum pid allocated to a process
  */
-#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
+#define PID_MAX_DEFAULT  (sizeof(long) > 4 ? 1024 * 1024 : 32 * 1024)
+
+#define VPID_MAX_DEFAULT (32 * 1024)
 
 /*
  * A maximum of 4 million PIDs should be enough for a while.
  * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.]
  */
 #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
-	(sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
+	(sizeof(long) > 4 ? 4 * 1024 * 1024 : 32 * 1024))
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/time.h kernel-2.6.18-417.el5-028stab121/include/linux/time.h
--- kernel-2.6.18-417.el5.orig/include/linux/time.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/time.h	2017-01-13 08:40:41.000000000 -0500
@@ -71,6 +71,7 @@ extern unsigned long mktime(const unsign
 			    const unsigned int min, const unsigned int sec);
 
 extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
+extern struct timespec ns_to_timespec(const s64 nsec);
 extern struct timespec timespec_add_safe(const struct timespec lhs,
 					 const struct timespec rhs);
 
@@ -112,7 +113,7 @@ extern void do_gettimeofday(struct timev
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
 #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
-extern long do_utimes(int dfd, char __user *filename, struct timeval *times);
+extern long do_utimes(int dfd, char __user *filename, struct timeval *times, int flags);
 struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/tracehook.h kernel-2.6.18-417.el5-028stab121/include/linux/tracehook.h
--- kernel-2.6.18-417.el5.orig/include/linux/tracehook.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/tracehook.h	2017-01-13 08:40:24.000000000 -0500
@@ -658,7 +658,7 @@ static inline void tracehook_report_clon
 						   struct task_struct *child)
 {
 	if (tsk_utrace_flags(current) & UTRACE_ACTION_QUIESCE)
-		utrace_quiescent(current, NULL);
+		utrace_quiescent(current, NULL, PN_STOP_FORK);
 }
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/include/linux/tty_driver.h kernel-2.6.18-417.el5-028stab121/include/linux/tty_driver.h
--- kernel-2.6.18-417.el5.orig/include/linux/tty_driver.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/tty_driver.h	2017-01-13 08:40:20.000000000 -0500
@@ -213,6 +213,7 @@ struct tty_driver {
 			unsigned int set, unsigned int clear);
 
 	struct list_head tty_drivers;
+	struct ve_struct *owner_env;
 #ifndef __GENKSYMS__
 	void (*shutdown)(struct tty_struct *tty);
 	int (*set_termiox)(struct tty_struct *tty, struct termiox *old);
@@ -220,7 +221,18 @@ struct tty_driver {
 #endif
 };
 
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver *pts_driver;	/* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+#ifdef CONFIG_LEGACY_PTYS
+extern struct tty_driver *pty_driver;
+extern struct tty_driver *pty_slave_driver;
+#endif
+
 extern struct list_head tty_drivers;
+extern rwlock_t tty_driver_guard;
 
 struct tty_driver *alloc_tty_driver(int lines);
 void put_tty_driver(struct tty_driver *driver);
@@ -228,6 +240,9 @@ void tty_set_operations(struct tty_drive
 
 void tty_shutdown(struct tty_struct *tty);
 
+struct class *init_ve_tty_class(void);
+void fini_ve_tty_class(struct class *ve_tty_class);
+
 /* tty driver magic number */
 #define TTY_DRIVER_MAGIC		0x5402
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/tty.h kernel-2.6.18-417.el5-028stab121/include/linux/tty.h
--- kernel-2.6.18-417.el5.orig/include/linux/tty.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/tty.h	2017-01-13 08:40:40.000000000 -0500
@@ -196,7 +196,7 @@ struct tty_struct {
 	struct work_struct hangup_work;
 	void *disc_data;
 	void *driver_data;
-	struct list_head tty_files;
+	struct file_list tty_files;
 
 #define N_TTY_BUF_SIZE 4096
 	
@@ -226,6 +226,7 @@ struct tty_struct {
 	spinlock_t read_lock;
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
+	struct ve_struct *owner_env;
 };
 
 /* tty magic number */
@@ -253,6 +254,7 @@ struct tty_struct {
 #define TTY_PTY_LOCK 		16	/* pty private */
 #define TTY_NO_WRITE_SPLIT 	17	/* Preserve write boundaries to driver */
 #define TTY_HUPPED 		18	/* Post driver->hangup() */
+#define TTY_CHARGED		19	/* Charged as ub resource */
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
 
diff -upr kernel-2.6.18-417.el5.orig/include/linux/utrace.h kernel-2.6.18-417.el5-028stab121/include/linux/utrace.h
--- kernel-2.6.18-417.el5.orig/include/linux/utrace.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/utrace.h	2017-01-13 08:40:24.000000000 -0500
@@ -55,7 +55,22 @@ struct utrace_signal;
 struct utrace_regset;
 struct utrace_regset_view;
 
-#ifdef __GENKSYMS__		/* RHEL-5 GA KABI compatibility */
+#define UTRACE_DEBUG 1
+
+/*
+ * Per-thread structure task_struct.utrace points to.
+ *
+ * The task itself never has to worry about this going away after
+ * some event is found set in task_struct.utrace_flags.
+ * Once created, this pointer is changed only when the task is quiescent
+ * (TASK_TRACED or TASK_STOPPED with the siglock held, or dead).
+ *
+ * For other parties, the pointer to this is protected by RCU and
+ * task_lock.  Since call_rcu is never used while the thread is alive and
+ * using this struct utrace, we can overlay the RCU data structure used
+ * only for a dead struct with some local state used only for a live utrace
+ * on an active thread.
+ */
 struct utrace
 {
 	union {
@@ -65,15 +80,18 @@ struct utrace
 			struct utrace_signal *signal;
 		} live;
 		struct {
-			int report_death; /* report_death running */
-			int reap; /* release_task called */
+			unsigned long flags;
 		} exit;
 	} u;
 
 	struct list_head engines;
 	spinlock_t lock;
-};
+	bool freeze_stop;
+#ifdef UTRACE_DEBUG
+	atomic_t check_dead;
 #endif
+};
+
 
 /*
  * Flags in &struct task_struct.utrace_flags and
@@ -407,6 +425,16 @@ struct utrace_engine_ops
 					   struct task_struct *target);
 };
 
+/*
+ * This is pointed to by the utrace struct, but it's really a private
+ * structure between utrace_get_signal and utrace_inject_signal.
+ */
+struct utrace_signal
+{
+	siginfo_t *const info;
+	struct k_sigaction *return_ka;
+	int signr;
+};
 
 /*
  * These are the exported entry points for tracing engines to use.
@@ -433,7 +461,7 @@ const struct utrace_regset *utrace_regse
 /*
  * Hooks in <linux/tracehook.h> call these entry points to the utrace dispatch.
  */
-int utrace_quiescent(struct task_struct *, struct utrace_signal *);
+int utrace_quiescent(struct task_struct *, struct utrace_signal *, int loc);
 void utrace_release_task(struct task_struct *);
 int utrace_get_signal(struct task_struct *, struct pt_regs *,
 		      siginfo_t *, struct k_sigaction *);
@@ -449,6 +477,7 @@ struct task_struct *utrace_tracer_task(s
 int utrace_allow_access_process_vm(struct task_struct *);
 int utrace_unsafe_exec(struct task_struct *);
 void utrace_signal_handler_singlestep(struct task_struct *, struct pt_regs *);
+const struct utrace_regset_view *utrace_native_view(struct task_struct *tsk);
 
 /*
  * <linux/tracehook.h> uses these accessors to avoid #ifdef CONFIG_UTRACE.
diff -upr kernel-2.6.18-417.el5.orig/include/linux/utsname.h kernel-2.6.18-417.el5-028stab121/include/linux/utsname.h
--- kernel-2.6.18-417.el5.orig/include/linux/utsname.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/utsname.h	2017-01-13 08:40:20.000000000 -0500
@@ -1,6 +1,11 @@
 #ifndef _LINUX_UTSNAME_H
 #define _LINUX_UTSNAME_H
 
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+#include <asm/atomic.h>
+
 #define __OLD_UTS_LEN 8
 
 struct oldold_utsname {
@@ -39,16 +44,51 @@ struct uts_namespace {
 	struct new_utsname name;
 };
 
-extern struct new_utsname system_utsname;
 extern struct uts_namespace init_uts_ns;
+extern struct new_utsname virt_utsname;
 
-extern struct rw_semaphore uts_sem;
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+	kref_get(&ns->kref);
+}
+
+#ifdef CONFIG_UTS_NS
+extern int unshare_utsname(unsigned long unshare_flags,
+				struct uts_namespace **new_uts);
+extern int copy_utsname(int flags, struct task_struct *tsk);
+extern void free_uts_ns(struct kref *kref);
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+	kref_put(&ns->kref, free_uts_ns);
+}
+#else
+static inline int unshare_utsname(unsigned long unshare_flags,
+			struct uts_namespace **new_uts)
+{
+	return -EINVAL;
+}
+static inline int copy_utsname(int flags, struct task_struct *tsk)
+{
+	return 0;
+}
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+#endif
+
+static inline struct new_utsname *utsname(void)
+{
+	return &current->nsproxy->uts_ns->name;
+}
 
 static inline struct new_utsname *init_utsname(void)
 {
-	return &system_utsname;
+	return &init_uts_ns.name;
 }
 
+extern struct rw_semaphore uts_sem;
+
 #endif /* __KERNEL__ */
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ve.h kernel-2.6.18-417.el5-028stab121/include/linux/ve.h
--- kernel-2.6.18-417.el5.orig/include/linux/ve.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ve.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,457 @@
+/*
+ *  include/linux/ve.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VE_H
+#define _LINUX_VE_H
+
+#include <linux/config.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/net.h>
+#include <linux/vzstat.h>
+#include <linux/kobject.h>
+#include <linux/pid.h>
+
+#ifdef VZMON_DEBUG
+#  define VZTRACE(fmt,args...) \
+	printk(KERN_DEBUG fmt, ##args)
+#else
+#  define VZTRACE(fmt,args...)
+#endif /* VZMON_DEBUG */
+
+struct tty_driver;
+struct devpts_config;
+struct task_struct;
+struct new_utsname;
+struct file_system_type;
+struct icmp_mib;
+struct ip_mib;
+struct tcp_mib;
+struct udp_mib;
+struct linux_mib;
+struct fib_info;
+struct fib_rule;
+struct veip_struct;
+struct ve_monitor;
+struct nsproxy;
+struct svc_serv;
+
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+struct fib_table;
+struct devcnfv4_struct;
+#ifdef CONFIG_VE_IPTABLES
+struct xt_af;
+struct xt_table;
+struct xt_target;
+struct ip_conntrack;
+typedef unsigned int (*ip_nat_helper_func)(void);
+struct ve_ip_conntrack {
+	struct list_head 	*_ip_conntrack_hash;
+	struct list_head	_ip_conntrack_expect_list;
+	struct list_head	_ip_conntrack_unconfirmed;
+	struct ip_conntrack_protocol ** _ip_ct_protos;
+	struct list_head	_ip_conntrack_helpers;
+	int 			_ip_conntrack_max;
+	int			_ip_conntrack_vmalloc;
+	atomic_t		_ip_conntrack_count;
+	void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#ifdef CONFIG_SYSCTL
+	unsigned long		_ip_ct_tcp_timeouts[10];
+	unsigned long		_ip_ct_udp_timeout;
+	unsigned long		_ip_ct_udp_timeout_stream;
+	unsigned long		_ip_ct_icmp_timeout;
+	unsigned long		_ip_ct_generic_timeout;
+	unsigned int		_ip_ct_log_invalid;
+	unsigned long		_ip_ct_tcp_timeout_max_retrans;
+	int			_ip_ct_tcp_loose;
+	int			_ip_ct_tcp_be_liberal;
+	int			_ip_ct_tcp_max_retrans;
+	struct ctl_table_header *_ip_ct_sysctl_header;
+	ctl_table		*_ip_ct_net_table;
+	ctl_table		*_ip_ct_ipv4_table;
+	ctl_table		*_ip_ct_netfilter_table;
+	ctl_table		*_ip_ct_sysctl_table;
+#endif /*CONFIG_SYSCTL*/
+
+	struct ip_nat_protocol	**_ip_nat_protos;
+	ip_nat_helper_func	_ip_nat_ftp_hook;
+	ip_nat_helper_func	_ip_nat_irc_hook;
+	struct list_head	*_ip_nat_bysource;
+	struct xt_table		*_ip_nat_table;
+
+	/* resource accounting */
+	struct user_beancounter *ub;
+};
+#endif
+#endif
+
+#define UIDHASH_BITS_VE		6
+#define UIDHASH_SZ_VE		(1 << UIDHASH_BITS_VE)
+
+struct ve_cpu_stats {
+	cycles_t	idle_time;
+	cycles_t	iowait_time;
+	cycles_t	strt_idle_time;
+	cycles_t	used_time;
+	seqcount_t	stat_lock;
+	cputime64_t	user;
+	cputime64_t	nice;
+	cputime64_t	system;
+} ____cacheline_aligned;
+
+struct ve_ipt_recent;
+struct ve_ipt_hashlimit;
+
+struct ve_sit;
+struct ve_ipip;
+struct ve_gre;
+struct ve_pppoe;
+struct ve_ppp;
+struct ve_nfsd_data;
+struct ve_ipv6_ops;
+
+struct ve_struct {
+	struct list_head	ve_list;
+
+	envid_t			veid;
+	struct task_struct	*init_entry;
+	struct list_head	vetask_lh;
+	/* capability bounding set */
+	kernel_cap_t		ve_cap_bset;
+	atomic_t		pcounter;
+	/* ref counter to ve from ipc */
+	atomic_t		counter;	
+	unsigned int		class_id;
+	struct rw_semaphore	op_sem;
+	int			is_running;
+	int			is_locked;
+	atomic_t		suspend;
+	int			virt_pids;
+	int			vpid_max;
+	unsigned int		flags;
+	/* see vzcalluser.h for VE_FEATURE_XXX definitions */
+	__u64			features;
+
+/* VE's root */
+	struct vfsmount 	*fs_rootmnt;
+	struct dentry 		*fs_root;
+
+/* sysctl */
+	struct list_head	sysctl_lh;
+	struct ctl_table_header	*quota_header;
+	struct ctl_table	*quota_table;
+	struct file_system_type *proc_fstype;
+	struct vfsmount		*proc_mnt;
+	struct proc_dir_entry	*proc_root;
+	struct proc_dir_entry	*proc_sys_root;
+	struct proc_dir_entry	*_proc_net;
+	struct proc_dir_entry	*_proc_net_stat;
+	struct proc_dir_entry	*_proc_net_rpc;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct proc_dir_entry	*_proc_net_devsnmp6;
+#endif
+
+	struct cache_detail	*_ip_map_cache;
+	struct file_system_type	*rpc_pipefs_fstype;
+
+/* BSD pty's */
+#ifdef CONFIG_LEGACY_PTYS
+	struct tty_driver       *pty_driver;
+	struct tty_driver       *pty_slave_driver;
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	struct tty_driver	*ptm_driver;
+	struct tty_driver	*pts_driver;
+	struct idr		*allocated_ptys;
+	struct file_system_type *devpts_fstype;
+	struct vfsmount		*devpts_mnt;
+	struct dentry		*devpts_root;
+	struct devpts_config	*devpts_config;
+#endif
+
+	struct ve_nfs_context	*nfs_context;
+
+	struct file_system_type *shmem_fstype;
+	struct vfsmount		*shmem_mnt;
+#ifdef CONFIG_SYSFS
+	struct file_system_type *sysfs_fstype;
+	struct vfsmount		*sysfs_mnt;
+	struct super_block	*sysfs_sb;
+	struct sysfs_dirent	*sysfs_root;
+#endif
+	struct subsystem	*class_subsys;
+	struct subsystem	*class_obj_subsys;
+	struct class		*tty_class;
+	struct class		*mem_class;
+
+/* User uids hash */
+	struct list_head	*uidhash_table;
+
+#ifdef CONFIG_NET
+	struct class		*net_class;
+	struct hlist_head	_net_dev_head;
+	struct hlist_head	_net_dev_index_head;
+	struct net_device	*_net_dev_base, **_net_dev_tail;
+	int			ifindex;
+	struct net_device	*_loopback_dev;
+	struct pcpu_lstats	*_pcpu_lstats;
+#ifdef CONFIG_INET
+	struct ipv4_devconf	*_ipv4_devconf;
+	struct ipv4_devconf	*_ipv4_devconf_dflt;
+	struct ctl_table_header	*forward_header;
+	struct ctl_table	*forward_table;
+	unsigned long		rt_flush_required;
+	struct neigh_table	*ve_arp_tbl;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct ipv6_devconf	*_ipv6_devconf;
+	struct ipv6_devconf	*_ipv6_devconf_dflt;
+	struct neigh_table	*ve_nd_tbl;
+#endif
+#endif
+#endif
+#if defined(CONFIG_VE_NETDEV) || defined (CONFIG_VE_NETDEV_MODULE)
+	struct veip_struct	*veip;
+	struct net_device	*_venet_dev;
+#endif
+
+	struct ve_sit		*ve_sit;
+	struct ve_ipip		*ve_ipip;
+	struct ve_gre		*ve_gre;
+
+#if defined(CONFIG_PPP) || defined(CONFIG_PPP_MODULE)
+	struct ve_ppp		*ve_ppp;
+#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
+	struct ve_pppoe		*ve_pppoe;
+#endif
+#endif
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+	int			has_bridge_support;
+#endif
+
+/* per VE CPU stats*/
+	struct timespec		start_timespec;
+	u64			start_jiffies;	/* Deprecated */
+	cycles_t 		start_cycles;
+	unsigned long		avenrun[3];	/* loadavg data */
+
+	cycles_t 		cpu_used_ve;
+	struct kstat_lat_pcpu_struct	sched_lat_ve;
+
+#ifdef CONFIG_INET
+	struct hlist_head	*_fib_info_hash;
+	struct hlist_head	*_fib_info_laddrhash;
+	int			_fib_hash_size;
+	int			_fib_info_cnt;
+
+#ifdef CONFIG_FIB_RULES
+	struct list_head	_rules_ops;
+#endif
+	/* XXX: why a magic constant? */
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	struct hlist_head 	_fib_table_hash[256];
+	struct fib_rules_ops 	*_fib4_ops;
+#else
+	struct hlist_head 	_fib_table_hash[1];
+	struct fib_table	*_main_table;
+	struct fib_table	*_local_table;
+#endif
+	struct icmp_mib		*_icmp_statistics[2];
+	struct icmpmsg_mib	*_icmpmsg_statistics[2];
+	struct ipstats_mib	*_ip_statistics[2];
+	struct tcp_mib		*_tcp_statistics[2];
+	struct udp_mib		*_udp_statistics[2];
+	struct linux_mib	*_net_statistics[2];
+	struct venet_stat       *stat;
+#ifdef CONFIG_VE_IPTABLES
+/* core/netfilter.c virtualization */
+	void			*_nf_hooks;
+	struct xt_table		*_ve_ipt_filter_pf; /* packet_filter struct */
+	struct xt_table		*_ve_ip6t_filter_pf;
+	struct xt_table		*_ipt_mangle_table;
+	struct xt_table		*_ip6t_mangle_table;
+	struct list_head	_xt_tables[NPROTO];
+
+	__u64			_iptables_modules;
+	struct ve_ip_conntrack	*_ip_conntrack;
+	struct ve_ipt_recent	*_ipt_recent;
+	struct ve_ipt_hashlimit	*_ipt_hashlimit;
+#endif /* CONFIG_VE_IPTABLES */
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	struct hlist_head	_fib6_table_hash[256];
+	struct fib6_table	*_fib6_local_table;
+	struct fib_rules_ops 	*_fib6_ops;
+#else
+	struct hlist_head	_fib6_table_hash[1];
+#endif
+	struct fib6_table	*_fib6_table;
+	struct ipstats_mib	*_ipv6_statistics[2];
+	struct icmpv6_mib	*_icmpv6_statistics[2];
+	struct udp_mib		*_udp_stats_in6[2];
+#endif
+#endif
+	wait_queue_head_t	*_log_wait;
+	unsigned long		*_log_start;
+	unsigned long		*_log_end;
+	unsigned long		*_logged_chars;
+	char			*log_buf;
+#define VE_DEFAULT_LOG_BUF_LEN	4096
+
+	struct ve_cpu_stats	*cpu_stats;
+	unsigned long		down_at;
+	struct list_head	cleanup_list;
+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
+	struct list_head	_fuse_conn_list;
+	struct super_block	*_fuse_control_sb;
+
+	struct file_system_type	*fuse_fs_type;
+	struct file_system_type	*fuse_ctl_fs_type;
+#endif
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	struct proc_dir_entry	*_proc_vlan_dir;
+	struct proc_dir_entry	*_proc_vlan_conf;
+#endif
+ 	unsigned long		jiffies_fixup;
+ 	unsigned char		disable_net;
+ 	unsigned char		sparse_vpid;
+	struct pidmap		ve_pidmap[PIDMAP_ENTRIES];
+	int			last_vpid;
+	struct ve_monitor	*monitor;
+	struct proc_dir_entry	*monitor_proc;
+	unsigned long		meminfo_val;
+	int _randomize_va_space;
+
+#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+	struct file_system_type	*bm_fs_type;
+	struct vfsmount		*bm_mnt;
+	int			bm_enabled;
+	int			bm_entry_count;
+	struct list_head	bm_entries;
+#endif
+
+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) \
+	|| defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
+	unsigned int		_nlmsvc_users;
+	pid_t			_nlmsvc_pid;
+	struct svc_serv		*_nlmsvc_serv;
+	int			_nlmsvc_grace_period;
+	unsigned long		_nlmsvc_timeout;
+
+	struct ve_nfsd_data	*nfsd_data;
+#endif
+
+	struct hlist_head	nlm_reserved_pids;
+	spinlock_t		nlm_reserved_lock;
+
+	struct nsproxy		*ve_ns;
+	struct list_head	vetask_auxlist;
+#ifdef CONFIG_GRKERNSEC
+	struct {
+		int		lock;
+#ifdef CONFIG_GRKERNSEC_TPE
+		int		enable_tpe;
+		int		tpe_gid;
+#ifdef CONFIG_GRKERNSEC_TPE_ALL
+		int		enable_tpe_all;
+#endif
+#endif /*CONFIG_GRKERNSEC_TPE */
+	} grsec;
+#endif /* CONFIG_GRKERNSEC */
+	struct afs_sysnames_struct	*afs_sysnames;
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	struct bsd_acct_struct	*bacct;
+#endif
+#if defined(CONFIG_HOTPLUG)
+	u64 _uevent_seqnum;
+#endif
+	struct ve_ipv6_ops	*ipv6_ops;
+
+	atomic_t                mnt_nr;
+	atomic_t		ifa_nr;
+};
+
+#define VE_MEMINFO_DEFAULT	1	/* default behaviour */
+#define VE_MEMINFO_SYSTEM	0	/* disable meminfo virtualization */
+
+enum {
+	VE_REBOOT,
+};
+
+#define VE_CPU_STATS(ve, cpu)	(per_cpu_ptr((ve)->cpu_stats, cpu))
+
+extern int nr_ve;
+
+#ifdef CONFIG_VE
+
+void do_update_load_avg_ve(void);
+void do_env_free(struct ve_struct *ptr);
+
+static inline struct ve_struct *get_ve(struct ve_struct *ptr)
+{
+	if (ptr != NULL)
+		atomic_inc(&ptr->counter);
+	return ptr;
+}
+
+static inline void put_ve(struct ve_struct *ptr)
+{
+	if (ptr && atomic_dec_and_test(&ptr->counter)) {
+		if (atomic_read(&ptr->pcounter) > 0)
+			BUG();
+		if (ptr->is_running)
+			BUG();
+		do_env_free(ptr);
+	}
+}
+
+static inline void pget_ve(struct ve_struct *ptr)
+{
+	atomic_inc(&ptr->pcounter);
+}
+
+void ve_cleanup_schedule(struct ve_struct *);
+static inline void pput_ve(struct ve_struct *ptr)
+{
+	if (unlikely(atomic_dec_and_test(&ptr->pcounter)))
+		ve_cleanup_schedule(ptr);
+}
+
+extern spinlock_t ve_cleanup_lock;
+extern struct list_head ve_cleanup_list;
+extern struct task_struct *ve_cleanup_thread;
+
+extern unsigned long long ve_relative_clock(struct timespec * ts);
+
+extern int sysctl_fsync_enable;
+
+#ifdef CONFIG_FAIRSCHED
+#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map(ve->veid, mask)
+#else
+#define ve_cpu_online_map(ve, mask) do { *(mask) = cpu_online_map; } while (0)
+#endif
+#else	/* CONFIG_VE */
+#define ve_utsname	system_utsname
+#define get_ve(ve)	(NULL)
+#define put_ve(ve)	do { } while (0)
+#define pget_ve(ve)	do { } while (0)
+#define pput_ve(ve)	do { } while (0)
+#endif	/* CONFIG_VE */
+
+#endif /* _LINUX_VE_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/veip.h kernel-2.6.18-417.el5-028stab121/include/linux/veip.h
--- kernel-2.6.18-417.el5.orig/include/linux/veip.h	2017-01-13 08:40:21.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/veip.h	2017-01-13 08:40:21.000000000 -0500
@@ -0,0 +1,15 @@
+#ifndef __VE_IP_H_
+#define __VE_IP_H_
+
+struct ve_addr_struct {
+	int family;
+	__u32 key[4];
+};
+
+struct sockaddr;
+
+extern void veaddr_print(char *, int, struct ve_addr_struct *);
+extern int sockaddr_to_veaddr(struct sockaddr __user *uaddr, int addrlen,
+		struct ve_addr_struct *veaddr);
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/venet.h kernel-2.6.18-417.el5-028stab121/include/linux/venet.h
--- kernel-2.6.18-417.el5.orig/include/linux/venet.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/venet.h	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,96 @@
+/*
+ *  include/linux/venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VENET_H
+#define _VENET_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+#include <linux/veip.h>
+#include <linux/netdevice.h>
+
+#define VEIP_HASH_SZ 512
+
+struct ve_struct;
+struct venet_stat;
+
+struct venet_stats {
+	struct net_device_stats	stats;
+	struct net_device_stats	*real_stats;
+};
+
+struct ip_entry_struct
+{
+	struct ve_addr_struct	addr;
+	struct ve_struct	*active_env;
+	struct venet_stat	*stat;
+	struct veip_struct	*veip;
+	struct list_head 	ip_hash;
+	struct list_head 	ve_list;
+};
+
+struct ext_entry_struct
+{
+	struct list_head	list;
+	struct ve_addr_struct	addr;
+};
+
+struct veip_struct
+{
+	struct list_head	src_lh;
+	struct list_head	dst_lh;
+	struct list_head	ip_lh;
+	struct list_head	list;
+	struct list_head	ext_lh;
+	envid_t			veid;
+};
+
+static inline struct net_device_stats *
+venet_stats(struct net_device *dev, int cpu)
+{
+	struct venet_stats *stats;
+	stats = (struct venet_stats*)dev->priv;
+	return per_cpu_ptr(stats->real_stats, cpu);
+}
+
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_unhash(struct ip_entry_struct *entry);
+/* veip_hash_lock should be taken for read by caller */
+struct ip_entry_struct *venet_entry_lookup(struct ve_addr_struct *);
+
+/* veip_hash_lock should be taken for read by caller */
+struct veip_struct *veip_find(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+struct veip_struct *veip_findcreate(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+void veip_put(struct veip_struct *veip);
+
+extern struct list_head veip_lh;
+
+int veip_start(struct ve_struct *ve);
+void veip_stop(struct ve_struct *ve);
+__exit void veip_cleanup(void);
+int veip_entry_add(struct ve_struct *ve, struct ve_addr_struct *addr);
+int veip_entry_del(envid_t veid, struct ve_addr_struct *addr);
+int venet_change_skb_owner(struct sk_buff *skb);
+struct ext_entry_struct *venet_ext_lookup(struct ve_struct *ve,
+		struct ve_addr_struct *addr);
+
+extern struct list_head ip_entry_hash_table[];
+extern rwlock_t veip_hash_lock;
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v);
+#endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ve_nfs.h kernel-2.6.18-417.el5-028stab121/include/linux/ve_nfs.h
--- kernel-2.6.18-417.el5.orig/include/linux/ve_nfs.h	2017-01-13 08:40:22.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ve_nfs.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,39 @@
+/*
+ * include/linux/ve_nfs.h
+ *
+ * VE context for NFS
+ *
+ * Copyright (C) 2007 SWsoft
+ */
+
+#ifndef __VE_NFS_H__
+#define __VE_NFS_H__
+
+#ifdef CONFIG_VE
+#include <linux/ve.h>
+
+#define NFS_CTX_FIELD(arg)  (get_exec_env()->_##arg)
+
+#define nlmsvc_grace_period	NFS_CTX_FIELD(nlmsvc_grace_period)
+#define nlmsvc_timeout		NFS_CTX_FIELD(nlmsvc_timeout)
+#define nlmsvc_users		NFS_CTX_FIELD(nlmsvc_users)
+#define nlmsvc_pid		NFS_CTX_FIELD(nlmsvc_pid)
+#define nlmsvc_serv		NFS_CTX_FIELD(nlmsvc_serv)
+#else
+#define nlmsvc_grace_period	_nlmsvc_grace_period
+#define nlmsvc_users		_nlmsvc_users
+#define nlmsvc_pid		_nlmsvc_pid
+#define nlmsvc_serv		_nlmsvc_serv
+#define nlmsvc_timeout		_nlmsvc_timeout
+#endif
+
+extern void nfs_change_server_params(void *data, int flags, int timeo, int retrans);
+
+extern int ve_nfs_sync(struct ve_struct *env, int wait);
+extern int is_nfs_automount(struct vfsmount *mnt);
+
+/* These two were originally defined in linux/sunrpc/xprt.h */
+#define RPC_MAX_ABORT_TIMEOUT	INT_MAX
+extern int xprt_abort_timeout;
+ 
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/veprintk.h kernel-2.6.18-417.el5-028stab121/include/linux/veprintk.h
--- kernel-2.6.18-417.el5.orig/include/linux/veprintk.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/veprintk.h	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,38 @@
+/*
+ *  include/linux/veprintk.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_PRINTK_H__
+#define __VE_PRINTK_H__
+
+#ifdef CONFIG_VE
+
+#define ve_log_wait		(*(get_exec_env()->_log_wait))
+#define ve_log_start		(*(get_exec_env()->_log_start))
+#define ve_log_end		(*(get_exec_env()->_log_end))
+#define ve_logged_chars		(*(get_exec_env()->_logged_chars))
+#define ve_log_buf		(get_exec_env()->log_buf)
+#define ve_log_buf_len		(ve_is_super(get_exec_env()) ? \
+				log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
+#define VE_LOG_BUF_MASK		(ve_log_buf_len - 1)
+#define VE_LOG_BUF(idx)		(ve_log_buf[(idx) & VE_LOG_BUF_MASK])
+
+#else
+
+#define ve_log_wait		log_wait
+#define ve_log_start		log_start
+#define ve_log_end		log_end
+#define ve_logged_chars		logged_chars
+#define ve_log_buf		log_buf
+#define ve_log_buf_len		log_buf_len
+#define VE_LOG_BUF_MASK		LOG_BUF_MASK
+#define VE_LOG_BUF(idx)		LOG_BUF(idx)
+
+#endif /* CONFIG_VE */
+#endif /* __VE_PRINTK_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ve_proto.h kernel-2.6.18-417.el5-028stab121/include/linux/ve_proto.h
--- kernel-2.6.18-417.el5.orig/include/linux/ve_proto.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ve_proto.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,144 @@
+/*
+ *  include/linux/ve_proto.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_H__
+#define __VE_H__
+
+#ifdef CONFIG_VE
+
+struct ve_struct;
+
+struct seq_file;
+
+typedef void (*ve_seq_print_t)(struct seq_file *, struct ve_struct *);
+
+void vzmon_register_veaddr_print_cb(ve_seq_print_t);
+void vzmon_unregister_veaddr_print_cb(ve_seq_print_t);
+
+#ifdef CONFIG_INET
+void ip_fragment_cleanup(struct ve_struct *envid);
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
+#ifdef CONFIG_VE_NETDEV
+int venet_init(void);
+#endif
+#else
+static inline void ip_fragment_cleanup(struct ve_struct *ve) { ; }
+#endif
+
+extern struct list_head ve_list_head;
+#define for_each_ve(ve)	list_for_each_entry((ve), &ve_list_head, ve_list)
+extern rwlock_t ve_list_lock;
+extern struct ve_struct *get_ve_by_id(envid_t);
+extern struct ve_struct *__find_ve_by_id(envid_t);
+
+struct env_create_param3;
+extern int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			   struct env_create_param3 *data, int datalen);
+extern void ve_move_task(struct task_struct *, struct ve_struct *);
+
+int set_device_perms_ve(envid_t veid, unsigned type, dev_t dev, unsigned mask);
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
+void clean_device_perms_ve(envid_t veid);
+extern struct file_operations proc_devperms_ops;
+
+enum {
+	VE_SS_CHAIN,
+	VE_INIT_EXIT_CHAIN,
+
+	VE_MAX_CHAINS
+};
+
+struct in6_addr;
+struct request_sock;
+struct sock;
+
+struct ve_ipv6_ops {
+	int (*snmp_proc_init)(struct ve_struct *);
+	void (*snmp_proc_fini)(struct ve_struct *);
+
+	int (*addrconf_sysctl_init)(struct ve_struct *);
+	void (*addrconf_sysctl_fini)(struct ve_struct *);
+	void (*addrconf_sysctl_free)(struct ve_struct *);
+
+	int (*ndisc_init)(struct ve_struct *);
+	void (*ndisc_fini)(struct ve_struct *);
+
+	int (*route_init)(struct ve_struct *);
+	void (*route_fini)(struct ve_struct *);
+
+	int (*ifdown)(struct net_device *, int);
+	void (*frag_cleanup)(struct ve_struct *);
+	int (*addr_add)(int, struct in6_addr *, int, __u32, __u32);
+	int (*sock_mc_join)(struct sock *, int, struct in6_addr *);
+	struct request_sock * (*reqsk_alloc)(void);
+	void (*reqsk_queue)(struct sock *, struct request_sock *, const unsigned long);
+	void (*make_sk_mapped)(struct sock *);
+};
+
+extern struct ve_ipv6_ops *ve_ipv6_ops;
+
+static inline void ve_ipv6_ops_init(struct ve_ipv6_ops *ops)
+{
+	wmb();
+	BUG_ON(ve_ipv6_ops != NULL);
+	ve_ipv6_ops = ops;
+}
+
+static inline struct ve_ipv6_ops *ve_ipv6_ops_get(void)
+{
+	struct ve_ipv6_ops *ret;
+
+	ret = ve_ipv6_ops;
+	rmb();
+	return ret;
+}
+
+typedef int ve_hook_init_fn(void *data);
+typedef void ve_hook_fini_fn(void *data);
+
+struct ve_hook
+{
+	ve_hook_init_fn *init;
+	ve_hook_fini_fn *fini;
+	struct module *owner;
+
+	/* Functions are called in ascending priority */
+	int priority;
+
+	/* Private part */
+	struct list_head list;
+};
+
+enum {
+	HOOK_PRIO_DEFAULT = 0,
+
+	HOOK_PRIO_FS = HOOK_PRIO_DEFAULT,
+
+	HOOK_PRIO_NET_PRE,
+	HOOK_PRIO_NET,
+	HOOK_PRIO_NET_POST,
+	HOOK_PRIO_NET_ACCT = 100,
+	HOOK_PRIO_NET_ACCT_V6,
+
+	HOOK_PRIO_AFTERALL = INT_MAX
+};
+
+extern int ve_hook_iterate_init(int chain, void *data);
+extern void ve_hook_iterate_fini(int chain, void *data);
+
+extern void ve_hook_register(int chain, struct ve_hook *vh);
+extern void ve_hook_unregister(struct ve_hook *vh);
+#else /* CONFIG_VE */
+#define ve_hook_register(ch, vh)	do { } while (0)
+#define ve_hook_unregister(ve)		do { } while (0)
+
+#define get_device_perms_ve(t, d, a)	(0)
+#endif /* CONFIG_VE */
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/ve_task.h kernel-2.6.18-417.el5-028stab121/include/linux/ve_task.h
--- kernel-2.6.18-417.el5.orig/include/linux/ve_task.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/ve_task.h	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,69 @@
+/*
+ *  include/linux/ve_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_TASK_H__
+#define __VE_TASK_H__
+
+#include <linux/seqlock.h>
+#include <asm/timex.h>
+
+struct ve_task_info {
+/* virtualization */
+	struct ve_struct *owner_env;
+	struct ve_struct *exec_env;
+	struct ve_struct *saved_env;
+	struct list_head vetask_list;
+	struct dentry *glob_proc_dentry;
+/* statistics: scheduling latency */
+	cycles_t sleep_time;
+	cycles_t sched_time;
+	cycles_t sleep_stamp;
+	cycles_t wakeup_stamp;
+	seqcount_t wakeup_lock;
+	struct list_head aux_list;
+};
+
+#define VE_TASK_INFO(task)	(&(task)->ve_task_info)
+#define VE_TASK_LIST_2_TASK(lh)	\
+	list_entry(lh, struct task_struct, ve_task_info.vetask_list)
+
+#ifdef CONFIG_VE
+extern struct ve_struct ve0;
+#define get_ve0()	(&ve0)
+
+#define ve_save_context(t)	do {				\
+		t->ve_task_info.saved_env = 			\
+				t->ve_task_info.exec_env;	\
+		t->ve_task_info.exec_env = get_ve0();		\
+	} while (0)
+#define ve_restore_context(t)	do {				\
+		t->ve_task_info.exec_env = 			\
+				t->ve_task_info.saved_env;	\
+	} while (0)
+
+#define get_exec_env()	(current->ve_task_info.exec_env)
+#define set_exec_env(ve)	({		\
+		struct ve_task_info *vi;	\
+		struct ve_struct *__lold;	\
+						\
+		vi = &current->ve_task_info;	\
+		__lold = vi->exec_env;		\
+		vi->exec_env = ve;		\
+		__lold;				\
+	})
+#else
+#define get_ve0()		(NULL)
+#define get_exec_env()		(NULL)
+#define set_exec_env(new_env)	(NULL)
+#define ve_save_context(t)	do { } while (0)
+#define ve_restore_context(t)	do { } while (0)
+#endif
+
+#endif /* __VE_TASK_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/virtinfo.h kernel-2.6.18-417.el5-028stab121/include/linux/virtinfo.h
--- kernel-2.6.18-417.el5.orig/include/linux/virtinfo.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/virtinfo.h	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,107 @@
+/*
+ *  include/linux/virtinfo.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VIRTINFO_H
+#define __LINUX_VIRTINFO_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/page-flags.h>
+#include <linux/notifier.h>
+
+struct vnotifier_block
+{
+	int (*notifier_call)(struct vnotifier_block *self,
+			unsigned long, void *, int);
+	struct vnotifier_block *next;
+	int priority;
+};
+
+extern struct semaphore virtinfo_sem;
+void __virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
+int virtinfo_notifier_call(int type, unsigned long n, void *data);
+
+struct page_info {
+	unsigned long nr_file_dirty;
+	unsigned long nr_writeback;
+	unsigned long nr_anon_pages;
+	unsigned long nr_file_mapped;
+	unsigned long nr_slab;
+	unsigned long nr_pagetable;
+	unsigned long nr_unstable_nfs;
+	unsigned long nr_bounce;
+};
+
+struct meminfo {
+	struct sysinfo si;
+	struct page_info pi;
+	struct user_beancounter *ub;
+	unsigned long active, inactive;
+	unsigned long cache, swapcache;
+	unsigned long committed_space;
+	unsigned long allowed;
+	unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
+};
+
+#define VIRTINFO_MEMINFO	0
+#define VIRTINFO_ENOUGHMEM	1
+#define VIRTINFO_DOFORK         2
+#define VIRTINFO_DOEXIT         3
+#define VIRTINFO_DOEXECVE       4
+#define VIRTINFO_DOFORKRET      5
+#define VIRTINFO_DOFORKPOST     6
+#define VIRTINFO_EXIT           7
+#define VIRTINFO_EXITMMAP       8
+#define VIRTINFO_EXECMMAP       9
+#define VIRTINFO_OUTOFMEM       10
+#define VIRTINFO_PAGEIN         11
+#define VIRTINFO_SYSINFO        12
+#define VIRTINFO_NEWUBC         13
+#define VIRTINFO_VMSTAT		14
+#define VIRTINFO_OOMKILL	15
+
+#define VIRTINFO_IO_ACCOUNT	0
+#define VIRTINFO_IO_PREPARE	1
+#define VIRTINFO_IO_JOURNAL	2
+#define VIRTINFO_IO_READAHEAD	3
+
+enum virt_info_types {
+	VITYPE_GENERAL,
+	VITYPE_FAUDIT,
+	VITYPE_QUOTA,
+	VITYPE_SCP,
+	VITYPE_IO,
+
+	VIRT_TYPES
+};
+
+#ifdef CONFIG_VZ_GENCALLS
+
+static inline int virtinfo_gencall(unsigned long n, void *data)
+{
+	int r;
+
+	r = virtinfo_notifier_call(VITYPE_GENERAL, n, data);
+	if (r & NOTIFY_FAIL)
+		return -ENOBUFS;
+	if (r & NOTIFY_OK)
+		return -ERESTARTNOINTR;
+	return 0;
+}
+
+#else
+
+#define virtinfo_gencall(n, data)	0
+
+#endif
+
+#endif /* __LINUX_VIRTINFO_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/virtinfoscp.h kernel-2.6.18-417.el5-028stab121/include/linux/virtinfoscp.h
--- kernel-2.6.18-417.el5.orig/include/linux/virtinfoscp.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/virtinfoscp.h	2017-01-13 08:40:16.000000000 -0500
@@ -0,0 +1,23 @@
+#ifndef __VIRTINFO_SCP_H__
+#define __VIRTINFO_SCP_H__
+
+/*
+ * Dump and restore operations are non-symmetric.
+ * With respect to finish/fail hooks, 2 dump hooks are called from
+ * different proc operations, but restore hooks are called from a single one.
+ */
+#define VIRTINFO_SCP_COLLECT    0x10
+#define VIRTINFO_SCP_DUMP       0x11
+#define VIRTINFO_SCP_DMPFIN     0x12
+#define VIRTINFO_SCP_RSTCHECK   0x13
+#define VIRTINFO_SCP_RESTORE    0x14
+#define VIRTINFO_SCP_RSTFAIL    0x15
+
+#define VIRTINFO_SCP_RSTTSK     0x20
+#define VIRTINFO_SCP_RSTMM      0x21
+
+#define VIRTINFO_SCP_TEST	0x30
+
+#define VIRTNOTIFY_CHANGE       0x100 
+
+#endif /* __VIRTINFO_SCP_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vmalloc.h kernel-2.6.18-417.el5-028stab121/include/linux/vmalloc.h
--- kernel-2.6.18-417.el5.orig/include/linux/vmalloc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vmalloc.h	2017-01-13 08:40:19.000000000 -0500
@@ -22,6 +22,10 @@ struct vm_area_struct;
 #define IOREMAP_MAX_ORDER	(7 + PAGE_SHIFT)	/* 128 pages */
 #endif
 
+/* align size to 2^n page boundary */
+#define POWER2_PAGE_ALIGN(size) \
+	((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
+
 struct vm_struct {
 	void			*addr;
 	unsigned long		size;
@@ -36,14 +40,18 @@ struct vm_struct {
  *	Highlevel APIs for driver use
  */
 extern void *vmalloc(unsigned long size);
+extern void *ub_vmalloc(unsigned long size);
 extern void *vzalloc(unsigned long size);
 extern void *vmalloc_user(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
+extern void *ub_vmalloc_node(unsigned long size, int node);
 extern void *vzalloc_node(unsigned long size, int node);
 extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
 extern void *vmalloc_32_user(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
+extern void *vmalloc_best(unsigned long size);
+extern void *ub_vmalloc_best(unsigned long size);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
 extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
@@ -70,6 +78,9 @@ static inline size_t get_vm_area_size(co
 extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
 extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 					unsigned long start, unsigned long end);
+extern struct vm_struct * get_vm_area_best(unsigned long size,
+					   unsigned long flags);
+extern void vprintstat(void);
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					unsigned long flags, int node);
 extern struct vm_struct *remove_vm_area(void *addr);
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vmstat.h kernel-2.6.18-417.el5-028stab121/include/linux/vmstat.h
--- kernel-2.6.18-417.el5.orig/include/linux/vmstat.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vmstat.h	2017-01-13 08:40:19.000000000 -0500
@@ -60,6 +60,7 @@ static inline void count_vm_events(enum 
 	put_cpu();
 }
 
+extern unsigned long vm_events(enum vm_event_item i);
 extern void all_vm_events(unsigned long *);
 extern void vm_events_fold_cpu(int cpu);
 
@@ -169,6 +170,9 @@ void inc_zone_page_state(struct page *, 
 void dec_zone_page_state(struct page *, enum zone_stat_item);
 
 extern void inc_zone_state(struct zone *, enum zone_stat_item);
+extern void __inc_zone_state(struct zone *, enum zone_stat_item);
+extern void dec_zone_state(struct zone *, enum zone_stat_item);
+extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
 void refresh_vm_stats(void);
@@ -197,6 +201,12 @@ static inline void __inc_zone_page_state
 	__inc_zone_state(page_zone(page), item);
 }
 
+static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+	atomic_long_dec(&zone->vm_stat[item]);
+	atomic_long_dec(&vm_stat[item]);
+}
+
 static inline void __dec_zone_page_state(struct page *page,
 			enum zone_stat_item item)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vsched.h kernel-2.6.18-417.el5-028stab121/include/linux/vsched.h
--- kernel-2.6.18-417.el5.orig/include/linux/vsched.h	2017-01-13 08:40:28.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vsched.h	2017-01-13 08:40:28.000000000 -0500
@@ -0,0 +1,53 @@
+/*
+ *  include/linux/vsched.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VSCHED_H__
+#define __VSCHED_H__
+
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/fairsched.h>
+#include <linux/sched.h>
+
+#define task_vsched(tsk)	((tsk)->vsched)
+#define this_vsched()		(task_vsched(current))
+
+/* VCPU scheduler state description */
+struct vcpu_struct;
+struct vcpu_scheduler {
+	struct list_head idle_list;
+	struct list_head active_list;
+	struct list_head running_list;
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+#endif
+	struct list_head list;
+	struct vcpu_struct *vcpu[NR_CPUS];
+	int id;
+	cpumask_t vcpu_online_map, vcpu_running_map;
+	cpumask_t pcpu_running_map;
+	cpumask_t pcpu_allowed_map;
+	int num_online_vcpus;
+	atomic_t nr_unint_fixup; /* nr_uninterruptible stat is added here
+				      on vcpu death */
+} ____cacheline_internodealigned_in_smp;
+
+extern struct vcpu_scheduler default_vsched, idle_vsched;
+
+extern int vsched_create(int id, struct fairsched_node *node);
+extern int vsched_destroy(struct vcpu_scheduler *vsched);
+extern int vsched_taskcount(struct vcpu_scheduler *vsched);
+
+extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched);
+extern int vsched_set_vcpus(struct vcpu_scheduler *vsched, unsigned int vcpus);
+
+unsigned long ve_scale_khz(unsigned long khz);
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzcalluser.h kernel-2.6.18-417.el5-028stab121/include/linux/vzcalluser.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzcalluser.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzcalluser.h	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,257 @@
+/*
+ *  include/linux/vzcalluser.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCALLUSER_H
+#define _LINUX_VZCALLUSER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KERN_VZ_PRIV_RANGE 51
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+#ifndef __KERNEL__
+#define __user
+#endif
+
+/*
+ * VE management ioctls
+ */
+
+struct vzctl_old_env_create {
+	envid_t veid;
+	unsigned flags;
+#define VE_CREATE 	1	/* Create VE, VE_ENTER added automatically */
+#define VE_EXCLUSIVE	2	/* Fail if exists */
+#define VE_ENTER	4	/* Enter existing VE */
+#define VE_TEST		8	/* Test if VE exists */
+#define VE_LOCK		16	/* Do not allow entering created VE */
+#define VE_SKIPLOCK	32	/* Allow entering embrion VE */
+	__u32 addr;
+};
+
+struct vzctl_mark_env_to_down {
+	envid_t veid;
+};
+
+struct vzctl_setdevperms {
+	envid_t veid;
+	unsigned type;
+#define VE_USE_MAJOR	010	/* Test MAJOR supplied in rule */
+#define VE_USE_MINOR	030	/* Test MINOR supplied in rule */
+#define VE_USE_MASK	030	/* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
+	unsigned dev;
+	unsigned mask;
+};
+
+struct vzctl_ve_netdev {
+	envid_t veid;
+	int op;
+#define VE_NETDEV_ADD  1
+#define VE_NETDEV_DEL  2
+	char __user *dev_name;
+};
+
+struct vzctl_ve_meminfo {
+	envid_t veid;
+	unsigned long val;
+};
+
+struct vzctl_ve_configure {
+	unsigned int veid;
+	unsigned int key;
+#define VE_CONFIGURE_OS_RELEASE		2
+	unsigned int val;
+	unsigned int size;
+	char data[0];
+};
+
+/* these masks represent modules */
+#define VE_IP_IPTABLES_MOD		(1U<<0)
+#define VE_IP_FILTER_MOD		(1U<<1)
+#define VE_IP_MANGLE_MOD		(1U<<2)
+#define VE_IP_CONNTRACK_MOD		(1U<<14)
+#define VE_IP_CONNTRACK_FTP_MOD		(1U<<15)
+#define VE_IP_CONNTRACK_IRC_MOD		(1U<<16)
+#define VE_IP_NAT_MOD			(1U<<20)
+#define VE_IP_NAT_FTP_MOD		(1U<<21)
+#define VE_IP_NAT_IRC_MOD		(1U<<22)
+#define VE_IP_IPTABLES6_MOD		(1U<<26)
+#define VE_IP_FILTER6_MOD		(1U<<27)
+#define VE_IP_MANGLE6_MOD		(1U<<28)
+#define VE_IP_IPTABLE_NAT_MOD		(1U<<29)
+
+/* these masks represent modules with their dependences */
+#define VE_IP_IPTABLES		(VE_IP_IPTABLES_MOD)
+#define VE_IP_FILTER		(VE_IP_FILTER_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MANGLE		(VE_IP_MANGLE_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_IPTABLES6		(VE_IP_IPTABLES6_MOD)
+#define VE_IP_FILTER6		(VE_IP_FILTER6_MOD | VE_IP_IPTABLES6)
+#define VE_IP_MANGLE6		(VE_IP_MANGLE6_MOD | VE_IP_IPTABLES6)
+#define VE_IP_CONNTRACK		(VE_IP_CONNTRACK_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_CONNTRACK_FTP	(VE_IP_CONNTRACK_FTP_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_CONNTRACK_IRC	(VE_IP_CONNTRACK_IRC_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT		(VE_IP_NAT_MOD			\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT_FTP		(VE_IP_NAT_FTP_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_FTP)
+#define VE_IP_NAT_IRC		(VE_IP_NAT_IRC_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_IRC)
+#define VE_IP_IPTABLE_NAT	(VE_IP_IPTABLE_NAT_MOD | VE_IP_CONNTRACK)
+
+/* safe iptables mask to be used by default */
+#define VE_IP_DEFAULT					\
+	(VE_IP_IPTABLES |				\
+	VE_IP_FILTER | VE_IP_MANGLE)
+
+#define VE_IPT_CMP(x,y)		(((x) & (y)) == (y))
+
+struct vzctl_env_create_cid {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct vzctl_env_create {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct env_create_param {
+	__u64 iptables_mask;
+};
+
+#define VZCTL_ENV_CREATE_DATA_MINLEN	sizeof(struct env_create_param)
+
+struct env_create_param2 {
+	__u64 iptables_mask;
+	__u64 feature_mask;
+	__u32 total_vcpus;	/* 0 - don't care, same as in host */
+};
+
+struct env_create_param3 {
+	__u64 iptables_mask;
+	__u64 feature_mask;
+	__u32 total_vcpus;
+	__u32 pad;
+	__u64 known_features;
+};
+
+#define VE_FEATURE_SYSFS	(1ULL << 0)
+#define VE_FEATURE_NFS		(1ULL << 1)
+#define VE_FEATURE_DEF_PERMS	(1ULL << 2)
+#define VE_FEATURE_SIT		(1ULL << 3)
+#define VE_FEATURE_IPIP		(1ULL << 4)
+#define VE_FEATURE_PPP		(1ULL << 5)
+#define VE_FEATURE_IPGRE	(1ULL << 6)
+#define VE_FEATURE_BRIDGE	(1ULL << 7)
+#define VE_FEATURE_NFSD		(1ULL << 8)
+
+#define VE_FEATURES_OLD		(VE_FEATURE_SYSFS)
+#define VE_FEATURES_DEF		(VE_FEATURE_SYSFS | \
+				 VE_FEATURE_DEF_PERMS)
+
+typedef struct env_create_param3 env_create_param_t;
+#define VZCTL_ENV_CREATE_DATA_MAXLEN	sizeof(env_create_param_t)
+
+struct vzctl_env_create_data {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+	env_create_param_t __user *data;
+	int datalen;
+};
+
+struct vz_load_avg {
+	int val_int;
+	int val_frac;
+};
+
+struct vz_cpu_stat {
+	unsigned long user_jif;
+	unsigned long nice_jif;
+	unsigned long system_jif; 
+	unsigned long uptime_jif;
+	__u64 idle_clk;
+	__u64 strv_clk;
+	__u64 uptime_clk;
+	struct vz_load_avg avenrun[3];	/* loadavg data */
+};
+
+struct vzctl_cpustatctl {
+	envid_t veid;
+	struct vz_cpu_stat __user *cpustat;
+};
+
+#define VZCTLTYPE '.'
+#define VZCTL_OLD_ENV_CREATE	_IOW(VZCTLTYPE, 0,			\
+					struct vzctl_old_env_create)
+#define VZCTL_MARK_ENV_TO_DOWN	_IOW(VZCTLTYPE, 1,			\
+					struct vzctl_mark_env_to_down)
+#define VZCTL_SETDEVPERMS	_IOW(VZCTLTYPE, 2,			\
+					struct vzctl_setdevperms)
+#define VZCTL_ENV_CREATE_CID	_IOW(VZCTLTYPE, 4,			\
+					struct vzctl_env_create_cid)
+#define VZCTL_ENV_CREATE	_IOW(VZCTLTYPE, 5,			\
+					struct vzctl_env_create)
+#define VZCTL_GET_CPU_STAT	_IOW(VZCTLTYPE, 6,			\
+					struct vzctl_cpustatctl)
+#define VZCTL_ENV_CREATE_DATA	_IOW(VZCTLTYPE, 10,			\
+					struct vzctl_env_create_data)
+#define VZCTL_VE_NETDEV		_IOW(VZCTLTYPE, 11,			\
+					struct vzctl_ve_netdev)
+#define VZCTL_VE_MEMINFO	_IOW(VZCTLTYPE, 13,                     \
+					struct vzctl_ve_meminfo)
+#define VZCTL_VE_CONFIGURE	_IOW(VZCTLTYPE, 15,			\
+					struct vzctl_ve_configure)
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+struct compat_vzctl_ve_netdev {
+	envid_t veid;
+	int op;
+	compat_uptr_t dev_name;
+};
+
+struct compat_vzctl_ve_meminfo {
+	envid_t veid;
+	compat_ulong_t val;
+};
+
+struct compat_vzctl_env_create_data {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+	compat_uptr_t data;
+	int datalen;
+};
+
+#define VZCTL_COMPAT_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10,		\
+					struct compat_vzctl_env_create_data)
+#define VZCTL_COMPAT_VE_NETDEV	_IOW(VZCTLTYPE, 11,			\
+					struct compat_vzctl_ve_netdev)
+#define VZCTL_COMPAT_VE_MEMINFO	_IOW(VZCTLTYPE, 13,                     \
+					struct compat_vzctl_ve_meminfo)
+#endif
+#endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzctl.h kernel-2.6.18-417.el5-028stab121/include/linux/vzctl.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzctl.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzctl.h	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,30 @@
+/*
+ *  include/linux/vzctl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCTL_H
+#define _LINUX_VZCTL_H
+
+#include <linux/list.h>
+
+struct module;
+struct inode;
+struct file;
+struct vzioctlinfo {
+	unsigned type;
+	int (*ioctl)(struct file *, unsigned int, unsigned long);
+	int (*compat_ioctl)(struct file *, unsigned int, unsigned long);
+	struct module *owner;
+	struct list_head list;
+};
+
+extern void vzioctl_register(struct vzioctlinfo *inf);
+extern void vzioctl_unregister(struct vzioctlinfo *inf);
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzctl_quota.h kernel-2.6.18-417.el5-028stab121/include/linux/vzctl_quota.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzctl_quota.h	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzctl_quota.h	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,74 @@
+/*
+ *  include/linux/vzctl_quota.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VZCTL_QUOTA_H__
+#define __LINUX_VZCTL_QUOTA_H__
+
+#include <linux/compat.h>
+
+#ifndef __KERNEL__
+#define __user
+#endif
+
+/*
+ * Quota management ioctl
+ */
+
+struct vz_quota_stat;
+struct vzctl_quotactl {
+	int cmd;
+	unsigned int quota_id;
+	struct vz_quota_stat __user *qstat;
+	char __user *ve_root;
+};
+
+struct vzctl_quotaugidctl {
+	int cmd;		/* subcommand */
+	unsigned int quota_id;	/* quota id where it applies to */
+	unsigned int ugid_index;/* for reading statistic. index of first
+				    uid/gid record to read */
+	unsigned int ugid_size;	/* size of ugid_buf array */
+	void *addr; 		/* user-level buffer */
+};
+
+#define VZDQCTLTYPE '+'
+#define VZCTL_QUOTA_DEPR_CTL	_IOWR(VZDQCTLTYPE, 1,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_NEW_CTL	_IOWR(VZDQCTLTYPE, 2,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_UGID_CTL	_IOWR(VZDQCTLTYPE, 3,			\
+					struct vzctl_quotaugidctl)
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+struct compat_vzctl_quotactl {
+	int cmd;
+	unsigned int quota_id;
+	compat_uptr_t qstat;
+	compat_uptr_t ve_root;
+};
+
+struct compat_vzctl_quotaugidctl {
+	int cmd;		/* subcommand */
+	unsigned int quota_id;	/* quota id where it applies to */
+	unsigned int ugid_index;/* for reading statistic. index of first
+				    uid/gid record to read */
+	unsigned int ugid_size;	/* size of ugid_buf array */
+	compat_uptr_t addr; 	/* user-level buffer */
+};
+
+#define VZCTL_COMPAT_QUOTA_CTL	_IOWR(VZDQCTLTYPE, 2,			\
+					struct compat_vzctl_quotactl)
+#define VZCTL_COMPAT_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3,		\
+					struct compat_vzctl_quotaugidctl)
+#endif
+#endif
+
+#endif /* __LINUX_VZCTL_QUOTA_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzctl_venet.h kernel-2.6.18-417.el5-028stab121/include/linux/vzctl_venet.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzctl_venet.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzctl_venet.h	2017-01-13 08:40:22.000000000 -0500
@@ -0,0 +1,53 @@
+/*
+ *  include/linux/vzctl_venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VENET_H
+#define _VZCTL_VENET_H
+
+#include <linux/types.h>
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_ip_map {
+	envid_t veid;
+	int op;
+#define VE_IP_ADD	1
+#define VE_IP_DEL	2
+#define VE_IP_EXT_ADD	3
+#define VE_IP_EXT_DEL	4
+	struct sockaddr *addr;
+	int addrlen;
+};
+
+#define VENETCTLTYPE '('
+
+#define VENETCTL_VE_IP_MAP	_IOW(VENETCTLTYPE, 3,			\
+					struct vzctl_ve_ip_map)
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+struct compat_vzctl_ve_ip_map {
+	envid_t veid;
+	int op;
+	compat_uptr_t addr;
+	int addrlen;
+};
+
+#define VENETCTL_COMPAT_VE_IP_MAP _IOW(VENETCTLTYPE, 3,			\
+					struct compat_vzctl_ve_ip_map)
+#endif
+#endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzctl_veth.h kernel-2.6.18-417.el5-028stab121/include/linux/vzctl_veth.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzctl_veth.h	2017-01-13 08:40:21.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzctl_veth.h	2017-01-13 08:40:22.000000000 -0500
@@ -0,0 +1,42 @@
+/*
+ *  include/linux/vzctl_veth.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VETH_H
+#define _VZCTL_VETH_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_hwaddr {
+	envid_t veid;
+	int op;
+#define VE_ETH_ADD			1
+#define VE_ETH_DEL			2
+#define VE_ETH_ALLOW_MAC_CHANGE		3
+#define VE_ETH_DENY_MAC_CHANGE		4
+	unsigned char	dev_addr[6];
+	int addrlen;
+	char		dev_name[16];
+	unsigned char	dev_addr_ve[6];
+	int addrlen_ve;
+	char		dev_name_ve[16];
+};
+
+#define VETHCTLTYPE '['
+
+#define VETHCTL_VE_HWADDR	_IOW(VETHCTLTYPE, 3,			\
+					struct vzctl_ve_hwaddr)
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzdq_tree.h kernel-2.6.18-417.el5-028stab121/include/linux/vzdq_tree.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzdq_tree.h	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzdq_tree.h	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,99 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota tree definition
+ */
+
+#ifndef _VZDQ_TREE_H
+#define _VZDQ_TREE_H
+
+#include <linux/list.h>
+#include <asm/string.h>
+
+typedef unsigned int quotaid_t;
+#define QUOTAID_BITS		32
+#define QUOTAID_BBITS		4
+#define QUOTAID_EBITS		8
+
+#if QUOTAID_EBITS % QUOTAID_BBITS
+#error Quota bit assumption failure
+#endif
+
+#define QUOTATREE_BSIZE		(1 << QUOTAID_BBITS)
+#define QUOTATREE_BMASK		(QUOTATREE_BSIZE - 1)
+#define QUOTATREE_DEPTH		((QUOTAID_BITS + QUOTAID_BBITS - 1) \
+							/ QUOTAID_BBITS)
+#define QUOTATREE_EDEPTH	((QUOTAID_BITS + QUOTAID_EBITS - 1) \
+							/ QUOTAID_EBITS)
+#define QUOTATREE_BSHIFT(lvl)	((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
+
+/*
+ * Depth of keeping unused node (not inclusive).
+ * 0 means release all nodes including root,
+ * QUOTATREE_DEPTH means never release nodes.
+ * Current value: release all nodes strictly after QUOTATREE_EDEPTH 
+ * (measured in external shift units).
+ */
+#define QUOTATREE_CDEPTH	(QUOTATREE_DEPTH \
+				- 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
+				+ 1)
+
+/*
+ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
+ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
+ * and each node contains 2^QUOTAID_BBITS pointers.
+ * Level 0 is a (single) tree root node.
+ *
+ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
+ * Nodes of lower levels contain pointers to nodes.
+ *
+ * Double pointer in array of i-level node, pointing to a (i+1)-level node
+ * (such as inside quotatree_find_state) are marked by level (i+1), not i.
+ * Level 0 double pointer is a pointer to root inside tree struct.
+ *
+ * The tree is permanent, i.e. all index blocks allocated are kept alive to
+ * preserve the block numbers in the quota file tree and to keep its changes
+ * local.
+ */
+struct quotatree_node {
+	struct list_head list;
+	quotaid_t num;
+	void *blocks[QUOTATREE_BSIZE];
+};
+
+struct quotatree_level {
+	struct list_head usedlh, freelh;
+	quotaid_t freenum;
+};
+
+struct quotatree_tree {
+	struct quotatree_level levels[QUOTATREE_DEPTH];
+	struct quotatree_node *root;
+	unsigned int leaf_num;
+};
+
+struct quotatree_find_state {
+	void **block;
+	int level;
+};
+
+/* number of leafs (objects) and leaf level of the tree */
+#define QTREE_LEAFNUM(tree)	((tree)->leaf_num)
+#define QTREE_LEAFLVL(tree)	(&(tree)->levels[QUOTATREE_DEPTH - 1])
+
+struct quotatree_tree *quotatree_alloc(void);
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st);
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data);
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
+
+#endif /* _VZDQ_TREE_H */
+
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzevent.h kernel-2.6.18-417.el5-028stab121/include/linux/vzevent.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzevent.h	2017-01-13 08:40:20.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzevent.h	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,13 @@
+#ifndef __LINUX_VZ_EVENT_H__
+#define __LINUX_VZ_EVENT_H__
+
+#if defined(CONFIG_VZ_EVENT) || defined(CONFIG_VZ_EVENT_MODULE)
+extern int vzevent_send(int msg, const char *attrs_fmt, ...);
+#else
+static inline int vzevent_send(int msg, const char *attrs_fmt, ...)
+{
+	return 0;
+}
+#endif
+
+#endif /* __LINUX_VZ_EVENT_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzquota.h kernel-2.6.18-417.el5-028stab121/include/linux/vzquota.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzquota.h	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzquota.h	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,454 @@
+/*
+ *
+ * Copyright (C) 2001-2005 SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota implementation
+ */
+
+#ifndef _VZDQUOTA_H
+#define _VZDQUOTA_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/* vzquotactl syscall commands */
+#define VZ_DQ_CREATE		5 /* create quota master block */
+#define VZ_DQ_DESTROY		6 /* destroy qmblk */
+#define VZ_DQ_ON		7 /* mark dentry with already created qmblk */
+#define VZ_DQ_OFF		8 /* remove mark, don't destroy qmblk */
+#define VZ_DQ_SETLIMIT		9 /* set new limits */
+#define VZ_DQ_GETSTAT		10 /* get usage statistic */
+#define VZ_DQ_OFF_FORCED	11 /* forced off */
+/* set of syscalls to maintain UGID quotas */
+#define VZ_DQ_UGID_GETSTAT	1 /* get usage/limits for ugid(s) */
+#define VZ_DQ_UGID_ADDSTAT	2 /* set usage/limits statistic for ugid(s) */
+#define VZ_DQ_UGID_GETGRACE	3 /* get expire times */
+#define VZ_DQ_UGID_SETGRACE	4 /* set expire times */
+#define VZ_DQ_UGID_GETCONFIG	5 /* get ugid_max limit, cnt, flags of qmblk */
+#define VZ_DQ_UGID_SETCONFIG	6 /* set ugid_max limit, flags of qmblk */
+#define VZ_DQ_UGID_SETLIMIT	7 /* set ugid B/I limits */
+#define VZ_DQ_UGID_SETINFO	8 /* set ugid info */
+
+/* common structure for vz and ugid quota */
+struct dq_stat {
+	/* blocks limits */
+	__u64	bhardlimit;	/* absolute limit in bytes */
+	__u64	bsoftlimit;	/* preferred limit in bytes */
+	time_t	btime;		/* time limit for excessive disk use */
+	__u64	bcurrent;	/* current bytes count */
+	/* inodes limits */
+	__u32	ihardlimit;	/* absolute limit on allocated inodes */
+	__u32	isoftlimit;	/* preferred inode limit */
+	time_t	itime;		/* time limit for excessive inode use */
+	__u32	icurrent;	/* current # allocated inodes */
+};
+
+/* One second resolution for grace times */
+#define CURRENT_TIME_SECONDS	(get_seconds())
+
+/* Values for dq_info->flags */
+#define VZ_QUOTA_INODES 0x01       /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE  0x02       /* space limit warning printed */
+
+struct dq_info {
+	time_t		bexpire;   /* expire timeout for excessive disk use */
+	time_t		iexpire;   /* expire timeout for excessive inode use */
+	unsigned	flags;	   /* see previous defines */
+};
+
+struct vz_quota_stat  {
+	struct dq_stat dq_stat;
+	struct dq_info dq_info;
+};
+
+/* UID/GID interface record - for user-kernel level exchange */
+struct vz_quota_iface {
+	unsigned int	qi_id;	   /* UID/GID this applies to */
+	unsigned int	qi_type;   /* USRQUOTA|GRPQUOTA */
+	struct dq_stat	qi_stat;   /* limits, options, usage stats */
+};
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+struct compat_dq_stat {
+	/* blocks limits */
+	__u64	bhardlimit;	/* absolute limit in bytes */
+	__u64	bsoftlimit;	/* preferred limit in bytes */
+	compat_time_t btime;	/* time limit for excessive disk use */
+	__u64	bcurrent;	/* current bytes count */
+	/* inodes limits */
+	__u32	ihardlimit;	/* absolute limit on allocated inodes */
+	__u32	isoftlimit;	/* preferred inode limit */
+	compat_time_t itime;	/* time limit for excessive inode use */
+	__u32	icurrent;	/* current # allocated inodes */
+};
+
+struct compat_dq_info {
+	compat_time_t	bexpire;   /* expire timeout for excessive disk use */
+	compat_time_t	iexpire;   /* expire timeout for excessive inode use */
+	unsigned	flags;	   /* see previous defines */
+};
+
+struct compat_vz_quota_stat  {
+	struct compat_dq_stat dq_stat;
+	struct compat_dq_info dq_info;
+};
+
+struct compat_vz_quota_iface {
+	unsigned int	qi_id;	   /* UID/GID this applies to */
+	unsigned int	qi_type;   /* USRQUOTA|GRPQUOTA */
+	struct compat_dq_stat qi_stat;   /* limits, options, usage stats */
+};
+
+static inline void compat_dqstat2dqstat(struct compat_dq_stat *odqs,
+				struct dq_stat *dqs)
+{
+	dqs->bhardlimit = odqs->bhardlimit;
+	dqs->bsoftlimit = odqs->bsoftlimit;
+	dqs->bcurrent = odqs->bcurrent;
+	dqs->btime = odqs->btime;
+
+	dqs->ihardlimit = odqs->ihardlimit;
+	dqs->isoftlimit = odqs->isoftlimit;
+	dqs->icurrent = odqs->icurrent;
+	dqs->itime = odqs->itime;
+}
+
+static inline void compat_dqinfo2dqinfo(struct compat_dq_info *odqi,
+				struct dq_info *dqi)
+{
+	dqi->bexpire = odqi->bexpire;
+	dqi->iexpire = odqi->iexpire;
+	dqi->flags = odqi->flags;
+}
+
+static inline void dqstat2compat_dqstat(struct dq_stat *dqs,
+				struct compat_dq_stat *odqs)
+{
+	odqs->bhardlimit = dqs->bhardlimit;
+	odqs->bsoftlimit = dqs->bsoftlimit;
+	odqs->bcurrent = dqs->bcurrent;
+	odqs->btime = (compat_time_t)dqs->btime;
+
+	odqs->ihardlimit = dqs->ihardlimit;
+	odqs->isoftlimit = dqs->isoftlimit;
+	odqs->icurrent = dqs->icurrent;
+	odqs->itime = (compat_time_t)dqs->itime;
+}
+
+static inline void dqinfo2compat_dqinfo(struct dq_info *dqi,
+				struct compat_dq_info *odqi)
+{
+	odqi->bexpire = (compat_time_t)dqi->bexpire;
+	odqi->iexpire = (compat_time_t)dqi->iexpire;
+	odqi->flags = dqi->flags;
+}
+#endif
+
+/* values for flags and dq_flags */
+/* this flag is set if the userspace has been unable to provide usage
+ * information about all ugids
+ * if the flag is set, we don't allocate new UG quota blocks (their
+ * current usage is unknown) or free existing UG quota blocks (not to
+ * lose information that this block is ok) */
+#define VZDQUG_FIXED_SET	0x01
+/* permit to use ugid quota */
+#define VZDQUG_ON		0x02
+#define VZDQ_USRQUOTA		0x10
+#define VZDQ_GRPQUOTA		0x20
+#define VZDQ_NOACT		0x1000	/* not actual */
+#define VZDQ_NOQUOT		0x2000	/* not under quota tree */
+
+struct vz_quota_ugid_stat {
+	unsigned int	limit;	/* max amount of ugid records */
+	unsigned int	count;	/* amount of ugid records */
+	unsigned int	flags;	
+};
+
+struct vz_quota_ugid_setlimit {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	unsigned int	id;	/* ugid */
+	struct if_dqblk dqb;	/* limits info */
+};
+
+struct vz_quota_ugid_setinfo {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	struct if_dqinfo dqi;	/* grace info */
+};
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <linux/time.h>
+#include <linux/vzquota_qlnk.h>
+#include <linux/vzdq_tree.h>
+
+/* Values for dq_info flags */
+#define VZ_QUOTA_INODES	0x01	   /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE	0x02	   /* space limit warning printed */
+
+/* values for dq_state */
+#define VZDQ_STARTING		0 /* created, not turned on yet */
+#define VZDQ_WORKING		1 /* quota created, turned on */
+#define VZDQ_STOPING		2 /* created, turned on and off */
+
+/* master quota record - one per veid */
+struct vz_quota_master {
+	struct list_head	dq_hash;	/* next quota in hash list */
+	atomic_t		dq_count;	/* inode reference count */
+	unsigned int		dq_flags;	/* see VZDQUG_FIXED_SET */
+	unsigned int		dq_state;	/* see values above */
+	unsigned int		dq_id;		/* VEID this applies to */
+	struct dq_stat		dq_stat; 	/* limits, grace, usage stats */
+	struct dq_info		dq_info;	/* grace times and flags */
+	spinlock_t		dq_data_lock;	/* for dq_stat */
+
+	struct mutex		dq_mutex;	/* mutex to protect
+						   ugid tree */
+
+	struct list_head	dq_ilink_list;	/* list of vz_quota_ilink */
+	struct quotatree_tree	*dq_uid_tree;	/* vz_quota_ugid tree for UIDs */
+	struct quotatree_tree	*dq_gid_tree;	/* vz_quota_ugid tree for GIDs */
+	unsigned int		dq_ugid_count;	/* amount of ugid records */
+	unsigned int		dq_ugid_max;	/* max amount of ugid records */
+	struct dq_info		dq_ugid_info[MAXQUOTAS]; /* ugid grace times */
+
+	struct dentry		*dq_root_dentry;/* dentry of fs tree */
+	struct vfsmount		*dq_root_mnt;	/* vfsmnt of this dentry */
+	struct super_block	*dq_sb;	      /* superblock of our quota root */
+	struct vzsnap_struct	*dq_snap;	/* pointer to vzsnap struct */
+};
+
+/* UID/GID quota record - one per pair (quota_master, uid or gid) */
+struct vz_quota_ugid {
+	unsigned int		qugid_id;     /* UID/GID this applies to */
+	struct dq_stat		qugid_stat;   /* limits, options, usage stats */
+	int			qugid_type;   /* USRQUOTA|GRPQUOTA */
+	atomic_t		qugid_count;  /* reference count */
+};
+
+#define VZ_QUOTA_UGBAD		((struct vz_quota_ugid *)0xfeafea11)
+
+struct vz_quota_datast {
+	struct vz_quota_ilink qlnk;
+};
+
+#define VIRTINFO_QUOTA_GETSTAT	0
+#define VIRTINFO_QUOTA_ON	1
+#define VIRTINFO_QUOTA_OFF	2
+#define VIRTINFO_QUOTA_DISABLE	3
+
+struct virt_info_quota {
+	struct super_block *super;
+	struct dq_stat *qstat;
+};
+
+/*
+ * Interface to VZ quota core
+ */
+#define INODE_QLNK(inode)	(&(inode)->i_qlnk)
+#define QLNK_INODE(qlnk)	container_of((qlnk), struct inode, i_qlnk)
+
+#define VZ_QUOTA_BAD		((struct vz_quota_master *)0xefefefef)
+
+#define VZ_QUOTAO_SETE		1
+#define VZ_QUOTAO_INIT		2
+#define VZ_QUOTAO_DESTR		3
+#define VZ_QUOTAO_SWAP		4
+#define VZ_QUOTAO_INICAL	5
+#define VZ_QUOTAO_DRCAL		6
+#define VZ_QUOTAO_QSET		7
+#define VZ_QUOTAO_TRANS		8
+#define VZ_QUOTAO_ACT		9
+#define VZ_QUOTAO_DTREE		10
+#define VZ_QUOTAO_DET		11
+#define VZ_QUOTAO_ON		12
+#define VZ_QUOTAO_RE_LOCK	13
+
+#define DQUOT_CMD_ALLOC		0
+#define DQUOT_CMD_PREALLOC	1
+#define DQUOT_CMD_CHECK		12
+#define DQUOT_CMD_FORCE		13
+
+extern struct mutex vz_quota_mutex;
+
+void inode_qmblk_lock(struct super_block *sb);
+void inode_qmblk_unlock(struct super_block *sb);
+void qmblk_data_read_lock(struct vz_quota_master *qmblk);
+void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
+void qmblk_data_write_lock(struct vz_quota_master *qmblk);
+void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
+
+/* for quota operations */
+void vzquota_inode_init_call(struct inode *inode);
+void vzquota_inode_swap_call(struct inode *, struct inode *);
+void vzquota_inode_drop_call(struct inode *inode);
+int vzquota_inode_transfer_call(struct inode *, struct iattr *);
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *);
+void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir);
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
+/* for second-level quota */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+/* for management operations */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat);
+void vzquota_free_master(struct vz_quota_master *);
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk, char __user *buf);
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk,
+		char __user *buf, int force);
+int vzquota_get_super(struct super_block *sb);
+void vzquota_put_super(struct super_block *sb);
+
+/* ----------------------------------------------------------------------
+ *
+ * Passing quota information through current
+ *
+ * Used in inode -> qmblk lookup at inode creation stage (since at that
+ * time there are no links between the inode being created and its parent
+ * directory).
+ *
+ * Used also in NFS - when one opens inode by its i_ino the inode is
+ * actually detached and vzquota can't find qmblk for it. However the
+ * export's root is a good candidate for this.
+ *
+ * --------------------------------------------------------------------- */
+
+#define VZDQ_CUR_MAGIC	0x57d0fee2
+
+static inline void vzquota_cur_qmblk_set(struct inode *data)
+{
+	struct task_struct *tsk;
+
+	tsk = current;
+	tsk->magic = VZDQ_CUR_MAGIC;
+	tsk->ino = data;
+}
+
+static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
+{
+	if (!atomic_read(&qmblk->dq_count))
+		BUG();
+	atomic_inc(&qmblk->dq_count);
+	return qmblk;
+}
+
+static inline void __qmblk_put(struct vz_quota_master *qmblk)
+{
+	atomic_dec(&qmblk->dq_count);
+}
+
+static inline void qmblk_put(struct vz_quota_master *qmblk)
+{
+	if (!atomic_dec_and_test(&qmblk->dq_count))
+		return;
+	vzquota_free_master(qmblk);
+}
+
+extern struct list_head vzquota_hash_table[];
+extern int vzquota_hash_size;
+
+/*
+ * Interface to VZ UGID quota
+ */
+extern struct quotactl_ops vz_quotactl_operations;
+extern struct dquot_operations vz_quota_operations2;
+extern struct quota_format_type vz_quota_empty_v2_format;
+
+#define QUGID_TREE(qmblk, type)	(((type) == USRQUOTA) ?		\
+					qmblk->dq_uid_tree :	\
+					qmblk->dq_gid_tree)
+
+#define VZDQUG_FIND_DONT_ALLOC	1
+#define VZDQUG_FIND_FAKE	2
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid);
+void vzquota_kill_ugid(struct vz_quota_master *qmblk);
+int vzquota_ugid_init(void);
+void vzquota_ugid_release(void);
+int vzquota_transfer_usage(struct inode *inode, int mask,
+		struct vz_quota_ilink *qlnk);
+void vzquota_inode_off(struct inode *inode);
+
+long do_vzquotaugidctl(int cmd, unsigned int quota_id,
+		unsigned int ugid_index, unsigned int ugid_size,
+		void *addr, int compat);
+
+/*
+ * Other VZ quota parts
+ */
+extern struct dquot_operations vz_quota_operations;
+
+long do_vzquotactl(int cmd, unsigned int quota_id,
+		struct vz_quota_stat __user *qstat, const char __user *ve_root,
+		int compat);
+int vzquota_proc_init(void);
+void vzquota_proc_release(void);
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+
+void vzaquota_init(void);
+void vzaquota_fini(void);
+
+extern int vzquota_snap_init(struct super_block *super, struct vzsnap_struct *vzs);
+extern int vzquota_snap_stop(struct super_block *super, struct vzsnap_struct *vzs);
+
+/* This is the ugliest hack of the release, we have to fixup filesystem type
+ * in order to support quota tools (matched against task->comm below).
+ */
+static inline int vzquota_fake_fstype(const struct task_struct *tsk)
+{
+	const char **p;
+	const char *comm;
+	const char *comm_list[] = {
+		"convertquota",
+		"edquota",
+		"quota",
+		"quot",
+		"quotacheck",
+		"quotadebug",
+		"quotaon",
+		"quotaoff",
+		"quotastats",
+		"quota_nld",
+		"repquota",
+		"rpc.rquotad",
+		"setquota",
+		"setup_quota_group",
+		"xqmstats",
+		"warnquota",
+		NULL,
+	};
+	comm = strrchr(tsk->comm, '/');
+	if (comm)
+		comm++;
+	else
+		comm = tsk->comm;
+
+	p = comm_list;
+	while (*p != NULL) {
+		if (!strcmp(*p, comm))
+			return 1;
+		p++;
+	}
+	return 0;
+}
+
+/* quotacheck uses direct scan mode for ext2/ext3 */
+#define VZQUOTA_FAKE_FSTYPE "reiserfs"
+
+#endif /* __KERNEL__ */
+
+#endif /* _VZDQUOTA_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzquota_qlnk.h kernel-2.6.18-417.el5-028stab121/include/linux/vzquota_qlnk.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzquota_qlnk.h	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzquota_qlnk.h	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,25 @@
+/*
+ *  include/linux/vzquota_qlnk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZDQUOTA_QLNK_H
+#define _VZDQUOTA_QLNK_H
+
+struct vz_quota_master;
+struct vz_quota_ugid;
+
+/* inode link, used to track inodes using quota via dq_ilink_list */
+struct vz_quota_ilink {
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *qugid[MAXQUOTAS];
+	struct list_head list;
+	unsigned char origin[2];
+};
+
+#endif /* _VZDQUOTA_QLNK_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzratelimit.h kernel-2.6.18-417.el5-028stab121/include/linux/vzratelimit.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzratelimit.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzratelimit.h	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,28 @@
+/*
+ *  include/linux/vzratelimit.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZ_RATELIMIT_H__
+#define __VZ_RATELIMIT_H__
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct vz_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int vz_ratelimit(struct vz_rate_info *p);
+
+#endif /* __VZ_RATELIMIT_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzsnap.h kernel-2.6.18-417.el5-028stab121/include/linux/vzsnap.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzsnap.h	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzsnap.h	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,119 @@
+/*
+ *
+ * Copyright (C) 2007 SWsoft
+ * All rights reserved.
+ * 
+ */
+
+#ifndef _VZSNAP_H
+#define _VZSNAP_H
+
+#define VZSNAPCTLTYPE ';'
+
+#define VZCTL_VZSNAP_NEW_CTL	_IO(VZSNAPCTLTYPE, 1)
+
+
+#define VZSNAPCTL_SET_ID	0
+#define VZSNAPCTL_BIND_VZFS	1
+#define VZSNAPCTL_BIND_VE	2
+#define VZSNAPCTL_PREPARE_DIR	3
+#define VZSNAPCTL_SCAN_FD	4
+#define VZSNAPCTL_RESCAN_FD	5
+#define VZSNAPCTL_SCAN_NAME	6
+#define VZSNAPCTL_START		7
+#define VZSNAPCTL_STOP		8
+#define VZSNAPCTL_GETROOT	9
+
+#define VZSNAPCTL_GETBMAPSIZE	10
+#define VZSNAPCTL_GETIMAPSIZE	11
+#define VZSNAPCTL_GETBMAPMAP	12
+#define VZSNAPCTL_GETIMAPMAP	13
+
+/* ioctl request structure for VZSNAPCTL_SCAN_NAME. "Novel idea" is to use
+ * 64bit interface even on 32bit hosts. I know, I know... */
+
+struct vzsnap_name_req
+{
+	__s32	dirfd;
+	__s32	pad;
+	__u64	ptr;
+} __attribute__((aligned (8)));
+
+/* Offsets on vzsnap "bus" */
+
+#define VZSNAP_BMAP_PGOFF	0
+#define VZSNAP_IMAP_PGOFF	0x20000000
+
+enum
+{
+	IS_NONE		= 0,	/* Not scanned or not within our tree */
+	IS_SCANNED	= 1,	/* Inode is ours, scan is started */
+	IS_RESCAN	= 3	/* Inode is ours, needs rescan */
+};
+
+#ifdef __KERNEL__
+
+struct vzsnap_struct
+{
+	atomic_t		refcnt;
+	unsigned long		dead;
+	unsigned long		state;
+	struct list_head	list;
+	int			id;
+
+	struct vzsnap_ops	*ops;
+
+	int			error;
+
+	int			ve_frozen;
+	struct ve_struct	*ve;
+
+	struct page		**inode_map;
+	struct page		**block_map;
+	ino_t			inode_max;
+	sector_t		block_max;
+
+	struct vfsmount		*vzfs_mnt;
+	struct dentry		*vzfs_root;
+	unsigned long		priv_ino;
+	unsigned long		cow_ino;
+
+	struct vfsmount		*vzdq_mnt;
+	struct dentry		*vzdq_root;
+
+	struct super_block	*psb;
+
+	spinlock_t		lock;	/* Protects bitmap operations */
+	struct mutex		mutex;	/* ioctl serialization */
+};
+
+struct vzsnap_ops
+{
+	void (*addblock)(struct vzsnap_struct *vzs, struct inode * inode);
+	void (*create)(struct vzsnap_struct *vzs, struct inode *dir, struct dentry *de);
+	void (*unlink)(struct vzsnap_struct *vzs, struct inode *dir, struct dentry *de);
+	void (*rename)(struct vzsnap_struct *vzs, struct inode *ndir,
+		   struct dentry *nde, struct inode *odir, struct dentry *ode);
+	void (*truncate)(struct vzsnap_struct *vzs, struct inode *dir, size_t len);
+};
+
+/* Should be protected with user-specific serializer */
+
+static inline struct vzsnap_struct *vzsnap_get(struct vzsnap_struct * vzs)
+{
+	atomic_inc(&vzs->refcnt);
+	return vzs;
+}
+
+static inline void __vzsnap_put(struct vzsnap_struct * vzs)
+{
+	atomic_dec(&vzs->refcnt);
+}
+
+extern int vzsnap_release_map(struct vzsnap_struct *vzs);
+extern struct vzsnap_struct * vzsnap_get_map(int id, struct block_device *bdev);
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VZSNAP_H */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/vzstat.h kernel-2.6.18-417.el5-028stab121/include/linux/vzstat.h
--- kernel-2.6.18-417.el5.orig/include/linux/vzstat.h	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/vzstat.h	2017-01-13 08:40:30.000000000 -0500
@@ -0,0 +1,184 @@
+/*
+ *  include/linux/vzstat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZSTAT_H__
+#define __VZSTAT_H__
+
+#include <linux/mmzone.h>
+
+struct swap_cache_info_struct {
+	unsigned long add_total;
+	unsigned long del_total;
+	unsigned long find_success;
+	unsigned long find_total;
+	unsigned long noent_race;
+	unsigned long exist_race;
+	unsigned long remove_race;
+};
+
+struct kstat_lat_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+};
+struct kstat_lat_pcpu_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+	seqcount_t lock;
+} ____cacheline_aligned_in_smp;
+
+struct kstat_lat_struct {
+	struct kstat_lat_snap_struct cur, last;
+	cycles_t avg[3];
+};
+struct kstat_lat_pcpu_struct {
+	struct kstat_lat_pcpu_snap_struct *cur;
+	cycles_t max_snap;
+	struct kstat_lat_snap_struct last;
+	cycles_t avg[3];
+};
+
+struct kstat_perf_snap_struct {
+	cycles_t wall_tottime, cpu_tottime;
+	cycles_t wall_maxdur, cpu_maxdur;
+	unsigned long count;
+};
+struct kstat_perf_struct {
+	struct kstat_perf_snap_struct cur, last;
+};
+
+struct kstat_zone_avg {
+	unsigned long		free_pages_avg[3],
+				nr_active_avg[3],
+				nr_inactive_avg[3];
+};
+
+#define KSTAT_ALLOCSTAT_NR 5
+
+struct kernel_stat_glob {
+	unsigned long nr_unint_avg[3];
+
+	unsigned long alloc_fails[NR_CPUS][KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_pcpu_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_pcpu_struct sched_lat;
+	struct kstat_lat_struct swap_in;
+
+	struct kstat_perf_struct ttfp, cache_reap,
+			refill_inact, shrink_icache, shrink_dcache;
+
+	struct kstat_zone_avg zone_avg[MAX_NR_ZONES];
+} ____cacheline_aligned;
+
+extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
+extern spinlock_t kstat_glb_lock;
+
+#ifdef CONFIG_VE
+#define KSTAT_PERF_ENTER(name)				\
+	unsigned long flags;				\
+	cycles_t start, sleep_time;			\
+							\
+	start = get_cycles();				\
+	sleep_time = VE_TASK_INFO(current)->sleep_time;	\
+
+#define KSTAT_PERF_LEAVE(name)				\
+	spin_lock_irqsave(&kstat_glb_lock, flags);	\
+	kstat_glob.name.cur.count++;			\
+	start = get_cycles() - start;			\
+	if (kstat_glob.name.cur.wall_maxdur < start)	\
+		kstat_glob.name.cur.wall_maxdur = start;\
+	kstat_glob.name.cur.wall_tottime += start;	\
+	start -= VE_TASK_INFO(current)->sleep_time -	\
+					sleep_time;	\
+	if (kstat_glob.name.cur.cpu_maxdur < start)	\
+		kstat_glob.name.cur.cpu_maxdur = start;	\
+	kstat_glob.name.cur.cpu_tottime += start;	\
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);	\
+
+#else
+#define KSTAT_PERF_ENTER(name)
+#define KSTAT_PERF_LEAVE(name)
+#endif
+
+/*
+ * Add another statistics reading.
+ * Serialization is the caller's due.
+ */
+static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
+		cycles_t dur)
+{
+	p->cur.count++;
+	if (p->cur.maxlat < dur)
+		p->cur.maxlat = dur;
+	p->cur.totlat += dur;
+}
+
+static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
+		cycles_t dur)
+{
+	struct kstat_lat_pcpu_snap_struct *cur;
+
+	cur = per_cpu_ptr(p->cur, cpu);
+	write_seqcount_begin(&cur->lock);
+	cur->count++;
+	if (cur->maxlat < dur)
+		cur->maxlat = dur;
+	cur->totlat += dur;
+	write_seqcount_end(&cur->lock);
+}
+
+/*
+ * Move current statistics to last, clear last.
+ * Serialization is the caller's due.
+ */
+static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
+{
+	cycles_t m;
+	memcpy(&p->last, &p->cur, sizeof(p->last));
+	p->cur.maxlat = 0;
+	m = p->last.maxlat;
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+}
+
+static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
+{
+	unsigned i, cpu;
+	struct kstat_lat_pcpu_snap_struct snap, *cur;
+	cycles_t m;
+
+	memset(&p->last, 0, sizeof(p->last));
+	for_each_online_cpu(cpu) {
+		cur = per_cpu_ptr(p->cur, cpu);
+		do {
+			i = read_seqcount_begin(&cur->lock);
+			memcpy(&snap, cur, sizeof(snap));
+		} while (read_seqcount_retry(&cur->lock, i));
+		/* 
+		 * read above and this update of maxlat is not atomic,
+		 * but this is OK, since it happens rarely and losing
+		 * a couple of peaks is not essential. xemul
+		 */
+		cur->maxlat = 0;
+
+		p->last.count += snap.count;
+		p->last.totlat += snap.totlat;
+		if (p->last.maxlat < snap.maxlat)
+			p->last.maxlat = snap.maxlat;
+	}
+
+	m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+	/* reset max_snap to calculate it correctly next time */
+	p->max_snap = 0;
+}
+
+#endif /* __VZSTAT_H__ */
diff -upr kernel-2.6.18-417.el5.orig/include/linux/writeback.h kernel-2.6.18-417.el5-028stab121/include/linux/writeback.h
--- kernel-2.6.18-417.el5.orig/include/linux/writeback.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/writeback.h	2017-01-13 08:40:40.000000000 -0500
@@ -30,6 +30,8 @@ enum writeback_sync_modes {
 	WB_SYNC_HOLD,	/* Hold the inode on sb_dirty for sys_sync() */
 };
 
+struct user_beancounter;
+
 /*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
@@ -41,6 +43,8 @@ struct writeback_control {
 	enum writeback_sync_modes sync_mode;
 	unsigned long *older_than_this;	/* If !NULL, only write back inodes
 					   older than this */
+	struct user_beancounter *only_this_ub; /* If !NULL, write only inodes
+						  dirtied by this ub. */
 	long nr_to_write;		/* Write this many pages, and decrement
 					   this for each page written */
 	long pages_skipped;		/* Pages which were not written */
@@ -59,6 +63,7 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned for_writepages:1;	/* This is a writepages() call */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
+	unsigned force_sync_io:1;
 };
 
 /*
@@ -69,6 +74,7 @@ void wake_up_inode(struct inode *inode);
 int inode_wait(void *);
 void sync_inodes_sb(struct super_block *, int wait);
 void sync_inodes(int wait);
+void sync_inodes_ub(int wait, struct user_beancounter *ub);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
diff -upr kernel-2.6.18-417.el5.orig/include/linux/xattr.h kernel-2.6.18-417.el5-028stab121/include/linux/xattr.h
--- kernel-2.6.18-417.el5.orig/include/linux/xattr.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/linux/xattr.h	2017-01-13 08:40:22.000000000 -0500
@@ -10,6 +10,13 @@
 #ifndef _LINUX_XATTR_H
 #define _LINUX_XATTR_H
 
+#ifdef CONFIG_VE
+extern int ve_xattr_policy;
+#define VE_XATTR_POLICY_ACCEPT	0
+#define VE_XATTR_POLICY_IGNORE	1
+#define VE_XATTR_POLICY_REJECT	2
+#endif
+
 #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
 #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
 
diff -upr kernel-2.6.18-417.el5.orig/include/net/addrconf.h kernel-2.6.18-417.el5-028stab121/include/net/addrconf.h
--- kernel-2.6.18-417.el5.orig/include/net/addrconf.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/addrconf.h	2017-01-13 08:40:24.000000000 -0500
@@ -259,5 +259,13 @@ extern int if6_proc_init(void);
 extern void if6_proc_exit(void);
 #endif
 
+int addrconf_ifdown(struct net_device *dev, int how);
+int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
+			  __u32 prefered_lft, __u32 valid_lft);
+
+int addrconf_sysctl_init(struct ve_struct *ve);
+void addrconf_sysctl_fini(struct ve_struct *ve);
+void addrconf_sysctl_free(struct ve_struct *ve);
+
 #endif
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/net/af_unix.h kernel-2.6.18-417.el5-028stab121/include/net/af_unix.h
--- kernel-2.6.18-417.el5.orig/include/net/af_unix.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/af_unix.h	2017-01-13 08:40:27.000000000 -0500
@@ -10,6 +10,7 @@ extern void unix_inflight(struct file *f
 extern void unix_notinflight(struct file *fp);
 extern void unix_gc(void);
 extern void wait_for_unix_gc(void);
+extern void unix_destruct_fds(struct sk_buff *skb);
 
 #define UNIX_HASH_SIZE	256
 
@@ -20,23 +21,37 @@ extern atomic_t unix_tot_inflight;
 
 static inline struct sock *first_unix_socket(int *i)
 {
+	struct sock *s;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		for (s = sk_head(&unix_socket_table[*i]);
+		     s != NULL && !ve_accessible(s->owner_env, ve);
+		     s = sk_next(s));
+		if (s != NULL)
+			return s;
 	}
 	return NULL;
 }
 
 static inline struct sock *next_unix_socket(int *i, struct sock *s)
 {
-	struct sock *next = sk_next(s);
-	/* More in this chain? */
-	if (next)
-		return next;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	for (s = sk_next(s); s != NULL; s = sk_next(s)) {
+		if (!ve_accessible(s->owner_env, ve))
+			continue;
+		return s;
+	}
 	/* Look for next non-empty chain. */
 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		for (s = sk_head(&unix_socket_table[*i]);
+		     s != NULL && !ve_accessible(s->owner_env, ve);
+		     s = sk_next(s));
+		if (s != NULL)
+			return s;
 	}
 	return NULL;
 }
@@ -51,6 +66,9 @@ struct unix_address {
 	struct sockaddr_un name[0];
 };
 
+int unix_bind_path(struct sock *, struct dentry *, struct vfsmount *);
+int unix_attach_addr(struct sock *, struct sockaddr_un *, int);
+
 struct unix_skb_parms {
 	struct ucred		creds;		/* Skb credentials	*/
 	struct scm_fp_list	*fp;		/* Passed files		*/
diff -upr kernel-2.6.18-417.el5.orig/include/net/arp.h kernel-2.6.18-417.el5-028stab121/include/net/arp.h
--- kernel-2.6.18-417.el5.orig/include/net/arp.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/arp.h	2017-01-13 08:40:21.000000000 -0500
@@ -7,7 +7,16 @@
 
 #define HAVE_ARP_CREATE
 
-extern struct neigh_table arp_tbl;
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define arp_tbl		(*(get_exec_env()->ve_arp_tbl))
+extern int ve_arp_init(struct ve_struct *ve);
+extern void ve_arp_fini(struct ve_struct *ve);
+#else
+extern struct neigh_table	global_arp_tbl;
+#define arp_tbl		global_arp_tbl
+static inline int ve_arp_init(struct ve_struct *ve) { return 0; }
+static inline void ve_arp_fini(struct ve_struct *ve) { ; }
+#endif
 
 extern void	arp_init(void);
 extern int	arp_find(unsigned char *haddr, struct sk_buff *skb);
diff -upr kernel-2.6.18-417.el5.orig/include/net/dst.h kernel-2.6.18-417.el5-028stab121/include/net/dst.h
--- kernel-2.6.18-417.el5.orig/include/net/dst.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/dst.h	2017-01-13 08:40:16.000000000 -0500
@@ -52,6 +52,8 @@ struct dst_entry
 #define DST_BALANCED            0x10
 	unsigned long		lastuse;
 	unsigned long		expires;
+	unsigned int		privnet_mark;
+
 
 	unsigned short		header_len;	/* more space at head required */
 	unsigned short		trailer_len;	/* space to reserve at tail */
diff -upr kernel-2.6.18-417.el5.orig/include/net/fib_rules.h kernel-2.6.18-417.el5-028stab121/include/net/fib_rules.h
--- kernel-2.6.18-417.el5.orig/include/net/fib_rules.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/fib_rules.h	2017-01-13 08:40:15.000000000 -0500
@@ -53,7 +53,7 @@ struct fib_rules_ops
 
 	int			nlgroup;
 	struct nla_policy	*policy;
-	struct list_head	*rules_list;
+	struct list_head	rules_list;
 	struct module		*owner;
 };
 
diff -upr kernel-2.6.18-417.el5.orig/include/net/flow.h kernel-2.6.18-417.el5-028stab121/include/net/flow.h
--- kernel-2.6.18-417.el5.orig/include/net/flow.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/flow.h	2017-01-13 08:40:21.000000000 -0500
@@ -10,6 +10,7 @@
 #include <linux/in6.h>
 #include <asm/atomic.h>
 
+struct ve_struct;
 struct flowi {
 	int	oif;
 	int	iif;
@@ -80,6 +81,9 @@ struct flowi {
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
 	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
diff -upr kernel-2.6.18-417.el5.orig/include/net/icmp.h kernel-2.6.18-417.el5-028stab121/include/net/icmp.h
--- kernel-2.6.18-417.el5.orig/include/net/icmp.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/icmp.h	2017-01-13 08:40:22.000000000 -0500
@@ -30,16 +30,23 @@ struct icmp_err {
 
 extern struct icmp_err icmp_err_convert[];
 DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)
+#define ve_icmpmsg_statistics (get_exec_env()->_icmpmsg_statistics)
+#else
 DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
-#define ICMP_INC_STATS(field)		SNMP_INC_STATS(icmp_statistics, field)
-#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
-#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(icmp_statistics, field)
-#define ICMPMSGOUT_INC_STATS(field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
-#define ICMPMSGOUT_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field+256)
-#define ICMPMSGOUT_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(icmpmsg_statistics, field+256)
-#define ICMPMSGIN_INC_STATS(field)	SNMP_INC_STATS(icmpmsg_statistics, field)
-#define ICMPMSGIN_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
-#define ICMPMSGIN_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmpmsg_statistics, field)
+#define ve_icmp_statistics icmp_statistics
+#define ve_icmpmsg_statistics icmpmsg_statistics
+#endif
+#define ICMP_INC_STATS(field)		SNMP_INC_STATS(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_icmp_statistics, field)
+#define ICMPMSGOUT_INC_STATS(field)	SNMP_INC_STATS(ve_icmpmsg_statistics, field+256)
+#define ICMPMSGOUT_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_icmpmsg_statistics, field+256)
+#define ICMPMSGOUT_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmpmsg_statistics, field+256)
+#define ICMPMSGIN_INC_STATS(field)	SNMP_INC_STATS(ve_icmpmsg_statistics, field)
+#define ICMPMSGIN_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_icmpmsg_statistics, field)
+#define ICMPMSGIN_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmpmsg_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
@@ -72,7 +79,4 @@ extern int sysctl_icmp_errors_use_inboun
 extern int sysctl_icmp_ratelimit;
 extern int sysctl_icmp_ratemask;
 
-extern void xfrm4_decode_session_reverse(struct sk_buff *skb, struct flowi *fl);
-extern int xfrm4_icmp_check(struct sk_buff *skb);
-
 #endif	/* _ICMP_H */
diff -upr kernel-2.6.18-417.el5.orig/include/net/if_inet6.h kernel-2.6.18-417.el5-028stab121/include/net/if_inet6.h
--- kernel-2.6.18-417.el5.orig/include/net/if_inet6.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/if_inet6.h	2017-01-13 08:40:21.000000000 -0500
@@ -209,7 +209,14 @@ struct inet6_dev 
 #endif
 };
 
-extern struct ipv6_devconf ipv6_devconf;
+extern struct ipv6_devconf global_ipv6_devconf;
+extern struct ipv6_devconf global_ipv6_devconf_dflt;
+
+#ifdef CONFIG_VE
+#define ve_ipv6_devconf	(*(get_exec_env()->_ipv6_devconf))
+#else
+#define ve_ipv6_devconf	global_ipv6_devconf
+#endif
 
 static inline void ipv6_eth_mc_map(struct in6_addr *addr, char *buf)
 {
diff -upr kernel-2.6.18-417.el5.orig/include/net/inet6_connection_sock.h kernel-2.6.18-417.el5-028stab121/include/net/inet6_connection_sock.h
--- kernel-2.6.18-417.el5.orig/include/net/inet6_connection_sock.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/inet6_connection_sock.h	2017-01-13 08:40:26.000000000 -0500
@@ -39,4 +39,5 @@ extern void inet6_csk_reqsk_queue_hash_a
 extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
 
 extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok);
+extern void inet6_make_mapped(struct sock *sk);
 #endif /* _INET6_CONNECTION_SOCK_H */
diff -upr kernel-2.6.18-417.el5.orig/include/net/inet6_hashtables.h kernel-2.6.18-417.el5-028stab121/include/net/inet6_hashtables.h
--- kernel-2.6.18-417.el5.orig/include/net/inet6_hashtables.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/inet6_hashtables.h	2017-01-13 08:40:21.000000000 -0500
@@ -26,11 +26,13 @@ struct inet_hashinfo;
 
 /* I have no idea if this is a good hash for v6 or not. -DaveM */
 static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
-				const struct in6_addr *faddr, const u16 fport)
+				const struct in6_addr *faddr, const u16 fport,
+				const envid_t veid)
 {
 	unsigned int hashent = (lport ^ fport);
 
 	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
+	hashent ^= (veid ^ (veid >> 16));
 	hashent ^= hashent >> 16;
 	hashent ^= hashent >> 8;
 	return hashent;
@@ -44,7 +46,7 @@ static inline int inet6_sk_ehashfn(const
 	const struct in6_addr *faddr = &np->daddr;
 	const __u16 lport = inet->num;
 	const __u16 fport = inet->dport;
-	return inet6_ehashfn(laddr, lport, faddr, fport);
+	return inet6_ehashfn(laddr, lport, faddr, fport, VEID(sk->owner_env));
 }
 
 extern void __inet6_hash(struct inet_hashinfo *hashinfo, struct sock *sk);
diff -upr kernel-2.6.18-417.el5.orig/include/net/inet_hashtables.h kernel-2.6.18-417.el5-028stab121/include/net/inet_hashtables.h
--- kernel-2.6.18-417.el5.orig/include/net/inet_hashtables.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/inet_hashtables.h	2017-01-13 08:40:21.000000000 -0500
@@ -74,6 +74,7 @@ struct inet_ehash_bucket {
  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
  */
 struct inet_bind_bucket {
+	struct ve_struct	*owner_env;
 	unsigned short		port;
 	signed short		fastreuse;
 	struct hlist_node	node;
@@ -138,37 +139,43 @@ static inline struct inet_ehash_bucket *
 extern struct inet_bind_bucket *
 		    inet_bind_bucket_create(kmem_cache_t *cachep,
 					    struct inet_bind_hashbucket *head,
-					    const unsigned short snum);
+					    const unsigned short snum,
+					    struct ve_struct *env);
 extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
 				     struct inet_bind_bucket *tb);
 
-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
+static inline int inet_bhashfn(const __u16 lport, const int bhash_size,
+		unsigned veid)
 {
-	return lport & (bhash_size - 1);
+	return ((lport + (veid ^ (veid >> 16))) & (bhash_size - 1));
 }
 
 extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 			   const unsigned short snum);
 
 /* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(const unsigned short num)
+static inline int inet_lhashfn(const unsigned short num, unsigned veid)
 {
-	return num & (INET_LHTABLE_SIZE - 1);
+	return ((num + (veid ^ (veid >> 16))) & (INET_LHTABLE_SIZE - 1));
 }
 
 static inline int inet_sk_listen_hashfn(const struct sock *sk)
 {
-	return inet_lhashfn(inet_sk(sk)->num);
+	return inet_lhashfn(inet_sk(sk)->num, VEID(sk->owner_env));
 }
 
 /* Caller must disable local BH processing. */
 static inline void __inet_inherit_port(struct inet_hashinfo *table,
 				       struct sock *sk, struct sock *child)
 {
-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
-	struct inet_bind_hashbucket *head = &table->bhash[bhash];
+	int bhash;
+	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 
+	bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size,
+			VEID(child->owner_env));
+	head = &table->bhash[bhash];
+
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	sk_add_bind_node(child, &tb->owners);
@@ -274,7 +281,8 @@ static inline int inet_iif(const struct 
 extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
 					   const u32 daddr,
 					   const unsigned short hnum,
-					   const int dif);
+					   const int dif,
+					   struct ve_struct *env);
 
 /* Optimize the common listener case. */
 static inline struct sock *
@@ -284,18 +292,21 @@ static inline struct sock *
 {
 	struct sock *sk = NULL;
 	const struct hlist_head *head;
+	struct ve_struct *env;
 
+	env = get_exec_env();
 	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))];
 	if (!hlist_empty(head)) {
 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
 
 		if (inet->num == hnum && !sk->sk_node.next &&
+		    ve_accessible_strict(sk->owner_env, env) &&
 		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if)
 			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
+		sk = __inet_lookup_listener(head, daddr, hnum, dif, env);
 	}
 	if (sk) {
 sherry_cache:
@@ -322,25 +333,25 @@ sherry_cache:
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
 #endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+#define INET_TW_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
 	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
 	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_TW_MATCH_ALLVE(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
 	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
@@ -348,6 +359,18 @@ sherry_cache:
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */
 
+#define INET_MATCH(__sk, __hash, __cookie, __saddr,			\
+					__daddr, __ports, __dif, __ve)  \
+        (INET_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr),	\
+			  		(__daddr), (__ports), (__dif))	\
+	 && ve_accessible_strict((__sk)->owner_env, (__ve)))
+
+#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr,			\
+					__daddr, __ports, __dif, __ve)	\
+        (INET_TW_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr),	\
+					(__daddr), (__ports), (__dif))	\
+	 && ve_accessible_strict(inet_twsk(__sk)->tw_owner_env, VEID(__ve)))
+
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
@@ -367,19 +390,25 @@ static inline struct sock *
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-
+	unsigned int hash;
+	struct inet_ehash_bucket *head;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+	hash = inet_ehashfn(daddr, hnum, saddr, sport, VEID(env));
+	head = inet_ehash_bucket(hashinfo, hash);
 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
-		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk, hash, acookie, saddr, daddr,
+					ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
-		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr,
+					ports, dif, env))
 			goto hit;
 	}
 	sk = NULL;
diff -upr kernel-2.6.18-417.el5.orig/include/net/inet_sock.h kernel-2.6.18-417.el5-028stab121/include/net/inet_sock.h
--- kernel-2.6.18-417.el5.orig/include/net/inet_sock.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/inet_sock.h	2017-01-13 08:40:21.000000000 -0500
@@ -234,9 +234,10 @@ static inline void inet_sk_copy_descenda
 extern int inet_sk_rebuild_header(struct sock *sk);
 
 static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
-					const __u32 faddr, const __u16 fport)
+					const __u32 faddr, const __u16 fport,
+					const envid_t veid)
 {
-	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
+	int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
 	h ^= h >> 16;
 	h ^= h >> 8;
 	return h;
@@ -249,8 +250,9 @@ static inline int inet_sk_ehashfn(const 
 	const __u16 lport = inet->num;
 	const __u32 faddr = inet->daddr;
 	const __u16 fport = inet->dport;
+	envid_t veid = VEID(sk->owner_env);
 
-	return inet_ehashfn(laddr, lport, faddr, fport);
+	return inet_ehashfn(laddr, lport, faddr, fport, veid);
 }
 
 #endif	/* _INET_SOCK_H */
diff -upr kernel-2.6.18-417.el5.orig/include/net/inet_timewait_sock.h kernel-2.6.18-417.el5-028stab121/include/net/inet_timewait_sock.h
--- kernel-2.6.18-417.el5.orig/include/net/inet_timewait_sock.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/inet_timewait_sock.h	2017-01-13 08:40:21.000000000 -0500
@@ -81,6 +81,7 @@ struct inet_timewait_death_row {
 	struct inet_hashinfo 	*hashinfo;
 	int			sysctl_tw_recycle;
 	int			sysctl_max_tw_buckets;
+	int			ub_managed;
 };
 
 extern void inet_twdr_hangman(unsigned long data);
@@ -133,6 +134,7 @@ struct inet_timewait_sock {
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;
+	envid_t			tw_owner_env;
 };
 
 static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
diff -upr kernel-2.6.18-417.el5.orig/include/net/ip6_fib.h kernel-2.6.18-417.el5-028stab121/include/net/ip6_fib.h
--- kernel-2.6.18-417.el5.orig/include/net/ip6_fib.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/ip6_fib.h	2017-01-13 08:40:22.000000000 -0500
@@ -94,6 +94,8 @@ static inline struct inet6_dev *ip6_dst_
 	return ((struct rt6_info *)dst)->rt6i_idev;
 }
 
+extern struct list_head	fib6_table_list;
+
 struct fib6_walker_t
 {
 	struct fib6_walker_t *prev, *next;
@@ -164,6 +166,7 @@ struct fib6_table {
 	u32			tb6_id;
 	rwlock_t		tb6_lock;
 	struct fib6_node	tb6_root;
+	struct ve_struct	*owner_env;
 };
 
 #define RT6_TABLE_UNSPEC	RT_TABLE_UNSPEC
@@ -232,11 +235,18 @@ extern void			fib6_run_gc(unsigned long 
 extern void			fib6_gc_cleanup(void);
 
 extern int			fib6_init(void);
+extern void			fib6_tables_init(void);
+extern void			fib6_tables_cleanup(void);
 
 extern void			fib6_rules_init(void);
 extern void			fib6_rules_cleanup(void);
 extern int			fib6_rules_dump(struct sk_buff *,
 						struct netlink_callback *);
 
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+extern int			fib6_rules_create(void);
+extern void			fib6_rules_destroy(void);
+#endif
+
 #endif
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/net/ip6_route.h kernel-2.6.18-417.el5-028stab121/include/net/ip6_route.h
--- kernel-2.6.18-417.el5.orig/include/net/ip6_route.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/ip6_route.h	2017-01-13 08:40:23.000000000 -0500
@@ -53,6 +53,7 @@ extern struct rt6_info	ip6_blk_hole_entr
 extern int ip6_rt_gc_interval;
 
 extern void			ip6_route_input(struct sk_buff *skb);
+extern void			__ip6_route_input(struct sk_buff *skb, struct in6_addr *daddr);
 
 extern struct dst_entry *	ip6_route_output(struct sock *sk,
 						 struct flowi *fl);
@@ -186,5 +187,8 @@ static inline int ipv6_unicast_destinati
 
 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
+int init_ve_route6(struct ve_struct *ve);
+void fini_ve_route6(struct ve_struct *ve);
+
 #endif
 #endif
diff -upr kernel-2.6.18-417.el5.orig/include/net/ip_fib.h kernel-2.6.18-417.el5-028stab121/include/net/ip_fib.h
--- kernel-2.6.18-417.el5.orig/include/net/ip_fib.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/ip_fib.h	2017-01-13 08:40:21.000000000 -0500
@@ -169,10 +169,24 @@ struct fib_table {
 	unsigned char	tb_data[0];
 };
 
+struct fn_zone;
+struct fn_hash
+{
+	struct fn_zone	*fn_zones[33];
+	struct fn_zone	*fn_zone_list;
+};
+
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
-extern struct fib_table *ip_fib_local_table;
-extern struct fib_table *ip_fib_main_table;
+#ifdef CONFIG_VE
+#define ip_fib_local_table 	get_exec_env()->_local_table
+#define ip_fib_main_table 	get_exec_env()->_main_table
+#else
+extern struct fib_table *__ip_fib_local_table;
+extern struct fib_table *__ip_fib_main_table;
+#define ip_fib_local_table	__ip_fib_local_table
+#define ip_fib_main_table	__ip_fib_main_table
+#endif
 
 static inline struct fib_table *fib_get_table(u32 id)
 {
@@ -235,7 +249,19 @@ extern u32  __fib_res_prefsrc(struct fib
 /* Exported by fib_hash.c */
 extern struct fib_table *fib_hash_init(u32 id);
 
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+struct ve_struct;
+extern int init_ve_route(struct ve_struct *ve);
+extern void fini_ve_route(struct ve_struct *ve);
+#else
+#define init_ve_route(ve)	(0)
+#define fini_ve_route(ve)	do { } while (0)
+#endif
+
 #ifdef CONFIG_IP_MULTIPLE_TABLES
+extern int fib_rules_create(void);
+extern void fib_rules_destroy(void);
+
 extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 extern void __init fib4_rules_init(void);
diff -upr kernel-2.6.18-417.el5.orig/include/net/ip.h kernel-2.6.18-417.el5-028stab121/include/net/ip.h
--- kernel-2.6.18-417.el5.orig/include/net/ip.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/ip.h	2017-01-13 08:40:21.000000000 -0500
@@ -157,15 +157,25 @@ struct ipv4_config
 
 extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
-#define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
-#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
-#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ip_statistics, field)
+#ifdef CONFIG_VE
+#define ve_ip_statistics (get_exec_env()->_ip_statistics)
+#else
+#define ve_ip_statistics ip_statistics
+#endif
+#define IP_INC_STATS(field)		SNMP_INC_STATS(ve_ip_statistics, field)
+#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_ip_statistics, field)
+#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_ip_statistics, field)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
-#define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
-#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
-#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
-#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
-#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_net_statistics (get_exec_env()->_net_statistics)
+#else
+#define ve_net_statistics net_statistics
+#endif
+#define NET_INC_STATS(field)		SNMP_INC_STATS(ve_net_statistics, field)
+#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_net_statistics, field)
+#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_net_statistics, field)
+#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
+#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
 
 extern void inet_get_local_port_range(int *low, int *high);
 extern int sysctl_local_port_range[2];
@@ -398,4 +408,11 @@ extern int ip_misc_proc_init(void);
 
 extern struct ctl_table ipv4_table[];
 
+#ifdef CONFIG_SYSCTL
+extern int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name,
+			int nlen, void __user *oldval, size_t __user *oldlenp,
+			 void __user *newval, size_t newlen, void **context);
+#endif
 #endif	/* _IP_H */
diff -upr kernel-2.6.18-417.el5.orig/include/net/ipv6.h kernel-2.6.18-417.el5-028stab121/include/net/ipv6.h
--- kernel-2.6.18-417.el5.orig/include/net/ipv6.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/ipv6.h	2017-01-13 08:40:23.000000000 -0500
@@ -109,7 +109,7 @@ struct frag_hdr {
 extern int sysctl_ipv6_bindv6only;
 extern int sysctl_mld_max_msf;
 
-#define _DEVINC(statname, modifier, idev, field)			\
+#define FIXME_DEVINC(statname, modifier, idev, field)			\
 ({									\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
@@ -117,13 +117,29 @@ extern int sysctl_mld_max_msf;
 	SNMP_INC_STATS##modifier(statname##_statistics, (field));       \
 })
 
+#define _DEVINC(statname, modifier, idev, field)			\
+({									\
+	struct inet6_dev *_idev = (idev);				\
+	if (likely(_idev != NULL))					\
+		SNMP_INC_STATS##modifier((_idev)->statsx.statname, (field)); \
+	SNMP_INC_STATS##modifier(ve_##statname##_statistics, (field));       \
+})
+
  
 /* MIBs */
 DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
- 
-#define IP6_INC_STATS(idev,field)     _DEVINC(ipv6, , idev, field)
-#define IP6_INC_STATS_BH(idev,field)  _DEVINC(ipv6, _BH, idev, field)
-#define IP6_INC_STATS_USER(idev,field)        _DEVINC(ipv6, _USER, idev, field)
+#ifdef CONFIG_VE
+#define ve_ipv6_statistics (get_exec_env()->_ipv6_statistics)
+#define ve_icmpv6_statistics (get_exec_env()->_icmpv6_statistics)
+#define ve_udp_stats_in6 (get_exec_env()->_udp_stats_in6)
+#else
+#define ve_ipv6_statistics ipv6_statistics
+#define ve_icmpv6_statistics icmpv6_statistics
+#define ve_udp_stats_in6 udp_stats_in6
+#endif
+#define IP6_INC_STATS(idev,field)	_DEVINC(ipv6, ,idev, field)
+#define IP6_INC_STATS_BH(idev,field)	_DEVINC(ipv6, _BH, idev, field)
+#define IP6_INC_STATS_USER(idev,field)	_DEVINC(ipv6, _USER, idev, field)
 
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 
@@ -139,26 +155,26 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, 
 	__typeof__(offset) _offset = (offset);					\
 	if (likely(_idev != NULL))						\
 		SNMP_INC_STATS_OFFSET_BH(_idev->statsx.icmpv6, field, _offset);	\
-	SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset);    	\
+	SNMP_INC_STATS_OFFSET_BH(ve_icmpv6_statistics, field, _offset);		\
 })
 
 #define ICMP6MSGOUT_INC_STATS(idev, field) \
-	_DEVINC(icmpv6msg, , idev, field +256)
+	FIXME_DEVINC(icmpv6msg, , idev, field +256)
 #define ICMP6MSGOUT_INC_STATS_BH(idev, field) \
-	_DEVINC(icmpv6msg, _BH, idev, field +256)
+	FIXME_DEVINC(icmpv6msg, _BH, idev, field +256)
 #define ICMP6MSGOUT_INC_STATS_USER(idev, field) \
-	_DEVINC(icmpv6msg, _USER, idev, field +256)
+	FIXME_DEVINC(icmpv6msg, _USER, idev, field +256)
 #define ICMP6MSGIN_INC_STATS(idev, field) \
-	 _DEVINC(icmpv6msg, , idev, field)
+	FIXME_DEVINC(icmpv6msg, , idev, field)
 #define ICMP6MSGIN_INC_STATS_BH(idev, field) \
-	_DEVINC(icmpv6msg, _BH, idev, field)
+	FIXME_DEVINC(icmpv6msg, _BH, idev, field)
 #define ICMP6MSGIN_INC_STATS_USER(idev, field) \
-	_DEVINC(icmpv6msg, _USER, idev, field)
+	FIXME_DEVINC(icmpv6msg, _USER, idev, field)
 
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
-#define UDP6_INC_STATS(field)		SNMP_INC_STATS(udp_stats_in6, field)
-#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(udp_stats_in6, field)
-#define UDP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_stats_in6, field)
+#define UDP6_INC_STATS(field)		SNMP_INC_STATS(ve_udp_stats_in6, field)
+#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_udp_stats_in6, field)
+#define UDP6_INC_STATS_USER(field)	SNMP_INC_STATS_USER(ve_udp_stats_in6, field)
 
 int snmp6_register_dev(struct inet6_dev *idev);
 int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -167,6 +183,10 @@ int snmp6_free_dev(struct inet6_dev *ide
 int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
 
+int ve_snmp_proc_init(struct ve_struct *);
+void ve_snmp_proc_fini(struct ve_struct *);
+void ip6_frag_cleanup(struct ve_struct *);
+
 struct ip6_ra_chain
 {
 	struct ip6_ra_chain	*next;
diff -upr kernel-2.6.18-417.el5.orig/include/net/ndisc.h kernel-2.6.18-417.el5-028stab121/include/net/ndisc.h
--- kernel-2.6.18-417.el5.orig/include/net/ndisc.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/ndisc.h	2017-01-13 08:40:23.000000000 -0500
@@ -51,7 +51,15 @@ struct net_device;
 struct net_proto_family;
 struct sk_buff;
 
-extern struct neigh_table nd_tbl;
+#ifdef CONFIG_VE
+#define nd_tbl		(*(get_exec_env()->ve_nd_tbl))
+#else
+#define nd_tbl		global_nd_tbl
+extern struct neigh_table global_nd_tbl;
+#endif
+
+extern int ve_ndisc_init(struct ve_struct *ve);
+extern void ve_ndisc_fini(struct ve_struct *ve);
 
 struct nd_msg {
         struct icmp6hdr	icmph;
@@ -143,6 +151,7 @@ extern int 			ndisc_ifinfo_sysctl_change
 extern void 			inet6_ifinfo_notify(int event,
 						    struct inet6_dev *idev);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, struct in6_addr *addr)
 {
 
@@ -151,6 +160,7 @@ static inline struct neighbour * ndisc_g
 
 	return NULL;
 }
+#endif
 
 
 #endif /* __KERNEL__ */
diff -upr kernel-2.6.18-417.el5.orig/include/net/neighbour.h kernel-2.6.18-417.el5-028stab121/include/net/neighbour.h
--- kernel-2.6.18-417.el5.orig/include/net/neighbour.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/neighbour.h	2017-01-13 08:40:21.000000000 -0500
@@ -194,6 +194,8 @@ struct neigh_table
 	atomic_t		entries;
 	rwlock_t		lock;
 	unsigned long		last_rand;
+	struct ve_struct	*owner_env;
+	struct user_beancounter *owner_ub;
 	kmem_cache_t		*kmem_cachep;
 	struct neigh_statistics	*stats;
 	struct neighbour	**hash_buckets;
@@ -213,8 +215,8 @@ struct neigh_table
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
 
-extern void			neigh_table_init(struct neigh_table *tbl);
-extern void			neigh_table_init_no_netlink(struct neigh_table *tbl);
+extern int			neigh_table_init(struct neigh_table *tbl);
+extern int			neigh_table_init_no_netlink(struct neigh_table *tbl);
 extern int			neigh_table_clear(struct neigh_table *tbl);
 extern struct neighbour *	neigh_lookup(struct neigh_table *tbl,
 					     const void *pkey,
diff -upr kernel-2.6.18-417.el5.orig/include/net/netlink_sock.h kernel-2.6.18-417.el5-028stab121/include/net/netlink_sock.h
--- kernel-2.6.18-417.el5.orig/include/net/netlink_sock.h	2017-01-13 08:40:24.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/netlink_sock.h	2017-01-13 08:40:29.000000000 -0500
@@ -0,0 +1,29 @@
+#ifndef __NET_NETLINK_SOCK_H
+#define __NET_NETLINK_SOCK_H
+
+struct netlink_sock {
+	/* struct sock has to be the first member of netlink_sock */
+	struct sock		sk;
+	u32			pid;
+	u32			dst_pid;
+	u32			dst_group;
+	u32			flags;
+	u32			subscriptions;
+	u32			ngroups;
+	unsigned long		*groups;
+	unsigned long		state;
+	wait_queue_head_t	wait;
+	struct netlink_callback	*cb;
+	spinlock_t		cb_lock;
+	void			(*data_ready)(struct sock *sk, int bytes);
+	void                    (*netlink_rcv)(struct sk_buff *skb);
+	struct module		*module;
+	kernel_cap_t		f_eff_cap;
+};
+
+static inline struct netlink_sock *nlk_sk(struct sock *sk)
+{
+	return (struct netlink_sock *)sk;
+}
+
+#endif /* __NET_NETLINK_SOCK_H */
diff -upr kernel-2.6.18-417.el5.orig/include/net/pkt_sched.h kernel-2.6.18-417.el5-028stab121/include/net/pkt_sched.h
--- kernel-2.6.18-417.el5.orig/include/net/pkt_sched.h	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/pkt_sched.h	2017-01-13 08:40:15.000000000 -0500
@@ -120,14 +120,9 @@ do {									\
 	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
 	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
 	   if (__delta_sec) { \
-	           switch (__delta_sec) { \
-		   default: \
-			   __delta = 0; \
-		   case 2: \
-			   __delta += USEC_PER_SEC; \
-		   case 1: \
-			   __delta += USEC_PER_SEC; \
-	           } \
+		   if (__delta_sec > ((0x7FFFFFFF/USEC_PER_SEC) - 1))	\
+			   __delta_sec = (0x7FFFFFFF/USEC_PER_SEC) - 1;	\
+		   __delta += __delta_sec * USEC_PER_SEC;		\
 	   } \
 	   __delta; \
 })
@@ -189,7 +184,7 @@ psched_tod_diff(int delta_sec, int bound
 #define PSCHED_SET_PASTPERFECT(t)	((t).tv_sec = 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t).tv_sec == 0)
 
-#define	PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
+#define	PSCHED_AUDIT_TDIFF(t)
 
 #else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
 
diff -upr kernel-2.6.18-417.el5.orig/include/net/route.h kernel-2.6.18-417.el5-028stab121/include/net/route.h
--- kernel-2.6.18-417.el5.orig/include/net/route.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/route.h	2017-01-13 08:40:21.000000000 -0500
@@ -138,6 +138,7 @@ static inline void ip_rt_put(struct rtab
 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
 
 extern __u8 ip_tos2prio[16];
+extern int ip_rt_src_check;
 
 static inline char rt_tos2priority(u8 tos)
 {
@@ -204,4 +205,14 @@ static inline struct inet_peer *rt_get_p
 
 extern ctl_table ipv4_route_table[];
 
+#ifdef CONFIG_SYSCTL
+extern int ipv4_flush_delay;
+extern int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+		struct file *filp, void __user *buffer,	size_t *lenp,
+		loff_t *ppos);
+extern int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+		int __user *name, int nlen, void __user *oldval,
+		size_t __user *oldlenp,	void __user *newval,
+		size_t newlen, void **context);
+#endif
 #endif	/* _ROUTE_H */
diff -upr kernel-2.6.18-417.el5.orig/include/net/scm.h kernel-2.6.18-417.el5-028stab121/include/net/scm.h
--- kernel-2.6.18-417.el5.orig/include/net/scm.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/scm.h	2017-01-13 08:40:19.000000000 -0500
@@ -57,7 +57,7 @@ static __inline__ int scm_send(struct so
 	struct task_struct *p = current;
 	scm->creds.uid = p->uid;
 	scm->creds.gid = p->gid;
-	scm->creds.pid = p->tgid;
+	scm->creds.pid = virt_tgid(p);
 	scm->fp = NULL;
 	scm->seq = 0;
 	unix_get_peersec_dgram(sock, scm);
diff -upr kernel-2.6.18-417.el5.orig/include/net/sock.h kernel-2.6.18-417.el5-028stab121/include/net/sock.h
--- kernel-2.6.18-417.el5.orig/include/net/sock.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/sock.h	2017-01-13 08:40:40.000000000 -0500
@@ -56,6 +56,8 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
+#include <ub/ub_net.h>
+
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -261,6 +263,8 @@ struct sock {
 						  struct sk_buff *skb);  
 	void			(*sk_create_child)(struct sock *sk, struct sock *newsk);
 	void                    (*sk_destruct)(struct sock *sk);
+	struct sock_beancounter sk_bc;
+	struct ve_struct	*owner_env;
 };
 
 /*
@@ -526,6 +530,8 @@ static inline __must_check int sk_add_ba
 })
 
 extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
+extern int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+				unsigned long amount);
 extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
 extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
 extern int sk_stream_error(struct sock *sk, int flags, int err);
@@ -707,7 +713,6 @@ struct sock_iocb {
 	struct sock		*sk;
 	struct scm_cookie	*scm;
 	struct msghdr		*msg, async_msg;
-	struct iovec		async_iov;
 	struct kiocb		*kiocb;
 };
 
@@ -835,7 +840,10 @@ static inline void sk_stream_writequeue_
 
 static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
-	return sk_rmem_schedule(sk, skb->truesize);
+	if (!sk_rmem_schedule(sk, skb->truesize))
+		/* The situation is bad according to mainstream. Den */
+		return 0;
+	return ub_tcprcvbuf_charge(sk, skb) == 0;
 }
 
 static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
@@ -919,6 +927,11 @@ extern struct sk_buff 		*sock_alloc_send
 						      unsigned long data_len,
 						      int noblock,
 						      int *errcode);
+extern struct sk_buff 		*sock_alloc_send_skb2(struct sock *sk,
+						     unsigned long size,
+						     unsigned long size2,
+						     int noblock,
+						     int *errcode);
 extern void *sock_kmalloc(struct sock *sk, int size,
 			  gfp_t priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
@@ -1192,6 +1205,8 @@ static inline int sk_can_gso(const struc
 
 static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
+	extern int sysctl_tcp_use_sg;
+
 	__sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
@@ -1202,6 +1217,8 @@ static inline void sk_setup_caps(struct 
 		else 
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	}
+	if (!sysctl_tcp_use_sg)
+		sk->sk_route_caps &= ~NETIF_F_SG;
 }
 
 static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
@@ -1312,6 +1329,7 @@ static inline void sock_poll_wait(struct
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
+	WARN_ON(skb->destructor);
 	sock_hold(sk);
 	skb->sk = sk;
 	skb->destructor = sock_wfree;
@@ -1320,6 +1338,7 @@ static inline void skb_set_owner_w(struc
 
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
+	WARN_ON(skb->destructor);
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
diff -upr kernel-2.6.18-417.el5.orig/include/net/tcp.h kernel-2.6.18-417.el5-028stab121/include/net/tcp.h
--- kernel-2.6.18-417.el5.orig/include/net/tcp.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/tcp.h	2017-01-13 08:40:24.000000000 -0500
@@ -42,6 +42,13 @@
 #include <net/dst.h>
 
 #include <linux/seq_file.h>
+#include <ub/ub_net.h>
+
+#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
+#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
+
+#define TW_WSCALE_MASK		0x0f
+#define TW_WSCALE_SPEC		0x10
 
 extern struct inet_hashinfo tcp_hashinfo;
 
@@ -216,7 +223,9 @@ extern int sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
+#ifndef sysctl_tcp_adv_win_scale
 extern int sysctl_tcp_adv_win_scale;
+#endif
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
@@ -229,6 +238,10 @@ extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
+extern int sysctl_tcp_use_sg;
+extern int sysctl_tcp_max_tw_kmem_fraction;
+extern int sysctl_tcp_max_tw_buckets_ub;
+
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -260,12 +273,17 @@ static inline int between(__u32 seq1, __
 extern struct proto tcp_prot;
 
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-#define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
-#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
-#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(tcp_statistics, field)
-#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
-#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(tcp_statistics, field, val)
-#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
+#else
+#define ve_tcp_statistics tcp_statistics
+#endif
+#define TCP_INC_STATS(field)		SNMP_INC_STATS(ve_tcp_statistics, field)
+#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_tcp_statistics, field)
+#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_tcp_statistics, field)
+#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(ve_tcp_statistics, field)
+#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
+#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
 
 extern void			tcp_v4_err(struct sk_buff *skb, u32);
 
@@ -535,7 +553,7 @@ extern u32	__tcp_select_window(struct so
  * to use only the low 32-bits of jiffies and hide the ugly
  * casts with the following macro.
  */
-#define tcp_time_stamp		((__u32)(jiffies))
+#define tcp_time_stamp		((__u32)(jiffies + get_exec_env()->jiffies_fixup))
 
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission
diff -upr kernel-2.6.18-417.el5.orig/include/net/udp.h kernel-2.6.18-417.el5-028stab121/include/net/udp.h
--- kernel-2.6.18-417.el5.orig/include/net/udp.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/udp.h	2017-01-13 08:40:40.000000000 -0500
@@ -28,22 +28,27 @@
 #include <net/snmp.h>
 #include <linux/seq_file.h>
 
-#define UDP_HTABLE_SIZE		128
-
 /* udp.c: This needs to be shared by v4 and v6 because the lookup
  *        and hashing code needs to work with different AF's yet
  *        the port space is shared.
  */
-extern struct hlist_head udp_hash[UDP_HTABLE_SIZE];
+extern struct hlist_head *udp_hash;
+extern unsigned int udp_hash_size;
 extern rwlock_t udp_hash_lock;
 
-static inline int udp_lport_inuse(u16 num)
+static inline int udp_hashfn(u16 num, unsigned veid)
+{
+	return ((num + (veid ^ (veid >> 16))) & (udp_hash_size - 1));
+}
+
+static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
 {
 	struct sock *sk;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-		if (inet_sk(sk)->num == num)
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
+		if (inet_sk(sk)->num == num &&
+		    ve_accessible_strict(sk->owner_env, env))
 			return 1;
 	return 0;
 }
@@ -68,6 +73,8 @@ extern int sysctl_udp_wmem_min;
 
 struct sk_buff;
 
+extern int	udp_get_port(struct sock *sk, unsigned short snum,
+			     int (*saddr_cmp)(const struct sock *, const struct sock *));
 extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -81,9 +88,14 @@ extern unsigned int udp_poll(struct file
 			     poll_table *wait);
 
 DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
-#define UDP_INC_STATS(field)		SNMP_INC_STATS(udp_statistics, field)
-#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(udp_statistics, field)
-#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_statistics, field)
+#ifdef CONFIG_VE
+#define ve_udp_statistics (get_exec_env()->_udp_statistics)
+#else
+#define ve_udp_statistics udp_statistics
+#endif
+#define UDP_INC_STATS(field)		SNMP_INC_STATS(ve_udp_statistics, field)
+#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_udp_statistics, field)
+#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_udp_statistics, field)
 
 /* /proc */
 struct udp_seq_afinfo {
diff -upr kernel-2.6.18-417.el5.orig/include/net/xfrm.h kernel-2.6.18-417.el5-028stab121/include/net/xfrm.h
--- kernel-2.6.18-417.el5.orig/include/net/xfrm.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/net/xfrm.h	2017-01-13 08:40:16.000000000 -0500
@@ -794,6 +794,7 @@ static inline int xfrm6_policy_check_rev
 extern int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family);
 extern void xfrm4_decode_session_reverse(struct sk_buff *skb, struct flowi *fl);
 extern void xfrm6_decode_session_reverse(struct sk_buff *skb, struct flowi *fl);
+extern int xfrm4_icmp_check(struct sk_buff *skb);
 
 extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
 
@@ -875,6 +876,11 @@ static inline int xfrm6_policy_check_rev
 {
 	return 1;
 }
+
+static inline int xfrm4_icmp_check(struct sk_buff *skb)
+{
+	return 1;
+}
 #endif
 
 static __inline__
diff -upr kernel-2.6.18-417.el5.orig/include/ub/beancounter.h kernel-2.6.18-417.el5-028stab121/include/ub/beancounter.h
--- kernel-2.6.18-417.el5.orig/include/ub/beancounter.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/beancounter.h	2017-01-13 08:40:23.000000000 -0500
@@ -0,0 +1,572 @@
+/*
+ *  include/ub/beancounter.h
+ *
+ *  Copyright (C) 1999-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Andrey Savochkin	saw@sw-soft.com
+ *
+ */
+
+#ifndef _LINUX_BEANCOUNTER_H
+#define _LINUX_BEANCOUNTER_H
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct ub_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int ub_ratelimit(struct ub_rate_info *);
+
+
+/*
+ * This magic is used to distinuish user beancounter and pages beancounter
+ * in struct page. page_ub and page_bc are placed in union and MAGIC
+ * ensures us that we don't use pbc as ubc in ub_page_uncharge().
+ */
+#define UB_MAGIC		0x62756275
+
+/*
+ *	Resource list.
+ */
+
+#define UB_KMEMSIZE	0	/* Unswappable kernel memory size including
+				 * struct task, page directories, etc.
+				 */
+#define UB_LOCKEDPAGES	1	/* Mlock()ed pages. */
+#define UB_PRIVVMPAGES	2	/* Total number of pages, counting potentially
+				 * private pages as private and used.
+				 */
+#define UB_SHMPAGES	3	/* IPC SHM segment size. */
+#define UB_DUMMY	4	/* Dummy resource (compatibility) */
+#define UB_NUMPROC	5	/* Number of processes. */
+#define UB_PHYSPAGES	6	/* All resident pages, for swapout guarantee. */
+#define UB_VMGUARPAGES	7	/* Guarantee for memory allocation,
+				 * checked against PRIVVMPAGES.
+				 */
+#define UB_OOMGUARPAGES	8	/* Guarantees against OOM kill.
+				 * Only limit is used, no accounting.
+				 */
+#define UB_NUMTCPSOCK	9	/* Number of TCP sockets. */
+#define UB_NUMFLOCK	10	/* Number of file locks. */
+#define UB_NUMPTY	11	/* Number of PTYs. */
+#define UB_NUMSIGINFO	12	/* Number of siginfos. */
+#define UB_TCPSNDBUF	13	/* Total size of tcp send buffers. */
+#define UB_TCPRCVBUF	14	/* Total size of tcp receive buffers. */
+#define UB_OTHERSOCKBUF	15	/* Total size of other socket
+				 * send buffers (all buffers for PF_UNIX).
+				 */
+#define UB_DGRAMRCVBUF	16	/* Total size of other socket
+				 * receive buffers.
+				 */
+#define UB_NUMOTHERSOCK	17	/* Number of other sockets. */
+#define UB_DCACHESIZE	18	/* Size of busy dentry/inode cache. */
+#define UB_NUMFILE	19	/* Number of open files. */
+
+#define UB_RESOURCES_COMPAT	24
+
+/* Add new resources here */
+
+#define UB_NUMXTENT	23
+#define UB_SWAPPAGES	24
+#define UB_RESOURCES	25
+
+#define UB_UNUSEDPRIVVM	(UB_RESOURCES + 0)
+#define UB_TMPFSPAGES	(UB_RESOURCES + 1)
+#define UB_HELDPAGES	(UB_RESOURCES + 2)
+
+struct ubparm {
+	/* 
+	 * A barrier over which resource allocations are failed gracefully.
+	 * If the amount of consumed memory is over the barrier further sbrk()
+	 * or mmap() calls fail, the existing processes are not killed. 
+	 */
+	unsigned long	barrier;
+	/* hard resource limit */
+	unsigned long	limit;
+	/* consumed resources */
+	unsigned long	held;
+	/* maximum amount of consumed resources through the last period */
+	unsigned long	maxheld;
+	/* minimum amount of consumed resources through the last period */
+	unsigned long	minheld;
+	/* count of failed charges */
+	unsigned long	failcnt;
+};
+
+/*
+ * Kernel internal part.
+ */
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <linux/task_io_accounting.h>
+#include <linux/percpu.h>
+#include <ub/ub_debug.h>
+#include <ub/ub_decl.h>
+#include <asm/atomic.h>
+#include <ub/io_prio.h>
+
+/*
+ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
+ */
+#define UB_MAXVALUE	( (1UL << (sizeof(unsigned long)*8-1)) - 1)
+
+
+/*
+ *	Resource management structures
+ * Serialization issues:
+ *   beancounter list management is protected via ub_hash_lock
+ *   task pointers are set only for current task and only once
+ *   refcount is managed atomically
+ *   value and limit comparison and change are protected by per-ub spinlock
+ */
+
+struct page_beancounter;
+struct task_beancounter;
+struct sock_beancounter;
+
+struct page_private {
+	unsigned long		ubp_unused_privvmpages;
+	unsigned long		ubp_tmpfs_respages;
+	long long		ubp_held_pages;
+};
+
+struct sock_private {
+	unsigned long		ubp_rmem_thres;
+	unsigned long		ubp_wmem_pressure;
+	unsigned long		ubp_maxadvmss;
+	unsigned long		ubp_rmem_pressure;
+	int			ubp_tw_count;
+#define UB_RMEM_EXPAND          0
+#define UB_RMEM_KEEP            1
+#define UB_RMEM_SHRINK          2
+	struct list_head	ubp_other_socks;
+	struct list_head	ubp_tcp_socks;
+	atomic_t		ubp_orphan_count;
+};
+
+struct ub_percpu_struct {
+	unsigned long unmap;
+	unsigned long swapin;
+	int pbcs;
+	int dirty_pages;
+#ifdef CONFIG_UBC_IO_ACCT
+	unsigned long async_write_complete;
+	unsigned long async_write_canceled;
+	unsigned long long sync_write_bytes;
+	unsigned long long sync_read_bytes;
+#endif
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	long	pages_charged;
+	long	vmalloc_charged;
+#endif
+	unsigned long	sync;
+	unsigned long	sync_done;
+
+	unsigned long	fsync;
+	unsigned long	fsync_done;
+
+	unsigned long	fdsync;
+	unsigned long	fdsync_done;
+
+	unsigned long	frsync;
+	unsigned long	frsync_done;
+
+	unsigned long		write;
+	unsigned long		read;
+	unsigned long long	wchar;
+	unsigned long long	rchar;
+
+	int		held_pages;
+
+	int		fast_refcount;
+};
+
+struct user_beancounter
+{
+	unsigned long		ub_magic;
+	atomic_t		ub_refcount;
+	struct list_head	ub_list;
+	struct hlist_node	ub_hash;
+
+	union {
+		struct rcu_head rcu;
+		struct execute_work cleanup;
+	};
+
+	spinlock_t		ub_lock;
+	uid_t			ub_uid;
+	unsigned int		ub_cookie;
+
+	struct ub_rate_info	ub_limit_rl;
+	int			ub_oom_noproc;
+
+	atomic_long_t		pbcs;
+
+	atomic_long_t		dirty_pages;
+
+	struct page_private	ppriv;
+#define ub_unused_privvmpages	ppriv.ubp_unused_privvmpages
+#define ub_tmpfs_respages	ppriv.ubp_tmpfs_respages
+#define ub_held_pages		ppriv.ubp_held_pages
+	struct sock_private	spriv;
+#define ub_rmem_thres		spriv.ubp_rmem_thres
+#define ub_maxadvmss		spriv.ubp_maxadvmss
+#define ub_rmem_pressure	spriv.ubp_rmem_pressure
+#define ub_wmem_pressure	spriv.ubp_wmem_pressure
+#define ub_tcp_sk_list		spriv.ubp_tcp_socks
+#define ub_other_sk_list	spriv.ubp_other_socks
+#define ub_orphan_count		spriv.ubp_orphan_count
+#define ub_tw_count		spriv.ubp_tw_count
+#ifdef CONFIG_UBC_IO_PRIO
+	struct ub_iopriv	iopriv;
+#endif
+
+	struct user_beancounter *parent;
+	int			ub_childs;
+	void			*private_data;
+	unsigned long		ub_aflags;
+
+	void			*private_data2;
+
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*proc;
+#endif
+	unsigned long		ub_mem_size;
+	int			dirty_exceeded;
+
+	/* resources statistic and settings */
+	struct ubparm		ub_parms[UB_RESOURCES];
+	/* resources statistic for last interval */
+	struct ubparm		ub_store[UB_RESOURCES];
+
+	struct ub_percpu_struct	*ub_percpu;
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	struct list_head	ub_cclist;
+#endif
+	atomic_t		ub_fastcount;
+};
+
+extern int ub_count;
+
+enum ub_severity { UB_HARD, UB_SOFT, UB_FORCE };
+
+#define UB_AFLAG_NOTIF_PAGEIN	0
+
+static inline
+struct user_beancounter *top_beancounter(struct user_beancounter *ub)
+{
+	while (ub->parent != NULL)
+		ub = ub->parent;
+	return ub;
+}
+
+static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
+{
+	return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
+}
+
+static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
+{
+	return (ub->ub_parms[resource].held > 
+		((ub->ub_parms[resource].barrier) >> 1));
+}
+
+static inline int ub_barrier_farnr(struct user_beancounter *ub, int resource)
+{
+	struct ubparm *p;
+	p = ub->ub_parms + resource;
+	return p->held <= (p->barrier >> 3);
+}
+
+static inline int ub_barrier_farsz(struct user_beancounter *ub, int resource)
+{
+	struct ubparm *p;
+	p = ub->ub_parms + resource;
+	return p->held <= (p->barrier >> 3) && p->barrier >= 1024 * 1024;
+}
+
+static inline struct user_beancounter *switch_exec_ub(
+		struct user_beancounter *ub)
+{
+	return ub ? set_exec_ub(ub) : NULL;
+}
+
+#ifndef CONFIG_USER_RESOURCE
+
+#define ub_percpu_add(ub, f, v)	do { } while (0)
+#define ub_percpu_sub(ub, f, v)	do { } while (0)
+#define ub_percpu_inc(ub, f)	do { } while (0)
+#define ub_percpu_dec(ub, f)	do { } while (0)
+
+#define mm_ub(mm)	(NULL)
+
+extern inline struct user_beancounter *get_beancounter_byuid
+		(uid_t uid, int create) { return NULL; }
+extern inline struct user_beancounter *get_beancounter
+		(struct user_beancounter *ub) { return NULL; }
+extern inline void put_beancounter(struct user_beancounter *ub) { }
+
+static inline void ub_init_late(void) { }
+static inline void ub_init_early(void) { }
+
+static inline int charge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val,
+			enum ub_severity strict) { return 0; }
+static inline void uncharge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val) { }
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define ub_percpu(ub, cpu) (per_cpu_ptr((ub)->ub_percpu, (cpu)))
+
+#define __ub_percpu_sum(ub, field)	({			\
+		struct user_beancounter *__ub = (ub);		\
+		typeof(ub_percpu(__ub, 0)->field) __sum = 0;	\
+		int __cpu;					\
+		for_each_possible_cpu(__cpu)			\
+			__sum += ub_percpu(__ub, __cpu)->field;	\
+		__sum;						\
+	})
+
+#define ub_percpu_sum(ub, field) max(0l, __ub_percpu_sum(ub, field))
+
+#define ub_percpu_add(ub, field, v)		do {			\
+		per_cpu_ptr(ub->ub_percpu, get_cpu())->field += (v);	\
+		put_cpu();						\
+	} while (0)
+#define ub_percpu_inc(ub, field) ub_percpu_add(ub, field, 1)
+
+#define ub_percpu_sub(ub, field, v)		do {			\
+		per_cpu_ptr(ub->ub_percpu, get_cpu())->field -= (v);	\
+		put_cpu();						\
+	} while (0)
+#define ub_percpu_dec(ub, field) ub_percpu_sub(ub, field, 1)
+
+#define UB_STAT_BATCH	64
+
+#define ub_stat_add(ub, name, val)	do {		\
+	unsigned long __flags;				\
+	int *__pcpu;					\
+							\
+	local_irq_save(__flags);			\
+	__pcpu = &(per_cpu_ptr((ub)->ub_percpu, smp_processor_id())->name); \
+	if (*__pcpu + (val) <= UB_STAT_BATCH)		\
+		*__pcpu += val;				\
+	else {						\
+		atomic_long_add(*__pcpu + (val), &(ub)->name);	\
+		*__pcpu = 0;				\
+	}						\
+	local_irq_restore(__flags);			\
+} while (0)
+
+#define ub_stat_sub(ub, name, val)	do {		\
+	unsigned long __flags;				\
+	int *__pcpu;					\
+							\
+	local_irq_save(__flags);			\
+	__pcpu = &(per_cpu_ptr((ub)->ub_percpu, smp_processor_id())->name); \
+	if (*__pcpu - (val) >= -UB_STAT_BATCH)		\
+		*__pcpu -= val;				\
+	else {						\
+		atomic_long_add(*__pcpu - (val), &(ub)->name);	\
+		*__pcpu = 0;				\
+	}						\
+	local_irq_restore(__flags);			\
+} while (0)
+
+#define ub_stat_flush_pcpu(ub, name)	do {		\
+	unsigned long __flags;				\
+	int *__pcpu;					\
+							\
+	local_irq_save(__flags);			\
+	__pcpu = &(per_cpu_ptr((ub)->ub_percpu, smp_processor_id())->name); \
+	atomic_long_add(*__pcpu, &(ub)->name);		\
+	*__pcpu = 0;					\
+	local_irq_restore(__flags);			\
+} while (0)
+
+#define ub_stat_inc(ub, name)		ub_stat_add(ub, name, 1)
+#define ub_stat_dec(ub, name)		ub_stat_sub(ub, name, 1)
+#define ub_stat_mod(ub, name, val)	atomic_long_add(val, &(ub)->name)
+#define __ub_stat_get(ub, name)		atomic_long_read(&(ub)->name)
+#define ub_stat_get(ub, name)		max(0l, atomic_long_read(&(ub)->name))
+#define ub_stat_get_exact(ub, name)	max(0l, __ub_stat_get(ub, name) + __ub_percpu_sum(ub, name))
+
+#define mm_ub(mm)	((mm)->mm_ub)
+/*
+ *  Charge/uncharge operations
+ */
+
+extern int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict);
+
+extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val);
+
+extern void put_beancounter_safe(struct user_beancounter *ub);
+extern void __put_beancounter(struct user_beancounter *ub);
+
+extern void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held);
+
+extern const char *ub_rnames[];
+/*
+ *	Put a beancounter reference
+ */
+
+static inline void put_beancounter(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return;
+
+	/* FIXME - optimize not to disable interrupts and make call */
+	__put_beancounter(ub);
+}
+
+static inline
+struct user_beancounter *get_beancounter_fast(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return NULL;
+
+	preempt_disable();
+	if (likely(atomic_read(&ub->ub_fastcount) == 0))
+		per_cpu_ptr(ub->ub_percpu, smp_processor_id())->fast_refcount++;
+	else
+		atomic_inc(&ub->ub_fastcount);
+	preempt_enable();
+
+	return ub;
+}
+
+static inline void put_beancounter_fast(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return;
+
+	preempt_disable();
+	if (likely(atomic_read(&ub->ub_fastcount) == 0))
+		per_cpu_ptr(ub->ub_percpu, smp_processor_id())->fast_refcount--;
+	else if (atomic_dec_and_test(&ub->ub_fastcount))
+		__put_beancounter(ub);
+	preempt_enable();
+}
+
+/* fast put, refcount can't reach zero */
+static inline void __put_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	atomic_sub(n, &ub->ub_refcount);
+}
+
+static inline void put_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	if (n > 1)
+		__put_beancounter_batch(ub, n - 1);
+	__put_beancounter(ub);
+}
+
+/*
+ *	Create a new beancounter reference
+ */
+extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
+
+static inline 
+struct user_beancounter *get_beancounter(struct user_beancounter *ub)
+{
+	if (unlikely(ub == NULL))
+		return NULL;
+
+	atomic_inc(&ub->ub_refcount);
+	return ub;
+}
+
+static inline 
+struct user_beancounter *get_beancounter_rcu(struct user_beancounter *ub)
+{
+	return atomic_inc_not_zero(&ub->ub_refcount) ? ub : NULL;
+}
+
+static inline void get_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	atomic_add(n, &ub->ub_refcount);
+}
+
+/* UB_CREATE* are bit masks */
+#define UB_CREATE		1
+#define UB_CREATE_ATOMIC	2
+extern struct user_beancounter *get_subbeancounter_byid(
+		struct user_beancounter *,
+		int id, int create);
+
+extern void ub_init_late(void);
+extern void ub_init_early(void);
+
+extern int print_ub_uid(struct user_beancounter *ub, char *buf, int size);
+
+/*
+ *	Resource charging
+ * Change user's account and compare against limits
+ */
+
+static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
+{
+	if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)
+		ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
+	if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)
+		ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
+}
+
+int charge_beancounter(struct user_beancounter *ub, int resource,
+		unsigned long val, enum ub_severity strict);
+void uncharge_beancounter(struct user_beancounter *ub, int resource,
+		unsigned long val);
+void __charge_beancounter_notop(struct user_beancounter *ub, int resource,
+		unsigned long val);
+void __uncharge_beancounter_notop(struct user_beancounter *ub, int resource,
+		unsigned long val);
+
+static inline void charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	if (ub->parent != NULL)
+		__charge_beancounter_notop(ub, resource, val);
+}
+
+static inline void uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	if (ub->parent != NULL)
+		__uncharge_beancounter_notop(ub, resource, val);
+}
+
+#define ub_mapped_pages(ub)	ub_stat_get(ub, pbcs)
+
+void ub_flush_held_pages(struct user_beancounter *ub);
+
+void ub_held_snapshot(struct user_beancounter *ub, unsigned long *held);
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#ifndef CONFIG_USER_RSS_ACCOUNTING
+static inline void ub_init_pbc(void) { }
+#else
+extern void ub_init_pbc(void);
+#endif
+#endif /* __KERNEL__ */
+#endif /* _LINUX_BEANCOUNTER_H */
diff -upr kernel-2.6.18-417.el5.orig/include/ub/io_acct.h kernel-2.6.18-417.el5-028stab121/include/ub/io_acct.h
--- kernel-2.6.18-417.el5.orig/include/ub/io_acct.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/io_acct.h	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,86 @@
+/*
+ *  include/ub/io_acct.h
+ *
+ *  Copyright (C) 2006 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#ifndef __UB_IO_ACCT_H_
+#define __UB_IO_ACCT_H_
+
+#ifdef CONFIG_UBC_IO_ACCT
+#include <ub/beancounter.h>
+#include <linux/virtinfo.h>
+
+static inline struct user_beancounter *get_io_ub(void)
+{
+	return top_beancounter(get_exec_ub());
+}
+
+static inline struct user_beancounter *get_mapping_ub(struct address_space *mapping)
+{
+	struct user_beancounter *ub;
+
+	rcu_read_lock();
+	ub = rcu_dereference(mapping->dirtied_ub);
+	if (ub)
+		ub = get_beancounter_rcu(ub);
+	rcu_read_unlock();
+
+	return ub;
+}
+
+static inline void ub_io_account_read(size_t bytes)
+{
+	ub_percpu_add(get_io_ub(), sync_read_bytes, bytes);
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_ACCOUNT, &bytes);
+}
+
+static inline void ub_io_account_write(size_t bytes)
+{
+	ub_percpu_add(get_io_ub(), sync_write_bytes, bytes);
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_ACCOUNT, &bytes);
+}
+
+extern void ub_io_account_dirty(struct address_space *mapping, int pages);
+extern void ub_io_account_clean(struct address_space *mapping, int pages, int cancel);
+
+#define ub_dirty_pages(ub)	ub_stat_get(ub, dirty_pages)
+extern int ub_dirty_limits(long *pdirty, struct user_beancounter *ub);
+
+#else /* UBC_IO_ACCT */
+
+static inline void ub_io_account_read(size_t bytes)
+{
+}
+
+static inline void ub_io_account_write(size_t bytes)
+{
+}
+
+static inline void ub_io_account_dirty(struct address_space *mapping, int pages)
+{
+}
+
+static inline void ub_io_account_clean(struct address_space *mapping, int pages, int cancel)
+{
+}
+
+static inline unsigned long ub_dirty_pages(struct user_beancounter *ub)
+{
+	return 0;
+}
+
+static inline int ub_dirty_limits(long *pdirty, struct user_beancounter *ub)
+{
+	return 0;
+}
+
+#endif /* UBC_IO_ACCT */
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/io_prio.h kernel-2.6.18-417.el5-028stab121/include/ub/io_prio.h
--- kernel-2.6.18-417.el5.orig/include/ub/io_prio.h	2017-01-13 08:40:18.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/io_prio.h	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,80 @@
+/*
+ *  include/ub/io_prio.h
+ *
+ *  Copyright (C) 2007 SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Vasily Tarasov <vtaras@openvz.org>
+ *
+ */
+
+#ifndef _UB_IO_PRIO_H
+#define _UB_IO_PRIO_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/cfq-iosched.h>
+
+#define UB_IOPRIO_MIN 0
+#define UB_IOPRIO_MAX IOPRIO_BE_NR
+#define UB_IOPRIO_BASE 4
+
+struct ub_iopriv {
+	struct list_head	cfq_bc_head;
+	rwlock_t		cfq_bc_list_lock;
+
+	unsigned int		ioprio;
+};
+
+#ifdef CONFIG_UBC_IO_PRIO
+extern void bc_init_ioprio(struct ub_iopriv *);
+extern void bc_fini_ioprio(struct ub_iopriv *);
+extern struct cfq_bc_data * bc_findcreate_cfq_bc(struct ub_iopriv *,
+					struct cfq_data *, gfp_t gfp_mask);
+extern void bc_cfq_exit_queue(struct cfq_data *);
+extern int bc_expired(struct cfq_data *);
+extern void bc_set_active(struct cfq_data *, struct cfq_bc_data *);
+extern void bc_schedule_active(struct cfq_data *);
+extern int bc_allow_preempt(struct cfq_data *, struct cfq_bc_data *);
+extern void  bc_inc_rqnum(struct cfq_queue *);
+extern void bc_dec_rqnum(struct cfq_queue *);
+extern unsigned long bc_set_ioprio(int, int);
+extern struct cfq_bc_data *
+__find_cfq_bc(struct ub_iopriv *iopriv, struct cfq_data *cfqd);
+#else
+static inline struct cfq_bc_data *
+bc_findcreate_cfq_bc(struct ub_iopriv *iopriv,
+			struct cfq_data *cfqd, gfp_t mask)
+{
+	return &cfqd->cfq_bc;
+}
+static inline void bc_cfq_exit_queue(struct cfq_data *cfqd) { ; }
+static inline int bc_expired(struct cfq_data *cfqd) { return 0; }
+static inline void bc_set_active(struct cfq_data *cfqd, struct cfq_bc_data *next_active)
+{
+}
+static inline void bc_schedule_active(struct cfq_data *cfqd)
+{
+	cfqd->active_cfq_bc = &cfqd->cfq_bc;
+}
+static inline int bc_allow_preempt(struct cfq_data *cfqd,
+		struct cfq_bc_data *cfq_bc)
+{
+	return 1;
+}
+static inline void bc_inc_rqnum(struct cfq_queue *cfqq) { ; }
+static inline void bc_dec_rqnum(struct cfq_queue *cfqq) { ; }
+static inline unsigned long bc_set_ioprio(int ubid, int ioprio)
+{
+	return -EINVAL;
+}
+static inline struct cfq_bc_data *
+__find_cfq_bc(struct ub_iopriv *iopriv, struct cfq_data *cfqd)
+{
+	return &cfqd->cfq_bc;
+}
+
+#endif /* CONFIG_UBC_IO_PRIO */
+#endif /* _UB_IO_PRIO_H */
diff -upr kernel-2.6.18-417.el5.orig/include/ub/proc.h kernel-2.6.18-417.el5-028stab121/include/ub/proc.h
--- kernel-2.6.18-417.el5.orig/include/ub/proc.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/proc.h	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,40 @@
+/*
+ *  include/ub/proc.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PROC_H_
+#define __UB_PROC_H_
+
+#include <linux/seq_file.h>
+
+struct bc_proc_entry {
+	char *name;
+	union {
+		int (*show)(struct seq_file *, void *);
+		struct file_operations *fops;
+	} u;
+	struct bc_proc_entry *next;
+	int cookie;
+};
+
+struct user_beancounter;
+
+void bc_register_proc_entry(struct bc_proc_entry *);
+void bc_register_proc_root_entry(struct bc_proc_entry *);
+
+static inline struct user_beancounter *seq_beancounter(struct seq_file *f)
+{
+	return (struct user_beancounter *)(f->private);
+}
+
+extern const char *bc_proc_lu_fmt;
+extern const char *bc_proc_lu_lfmt;
+extern const char *bc_proc_llu_fmt;
+extern const char *bc_proc_lu_lu_fmt;
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_dcache.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_dcache.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_dcache.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_dcache.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,49 @@
+/*
+ *  include/ub/ub_dcache.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_H_
+#define __UB_DCACHE_H_
+
+#include <ub/ub_decl.h>
+
+/*
+ * UB_DCACHESIZE accounting
+ */
+
+struct dentry_beancounter
+{
+	/*
+	 *  d_inuse =
+	 *         <number of external refs> +
+	 *         <number of 'used' childs>
+	 *
+	 * d_inuse == -1 means that dentry is unused
+	 * state change -1 => 0 causes charge
+	 * state change 0 => -1 causes uncharge
+	 */
+	atomic_t d_inuse;
+	/* charged size, including name length if name is not inline */
+	unsigned long d_ubsize;
+	struct user_beancounter *d_ub;
+};
+
+#ifdef CONFIG_USER_RESOURCE
+#define ub_dget_testone(d)  (atomic_inc_and_test(&(d)->dentry_bc.d_inuse))
+#define ub_dput_testzero(d) (atomic_add_negative(-1, &(d)->dentry_bc.d_inuse))
+#define INUSE_INIT		0
+
+extern int ub_dentry_on;
+extern void ub_dentry_checkup(void);
+#else
+#define ub_dget_testone(d)	(0)
+#define ub_dput_testzero(d)	(0)
+#define ub_dentry_checkup()	do { } while (0)
+#endif
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_dcache_op.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_dcache_op.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_dcache_op.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_dcache_op.h	2017-01-13 08:40:29.000000000 -0500
@@ -0,0 +1,102 @@
+/*
+ *  include/ub/ub_dcache_op.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_OP_H_
+#define __UB_DCACHE_OP_H_
+
+struct dentry;
+
+#ifdef CONFIG_USER_RESOURCE
+
+#include <linux/spinlock.h>
+#include <ub/ub_dcache.h>
+#include <ub/ub_task.h>
+
+extern int ub_dentry_alloc_barrier;
+extern spinlock_t dcache_lock;
+
+static inline int ub_dentry_alloc(struct dentry *d)
+{
+	extern int __ub_dentry_alloc(struct dentry *);
+
+	if (!ub_dentry_on)
+		return 0;
+	return __ub_dentry_alloc(d);
+}
+
+static inline void ub_dentry_alloc_start(void)
+{
+	extern void __ub_dentry_alloc_start(void);
+
+	if (ub_dentry_alloc_barrier)
+		__ub_dentry_alloc_start();
+}
+
+static inline void ub_dentry_alloc_end(void)
+{
+	extern void __ub_dentry_alloc_end(void);
+
+	if (current->task_bc.dentry_alloc)
+		__ub_dentry_alloc_end();
+}
+
+static inline int ub_dentry_charge(struct dentry *d)
+{
+	extern int __ub_dentry_charge(struct dentry *);
+
+	if (!ub_dentry_on)
+		return 0;
+	return __ub_dentry_charge(d);
+}
+
+static inline void ub_dentry_charge_nofail(struct dentry *d)
+{
+	extern void __ub_dentry_charge_nofail(struct dentry *);
+
+	if (!ub_dentry_on)
+		return;
+	__ub_dentry_charge_nofail(d);
+}
+
+static inline void ub_dentry_uncharge_locked(struct dentry *d)
+{
+	extern void __ub_dentry_uncharge(struct dentry *);
+
+	if (!ub_dentry_on)
+		return;
+	__ub_dentry_uncharge(d);
+}
+
+static inline void ub_dentry_uncharge(struct dentry *d)
+{
+	extern void __ub_dentry_uncharge(struct dentry *);
+
+	if (!ub_dentry_on)
+		return;
+	spin_lock(&dcache_lock);
+	__ub_dentry_uncharge(d);
+	spin_unlock(&dcache_lock);
+}
+
+void uncharge_dcache(struct user_beancounter *ub, unsigned long size);
+#else /* CONFIG_USER_RESOURCE */
+
+static inline int ub_dentry_alloc(struct dentry *d) { return 0; }
+static inline void ub_dentry_alloc_start(void) { }
+static inline void ub_dentry_alloc_end(void) { }
+static inline int ub_dentry_charge(struct dentry *d) { return 0; }
+static inline void ub_dentry_charge_nofail(struct dentry *d) { }
+static inline void ub_dentry_uncharge_locked(struct dentry *d) { }
+static inline void ub_dentry_uncharge(struct dentry *d) { }
+static inline void uncharge_dcache(struct user_beancounter *ub, unsigned long size) { }
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#endif /* __UB_DCACHE_OP_H_ */
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_debug.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_debug.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_debug.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_debug.h	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,106 @@
+/*
+ *  include/ub/ub_debug.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DEBUG_H_
+#define __UB_DEBUG_H_
+
+/*
+ * general debugging
+ */
+
+#define UBD_ALLOC	0x1
+#define UBD_CHARGE	0x2
+#define UBD_LIMIT	0x4
+#define UBD_TRACE	0x8
+
+/*
+ * ub_net debugging
+ */
+
+#define UBD_NET_SOCKET	0x10
+#define UBD_NET_SLEEP	0x20
+#define UBD_NET_SEND	0x40
+#define UBD_NET_RECV	0x80
+
+/*
+ * Main routines
+ */
+
+#define UB_DEBUG (0)
+#define DEBUG_RESOURCE (0ULL)
+
+#define ub_dbg_cond(__cond, __str, args...)				\
+	do { 								\
+		if ((__cond) != 0)					\
+			printk(__str, ##args);				\
+	} while(0)
+
+#define ub_debug(__section, __str, args...) 				\
+	ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
+
+#define ub_debug_resource(__resource, __str, args...)			\
+	ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && 				\
+			(DEBUG_RESOURCE & (1 << (__resource))), 	\
+			__str, ##args)
+
+#if UB_DEBUG & UBD_TRACE
+#define ub_debug_trace(__cond, __b, __r)				\
+		do {							\
+			static struct ub_rate_info ri =	{ __b, __r };	\
+			if ((__cond) != 0 && ub_ratelimit(&ri))		\
+				dump_stack(); 				\
+		} while(0)
+#else
+#define ub_debug_trace(__cond, __burst, __rate)
+#endif
+
+#include <linux/config.h>
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+#include <linux/list.h>
+#include <linux/kmem_cache.h>
+
+struct user_beancounter;
+struct ub_cache_counter {
+	struct list_head ulist;
+	struct ub_cache_counter *next;
+	struct user_beancounter *ub;
+	kmem_cache_t *cachep;
+	unsigned long counter;
+};
+
+extern spinlock_t cc_lock;
+extern void init_cache_counters(void);
+extern void ub_free_counters(struct user_beancounter *);
+extern void ub_kmemcache_free(kmem_cache_t *cachep);
+
+struct vm_struct;
+#define inc_vmalloc_charged(vm, flags)	do {				\
+		if (flags & __GFP_UBC)					\
+			ub_percpu_add(get_exec_ub(), vmalloc_charged,	\
+					vm->nr_pages);			\
+	} while (0)
+#define dec_vmalloc_charged(vm)		do {				\
+		struct user_beancounter *ub;				\
+		ub = page_ub(vm->pages[0]);				\
+		if (ub != NULL)						\
+			ub_percpu_sub(ub, vmalloc_charged,		\
+					vm->nr_pages);			\
+	} while (0)
+#else
+#define init_cache_counters()		do { } while (0)
+#define inc_vmalloc_charged(vm, f)	do { } while (0)
+#define dec_vmalloc_charged(vm)		do { } while (0)
+
+#define ub_free_counters(ub)		do { } while (0)
+#define ub_kmemcache_free(cachep)	do { } while (0)
+#endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_decl.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_decl.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_decl.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_decl.h	2017-01-13 08:40:16.000000000 -0500
@@ -0,0 +1,42 @@
+/*
+ *  include/ub/ub_decl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DECL_H_
+#define __UB_DECL_H_
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+
+/*
+ * Naming convension:
+ * ub_<section|object>_<operation>
+ */
+
+#ifdef CONFIG_USER_RESOURCE
+
+#define UB_DECLARE_FUNC(ret_type, decl)	extern ret_type decl;
+#define UB_DECLARE_VOID_FUNC(decl)	extern void decl;
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define UB_DECLARE_FUNC(ret_type, decl)		\
+	static inline ret_type decl		\
+	{					\
+		return (ret_type)0;		\
+	}
+#define UB_DECLARE_VOID_FUNC(decl)		\
+	static inline void decl			\
+	{					\
+	}
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_hash.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_hash.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_hash.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_hash.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,36 @@
+/*
+ *  include/ub/ub_hash.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_UBHASH_H
+#define _LINUX_UBHASH_H
+
+#ifdef __KERNEL__
+
+#define UB_HASH_SIZE 256
+
+extern struct hlist_head ub_hash[];
+extern spinlock_t ub_hash_lock;
+extern struct list_head ub_list_head;
+
+#ifdef CONFIG_USER_RESOURCE
+
+/*
+ * Iterate over beancounters
+ * @__ubp - beancounter ptr
+ * Can use break :)
+ */
+#define for_each_beancounter(__ubp)				\
+	list_for_each_entry_rcu(__ubp, &ub_list_head, ub_list)	\
+
+#define bc_hash_entry(ptr) hlist_entry(ptr, struct user_beancounter, ub_hash)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __KERNEL__ */
+#endif /* _LINUX_UBHASH_H */
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_mem.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_mem.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_mem.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_mem.h	2017-01-13 08:40:31.000000000 -0500
@@ -0,0 +1,118 @@
+/*
+ *  include/ub/ub_mem.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SLAB_H_
+#define __UB_SLAB_H_
+
+#include <linux/config.h>
+#include <linux/kmem_slab.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * UB_KMEMSIZE accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_ITEMS
+#define CHARGE_ORDER(__o)		(1 << (__o))
+#define CHARGE_SIZE(__s)		1
+#else
+#define CHARGE_ORDER(__o)		(PAGE_SIZE << (__o))
+#define CHARGE_SIZE(__s)		(__s)
+#endif
+
+#define page_ub(__page)	((__page)->bc.page_ub)
+
+struct mm_struct;
+struct page;
+struct kmem_cache;
+
+UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
+
+UB_DECLARE_FUNC(int, ub_kmemsize_charge(struct user_beancounter *ub,
+		unsigned long size, enum ub_severity strict))
+UB_DECLARE_VOID_FUNC(ub_kmemsize_uncharge(struct user_beancounter *ub,
+		unsigned long size))
+
+UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, gfp_t mask))
+UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
+UB_DECLARE_FUNC(int, ub_slab_charge(struct kmem_cache *cachep,
+			void *objp, gfp_t flags))
+UB_DECLARE_VOID_FUNC(ub_slab_uncharge(struct kmem_cache *cachep, void *obj))
+
+static inline int ub_page_table_charge(struct mm_struct *mm)
+{
+	if (likely(mm->page_table_precharge))
+		mm->page_table_precharge--;
+	else if (charge_beancounter(mm->mm_ub, UB_KMEMSIZE,
+				CHARGE_ORDER(0), UB_SOFT))
+		return -ENOMEM;
+	mm->page_table_charged++;
+	return 0;
+}
+
+static inline void ub_page_table_uncharge(struct mm_struct *mm)
+{
+	mm->page_table_precharge++;
+	mm->page_table_charged--;
+}
+
+static inline int ub_page_table_precharge(struct mm_struct *mm, long precharge)
+{
+	if (charge_beancounter(mm->mm_ub, UB_KMEMSIZE,
+				precharge * CHARGE_ORDER(0), UB_SOFT))
+		return -ENOMEM;
+	mm->page_table_precharge += precharge;
+	return 0;
+}
+
+static inline void ub_page_table_commit(struct mm_struct *mm)
+{
+	if (unlikely(mm->page_table_precharge)) {
+		uncharge_beancounter(mm->mm_ub, UB_KMEMSIZE,
+				mm->page_table_precharge * CHARGE_ORDER(0));
+		mm->page_table_precharge = 0;
+	}
+}
+
+#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
+		(ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
+		       sizeof(void *))))
+
+#ifdef CONFIG_USER_RESOURCE
+extern struct user_beancounter *ub_select_worst(long *);
+
+/* mm/slab.c needed stuff */
+#define UB_ALIGN(flags)		(flags & SLAB_UBC ? sizeof(void *) : 1)
+#define UB_EXTRA(flags)		(flags & SLAB_UBC ? sizeof(void *) : 0)
+#define set_cache_objuse(cachep)	do {				\
+		(cachep)->objuse = ((PAGE_SIZE << (cachep)->gfporder) +	\
+				(cachep)->num - 1) / (cachep)->num;	\
+		if (!OFF_SLAB(cachep))					\
+			break;						\
+		(cachep)->objuse += ((cachep)->slabp_cache->objuse +	\
+				(cachep)->num - 1) / (cachep)->num;	\
+	} while (0)
+#define init_slab_ubps(cachep, slabp)	do {				\
+		if (!((cachep)->flags & SLAB_UBC))			\
+			break;						\
+		memset(slab_ubcs(cachep, slabp), 0,			\
+				(cachep)->num * sizeof(void *));	\
+	} while (0)
+#define kmem_obj_memusage(o)	(virt_to_cache(o)->objuse)
+#else
+#define UB_ALIGN(flags)		1
+#define UB_EXTRA(flags)		0
+#define set_cache_objuse(c)	do { } while (0)
+#define init_slab_ubps(c, s)	do { } while (0)
+#endif
+#endif /* __UB_SLAB_H_ */
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_misc.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_misc.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_misc.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_misc.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,55 @@
+/*
+ *  include/ub/ub_misc.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_MISC_H_
+#define __UB_MISC_H_
+
+#include <ub/ub_decl.h>
+
+struct tty_struct;
+struct file;
+struct file_lock;
+struct sigqueue;
+
+UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
+UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
+UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
+UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
+UB_DECLARE_FUNC(int, ub_siginfo_charge(struct sigqueue *q,
+			struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct sigqueue *q))
+UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
+			struct task_struct *task))
+UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
+UB_DECLARE_VOID_FUNC(ub_task_put(struct task_struct *task))
+UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
+UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
+
+#ifdef CONFIG_USER_RESOURCE
+#define set_flock_charged(fl)	do { (fl)->fl_charged = 1; } while (0)
+#define unset_flock_charged(fl)	do {		\
+		WARN_ON((fl)->fl_charged == 0);	\
+		(fl)->fl_charged = 0;		\
+	} while (0)
+#define set_mm_ub(mm, tsk)	do {				\
+		(mm)->mm_ub = get_beancounter(tsk ? 		\
+			tsk->task_bc.task_ub : get_exec_ub());	\
+	} while (0)
+#define put_mm_ub(mm)		do {				\
+		put_beancounter((mm)->mm_ub);			\
+		(mm)->mm_ub = NULL;				\
+	} while (0)
+#else
+#define set_flock_charged(fl)	do { } while (0)
+#define unset_flock_charged(fl)	do { } while (0)
+#define set_mm_ub(mm, tsk)	do { } while (0)
+#define put_mm_ub(mm)		do { } while (0)
+#endif
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_net.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_net.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_net.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_net.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,206 @@
+/*
+ *  include/ub/ub_net.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_NET_H_
+#define __UB_NET_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_decl.h>
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+#define bid2sid(__bufid) \
+	((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
+
+#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
+			~(SMP_CACHE_BYTES-1)))
+#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
+
+static inline int ub_skb_alloc_bc(struct sk_buff *skb, gfp_t gfp_mask)
+{
+#ifdef CONFIG_USER_RESOURCE
+	memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
+#endif
+	return 0;
+}
+
+static inline void ub_skb_free_bc(struct sk_buff *skb)
+{
+}
+
+#define IS_TCP_SOCK(__family, __type) \
+		(((__family) == PF_INET || (__family) == PF_INET6) && (__type) == SOCK_STREAM)
+
+/* number of sockets */
+UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
+UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk)) 
+UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
+
+/* management of queue for send space */
+UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo, 
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_snd_queue_add(struct sock *sk, int resource, 
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
+
+/* send space */
+UB_DECLARE_FUNC(int, ub_sock_make_wreserv(struct sock *sk, int bufid,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_get_wreserv(struct sock *sk, int bufid,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_ret_wreserv(struct sock *sk, int bufid,
+			unsigned long size, unsigned long ressize))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargesend(struct sock *sk,
+			struct sk_buff *skb, enum ub_severity strict))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
+
+UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
+
+/* receive space */
+UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargerecv(struct sock *sk,
+			struct sk_buff *skb, enum ub_severity strict))
+
+/* skb destructor */
+UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
+
+static inline int ub_sock_makewres_other(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
+}
+
+static inline int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
+}
+
+UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk,
+			unsigned long size))
+
+static inline int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
+}
+
+UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk,
+			unsigned long size, unsigned long ressize))
+
+static inline void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
+		unsigned long ressize)
+{
+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
+}
+
+static inline int ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
+{
+	return ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
+}
+
+static inline int ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
+{
+	return ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
+}
+
+static inline int ub_tcpsndbuf_charge(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargesend(sk, skb, UB_HARD);
+}
+
+static inline int ub_tcpsndbuf_charge_forced(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargesend(sk, skb, UB_FORCE);
+}
+
+static inline int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargerecv(sk, skb, UB_SOFT);
+}
+
+static inline int ub_tcprcvbuf_charge_forced(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargerecv(sk, skb, UB_FORCE);
+}
+
+/* Charge size */
+static inline unsigned long skb_charge_datalen(unsigned long chargesize)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned long slabsize;
+
+	chargesize -= sizeof(struct sk_buff);
+	slabsize = rounddown_pow_of_two(chargesize);
+
+	return (slabsize - sizeof(struct skb_shared_info)) &
+		~(SMP_CACHE_BYTES-1);
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned long skb_charge_size_gen(unsigned long size)
+{ 
+#ifdef CONFIG_USER_RESOURCE
+	unsigned long slabsize;
+
+	size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
+	slabsize = roundup_pow_of_two(size);
+
+	return slabsize + sizeof(struct sk_buff);
+#else
+	return 0;
+#endif
+
+}
+	
+static inline unsigned long skb_charge_size_const(unsigned long size)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int ret;
+	if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64)
+		ret = 64 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128)
+		ret = 128 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256)
+		ret = 256 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512)
+		ret = 512 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024)
+		ret = 1024 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048)
+		ret = 2048 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096)
+		ret = 4096 + sizeof(struct sk_buff);
+	else
+		ret = skb_charge_size_gen(size);
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+
+#define skb_charge_size(__size)			\
+	(__builtin_constant_p(__size)	?	\
+	 skb_charge_size_const(__size)	:	\
+	 skb_charge_size_gen(__size))
+
+UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
+UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb, 
+			struct sock *sk, unsigned long size, int res))
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_oom.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_oom.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_oom.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_oom.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,26 @@
+#include <ub/ub_decl.h>
+#include <ub/ub_task.h>
+
+UB_DECLARE_FUNC(int, ub_oom_lock(void))
+UB_DECLARE_FUNC(struct user_beancounter *, ub_oom_select_worst(void))
+UB_DECLARE_VOID_FUNC(ub_oom_mm_killed(struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_oom_unlock(void))
+UB_DECLARE_VOID_FUNC(ub_out_of_memory(struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_oom_task_dead(struct task_struct *tsk))
+UB_DECLARE_FUNC(int, ub_oom_task_skip(struct user_beancounter *ub,
+			struct task_struct *tsk))
+
+#ifdef CONFIG_USER_RESOURCE
+extern int oom_generation;
+extern int oom_kill_counter;
+#define ub_oom_start() do {						\
+		current->task_bc.oom_generation = oom_generation;	\
+	} while (0)
+#define ub_oom_task_killed(p) do { 					\
+		oom_kill_counter++;					\
+		wake_up_process(p);					\
+	} while (0)
+#else
+#define ub_oom_start()			do { } while (0)
+#define ub_oom_task_killed(p)		do { } while (0)
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_orphan.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_orphan.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_orphan.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_orphan.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,106 @@
+/*
+ *  include/ub/ub_orphan.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_ORPHAN_H_
+#define __UB_ORPHAN_H_
+
+#include <net/tcp.h>
+
+#include "ub/beancounter.h"
+#include "ub/ub_net.h"
+
+
+static inline atomic_t *__ub_get_orphan_count_ptr(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return &sock_bc(sk)->ub->ub_orphan_count;
+#endif
+	return sk->sk_prot->orphan_count;
+}
+
+static inline void ub_inc_orphan_count(struct sock *sk)
+{
+	atomic_inc(__ub_get_orphan_count_ptr(sk));
+}
+
+static inline void ub_dec_orphan_count(struct sock *sk)
+{
+	atomic_dec(__ub_get_orphan_count_ptr(sk));
+}
+
+static inline int ub_get_orphan_count(struct sock *sk)
+{
+	return atomic_read(__ub_get_orphan_count_ptr(sk));
+}
+
+extern int __ub_too_many_orphans(struct sock *sk, int count);
+static inline int ub_too_many_orphans(struct sock *sk, int count)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (__ub_too_many_orphans(sk, count))
+		return 1;
+#endif
+	return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans ||
+		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
+}
+
+#include <ub/ub_mem.h>
+#include <linux/kmem_cache.h>
+
+struct inet_timewait_sock;
+
+static inline void ub_timewait_mod(struct inet_timewait_sock *tw, int incdec)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+
+	ub = slab_ub(tw);
+	if (ub != NULL)
+		ub->ub_tw_count += incdec;
+#endif
+}
+
+static inline int __ub_timewait_check(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+	unsigned long mem_max, mem;
+	int tw_count;
+
+	ub = sock_bc(sk)->ub;
+	if (ub == NULL)
+		return 1;
+
+	tw_count = ub->ub_tw_count;
+	mem_max = sysctl_tcp_max_tw_kmem_fraction *
+		((ub->ub_parms[UB_KMEMSIZE].limit >> 10) + 1);
+	mem = tw_count * sk->sk_prot_creator->twsk_prot->twsk_slab->objuse;
+	return tw_count < sysctl_tcp_max_tw_buckets_ub && mem < mem_max;
+#else
+	return 1;
+#endif
+}
+
+#define ub_timewait_inc(tw, twdr) do {			\
+		if ((twdr)->ub_managed)			\
+			ub_timewait_mod(tw, 1);		\
+	} while (0)
+
+#define ub_timewait_dec(tw, twdr) do {			\
+		if ((twdr)->ub_managed)			\
+			ub_timewait_mod(tw, -1);	\
+	} while (0)
+
+#define ub_timewait_check(sk, twdr) ((!(twdr)->ub_managed) || \
+					__ub_timewait_check(sk))
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_page.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_page.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_page.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_page.h	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,59 @@
+/*
+ *  include/ub/ub_page.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGE_H_
+#define __UB_PAGE_H_
+
+#include <linux/config.h>
+
+/*
+ * Page_beancounters
+ */
+
+struct page;
+struct user_beancounter;
+
+#define PB_MAGIC 0x62700001UL
+
+struct page_beancounter {
+	unsigned long pb_magic;
+	struct page *page;
+	struct user_beancounter *ub;
+	struct page_beancounter *next_hash;
+	atomic_t refcnt;
+	unsigned shift;
+	union {
+		struct list_head page_list;
+		struct rcu_head rcu;
+	};
+};
+
+#if 0
+#define PB_REFCOUNT_BITS 24
+#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
+#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
+#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
+#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
+#define PB_COUNT_INC(c) ((c)++)
+#define PB_COUNT_DEC(c) ((c)--)
+#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
+#endif
+
+#define page_pbc(__page)        ((__page)->bc.page_pb)
+
+extern spinlock_t pb_lock;
+
+struct address_space;
+extern int is_shmem_mapping(struct address_space *);
+
+struct mm_struct;
+int ub_migrate_mm(struct mm_struct *, struct user_beancounter *);
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_sk.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_sk.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_sk.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_sk.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,48 @@
+/*
+ *  include/ub/ub_sk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SK_H_
+#define __UB_SK_H_
+
+#include <linux/config.h>
+#include <ub/ub_task.h>
+
+struct sock;
+struct sk_buff;
+
+struct skb_beancounter {
+	struct user_beancounter *ub;
+	unsigned long charged:27, resource:5;
+};
+
+struct sock_beancounter {
+	struct user_beancounter *ub;
+	/*
+	 * poll_reserv accounts space already charged for future sends.
+	 * It is required to make poll agree with sendmsg.
+	 * Additionally, it makes real charges (with taking bc spinlock)
+	 * in the send path rarer, speeding networking up.
+	 * For TCP (only): changes are protected by socket lock (not bc!)
+	 * For all proto: may be read without serialization in poll.
+	 */
+	unsigned long           poll_reserv;
+	unsigned long		forw_space;
+	/* fields below are protected by bc spinlock */
+	unsigned long           ub_waitspc;     /* space waiting for */
+	unsigned long           ub_wcharged;
+	struct list_head        ub_sock_list;
+};
+
+#define sock_bc(__sk)		(&(__sk)->sk_bc)
+#define skb_bc(__skb)		(&(__skb)->skb_bc)
+#define skbc_sock(__skbc)	(container_of(__skbc, struct sock, sk_bc))
+#define sock_has_ubc(__sk)	(sock_bc(__sk)->ub != NULL)
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_stat.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_stat.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_stat.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_stat.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,70 @@
+/*
+ *  include/ub/ub_stat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_STAT_H_
+#define __UB_STAT_H_
+
+/* sys_ubstat commands list */
+#define UBSTAT_READ_ONE			0x010000
+#define UBSTAT_READ_ALL			0x020000
+#define UBSTAT_READ_FULL		0x030000
+#define UBSTAT_UBLIST			0x040000
+#define UBSTAT_UBPARMNUM		0x050000
+#define UBSTAT_GETTIME			0x060000
+
+#define UBSTAT_CMD(func)		((func) & 0xF0000)
+#define UBSTAT_PARMID(func)		((func) & 0x0FFFF)
+
+#define TIME_MAX_SEC		(LONG_MAX / HZ)
+#define TIME_MAX_JIF		(TIME_MAX_SEC * HZ)
+
+typedef unsigned long ubstattime_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstattime_t	cur_time;
+} ubgettime_t;
+
+typedef struct {
+	long		maxinterval;
+	int		signum;
+} ubnotifrq_t;
+
+typedef struct {
+	unsigned long	maxheld;
+	unsigned long	failcnt;
+} ubstatparm_t;
+
+typedef struct {
+	unsigned long	barrier;
+	unsigned long	limit;
+	unsigned long	held;
+	unsigned long	maxheld;
+	unsigned long	minheld;
+	unsigned long	failcnt;
+	unsigned long __unused1;
+	unsigned long __unused2;
+} ubstatparmf_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstatparmf_t	param[0];
+} ubstatfull_t;
+
+#ifdef __KERNEL__
+struct ub_stat_notify {
+	struct list_head	list;
+	struct task_struct	*task;
+	int			signum;
+};
+#endif
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_task.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_task.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_task.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_task.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,69 @@
+/*
+ *  include/ub/ub_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TASK_H_
+#define __UB_TASK_H_
+
+struct user_beancounter;
+
+
+#ifdef CONFIG_USER_RESOURCE
+struct task_beancounter {
+	struct user_beancounter	*exec_ub;
+	struct user_beancounter *saved_ub;
+	struct user_beancounter	*task_ub;
+	struct user_beancounter *fork_sub;
+	unsigned long file_precharged, file_quant, file_count;
+	unsigned long kmem_precharged;
+	char dentry_alloc, pgfault_handle;
+	void *task_fnode, *task_freserv;
+	unsigned long oom_generation;
+	unsigned long task_data[4];
+	unsigned long pgfault_allot;
+};
+
+#define get_task_ub(__task)	((__task)->task_bc.task_ub)
+
+extern struct user_beancounter ub0;
+#define get_ub0()	(&ub0)
+
+#define ub_save_context(t)	do {				\
+		t->task_bc.saved_ub = t->task_bc.exec_ub;	\
+		t->task_bc.exec_ub = get_ub0();			\
+	} while (0)
+#define ub_restore_context(t)	do {				\
+		t->task_bc.exec_ub = t->task_bc.saved_ub;	\
+	} while (0)
+
+#define get_exec_ub()		(current->task_bc.exec_ub)
+#define set_exec_ub(__newub)		\
+({					\
+	struct user_beancounter *old;	\
+	struct task_beancounter *tbc;	\
+ 					\
+	tbc = &current->task_bc;	\
+	old = tbc->exec_ub;		\
+	tbc->exec_ub = __newub;		\
+	old;				\
+})
+
+void ub_init_task_bc(struct task_beancounter *);
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define get_ub0()		(NULL)
+#define get_exec_ub()		(NULL)
+#define get_task_ub(task)	(NULL)
+#define set_exec_ub(__ub)	(NULL)
+#define ub_save_context(t)	do { } while (0)
+#define ub_restore_context(t)	do { } while (0)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __UB_TASK_H_ */
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_tcp.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_tcp.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_tcp.h	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_tcp.h	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,76 @@
+/*
+ *  include/ub/ub_tcp.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TCP_H_
+#define __UB_TCP_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+static inline void ub_tcp_update_maxadvmss(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (!sock_has_ubc(sk))
+		return;
+	if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
+		return;
+
+	sock_bc(sk)->ub->ub_maxadvmss =
+		skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
+				+ sizeof(struct tcphdr)	+ tcp_sk(sk)->advmss);
+#endif
+}
+
+static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 0;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk)) {
+		struct user_beancounter *ub;
+
+		ub = sock_bc(sk)->ub;
+		if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
+			return 1;
+		if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
+			return 0;
+		return sk->sk_rcvbuf <= ub->ub_rmem_thres;
+	}
+#endif
+	return 1;
+}
+
+static inline int ub_tcp_memory_pressure(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
+#endif
+	return 0;
+}
+
+static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
+#endif
+	return 0;
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/include/ub/ub_vmpages.h kernel-2.6.18-417.el5-028stab121/include/ub/ub_vmpages.h
--- kernel-2.6.18-417.el5.orig/include/ub/ub_vmpages.h	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/include/ub/ub_vmpages.h	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,171 @@
+/*
+ *  include/ub/ub_vmpages.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGES_H_
+#define __UB_PAGES_H_
+
+#include <linux/linkage.h>
+#include <linux/config.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * Check whether vma has private or copy-on-write mapping.
+ * Should match checks in ub_protected_charge().
+ */
+#define VM_UB_PRIVATE(__flags, __file)					\
+		( ((__flags) & VM_WRITE) ?				\
+			(__file) == NULL || !((__flags) & VM_SHARED) :	\
+			0						\
+		)
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	 0 /* UB_DECLARE_FUNC retval with ubc off */
+#define PRIVVM_TO_PRIVATE	 1
+#define PRIVVM_TO_SHARED	 2
+
+UB_DECLARE_FUNC(int, ub_protected_charge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned long newflags,
+			struct vm_area_struct *vma))
+
+UB_DECLARE_VOID_FUNC(__ub_unused_privvm_inc(struct mm_struct *mm,
+			unsigned long num))
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_add(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long num))
+#define ub_unused_privvm_inc(mm, vma)	ub_unused_privvm_add(mm, vma, 1)
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_sub(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long num))
+#define ub_unused_privvm_dec(mm, vma)	ub_unused_privvm_sub(mm, vma, 1)
+
+UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct mm_struct *mm,
+			long sz))
+
+UB_DECLARE_FUNC(int, ub_memory_charge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned vm_flags,
+			struct file *vm_file,
+			int strict))
+UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned vm_flags,
+			struct file *vm_file))
+
+struct shmem_inode_info;
+UB_DECLARE_FUNC(int, ub_shmpages_charge(struct shmem_inode_info *i,
+			unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct shmem_inode_info *i,
+			unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct shmem_inode_info *shi))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
+			unsigned long size))
+#define ub_tmpfs_respages_dec(shi)	ub_tmpfs_respages_sub(shi, 1)
+
+#ifdef CONFIG_USER_RESOURCE
+#define shmi_ub_set(shi, ub)	do {			\
+		(shi)->shmi_ub = get_beancounter(ub);	\
+	} while (0)
+#define shmi_ub_put(shi)	do {			\
+		put_beancounter((shi)->shmi_ub);	\
+		(shi)->shmi_ub = NULL;			\
+	} while (0)
+#else
+#define shmi_ub_set(shi, ub)	do { } while (0)
+#define shmi_ub_put(shi)	do { } while (0)
+#endif
+
+UB_DECLARE_FUNC(int, ub_locked_charge(struct mm_struct *mm,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_locked_uncharge(struct mm_struct *mm,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
+			unsigned long size))
+
+UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end))
+#define pages_in_vma(vma)	(pages_in_vma_range(vma, \
+			vma->vm_start, vma->vm_end))
+
+#define UB_PAGE_WEIGHT_SHIFT 24
+#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
+
+/* call it under ub->ub_lock */
+static inline long ub_oomguarpages(struct user_beancounter *ub)
+{
+	long long held_pages;
+	int cpu;
+
+	held_pages = ub->ub_held_pages;
+	for_each_possible_cpu(cpu)
+		held_pages += ub_percpu(ub, cpu)->held_pages;
+	held_pages = max(0ll, held_pages);
+
+	return (held_pages >> UB_PAGE_WEIGHT_SHIFT) +
+		ub->ub_parms[UB_SWAPPAGES].held +
+		ub->ub_tmpfs_respages;
+}
+
+struct page_beancounter;
+#define PBC_COPY_SAME	((struct page_beancounter *) 1)
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	0
+#define PRIVVM_TO_PRIVATE	1
+#define PRIVVM_TO_SHARED	2
+
+extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
+
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+#define PB_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define PB_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define PB_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define PB_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc))
+PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num))
+PB_DECLARE_FUNC(int, pb_alloc_all(struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_add_ref(struct page *page,
+			struct mm_struct *mm,
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_dup_ref(struct page *page, 
+			struct mm_struct *mm, 
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page, 
+			struct mm_struct *mm))
+
+PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
+#endif
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+#define SWP_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define SWP_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define SWP_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define SWP_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+struct swap_info_struct;
+SWP_DECLARE_FUNC(int, ub_swap_init(struct swap_info_struct *si, pgoff_t n))
+SWP_DECLARE_VOID_FUNC(ub_swap_fini(struct swap_info_struct *si))
+SWP_DECLARE_VOID_FUNC(ub_swapentry_inc(struct swap_info_struct *si, pgoff_t n,
+			struct user_beancounter *ub))
+SWP_DECLARE_VOID_FUNC(ub_swapentry_dec(struct swap_info_struct *si, pgoff_t n))
diff -upr kernel-2.6.18-417.el5.orig/init/calibrate.c kernel-2.6.18-417.el5-028stab121/init/calibrate.c
--- kernel-2.6.18-417.el5.orig/init/calibrate.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/init/calibrate.c	2017-01-13 08:40:20.000000000 -0500
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 
 #include <asm/timex.h>
 
@@ -112,6 +113,60 @@ static unsigned long __devinit calibrate
 static unsigned long __devinit calibrate_delay_direct(void) {return 0;}
 #endif
 
+unsigned long cycles_per_jiffy, cycles_per_clock;
+
+static __devinit void calibrate_cycles(void)
+{
+	unsigned long ticks;
+	cycles_t time;
+
+	ticks = jiffies;
+	while (ticks == jiffies)
+		/* nothing */;
+	time = get_cycles();
+	ticks = jiffies;
+	while (ticks == jiffies)
+		/* nothing */;
+
+	time = get_cycles() - time;
+	cycles_per_jiffy = time;
+	if ((time >> 32) != 0) {
+		printk("CPU too fast! timings are incorrect\n");
+		cycles_per_jiffy = -1;
+	}
+}
+
+EXPORT_SYMBOL(cycles_per_jiffy);
+EXPORT_SYMBOL(cycles_per_clock);
+
+static __devinit void calc_cycles_per_jiffy(void)
+{
+#if 0
+	extern unsigned long fast_gettimeoffset_quotient;
+	unsigned long low, high;
+
+	if (fast_gettimeoffset_quotient != 0) {
+		__asm__("divl %2"
+				:"=a" (low), "=d" (high)
+				:"r" (fast_gettimeoffset_quotient),
+				"0" (0), "1" (1000000/HZ));
+
+		cycles_per_jiffy = low;
+	}
+#endif
+	if (cycles_per_jiffy == 0)
+		calibrate_cycles();
+
+	if (cycles_per_jiffy == 0) {
+		printk(KERN_WARNING "Cycles are stuck! "
+				"Some statistics will not be available.");
+		/* to prevent division by zero in cycles_to_(clocks|jiffies) */
+		cycles_per_jiffy = 1;
+		cycles_per_clock = 1;
+	} else
+		cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC);
+}
+
 /*
  * This is the number of bits of precision for the loops_per_jiffy.  Each
  * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
@@ -174,6 +229,9 @@ void __devinit calibrate_delay(void)
 				loops_per_jiffy &= ~loopbit;
 		}
 	}
+
+	calc_cycles_per_jiffy();
+
 	printk(KERN_CONT "%lu.%02lu BogoMIPS (lpj=%lu)\n",
 			loops_per_jiffy/(500000/HZ),
 			(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
diff -upr kernel-2.6.18-417.el5.orig/init/Kconfig kernel-2.6.18-417.el5-028stab121/init/Kconfig
--- kernel-2.6.18-417.el5.orig/init/Kconfig	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/init/Kconfig	2017-01-13 08:40:40.000000000 -0500
@@ -116,6 +116,15 @@ config SYSVIPC
 	  section 6.4 of the Linux Programmer's Guide, available from
 	  <http://www.tldp.org/guides.html>.
 
+config IPC_NS
+	bool "IPC Namespaces"
+	depends on SYSVIPC
+	default n
+	help
+	  Support ipc namespaces.  This allows containers, i.e. virtual
+	  environments, to use ipc namespaces to provide different ipc
+	  objects for different servers.  If unsure, say N.
+
 config POSIX_MQUEUE
 	bool "POSIX Message Queues"
 	depends on NET && EXPERIMENTAL
@@ -191,6 +200,14 @@ config TASK_IO_ACCOUNTING
 
 	  Say N if unsure.
 
+config UTS_NS
+	bool "UTS Namespaces"
+	default n
+	help
+	  Support uts namespaces.  This allows containers, i.e.
+	  vservers, to use uts namespaces to provide different
+	  uts info for different servers.  If unsure, say N.
+
 config AUDIT
 	bool "Auditing support"
 	depends on NET
@@ -411,6 +428,16 @@ config EVENTFD
 
 	  If unsure, say Y.
 
+config SIGNALFD
+	bool "Enable signalfd() system call" if EMBEDDED
+	select ANON_INODES
+	default y
+	help
+	  Enable the signalfd() system call that allows to receive signals
+	  on a file descriptor.
+
+	  If unsure, say Y.
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y
diff -upr kernel-2.6.18-417.el5.orig/init/main.c kernel-2.6.18-417.el5-028stab121/init/main.c
--- kernel-2.6.18-417.el5.orig/init/main.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/init/main.c	2017-01-13 08:40:41.000000000 -0500
@@ -52,6 +52,8 @@
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/io.h>
 #include <asm/bugs.h>
 #include <asm/setup.h>
@@ -83,6 +85,7 @@ extern void mca_init(void);
 extern void sbus_init(void);
 extern void sysctl_init(void);
 extern void signals_init(void);
+extern void fairsched_init_late(void);
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void prio_tree_init(void);
@@ -103,10 +106,31 @@ static inline void mark_rodata_ro(void) 
 #ifdef CONFIG_TC
 extern void tc_init(void);
 #endif
+extern void grsecurity_init(void);
 
 enum system_states system_state;
 EXPORT_SYMBOL(system_state);
 
+#ifdef CONFIG_VE
+extern void init_ve_system(void);
+extern void init_ve0(void);
+extern void prepare_ve0_process(struct task_struct *tsk);
+extern void prepare_ve0_proc_root(void);
+extern void prepare_ve0_sysctl(void);
+#else
+#define init_ve_system()		do { } while (0)
+#define init_ve0()			do { } while (0)
+#define prepare_ve0_process(tsk)	do { } while (0)
+#define prepare_ve0_proc_root()		do { } while (0)
+#define prepare_ve0_sysctl()		do { } while (0)
+#endif
+
+#if defined(CONFIG_VE) && defined(CONFIG_NET)
+extern void prepare_ve0_loopback(void);
+#else
+#define prepare_ve0_loopback()		do { } while (0)
+#endif
+
 /*
  * Boot command-line arguments
  */
@@ -495,6 +519,9 @@ asmlinkage void __init start_kernel(void
 
 	smp_setup_processor_id();
 
+	prepare_ve0_process(&init_task);
+	init_ve0();
+
 	/*
 	 * Need to run as early as possible, to initialize the
 	 * lockdep hash:
@@ -511,6 +538,7 @@ asmlinkage void __init start_kernel(void
  * enable them
  */
 	lock_kernel();
+	ub_init_early();
 	boot_cpu_init();
 	page_address_init();
 	printk(KERN_NOTICE);
@@ -598,6 +626,7 @@ asmlinkage void __init start_kernel(void
 #endif
 	fork_init(totalram_pages);
 	proc_caches_init();
+	ub_init_late();
 	buffer_init();
 	unnamed_dev_init();
 	key_init();
@@ -608,6 +637,8 @@ asmlinkage void __init start_kernel(void
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
 #ifdef CONFIG_PROC_FS
+	prepare_ve0_proc_root();
+	prepare_ve0_sysctl();
 	proc_root_init();
 #endif
 	cpuset_init();
@@ -618,6 +649,10 @@ asmlinkage void __init start_kernel(void
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+	ub_init_pbc();
+#endif
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
@@ -712,6 +747,9 @@ static void __init do_initcalls(void)
  */
 static void __init do_basic_setup(void)
 {
+	prepare_ve0_loopback();
+	init_ve_system();
+
 	/* drivers will send hotplug events */
 	init_workqueues();
 	usermodehelper_init();
@@ -736,7 +774,7 @@ __setup("nosoftlockup", nosoftlockup_set
 static void __init do_pre_smp_initcalls(void)
 {
 	extern int spawn_ksoftirqd(void);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 	extern int migration_init(void);
 
 	migration_init();
@@ -816,6 +854,12 @@ static int __init init(void * unused)
 	do_pre_smp_initcalls();
 
 	smp_init();
+
+	/* 
+	 * This should be done after all CPUs are known to
+	 * be online.  smp_init() gives us confidence in it.
+	 */
+	fairsched_init_late();
 	sched_init_smp();
 
 	cpuset_init_smp();
@@ -841,6 +885,8 @@ static int __init init(void * unused)
 		prepare_namespace();
 	}
 
+	grsecurity_init();
+
 	/*
 	 * Ok, we have completed the initial bootup, and
 	 * we're essentially up and running. Get rid of the
diff -upr kernel-2.6.18-417.el5.orig/init/version.c kernel-2.6.18-417.el5-028stab121/init/version.c
--- kernel-2.6.18-417.el5.orig/init/version.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/init/version.c	2017-01-13 08:40:19.000000000 -0500
@@ -12,39 +12,34 @@
 #include <linux/utsname.h>
 #include <linux/utsrelease.h>
 #include <linux/version.h>
+#include <linux/sched.h>
 
 #define version(a) Version_ ## a
 #define version_string(a) version(a)
 
 int version_string(LINUX_VERSION_CODE);
 
-struct new_utsname system_utsname = {
-	.sysname	= UTS_SYSNAME,
-	.nodename	= UTS_NODENAME,
-	.release	= UTS_RELEASE,
-	.version	= UTS_VERSION,
-	.machine	= UTS_MACHINE,
-	.domainname	= UTS_DOMAINNAME,
-};
-
-EXPORT_SYMBOL(system_utsname);
-
 struct uts_namespace init_uts_ns = {
-        .kref = {
-                .refcount       = ATOMIC_INIT(2),
-        },
-        .name = {
-                .sysname        = UTS_SYSNAME,
-                .nodename       = UTS_NODENAME,
-                .release        = UTS_RELEASE,
-                .version        = UTS_VERSION,
-                .machine        = UTS_MACHINE,
-                .domainname     = UTS_DOMAINNAME,
-        },
+	.kref = {
+		.refcount	= ATOMIC_INIT(2),
+	},
+	.name = {
+		.sysname	= UTS_SYSNAME,
+		.nodename	= UTS_NODENAME,
+		.release	= UTS_RELEASE,
+		.version	= UTS_VERSION,
+		.machine	= UTS_MACHINE,
+		.domainname	= UTS_DOMAINNAME,
+	},
 };
-
 EXPORT_SYMBOL_GPL(init_uts_ns);
 
+struct new_utsname virt_utsname = {
+	/* we need only this field */
+	.release        = UTS_RELEASE,
+};
+EXPORT_SYMBOL(virt_utsname);
+
 const char linux_banner[] =
 	"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 	LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
diff -upr kernel-2.6.18-417.el5.orig/ipc/msg.c kernel-2.6.18-417.el5-028stab121/ipc/msg.c
--- kernel-2.6.18-417.el5.orig/ipc/msg.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/ipc/msg.c	2017-01-13 08:40:24.000000000 -0500
@@ -16,6 +16,10 @@
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/capability.h>
@@ -31,16 +35,12 @@
 #include <linux/audit.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
 
 #include <asm/current.h>
 #include <asm/uaccess.h>
 #include "util.h"
 
-/* sysctl: */
-int msg_ctlmax = MSGMAX;
-int msg_ctlmnb = MSGMNB;
-int msg_ctlmni = MSGMNI;
-
 /*
  * one msg_receiver structure for each sleeping receiver:
  */
@@ -66,33 +66,78 @@ struct msg_sender {
 #define SEARCH_NOTEQUAL		3
 #define SEARCH_LESSEQUAL	4
 
-static atomic_t msg_bytes =	ATOMIC_INIT(0);
-static atomic_t msg_hdrs =	ATOMIC_INIT(0);
+static struct ipc_ids init_msg_ids;
 
-static struct ipc_ids msg_ids;
+#define msg_ids(ns)	(*((ns)->ids[IPC_MSG_IDS]))
 
-#define msg_lock(id)		((struct msg_queue *)ipc_lock(&msg_ids, id))
+#define msg_lock(ns, id)	((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
 #define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(id)		((struct msg_queue *)ipc_rmid(&msg_ids, id))
-#define msg_checkid(msq, msgid)	ipc_checkid(&msg_ids, &msq->q_perm, msgid)
-#define msg_buildid(id, seq)	ipc_buildid(&msg_ids, id, seq)
+#define msg_rmid(ns, id)	((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
+#define msg_checkid(ns, msq, msgid)	\
+	ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid)
+#define msg_buildid(ns, id, seq) \
+	ipc_buildid(&msg_ids(ns), id, seq)
 
-static void freeque(struct msg_queue *msq, int id);
-static int newque(key_t key, int msgflg);
+static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
+static int newque (struct ipc_namespace *ns, key_t key, int msqid, int msgflg);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
 
+static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+	ns->ids[IPC_MSG_IDS] = ids;
+	ns->msg_ctlmax = MSGMAX;
+	ns->msg_ctlmnb = MSGMNB;
+	ns->msg_ctlmni = MSGMNI;
+	atomic_set(&ns->msg_bytes, 0);
+	atomic_set(&ns->msg_hdrs, 0);
+	ipc_init_ids(ids, ns->msg_ctlmni);
+}
+
+#ifdef CONFIG_IPC_NS
+int msg_init_ns(struct ipc_namespace *ns)
+{
+	struct ipc_ids *ids;
+
+	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+	if (ids == NULL)
+		return -ENOMEM;
+
+	__msg_init_ns(ns, ids);
+	return 0;
+}
+
+void msg_exit_ns(struct ipc_namespace *ns)
+{
+	int i;
+	struct msg_queue *msq;
+
+	mutex_lock(&msg_ids(ns).mutex);
+	for (i = 0; i <= msg_ids(ns).max_id; i++) {
+		msq = msg_lock(ns, i);
+		if (msq == NULL)
+			continue;
+
+		freeque(ns, msq, i);
+	}
+	mutex_unlock(&msg_ids(ns).mutex);
+
+	ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
+	kfree(ns->ids[IPC_MSG_IDS]);
+	ns->ids[IPC_MSG_IDS] = NULL;
+}
+#endif
+
 void __init msg_init(void)
 {
-	ipc_init_ids(&msg_ids, msg_ctlmni);
+	__msg_init_ns(&init_ipc_ns, &init_msg_ids);
 	ipc_init_proc_interface("sysvipc/msg",
 				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
-				&msg_ids,
-				sysvipc_msg_proc_show);
+				IPC_MSG_IDS, sysvipc_msg_proc_show);
 }
 
-static int newque(key_t key, int msgflg)
+static int newque (struct ipc_namespace *ns, key_t key, int msqid, int msgflg)
 {
 	struct msg_queue *msq;
 	int id, retval;
@@ -111,18 +156,18 @@ static int newque(key_t key, int msgflg)
 		return retval;
 	}
 
-	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
+	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni, msqid);
 	if (id == -1) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
 		return -ENOSPC;
 	}
 
-	msq->q_id = msg_buildid(id, msq->q_perm.seq);
+	msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
 	msq->q_stime = msq->q_rtime = 0;
 	msq->q_ctime = get_seconds();
 	msq->q_cbytes = msq->q_qnum = 0;
-	msq->q_qbytes = msg_ctlmnb;
+	msq->q_qbytes = ns->msg_ctlmnb;
 	msq->q_lspid = msq->q_lrpid = 0;
 	INIT_LIST_HEAD(&msq->q_messages);
 	INIT_LIST_HEAD(&msq->q_receivers);
@@ -186,13 +231,13 @@ static void expunge_all(struct msg_queue
  * msg_ids.mutex and the spinlock for this message queue is hold
  * before freeque() is called. msg_ids.mutex remains locked on exit.
  */
-static void freeque(struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
 {
 	struct list_head *tmp;
 
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
-	msq = msg_rmid(id);
+	msq = msg_rmid(ns, id);
 	msg_unlock(msq);
 
 	tmp = msq->q_messages.next;
@@ -200,10 +245,10 @@ static void freeque(struct msg_queue *ms
 		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
 
 		tmp = tmp->next;
-		atomic_dec(&msg_hdrs);
+		atomic_dec(&ns->msg_hdrs);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &msg_bytes);
+	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
 	security_msg_queue_free(msq);
 	ipc_rcu_putref(msq);
 }
@@ -212,24 +257,27 @@ asmlinkage long sys_msgget(key_t key, in
 {
 	struct msg_queue *msq;
 	int id, ret = -EPERM;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
 	
-	mutex_lock(&msg_ids.mutex);
+	mutex_lock(&msg_ids(ns).mutex);
 	if (key == IPC_PRIVATE) 
-		ret = newque(key, msgflg);
-	else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
+		ret = newque(ns, key, -1, msgflg);
+	else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
 		if (!(msgflg & IPC_CREAT))
 			ret = -ENOENT;
 		else
-			ret = newque(key, msgflg);
+			ret = newque(ns, key, -1, msgflg);
 	} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
 		ret = -EEXIST;
 	} else {
-		msq = msg_lock(id);
+		msq = msg_lock(ns, id);
 		BUG_ON(msq == NULL);
 		if (ipcperms(&msq->q_perm, msgflg))
 			ret = -EACCES;
 		else {
-			int qid = msg_buildid(id, msq->q_perm.seq);
+			int qid = msg_buildid(ns, id, msq->q_perm.seq);
 
 			ret = security_msg_queue_associate(msq, msgflg);
 			if (!ret)
@@ -237,7 +285,7 @@ asmlinkage long sys_msgget(key_t key, in
 		}
 		msg_unlock(msq);
 	}
-	mutex_unlock(&msg_ids.mutex);
+	mutex_unlock(&msg_ids(ns).mutex);
 
 	return ret;
 }
@@ -341,11 +389,13 @@ asmlinkage long sys_msgctl(int msqid, in
 	struct msq_setbuf setbuf;
 	struct msg_queue *msq;
 	int err, version;
+	struct ipc_namespace *ns;
 
 	if (msqid < 0 || cmd < 0)
 		return -EINVAL;
 
 	version = ipc_parse_version(&cmd);
+	ns = current->nsproxy->ipc_ns;
 
 	switch (cmd) {
 	case IPC_INFO:
@@ -366,23 +416,23 @@ asmlinkage long sys_msgctl(int msqid, in
 			return err;
 
 		memset(&msginfo, 0, sizeof(msginfo));
-		msginfo.msgmni = msg_ctlmni;
-		msginfo.msgmax = msg_ctlmax;
-		msginfo.msgmnb = msg_ctlmnb;
+		msginfo.msgmni = ns->msg_ctlmni;
+		msginfo.msgmax = ns->msg_ctlmax;
+		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		mutex_lock(&msg_ids.mutex);
+		mutex_lock(&msg_ids(ns).mutex);
 		if (cmd == MSG_INFO) {
-			msginfo.msgpool = msg_ids.in_use;
-			msginfo.msgmap = atomic_read(&msg_hdrs);
-			msginfo.msgtql = atomic_read(&msg_bytes);
+			msginfo.msgpool = msg_ids(ns).in_use;
+			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
+			msginfo.msgtql = atomic_read(&ns->msg_bytes);
 		} else {
 			msginfo.msgmap = MSGMAP;
 			msginfo.msgpool = MSGPOOL;
 			msginfo.msgtql = MSGTQL;
 		}
-		max_id = msg_ids.max_id;
-		mutex_unlock(&msg_ids.mutex);
+		max_id = msg_ids(ns).max_id;
+		mutex_unlock(&msg_ids(ns).mutex);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
@@ -395,20 +445,20 @@ asmlinkage long sys_msgctl(int msqid, in
 
 		if (!buf)
 			return -EFAULT;
-		if (cmd == MSG_STAT && msqid >= msg_ids.entries->size)
+		if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
 			return -EINVAL;
 
 		memset(&tbuf, 0, sizeof(tbuf));
 
-		msq = msg_lock(msqid);
+		msq = msg_lock(ns, msqid);
 		if (msq == NULL)
 			return -EINVAL;
 
 		if (cmd == MSG_STAT) {
-			success_return = msg_buildid(msqid, msq->q_perm.seq);
+			success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
 		} else {
 			err = -EIDRM;
-			if (msg_checkid(msq, msqid))
+			if (msg_checkid(ns, msq, msqid))
 				goto out_unlock;
 			success_return = 0;
 		}
@@ -446,14 +496,14 @@ asmlinkage long sys_msgctl(int msqid, in
 		return  -EINVAL;
 	}
 
-	mutex_lock(&msg_ids.mutex);
-	msq = msg_lock(msqid);
+	mutex_lock(&msg_ids(ns).mutex);
+	msq = msg_lock(ns, msqid);
 	err = -EINVAL;
 	if (msq == NULL)
 		goto out_up;
 
 	err = -EIDRM;
-	if (msg_checkid(msq, msqid))
+	if (msg_checkid(ns, msq, msqid))
 		goto out_unlock_up;
 	ipcp = &msq->q_perm;
 
@@ -469,7 +519,7 @@ asmlinkage long sys_msgctl(int msqid, in
 
 	err = -EPERM;
 	if (current->euid != ipcp->cuid &&
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
 		/* We _could_ check for CAP_CHOWN above, but we don't */
 		goto out_unlock_up;
 
@@ -481,7 +531,7 @@ asmlinkage long sys_msgctl(int msqid, in
 	case IPC_SET:
 	{
 		err = -EPERM;
-		if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
+		if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
 			goto out_unlock_up;
 
 		msq->q_qbytes = setbuf.qbytes;
@@ -503,12 +553,12 @@ asmlinkage long sys_msgctl(int msqid, in
 		break;
 	}
 	case IPC_RMID:
-		freeque(msq, msqid);
+		freeque(ns, msq, msqid);
 		break;
 	}
 	err = 0;
 out_up:
-	mutex_unlock(&msg_ids.mutex);
+	mutex_unlock(&msg_ids(ns).mutex);
 	return err;
 out_unlock_up:
 	msg_unlock(msq);
@@ -562,7 +612,7 @@ static inline int pipelined_send(struct 
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
-				msq->q_lrpid = msr->r_tsk->pid;
+				msq->q_lrpid = virt_pid(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
@@ -581,8 +631,11 @@ long do_msgsnd(int msqid, long mtype, vo
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int err;
+	struct ipc_namespace *ns;
 
-	if (msgsz > msg_ctlmax || (long) msgsz < 0 || msqid < 0)
+	ns = current->nsproxy->ipc_ns;
+
+	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
 		return -EINVAL;
 	if (mtype < 1)
 		return -EINVAL;
@@ -594,13 +647,13 @@ long do_msgsnd(int msqid, long mtype, vo
 	msg->m_type = mtype;
 	msg->m_ts = msgsz;
 
-	msq = msg_lock(msqid);
+	msq = msg_lock(ns, msqid);
 	err = -EINVAL;
 	if (msq == NULL)
 		goto out_free;
 
 	err= -EIDRM;
-	if (msg_checkid(msq, msqid))
+	if (msg_checkid(ns, msq, msqid))
 		goto out_unlock_free;
 
 	for (;;) {
@@ -648,7 +701,7 @@ long do_msgsnd(int msqid, long mtype, vo
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = virt_tgid(current);
 	msq->q_stime = get_seconds();
 
 	if (!pipelined_send(msq, msg)) {
@@ -656,8 +709,8 @@ long do_msgsnd(int msqid, long mtype, vo
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &msg_bytes);
-		atomic_inc(&msg_hdrs);
+		atomic_add(msgsz, &ns->msg_bytes);
+		atomic_inc(&ns->msg_hdrs);
 	}
 
 	err = 0;
@@ -706,17 +759,19 @@ long do_msgrcv(int msqid, long *pmtype, 
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int mode;
+	struct ipc_namespace *ns;
 
 	if (msqid < 0 || (long) msgsz < 0)
 		return -EINVAL;
 	mode = convert_mode(&msgtyp, msgflg);
+	ns = current->nsproxy->ipc_ns;
 
-	msq = msg_lock(msqid);
+	msq = msg_lock(ns, msqid);
 	if (msq == NULL)
 		return -EINVAL;
 
 	msg = ERR_PTR(-EIDRM);
-	if (msg_checkid(msq, msqid))
+	if (msg_checkid(ns, msq, msqid))
 		goto out_unlock;
 
 	for (;;) {
@@ -761,10 +816,10 @@ long do_msgrcv(int msqid, long *pmtype, 
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = virt_tgid(current);
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &msg_bytes);
-			atomic_dec(&msg_hdrs);
+			atomic_sub(msg->m_ts, &ns->msg_bytes);
+			atomic_dec(&ns->msg_hdrs);
 			ss_wakeup(&msq->q_senders, 0);
 			msg_unlock(msq);
 			break;
@@ -892,3 +947,53 @@ static int sysvipc_msg_proc_show(struct 
 			msq->q_ctime);
 }
 #endif
+
+#ifdef CONFIG_VE
+#include <linux/module.h>
+
+int sysvipc_setup_msg(key_t key, int msqid, int msgflg)
+{
+	int err = 0;
+	struct msg_queue *msq;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	mutex_lock(&msg_ids(ns).mutex);
+	msq = msg_lock(ns, msqid);
+	if (!msq) {
+		err = newque(ns, key, msqid, msgflg);
+		if (err >= 0)
+			msq = msg_lock(ns, msqid);
+	}
+	if (msq)
+		msg_unlock(msq);
+	mutex_unlock(&msg_ids(ns).mutex);
+
+	return err > 0 ? 0 : err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_msg);
+
+int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg)
+{
+	int i;
+	int err = 0;
+	struct msg_queue * msq;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	mutex_lock(&msg_ids(ns).mutex);
+	for(i = 0; i <= msg_ids(ns).max_id; i++) {
+		if ((msq = msg_lock(ns, i)) == NULL)
+			continue;
+		err = func(msg_buildid(ns, i, msq->q_perm.seq), msq, arg);
+		msg_unlock(msq);
+		if (err)
+			break;
+	}
+	mutex_unlock(&msg_ids(ns).mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_msg);
+#endif
diff -upr kernel-2.6.18-417.el5.orig/ipc/msgutil.c kernel-2.6.18-417.el5-028stab121/ipc/msgutil.c
--- kernel-2.6.18-417.el5.orig/ipc/msgutil.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/ipc/msgutil.c	2017-01-13 08:40:24.000000000 -0500
@@ -8,6 +8,7 @@
  * See the file COPYING for more details.
  */
 
+#include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/security.h>
@@ -17,6 +18,8 @@
 
 #include "util.h"
 
+#include <ub/ub_mem.h>
+
 struct msg_msgseg {
 	struct msg_msgseg* next;
 	/* the next part of the message follows immediately */
@@ -25,38 +28,40 @@ struct msg_msgseg {
 #define DATALEN_MSG	(PAGE_SIZE-sizeof(struct msg_msg))
 #define DATALEN_SEG	(PAGE_SIZE-sizeof(struct msg_msgseg))
 
-struct msg_msg *load_msg(const void __user *src, int len)
+struct msg_msg *sysv_msg_load(int (*load)(void * dst, int len, int offset,
+					  void * data), int len, void * data)
 {
 	struct msg_msg *msg;
 	struct msg_msgseg **pseg;
 	int err;
 	int alen;
+	int offset = 0;
 
 	alen = len;
 	if (alen > DATALEN_MSG)
 		alen = DATALEN_MSG;
 
-	msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+	msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
 	if (msg == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	msg->next = NULL;
 	msg->security = NULL;
 
-	if (copy_from_user(msg + 1, src, alen)) {
+	if (load(msg + 1, alen, offset, data)) {
 		err = -EFAULT;
 		goto out_err;
 	}
 
 	len -= alen;
-	src = ((char __user *)src) + alen;
+	offset += alen;
 	pseg = &msg->next;
 	while (len > 0) {
 		struct msg_msgseg *seg;
 		alen = len;
 		if (alen > DATALEN_SEG)
 			alen = DATALEN_SEG;
-		seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
+		seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
 						 GFP_KERNEL);
 		if (seg == NULL) {
 			err = -ENOMEM;
@@ -64,13 +69,13 @@ struct msg_msg *load_msg(const void __us
 		}
 		*pseg = seg;
 		seg->next = NULL;
-		if (copy_from_user(seg + 1, src, alen)) {
+		if (load(seg + 1, alen, offset, data)) {
 			err = -EFAULT;
 			goto out_err;
 		}
 		pseg = &seg->next;
 		len -= alen;
-		src = ((char __user *)src) + alen;
+		offset += alen;
 	}
 
 	err = security_msg_msg_alloc(msg);
@@ -83,33 +88,58 @@ out_err:
 	free_msg(msg);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(sysv_msg_load);
 
-int store_msg(void __user *dest, struct msg_msg *msg, int len)
+static int do_load_msg(void * dst, int len, int offset, void * data)
+{
+	return copy_from_user(dst, data + offset, len);
+}
+
+struct msg_msg *load_msg(const void __user *src, int len)
+{
+	return sysv_msg_load(do_load_msg, len, (void*)src);
+}
+
+int sysv_msg_store(struct msg_msg *msg,
+		   int (*store)(void * src, int len, int offset, void * data),
+		   int len, void * data)
 {
 	int alen;
+	int offset = 0;
 	struct msg_msgseg *seg;
-
+	
 	alen = len;
 	if (alen > DATALEN_MSG)
 		alen = DATALEN_MSG;
-	if (copy_to_user(dest, msg + 1, alen))
+	if (store(msg + 1, alen, offset, data))
 		return -1;
 
 	len -= alen;
-	dest = ((char __user *)dest) + alen;
+	offset += alen;
 	seg = msg->next;
 	while (len > 0) {
 		alen = len;
 		if (alen > DATALEN_SEG)
 			alen = DATALEN_SEG;
-		if (copy_to_user(dest, seg + 1, alen))
+		if (store(seg + 1, alen, offset, data))
 			return -1;
 		len -= alen;
-		dest = ((char __user *)dest) + alen;
+		offset += alen;
 		seg = seg->next;
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(sysv_msg_store);
+
+static int do_store_msg(void * src, int len, int offset, void * data)
+{
+	return copy_to_user(data + offset, src, len);
+}
+
+int store_msg(void __user *dest, struct msg_msg *msg, int len)
+{
+	return sysv_msg_store(msg, do_store_msg, len, dest);
+}
 
 void free_msg(struct msg_msg *msg)
 {
diff -upr kernel-2.6.18-417.el5.orig/ipc/sem.c kernel-2.6.18-417.el5-028stab121/ipc/sem.c
--- kernel-2.6.18-417.el5.orig/ipc/sem.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/ipc/sem.c	2017-01-13 08:40:24.000000000 -0500
@@ -64,6 +64,10 @@
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/slab.h>
@@ -78,22 +82,25 @@
 #include <linux/capability.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include "util.h"
 
+#define sem_ids(ns)	(*((ns)->ids[IPC_SEM_IDS]))
+
+#define sem_lock(ns, id)	((struct sem_array*)ipc_lock(&sem_ids(ns), id))
+#define sem_unlock(sma)		ipc_unlock(&(sma)->sem_perm)
+#define sem_rmid(ns, id)	((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
+#define sem_checkid(ns, sma, semid)	\
+	ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
+#define sem_buildid(ns, id, seq) \
+	ipc_buildid(&sem_ids(ns), id, seq)
 
-#define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
-#define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(id)	((struct sem_array*)ipc_rmid(&sem_ids,id))
-#define sem_checkid(sma, semid)	\
-	ipc_checkid(&sem_ids,&sma->sem_perm,semid)
-#define sem_buildid(id, seq) \
-	ipc_buildid(&sem_ids, id, seq)
-static struct ipc_ids sem_ids;
+static struct ipc_ids init_sem_ids;
 
-static int newary (key_t, int, int);
-static void freeary (struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, key_t, int, int, int);
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -110,22 +117,62 @@ static int sysvipc_sem_proc_show(struct 
  *	
  */
 
-int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
-#define sc_semmsl	(sem_ctls[0])
-#define sc_semmns	(sem_ctls[1])
-#define sc_semopm	(sem_ctls[2])
-#define sc_semmni	(sem_ctls[3])
+#define sc_semmsl	sem_ctls[0]
+#define sc_semmns	sem_ctls[1]
+#define sc_semopm	sem_ctls[2]
+#define sc_semmni	sem_ctls[3]
+
+static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+	ns->ids[IPC_SEM_IDS] = ids;
+	ns->sc_semmsl = SEMMSL;
+	ns->sc_semmns = SEMMNS;
+	ns->sc_semopm = SEMOPM;
+	ns->sc_semmni = SEMMNI;
+	ns->used_sems = 0;
+	ipc_init_ids(ids, ns->sc_semmni);
+}
+
+#ifdef CONFIG_IPC_NS
+int sem_init_ns(struct ipc_namespace *ns)
+{
+	struct ipc_ids *ids;
+
+	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+	if (ids == NULL)
+		return -ENOMEM;
+
+	__sem_init_ns(ns, ids);
+	return 0;
+}
+
+void sem_exit_ns(struct ipc_namespace *ns)
+{
+	int i;
+	struct sem_array *sma;
+
+	mutex_lock(&sem_ids(ns).mutex);
+	for (i = 0; i <= sem_ids(ns).max_id; i++) {
+		sma = sem_lock(ns, i);
+		if (sma == NULL)
+			continue;
+
+		freeary(ns, sma, i);
+	}
+	mutex_unlock(&sem_ids(ns).mutex);
 
-static int used_sems;
+	ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
+	kfree(ns->ids[IPC_SEM_IDS]);
+	ns->ids[IPC_SEM_IDS] = NULL;
+}
+#endif
 
 void __init sem_init (void)
 {
-	used_sems = 0;
-	ipc_init_ids(&sem_ids,sc_semmni);
+	__sem_init_ns(&init_ipc_ns, &init_sem_ids);
 	ipc_init_proc_interface("sysvipc/sem",
 				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
-				&sem_ids,
-				sysvipc_sem_proc_show);
+				IPC_SEM_IDS, sysvipc_sem_proc_show);
 }
 
 /*
@@ -162,7 +209,8 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP	1
 
-static int newary (key_t key, int nsems, int semflg)
+static int newary (struct ipc_namespace *ns, key_t key, int semid,
+		int nsems, int semflg)
 {
 	int id;
 	int retval;
@@ -171,7 +219,7 @@ static int newary (key_t key, int nsems,
 
 	if (!nsems)
 		return -EINVAL;
-	if (used_sems + nsems > sc_semmns)
+	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
 	size = sizeof (*sma) + nsems * sizeof (struct sem);
@@ -191,15 +239,15 @@ static int newary (key_t key, int nsems,
 		return retval;
 	}
 
-	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
+	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni, semid);
 	if(id == -1) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
 		return -ENOSPC;
 	}
-	used_sems += nsems;
+	ns->used_sems += nsems;
 
-	sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
+	sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
@@ -215,29 +263,32 @@ asmlinkage long sys_semget (key_t key, i
 {
 	int id, err = -EINVAL;
 	struct sem_array *sma;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
 
-	if (nsems < 0 || nsems > sc_semmsl)
+	if (nsems < 0 || nsems > ns->sc_semmsl)
 		return -EINVAL;
-	mutex_lock(&sem_ids.mutex);
+	mutex_lock(&sem_ids(ns).mutex);
 	
 	if (key == IPC_PRIVATE) {
-		err = newary(key, nsems, semflg);
-	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) {  /* key not used */
+		err = newary(ns, key, -1, nsems, semflg);
+	} else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) {  /* key not used */
 		if (!(semflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newary(key, nsems, semflg);
+			err = newary(ns, key, -1, nsems, semflg);
 	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
 		err = -EEXIST;
 	} else {
-		sma = sem_lock(id);
+		sma = sem_lock(ns, id);
 		BUG_ON(sma==NULL);
 		if (nsems > sma->sem_nsems)
 			err = -EINVAL;
 		else if (ipcperms(&sma->sem_perm, semflg))
 			err = -EACCES;
 		else {
-			int semid = sem_buildid(id, sma->sem_perm.seq);
+			int semid = sem_buildid(ns, id, sma->sem_perm.seq);
 			err = security_sem_associate(sma, semflg);
 			if (!err)
 				err = semid;
@@ -245,7 +296,7 @@ asmlinkage long sys_semget (key_t key, i
 		sem_unlock(sma);
 	}
 
-	mutex_unlock(&sem_ids.mutex);
+	mutex_unlock(&sem_ids(ns).mutex);
 	return err;
 }
 
@@ -444,7 +495,7 @@ static int count_semzcnt (struct sem_arr
  * the spinlock for this semaphore set hold. sem_ids.mutex remains locked
  * on exit.
  */
-static void freeary (struct sem_array *sma, int id)
+static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
 {
 	struct sem_undo *un, *next;
 	struct sem_queue *q;
@@ -480,10 +531,10 @@ static void freeary (struct sem_array *s
 	}
 
 	/* Remove the semaphore set from the ID array*/
-	sma = sem_rmid(id);
+	sma = sem_rmid(ns, id);
 	sem_unlock(sma);
 
-	used_sems -= sma->sem_nsems;
+	ns->used_sems -= sma->sem_nsems;
 	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
 	security_sem_free(sma);
 	ipc_rcu_putref(sma);
@@ -513,7 +564,8 @@ static unsigned long copy_semid_to_user(
 	}
 }
 
-static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
+		int cmd, int version, union semun arg)
 {
 	int err = -EINVAL;
 	struct sem_array *sma;
@@ -530,24 +582,24 @@ static int semctl_nolock(int semid, int 
 			return err;
 		
 		memset(&seminfo,0,sizeof(seminfo));
-		seminfo.semmni = sc_semmni;
-		seminfo.semmns = sc_semmns;
-		seminfo.semmsl = sc_semmsl;
-		seminfo.semopm = sc_semopm;
+		seminfo.semmni = ns->sc_semmni;
+		seminfo.semmns = ns->sc_semmns;
+		seminfo.semmsl = ns->sc_semmsl;
+		seminfo.semopm = ns->sc_semopm;
 		seminfo.semvmx = SEMVMX;
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		mutex_lock(&sem_ids.mutex);
+		mutex_lock(&sem_ids(ns).mutex);
 		if (cmd == SEM_INFO) {
-			seminfo.semusz = sem_ids.in_use;
-			seminfo.semaem = used_sems;
+			seminfo.semusz = sem_ids(ns).in_use;
+			seminfo.semaem = ns->used_sems;
 		} else {
 			seminfo.semusz = SEMUSZ;
 			seminfo.semaem = SEMAEM;
 		}
-		max_id = sem_ids.max_id;
-		mutex_unlock(&sem_ids.mutex);
+		max_id = sem_ids(ns).max_id;
+		mutex_unlock(&sem_ids(ns).mutex);
 		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
@@ -557,12 +609,12 @@ static int semctl_nolock(int semid, int 
 		struct semid64_ds tbuf;
 		int id;
 
-		if(semid >= sem_ids.entries->size)
+		if(semid >= sem_ids(ns).entries->size)
 			return -EINVAL;
 
 		memset(&tbuf,0,sizeof(tbuf));
 
-		sma = sem_lock(semid);
+		sma = sem_lock(ns, semid);
 		if(sma == NULL)
 			return -EINVAL;
 
@@ -574,7 +626,7 @@ static int semctl_nolock(int semid, int 
 		if (err)
 			goto out_unlock;
 
-		id = sem_buildid(semid, sma->sem_perm.seq);
+		id = sem_buildid(ns, semid, sma->sem_perm.seq);
 
 		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
 		tbuf.sem_otime  = sma->sem_otime;
@@ -594,7 +646,8 @@ out_unlock:
 	return err;
 }
 
-static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+		int cmd, int version, union semun arg)
 {
 	struct sem_array *sma;
 	struct sem* curr;
@@ -603,14 +656,14 @@ static int semctl_main(int semid, int se
 	ushort* sem_io = fast_sem_io;
 	int nsems;
 
-	sma = sem_lock(semid);
+	sma = sem_lock(ns, semid);
 	if(sma==NULL)
 		return -EINVAL;
 
 	nsems = sma->sem_nsems;
 
 	err=-EIDRM;
-	if (sem_checkid(sma,semid))
+	if (sem_checkid(ns,sma,semid))
 		goto out_unlock;
 
 	err = -EACCES;
@@ -759,7 +812,7 @@ static int semctl_main(int semid, int se
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = virt_tgid(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -815,7 +868,8 @@ static inline unsigned long copy_semid_f
 	}
 }
 
-static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
+		int cmd, int version, union semun arg)
 {
 	struct sem_array *sma;
 	int err;
@@ -826,11 +880,11 @@ static int semctl_down(int semid, int se
 		if(copy_semid_from_user (&setbuf, arg.buf, version))
 			return -EFAULT;
 	}
-	sma = sem_lock(semid);
+	sma = sem_lock(ns, semid);
 	if(sma==NULL)
 		return -EINVAL;
 
-	if (sem_checkid(sma,semid)) {
+	if (sem_checkid(ns,sma,semid)) {
 		err=-EIDRM;
 		goto out_unlock;
 	}	
@@ -846,7 +900,7 @@ static int semctl_down(int semid, int se
 			goto out_unlock;
 	}
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
 	    	err=-EPERM;
 		goto out_unlock;
 	}
@@ -857,7 +911,7 @@ static int semctl_down(int semid, int se
 
 	switch(cmd){
 	case IPC_RMID:
-		freeary(sma, semid);
+		freeary(ns, sma, semid);
 		err = 0;
 		break;
 	case IPC_SET:
@@ -885,17 +939,19 @@ asmlinkage long sys_semctl (int semid, i
 {
 	int err = -EINVAL;
 	int version;
+	struct ipc_namespace *ns;
 
 	if (semid < 0)
 		return -EINVAL;
 
 	version = ipc_parse_version(&cmd);
+	ns = current->nsproxy->ipc_ns;
 
 	switch(cmd) {
 	case IPC_INFO:
 	case SEM_INFO:
 	case SEM_STAT:
-		err = semctl_nolock(semid,semnum,cmd,version,arg);
+		err = semctl_nolock(ns,semid,semnum,cmd,version,arg);
 		return err;
 	case GETALL:
 	case GETVAL:
@@ -905,13 +961,13 @@ asmlinkage long sys_semctl (int semid, i
 	case IPC_STAT:
 	case SETVAL:
 	case SETALL:
-		err = semctl_main(semid,semnum,cmd,version,arg);
+		err = semctl_main(ns,semid,semnum,cmd,version,arg);
 		return err;
 	case IPC_RMID:
 	case IPC_SET:
-		mutex_lock(&sem_ids.mutex);
-		err = semctl_down(semid,semnum,cmd,version,arg);
-		mutex_unlock(&sem_ids.mutex);
+		mutex_lock(&sem_ids(ns).mutex);
+		err = semctl_down(ns,semid,semnum,cmd,version,arg);
+		mutex_unlock(&sem_ids(ns).mutex);
 		return err;
 	default:
 		return -EINVAL;
@@ -947,6 +1003,7 @@ static inline void unlock_semundo(void)
 		spin_unlock(&undo_list->lock);
 }
 
+#include <ub/ub_mem.h>
 
 /* If the task doesn't already have a undo_list, then allocate one
  * here.  We guarantee there is only one thread using this undo list,
@@ -967,7 +1024,8 @@ static inline int get_undo_list(struct s
 	undo_list = current->sysvsem.undo_list;
 	if (!undo_list) {
 		size = sizeof(struct sem_undo_list);
-		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
+		undo_list = (struct sem_undo_list *) ub_kmalloc(size,
+				GFP_KERNEL);
 		if (undo_list == NULL)
 			return -ENOMEM;
 		memset(undo_list, 0, size);
@@ -999,7 +1057,7 @@ static struct sem_undo *lookup_undo(stru
 	return un;
 }
 
-static struct sem_undo *find_undo(int semid)
+static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 {
 	struct sem_array *sma;
 	struct sem_undo_list *ulp;
@@ -1018,12 +1076,12 @@ static struct sem_undo *find_undo(int se
 		goto out;
 
 	/* no undo structure around - allocate one. */
-	sma = sem_lock(semid);
+	sma = sem_lock(ns, semid);
 	un = ERR_PTR(-EINVAL);
 	if(sma==NULL)
 		goto out;
 	un = ERR_PTR(-EIDRM);
-	if (sem_checkid(sma,semid)) {
+	if (sem_checkid(ns,sma,semid)) {
 		sem_unlock(sma);
 		goto out;
 	}
@@ -1035,7 +1093,8 @@ static struct sem_undo *find_undo(int se
 	}
 	sem_unlock(sma);
 
-	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+	new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
+			sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1087,13 +1146,16 @@ asmlinkage long sys_semtimedop(int semid
 	int undos = 0, alter = 0, max;
 	struct sem_queue queue;
 	unsigned long jiffies_left = 0;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
 
 	if (nsops < 1 || semid < 0)
 		return -EINVAL;
-	if (nsops > sc_semopm)
+	if (nsops > ns->sc_semopm)
 		return -E2BIG;
 	if(nsops > SEMOPM_FAST) {
-		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
+		sops = ub_kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
 		if(sops==NULL)
 			return -ENOMEM;
 	}
@@ -1126,7 +1188,7 @@ asmlinkage long sys_semtimedop(int semid
 
 retry_undos:
 	if (undos) {
-		un = find_undo(semid);
+		un = find_undo(ns, semid);
 		if (IS_ERR(un)) {
 			error = PTR_ERR(un);
 			goto out_free;
@@ -1134,12 +1196,12 @@ retry_undos:
 	} else
 		un = NULL;
 
-	sma = sem_lock(semid);
+	sma = sem_lock(ns, semid);
 	error=-EINVAL;
 	if(sma==NULL)
 		goto out_free;
 	error = -EIDRM;
-	if (sem_checkid(sma,semid))
+	if (sem_checkid(ns,sma,semid))
 		goto out_unlock_free;
 	/*
 	 * semid identifies are not unique - find_undo may have
@@ -1162,7 +1224,7 @@ retry_undos:
 	if (error)
 		goto out_unlock_free;
 
-	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+	error = try_atomic_semop (sma, sops, nsops, un, virt_tgid(current));
 	if (error <= 0) {
 		if (alter && error == 0)
 			update_queue (sma);
@@ -1177,7 +1239,7 @@ retry_undos:
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = virt_tgid(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1207,7 +1269,7 @@ retry_undos:
 		goto out_free;
 	}
 
-	sma = sem_lock(semid);
+	sma = sem_lock(ns, semid);
 	if(sma==NULL) {
 		BUG_ON(queue.prev != NULL);
 		error = -EIDRM;
@@ -1284,6 +1346,7 @@ void exit_sem(struct task_struct *tsk)
 {
 	struct sem_undo_list *undo_list;
 	struct sem_undo *u, **up;
+	struct ipc_namespace *ns;
 
 	undo_list = tsk->sysvsem.undo_list;
 	if (!undo_list)
@@ -1292,6 +1355,7 @@ void exit_sem(struct task_struct *tsk)
 	if (!atomic_dec_and_test(&undo_list->refcnt))
 		return;
 
+	ns = tsk->nsproxy->ipc_ns;
 	/* There's no need to hold the semundo list lock, as current
          * is the last task exiting for this undo list.
 	 */
@@ -1305,14 +1369,14 @@ void exit_sem(struct task_struct *tsk)
 
 		if(semid == -1)
 			continue;
-		sma = sem_lock(semid);
+		sma = sem_lock(ns, semid);
 		if (sma == NULL)
 			continue;
 
 		if (u->semid == -1)
 			goto next_entry;
 
-		BUG_ON(sem_checkid(sma,u->semid));
+		BUG_ON(sem_checkid(ns,sma,u->semid));
 
 		/* remove u from the sma->undo list */
 		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
@@ -1346,7 +1410,7 @@ found:
 					semaphore->semval = 0;
 				if (semaphore->semval > SEMVMX)
 					semaphore->semval = SEMVMX;
-				semaphore->sempid = current->tgid;
+				semaphore->sempid = virt_tgid(current);
 			}
 		}
 		sma->sem_otime = get_seconds();
@@ -1377,3 +1441,54 @@ static int sysvipc_sem_proc_show(struct 
 			  sma->sem_ctime);
 }
 #endif
+
+#ifdef CONFIG_VE
+#include <linux/module.h>
+
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg)
+{
+	int err = 0;
+	struct sem_array *sma;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	mutex_lock(&sem_ids(ns).mutex);
+	sma = sem_lock(ns, semid);
+	if (!sma) {
+		err = newary(ns, key, semid, size, semflg);
+		if (err >= 0)
+			sma = sem_lock(ns, semid);
+	}
+	if (sma)
+		sem_unlock(sma);
+	mutex_unlock(&sem_ids(ns).mutex);
+
+	return err > 0 ? 0 : err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_sem);
+
+int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg)
+{
+	int i;
+	int err = 0;
+	struct sem_array *sma;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	mutex_lock(&sem_ids(ns).mutex);
+	for (i = 0; i <= sem_ids(ns).max_id; i++) {
+		if ((sma = sem_lock(ns, i)) == NULL)
+			continue;
+		err = func(sem_buildid(ns, i, sma->sem_perm.seq), sma, arg);
+		sem_unlock(sma);
+		if (err)
+			break;
+	}
+	mutex_unlock(&sem_ids(ns).mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_sem);
+EXPORT_SYMBOL_GPL(exit_sem);
+#endif
diff -upr kernel-2.6.18-417.el5.orig/ipc/shm.c kernel-2.6.18-417.el5-028stab121/ipc/shm.c
--- kernel-2.6.18-417.el5.orig/ipc/shm.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/ipc/shm.c	2017-01-13 08:40:24.000000000 -0500
@@ -15,10 +15,15 @@
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/hugetlb.h>
 #include <linux/shm.h>
 #include <linux/init.h>
@@ -32,82 +37,200 @@
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/uaccess.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
 #include "util.h"
 
 static struct file_operations shm_file_operations;
 static struct vm_operations_struct shm_vm_ops;
 
-static struct ipc_ids shm_ids;
+static struct ipc_ids init_shm_ids;
 
-#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
-#define shm_unlock(shp)	ipc_unlock(&(shp)->shm_perm)
-#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
-#define shm_buildid(id, seq) \
-	ipc_buildid(&shm_ids, id, seq)
+#define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))
 
-static int newseg (key_t key, int shmflg, size_t size);
+#define shm_lock(ns, id)		\
+	((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
+#define shm_unlock(shp)			\
+	ipc_unlock(&(shp)->shm_perm)
+#define shm_get(ns, id)			\
+	((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
+#define shm_buildid(ns, id, seq)	\
+	ipc_buildid(&shm_ids(ns), id, seq)
+
+static int newseg (struct ipc_namespace *ns, key_t key,
+		int shmid, int shmflg, size_t size);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
 
-size_t	shm_ctlmax = SHMMAX;
-size_t 	shm_ctlall = SHMALL;
-int 	shm_ctlmni = SHMMNI;
+static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+	ns->ids[IPC_SHM_IDS] = ids;
+	ns->shm_ctlmax = SHMMAX;
+	ns->shm_ctlall = SHMALL;
+	ns->shm_ctlmni = SHMMNI;
+	ns->shm_tot = 0;
+	ipc_init_ids(ids, 1);
+}
+
+static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{
+	if (shp->shm_nattch){
+		shp->shm_perm.mode |= SHM_DEST;
+		/* Do not find it any more */
+		shp->shm_perm.key = IPC_PRIVATE;
+		shm_unlock(shp);
+	} else
+		shm_destroy(ns, shp);
+}
+
+#ifdef CONFIG_IPC_NS
+int shm_init_ns(struct ipc_namespace *ns)
+{
+	struct ipc_ids *ids;
 
-static int shm_tot; /* total number of shared memory pages */
+	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+	if (ids == NULL)
+		return -ENOMEM;
+
+	__shm_init_ns(ns, ids);
+	return 0;
+}
+
+/* This function does not invalidate ipc namespace, it just releases
+ * all its content. Unless caller take some precautions, new objects
+ * can appear.
+ */
+void shm_clean_ns(struct ipc_namespace *ns)
+{
+	int i;
+	struct shmid_kernel *shp;
+
+	mutex_lock(&shm_ids(ns).mutex);
+	for (i = 0; i <= shm_ids(ns).max_id; i++) {
+		shp = shm_lock(ns, i);
+		if (shp == NULL)
+			continue;
+
+		do_shm_rmid(ns, shp);
+	}
+	mutex_unlock(&shm_ids(ns).mutex);
+}
+EXPORT_SYMBOL(shm_clean_ns);
+
+void shm_exit_ns(struct ipc_namespace *ns)
+{
+	shm_clean_ns(ns);
+
+	ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
+	kfree(ns->ids[IPC_SHM_IDS]);
+	ns->ids[IPC_SHM_IDS] = NULL;
+}
+#endif
 
 void __init shm_init (void)
 {
-	ipc_init_ids(&shm_ids, 1);
+	__shm_init_ns(&init_ipc_ns, &init_shm_ids);
 	ipc_init_proc_interface("sysvipc/shm",
 #if BITS_PER_LONG <= 32
 				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
 #else
 				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
 #endif
-				&shm_ids,
-				sysvipc_shm_proc_show);
+				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
-static inline int shm_checkid(struct shmid_kernel *s, int id)
+static inline int shm_checkid(struct ipc_namespace *ns,
+		struct shmid_kernel *s, int id)
 {
-	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
+	if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))
 		return -EIDRM;
 	return 0;
 }
 
-static inline struct shmid_kernel *shm_rmid(int id)
+static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
+	return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);
 }
 
-static inline int shm_addid(struct shmid_kernel *shp)
+static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp,
+		int reqid)
 {
-	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+	return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni, reqid);
 }
 
 
 
-static inline void shm_inc (int id) {
+static inline void shm_inc(struct ipc_namespace *ns, int id)
+{
 	struct shmid_kernel *shp;
 
-	shp = shm_lock(id);
+	shp = shm_lock(ns, id);
 	BUG_ON(!shp);
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
 
+#define shm_file_ns(file) (*((struct ipc_namespace **)&(file)->private_data))
+
 /* This is called by fork, once for every shm attach. */
-static void shm_open (struct vm_area_struct *shmd)
+static void shm_open(struct vm_area_struct *shmd)
 {
-	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
+	shm_inc(shm_file_ns(shmd->vm_file),
+			shmd->vm_file->f_dentry->d_inode->i_ino);
+}
+
+static int shmem_lock(struct shmid_kernel *shp, int lock,
+		struct user_struct *user)
+{
+	struct file *file = shp->shm_file;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long size;
+
+	size = shp->shm_segsz + PAGE_SIZE - 1;
+
+#ifdef CONFIG_SHMEM
+	spin_lock(&info->lock);
+	if (lock && !(info->flags & VM_LOCKED)) {
+		if (ub_lockedshm_charge(info, size) < 0)
+			goto out_ch;
+
+		if (!user_shm_lock(inode->i_size, user))
+			goto out_user;
+		info->flags |= VM_LOCKED;
+	}
+	if (!lock && (info->flags & VM_LOCKED) && user) {
+		ub_lockedshm_uncharge(info, size);
+		user_shm_unlock(inode->i_size, user);
+		info->flags &= ~VM_LOCKED;
+	}
+	spin_unlock(&info->lock);
+	return 0;
+
+out_user:
+	ub_lockedshm_uncharge(info, size);
+out_ch:
+	spin_unlock(&info->lock);
+	return -ENOMEM;
+#else
+	if (lock && ub_lockedshm_charge(info, size))
+		return -ENOMEM;
+	if (!lock)
+		ub_lockedshm_uncharge(info, size);
+	return 0;
+#endif
 }
 
 /*
@@ -118,13 +241,13 @@ static void shm_open (struct vm_area_str
  * It has to be called with shp and shm_ids.mutex locked,
  * but returns with shp unlocked and freed.
  */
-static void shm_destroy (struct shmid_kernel *shp)
+static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
-	shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid (shp->id);
+	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	shm_rmid(ns, shp->id);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
-		shmem_lock(shp->shm_file, 0, shp->mlock_user);
+		shmem_lock(shp, 0, shp->mlock_user);
 	else
 		user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
 						shp->mlock_user);
@@ -144,20 +267,23 @@ static void shm_close (struct vm_area_st
 	struct file * file = shmd->vm_file;
 	int id = file->f_dentry->d_inode->i_ino;
 	struct shmid_kernel *shp;
+	struct ipc_namespace *ns;
+
+	ns = shm_file_ns(file);
 
-	mutex_lock(&shm_ids.mutex);
+	mutex_lock(&shm_ids(ns).mutex);
 	/* remove from the list of attaches of the shm segment */
-	shp = shm_lock(id);
+	shp = shm_lock(ns, id);
 	BUG_ON(!shp);
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
 	   shp->shm_perm.mode & SHM_DEST)
-		shm_destroy (shp);
+		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids.mutex);
+	mutex_unlock(&shm_ids(ns).mutex);
 }
 
 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
@@ -169,14 +295,25 @@ static int shm_mmap(struct file * file, 
 		vma->vm_ops = &shm_vm_ops;
 		if (!(vma->vm_flags & VM_WRITE))
 			vma->vm_flags &= ~VM_MAYWRITE;
-		shm_inc(file->f_dentry->d_inode->i_ino);
+		shm_inc(shm_file_ns(file), file->f_dentry->d_inode->i_ino);
 	}
 
 	return ret;
 }
 
+static int shm_release(struct inode *ino, struct file *file)
+{
+	struct ipc_namespace *ns;
+
+	ns = shm_file_ns(file);
+	put_ipc_ns(ns);
+	shm_file_ns(file) = NULL;
+	return 0;
+}
+
 static struct file_operations shm_file_operations = {
-	.mmap	= shm_mmap,
+	.mmap		= shm_mmap,
+	.release	= shm_release,
 #ifndef CONFIG_MMU
 	.get_unmapped_area = shmem_get_unmapped_area,
 #endif
@@ -192,7 +329,8 @@ static struct vm_operations_struct shm_v
 #endif
 };
 
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (struct ipc_namespace *ns, key_t key, int shmid,
+		int shmflg, size_t size)
 {
 	int error;
 	struct shmid_kernel *shp;
@@ -201,10 +339,10 @@ static int newseg (key_t key, int shmflg
 	char name[13];
 	int id;
 
-	if (size < SHMMIN || size > shm_ctlmax)
+	if (size < SHMMIN || size > ns->shm_ctlmax)
 		return -EINVAL;
 
-	if (shm_tot + numpages >= shm_ctlall)
+	if (ns->shm_tot + numpages >= ns->shm_ctlall)
 		return -ENOSPC;
 
 	shp = ipc_rcu_alloc(sizeof(*shp));
@@ -243,25 +381,27 @@ static int newseg (key_t key, int shmflg
 		goto no_file;
 
 	error = -ENOSPC;
-	id = shm_addid(shp);
+	id = shm_addid(ns, shp, shmid);
 	if(id == -1) 
 		goto no_id;
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = virt_tgid(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
-	shp->id = shm_buildid(id,shp->shm_perm.seq);
+	shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
 	shp->shm_file = file;
 	file->f_dentry->d_inode->i_ino = shp->id;
 
+	shm_file_ns(file) = get_ipc_ns(ns);
+
 	/* Hugetlb ops would have already been assigned. */
 	if (!(shmflg & SHM_HUGETLB))
 		file->f_op = &shm_file_operations;
 
-	shm_tot += numpages;
+	ns->shm_tot += numpages;
 	shm_unlock(shp);
 	return shp->id;
 
@@ -277,33 +417,36 @@ asmlinkage long sys_shmget (key_t key, s
 {
 	struct shmid_kernel *shp;
 	int err, id = 0;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
 
-	mutex_lock(&shm_ids.mutex);
+	mutex_lock(&shm_ids(ns).mutex);
 	if (key == IPC_PRIVATE) {
-		err = newseg(key, shmflg, size);
-	} else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
+		err = newseg(ns, key, -1, shmflg, size);
+	} else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newseg(key, shmflg, size);
+			err = newseg(ns, key, -1, shmflg, size);
 	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
 		err = -EEXIST;
 	} else {
-		shp = shm_lock(id);
+		shp = shm_lock(ns, id);
 		BUG_ON(shp==NULL);
 		if (shp->shm_segsz < size)
 			err = -EINVAL;
 		else if (ipcperms(&shp->shm_perm, shmflg))
 			err = -EACCES;
 		else {
-			int shmid = shm_buildid(id, shp->shm_perm.seq);
+			int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
 			err = security_shm_associate(shp, shmflg);
 			if (!err)
 				err = shmid;
 		}
 		shm_unlock(shp);
 	}
-	mutex_unlock(&shm_ids.mutex);
+	mutex_unlock(&shm_ids(ns).mutex);
 
 	return err;
 }
@@ -423,17 +566,18 @@ static void shm_add_rss_swap(struct shmi
 	}
 }
 
-static void shm_get_stat(unsigned long *rss, unsigned long *swp) 
+static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
+		unsigned long *swp)
 {
 	int i;
 
 	*rss = 0;
 	*swp = 0;
 
-	for (i = 0; i <= shm_ids.max_id; i++) {
+	for (i = 0; i <= shm_ids(ns).max_id; i++) {
 		struct shmid_kernel *shp;
 
-		shp = shm_get(i);
+		shp = shm_get(ns, i);
 		if(!shp)
 			continue;
 
@@ -446,6 +590,7 @@ asmlinkage long sys_shmctl (int shmid, i
 	struct shm_setbuf setbuf;
 	struct shmid_kernel *shp;
 	int err, version;
+	struct ipc_namespace *ns;
 
 	if (cmd < 0 || shmid < 0) {
 		err = -EINVAL;
@@ -453,6 +598,7 @@ asmlinkage long sys_shmctl (int shmid, i
 	}
 
 	version = ipc_parse_version(&cmd);
+	ns = current->nsproxy->ipc_ns;
 
 	switch (cmd) { /* replace with proc interface ? */
 	case IPC_INFO:
@@ -464,15 +610,15 @@ asmlinkage long sys_shmctl (int shmid, i
 			return err;
 
 		memset(&shminfo,0,sizeof(shminfo));
-		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
-		shminfo.shmmax = shm_ctlmax;
-		shminfo.shmall = shm_ctlall;
+		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
+		shminfo.shmmax = ns->shm_ctlmax;
+		shminfo.shmall = ns->shm_ctlall;
 
 		shminfo.shmmin = SHMMIN;
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
 		/* reading a integer is always atomic */
-		err= shm_ids.max_id;
+		err= shm_ids(ns).max_id;
 		if(err<0)
 			err = 0;
 		goto out;
@@ -486,14 +632,14 @@ asmlinkage long sys_shmctl (int shmid, i
 			return err;
 
 		memset(&shm_info,0,sizeof(shm_info));
-		mutex_lock(&shm_ids.mutex);
-		shm_info.used_ids = shm_ids.in_use;
-		shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
-		shm_info.shm_tot = shm_tot;
+		mutex_lock(&shm_ids(ns).mutex);
+		shm_info.used_ids = shm_ids(ns).in_use;
+		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
+		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
-		err = shm_ids.max_id;
-		mutex_unlock(&shm_ids.mutex);
+		err = shm_ids(ns).max_id;
+		mutex_unlock(&shm_ids(ns).mutex);
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -508,17 +654,17 @@ asmlinkage long sys_shmctl (int shmid, i
 		struct shmid64_ds tbuf;
 		int result;
 		memset(&tbuf, 0, sizeof(tbuf));
-		shp = shm_lock(shmid);
+		shp = shm_lock(ns, shmid);
 		if(shp==NULL) {
 			err = -EINVAL;
 			goto out;
 		} else if(cmd==SHM_STAT) {
 			err = -EINVAL;
-			if (shmid > shm_ids.max_id)
+			if (shmid > shm_ids(ns).max_id)
 				goto out_unlock;
-			result = shm_buildid(shmid, shp->shm_perm.seq);
+			result = shm_buildid(ns, shmid, shp->shm_perm.seq);
 		} else {
-			err = shm_checkid(shp,shmid);
+			err = shm_checkid(ns, shp,shmid);
 			if(err)
 				goto out_unlock;
 			result = 0;
@@ -550,12 +696,12 @@ asmlinkage long sys_shmctl (int shmid, i
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
-		shp = shm_lock(shmid);
+		shp = shm_lock(ns, shmid);
 		if(shp==NULL) {
 			err = -EINVAL;
 			goto out;
 		}
-		err = shm_checkid(shp,shmid);
+		err = shm_checkid(ns, shp,shmid);
 		if(err)
 			goto out_unlock;
 
@@ -580,14 +726,15 @@ asmlinkage long sys_shmctl (int shmid, i
 		if(cmd==SHM_LOCK) {
 			struct user_struct * user = current->user;
 			if (!is_file_hugepages(shp->shm_file)) {
-				err = shmem_lock(shp->shm_file, 1, user);
-				if (!err) {
+				err = shmem_lock(shp, 1, user);
+				if (!err &&
+				    !(shp->shm_perm.mode & SHM_LOCKED)) {
 					shp->shm_perm.mode |= SHM_LOCKED;
 					shp->mlock_user = user;
 				}
 			}
 		} else if (!is_file_hugepages(shp->shm_file)) {
-			shmem_lock(shp->shm_file, 0, shp->mlock_user);
+			shmem_lock(shp, 0, shp->mlock_user);
 			shp->shm_perm.mode &= ~SHM_LOCKED;
 			shp->mlock_user = NULL;
 		}
@@ -606,12 +753,12 @@ asmlinkage long sys_shmctl (int shmid, i
 		 *	Instead we set a destroyed flag, and then blow
 		 *	the name away when the usage hits zero.
 		 */
-		mutex_lock(&shm_ids.mutex);
-		shp = shm_lock(shmid);
+		mutex_lock(&shm_ids(ns).mutex);
+		shp = shm_lock(ns, shmid);
 		err = -EINVAL;
 		if (shp == NULL) 
 			goto out_up;
-		err = shm_checkid(shp, shmid);
+		err = shm_checkid(ns, shp, shmid);
 		if(err)
 			goto out_unlock_up;
 
@@ -621,7 +768,7 @@ asmlinkage long sys_shmctl (int shmid, i
 
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			err=-EPERM;
 			goto out_unlock_up;
 		}
@@ -630,14 +777,8 @@ asmlinkage long sys_shmctl (int shmid, i
 		if (err)
 			goto out_unlock_up;
 
-		if (shp->shm_nattch){
-			shp->shm_perm.mode |= SHM_DEST;
-			/* Do not find it any more */
-			shp->shm_perm.key = IPC_PRIVATE;
-			shm_unlock(shp);
-		} else
-			shm_destroy (shp);
-		mutex_unlock(&shm_ids.mutex);
+		do_shm_rmid(ns, shp);
+		mutex_unlock(&shm_ids(ns).mutex);
 		goto out;
 	}
 
@@ -647,12 +788,12 @@ asmlinkage long sys_shmctl (int shmid, i
 			err = -EFAULT;
 			goto out;
 		}
-		mutex_lock(&shm_ids.mutex);
-		shp = shm_lock(shmid);
+		mutex_lock(&shm_ids(ns).mutex);
+		shp = shm_lock(ns, shmid);
 		err=-EINVAL;
 		if(shp==NULL)
 			goto out_up;
-		err = shm_checkid(shp,shmid);
+		err = shm_checkid(ns, shp,shmid);
 		if(err)
 			goto out_unlock_up;
 		err = audit_ipc_obj(&(shp->shm_perm));
@@ -664,7 +805,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		err=-EPERM;
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			goto out_unlock_up;
 		}
 
@@ -689,7 +830,7 @@ asmlinkage long sys_shmctl (int shmid, i
 out_unlock_up:
 	shm_unlock(shp);
 out_up:
-	mutex_unlock(&shm_ids.mutex);
+	mutex_unlock(&shm_ids(ns).mutex);
 	goto out;
 out_unlock:
 	shm_unlock(shp);
@@ -715,6 +856,7 @@ long do_shmat(int shmid, char __user *sh
 	unsigned long prot;
 	int acc_mode;
 	void *user_addr;
+	struct ipc_namespace *ns;
 
 	if (shmid < 0) {
 		err = -EINVAL;
@@ -753,12 +895,13 @@ long do_shmat(int shmid, char __user *sh
 	 * We cannot rely on the fs check since SYSV IPC does have an
 	 * additional creator id...
 	 */
-	shp = shm_lock(shmid);
+	ns = current->nsproxy->ipc_ns;
+	shp = shm_lock(ns, shmid);
 	if(shp == NULL) {
 		err = -EINVAL;
 		goto out;
 	}
-	err = shm_checkid(shp,shmid);
+	err = shm_checkid(ns, shp,shmid);
 	if (err) {
 		shm_unlock(shp);
 		goto out;
@@ -799,16 +942,16 @@ long do_shmat(int shmid, char __user *sh
 invalid:
 	up_write(&current->mm->mmap_sem);
 
-	mutex_lock(&shm_ids.mutex);
-	shp = shm_lock(shmid);
+	mutex_lock(&shm_ids(ns).mutex);
+	shp = shm_lock(ns, shmid);
 	BUG_ON(!shp);
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
 	   shp->shm_perm.mode & SHM_DEST)
-		shm_destroy (shp);
+		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids.mutex);
+	mutex_unlock(&shm_ids(ns).mutex);
 
 	*raddr = (unsigned long) user_addr;
 	err = 0;
@@ -953,3 +1096,61 @@ static int sysvipc_shm_proc_show(struct 
 			  swp * PAGE_SIZE);
 }
 #endif
+
+#ifdef CONFIG_VE
+#include <linux/module.h>
+
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg)
+{
+	struct shmid_kernel *shp;
+	struct file *file;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	mutex_lock(&shm_ids(ns).mutex);
+	shp = shm_lock(ns, shmid);
+	if (!shp) {
+		int err;
+
+		err = newseg(ns, key, shmid, shmflg, size);
+		file = ERR_PTR(err);
+		if (err < 0)
+			goto out;
+		shp = shm_lock(ns, shmid);
+	}
+	file = ERR_PTR(-EINVAL);
+	if (shp) {
+		file = shp->shm_file;
+		get_file(file);
+		shm_unlock(shp);
+	}
+out:
+	mutex_unlock(&shm_ids(ns).mutex);
+	return file;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_shm);
+
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg)
+{
+	int i;
+	int err = 0;
+	struct shmid_kernel* shp;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	mutex_lock(&shm_ids(ns).mutex);
+	for(i = 0; i <= shm_ids(ns).max_id; i++) {
+		if ((shp = shm_lock(ns, i)) == NULL)
+			continue;
+		err = func(shp, arg);
+		shm_unlock(shp);
+		if (err)
+			break;
+	}
+	mutex_unlock(&shm_ids(ns).mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_shm);
+#endif
diff -upr kernel-2.6.18-417.el5.orig/ipc/util.c kernel-2.6.18-417.el5-028stab121/ipc/util.c
--- kernel-2.6.18-417.el5.orig/ipc/util.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/ipc/util.c	2017-01-13 08:40:24.000000000 -0500
@@ -12,6 +12,9 @@
  *            Mingming Cao <cmm@us.ibm.com>
  * Mar 2006 - support for audit of ipc object properties
  *            Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ * Jun 2006 - namespaces support
+ *            OpenVZ, SWsoft Inc.
+ *            Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/mm.h>
@@ -29,18 +32,124 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/audit.h>
+#include <linux/nsproxy.h>
 
 #include <asm/unistd.h>
 
+#include <ub/ub_mem.h>
+
 #include "util.h"
 
 struct ipc_proc_iface {
 	const char *path;
 	const char *header;
-	struct ipc_ids *ids;
+	int ids;
 	int (*show)(struct seq_file *, void *);
 };
 
+struct ipc_namespace init_ipc_ns = {
+	.kref = {
+		.refcount	= ATOMIC_INIT(2),
+	},
+};
+
+#ifdef CONFIG_IPC_NS
+static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+{
+	int err;
+	struct ipc_namespace *ns;
+
+	err = -ENOMEM;
+	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
+	if (ns == NULL)
+		goto err_mem;
+
+	err = sem_init_ns(ns);
+	if (err)
+		goto err_sem;
+	err = msg_init_ns(ns);
+	if (err)
+		goto err_msg;
+	err = shm_init_ns(ns);
+	if (err)
+		goto err_shm;
+
+	kref_init(&ns->kref);
+	return ns;
+
+err_shm:
+	msg_exit_ns(ns);
+err_msg:
+	sem_exit_ns(ns);
+err_sem:
+	kfree(ns);
+err_mem:
+	return ERR_PTR(err);
+}
+
+int unshare_ipcs(unsigned long unshare_flags, struct ipc_namespace **new_ipc)
+{
+	struct ipc_namespace *new;
+
+	if (unshare_flags & CLONE_NEWIPC) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		new = clone_ipc_ns(current->nsproxy->ipc_ns);
+		if (IS_ERR(new))
+			return PTR_ERR(new);
+
+		*new_ipc = new;
+	}
+
+	return 0;
+}
+
+int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+	struct ipc_namespace *old_ns = tsk->nsproxy->ipc_ns;
+	struct ipc_namespace *new_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+
+	get_ipc_ns(old_ns);
+
+	if (!(flags & CLONE_NEWIPC))
+		return 0;
+
+#ifndef CONFIG_VE
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+#endif
+
+	new_ns = clone_ipc_ns(old_ns);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	tsk->nsproxy->ipc_ns = new_ns;
+out:
+	put_ipc_ns(old_ns);
+	return err;
+}
+
+void free_ipc_ns(struct kref *kref)
+{
+	struct ipc_namespace *ns;
+
+	ns = container_of(kref, struct ipc_namespace, kref);
+	sem_exit_ns(ns);
+	msg_exit_ns(ns);
+	shm_exit_ns(ns);
+	kfree(ns);
+}
+#endif
+
 /**
  *	ipc_init	-	initialise IPC subsystem
  *
@@ -67,7 +176,7 @@ __initcall(ipc_init);
  *	array itself. 
  */
  
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
 {
 	int i;
 
@@ -110,8 +219,7 @@ static struct file_operations sysvipc_pr
  *	@show: show routine.
  */
 void __init ipc_init_proc_interface(const char *path, const char *header,
-				    struct ipc_ids *ids,
-				    int (*show)(struct seq_file *, void *))
+		int ids, int (*show)(struct seq_file *, void *))
 {
 	struct proc_dir_entry *pde;
 	struct ipc_proc_iface *iface;
@@ -197,7 +305,7 @@ static int grow_ary(struct ipc_ids* ids,
 	 */
 	rcu_assign_pointer(ids->entries, new);
 
-	ipc_rcu_putref(old);
+	__ipc_fini_ids(ids, old);
 	return newsize;
 }
 
@@ -215,10 +323,18 @@ static int grow_ary(struct ipc_ids* ids,
  *	Called with ipc_ids.mutex held.
  */
  
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid)
 {
 	int id;
 
+	if (reqid >= 0) {
+		id = reqid%SEQ_MULTIPLIER;
+		size = grow_ary(ids,id+1);
+		if (ids->entries->p[id] == NULL)
+			goto found;
+		return -1;
+	}
+
 	size = grow_ary(ids,size);
 
 	/*
@@ -238,9 +354,13 @@ found:
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
 
-	new->seq = ids->seq++;
-	if(ids->seq > ids->seq_max)
-		ids->seq = 0;
+	if (reqid >= 0) {
+		new->seq = reqid/SEQ_MULTIPLIER;
+	} else {
+		new->seq = ids->seq++;
+		if(ids->seq > ids->seq_max)
+			ids->seq = 0;
+	}
 
 	spin_lock_init(&new->lock);
 	new->deleted = 0;
@@ -302,9 +422,9 @@ void *ipc_alloc(int size)
 {
 	void *out;
 	if(size > PAGE_SIZE)
-		out = vmalloc(size);
+		out = ub_vmalloc(size);
 	else
-		out = kmalloc(size, GFP_KERNEL);
+		out = ub_kmalloc(size, GFP_KERNEL);
 	return out;
 }
 
@@ -386,14 +506,14 @@ void *ipc_rcu_alloc(int size)
 	 * workqueue if necessary (for vmalloc).
 	 */
 	if (rcu_use_vmalloc(size)) {
-		out = vmalloc(HDRLEN_VMALLOC + size);
+		out = ub_vmalloc(HDRLEN_VMALLOC + size);
 		if (!out)
 			goto done;
 
 		out += HDRLEN_VMALLOC;
 		container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
 	} else {
-		out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+		out = ub_kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
 		if (!out)
 			goto done;
 
@@ -637,6 +757,9 @@ static void *sysvipc_proc_next(struct se
 	struct ipc_proc_iface *iface = s->private;
 	struct kern_ipc_perm *ipc = it;
 	loff_t p;
+	struct ipc_ids *ids;
+
+	ids = current->nsproxy->ipc_ns->ids[iface->ids];
 
 	/* If we had an ipc id locked before, unlock it */
 	if (ipc && ipc != SEQ_START_TOKEN)
@@ -646,8 +769,8 @@ static void *sysvipc_proc_next(struct se
 	 * p = *pos - 1 (because id 0 starts at position 1)
 	 *          + 1 (because we increment the position by one)
 	 */
-	for (p = *pos; p <= iface->ids->max_id; p++) {
-		if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+	for (p = *pos; p <= ids->max_id; p++) {
+		if ((ipc = ipc_lock(ids, p)) != NULL) {
 			*pos = p + 1;
 			return ipc;
 		}
@@ -666,12 +789,15 @@ static void *sysvipc_proc_start(struct s
 	struct ipc_proc_iface *iface = s->private;
 	struct kern_ipc_perm *ipc;
 	loff_t p;
+	struct ipc_ids *ids;
+
+	ids = current->nsproxy->ipc_ns->ids[iface->ids];
 
 	/*
 	 * Take the lock - this will be released by the corresponding
 	 * call to stop().
 	 */
-	mutex_lock(&iface->ids->mutex);
+	mutex_lock(&ids->mutex);
 
 	/* pos < 0 is invalid */
 	if (*pos < 0)
@@ -682,8 +808,8 @@ static void *sysvipc_proc_start(struct s
 		return SEQ_START_TOKEN;
 
 	/* Find the (pos-1)th ipc */
-	for (p = *pos - 1; p <= iface->ids->max_id; p++) {
-		if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+	for (p = *pos - 1; p <= ids->max_id; p++) {
+		if ((ipc = ipc_lock(ids, p)) != NULL) {
 			*pos = p + 1;
 			return ipc;
 		}
@@ -695,13 +821,15 @@ static void sysvipc_proc_stop(struct seq
 {
 	struct kern_ipc_perm *ipc = it;
 	struct ipc_proc_iface *iface = s->private;
+	struct ipc_ids *ids;
 
 	/* If we had a locked segment, release it */
 	if (ipc && ipc != SEQ_START_TOKEN)
 		ipc_unlock(ipc);
 
+	ids = current->nsproxy->ipc_ns->ids[iface->ids];
 	/* Release the lock we took in start() */
-	mutex_unlock(&iface->ids->mutex);
+	mutex_unlock(&ids->mutex);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
diff -upr kernel-2.6.18-417.el5.orig/ipc/util.h kernel-2.6.18-417.el5-028stab121/ipc/util.h
--- kernel-2.6.18-417.el5.orig/ipc/util.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/ipc/util.h	2017-01-13 08:40:24.000000000 -0500
@@ -3,6 +3,8 @@
  * Copyright (C) 1999 Christoph Rohland
  *
  * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com>
+ * namespaces support.      2006 OpenVZ, SWsoft Inc.
+ *                               Pavel Emelianov <xemul@openvz.org>
  */
 
 #ifndef _IPC_UTIL_H
@@ -15,6 +17,14 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
+int sem_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
+int shm_init_ns(struct ipc_namespace *ns);
+
+void sem_exit_ns(struct ipc_namespace *ns);
+void msg_exit_ns(struct ipc_namespace *ns);
+void shm_exit_ns(struct ipc_namespace *ns);
+
 struct ipc_id_ary {
 	int size;
 	struct kern_ipc_perm *p[0];
@@ -31,18 +41,26 @@ struct ipc_ids {
 };
 
 struct seq_file;
-void __init ipc_init_ids(struct ipc_ids* ids, int size);
+#ifdef CONFIG_IPC_NS
+#define __ipc_init
+#else
+#define __ipc_init	__init
+#endif
+void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
-				    struct ipc_ids *ids,
-				    int (*show)(struct seq_file *, void *));
+		int ids, int (*show)(struct seq_file *, void *));
 #else
 #define ipc_init_proc_interface(path, header, ids, show) do {} while (0)
 #endif
 
+#define IPC_SEM_IDS	0
+#define IPC_MSG_IDS	1
+#define IPC_SHM_IDS	2
+
 /* must be called with ids->mutex acquired.*/
 int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid);
 
 /* must be called with both locks acquired. */
 struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
@@ -65,6 +83,18 @@ void* ipc_rcu_alloc(int size);
 int ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
+static inline void __ipc_fini_ids(struct ipc_ids *ids,
+		struct ipc_id_ary *entries)
+{
+	if (entries != &ids->nullentry)
+		ipc_rcu_putref(entries);
+}
+
+static inline void ipc_fini_ids(struct ipc_ids *ids)
+{
+	__ipc_fini_ids(ids, ids->entries);
+}
+
 struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
 struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
 void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
diff -upr kernel-2.6.18-417.el5.orig/kernel/acct.c kernel-2.6.18-417.el5-028stab121/kernel/acct.c
--- kernel-2.6.18-417.el5.orig/kernel/acct.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/acct.c	2017-01-13 08:40:21.000000000 -0500
@@ -58,6 +58,7 @@
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <linux/blkdev.h> /* sector_div */
+#include <linux/rcupdate.h>
 
 /*
  * These constants control the amount of freespace that suspend and
@@ -71,38 +72,43 @@ int acct_parm[3] = {4, 2, 30};
 #define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
 #define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */
 
+struct bsd_acct_struct;
+
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct, struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
  * can be placed in the same cache line as the lock.  This primes
  * the cache line to have the data after getting the lock.
  */
-struct acct_glbs {
-	spinlock_t		lock;
+struct bsd_acct_struct {
 	volatile int		active;
 	volatile int		needcheck;
 	struct file		*file;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
-static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
 
 /*
  * Called whenever the timer says to check the free space.
  */
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
 {
-	acct_globals.needcheck = 1;
+	struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+
+	acct->needcheck = 1;
 }
 
 /*
  * Check the amount of free space and suspend/resume accordingly.
  */
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
 {
 	struct kstatfs sbuf;
 	int res;
@@ -110,11 +116,11 @@ static int check_free_space(struct file 
 	sector_t resume;
 	sector_t suspend;
 
-	spin_lock(&acct_globals.lock);
-	res = acct_globals.active;
-	if (!file || !acct_globals.needcheck)
+	spin_lock(&acct_lock);
+	res = acct->active;
+	if (!file || !acct->needcheck)
 		goto out;
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
 	/* May block */
 	if (vfs_statfs(file->f_dentry, &sbuf))
@@ -133,35 +139,35 @@ static int check_free_space(struct file 
 		act = 0;
 
 	/*
-	 * If some joker switched acct_globals.file under us we'ld better be
+	 * If some joker switched acct->file under us we'd better be
 	 * silent and _not_ touch anything.
 	 */
-	spin_lock(&acct_globals.lock);
-	if (file != acct_globals.file) {
+	spin_lock(&acct_lock);
+	if (file != acct->file) {
 		if (act)
 			res = act>0;
 		goto out;
 	}
 
-	if (acct_globals.active) {
+	if (acct->active) {
 		if (act < 0) {
-			acct_globals.active = 0;
+			acct->active = 0;
 			printk(KERN_INFO "Process accounting paused\n");
 		}
 	} else {
 		if (act > 0) {
-			acct_globals.active = 1;
+			acct->active = 1;
 			printk(KERN_INFO "Process accounting resumed\n");
 		}
 	}
 
-	del_timer(&acct_globals.timer);
-	acct_globals.needcheck = 0;
-	acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-	add_timer(&acct_globals.timer);
-	res = acct_globals.active;
+	del_timer(&acct->timer);
+	acct->needcheck = 0;
+	acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+	add_timer(&acct->timer);
+	res = acct->active;
 out:
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 	return res;
 }
 
@@ -169,41 +175,44 @@ out:
  * Close the old accounting file (if currently open) and then replace
  * it with file (if non-NULL).
  *
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 {
 	struct file *old_acct = NULL;
 
-	if (acct_globals.file) {
-		old_acct = acct_globals.file;
-		del_timer(&acct_globals.timer);
-		acct_globals.active = 0;
-		acct_globals.needcheck = 0;
-		acct_globals.file = NULL;
+	if (acct->file) {
+		old_acct = acct->file;
+		del_timer(&acct->timer);
+		acct->active = 0;
+		acct->needcheck = 0;
+		acct->file = NULL;
+		list_del(&acct->list);
 	}
 	if (file) {
-		acct_globals.file = file;
-		acct_globals.needcheck = 0;
-		acct_globals.active = 1;
+		acct->file = file;
+		acct->needcheck = 0;
+		acct->active = 1;
 		/* It's been deleted if it was used before so this is safe */
-		init_timer(&acct_globals.timer);
-		acct_globals.timer.function = acct_timeout;
-		acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-		add_timer(&acct_globals.timer);
+		setup_timer(&acct->timer, acct_timeout,
+			    (unsigned long)acct);
+		acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+		add_timer(&acct->timer);
+		list_add(&acct->list, &acct_list);
 	}
 	if (old_acct) {
 		mnt_unpin(old_acct->f_vfsmnt);
-		spin_unlock(&acct_globals.lock);
-		do_acct_process(old_acct);
+		spin_unlock(&acct_lock);
+		do_acct_process(acct, old_acct);
 		filp_close(old_acct, NULL);
-		spin_lock(&acct_globals.lock);
+		spin_lock(&acct_lock);
 	}
 }
 
-static int acct_on(char *name)
+static int acct_on(struct ve_struct *ve, char *name)
 {
 	struct file *file;
+	struct bsd_acct_struct *acct = NULL;
 	int error;
 
 	/* Difference from BSD - they don't do O_APPEND */
@@ -227,12 +236,26 @@ static int acct_on(char *name)
 		return error;
 	}
 
-	spin_lock(&acct_globals.lock);
+	if (!ve->bacct) {
+		/* try pre alloc */
+		acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+		if (!acct) {
+			filp_close(file, NULL);
+			return -ENOMEM;
+		}
+	}
+
+	spin_lock(&acct_lock);
+	if (!ve->bacct) {
+		ve->bacct = acct;
+		acct = NULL;
+	}
 	mnt_pin(file->f_vfsmnt);
-	acct_file_reopen(file);
-	spin_unlock(&acct_globals.lock);
+	acct_file_reopen(ve->bacct, file);
+	spin_unlock(&acct_lock);
 
 	mntput(file->f_vfsmnt);	/* it's pinned, now give up active reference */
+	kfree(acct);
 
 	return 0;
 }
@@ -251,6 +274,7 @@ static int acct_on(char *name)
 asmlinkage long sys_acct(const char __user *name)
 {
 	int error;
+	struct ve_struct *ve = get_exec_env();
 
 	if (!capable(CAP_SYS_PACCT))
 		return -EPERM;
@@ -259,14 +283,20 @@ asmlinkage long sys_acct(const char __us
 		char *tmp = getname(name);
 		if (IS_ERR(tmp))
 			return (PTR_ERR(tmp));
-		error = acct_on(tmp);
+		error = acct_on(ve, tmp);
 		putname(tmp);
 	} else {
+		struct bsd_acct_struct *acct;
+
+		acct = ve->bacct;
+		if (!acct)
+			return 0;
+
 		error = security_acct(NULL);
 		if (!error) {
-			spin_lock(&acct_globals.lock);
-			acct_file_reopen(NULL);
-			spin_unlock(&acct_globals.lock);
+			spin_lock(&acct_lock);
+			acct_file_reopen(acct, NULL);
+			spin_unlock(&acct_lock);
 		}
 	}
 	return error;
@@ -281,10 +311,14 @@ asmlinkage long sys_acct(const char __us
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
-	spin_lock(&acct_globals.lock);
-	if (acct_globals.file && acct_globals.file->f_vfsmnt == m)
-		acct_file_reopen(NULL);
-	spin_unlock(&acct_globals.lock);
+	struct bsd_acct_struct *acct, *temp;
+
+	spin_lock(&acct_lock);
+	list_for_each_entry_safe(acct, temp, &acct_list, list) {
+		if (acct->file && acct->file->f_vfsmnt == m)
+			acct_file_reopen(acct, NULL);
+	}
+	spin_unlock(&acct_lock);
 }
 
 /**
@@ -296,12 +330,14 @@ void acct_auto_close_mnt(struct vfsmount
  */
 void acct_auto_close(struct super_block *sb)
 {
-	spin_lock(&acct_globals.lock);
-	if (acct_globals.file &&
-	    acct_globals.file->f_vfsmnt->mnt_sb == sb) {
-		acct_file_reopen(NULL);
+	struct bsd_acct_struct *acct, *temp;
+
+	spin_lock(&acct_lock);
+	list_for_each_entry_safe(acct, temp, &acct_list, list) {
+		if (acct->file && acct->file->f_vfsmnt->mnt_sb == sb)
+			acct_file_reopen(acct, NULL);
 	}
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 }
 
 /*
@@ -418,7 +454,7 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct, struct file *file)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
@@ -433,7 +469,7 @@ static void do_acct_process(struct file 
 	 * First check to see if there is enough free_space to continue
 	 * the process accounting system.
 	 */
-	if (!check_free_space(file))
+	if (!check_free_space(acct, file))
 		return;
 
 	/*
@@ -480,8 +516,8 @@ static void do_acct_process(struct file 
 	ac.ac_gid16 = current->gid;
 #endif
 #if ACCT_VERSION==3
-	ac.ac_pid = current->tgid;
-	ac.ac_ppid = current->parent->tgid;
+	ac.ac_pid = get_task_tgid(current);
+	ac.ac_ppid = get_task_tgid(current->parent);
 #endif
 
 	spin_lock_irq(&current->sighand->siglock);
@@ -526,6 +562,21 @@ void acct_init_pacct(struct pacct_struct
 	pacct->ac_utime = pacct->ac_stime = cputime_zero;
 }
 
+void acct_exit_ve(struct bsd_acct_struct *acct)
+{
+	if (!acct)
+		return;
+
+	spin_lock(&acct_lock);
+	if (acct->file)
+		acct_file_reopen(acct, NULL);
+	spin_unlock(&acct_lock);
+
+	synchronize_rcu();
+	kfree(acct);
+}
+EXPORT_SYMBOL(acct_exit_ve);
+
 /**
  * acct_collect - collect accounting information into pacct_struct
  * @exitcode: task exit code
@@ -577,22 +628,23 @@ void acct_collect(long exitcode, int gro
 void acct_process(void)
 {
 	struct file *file = NULL;
+	struct bsd_acct_struct *acct = get_exec_env()->bacct;
 
 	/*
 	 * accelerate the common fastpath:
 	 */
-	if (!acct_globals.file)
+	if (!acct || !acct->file)
 		return;
 
-	spin_lock(&acct_globals.lock);
-	file = acct_globals.file;
+	spin_lock(&acct_lock);
+	file = acct->file;
 	if (unlikely(!file)) {
-		spin_unlock(&acct_globals.lock);
+		spin_unlock(&acct_lock);
 		return;
 	}
 	get_file(file);
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
-	do_acct_process(file);
+	do_acct_process(acct, acct->file);
 	fput(file);
 }
diff -upr kernel-2.6.18-417.el5.orig/kernel/audit.c kernel-2.6.18-417.el5-028stab121/kernel/audit.c
--- kernel-2.6.18-417.el5.orig/kernel/audit.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/audit.c	2017-01-13 08:40:29.000000000 -0500
@@ -53,6 +53,7 @@
 
 #include <net/sock.h>
 #include <net/netlink.h>
+#include <net/netlink_sock.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/selinux.h>
@@ -463,7 +464,7 @@ static int audit_prepare_user_tty(pid_t 
 	int err;
 
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_pid_all(pid);
 	err = -ESRCH;
 	if (!tsk)
 		goto out;
@@ -628,6 +629,9 @@ static int audit_receive_msg(struct sk_b
 	char			*ctx;
 	u32			len;
 
+	if (!ve_is_super(skb->owner_env))
+		return -ECONNREFUSED;
+
 	err = audit_netlink_ok(skb, msg_type);
 	if (err)
 		return err;
@@ -910,7 +914,7 @@ static int audit_receive_msg(struct sk_b
 		struct task_struct *tsk;
 
 		read_lock(&tasklist_lock);
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_all(pid);
 		if (!tsk)
 			err = -ESRCH;
 		else {
@@ -933,7 +937,7 @@ static int audit_receive_msg(struct sk_b
 		if (s->enabled != 0 && s->enabled != 1)
 			return -EINVAL;
 		read_lock(&tasklist_lock);
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_all(pid);
 		if (!tsk)
 			err = -ESRCH;
 		else {
@@ -984,6 +988,8 @@ static void audit_receive(struct sock *s
 	struct sk_buff  *skb;
 	unsigned int qlen;
 
+	BUG();
+
 	mutex_lock(&audit_cmd_mutex);
 
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
@@ -994,6 +1000,14 @@ static void audit_receive(struct sock *s
 	mutex_unlock(&audit_cmd_mutex);
 }
 
+static void audit_rcv(struct sk_buff *skb)
+{
+	mutex_lock(&audit_cmd_mutex);
+	audit_receive_skb(skb);
+	mutex_unlock(&audit_cmd_mutex);
+	kfree_skb(skb);
+}
+
 #ifdef CONFIG_AUDITSYSCALL
 static const struct inotify_operations audit_inotify_ops = {
 	.handle_event	= audit_handle_ievent,
@@ -1005,6 +1019,7 @@ static const struct inotify_operations a
 static int __init audit_init(void)
 {
 	int i;
+	struct netlink_sock *nlk;
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
@@ -1015,6 +1030,9 @@ static int __init audit_init(void)
 	else
 		audit_sock->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
 
+	nlk = nlk_sk(audit_sock);
+	nlk->netlink_rcv = audit_rcv;
+
 	skb_queue_head_init(&audit_skb_queue);
 	audit_initialized = 1;
 	audit_enabled = audit_default;
diff -upr kernel-2.6.18-417.el5.orig/kernel/auditfilter.c kernel-2.6.18-417.el5-028stab121/kernel/auditfilter.c
--- kernel-2.6.18-417.el5.orig/kernel/auditfilter.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/auditfilter.c	2017-01-13 08:40:24.000000000 -0500
@@ -177,7 +177,7 @@ static struct audit_parent *audit_init_p
 	inotify_init_watch(&parent->wdata);
 	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
 	get_inotify_watch(&parent->wdata);
-	wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
+	wd = inotify_add_watch_dget(audit_ih, &parent->wdata, ndp->dentry, ndp->mnt,
 			       AUDIT_IN_WATCH);
 	if (wd < 0) {
 		audit_free_parent(&parent->wdata);
diff -upr kernel-2.6.18-417.el5.orig/kernel/capability.c kernel-2.6.18-417.el5-028stab121/kernel/capability.c
--- kernel-2.6.18-417.el5.orig/kernel/capability.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/capability.c	2017-01-13 08:40:21.000000000 -0500
@@ -15,16 +15,18 @@
 #include <asm/uaccess.h>
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
-kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
-
 EXPORT_SYMBOL(securebits);
+#ifndef CONFIG_VE
+kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
 EXPORT_SYMBOL(cap_bset);
+#endif
 
 /*
  * This lock protects task->cap_* for all tasks including current.
  * Locking rule: acquire this prior to tasklist_lock.
  */
-static DEFINE_SPINLOCK(task_capability_lock);
+DEFINE_SPINLOCK(task_capability_lock);
+EXPORT_SYMBOL(task_capability_lock);
 
 /*
  * For sys_getproccap() and sys_setproccap(), any of the three
@@ -67,8 +69,8 @@ asmlinkage long sys_capget(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock); 
 
-     if (pid && pid != current->pid) {
-	     target = find_task_by_pid(pid);
+     if (pid && pid != virt_pid(current)) {
+	     target = find_task_by_pid_ve(pid);
 	     if (!target) {
 	          ret = -ESRCH;
 	          goto out;
@@ -100,9 +102,13 @@ static inline int cap_set_pg(int pgrp, k
 	int ret = -EPERM;
 	int found = 0;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return ret;
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
 		target = g;
-		while_each_thread(g, target) {
+		while_each_thread_ve(g, target) {
 			if (!security_capset_check(target, effective,
 							inheritable,
 							permitted)) {
@@ -113,7 +119,7 @@ static inline int cap_set_pg(int pgrp, k
 			}
 			found = 1;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
 
 	if (!found)
 	     ret = 0;
@@ -132,7 +138,7 @@ static inline int cap_set_all(kernel_cap
      int ret = -EPERM;
      int found = 0;
 
-     do_each_thread(g, target) {
+     do_each_thread_ve(g, target) {
              if (target == current || target->pid == 1)
                      continue;
              found = 1;
@@ -141,7 +147,7 @@ static inline int cap_set_all(kernel_cap
 		     continue;
 	     ret = 0;
 	     security_capset_set(target, effective, inheritable, permitted);
-     } while_each_thread(g, target);
+     } while_each_thread_ve(g, target);
 
      if (!found)
 	     ret = 0;
@@ -188,7 +194,7 @@ asmlinkage long sys_capset(cap_user_head
      if (get_user(pid, &header->pid))
 	     return -EFAULT; 
 
-     if (pid && pid != current->pid && !capable(CAP_SETPCAP))
+     if (pid && pid != virt_pid(current) && !capable(CAP_SETPCAP))
              return -EPERM;
 
      if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -199,8 +205,8 @@ asmlinkage long sys_capset(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock);
 
-     if (pid > 0 && pid != current->pid) {
-          target = find_task_by_pid(pid);
+     if (pid > 0 && pid != virt_pid(current)) {
+          target = find_task_by_pid_ve(pid);
           if (!target) {
                ret = -ESRCH;
                goto out;
diff -upr kernel-2.6.18-417.el5.orig/kernel/compat.c kernel-2.6.18-417.el5-028stab121/kernel/compat.c
--- kernel-2.6.18-417.el5.orig/kernel/compat.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/compat.c	2017-01-13 08:40:24.000000000 -0500
@@ -22,6 +22,8 @@
 #include <linux/security.h>
 #include <linux/timex.h>
 #include <linux/migrate.h>
+#include <linux/hrtimer.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <linux/module.h>
@@ -40,61 +42,75 @@ int put_compat_timespec(const struct tim
 			__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static long compat_nanosleep_restart(struct restart_block *restart)
+long compat_nanosleep_restart(struct restart_block *restart)
 {
-	unsigned long expire = restart->arg0, now = jiffies;
 	struct compat_timespec __user *rmtp;
+	struct timespec tu;
+	void *rfn_save = restart->fn;
+	struct hrtimer_sleeper sleeper;
+	ktime_t rem;
 
-	/* Did it expire while we handled signals? */
-	if (!time_after(expire, now))
-		return 0;
+	restart->fn = do_no_restart_syscall;
+
+	hrtimer_init(&sleeper.timer, (clockid_t) restart->arg3, HRTIMER_ABS);
+
+	sleeper.timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
+	hrtimer_init_sleeper(&sleeper, current);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	rem = schedule_hrtimer(&sleeper.timer, HRTIMER_ABS);
 
-	expire = schedule_timeout_interruptible(expire - now);
-	if (expire == 0)
+	if (rem.tv64 <= 0)
 		return 0;
 
-	rmtp = (struct compat_timespec __user *)restart->arg1;
-	if (rmtp) {
-		struct compat_timespec ct;
-		struct timespec t;
-
-		jiffies_to_timespec(expire, &t);
-		ct.tv_sec = t.tv_sec;
-		ct.tv_nsec = t.tv_nsec;
-		if (copy_to_user(rmtp, &ct, sizeof(ct)))
-			return -EFAULT;
-	}
-	/* The 'restart' block is already filled in */
+	rmtp = (struct compat_timespec __user *) restart->arg2;
+	tu = ktime_to_timespec(rem);
+	if (rmtp && put_compat_timespec(&tu, rmtp))
+		return -EFAULT;
+
+	restart->fn = rfn_save;
+
+	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL_GPL(compat_nanosleep_restart);
 
 asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
 		struct compat_timespec __user *rmtp)
 {
 	struct timespec t;
 	struct restart_block *restart;
-	unsigned long expire;
+	struct hrtimer_sleeper sleeper;
+	ktime_t rem;
 
 	if (get_compat_timespec(&t, rqtp))
 		return -EFAULT;
 
-	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
+	if (!timespec_valid(&t))
 		return -EINVAL;
 
-	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-	expire = schedule_timeout_interruptible(expire);
-	if (expire == 0)
+	hrtimer_init(&sleeper.timer, CLOCK_MONOTONIC, HRTIMER_REL);
+
+	sleeper.timer.expires = timespec_to_ktime(t);
+	hrtimer_init_sleeper(&sleeper, current);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	rem = schedule_hrtimer(&sleeper.timer, HRTIMER_REL);
+	if (rem.tv64 <= 0)
 		return 0;
 
-	if (rmtp) {
-		jiffies_to_timespec(expire, &t);
-		if (put_compat_timespec(&t, rmtp))
-			return -EFAULT;
-	}
+	t = ktime_to_timespec(rem);
+
+	if (rmtp && put_compat_timespec(&t, rmtp))
+		return -EFAULT;
+
 	restart = &current_thread_info()->restart_block;
 	restart->fn = compat_nanosleep_restart;
-	restart->arg0 = jiffies + expire;
-	restart->arg1 = (unsigned long) rmtp;
+	restart->arg0 = sleeper.timer.expires.tv64 & 0xFFFFFFFF;
+	restart->arg1 = sleeper.timer.expires.tv64 >> 32;
+	restart->arg2 = (unsigned long) rmtp;
+	restart->arg3 = (unsigned long) sleeper.timer.base->index;
+
 	return -ERESTART_RESTARTBLOCK;
 }
 
@@ -949,4 +965,4 @@ void __user *compat_alloc_user_space(uns
 
 	return ptr;
 }
-EXPORT_SYMBOL_GPL(compat_alloc_user_space);
+EXPORT_SYMBOL(compat_alloc_user_space);
diff -upr kernel-2.6.18-417.el5.orig/kernel/configs.c kernel-2.6.18-417.el5-028stab121/kernel/configs.c
--- kernel-2.6.18-417.el5.orig/kernel/configs.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/configs.c	2017-01-13 08:40:19.000000000 -0500
@@ -79,8 +79,7 @@ static int __init ikconfig_init(void)
 	struct proc_dir_entry *entry;
 
 	/* create the current config file */
-	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
-				  &proc_root);
+	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
 	if (!entry)
 		return -ENOMEM;
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_conntrack.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_conntrack.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_conntrack.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_conntrack.c	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,366 @@
+/*
+ *
+ *  kernel/cpt/cpt_conntrack.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+/* How does it work?
+ *
+ * Network is disabled, so new conntrack entries will not appear.
+ * However, some of them can disappear because of timeouts.
+ *
+ * So, we take read_lock, collect all required information atomically,
+ * essentially, creating parallel "refcount" structures holding pointers.
+ * We delete conntrack timers as well, so the structures cannot disappear
+ * after releasing the lock. Now, after releasing lock we can dump everything
+ * safely. And on exit we restore timers to their original values.
+ *
+ * Note, this approach is not going to work in VE0.
+ */
+
+struct ct_holder
+{
+	struct ct_holder *next;
+	struct ip_conntrack_tuple_hash *cth;
+	int index;
+};
+
+static void encode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple)
+{
+	v->cpt_dst = tuple->dst.ip;
+	v->cpt_dstport = tuple->dst.u.all;
+	v->cpt_protonum = tuple->dst.protonum;
+	v->cpt_dir = tuple->dst.dir;
+
+	v->cpt_src = tuple->src.ip;
+	v->cpt_srcport = tuple->src.u.all;
+}
+
+static int dump_one_expect(struct cpt_ip_connexpect_image *v,
+			   struct ip_conntrack_expect *exp,
+			   int sibling, cpt_context_t *ctx)
+{
+	int err = 0;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_NET_CONNTRACK_EXPECT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	encode_tuple(&v->cpt_tuple, &exp->tuple);
+	encode_tuple(&v->cpt_mask, &exp->mask);
+	v->cpt_sibling_conntrack = sibling;
+	v->cpt_flags = exp->flags;
+	v->cpt_seq = exp->id;
+	v->cpt_dir = 0;
+	v->cpt_manip_proto = 0;
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+	v->cpt_manip_proto = exp->saved_proto.all;
+	v->cpt_dir = exp->dir;
+#endif
+	v->cpt_timeout = 0;
+	if (exp->master->helper->timeout)
+		v->cpt_timeout = exp->timeout.expires - jiffies;
+	return err;
+}
+
+/* NOTE. We use one page to dump list of expectations. This may be not enough
+ * in theory. In practice there is only one expectation per conntrack record.
+ * Moreover, taking into account that _ALL_ of the expectations are saved in one
+ * global list, which is looked up for each incoming/outgoing packet, the system
+ * would be severely dead if even one conntrack had that many
+ * expectations. In short, I am not going to repair this.
+ */
+
+static int dump_expect_list(struct ip_conntrack *ct, struct ct_holder *list,
+			    cpt_context_t *ctx)
+{
+	int err = 0;
+	unsigned long pg;
+	struct cpt_ip_connexpect_image *v;
+	struct ip_conntrack_expect *exp;
+
+	if (ct->expecting == 0)
+		return err;
+	if (ct->expecting*sizeof(struct cpt_ip_connexpect_image) > PAGE_SIZE)
+		return -ENOBUFS;
+
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+	v = (struct cpt_ip_connexpect_image *)pg;
+
+	read_lock_bh(&ip_conntrack_lock);
+	list_for_each_entry(exp, &ve_ip_conntrack_expect_list, list) {
+		int sibling;
+
+		if (exp->master != ct)
+			continue;
+
+		if (ct->helper == NULL) {
+			eprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			err = -EINVAL;
+			break;
+		}
+
+		sibling = 0;
+#if 0
+		/* That's all? No need to calculate sibling? */
+		if (exp->sibling) {
+			struct ct_holder *c;
+			for (c = list; c; c = c->next) {
+				if (tuplehash_to_ctrack(c->cth) == exp->sibling) {
+					sibling = c->index;
+					break;
+				}
+			}
+			/* NOTE: exp->sibling may not be "confirmed" and, hence,
+			 * out of hash table. We should just ignore such a sibling,
+			 * the connection is going to be retried, the packet
+			 * apparently was lost somewhere.
+			 */
+			if (sibling == 0)
+				dprintk_ctx("sibling conntrack is not found\n");
+		}
+#endif
+
+		/* If the expectation still does not have exp->sibling
+		 * and timer is not running, it is about to die on another
+		 * cpu. Skip it. */
+		if (!sibling &&
+		    ct->helper->timeout &&
+		    !timer_pending(&exp->timeout)) {
+			dprintk_ctx("conntrack: expectation: no timer\n");
+			continue;
+		}
+
+		err = dump_one_expect(v, exp, sibling, ctx);
+		if (err)
+			break;
+
+		v++;
+	}
+	read_unlock_bh(&ip_conntrack_lock);
+
+	if (err == 0 && (unsigned long)v != pg)
+		ctx->write((void*)pg, (unsigned long)v - pg, ctx);
+
+	free_page(pg);
+	return err;
+}
+
+static int dump_one_ct(struct ct_holder *c, struct ct_holder *list,
+		       cpt_context_t *ctx)
+{
+	struct ip_conntrack_tuple_hash *h = c->cth;
+	struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+	struct cpt_ip_conntrack_image v;
+	int err = 0;
+
+	if (sizeof(v.cpt_proto_data) != sizeof(ct->proto)) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_CONNTRACK;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	read_lock_bh(&ip_conntrack_lock);
+	v.cpt_status = ct->status;
+	v.cpt_timeout = ct->timeout.expires - jiffies;
+	v.cpt_ct_helper = (ct->helper != NULL);
+	v.cpt_index = c->index;
+	v.cpt_id = ct->id;
+	v.cpt_mark = 0;
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	v.cpt_mark = ct->mark;
+#endif
+	encode_tuple(&v.cpt_tuple[0], &ct->tuplehash[0].tuple);
+	encode_tuple(&v.cpt_tuple[1], &ct->tuplehash[1].tuple);
+	memcpy(&v.cpt_proto_data, &ct->proto, sizeof(v.cpt_proto_data));
+	memcpy(&v.cpt_help_data, &ct->help, sizeof(v.cpt_help_data));
+
+	v.cpt_masq_index = 0;
+	v.cpt_initialized = 0;
+	v.cpt_num_manips = 0;
+	v.cpt_nat_helper = 0;
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	v.cpt_masq_index = ct->nat.masq_index;
+#endif
+	/* "help" data is used by pptp, difficult to support */
+	v.cpt_nat_seq[0].cpt_correction_pos = ct->nat.info.seq[0].correction_pos;
+	v.cpt_nat_seq[0].cpt_offset_before = ct->nat.info.seq[0].offset_before;
+	v.cpt_nat_seq[0].cpt_offset_after = ct->nat.info.seq[0].offset_after;
+	v.cpt_nat_seq[1].cpt_correction_pos = ct->nat.info.seq[1].correction_pos;
+	v.cpt_nat_seq[1].cpt_offset_before = ct->nat.info.seq[1].offset_before;
+	v.cpt_nat_seq[1].cpt_offset_after = ct->nat.info.seq[1].offset_after;
+#endif
+	read_unlock_bh(&ip_conntrack_lock);
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	err = dump_expect_list(ct, list, ctx);
+
+	cpt_close_object(ctx);
+	return err;
+}
+
+int cpt_dump_ip_conntrack(cpt_context_t * ctx)
+{
+	struct ct_holder *ct_list = NULL;
+	struct ct_holder *c, **cp;
+	int err = 0;
+	int index = 0;
+	int idx;
+
+	if (get_exec_env()->_ip_conntrack == NULL)
+		return 0;
+
+	for (idx = atomic_read(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count)); idx >= 0; idx--) {
+		c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+		if (c == NULL) {
+			err = -ENOMEM;
+			goto done;
+		}
+		memset(c, 0, sizeof(struct ct_holder));
+		c->next = ct_list;
+		ct_list = c;
+	}
+
+	c = ct_list;
+
+	read_lock_bh(&ip_conntrack_lock);
+	for (idx = 0; idx < ip_conntrack_htable_size; idx++) {
+		struct ip_conntrack_tuple_hash *h;
+		list_for_each_entry(h, &ve_ip_conntrack_hash[idx], list) {
+			/* Skip reply tuples, they are covered by original
+			 * direction. */
+			if (DIRECTION(h))
+				continue;
+
+			/* Oops, we do not have enough holders...
+			 * This should be impossible. */
+			if (unlikely(c == NULL)) {
+				read_unlock_bh(&ip_conntrack_lock);
+				eprintk_ctx("unexpected conntrack appeared\n");
+				err = -ENOMEM;
+				goto done;
+			}
+
+			/* If timer is not running, it means that it
+			 * has just been scheduled on another cpu.
+			 * We should skip this conntrack, it is about to be
+			 * destroyed. */
+			if (!del_timer(&tuplehash_to_ctrack(h)->timeout)) {
+				dprintk_ctx("conntrack: no timer\n");
+				continue;
+			}
+
+			/* Timer is deleted. refcnt is _not_ decreased.
+			 * We are going to restore the timer on exit
+			 * from this function. */
+			c->cth = h;
+			c->index = ++index;
+			c = c->next;
+		}
+	}
+	read_unlock_bh(&ip_conntrack_lock);
+
+	/* No conntracks? Good. */
+	if (index == 0)
+		goto done;
+
+	/* Comb the list a little. */
+	cp = &ct_list;
+	while ((c = *cp) != NULL) {
+		/* Discard unused entries; they can appear, if some
+		 * entries were timed out since we preallocated the list.
+		 */
+		if (c->cth == NULL) {
+			*cp = c->next;
+			kfree(c);
+			continue;
+		}
+
+		/* Move conntracks attached to expectations to the beginning
+		 * of the list. */
+		if (tuplehash_to_ctrack(c->cth)->master && c != ct_list) {
+			*cp = c->next;
+			c->next = ct_list;
+			ct_list = c;
+			dprintk_ctx("conntrack: %d moved in list\n", c->index);
+			continue;
+		}
+		cp = &c->next;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_CONNTRACK);
+
+	for (c = ct_list; c; c = c->next) {
+		err = dump_one_ct(c, ct_list, ctx);
+		if (err)
+			goto done;
+	}
+
+	cpt_close_section(ctx);
+
+done:
+	while ((c = ct_list) != NULL) {
+		ct_list = c->next;
+		if (c->cth) {
+			/* Restore timer. refcnt is preserved. */
+			add_timer(&tuplehash_to_ctrack(c->cth)->timeout);
+		}
+		kfree(c);
+	}
+	return err;
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_context.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_context.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_context.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_context.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,273 @@
+/*
+ *
+ *  kernel/cpt/cpt_context.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+static void file_write(const void *addr, size_t count, struct cpt_context *ctx)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->write(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)
+		ctx->write_error = err < 0 ? err : -EIO;
+}
+
+static void file_pwrite(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->write(file, addr, count, &pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)
+		ctx->write_error = err < 0 ? err : -EIO;
+}
+
+static void file_align(struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+
+	if (file)
+		file->f_pos = CPT_ALIGN(file->f_pos);
+}
+
+static loff_t lookup_cpt_object_pos(enum _cpt_object_type type, void *p,
+		struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(type, p, ctx);
+	return obj->o_pos;
+}
+
+struct cpt_ops cpt_ops = {
+	.write = file_write,
+	.lookup_object = lookup_cpt_object_pos,
+};
+
+void cpt_context_init(struct cpt_context *ctx)
+{
+	int i;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->write = file_write;
+	ctx->pwrite = file_pwrite;
+	ctx->align = file_align;
+	for (i=0; i < CPT_SECT_MAX; i++)
+		ctx->sections[i] = CPT_NULL;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	init_completion(&ctx->pgin_notify);
+#endif
+	cpt_object_init(ctx);
+}
+
+int cpt_open_dumpfile(struct cpt_context *ctx)
+{
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf == NULL)
+		return -ENOMEM;
+	__cpt_release_buf(ctx);
+	return 0;
+}
+
+int cpt_close_dumpfile(struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		fput(ctx->file);
+		ctx->file = NULL;
+	}
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	if (ctx->write_error)
+		eprintk_ctx("error while writing dump file: %d\n", ctx->write_error);
+	return ctx->write_error;
+}
+
+int cpt_major_hdr_out(struct cpt_context *ctx)
+{
+	struct cpt_major_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_image_version = CPT_CURRENT_VERSION;
+#ifdef CONFIG_X86_64
+	hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
+#elif defined(CONFIG_X86_32)
+	hdr.cpt_os_arch = CPT_OS_ARCH_I386;
+#elif defined(CONFIG_IA64)
+	hdr.cpt_os_arch = CPT_OS_ARCH_IA64;
+#else
+#error	Arch is not supported
+#endif
+	hdr.cpt_ve_features = (__u32)ctx->features;
+	hdr.cpt_ve_features2 = (__u32)(ctx->features>>32);
+	hdr.cpt_pagesize = (__u16)PAGE_SIZE;
+	hdr.cpt_hz = HZ;
+	hdr.cpt_start_jiffies64 = ctx->virt_jiffies64;
+	hdr.cpt_start_sec = ctx->start_time.tv_sec;
+	hdr.cpt_start_nsec = ctx->start_time.tv_nsec;
+	hdr.cpt_cpu_caps[0] = ctx->src_cpu_flags;
+	hdr.cpt_kernel_config[0] = ctx->kernel_config_flags;
+	hdr.cpt_iptables_mask = ctx->iptables_mask;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	return 0;
+}
+
+int cpt_close_section(struct cpt_context *ctx)
+{
+	if (ctx->file && ctx->current_section >= 0) {
+		__u64 next = ctx->file->f_pos - ctx->current_section;
+		ctx->pwrite(&next, 8, ctx, ctx->current_section);
+		ctx->current_section = -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_section);
+
+int cpt_open_section(struct cpt_context *ctx, __u32 type)
+{
+	struct cpt_section_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_close_section(ctx);
+
+	ctx->current_section = ctx->file->f_pos;
+	ctx->sections[type] = ctx->current_section;
+
+	hdr.cpt_next = 0;
+	hdr.cpt_section = type;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_align = 0;
+	ctx->write(&hdr, sizeof(hdr), ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_section);
+
+
+int cpt_close_object(struct cpt_context *ctx)
+{
+	if (ctx->file && ctx->current_object >= 0) {
+		__u64 next = ctx->file->f_pos - ctx->current_object;
+		ctx->pwrite(&next, 8, ctx, ctx->current_object);
+		ctx->current_object = -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_object);
+
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_close_object(ctx);
+
+	ctx->current_object = ctx->file->f_pos;
+	if (obj)
+		cpt_obj_setpos(obj, ctx->current_object, ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_object);
+
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		*saved = ctx->current_object;
+		ctx->current_object = ctx->file->f_pos;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_push_object);
+
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx)
+{
+	ctx->current_object = *saved;
+	return 0;
+}
+EXPORT_SYMBOL(cpt_pop_object);
+
+int cpt_dump_tail(struct cpt_context *ctx)
+{
+	struct cpt_major_tail hdr;
+	int i;
+
+	if (ctx->file == NULL)
+		return 0;
+
+	cpt_open_section(ctx, CPT_SECT_TRAILER);
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.cpt_next = sizeof(hdr);
+	hdr.cpt_object = CPT_OBJ_TRAILER;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_VOID;
+	hdr.cpt_lazypages = 0;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	hdr.cpt_lazypages = ctx->lazypages;
+#endif
+	hdr.cpt_64bit = ctx->tasks64;
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	hdr.cpt_nsect = CPT_SECT_MAX_INDEX;
+	for (i = 0; i < CPT_SECT_MAX_INDEX; i++)
+		hdr.cpt_sections[i] = ctx->sections[i];
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_context.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_context.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_context.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_context.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,239 @@
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <ub/beancounter.h>
+
+#define	CPT_CTX_ERROR		-1
+#define	CPT_CTX_IDLE		0
+#define CPT_CTX_SUSPENDING	1
+#define	CPT_CTX_SUSPENDED	2
+#define CPT_CTX_DUMPING		3
+#define CPT_CTX_UNDUMPING	4
+#define CPT_CTX_UNDUMPED	5
+
+#define CPT_TID(tsk)   (tsk)->pid, virt_pid(tsk), (tsk)->comm
+#define CPT_FID		"%d,%d(%s)"
+
+enum {
+	CPT_DOBJ_VFSMOUNT_REF,
+	CPT_DOBJ_FILE,
+	CPT_DOBJ_MAX,
+};
+
+struct cpt_delayed_context {
+	int ve_id;
+	struct task_struct *dfs_daemon;
+	struct completion dfs_notify;
+	struct list_head object_array[CPT_DOBJ_MAX];
+};
+
+void destroy_delayed_context(struct cpt_delayed_context *);
+
+typedef struct cpt_context
+{
+	struct list_head ctx_list;
+	int	refcount;
+	int	ctx_state;
+	int	objcount;
+	int	sticky;
+	struct semaphore main_sem;
+
+	struct file *errorfile;
+	struct file *statusfile;
+	struct file *lockfile;
+
+	int	errno;
+	char	*error_msg;
+	loff_t	err_offset;
+
+	struct file	*file;
+	char		*tmpbuf;
+	int		pagesize;
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	int		iter_done;
+	void		*iter_dir;
+	struct user_beancounter *iter_ub;
+	int		iter_shm_start;
+#endif
+	loff_t		current_section;
+	loff_t		current_object;
+
+	loff_t		sections[CPT_SECT_MAX];
+
+	__u32		errormask;
+	__u32		write_error;
+
+	struct list_head object_array[CPT_OBJ_MAX];
+
+	void		(*write)(const void *addr, size_t count, struct cpt_context *ctx);
+	void		(*pwrite)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	ssize_t		(*read)(void *addr, size_t count, struct cpt_context *ctx);
+	ssize_t		(*pread)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	void		(*align)(struct cpt_context *ctx);
+	int		ve_id;
+	int		contextid;
+	struct timespec cpt_monotonic_time; /* Host monotonic time at the moment of cpt/rst
+					     * corresponding to start_time */
+	__u64		virt_jiffies64;	/* Virtual jiffies64. It is == cpt_jiffies64 when
+					 * VE did not migrate. */
+	struct timespec	start_time;
+	struct timespec delta_time;
+	__s64		delta_nsec;
+	int		image_version;
+	__u16		image_arch;
+	__u64		iptables_mask;
+	__u64		features;
+
+#define CPT_ANONVMA_HBITS (sizeof(void*) == 4 ? 10 : 9)
+#define CPT_ANONVMA_HSIZE (1<<CPT_ANONVMA_HBITS)
+	struct hlist_head *anonvmas;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	struct file	*pagein_file_in;
+	struct file	*pagein_file_out;
+	int		lazy_vm;
+	int		lazypages;
+	int		lazytype;
+	struct task_struct	*pgin_task;
+	unsigned long	last_pagein;
+	struct pagein_desc	**pgin_dir;
+	struct pgin_device	*pagein_dev;
+	struct completion	pgin_notify;
+	struct completion	*pgind_completion;
+	struct swap_info_struct	*pgin_swp;
+#endif
+	int		tasks64;
+	__u32		src_cpu_flags;
+	__u32		dst_cpu_flags;
+	__u32		kernel_config_flags;
+
+	__u32		last_vpid;
+
+	struct filejob  *filejob_queue;
+
+	int		slm_count;
+
+	char		*vdso;
+
+	struct cpt_delayed_context *dctx;
+
+#ifdef CONFIG_USER_RESOURCE
+	/* Store here ubc limits and barriers during undumping,
+	   and restore them before resuming */
+	struct ubparm	saved_ubc[UB_RESOURCES];
+#endif
+
+#define CPT_MAX_LINKDIRS	1
+	struct file	*linkdirs[CPT_MAX_LINKDIRS];
+	int		linkdirs_num;
+	unsigned int	linkcnt; /* for create hardlinked files */
+	int	hardlinked_on;
+} cpt_context_t;
+
+typedef struct {
+	int pid;
+	cpt_context_t *ctx;
+	struct completion done;
+} pagein_info_t;
+
+int pagein_info_printf(char *buf, cpt_context_t *ctx);
+
+int cpt_open_dumpfile(struct cpt_context *);
+int cpt_close_dumpfile(struct cpt_context *);
+int rst_open_dumpfile(struct cpt_context *);
+void rst_close_dumpfile(struct cpt_context *);
+void cpt_context_init(struct cpt_context *);
+void rst_context_init(struct cpt_context *);
+void cpt_context_destroy(struct cpt_context *);
+
+void rst_report_error(int err, cpt_context_t *ctx);
+
+
+int cpt_major_hdr_out(struct cpt_context *ctx);
+int cpt_dump_tail(struct cpt_context *ctx);
+int cpt_close_section(struct cpt_context *ctx);
+int cpt_open_section(struct cpt_context *ctx, __u32 type);
+int cpt_close_object(struct cpt_context *ctx);
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx);
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx);
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx);
+
+int rst_get_section(int type, struct cpt_context * ctx, loff_t *, loff_t *);
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx);
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx);
+void rst_put_name(__u8 *name, struct cpt_context *ctx);
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx);
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx);
+
+#define rst_get_object(type, pos, tmp, ctx) \
+ _rst_get_object((type), (pos), (tmp), sizeof(*(tmp)), (ctx))
+
+extern int debug_level;
+
+#define cpt_printk(lvl, fmt, args...)	do {	\
+		if (lvl <= debug_level)		\
+			printk(fmt, ##args);	\
+	} while (0)
+
+#define dprintk(a...) cpt_printk(3, "CPT DBG: " a)
+#define dprintk_ctx(f, arg...) dprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define wprintk(a...) cpt_printk(2, "CPT WRN: " a)
+#define wprintk_ctx(f, arg...) wprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define iprintk(a...) cpt_printk(1, "CPT INF: " a)
+#define iprintk_ctx(f, arg...) iprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define eprintk(a...) cpt_printk(1, "CPT ERR: " a)
+#define eprintk_ctx(f, arg...)						\
+do {									\
+	eprintk("%p,%u :" f, ctx, ctx->ve_id, ##arg);			\
+	if (ctx->error_msg && ctx->err_offset < PAGE_SIZE)		\
+		ctx->err_offset += snprintf((char*)(ctx->error_msg +	\
+				ctx->err_offset),			\
+			       	PAGE_SIZE - ctx->err_offset,		\
+				"Error: " f, ##arg);			\
+} while(0)
+
+#define CPT_TMPBUF_FREE 0x789adf12
+#define CPT_TMPBUF_BUSY 0xabcd9876
+
+static inline void *cpt_get_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_FREE);
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_BUSY;
+	return buf;
+}
+
+static inline void __cpt_release_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
+}
+
+static inline void cpt_release_buf(cpt_context_t *ctx)
+{
+	void *buf = ctx->tmpbuf;
+
+	BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_BUSY);
+	*(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
+}
+
+static inline void cpt_flush_error(cpt_context_t *ctx)
+{
+	mm_segment_t oldfs;
+
+	if (ctx->errorfile && ctx->error_msg && ctx->err_offset) {
+		if (ctx->errorfile->f_op && ctx->errorfile->f_op->write) {
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);
+			ctx->errorfile->f_op->write(ctx->errorfile,
+				ctx->error_msg, ctx->err_offset,
+				&ctx->errorfile->f_pos);
+			set_fs(oldfs);
+		}
+		ctx->error_msg[0] = 0;
+		ctx->err_offset = 0;
+	}
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_dump.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_dump.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_dump.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_dump.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,1452 @@
+/*
+ *
+ *  kernel/cpt/cpt_dump.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <ub/ub_task.h>
+#include <linux/cpt_image.h>
+#include <linux/nsproxy.h>
+#include <linux/namespace.h>
+#include <linux/netdevice.h>
+#include <linux/nfcalls.h>
+#include <linux/dcache.h>
+#ifdef CONFIG_UTRACE
+#include <linux/utrace.h>
+#endif
+#include <linux/ptrace.h>
+#include <linux/ip.h>
+#include <linux/ve_nfs.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_net.h"
+#include "cpt_socket.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+
+static int vps_child_level(struct task_struct *root, struct task_struct *c)
+{
+	int level = 0;
+	int veid = VE_TASK_INFO(c)->owner_env->veid;
+
+	while (VE_TASK_INFO(c)->owner_env->veid == veid) {
+		if (c->pid != c->tgid)
+			c = c->group_leader;
+		if (c == root)
+			return level;
+
+		c = c->parent;
+		level++;
+	}
+	return -1;
+}
+
+static inline int freezable(struct task_struct * p)
+{
+	if (p->exit_state)
+		return 0;
+
+	switch (p->state) {
+	case EXIT_ZOMBIE:
+	case EXIT_DEAD:
+	case TASK_STOPPED:
+#if TASK_TRACED != TASK_STOPPED
+	case TASK_TRACED:
+#endif
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+void cpt_wake_ve(void)
+{
+	struct task_struct *p, *g;
+
+	do_each_thread_ve(g, p) {
+		spin_lock_irq(&p->sighand->siglock);
+		if (p->flags & PF_FROZEN) {
+			p->flags &= ~PF_FROZEN;
+			wake_up_process(p);
+		}
+		spin_unlock_irq(&p->sighand->siglock);
+	} while_each_thread_ve(g, p);
+}
+EXPORT_SYMBOL(cpt_wake_ve);
+
+
+
+#ifdef CONFIG_UTRACE
+static int check_utrace(struct task_struct *tsk, struct task_struct *root,
+			cpt_context_t *ctx)
+{
+	int err = 0;
+	struct utrace *utrace;
+	extern struct utrace_engine_ops ptrace_utrace_ops;
+
+	rcu_read_lock();
+	utrace = rcu_dereference(tsk->utrace);
+	smp_rmb();
+
+	if (utrace) {
+#ifndef CONFIG_PTRACE
+		err = -EBUSY;
+#else
+		struct utrace_attached_engine *engine;
+
+		spin_lock(&utrace->lock);
+		list_for_each_entry_rcu(engine, &utrace->engines, entry) {
+			struct ptrace_state *state;
+
+			if (engine->ops != &ptrace_utrace_ops) {
+				err = -EBUSY;
+				break;
+			}
+			state = (struct ptrace_state *) engine->data;
+			if (!state) {
+				err = -ERANGE;
+				break;
+			}
+			if (!state->parent ||
+			    vps_child_level(root, state->parent) < 0) {
+				err = -EBUSY;
+				break;
+			}
+		}
+		spin_unlock(&utrace->lock);
+#endif
+	}
+	rcu_read_unlock();
+	return err;
+}
+#else
+static int check_utrace(struct task_struct *tsk, struct task_struct *root,
+			cpt_context_t *ctx)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Some comment is necessary about PF_FREEZE,PF_FROZEN,TIF_FREEZE...
+ *
+ * SWSUSP uses PF_FREEZE flag in tsk->flags raising it in context
+ * of another process. Apparently, it is unacceptable on SMP.
+ * Let's take freeze_processes() in kernel/power/process.c as an example.
+ * Unserialized modifications tsk->flags easily
+ * (believe or not, but it happens with probability of almost 100% :-))
+ * creates the situation when setting PF_FREEZE in freeze_processes(),
+ * which quickly spins raising PF_FREEZE of all the processes,
+ * _clears_ PF_FROZEN just set in refrigerator(), so that suspend deadlocks.
+ *
+ * So, to make things clean, we require that those flags may be modified
+ * only under tsk->sighand->siglock, which is quite natural because PF_FREEZE
+ * is just a kind of signal.
+ *
+ * It is not enough, because we are still not allowed to change tsk->flags
+ * in context of another process, we can corrupt other flags, when the process
+ * running on another cpu modifies them. So, we use TIF_FREEZE in thread flags,
+ * which can be changed atomically.
+ *
+ * PF_FROZEN also changes in context of another process, but this happens
+ * only when the process is already in refrigerator() which does not modify
+ * tsk->flags.
+ */
+
+enum
+{
+	OBSTACLE_NOGO = -1,
+	OBSTACLE_TIMEOUT = -2,
+	OBSTACLE_TRYAGAIN = -3,
+};
+
+#define SUSPEND_TIMEOUT	(10UL*HZ)
+
+static int vps_stop_tasks(struct cpt_context *ctx)
+{
+	unsigned long start_time = jiffies;
+	unsigned long target, timeout;
+	struct task_struct *p, *g;
+	int todo;
+	int round = 0;
+
+	do_gettimespec(&ctx->start_time); 
+	do_posix_clock_monotonic_gettime(&ctx->cpt_monotonic_time);
+	ctx->virt_jiffies64 = get_jiffies_64() + get_exec_env()->jiffies_fixup;
+
+	read_lock(&tasklist_lock);
+
+	atomic_inc(&get_exec_env()->suspend);
+	timeout = HZ/5;
+	target = jiffies + timeout;
+
+	for(;;) {
+		struct task_struct *root;
+		todo = 0;
+
+		root = find_task_by_pid_ve(1);
+		if (!root) {
+			read_unlock(&tasklist_lock);
+			eprintk_ctx("cannot find ve init\n");
+			atomic_dec(&get_exec_env()->suspend);
+			return -ESRCH;
+		}
+
+		do_each_thread_ve(g, p) {
+			if (vps_child_level(root, p) >= 0) {
+				if (!is_virtual_pid(virt_pid(p))) {
+					eprintk_ctx("external process %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n",
+							virt_pid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (!is_virtual_pid(virt_pgid(p))) {
+					eprintk_ctx("external process group %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n",
+							virt_pgid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (!is_virtual_pid(virt_sid(p))) {
+					eprintk_ctx("external process session %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n",
+							virt_sid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (task_aux(p)->vfork_done) {
+					/* Task between vfork()...exec()
+					 * cannot be frozen, because parent
+					 * wait in uninterruptible state.
+					 * So, we do nothing, waiting for
+					 * exec(), unless:
+					 */
+					if (p->state == TASK_STOPPED ||
+					    p->state == TASK_TRACED) {
+						eprintk_ctx("task " CPT_FID " is stopped while vfork(). "
+								"Checkpointing is impossible.\n",
+								CPT_TID(p));
+						todo = OBSTACLE_NOGO;
+						/* It is fatal, _user_ stopped
+						 * vfork()ing task, so that we
+						 * cannot suspend now.
+						 */
+					} else {
+						todo = OBSTACLE_TRYAGAIN;
+					}
+					goto out;
+				}
+				if (p->signal->group_exit_task &&
+				    p->signal->notify_count) {
+					/* exec() waits for threads' death */
+					wprintk_ctx("task " CPT_FID " waits for threads' death\n", CPT_TID(p));
+					todo = OBSTACLE_TRYAGAIN;
+					goto out;
+				}
+				if (p->state == TASK_TRACED
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+				    && !p->stopped_state
+#endif
+				    ) {
+					int ptrace_id = p->pn_state;
+					/* Debugger waits for signal. */
+					switch (ptrace_id) {
+					case PN_STOP_TF:
+					case PN_STOP_TF_RT:
+					case PN_STOP_ENTRY:
+					case PN_STOP_FORK:
+					case PN_STOP_VFORK:
+					case PN_STOP_SIGNAL:
+					case PN_STOP_EXIT:
+					case PN_STOP_LEAVE:
+						break;
+					default:
+						eprintk_ctx("task " CPT_FID " is stopped by debugger while %d.\n", CPT_TID(p), ptrace_id);
+						todo = OBSTACLE_NOGO;
+						goto out;
+					}
+				}
+				if (check_utrace(p, root, ctx)) {
+					eprintk_ctx("task " CPT_FID " is utraced. Checkpointing is impossible.\n", CPT_TID(p));
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (p->flags & PF_NOFREEZE) {
+					eprintk_ctx("task " CPT_FID " is unfreezable. Checkpointing is impossible.\n", CPT_TID(p));
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+
+				if (!freezable(p))
+					continue;
+
+				spin_lock_irq(&p->sighand->siglock);
+				if (!(p->flags & PF_FROZEN)) {
+					set_tsk_thread_flag(p, TIF_FREEZE);
+					signal_wake_up(p, 0);
+				}
+				spin_unlock_irq(&p->sighand->siglock);
+
+				if (p->flags & PF_FROZEN) {
+					if (p->state != TASK_UNINTERRUPTIBLE)
+						printk("Holy Crap 1 %ld " CPT_FID "\n", p->state, CPT_TID(p));
+					continue;
+				}
+
+				if (round == 10)
+					wprintk_ctx(CPT_FID " is running\n", CPT_TID(p));
+
+				todo++;
+			} else {
+				if (!cpt_skip_task(p)) {
+					eprintk_ctx("foreign process %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n",
+							virt_pid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+			}
+		} while_each_thread_ve(g, p);
+
+		if (todo > 0) {
+			/* No visible obstacles, but VE did not freeze
+			 * for timeout. Interrupt suspend, if it is major
+			 * timeout or signal; if it is minor timeout
+			 * we will wake VE and restart suspend.
+			 */
+			if (time_after(jiffies, start_time + SUSPEND_TIMEOUT)
+			    || signal_pending(current))
+				todo = OBSTACLE_TIMEOUT;
+			else if (time_after(jiffies, target))
+				todo = OBSTACLE_TRYAGAIN;
+		}
+
+out:
+		if (todo < 0) {
+			atomic_dec(&get_exec_env()->suspend);
+
+			cpt_wake_ve();
+
+#if 0
+			/* This is sign of failure of printk(), which is not
+			 * ours. So, no prefixes. */
+			printk(">\n");
+#endif
+		}
+
+		read_unlock(&tasklist_lock);
+
+		if (!todo) {
+			atomic_dec(&get_exec_env()->suspend);
+			return 0;
+		}
+
+		switch (todo) {
+		case OBSTACLE_NOGO:
+			eprintk_ctx("suspend is impossible now.\n");
+			return -EAGAIN;
+
+		case OBSTACLE_TIMEOUT:
+			eprintk_ctx("interrupted or timed out.\n");
+			return -EINTR;
+
+		case OBSTACLE_TRYAGAIN:
+			if (time_after(jiffies, start_time + SUSPEND_TIMEOUT) ||
+			    signal_pending(current)) {
+				wprintk_ctx("suspend timed out\n");
+				return -EAGAIN;
+			}
+
+			wprintk_ctx("minor suspend timeout (%lu) expired, "
+				    "trying again\n", timeout);
+
+			/* Try again. VE is awake, give it some time to run. */
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(HZ);
+
+			/* After a short wait restart suspend
+			 * with longer timeout */
+			atomic_inc(&get_exec_env()->suspend);
+			timeout = min(timeout<<1, SUSPEND_TIMEOUT);
+			target = jiffies + timeout;
+			break;
+
+		default:
+			if (round > 0) {
+				/* VE is partially frozen, give processes
+				 * a chance to enter to refrigerator(). */
+				current->state = TASK_INTERRUPTIBLE;
+				schedule_timeout(HZ/20);
+			} else {
+				yield();
+			}
+		}
+
+		read_lock(&tasklist_lock);
+		round++;
+	}
+}
+
+/*
+ * cpt_freeze_ve - lock @env and freeze all of its tasks.
+ *
+ * Takes env->op_sem, marks the VE locked and then repeatedly walks the
+ * VE's thread list, pushing every freezable task into the refrigerator.
+ * The walk is retried (with an exponentially growing minor timeout,
+ * capped by SUSPEND_TIMEOUT) until all tasks are frozen, an obstacle
+ * makes freezing impossible, or the major timeout/a pending signal
+ * interrupts the attempt.
+ *
+ * Returns 0 on success, -ESRCH if the VE is not running, -EBUSY if it
+ * is already locked, -EAGAIN or -EINTR on freeze failure.  In all
+ * cases is_locked is cleared again before returning; on failure the
+ * partially frozen VE is woken back up.
+ */
+int cpt_freeze_ve(struct ve_struct *env)
+{
+	int err;
+	struct ve_struct *oldenv;
+	unsigned long start_time = jiffies;
+	unsigned long target, timeout;
+	struct task_struct *p, *g;
+	int todo;
+	int round = 0;
+
+	down_write(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+
+	err = -EBUSY;
+	if (env->is_locked)
+		goto out_noenv;
+	env->is_locked = 1;
+	/* The lock bit is set; only read access to op_sem is needed
+	 * from here on. */
+	downgrade_write(&env->op_sem);
+
+	oldenv = set_exec_env(env);
+
+	read_lock(&tasklist_lock);
+
+	/* Nonzero ->suspend marks the VE as being suspended; it is
+	 * dropped again on every exit path below. */
+	atomic_inc(&get_exec_env()->suspend);
+	/* Start with a short minor timeout; doubled on each
+	 * OBSTACLE_TRYAGAIN round, capped by SUSPEND_TIMEOUT. */
+	timeout = HZ/5;
+	target = jiffies + timeout;
+
+	for(;;) {
+		todo = 0;
+
+		do_each_thread_ve(g, p) {
+			if (task_aux(p)->vfork_done) {
+				/* Task between vfork() and exec() cannot
+				 * be frozen (parent waits uninterruptibly).
+				 * Stopped/traced at this point is fatal,
+				 * otherwise wait for exec() to finish. */
+				if (p->state == TASK_STOPPED ||
+				    p->state == TASK_TRACED) {
+					todo = OBSTACLE_NOGO;
+				} else {
+					todo = OBSTACLE_TRYAGAIN;
+				}
+				goto done;
+			}
+			if (p->signal->group_exit_task &&
+			    p->signal->notify_count) {
+				/* exec() waits for threads' death */
+				todo = OBSTACLE_TRYAGAIN;
+				goto done;
+			}
+
+			if (!freezable(p))
+				continue;
+
+			/* Ask the task to enter the refrigerator. */
+			spin_lock_irq(&p->sighand->siglock);
+			if (!(p->flags & PF_FROZEN)) {
+				set_tsk_thread_flag(p, TIF_FREEZE);
+				signal_wake_up(p, 0);
+			}
+			spin_unlock_irq(&p->sighand->siglock);
+
+			if (p->flags & PF_FROZEN)
+				continue;
+
+			/* Still running: remember that a retry is needed. */
+			todo++;
+		} while_each_thread_ve(g, p);
+
+		if (todo > 0) {
+			/* No visible obstacles, but VE did not freeze
+			 * for timeout. Interrupt suspend, if it is major
+			 * timeout or signal; if it is minor timeout
+			 * we will wake VE and restart suspend.
+			 */
+			if (time_after(jiffies, start_time + SUSPEND_TIMEOUT)
+			    || signal_pending(current))
+				todo = OBSTACLE_TIMEOUT;
+			else if (time_after(jiffies, target))
+				todo = OBSTACLE_TRYAGAIN;
+		}
+
+done:
+		if (todo < 0) {
+			/* Obstacle found: undo the partial freeze. */
+			atomic_dec(&get_exec_env()->suspend);
+			cpt_wake_ve();
+		}
+
+		read_unlock(&tasklist_lock);
+
+		if (!todo) {
+			atomic_dec(&get_exec_env()->suspend);
+			err = 0;
+			goto out;
+		}
+
+		switch (todo) {
+		case OBSTACLE_NOGO:
+			err = -EAGAIN;
+			goto out;
+
+		case OBSTACLE_TIMEOUT:
+			err = -EINTR;
+			goto out;
+
+		case OBSTACLE_TRYAGAIN:
+			if (time_after(jiffies, start_time + SUSPEND_TIMEOUT) ||
+			    signal_pending(current)) {
+				err = -EAGAIN;
+				goto out;
+			}
+
+			/* Try again. VE is awake, give it some time to run. */
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(HZ);
+
+			/* After a short wait restart suspend
+			 * with longer timeout */
+			atomic_inc(&get_exec_env()->suspend);
+			timeout = min(timeout<<1, SUSPEND_TIMEOUT);
+			target = jiffies + timeout;
+			break;
+
+		default:
+			if (round > 0) {
+				/* VE is partially frozen, give processes
+				 * a chance to enter to refrigerator(). */
+				current->state = TASK_INTERRUPTIBLE;
+				schedule_timeout(HZ/20);
+			} else {
+				yield();
+			}
+		}
+
+		read_lock(&tasklist_lock);
+		round++;
+	}
+
+out:
+	set_exec_env(oldenv);
+	/* Re-take op_sem for write to clear the lock bit. */
+	up_read(&env->op_sem);
+	down_write(&env->op_sem);
+	env->is_locked = 0;
+
+out_noenv:
+	up_write(&env->op_sem);
+	return err;
+}
+EXPORT_SYMBOL(cpt_freeze_ve);
+
+
+/* Clear the "locked" flag of the VE referenced by ctx->ve_id.
+ * Returns 0 on success, -ESRCH if the VE cannot be found. */
+static int cpt_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;
+
+	down_write(&ve->op_sem);
+	ve->is_locked = 0;
+	up_write(&ve->op_sem);
+	put_ve(ve);
+
+	return 0;
+}
+
+/*
+ * cpt_resume - undo a checkpoint suspend: thaw every collected task,
+ * unlock sockets and the network, unlock the VE and destroy the dump
+ * context.  Always returns 0.
+ */
+int cpt_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_DMPFIN, ctx);
+
+	cpt_unlock_sockets(ctx);
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	/* Wait for the lazy pagein helper before letting tasks run. */
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+#endif
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+
+		spin_lock_irq(&tsk->sighand->siglock);
+		if (tsk->flags & PF_FROZEN) {
+			tsk->flags &= ~PF_FROZEN;
+			wake_up_process(tsk);
+		} else if (freezable(tsk)) {
+			/* Freezable tasks are expected to be frozen here. */
+			eprintk_ctx("strange, %s not frozen\n", tsk->comm );
+		}
+		spin_unlock_irq(&tsk->sighand->siglock);
+		/* Drop the reference taken when the task was collected. */
+		put_task_struct(tsk);
+	}
+
+	cpt_resume_network(ctx);
+
+	cpt_unlock_ve(ctx);
+
+	cpt_finish_ubc(ctx);
+	cpt_finish_vfsmount_ref(ctx);
+	cpt_object_destroy(ctx);
+	return 0;
+}
+
+/*
+ * cpt_kill - destroy the checkpointed VE: SIGKILL every collected task
+ * (the VE's init last) and wait until the VE is empty.
+ *
+ * Must be called from outside the VE being killed; returns -EPERM when
+ * invoked from inside, -EINVAL without a VE id, -ESRCH if the VE is
+ * gone, 0 otherwise.  Consumes the task references collected during
+ * suspend and destroys the dump context.
+ */
+int cpt_kill(struct cpt_context *ctx)
+{
+	int err = 0;
+	struct ve_struct *env;
+	cpt_object_t *obj;
+	struct task_struct *root_task = NULL;
+	long delay;
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	/* from here cpt_kill succeeds */
+	virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_DMPFIN, ctx);
+
+	if (current->ve_task_info.owner_env == env) {
+		wprintk_ctx("attempt to kill ve from inside, escaping...\n");
+		/* Drop the reference taken by get_ve_by_id() above
+		 * (this path used to leak it). */
+		put_ve(env);
+		return -EPERM;
+	}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+#endif
+
+	cpt_kill_sockets(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+
+		if (tsk->exit_state) {
+			put_task_struct(tsk);
+			continue;
+		}
+
+		/* Kill the VE's init last, after all of its children. */
+		if (virt_pid(tsk) == 1) {
+			root_task = tsk;
+			continue;
+		}
+
+		/* Detach userspace futex/tid cleanup state before dying. */
+		tsk->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+		tsk->compat_robust_list = NULL;
+#endif
+		tsk->clear_child_tid = NULL;
+
+#ifndef CONFIG_UTRACE
+		/* Is this necessary? SIGKILL must override tracing
+		 * in any case. With utrace we are going to have problems
+		 * doing this. */
+		if (tsk->ptrace) {
+			write_lock_irq(&tasklist_lock);
+			tsk->ptrace = 0;
+			if (!list_empty(&tsk->ptrace_list)) {
+				list_del_init(&tsk->ptrace_list);
+				remove_parent(tsk);
+				/* Reattach to the real parent.  The old
+				 * code self-assigned ->parent here, which
+				 * left the task linked to its tracer. */
+				tsk->parent = tsk->real_parent;
+				add_parent(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+		}
+#endif
+
+		send_sig(SIGKILL, tsk, 1);
+
+		/* Leave only SIGKILL deliverable and thaw the task so it
+		 * can process the signal. */
+		spin_lock_irq(&tsk->sighand->siglock);
+		sigfillset(&tsk->blocked);
+		sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+		set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+		if (tsk->flags & PF_FROZEN)
+			tsk->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	}
+
+	yield();
+
+	if (root_task != NULL) {
+		send_sig(SIGKILL, root_task, 1);
+
+		spin_lock_irq(&root_task->sighand->siglock);
+		sigfillset(&root_task->blocked);
+		sigdelsetmask(&root_task->blocked, sigmask(SIGKILL));
+		set_tsk_thread_flag(root_task, TIF_SIGPENDING);
+		clear_tsk_thread_flag(root_task, TIF_FREEZE);
+		if (root_task->flags & PF_FROZEN)
+			root_task->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&root_task->sighand->siglock);
+
+		wake_up_process(root_task);
+		put_task_struct(root_task);
+	}
+
+	cpt_finish_ubc(ctx);
+	cpt_finish_vfsmount_ref(ctx);
+	cpt_object_destroy(ctx);
+
+	/* Wait, with exponential backoff capped at 1s, until only our
+	 * reference to the VE remains (counter == 1). */
+	delay = 1;
+	while (atomic_read(&env->counter) != 1) {
+		if (signal_pending(current))
+			break;
+		current->state = TASK_INTERRUPTIBLE;
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		schedule_timeout(delay);
+	}
+	put_ve(env);
+
+	return err;
+}
+
+#ifdef CONFIG_USER_RESOURCE
+/* Register the beancounters referenced by task @t with the dump
+ * context so they are included in the image. */
+static void collect_task_ubc(struct task_struct *t, struct cpt_context *ctx)
+{
+	struct task_beancounter *tbc = &t->task_bc;
+
+	cpt_add_ubc(tbc->exec_ub, ctx);
+	cpt_add_ubc(tbc->task_ub, ctx);
+	cpt_add_ubc(tbc->fork_sub, ctx);
+}
+#else
+/* No beancounters configured: nothing to collect. */
+static inline void collect_task_ubc(struct task_struct *t,
+		struct cpt_context *ctx)
+{
+}
+#endif
+
+/* Add @child to the collected task list after @head and return the new
+ * object (which becomes the caller's new list head).  On failure the
+ * caller's task_struct reference is dropped and NULL is returned; on
+ * success the reference is kept by the new object. */
+static cpt_object_t * remember_task(struct task_struct * child,
+		cpt_object_t * head, cpt_context_t * ctx)
+{
+	cpt_object_t *tobj;
+
+	if (freezable(child) && !(child->flags & PF_FROZEN)) {
+		eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(child));
+		put_task_struct(child);
+		return NULL;
+	}
+
+	/* Each task must be collected exactly once. */
+	if (lookup_cpt_object(CPT_OBJ_TASK, child, ctx))
+		BUG();
+
+	tobj = alloc_cpt_object(GFP_KERNEL, ctx);
+	if (tobj == NULL) {
+		put_task_struct(child);
+		return NULL;
+	}
+
+	tobj->o_count = 1;
+	cpt_obj_setobj(tobj, child, ctx);
+	insert_cpt_object(CPT_OBJ_TASK, tobj, head, ctx);
+	collect_task_ubc(child, ctx);
+	return tobj;
+}
+
+/*
+ * vps_collect_tasks - build the CPT_OBJ_TASK object list for the frozen
+ * VE: the VE's init first, then — iterating over the growing object
+ * list — every thread of each group leader and every child process.
+ * Each collected task holds a task_struct reference; on error all
+ * collected objects and references are released again.
+ * Returns 0 on success or a negative error.
+ */
+static int vps_collect_tasks(struct cpt_context *ctx)
+{
+	int err = -ESRCH;
+	cpt_object_t *obj;
+	struct task_struct *root;
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);
+	if (root)
+		get_task_struct(root);
+	read_unlock(&tasklist_lock);
+
+	if (!root) {
+		err = -ESRCH;
+		eprintk_ctx("vps_collect_tasks: cannot find root\n");
+		goto out;
+	}
+
+	if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) {
+		put_task_struct(root);
+		return -ENOMEM;
+	}
+	obj->o_count = 1;
+	cpt_obj_setobj(obj, root, ctx);
+	intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+	collect_task_ubc(root, ctx);
+
+	/* Collect process subtree recursively */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		cpt_object_t *head = obj;
+		struct task_struct *tsk = obj->o_obj;
+		struct task_struct *child;
+
+		if (freezable(tsk) && !(tsk->flags&PF_FROZEN)) {
+			eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(tsk));
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (tsk->state == TASK_RUNNING)
+			printk("Holy Crap 2 %ld " CPT_FID "\n", tsk->state, CPT_TID(tsk));
+
+		/* Make sure the frozen task has fully scheduled out
+		 * before its state is examined. */
+		wait_task_inactive(tsk);
+
+		err = check_task_state(tsk, ctx);
+		if (err)
+			goto out;
+
+		/* Group leader: pull in all other threads of the group. */
+		if (tsk->pid == tsk->tgid) {
+			child = tsk;
+			for (;;) {
+				read_lock(&tasklist_lock);
+				child = next_thread(child);
+				if (child != tsk)
+					get_task_struct(child);
+				read_unlock(&tasklist_lock);
+
+				if (child == tsk)
+					break;
+
+				if (child->parent != tsk->parent) {
+					put_task_struct(child);
+					eprintk_ctx("illegal thread structure, kernel bug\n");
+					err = -EINVAL;
+					goto out;
+				}
+
+				if ((head = remember_task(child, head, ctx)) == NULL) {
+					eprintk_ctx("task obj allocation failure\n");
+					err = -ENOMEM;
+					goto out;
+				}
+			}
+		}
+
+		/* About locking. VE is frozen. But lists of children
+		 * may change at least for init, when entered task reparents
+		 * to init and when reparented task exits. If we take care
+		 * of this case, we still can unlock while scanning
+		 * tasklists.
+		 */
+		read_lock(&tasklist_lock);
+		list_for_each_entry(child, &tsk->children, sibling) {
+			if (child->parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL) {
+				eprintk_ctx("task obj allocation failure\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			read_lock(&tasklist_lock);
+		}
+
+#ifndef CONFIG_UTRACE
+		/* Children currently detached to a tracer live on the
+		 * ptrace_children list instead. */
+		list_for_each_entry(child, &tsk->ptrace_children, ptrace_list) {
+			if (child->parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL) {
+				eprintk_ctx("task obj allocation failure\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			read_lock(&tasklist_lock);
+		}
+#endif
+		read_unlock(&tasklist_lock);
+	}
+
+	return 0;
+
+out:
+	/* Release everything collected so far. */
+	while (!list_empty(&ctx->object_array[CPT_OBJ_TASK])) {
+		struct list_head *head = ctx->object_array[CPT_OBJ_TASK].next;
+		cpt_object_t *obj = list_entry(head, cpt_object_t, o_list);
+		struct task_struct *tsk;
+
+		list_del(head);
+		tsk = obj->o_obj;
+		put_task_struct(tsk);
+		free_cpt_object(obj, ctx);
+	}
+	return err;
+}
+
+/* Collect every resource class that will be dumped (memory, SysV IPC,
+ * namespaces, files, fs roots, signals), then let SCP notifiers veto.
+ * Stops at the first failure and returns its error code. */
+static int cpt_collect(struct cpt_context *ctx)
+{
+	int err;
+
+	err = cpt_collect_mm(ctx);
+	if (err)
+		return err;
+	err = cpt_collect_sysv(ctx);
+	if (err)
+		return err;
+	err = cpt_collect_namespace(ctx);
+	if (err)
+		return err;
+	err = cpt_collect_files(ctx);
+	if (err)
+		return err;
+	err = cpt_collect_fs(ctx);
+	if (err)
+		return err;
+	err = cpt_collect_signals(ctx);
+	if (err)
+		return err;
+
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_COLLECT, ctx) & NOTIFY_FAIL)
+		return -ECHRNG;
+
+	return 0;
+}
+
+/*
+ * cpt_dump_veinfo - write the CPT_SECT_VEINFO section: SysV IPC limits,
+ * VE start-time deltas and pid allocation state.  Always returns 0.
+ */
+static int cpt_dump_veinfo(cpt_context_t *ctx)
+{
+	struct cpt_veinfo_image *i = cpt_get_buf(ctx);
+	struct ve_struct *ve;
+	struct timespec delta;
+	struct ipc_namespace *ns;
+
+	cpt_open_section(ctx, CPT_SECT_VEINFO);
+	cpt_open_object(NULL, ctx);
+
+	memset(i, 0, sizeof(*i));
+
+	i->cpt_next = CPT_NULL;
+	i->cpt_object = CPT_OBJ_VEINFO;
+	i->cpt_hdrlen = sizeof(*i);
+	i->cpt_content = CPT_CONTENT_VOID;
+
+	ve = get_exec_env();
+	ns = ve->ve_ns->ipc_ns;
+
+	/* Clamp the shm limits to the 32-bit image fields. */
+	i->shm_ctl_all = ns->shm_ctlall;
+	if (ns->shm_ctlall > 0xFFFFFFFFU)
+		i->shm_ctl_all = 0xFFFFFFFFU;
+	i->shm_ctl_max = ns->shm_ctlmax;
+	if (ns->shm_ctlmax > 0xFFFFFFFFU)
+		i->shm_ctl_max = 0xFFFFFFFFU;
+	i->shm_ctl_mni = ns->shm_ctlmni;
+
+	i->msg_ctl_max = ns->msg_ctlmax;
+	i->msg_ctl_mni = ns->msg_ctlmni;
+	i->msg_ctl_mnb = ns->msg_ctlmnb;
+
+	BUILD_BUG_ON(sizeof(ns->sem_ctls) != sizeof(i->sem_ctl_arr));
+	i->sem_ctl_arr[0] = ns->sem_ctls[0];
+	i->sem_ctl_arr[1] = ns->sem_ctls[1];
+	i->sem_ctl_arr[2] = ns->sem_ctls[2];
+	i->sem_ctl_arr[3] = ns->sem_ctls[3];
+
+	/* Store start times as deltas from "now" so restore can
+	 * recompute them against the destination host's clocks. */
+	do_posix_clock_monotonic_gettime(&delta);
+	set_normalized_timespec(&delta,
+			delta.tv_sec - ve->start_timespec.tv_sec,
+			delta.tv_nsec - ve->start_timespec.tv_nsec);
+	i->start_timespec_delta = cpt_timespec_export(&delta);
+	i->start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
+
+	i->last_pid = ve->last_vpid;
+	/* Saved with a +1 bias; presumably 0 means "not present" on the
+	 * restore side — verify against the rst code. */
+	i->rnd_va_space	= ve->_randomize_va_space + 1;
+	i->vpid_max = ve->vpid_max;
+
+	ctx->write(i, sizeof(*i), ctx);
+	cpt_release_buf(ctx);
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Write a single CPT_OBJ_NAME object containing the NUL-terminated
+ * string @name. */
+static void cpt_dump_utsname_str(char *name, cpt_context_t *ctx)
+{
+	struct cpt_object_hdr o;
+
+	cpt_open_object(NULL, ctx);
+
+	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_NAME;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(name, strlen(name) + 1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+}
+
+/*
+ * cpt_dump_utsname - dump the VE's uts namespace (nodename, domainname
+ * and release, in that order) into the CPT_SECT_UTSNAME section.
+ * Always returns 0.  (The triplicated open/write/align/close sequence
+ * of the original is factored into cpt_dump_utsname_str().)
+ */
+static int cpt_dump_utsname(cpt_context_t *ctx)
+{
+	struct ve_struct *ve;
+	struct uts_namespace *ns;
+
+	cpt_open_section(ctx, CPT_SECT_UTSNAME);
+
+	ve = get_exec_env();
+	ns = ve->ve_ns->uts_ns;
+
+	cpt_dump_utsname_str(ns->name.nodename, ctx);
+	cpt_dump_utsname_str(ns->name.domainname, ctx);
+	cpt_dump_utsname_str(ns->name.release, ctx);
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+#ifndef CONFIG_IA64
+/* Save the vsyscall page so the restore side can verify it matches
+ * the destination kernel. */
+static int cpt_dump_vsyscall(cpt_context_t *ctx)
+{
+	struct cpt_page_block *hdr = cpt_get_buf(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_VSYSCALL);
+	cpt_open_object(NULL, ctx);
+
+	hdr->cpt_next = CPT_NULL;
+	hdr->cpt_object = CPT_OBJ_VSYSCALL;
+	hdr->cpt_hdrlen = sizeof(*hdr);
+	hdr->cpt_content = CPT_CONTENT_DATA;
+	hdr->cpt_start = cpt_ptr_export(vsyscall_addr);
+	hdr->cpt_end = hdr->cpt_start + PAGE_SIZE;
+
+	ctx->write(hdr, sizeof(*hdr), ctx);
+	cpt_release_buf(ctx);
+
+	/* The raw page body follows the header directly. */
+	ctx->write(vsyscall_addr, PAGE_SIZE, ctx);
+
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+#endif
+
+/*
+ * cpt_dump - phase 2 of checkpointing: with the VE already suspended
+ * and locked (is_locked set), write all sections of the dump image.
+ *
+ * Temporarily switches the caller's exec env and nsproxy to the target
+ * VE so the dump helpers observe the VE's namespaces.  Returns the
+ * first dump error, or the dumpfile close error if dumping succeeded.
+ */
+int cpt_dump(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	struct nsproxy *old_ns;
+	int err, err2 = 0;
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	down_read(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+	if (!env->is_locked)
+		goto out_noenv;
+
+	oldenv = set_exec_env(env);
+	old_ns = current->nsproxy;
+	current->nsproxy = env->ve_ns;
+
+	/* Phase 2: real checkpointing */
+	err = cpt_open_dumpfile(ctx);
+	if (err)
+		goto out;
+	
+	cpt_major_hdr_out(ctx);
+
+	if (!err)
+		err = cpt_dump_veinfo(ctx);
+	if (!err)
+		err = cpt_dump_ubc(ctx);
+	if (!err)
+		err = cpt_dump_namespace(ctx);
+	if (!err)
+		err = cpt_dump_files(ctx);
+	if (!err)
+		err = cpt_dump_files_struct(ctx);
+	if (!err)
+		err = cpt_dump_fs_struct(ctx);
+	/* netdevices should be dumped after dumping open files
+	   as we need to restore netdevice binding to /dev/net/tun file */
+	if (!err)
+		err = cpt_dump_ifinfo(ctx);
+	if (!err)
+		err = cpt_dump_sighand(ctx);
+	if (!err)
+		err = cpt_dump_vm(ctx);
+	if (!err)
+		err = cpt_dump_sysvsem(ctx);
+	if (!err)
+		err = cpt_dump_sysvmsg(ctx);
+	if (!err)
+		err = cpt_dump_tasks(ctx);
+	if (!err)
+		err = cpt_dump_orphaned_sockets(ctx);
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+	if (!err)
+		err = cpt_dump_ip_conntrack(ctx);
+#endif
+	if (!err) {
+		if (virtinfo_notifier_call(VITYPE_SCP,
+					VIRTINFO_SCP_DUMP, ctx) & NOTIFY_FAIL)
+			err = -ECHRNG;
+	}
+	if (!err)
+		err = cpt_dump_utsname(ctx);
+
+#ifndef CONFIG_IA64
+	if (!err)
+		err = cpt_dump_vsyscall(ctx);
+#endif
+
+	if (!err)
+		err = cpt_dump_tail(ctx);
+
+	/* Close unconditionally; its error is reported only when the
+	 * dump itself succeeded (see return below). */
+	err2 = cpt_close_dumpfile(ctx);
+
+out:
+	current->nsproxy = old_ns;
+	set_exec_env(oldenv);
+out_noenv:
+	up_read(&env->op_sem);
+	put_ve(env);
+	return err ? : err2;
+}
+
+/*
+ * cpt_vps_suspend - phase 1 of checkpointing: lock the VE, sync its
+ * NFS mounts, freeze all tasks, suspend the network and collect the
+ * task tree and resources into the dump context.
+ *
+ * On success the VE is left locked (is_locked stays set) and frozen
+ * for the subsequent cpt_dump()/cpt_resume()/cpt_kill().
+ */
+int cpt_vps_suspend(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	struct nsproxy *old_ns;
+	int err = 0;
+
+	ctx->kernel_config_flags = test_kernel_config();
+	cpt_object_init(ctx);
+
+	if (!ctx->ve_id) {
+		/* No explicit id: suspend the caller's own VE
+		 * (never allowed for VE0 itself). */
+		env = get_exec_env();
+		if (env == get_ve0())
+			return -EINVAL;
+		wprintk("undefined ve_id\n");
+		ctx->ve_id = env->veid;
+		get_ve(env);
+	} else {
+		env = get_ve_by_id(ctx->ve_id);
+		if (!env)
+			return -ESRCH;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ctx->iptables_mask = env->_iptables_modules;
+#endif
+	ctx->features = env->features;
+
+	down_write(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+
+	err = -EBUSY;
+	if (env->is_locked)
+		goto out_noenv;
+	env->is_locked = 1;
+	downgrade_write(&env->op_sem);
+
+	oldenv = set_exec_env(env);
+	old_ns = current->nsproxy;
+	current->nsproxy = env->ve_ns;
+
+	/* Start syncing NFS */
+	ve_nfs_sync(env, 0);
+
+	/* Find and stop all the tasks */
+	if ((err = vps_stop_tasks(ctx)) != 0)
+		goto out;
+
+	ve_nfs_sync(env, 1);
+
+	/* Wait for syncing NFS mounts */
+	if ((err = ve_nfs_sync(env, 2)) != 0) {
+		eprintk_ctx("failed to sync nfs\n");
+		goto out_wake;
+	}
+
+	if ((err = cpt_suspend_network(ctx)) != 0)
+		goto out_wake;
+
+	/* At the moment all the state is frozen. We do not need to lock
+	 * the state, which can be changed only if the tasks are running.
+	 */
+
+	/* Collect task tree */
+	if ((err = vps_collect_tasks(ctx)) != 0)
+		goto out_wake;
+
+	/* Collect all the resources */
+	if ((err = cpt_collect(ctx)) != 0)
+		goto out;
+
+out:
+	current->nsproxy = old_ns;
+	set_exec_env(oldenv);
+	up_read(&env->op_sem);
+	put_ve(env);
+        return err;
+
+out_noenv:
+	up_write(&env->op_sem);
+	put_ve(env);
+	return err;
+
+out_wake:
+	/* Failure after (partially) freezing: thaw the VE again. */
+	read_lock(&tasklist_lock);
+	cpt_wake_ve();
+	read_unlock(&tasklist_lock);
+	goto out;
+}
+
+/* Set CPT_UNSUPPORTED_NETDEV in @caps for every network device of the
+ * current VE that has no checkpointing operations attached. */
+static void check_unsupported_netdevices(struct cpt_context *ctx, __u32 *caps)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if (dev->cpt_ops)
+			continue;
+		eprintk_ctx("unsupported netdevice %s\n", dev->name);
+		*caps |= (1<<CPT_UNSUPPORTED_NETDEV);
+	}
+	read_unlock(&dev_base_lock);
+}
+
+/*
+ * check_one_process - accumulate migration-blocking capability bits in
+ * @caps for a single task: CPU features its saved state depends on
+ * (FPU/SSE, vdso syscall method), external/foreign processes inside
+ * the CT, foreign mount namespaces, non-standard scheduler policy and
+ * host-side ptrace.  @flags is the precomputed cpu-caps mask.
+ */
+static void check_one_process(struct cpt_context *ctx, __u32 *caps,
+		unsigned int flags, struct ve_struct *env,
+		struct task_struct *root, struct task_struct *p)
+{
+	struct namespace *ns;
+	/* Tasks with live FPU state need the matching SIMD features
+	 * on the destination. */
+	if (tsk_used_math(p)) {
+		*caps |= flags & ((1<<CPT_CPU_X86_FXSR) |
+				(1<<CPT_CPU_X86_SSE) |
+				(1<<CPT_CPU_X86_SSE2) |
+				(1<<CPT_CPU_X86_MMX) |
+				(1<<CPT_CPU_X86_3DNOW) |
+				(1<<CPT_CPU_X86_3DNOW2));
+	}
+	/* This is not 100% true. VE could migrate with vdso using int80.
+	 * In this case we do not need SEP/SYSCALL32 caps. It is not so easy
+	 * to test, so that we do not. */
+#ifdef CONFIG_X86_64
+	if (!(p->thread_info->flags & _TIF_IA32))
+		*caps |= flags & ((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_SYSCALL));
+	else if (p->mm && p->mm->context.vdso) {
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+			*caps |= flags & (1<<CPT_CPU_X86_SEP);
+		else
+			*caps |= flags & (1<<CPT_CPU_X86_SYSCALL32);
+	}
+#elif defined(CONFIG_X86_32)
+	if (p->mm && p->mm->context.vdso)
+		*caps |= flags & (1<<CPT_CPU_X86_SEP);
+#endif
+#ifdef CONFIG_IA64
+	if (!IS_IA32_PROCESS(task_pt_regs(p)))
+		*caps |= (1<<CPT_CPU_X86_IA64);
+#endif
+	/* Descendant of the CT's init (level >= 0) vs. an entered task. */
+	if (vps_child_level(root, p) >= 0) {
+		/* Non-virtual pid/pgid/sid means the task entered from
+		 * the host (vzctl enter/exec) and cannot be dumped. */
+		if (!is_virtual_pid(virt_pid(p))) {
+			eprintk_ctx("external process %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+		if (!is_virtual_pid(virt_pgid(p))) {
+			eprintk_ctx("external process group %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n", virt_pgid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+		if (!is_virtual_pid(virt_sid(p))) {
+			eprintk_ctx("external process session %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n", virt_sid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+	} else {
+		if (!cpt_skip_task(p)) {
+			eprintk_ctx("foreign process %d/%d(%s) inside CT (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+	}
+	ns = get_task_mnt_ns(p);
+	if (ns) {
+		if (ns != current->nsproxy->namespace) {
+			eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_NAMESPACES);
+		}
+		put_namespace(ns);
+	}
+	if (p->policy != SCHED_NORMAL) {
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_SCHEDULER_POLICY);
+	}
+	if (check_utrace(p, root, ctx)) {
+		eprintk_ctx("task %d/%d(%s) is ptraced from host system\n", p->pid, virt_pid(p), p->comm);
+		*caps |= (1<<CPT_PTRACED_FROM_VE0);
+	}
+	if (cpt_check_unsupported(p, ctx)) {
+		*caps |= (1<<CPT_UNSUPPORTED_MISC);
+	}
+}
+
+/*
+ * check_unsupported_mounts - walk mount namespace @n and set
+ * CPT_UNSUPPORTED_FSTYPE in @caps for every mount whose filesystem
+ * has no checkpointing support.  Mounts not reachable from the VE's
+ * fs root (__d_path() fails) are skipped.  @path_buf is a caller-
+ * supplied page used as scratch for __d_path().
+ */
+static void check_unsupported_mounts(struct cpt_context *ctx, __u32 *caps,
+		struct ve_struct *env, struct namespace *n, char *path_buf)
+{
+	struct list_head *p;
+	char *path;
+
+	down_read(&namespace_sem);
+	list_for_each(p, &n->list) {
+		struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list);
+
+		/* The path itself is unused; __d_path() only filters
+		 * out mounts invisible from the VE root. */
+		spin_lock(&dcache_lock);
+		path = __d_path(mnt->mnt_root, mnt,
+				env->fs_root, env->fs_rootmnt,
+				path_buf, PAGE_SIZE);
+		spin_unlock(&dcache_lock);
+		if (IS_ERR(path))
+			continue;
+
+		if (check_one_vfsmount(mnt)) {
+			eprintk_ctx("Unsupported filesystem %s\n", mnt->mnt_sb->s_type->name);
+			*caps |= (1<<CPT_UNSUPPORTED_FSTYPE);
+		}
+	}
+	up_read(&namespace_sem);
+}
+
+/*
+ * cpt_vps_caps - dry-run check: compute in *caps the set of required
+ * CPU features and migration obstacles for VE ctx->ve_id without
+ * changing its state.  Returns 0 on success, -ESRCH if the VE is not
+ * running (or has no init), -EBUSY if it is locked, -ENOMEM on
+ * allocation failure.
+ */
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps)
+{
+	struct task_struct *p;
+	struct task_struct *root;
+	struct ve_struct *env;
+	struct ve_struct *old_env;
+	struct nsproxy *old_ns;
+	struct namespace *n;
+	int err;
+	unsigned int flags = test_cpu_caps_and_features();
+
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (env == NULL)
+		return -ESRCH;
+
+	down_read(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running) {
+		eprintk_ctx("CT is not running\n");
+		goto out_noenv;
+	}
+
+	err = -EBUSY;
+	if (env->is_locked) {
+		eprintk_ctx("CT is locked\n");
+		goto out_noenv;
+	}
+
+	/* Seed the result with the CMOV bit from the cpu-caps mask. */
+	*caps = flags & (1<<CPT_CPU_X86_CMOV);
+
+	if (flags & (1 << CPT_SLM_DMPRST)) {
+		eprintk_ctx("SLM is enabled, but slm_dmprst module is not loaded\n");
+		*caps |= (1 << CPT_SLM_DMPRST);
+	}
+
+	/* Run the checks with the VE's exec env and namespaces active. */
+	old_env = set_exec_env(env);
+	old_ns = current->nsproxy;
+	current->nsproxy = env->ve_ns;
+
+	check_unsupported_netdevices(ctx, caps);
+
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);
+	if (!root) {
+		read_unlock(&tasklist_lock);
+		eprintk_ctx("cannot find ve init\n");
+		err = -ESRCH;
+		goto out;
+	}
+	get_task_struct(root);
+	for (p = __first_task_ve(env); p != NULL ; p = __next_task_ve(env, p))
+		check_one_process(ctx, caps, flags, env, root, p);
+	read_unlock(&tasklist_lock);
+
+	n = get_task_mnt_ns(root);
+	if (n) {
+		char *path_buf;
+
+		path_buf = (char *) __get_free_page(GFP_KERNEL);
+		if (!path_buf) {
+			put_namespace(n);
+			err = -ENOMEM;
+			goto out_root;
+		}
+
+		check_unsupported_mounts(ctx, caps, env, n, path_buf);
+
+		free_page((unsigned long) path_buf);
+		put_namespace(n);
+	}
+
+	err = 0;
+
+out_root:
+	put_task_struct(root);
+out:
+	current->nsproxy = old_ns;
+	set_exec_env(old_env);
+out_noenv:
+	up_read(&env->op_sem);
+	put_ve(env);
+
+	return err;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_dump.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_dump.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_dump.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_dump.h	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,16 @@
+int cpt_dump(struct cpt_context *cpt);
+int rst_undump(struct cpt_context *cpt);
+int cpt_suspend(struct cpt_context *cpt);
+int cpt_resume(struct cpt_context *cpt);
+int cpt_kill(struct cpt_context *cpt);
+int rst_clean(struct cpt_context *cpt);
+int rst_resume(struct cpt_context *cpt);
+int rst_kill(struct cpt_context *cpt);
+
+int cpt_freeze_one(pid_t pid, int freeze);
+int cpt_vps_suspend(struct cpt_context *ctx);
+int vps_rst_undump(struct cpt_context *ctx);
+
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps);
+
+int cpt_check_unsupported(struct task_struct *tsk, struct cpt_context *ctx);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_epoll.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_epoll.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_epoll.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_epoll.c	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,116 @@
+/*
+ *
+ *  kernel/cpt/cpt_epoll.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations eventpoll_fops;
+
+/*
+ * cpt_dump_epolldev - dump one epoll file: a CPT_OBJ_EPOLL header
+ * followed by a CPT_OBJ_EPOLL_FILE record for every watched file.
+ * Every watched file must already be in the object table, otherwise
+ * -EBUSY is returned; a non-epoll or uninitialized file yields
+ * -EINVAL.  Returns 0 on success.
+ */
+int cpt_dump_epolldev(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+	struct rb_node *rbp;
+	struct cpt_epoll_image ei;
+
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	/* eventpoll.c does not protect open /proc/N/fd, silly.
+	 * Opener will get an invalid file with uninitialized private_data
+	 */
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	ei.cpt_next = CPT_NULL;
+	ei.cpt_object = CPT_OBJ_EPOLL;
+	ei.cpt_hdrlen = sizeof(ei);
+	ei.cpt_content = CPT_CONTENT_ARRAY;
+	ei.cpt_file = obj->o_pos;
+
+	ctx->write(&ei, sizeof(ei), ctx);
+
+	/* Walk the rbtree of watched files under the global epmutex. */
+	mutex_lock(&epmutex);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		loff_t saved_obj;
+		cpt_object_t *tobj;
+		struct cpt_epoll_file_image efi;
+		struct epitem *epi;
+		epi = rb_entry(rbp, struct epitem, rbn);
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, epi->ffd.file, ctx);
+		if (tobj == NULL) {
+			eprintk_ctx("epoll device refers to an external file\n");
+			err = -EBUSY;
+			break;
+		}
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		efi.cpt_next = CPT_NULL;
+		efi.cpt_object = CPT_OBJ_EPOLL_FILE;
+		efi.cpt_hdrlen = sizeof(efi);
+		efi.cpt_content = CPT_CONTENT_VOID;
+		efi.cpt_file = tobj->o_pos;
+		efi.cpt_fd = epi->ffd.fd;
+		efi.cpt_events = epi->event.events;
+		efi.cpt_data = epi->event.data;
+		efi.cpt_revents = epi->revents;
+		/* Remember whether the item sat on the ready list. */
+		efi.cpt_ready = 0;
+		if (!list_empty(&epi->rdllink))
+			efi.cpt_ready = 1;
+
+		ctx->write(&efi, sizeof(efi), ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	mutex_unlock(&epmutex);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_exports.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_exports.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_exports.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_exports.c	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,12 @@
+#include <linux/module.h>
+
+#include "cpt_obj.h"
+
+/* Re-export the cpt object-registry primitives (implemented in cpt_obj.c)
+ * so that modular checkpoint/restore components can use them. */
+EXPORT_SYMBOL(alloc_cpt_object);
+EXPORT_SYMBOL(intern_cpt_object);
+EXPORT_SYMBOL(insert_cpt_object);
+EXPORT_SYMBOL(__cpt_object_add);
+EXPORT_SYMBOL(cpt_object_add);
+EXPORT_SYMBOL(cpt_object_get);
+EXPORT_SYMBOL(lookup_cpt_object);
+EXPORT_SYMBOL(lookup_cpt_obj_bypos);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_files.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_files.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_files.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_files.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,2119 @@
+/*
+ *
+ *  kernel/cpt/cpt_files.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/ve_nfs.h>
+#include <linux/ve_proto.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/signalfd.h>
+
+#include <linux/nfs_mount.h>
+#include <linux/nfs_fs.h>
+#undef dprintk
+
+#include "../../fs/autofs4/autofs_i.h"
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+/* The anonymous-inode superblock currently hosts only signalfd files,
+ * so membership in it identifies a signalfd inode. */
+static inline int is_signalfd_inode(struct inode *ino)
+{
+	const struct super_block *sb = ino->i_sb;
+
+	return sb->s_magic == FSMAGIC_ANON;
+}
+
+/* Print the path of @d on @mnt through eprintk.  Best-effort: silently
+ * does nothing when a scratch page cannot be allocated or the path
+ * cannot be resolved. */
+void cpt_printk_dentry(struct dentry *d, struct vfsmount *mnt)
+{
+	unsigned long page = __get_free_page(GFP_KERNEL);
+	char *p;
+
+	if (!page)
+		return;
+
+	p = d_path(d, mnt, (char *)page, PAGE_SIZE);
+	if (!IS_ERR(p))
+		eprintk("<%s>", p);
+
+	free_page(page);
+}
+
+/*
+ * Verify that the @path recorded for @d is actually reachable: re-resolve
+ * it and check it lands back on the same dentry (and, when @verify is set,
+ * the same mount).  An overmounted or shadowed file would otherwise be
+ * restored to the wrong object.  Proc entries of dead tasks and delayed
+ * (NFS/autofs) mounts are exempt from the check.
+ */
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 int verify, cpt_context_t *ctx)
+{
+	if (d->d_inode->i_sb->s_magic == FSMAGIC_PROC &&
+	    proc_dentry_of_dead_task(d))
+		return 0;
+
+	if (cpt_need_delayfs(mnt))
+		return 0;
+
+	/* Only absolute paths of still-hashed dentries can be re-looked-up. */
+	if (path[0] == '/' && !(!IS_ROOT(d) && d_unhashed(d))) {
+		struct nameidata nd;
+		if (path_lookup(path, 0, &nd)) {
+			eprintk_ctx("d_path cannot be looked up %s\n", path);
+			return -EINVAL;
+		}
+		if (nd.dentry != d || (verify && nd.mnt != mnt)) {
+			eprintk_ctx("d_path is invisible %s\n", path);
+			path_release(&nd);
+			return -EINVAL;
+		}
+		path_release(&nd);
+	}
+	return 0;
+}
+
+/* A mount gets "delayed" treatment when it is NFS or autofs(-parented)
+ * and belongs to the beancounter performing the dump; such mounts are
+ * not touched during checkpointing. */
+int cpt_need_delayfs(struct vfsmount *mnt)
+{
+	if (top_beancounter(slab_ub(mnt)) != top_beancounter(get_exec_ub()))
+		return 0;
+
+	return mnt->mnt_sb->s_magic == FSMAGIC_NFS ||
+	       is_autofs_mount(mnt) ||
+	       is_autofs_mount(mnt->mnt_parent);
+}
+
+/* Decide whether @dentry must be accompanied by a vfsmount reference in
+ * the image.  Purely-virtual filesystems (pipefs, sockfs, epoll, ...) and
+ * the container's shmem mount need none; anything else without a dumped
+ * mount is an error (logged here, reported by returning 1). */
+int cpt_need_vfsmount(struct dentry *dentry, struct vfsmount *vfsmnt)
+{
+	unsigned long magic = dentry->d_inode->i_sb->s_magic;
+
+	if (vfsmnt == get_exec_env()->shmem_mnt)
+		return 0;
+
+	switch (magic) {
+	case FSMAGIC_PIPEFS:
+	case FSMAGIC_SOCKFS:
+	case FSMAGIC_BDEV:
+	case FSMAGIC_EPOLL:
+	case FSMAGIC_FUTEX:
+	case FSMAGIC_INOTIFY:
+	case FSMAGIC_MQUEUE:
+	case FSMAGIC_ANON:
+		return 0;
+	}
+
+	eprintk("no vfsmount: ");
+	cpt_printk_dentry(dentry, vfsmnt);
+	eprintk(" magic:%lx\n", magic);
+	return 1;
+}
+
+/*
+ * Detect a VZFS "replaced" file: a deleted dentry whose on-disk path now
+ * resolves to a registered renamed replacement.  Returns 1 in that case,
+ * 0 otherwise (including on any allocation/lookup failure).  Compiled to
+ * a constant 0 unless VZFS support is configured.
+ */
+static int
+cpt_replaced(struct dentry * de, struct vfsmount *mnt, cpt_context_t * ctx)
+{
+	int result = 0;
+
+#if defined(CONFIG_VZFS_FS) || defined(CONFIG_VZFS_FS_MODULE)
+	char *path;
+	unsigned long pg;
+	struct dentry * renamed_dentry;
+
+	if (de->d_sb->s_magic != FSMAGIC_VEFS)
+		return 0;
+	if (de->d_inode->i_nlink != 0 ||
+	    atomic_read(&de->d_inode->i_writecount) > 0) 
+		return 0;
+
+	renamed_dentry = vefs_replaced_dentry(de);
+	if (renamed_dentry == NULL)
+		return 0;
+
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return 0;
+
+	path = d_path(de, mnt, (char *)pg, PAGE_SIZE);
+	if (!IS_ERR(path)) {
+		int len;
+		struct nameidata nd;
+
+		len = pg + PAGE_SIZE - 1 - (unsigned long)path;
+		/* d_path() prefixes deleted files with "(deleted) "; strip it
+		 * so the remaining path can be re-looked-up. */
+		if (len >= sizeof("(deleted) ") - 1 &&
+		    !memcmp(path, "(deleted) ", sizeof("(deleted) ") - 1)) {
+			len -= sizeof("(deleted) ") - 1;
+			path += sizeof("(deleted) ") - 1;
+		}
+
+		if (path_lookup(path, 0, &nd) == 0) {
+			if (mnt == nd.mnt &&
+			    vefs_is_renamed_dentry(nd.dentry, renamed_dentry))
+				result = 1;
+			path_release(&nd);
+		}
+	}
+	free_page(pg);
+#endif
+	return result;
+}
+
+/*
+ * Dump the pathname of @d on @mnt into the image as a CPT_OBJ_NAME object.
+ * For NFS silly-renamed dentries the original (pre-rename) name is rebuilt
+ * from the parent's path plus the saved unlink data.  @replaced requests
+ * stripping of d_path()'s "(deleted) " prefix (VZFS replaced files).
+ * On d_path() failure an empty name is still written so the image layout
+ * stays consistent, and the error is returned (except the shmem case).
+ */
+static int cpt_dump_path(struct dentry *d, struct vfsmount *mnt,
+			   int replaced, cpt_context_t *ctx)
+{
+	int len;
+	char *path;
+	char *pg = cpt_get_buf(ctx);
+	loff_t saved;
+	struct nfs_unlinkdata *ud = NULL;
+
+	if (d->d_flags & DCACHE_NFSFS_RENAMED) {
+		/* Silly-renamed: dump the parent's path, then append the
+		 * original file name stored in d_fsdata. */
+		ud = d->d_fsdata;
+		d = d->d_parent;
+	}
+
+	path = d_path(d, mnt, pg, PAGE_SIZE);
+	len = PTR_ERR(path);
+
+	if (IS_ERR(path)) {
+		struct cpt_object_hdr o;
+		char tmp[1];
+
+		/* VZ changes d_path() to return EINVAL, when path
+		 * is not supposed to be visible inside VE.
+		 * This changes behaviour of d_path() comparing
+		 * to mainstream kernel, f.e. d_path() fails
+		 * on any kind of shared memory. Maybe, there are
+		 * another cases, but I am aware only about this one.
+		 * So, we just ignore error on shmem mounts and proceed.
+		 * Otherwise, checkpointing is prohibited because
+		 * of reference to an invisible file.
+		 */
+		if (len != -EINVAL ||
+		    mnt != get_exec_env()->shmem_mnt)
+			eprintk_ctx("d_path err=%d\n", len);
+		else
+			len = 0;
+
+		/* Still emit an (empty) name object before bailing out. */
+		cpt_push_object(&saved, ctx);
+		cpt_open_object(NULL, ctx);
+		o.cpt_next = CPT_NULL;
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		tmp[0] = 0;
+
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(tmp, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved, ctx);
+
+		__cpt_release_buf(ctx);
+		return len;
+	} else {
+		struct cpt_object_hdr o;
+
+		if (ud) {
+			/* Prepend "<parent>/<original name>" in place, using
+			 * the slack d_path() left at the start of the page. */
+			char *old_path = path;
+			int appendix_len = ud->args.name.len + 1;
+
+			if (path - pg < appendix_len) {
+				eprintk_ctx("d_path err=%d\n", len);
+				__cpt_release_buf(ctx);
+				return -ENOMEM;
+			}
+			path = old_path - appendix_len;
+			strcpy(path, old_path);
+			strcat(path, "/");
+			strncat(path, ud->args.name.name, ud->args.name.len);
+		}
+		len = pg + PAGE_SIZE - 1 - path;
+		if (replaced &&
+		    len >= sizeof("(deleted) ") - 1 &&
+		    !memcmp(path, "(deleted) ", sizeof("(deleted) ") - 1)) {
+			len -= sizeof("(deleted) ") - 1;
+			path += sizeof("(deleted) ") - 1;
+		}
+		o.cpt_next = CPT_NULL;
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		path[len] = 0;
+
+		cpt_push_object(&saved, ctx);
+		cpt_open_object(NULL, ctx);
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(path, len+1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved, ctx);
+		__cpt_release_buf(ctx);
+	}
+	return 0;
+}
+
+/* Write a NUL-terminated string into the image as a CPT_OBJ_NAME object
+ * (header, string incl. terminator, alignment padding).  Always 0. */
+int cpt_dump_string(const char *s, struct cpt_context *ctx)
+{
+	struct cpt_object_hdr hdr;
+
+	hdr.cpt_next = CPT_NULL;
+	hdr.cpt_object = CPT_OBJ_NAME;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_NAME;
+
+	cpt_open_object(NULL, ctx);
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(s, strlen(s) + 1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+/* Map a vfsmount to its dumped CPT_OBJ_VFSMOUNT_REF object.  NFS
+ * automounts are walked up to their real parent first, and a mount whose
+ * parent is autofs is represented by that parent. */
+cpt_object_t *cpt_lookup_vfsmount_obj(struct vfsmount *mnt,
+		struct cpt_context *ctx)
+{
+	for (; is_nfs_automount(mnt); mnt = mnt->mnt_parent)
+		;
+
+	if (is_autofs_mount(mnt->mnt_parent))
+		mnt = mnt->mnt_parent;
+
+	return lookup_cpt_object(CPT_OBJ_VFSMOUNT_REF, mnt, ctx);
+}
+
+/* stat helper that never talks to a delayed (NFS/autofs) filesystem:
+ * for those only the in-core inode attributes are reported. */
+int cpt_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	if (!cpt_need_delayfs(mnt))
+		return vfs_getattr(mnt, dentry, stat);
+
+	generic_fillattr(dentry->d_inode, stat);
+	return 0;
+}
+
+/*
+ * Write a CPT_OBJ_INODE record for @d: stat data (via delayfs-aware
+ * cpt_getattr()) plus a back-reference to the dumped vfsmount object,
+ * or CPT_NULL when the filesystem needs no mount reference.
+ */
+int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_inode_image *v = cpt_get_buf(ctx);
+	struct kstat sbuf;
+	cpt_object_t *mntobj;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_INODE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if ((err = cpt_getattr(mnt, d, &sbuf)) != 0) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	mntobj = cpt_lookup_vfsmount_obj(mnt, ctx);
+	if (!mntobj && cpt_need_vfsmount(d, mnt)) {
+		cpt_release_buf(ctx);
+		return -ENODEV;
+	}
+
+	v->cpt_dev	= d->d_inode->i_sb->s_dev;
+	v->cpt_ino	= d->d_inode->i_ino;
+	v->cpt_mode	= sbuf.mode;
+	v->cpt_nlink	= sbuf.nlink;
+	v->cpt_uid	= sbuf.uid;
+	v->cpt_gid	= sbuf.gid;
+	v->cpt_rdev	= d->d_inode->i_rdev;
+	v->cpt_size	= sbuf.size;
+	v->cpt_atime	= cpt_timespec_export(&sbuf.atime);
+	v->cpt_mtime	= cpt_timespec_export(&sbuf.mtime);
+	v->cpt_ctime	= cpt_timespec_export(&sbuf.ctime);
+	v->cpt_blksize	= sbuf.blksize;
+	v->cpt_blocks	= sbuf.blocks;
+	v->cpt_sb	= d->d_inode->i_sb->s_magic;
+	v->cpt_vfsmount = mntobj ? mntobj->o_pos : CPT_NULL;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/*
+ * Collection pass for file dumping: register every files_struct and every
+ * open file (including files in flight over AF_UNIX sockets), verify that
+ * no one outside the checkpointed group holds extra references, and pull
+ * in the dependent tty, socket and inode objects.
+ */
+int cpt_collect_files(cpt_context_t * ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	int index = 0;
+
+	/* Collect process fd sets */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		if (tsk->files && cpt_object_add(CPT_OBJ_FILES, tsk->files, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	/* Collect files from fd sets */
+	for_each_object(obj, CPT_OBJ_FILES) {
+		int fd;
+		struct files_struct *f = obj->o_obj;
+
+		cpt_obj_setindex(obj, index++, ctx);
+
+		/* Reference count must be fully explained by collected tasks,
+		 * otherwise someone outside the container shares the table. */
+		if (obj->o_count != atomic_read(&f->count)) {
+			eprintk_ctx("files_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&f->count));
+			return -EBUSY;
+		}
+
+		for (fd = 0; fd < f->fdt->max_fds; fd++) {
+			struct file *file = fcheck_files(f, fd);
+			if (file && cpt_object_add(CPT_OBJ_FILE, file, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+
+	/* Collect files queued by AF_UNIX sockets. */
+	if ((err = cpt_collect_passedfds(ctx)) < 0)
+		return err;
+
+	/* OK. At this point we should count all the references. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		struct file *parent;
+		cpt_object_t *ino_obj;
+
+		if (obj->o_count != atomic_read(&file->f_count)) {
+			eprintk_ctx("file struct is referenced outside %d %d\n", obj->o_count, atomic_read(&file->f_count));
+			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+			return -EBUSY;
+		}
+
+		switch (file->f_dentry->d_inode->i_sb->s_magic) {
+		case FSMAGIC_FUTEX:
+		case FSMAGIC_MQUEUE:
+		case FSMAGIC_BDEV:
+#ifndef CONFIG_INOTIFY_USER
+		case FSMAGIC_INOTIFY:
+#endif
+			eprintk_ctx("file on unsupported FS: magic %08lx\n", file->f_dentry->d_inode->i_sb->s_magic);
+			return -EBUSY;
+		}
+
+		/* Collect inode. It is necessary mostly to resolve deleted
+		 * hard links. */
+		ino_obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (ino_obj == NULL)
+			return -ENOMEM;
+
+		/* Remember one file whose dentry is still hashed: it later
+		 * supplies an alternative name for deleted hard links. */
+		parent = ino_obj->o_parent;
+		if (!parent || (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry)))
+			ino_obj->o_parent = file;
+
+		if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
+			int maj = imajor(file->f_dentry->d_inode);
+			if (maj == PTY_MASTER_MAJOR ||
+			    (maj >= UNIX98_PTY_MASTER_MAJOR &&
+			     maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
+			    maj == PTY_SLAVE_MAJOR ||
+			    maj == UNIX98_PTY_SLAVE_MAJOR ||
+			    maj == TTYAUX_MAJOR) {
+				err = cpt_collect_tty(file, ctx);
+				if (err)
+					return err;
+			}
+		}
+
+		if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
+			err = cpt_collect_socket(file, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	err = cpt_index_sockets(ctx);
+
+	return err;
+}
+
+/* /dev/ptmx is special, all the files share one inode, but real tty backend
+ * is attached via file->private_data.
+ */
+
+/* True for /dev/ptmx (char dev TTYAUX_MAJOR:2): all openers share one
+ * inode while the real tty backend hangs off file->private_data. */
+static inline int is_cloning_inode(struct inode *ino)
+{
+	if (!S_ISCHR(ino->i_mode))
+		return 0;
+	return ino->i_rdev == MKDEV(TTYAUX_MAJOR, 2);
+}
+
+/*
+ * Serialize one file lock as a CPT_OBJ_FLOCK record.  @owner is the index
+ * of the owning files_struct, or -1 for FL_FLOCK locks.  @delay marks
+ * locks on delayed (NFS) mounts which must be re-acquired on restore;
+ * it is honoured only when the lock has remote fl_ops.
+ */
+static int dump_one_flock(struct file_lock *fl, int owner,
+		struct cpt_context *ctx, int delay)
+{
+	pid_t pid;
+	struct cpt_flock_image *v;
+
+	if (delay && !fl->fl_ops)
+		delay = 0; /* no remote locks */
+
+	v = cpt_get_buf(ctx);
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_FLOCK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_owner = owner;
+
+	/* Store the virtual pid; a posix lock held by a task from another
+	 * container makes the dump impossible. */
+	pid = fl->fl_pid;
+	if (pid && !is_virtual_pid(fl->fl_pid)) {
+		pid = _pid_to_vpid(fl->fl_pid);
+		if (pid == -1) {
+			if (!(fl->fl_flags&FL_FLOCK)) {
+				eprintk_ctx("posix lock from another container?\n");
+				cpt_release_buf(ctx);
+				return -EBUSY;
+			}
+			pid = 0;
+		}
+	}
+
+	v->cpt_pid = pid;
+	v->cpt_start = fl->fl_start;
+	v->cpt_end = fl->fl_end;
+	v->cpt_flags = fl->fl_flags;
+	if (delay)
+		v->cpt_flags |= CPT_FLOCK_DELAYED;
+	v->cpt_type = fl->fl_type;
+	v->cpt_svid = delay ? (__u32)fl->fl_ops->fl_owner_id(fl) : CPT_NOINDEX;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	return 0;
+}
+
+
+/*
+ * Walk the inode's lock list and dump every lock belonging to @file.
+ * Leases are unsupported.  Runs under the BKL, which protects the
+ * i_flock list in this kernel.
+ */
+int cpt_dump_flock(struct file *file, struct cpt_context *ctx)
+{
+	int err = 0, delay;
+	struct file_lock *fl;
+
+	lock_kernel();
+	for (fl = file->f_dentry->d_inode->i_flock;
+	     fl; fl = fl->fl_next) {
+		if (file != fl->fl_file)
+			continue;
+		if (fl->fl_flags & FL_LEASE) {
+			eprintk_ctx("lease lock is not supported\n");
+			err = -EINVAL;
+			break;
+		}
+
+		delay = cpt_need_delayfs(file->f_vfsmnt);
+
+		if (fl->fl_flags & FL_POSIX) {
+			cpt_object_t *obj;
+			/* Posix locks are owned by a files_struct; record its
+			 * dump index so restore can reattach ownership. */
+			obj = lookup_cpt_object(CPT_OBJ_FILES, fl->fl_owner, ctx);
+			if (obj) {
+				dump_one_flock(fl, obj->o_index, ctx, delay);
+				continue;
+			} else {
+				eprintk_ctx("unknown lock owner %p\n", fl->fl_owner);
+				err = -EINVAL;
+			}
+		}
+		if (fl->fl_flags & FL_FLOCK) {
+			dump_one_flock(fl, -1, ctx, delay);
+			continue;
+		}
+	}
+	unlock_kernel();
+	return err;
+}
+
+/*
+ * Translate a combined pid/pgid value (positive pid, negative pgid, as
+ * kept in file->f_owner.pid) into its virtual counterpart.  Returns 0
+ * when the task or process group has already disappeared.
+ */
+static int __comb_pid_to_vpid(int pid)
+{
+	int vpid = pid;
+
+	if (pid > 0) {
+		vpid = _pid_to_vpid(pid);
+		if (unlikely(vpid < 0)) {
+			/* Fixed typo: "amymore" -> "anymore". */
+			dprintk("pid %d does not exist anymore.\n", pid);
+			return 0;
+		}
+	} else if (pid < 0) {
+		/* Negative value encodes a process group id. */
+		vpid = _pid_to_vpid(-pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pgid %d does not exist anymore.\n", -pid);
+			return 0;
+		}
+		vpid = -vpid;
+	}
+	return vpid;
+}
+
+/*
+ * Write a CPT_OBJ_FILE record for @file, followed by its pathname (except
+ * for sockets) and any posix/flock locks.  The record references earlier
+ * collected inode, tty, socket and vfsmount objects by image position.
+ *
+ * Fixes vs. original: the scratch buffer from cpt_get_buf() is now
+ * released on the -ENODEV early return (all other paths already did so),
+ * and the signalfd-local variable no longer shadows the @ctx parameter.
+ */
+static int dump_one_file(cpt_object_t *obj, struct file *file, cpt_context_t *ctx)
+{
+	int err = 0;
+	cpt_object_t *iobj;
+	struct cpt_file_image *v = cpt_get_buf(ctx);
+	struct kstat sbuf;
+	int replaced = 0;
+	cpt_object_t *mntobj;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_flags = file->f_flags;
+	v->cpt_mode = file->f_mode;
+	v->cpt_pos = file->f_pos;
+	v->cpt_uid = file->f_uid;
+	v->cpt_gid = file->f_gid;
+
+	cpt_getattr(file->f_vfsmnt, file->f_dentry, &sbuf);
+
+	mntobj = cpt_lookup_vfsmount_obj(file->f_vfsmnt, ctx);
+	if (!mntobj && cpt_need_vfsmount(file->f_dentry, file->f_vfsmnt)) {
+		/* Was leaked here before: release the scratch buffer. */
+		cpt_release_buf(ctx);
+		return -ENODEV;
+	}
+
+	v->cpt_i_mode = sbuf.mode;
+	v->cpt_lflags = 0;
+
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC) {
+		v->cpt_lflags |= CPT_DENTRY_PROC;
+		if (proc_dentry_of_dead_task(file->f_dentry))
+			v->cpt_lflags |= CPT_DENTRY_PROCPID_DEAD;
+	}
+
+	if (cpt_need_delayfs(file->f_vfsmnt)) {
+		struct dentry *de = file->f_dentry;
+
+		if (obj)
+			obj->o_flags |= CPT_FILE_DELAYFS;
+
+		/* Clear NFS silly-rename state; it is re-created on restore. */
+		if (de->d_flags & DCACHE_NFSFS_RENAMED) {
+			spin_lock(&de->d_lock);
+			de->d_flags &= ~DCACHE_NFSFS_RENAMED;
+			spin_unlock(&de->d_lock);
+			v->cpt_lflags |= CPT_DENTRY_SILLYRENAME;
+			if (obj)
+				obj->o_flags |= CPT_FILE_SILLYRENAME;
+		}
+	}
+
+	if (IS_ROOT(file->f_dentry))
+		v->cpt_lflags |= CPT_DENTRY_ROOT;
+	else if (d_unhashed(file->f_dentry)) {
+		if (cpt_replaced(file->f_dentry, file->f_vfsmnt, ctx)) {
+			v->cpt_lflags |= CPT_DENTRY_REPLACED;
+			replaced = 1;
+		} else if (!(v->cpt_lflags & CPT_DENTRY_PROCPID_DEAD))
+			v->cpt_lflags |= CPT_DENTRY_DELETED;
+	}
+	if (is_cloning_inode(file->f_dentry->d_inode))
+		v->cpt_lflags |= CPT_DENTRY_CLONING;
+
+	v->cpt_inode = CPT_NULL;
+	if (!(v->cpt_lflags & CPT_DENTRY_REPLACED)) {
+		iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (iobj) {
+			v->cpt_inode = iobj->o_pos;
+			if (iobj->o_flags & CPT_INODE_HARDLINKED)
+				v->cpt_lflags |= CPT_DENTRY_HARDLINKED;
+		}
+	}
+	v->cpt_priv = CPT_NULL;
+	v->cpt_fown_fd = -1;
+	if (S_ISCHR(v->cpt_i_mode)) {
+		iobj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx);
+		if (iobj) {
+			v->cpt_priv = iobj->o_pos;
+			if (file->f_flags&FASYNC)
+				v->cpt_fown_fd = cpt_tty_fasync(file, ctx);
+		}
+		if (imajor(file->f_dentry->d_inode) == MISC_MAJOR &&
+				iminor(file->f_dentry->d_inode) == TUN_MINOR)
+			v->cpt_lflags |= CPT_DENTRY_TUNTAP;
+	}
+	if (S_ISSOCK(v->cpt_i_mode)) {
+		if (obj->o_index < 0) {
+			eprintk_ctx("BUG: no socket index\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_priv = obj->o_index;
+		if (file->f_flags&FASYNC)
+			v->cpt_fown_fd = cpt_socket_fasync(file, ctx);
+	}
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
+		v->cpt_lflags |= CPT_DENTRY_EPOLL;
+	}
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_INOTIFY) {
+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
+		v->cpt_lflags |= CPT_DENTRY_INOTIFY;
+	}
+	if (is_signalfd_inode(file->f_dentry->d_inode)) {
+		/* Renamed from "ctx" to stop shadowing the cpt context. */
+		struct signalfd_ctx *sig_ctx = file->private_data;
+		v->cpt_lflags |= CPT_DENTRY_SIGNALFD;
+		v->cpt_priv = cpt_sigset_export(&sig_ctx->sigmask);
+	}
+
+	v->cpt_fown_pid = __comb_pid_to_vpid((int)file->f_owner.pid);
+	v->cpt_fown_uid = file->f_owner.uid;
+	v->cpt_fown_euid = file->f_owner.euid;
+	v->cpt_fown_signo = file->f_owner.signum;
+
+	v->cpt_vfsmount = mntobj ? mntobj->o_pos : CPT_NULL;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (!S_ISSOCK(v->cpt_i_mode)) {
+		err = cpt_dump_path(file->f_dentry, file->f_vfsmnt,
+				replaced, ctx);
+		if (err)
+			return err;
+		if ((file->f_mode & FMODE_WRITE) &&
+				file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_VEFS)
+			vefs_track_notify(file->f_dentry, 1);
+	}
+
+	if (file->f_dentry->d_inode->i_flock)
+		err = cpt_dump_flock(file, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+/* Return 1 iff every machine word of @page is zero. */
+int cpt_page_is_zero(struct page * page)
+{
+	unsigned long *kaddr = kmap_atomic(page, KM_USER0);
+	int zero = 1;
+	int i;
+
+	for (i = 0; i < PAGE_SIZE / sizeof(unsigned long); i++) {
+		if (kaddr[i] != 0) {
+			zero = 0;
+			break;
+		}
+	}
+
+	kunmap_atomic(kaddr, KM_USER0);
+	return zero;
+}
+
+/* Classification of the page run currently being accumulated. */
+enum {
+	TYPE_NONE,	/* no block open */
+	TYPE_ZERO,	/* run of all-zero pages: becomes a hole, never written */
+	TYPE_DATA,	/* literal page contents (CPT_OBJ_PAGES) */
+	TYPE_ITER	/* iteratively-transferred pfns (CPT_OBJ_ITERPAGES) */
+};
+
+/* Per-file state shared between dump_actor() invocations. */
+struct dump_data
+{
+	cpt_context_t * ctx;
+	loff_t obj_opened;	/* image offset of the open page-block header */
+	struct cpt_page_block pgb;	/* header being filled in */
+	int type;	/* TYPE_* of the currently open block */
+};
+
+/* Finish the currently accumulated page block: patch the final cpt_end
+ * back into the already-written header, align and close the object.
+ * Zero runs carry no data object, so TYPE_ZERO (like TYPE_NONE) returns
+ * without touching obj_opened/type — dump_actor() resets them itself. */
+static void flush_block(struct dump_data *dat)
+{
+	cpt_context_t * ctx = dat->ctx;
+
+	if (dat->type == TYPE_NONE)
+		return;
+	if (dat->type == TYPE_ZERO)
+		return;
+
+	ctx->pwrite(&dat->pgb.cpt_end, 8, ctx,
+		    dat->obj_opened + offsetof(struct cpt_page_block, cpt_end));
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	dat->obj_opened = CPT_NULL;
+	dat->type = TYPE_NONE;
+}
+
+
+/*
+ * sendfile read actor: classify each page (all-zero / literal data /
+ * iteratively-sent pfn) and coalesce runs of the same type into page
+ * blocks.  Zero runs are represented only as gaps between written blocks.
+ */
+static int
+dump_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+{
+	struct dump_data * dat = desc->arg.data;
+	cpt_context_t * ctx = dat->ctx;
+	unsigned long count = desc->count;
+
+	if (size > count)
+		size = count;
+
+	if (page == ZERO_PAGE(0) ||
+	    cpt_page_is_zero(page)) {
+		/* NOTE(review): the increment below is effectively discarded —
+		 * cpt_start is reset a few lines down and zero runs are never
+		 * written; looks like an intended "append and skip" shortcut
+		 * that falls through instead.  Harmless, but confirm intent. */
+		if (dat->type == TYPE_ZERO) {
+			/* Just append. */
+			dat->pgb.cpt_end += PAGE_SIZE;
+		}
+		/* Flush opened segment */
+		if (dat->type != TYPE_NONE)
+			flush_block(dat);
+
+		dat->pgb.cpt_start = page->index << PAGE_CACHE_SHIFT;
+		dat->type = TYPE_ZERO;
+	} else {
+		int ntype = TYPE_DATA;
+
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+		if (test_bit(PG_checkpointed, &page->flags) &&
+		    ctx->iter_shm_start &&
+		    !cpt_verify_wrprot(page, ctx))
+			ntype = TYPE_ITER;
+#endif
+
+		/* Start a new block on type change, and cap iter blocks. */
+		if (ntype != dat->type ||
+		    (ntype == TYPE_ITER &&
+		     dat->pgb.cpt_end - dat->pgb.cpt_start >= 16*PAGE_SIZE))
+			flush_block(dat);
+
+		if (ntype != dat->type) {
+			cpt_open_object(NULL, ctx);
+			dat->obj_opened = ctx->file->f_pos;
+			dat->pgb.cpt_next = CPT_NULL;
+			dat->pgb.cpt_object = ntype == TYPE_DATA ? CPT_OBJ_PAGES :
+				CPT_OBJ_ITERPAGES;
+			dat->pgb.cpt_hdrlen = sizeof(dat->pgb);
+			dat->pgb.cpt_content = CPT_CONTENT_DATA;
+			dat->pgb.cpt_start = page->index << PAGE_CACHE_SHIFT;
+			dat->pgb.cpt_end = dat->pgb.cpt_start;
+
+			ctx->write(&dat->pgb, sizeof(dat->pgb), ctx);
+			dat->type = ntype;
+		}
+
+		if (ntype == TYPE_DATA) {
+			/* Partial tail pages are padded with zero bytes. */
+			char * kaddr = kmap(page);
+			ctx->write(kaddr, size, ctx);
+			kunmap(page);
+			if (size < PAGE_SIZE) {
+				kaddr = kmap(ZERO_PAGE(0));
+				ctx->write(kaddr, PAGE_SIZE - size, ctx);
+				kunmap(ZERO_PAGE(0));
+				size = PAGE_SIZE;
+			}
+		} else {
+			/* Iter pages: only the pfn is recorded. */
+			__u64 pfn = page_to_pfn(page);
+			ctx->write(&pfn, 8, ctx);
+			size = PAGE_SIZE;
+		}
+	}
+	dat->pgb.cpt_end += size;
+
+	desc->count = count - size;
+	desc->written += size;
+	return size;
+}
+
+/*
+ * Dump a regular file's content through its ->sendfile with dump_actor.
+ * A file without a file-ops sendfile must be SYSV SHM on tmpfs: its shm
+ * metadata is dumped first, then content via the inode's sendfile.
+ */
+static int dump_content_regular(struct file *file, struct cpt_context *ctx)
+{
+	loff_t saved_pos;
+	loff_t pos = 0;
+	ssize_t (*doit)(struct file *in_file, loff_t *ppos,
+		     size_t count, read_actor_t actor, void *target);
+	struct dump_data dat;
+	ssize_t written;
+
+	if (file->f_op == NULL)
+		return -EINVAL;
+
+	if ((doit = file->f_op->sendfile) == NULL) {
+		doit = file->f_dentry->d_inode->i_fop->sendfile;
+		if (doit == NULL)
+			return -EINVAL;
+
+		/* If "officially" there is no sendfile, it is something
+		 * unsupported unless it is SYSV SHM.
+		 */
+		if (file->f_op->read != NULL)
+			return -EINVAL;
+
+		if (file->f_dentry->d_inode->i_sb->s_magic != FSMAGIC_TMPFS) {
+			eprintk_ctx("unreadable, but not SYSV SHM file\n");
+			return -EINVAL;
+		}
+		cpt_dump_content_sysvshm(file, ctx);
+	}
+
+	dat.ctx = ctx;
+	dat.type = TYPE_NONE;
+
+	cpt_push_object(&saved_pos, ctx);
+
+	/* Re-pick sendfile (file-ops one preferred) for the read loop. */
+	doit = file->f_op->sendfile;
+	if (!doit)
+		doit = file->f_dentry->d_inode->i_fop->sendfile;
+
+	do {
+		written = doit(file, &pos, 0x40000000UL, dump_actor, &dat);
+		if (written < 0)
+			return written;
+	} while (written);
+
+	/* Flush the last open page block, if any. */
+	if (dat.type != TYPE_NONE)
+		flush_block(&dat);
+
+	cpt_pop_object(&saved_pos, ctx);
+
+	return 0;
+}
+
+
+/* Char devices: /dev/mem-class and tun need no content dump; tty/pty
+ * devices get their queues dumped; anything else is unsupported. */
+static int dump_content_chrdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	int maj = imajor(ino);
+
+	if (maj == MEM_MAJOR)
+		return 0;	/* Well, OK. */
+
+	if (maj == PTY_MASTER_MAJOR ||
+	    (maj >= UNIX98_PTY_MASTER_MAJOR &&
+	     maj < UNIX98_PTY_MASTER_MAJOR + UNIX98_PTY_MAJOR_COUNT) ||
+	    maj == PTY_SLAVE_MAJOR ||
+	    maj == UNIX98_PTY_SLAVE_MAJOR ||
+	    maj == TTYAUX_MAJOR)
+		return cpt_dump_content_tty(file, ctx);
+
+	if (maj == MISC_MAJOR && iminor(ino) == TUN_MINOR)
+		return 0;
+
+	eprintk_ctx("unsupported chrdev %d/%d\n", maj, iminor(ino));
+	return -EINVAL;
+}
+
+/* Block devices are never migrated: log and refuse. */
+static int dump_content_blkdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+
+	eprintk_ctx("unsupported blkdev %d/%d\n", imajor(ino), iminor(ino));
+	return -EINVAL;
+}
+
+/*
+ * Dump buffered fifo/pipe data as a CPT_OBJ_BITS blob.  When the pipe
+ * has readers/writers outside the checkpointed group: fail for anonymous
+ * pipes, silently migrate without data for named fifos (see comment).
+ */
+static int dump_content_fifo(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	cpt_object_t *obj;
+	loff_t saved_pos;
+	int readers;
+	int writers;
+	int anon = 0;
+
+	/* Subtract openers we collected; any remainder is external. */
+	mutex_lock(&ino->i_mutex);
+	readers = ino->i_pipe->readers;
+	writers = ino->i_pipe->writers;
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file1 = obj->o_obj;
+		if (file1->f_dentry->d_inode == ino) {
+			if (file1->f_mode & FMODE_READ)
+				readers--;
+			if (file1->f_mode & FMODE_WRITE)
+				writers--;
+		}
+	}
+	mutex_unlock(&ino->i_mutex);
+	if (readers || writers) {
+		struct dentry *dr = file->f_dentry->d_sb->s_root;
+		if (dr->d_name.len == 7 && memcmp(dr->d_name.name,"pipefs:",7) == 0)
+			anon = 1;
+
+		if (anon) {
+			eprintk_ctx("pipe has %d/%d external readers/writers\n", readers, writers);
+			return -EBUSY;
+		}
+		/* If fifo has external readers/writers, we are in troubles.
+		 * If the buffer is not empty, we must move its content.
+		 * But if the fifo is owned by a service, we cannot do
+		 * this. See?
+		 *
+		 * For now we assume, that if fifo is opened by another
+		 * process, we do not own it and, hence, migrate without
+		 * data.
+		 */
+		return 0;
+	}
+
+	/* OK, we must save fifo state. No semaphores required. */
+
+	if (ino->i_pipe->nrbufs) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		struct pipe_inode_info *info;
+		int count, buf, nrbufs;
+
+		/* First pass: total byte count, with a sanity check that
+		 * every buffer is a plain mergeable page buffer. */
+		mutex_lock(&ino->i_mutex);
+		info =  ino->i_pipe;
+		count = 0;
+		buf = info->curbuf;
+		nrbufs = info->nrbufs;
+		while (--nrbufs >= 0) {
+			if (!info->bufs[buf].ops->can_merge) {
+				mutex_unlock(&ino->i_mutex);
+				eprintk_ctx("unknown format of pipe buffer\n");
+				return -EINVAL;
+			}
+			count += info->bufs[buf].len;
+			buf = (buf+1) & (PIPE_BUFFERS-1);
+		}
+
+		if (!count) {
+			mutex_unlock(&ino->i_mutex);
+			return 0;
+		}
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = count;
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+
+		/* Second pass: write buffer payloads in ring order. */
+		count = 0;
+		buf = info->curbuf;
+		nrbufs = info->nrbufs;
+		while (--nrbufs >= 0) {
+			struct pipe_buffer *b = info->bufs + buf;
+			/* need to ->pin first? */
+			void * addr = b->ops->map(info, b, 0);
+			ctx->write(addr + b->offset, b->len, ctx);
+			b->ops->unmap(info, b, addr);
+			buf = (buf+1) & (PIPE_BUFFERS-1);
+		}
+
+		mutex_unlock(&ino->i_mutex);
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	return 0;
+}
+
+/* Socket payload is dumped by the socket code; nothing to do per-inode. */
+static int dump_content_socket(struct file *file, struct cpt_context *ctx)
+{
+	return 0;
+}
+
+/* Search cookie for cpt_filldir(): find a directory entry by inode no. */
+struct cpt_dirent {
+	unsigned long	ino;	/* inode number being searched for */
+	char		*name;	/* output buffer (one page) */
+	int		namelen;	/* length of the found name */
+	int		found;	/* set to 1 once a match is recorded */
+};
+
+/* readdir callback: capture the first name whose inode number matches
+ * the cookie's target.  Returns 1 to stop iteration once found. */
+static int cpt_filldir(void * __buf, const char * name, int namelen,
+		loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct cpt_dirent *de = __buf;
+
+	if (ino != de->ino || namelen >= PAGE_SIZE - 1)
+		return 0;
+
+	memcpy(de->name, name, namelen);
+	de->name[namelen] = '\0';
+	de->namelen = namelen;
+	de->found = 1;
+	return 1;
+}
+
+/*
+ * The file's own dentry is deleted but the inode still has links.  Try to
+ * find an alternative name: first among the inode's dentry aliases, then
+ * by scanning the parent directory for an entry with the same inode
+ * number.  On success, dump the found path into the image.
+ */
+static int find_linked_dentry(struct dentry *d, struct vfsmount *mnt,
+		struct inode *ino, struct cpt_context *ctx)
+{
+	int err = -EBUSY;
+	struct file *f = NULL;
+	struct cpt_dirent entry;
+	struct dentry *de, *found = NULL;
+
+	dprintk_ctx("deleted reference to existing inode, try to find file\n");
+	/* 1. Try to find not deleted dentry in ino->i_dentry list */
+	spin_lock(&dcache_lock);
+	list_for_each_entry(de, &ino->i_dentry, d_alias) {
+		if (!IS_ROOT(de) && d_unhashed(de) &&
+		    !(de->d_flags & DCACHE_NFSFS_RENAMED))
+			continue;
+		found = de;
+		dget_locked(found);
+		break;
+	}
+	spin_unlock(&dcache_lock);
+	if (found) {
+		err = cpt_dump_path(found, mnt, 0, ctx);
+		dput(found);
+		if (!err) {
+			dprintk_ctx("dentry found in aliases\n");
+			return 0;
+		}
+	}
+
+	/* 2. Try to find file in current dir */
+	de = dget_parent(d);
+	if (!de)
+		return -EINVAL;
+
+	/* dentry_open consumes the mnt/dentry references on all paths. */
+	mntget(mnt);
+	f = dentry_open(de, mnt, O_RDONLY | O_LARGEFILE);
+	if (IS_ERR(f))
+		return PTR_ERR(f);
+
+	entry.ino = ino->i_ino;
+	entry.name = cpt_get_buf(ctx);
+	entry.found = 0;
+	err = vfs_readdir(f, cpt_filldir, &entry);
+	if (err || !entry.found) {
+		err = err ? err : -ENOENT;
+		goto err_readdir;
+	}
+
+	found = lookup_one_len(entry.name, de, entry.namelen);
+	if (IS_ERR(found)) {
+		err = PTR_ERR(found);
+		goto err_readdir;
+	}
+
+	err = -ENOENT;
+	if (found->d_inode != ino)
+		goto err_lookup;
+
+	dprintk_ctx("dentry found in dir\n");
+	__cpt_release_buf(ctx);
+	err = cpt_dump_path(found, mnt, 0, ctx);
+
+err_lookup:
+	dput(found);
+err_readdir:
+	fput(f);
+	__cpt_release_buf(ctx);
+	return err;
+}
+
+/* Return the dentry of the pre-opened hardlink directory residing on
+ * @mnt, or NULL if none was registered for this mount. */
+static struct dentry *find_linkdir(struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int i;
+
+	for (i = 0; i < ctx->linkdirs_num; i++) {
+		struct file *dir = ctx->linkdirs[i];
+
+		if (dir->f_vfsmnt == mnt)
+			return dir->f_dentry;
+	}
+	return NULL;
+}
+
+/*
+ * Create a ".cpt_hardlink.NNNNNNNN" entry in the per-mount link
+ * directory: a hardlink to @d, or (when @d is NULL) a fresh 0600 file.
+ * Returns the new dentry (caller must dput it) or an ERR_PTR.  Leftover
+ * entries from a previous dump are treated as an error; userspace is
+ * expected to have cleaned them up.
+ */
+struct dentry *cpt_fake_link(struct dentry *d, struct vfsmount *mnt,
+		struct inode *ino, struct cpt_context *ctx)
+{
+	int err;
+	int order = 8;
+	const char *prefix = ".cpt_hardlink.";
+	int preflen = strlen(prefix) + order;
+	char name[preflen + 1];
+	struct dentry *dirde, *hardde;
+
+	dirde = find_linkdir(mnt, ctx);
+	if (!dirde) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Per-context counter gives each link a unique 8-digit suffix. */
+	ctx->linkcnt++;
+	snprintf(name, sizeof(name), "%s%0*u", prefix, order, ctx->linkcnt);
+
+	mutex_lock(&dirde->d_inode->i_mutex);
+	hardde = lookup_one_len(name, dirde, strlen(name));
+	if (IS_ERR(hardde)) {
+		err = PTR_ERR(hardde);
+		goto out_unlock;
+	}
+
+	if (hardde->d_inode) {
+		/* Userspace should clean hardlinked files from previous
+		 * dump/undump
+		 */
+		eprintk_ctx("Hardlinked file already exists: %s\n", name);
+		err = -EEXIST;
+		goto out_put;
+	}
+
+	if (d == NULL)
+		err = vfs_create(dirde->d_inode, hardde, 0600, NULL);
+	else
+		err = vfs_link(d, dirde->d_inode, hardde);
+	if (err) {
+		eprintk_ctx("error hardlink %s, %d\n", name, err);
+		goto out_put;
+	}
+
+out_unlock:
+	mutex_unlock(&dirde->d_inode->i_mutex);
+out:
+	return err ? ERR_PTR(err) : hardde;
+
+out_put:
+	dput(hardde);
+	goto out_unlock;
+}
+
+/* Create a temporary hardlink to @ino and record its path in the image. */
+static int create_dump_hardlink(struct dentry *d, struct vfsmount *mnt,
+				struct inode *ino, struct cpt_context *ctx)
+{
+	struct dentry *link;
+	int err;
+
+	link = cpt_fake_link(d, mnt, ino, ctx);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	err = cpt_dump_path(link, mnt, 0, ctx);
+	dput(link);
+	return err;
+}
+
+/*
+ * Dump the CPT_OBJ_INODE record for @d (once per inode; skipped when its
+ * object is already positioned) and, where needed, the inode's content.
+ * Content is required for deleted or special files; for deleted-but-
+ * still-linked inodes an alternative name or a temporary hardlink is
+ * recorded instead of the data.
+ */
+static int dump_one_inode(struct file *file, struct dentry *d,
+			  struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct inode *ino = d->d_inode;
+	cpt_object_t *iobj;
+	int dump_it = 0;
+
+	iobj = lookup_cpt_object(CPT_OBJ_INODE, ino, ctx);
+	if (!iobj)
+		return -EINVAL;
+
+	/* Already dumped via another file referencing the same inode. */
+	if (iobj->o_pos >= 0)
+		return 0;
+
+	if (ino->i_sb->s_magic == FSMAGIC_PROC &&
+	    proc_dentry_of_dead_task(d))
+		return 0;
+
+	/* Deleted (and not VZFS-replaced) files need their data saved. */
+	if ((!IS_ROOT(d) && d_unhashed(d)) &&
+	    !cpt_replaced(d, mnt, ctx))
+		dump_it = 1;
+	if (!S_ISREG(ino->i_mode) && !S_ISDIR(ino->i_mode)) {
+		/* One more bug in epoll: invalid inode mode.
+		 * What a load of crap...
+		 */
+		if (ino->i_sb->s_magic == FSMAGIC_EPOLL &&
+		    (ino->i_mode & S_IFMT) == 0)
+			return 0;
+		if (is_signalfd_inode(ino))
+			return 0;
+		dump_it = 1;
+	}
+
+	if (!dump_it)
+		return 0;
+
+	cpt_open_object(iobj, ctx);
+	cpt_dump_inode(d, mnt, ctx);
+
+	if (!IS_ROOT(d) && d_unhashed(d)) {
+		struct file *parent;
+		parent = iobj->o_parent;
+		if (!parent ||
+		    (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) {
+			/* Inode is not deleted, but it does not
+			 * have references from inside checkpointed
+			 * process group. */
+			if (ino->i_nlink != 0) {
+				err = find_linked_dentry(d, mnt, ino, ctx);
+				if (err && S_ISREG(ino->i_mode)) {
+					/* No visible name: fabricate one. */
+					err = create_dump_hardlink(d, mnt, ino, ctx);
+					iobj->o_flags |= CPT_INODE_HARDLINKED;
+				} else if (S_ISCHR(ino->i_mode) ||
+					   S_ISBLK(ino->i_mode) ||
+					   S_ISFIFO(ino->i_mode))
+					err = 0;
+
+				if (err) {
+					eprintk_ctx("deleted reference to existing inode, checkpointing is impossible: %d\n", err);
+					return -EBUSY;
+				}
+				/* A name was recorded: data need not be. */
+				if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
+					dump_it = 0;
+			}
+		} else {
+			/* Refer to _another_ file name. */
+			err = cpt_dump_path(parent->f_dentry,
+					parent->f_vfsmnt, 0, ctx);
+			if (err)
+				return err;
+			if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
+				dump_it = 0;
+		}
+	}
+	if (dump_it) {
+		if (S_ISREG(ino->i_mode)) {
+			if ((err = dump_content_regular(file, ctx)) != 0) {
+				eprintk_ctx("dump_content_regular ");
+				cpt_printk_dentry(d, mnt);
+			}
+		} else if (S_ISDIR(ino->i_mode)) {
+			/* We cannot do anything. The directory should be
+			 * empty, so it is not a big deal.
+			 */
+		} else if (S_ISCHR(ino->i_mode)) {
+			err = dump_content_chrdev(file, ctx);
+		} else if (S_ISBLK(ino->i_mode)) {
+			err = dump_content_blkdev(file, ctx);
+		} else if (S_ISFIFO(ino->i_mode)) {
+			err = dump_content_fifo(file, ctx);
+		} else if (S_ISSOCK(ino->i_mode)) {
+			err = dump_content_socket(file, ctx);
+		} else {
+			eprintk_ctx("unknown inode mode %o\n", ino->i_mode & S_IFMT);
+			err = -EINVAL;
+		}
+	}
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+/* Force-stop vzfs change tracking on every mount we checkpointed. */
+static void cpt_stop_vzfs_trackers(struct cpt_context *ctx)
+{
+	cpt_object_t *ref;
+
+	for_each_object(ref, CPT_OBJ_VFSMOUNT_REF) {
+		struct vfsmount *m = ref->o_obj;
+
+		if (m->mnt_sb->s_magic == FSMAGIC_VEFS)
+			vefs_track_force_stop(m->mnt_sb);
+	}
+}
+
+/* Dump all file-related image sections, in a fixed order that restore
+ * depends on: TTYs, inode contents, file descriptors (counting epoll
+ * and inotify files on the way), epoll state, inotify state, sockets.
+ * Finally stop vzfs trackers on the dumped mounts.
+ * Returns 0 or the first error encountered.
+ */
+int cpt_dump_files(struct cpt_context *ctx)
+{
+	int epoll_nr, inotify_nr;
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_TTY);
+	for_each_object(obj, CPT_OBJ_TTY) {
+		int err;
+
+		if ((err = cpt_dump_tty(obj, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	/* Inodes referenced by open files and by each task's fs_struct
+	 * (root/pwd/altroot) go into CPT_SECT_INODE.
+	 */
+	cpt_open_section(ctx, CPT_SECT_INODE);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_inode(file, file->f_dentry,
+					  file->f_vfsmnt, ctx)) != 0)
+			return err;
+	}
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct fs_struct *fs = obj->o_obj;
+		int err;
+
+		if (fs->root &&
+		    (err = dump_one_inode(NULL, fs->root, fs->rootmnt, ctx)) != 0)
+			return err;
+		if (fs->pwd &&
+		    (err = dump_one_inode(NULL, fs->pwd, fs->pwdmnt, ctx)) != 0)
+			return err;
+		if (fs->altroot &&
+		    (err = dump_one_inode(NULL, fs->altroot, fs->altrootmnt, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	/* Count epoll/inotify files while dumping descriptors so the
+	 * corresponding sections below are emitted only when needed.
+	 */
+	epoll_nr = 0;
+	inotify_nr = 0;
+	cpt_open_section(ctx, CPT_SECT_FILES);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_file(obj, file, ctx)) != 0)
+			return err;
+		if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL)
+			epoll_nr++;
+		if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_INOTIFY)
+			inotify_nr++;
+	}
+	cpt_close_section(ctx);
+
+	if (epoll_nr) {
+		cpt_open_section(ctx, CPT_SECT_EPOLL);
+		for_each_object(obj, CPT_OBJ_FILE) {
+			struct file *file = obj->o_obj;
+			if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+				int err;
+				if ((err = cpt_dump_epolldev(obj, ctx)) != 0)
+					return err;
+			}
+		}
+		cpt_close_section(ctx);
+	}
+
+	if (inotify_nr) {
+		cpt_open_section(ctx, CPT_SECT_INOTIFY);
+		for_each_object(obj, CPT_OBJ_FILE) {
+			struct file *file = obj->o_obj;
+			if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_INOTIFY) {
+				int err = -EINVAL;
+				/* Without CONFIG_INOTIFY_USER this always
+				 * fails with -EINVAL: such files cannot be
+				 * dumped by this kernel.
+				 */
+#ifdef CONFIG_INOTIFY_USER
+				if ((err = cpt_dump_inotify(obj, ctx)) != 0)
+#endif
+					return err;
+			}
+		}
+		cpt_close_section(ctx);
+	}
+
+	cpt_open_section(ctx, CPT_SECT_SOCKET);
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		int err;
+
+		if ((err = cpt_dump_socket(obj, obj->o_obj, obj->o_index, -1, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	cpt_stop_vzfs_trackers(ctx);
+
+	return 0;
+}
+
+/* Write one CPT_OBJ_FILEDESC record mapping descriptor @fd of files
+ * struct @f to the image position of its previously dumped file.
+ */
+static int dump_filedesc(int fd, struct file *file,
+			 struct files_struct *f, struct cpt_context *ctx)
+{
+	cpt_object_t *fobj;
+	struct cpt_fd_image *img = cpt_get_buf(ctx);
+
+	cpt_open_object(NULL, ctx);
+
+	img->cpt_next = CPT_NULL;
+	img->cpt_object = CPT_OBJ_FILEDESC;
+	img->cpt_hdrlen = sizeof(*img);
+	img->cpt_content = CPT_CONTENT_VOID;
+	img->cpt_fd = fd;
+
+	/* Every open file was registered during collection; a miss here
+	 * is a checkpointer bug, not a recoverable condition. */
+	fobj = lookup_cpt_object(CPT_OBJ_FILE, file, ctx);
+	BUG_ON(!fobj);
+	img->cpt_file = fobj->o_pos;
+
+	img->cpt_flags = FD_ISSET(fd, f->fdt->close_on_exec) ?
+			 CPT_FD_FLAG_CLOSEEXEC : 0;
+
+	ctx->write(img, sizeof(*img), ctx);
+	cpt_release_buf(ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+/* Dump one files_struct: a CPT_OBJ_FILES header followed by a nested
+ * CPT_OBJ_FILEDESC record for every open descriptor in the table.
+ */
+static int dump_one_file_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct files_struct *f = obj->o_obj;
+	struct cpt_files_struct_image *v = cpt_get_buf(ctx);
+	int fd;
+	loff_t saved_obj;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILES;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = obj->o_index;
+	v->cpt_max_fds = f->fdt->max_fds;
+	v->cpt_next_fd = f->next_fd;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	/* Nested records: save and restore the current-object position. */
+	cpt_push_object(&saved_obj, ctx);
+	for (fd = 0; fd < f->fdt->max_fds; fd++) {
+		struct file *file = fcheck_files(f, fd);
+		if (file)
+			dump_filedesc(fd, file, f, ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+/* Dump every collected files_struct into CPT_SECT_FILES_STRUCT. */
+int cpt_dump_files_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *fobj;
+
+	cpt_open_section(ctx, CPT_SECT_FILES_STRUCT);
+
+	for_each_object(fobj, CPT_OBJ_FILES) {
+		int err = dump_one_file_struct(fobj, ctx);
+
+		if (err)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Register every task's fs_struct, plus the inodes of its pwd, root
+ * and altroot dentries, in the object tables.  Returns 0 or -ENOMEM.
+ */
+int cpt_collect_fs(cpt_context_t * ctx)
+{
+	cpt_object_t *tobj;
+
+	for_each_object(tobj, CPT_OBJ_TASK) {
+		struct task_struct *p = tobj->o_obj;
+		struct fs_struct *fs = p->fs;
+
+		if (!fs)
+			continue;
+		if (cpt_object_add(CPT_OBJ_FS, fs, ctx) == NULL)
+			return -ENOMEM;
+		if (fs->pwd &&
+		    cpt_object_add(CPT_OBJ_INODE, fs->pwd->d_inode, ctx) == NULL)
+			return -ENOMEM;
+		if (fs->root &&
+		    cpt_object_add(CPT_OBJ_INODE, fs->root->d_inode, ctx) == NULL)
+			return -ENOMEM;
+		if (fs->altroot &&
+		    cpt_object_add(CPT_OBJ_INODE, fs->altroot->d_inode, ctx) == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Dump a directory as if it were an open file: build a zeroed dummy
+ * struct file carrying only the fields dump_one_file() looks at.
+ */
+int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	struct file fake;
+
+	memset(&fake, 0, sizeof(fake));
+	fake.f_dentry = d;
+	fake.f_vfsmnt = mnt;
+	fake.f_mode = FMODE_READ | FMODE_PREAD | FMODE_LSEEK;
+
+	return dump_one_file(NULL, &fake, ctx);
+}
+
+/* Dump one fs_struct: a CPT_OBJ_FS header (umask) followed by nested
+ * records for root, pwd and, when present, altroot directories.
+ */
+static int dump_one_fs(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct fs_struct *fs = obj->o_obj;
+	struct cpt_fs_struct_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	int err;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_umask = fs->umask;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	/* Nested directory records; restore expects root, pwd, altroot
+	 * in exactly this order. */
+	cpt_push_object(&saved_obj, ctx);
+	err = cpt_dump_dir(fs->root, fs->rootmnt, ctx);
+	if (!err)
+		err = cpt_dump_dir(fs->pwd, fs->pwdmnt, ctx);
+	if (!err && fs->altroot)
+		err = cpt_dump_dir(fs->altroot, fs->altrootmnt, ctx);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+/* Dump every collected fs_struct into the CPT_SECT_FS section. */
+int cpt_dump_fs_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *fsobj;
+
+	cpt_open_section(ctx, CPT_SECT_FS);
+
+	for_each_object(fsobj, CPT_OBJ_FS) {
+		int err = dump_one_fs(fsobj, ctx);
+
+		if (err)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Verify that an autofs mount is in a checkpointable state: protocol
+ * version <= 5 and the control pipe still attached, write-only, in the
+ * daemon's fd table.  On success registers the pipe file for dumping.
+ * Returns 0 or a negative errno.
+ */
+static int check_autofs(struct super_block *sb, struct cpt_context *ctx)
+{
+	struct autofs_sb_info *si;
+	struct file *f;
+
+	si = autofs4_sbi(sb);
+	if (si->version > 5) {
+		eprintk_ctx("autofs higher than ver5 is not supported\n");
+		return -EINVAL;
+	}
+
+	/* Look the pipe fd up in the daemon's own fd table ... */
+	f = get_task_file(si->pipe_pid, si->pipefd);
+	if (IS_ERR(f)) {
+		eprintk_ctx("autofs pipe is not attached (%ld)\n", PTR_ERR(f));
+		return PTR_ERR(f);
+	}
+
+	/* ... and make sure it is still the same file the sb holds. */
+	if (f != si->pipe) {
+		eprintk_ctx("autofs pipe is not attached\n");
+		fput(f);
+		return -EBADF;
+	}
+
+	if (f->f_mode & FMODE_READ) {
+		fput(f);
+		eprintk_ctx("autofs pipe is attached by the wrong end\n");
+		return -EBADF;
+	}
+
+	/*
+	 * currently autofs' pipefd is
+	 *  a) opened write only
+	 *  b) attached to the daemon task
+	 * these two points make our life very easy:
+	 *  a) we can attach the file to sbi on restore after
+	 *     unfreeze - daemon will not try to write in there
+	 *  b) we can avoid dumping the fd for sbi separately,
+	 *     since the required file will be restore with the
+	 *     task struct in question
+	 *
+	 * In case this breaks some time later (I don't believe it)
+	 * we'll have to dump the opened file ID to the pipe_fd_id
+	 * field of the autofs_mount_data
+	 */
+	fput(f);
+
+	return cpt_object_add(CPT_OBJ_FILE, si->pipe, ctx) ? 0 : -ENOMEM;
+}
+
+/* Walk every mount of namespace @obj (under namespace_sem) and verify
+ * it can be checkpointed; autofs mounts additionally get their control
+ * pipe registered via check_autofs().  Returns 0 or the first error.
+ */
+static int check_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct namespace *n = obj->o_obj;
+	struct list_head *p;
+	char *path_buf, *path;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+
+	down_read(&namespace_sem);
+	list_for_each(p, &n->list) {
+		struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list);
+
+		/* Mounts whose path cannot be resolved are skipped, not
+		 * treated as fatal. */
+		path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+
+		if (check_one_vfsmount(mnt)) {
+			eprintk_ctx("unsupported fs type %s\n", mnt->mnt_sb->s_type->name);
+			err = -EINVAL;
+			break;
+		}
+
+		/* Automounted submounts are recreated on demand after
+		 * restore, so they need no checking here. */
+		if (is_autofs_mount(mnt->mnt_parent))
+			continue;
+
+		if (is_nfs_automount(mnt))
+			continue;
+
+		if (cpt_need_delayfs(mnt->mnt_parent)) {
+			eprintk_ctx("unsupported delayfs submount: %s\n", path);
+			err = -EINVAL;
+			break;
+		}
+
+		if (cpt_need_delayfs(mnt) && !IS_ROOT(mnt->mnt_root)) {
+			eprintk_ctx("unsupported delayfs bindmount: %s\n", path);
+			err = -EINVAL;
+			break;
+		}
+
+		if (is_autofs_mount(mnt)) {
+			err = check_autofs(mnt->mnt_sb, ctx);
+			if (err)
+				break;
+		}
+	}
+	up_read(&namespace_sem);
+
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+
+/* Register each task's mount namespace, then validate every collected
+ * namespace.  Returns 0, -ENOMEM, or the first validation error.
+ */
+int cpt_collect_namespace(cpt_context_t * ctx)
+{
+	cpt_object_t *o;
+
+	for_each_object(o, CPT_OBJ_TASK) {
+		struct task_struct *p = o->o_obj;
+
+		if (!p->nsproxy || !p->nsproxy->namespace)
+			continue;
+		if (cpt_object_add(CPT_OBJ_NAMESPACE,
+				   p->nsproxy->namespace, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	for_each_object(o, CPT_OBJ_NAMESPACE) {
+		int err = check_one_namespace(o, ctx);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Serialize the NFS superblock parameters into a version-6
+ * nfs_mount_data image (CPT_OBJ_NAME payload) so the mount can be
+ * re-established on restore; field sources mirror nfs_show_options
+ * and nfs_get_sb.
+ */
+static void dump_nfs_mount_data(struct vfsmount *mnt, cpt_context_t * ctx) {
+	struct cpt_object_hdr o;
+	struct nfs_mount_data d;
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct nfs_client *clp = nfss->nfs_client;
+	struct nfs_fh *mntfh = NFS_FH(mnt->mnt_root->d_inode);
+
+	BUG_ON(mnt->mnt_sb->s_magic != FSMAGIC_NFS);
+	/* The on-disk image format pins this struct at exactly 688
+	 * bytes; a size change would silently break restore. */
+	BUILD_BUG_ON(sizeof(d) != 688);
+
+	/* Zero first so padding does not leak kernel stack into the
+	 * dump file. */
+	memset(&d, 0, sizeof(d));
+
+	d.version = 6;
+	d.fd = -1;
+	d.flags = nfss->flags;
+	d.rsize = nfss->rsize;
+	d.wsize = nfss->wsize;
+	/* Timeouts are stored in tenths of a second in the image. */
+	d.timeo = 10U * clp->retrans_timeo / HZ;
+	d.retrans = clp->retrans_count;
+	d.acregmin = nfss->acregmin/HZ;
+	d.acregmax = nfss->acregmax/HZ;
+	d.acdirmin = nfss->acdirmin/HZ;
+	d.acdirmax = nfss->acdirmax/HZ;
+	memcpy(&d.addr, &clp->cl_addr, sizeof(d.addr));
+	if (clp->cl_hostname)
+		strncpy(d.hostname, clp->cl_hostname, sizeof(d.hostname));
+	d.namlen = nfss->namelen;
+	d.bsize = nfss->bsize;
+	d.root.size = mntfh->size;
+	memcpy(d.root.data, mntfh->data, sizeof(d.root.data));
+	d.pseudoflavor = nfss->client->cl_auth->au_flavor;
+
+	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_NAME;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_VOID;
+
+	cpt_open_object(NULL, ctx);
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(&d, sizeof(d), ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+}
+
+/* Serialize the autofs superblock parameters into an autofs_mount_data
+ * image (CPT_OBJ_NAME payload) so the mount can be re-established on
+ * restore; counterpart of dump_nfs_mount_data() for autofs.
+ */
+static void dump_autofs_mount_data(struct vfsmount *mnt, cpt_context_t * ctx)
+{
+	struct autofs_mount_data d;
+	struct autofs_sb_info *si;
+	struct cpt_object_hdr o;
+
+	si = autofs4_sbi(mnt->mnt_sb);
+
+	/* Zero the image first: structure padding (and any field not
+	 * explicitly assigned below) must not leak uninitialized kernel
+	 * stack into the dump file.  dump_nfs_mount_data() does the same.
+	 */
+	memset(&d, 0, sizeof(d));
+
+	d.i_uid = mnt->mnt_sb->s_root->d_inode->i_uid;
+	d.i_gid = mnt->mnt_sb->s_root->d_inode->i_gid;
+	d.oz_pgrp = si->oz_pgrp;
+	d.type = si->type;
+	d.min_proto = si->min_proto;
+	d.max_proto = si->max_proto;
+	d.exp_timeout = si->exp_timeout;
+	d.pipefd = si->pipefd;
+	d.pipe_pid = si->pipe_pid;
+	d.is32bit = 0;
+#if defined CONFIG_X86_64 && defined CONFIG_IA32_EMULATION
+	d.is32bit = si->is32bit;
+#endif
+	/* The pipe file is restored with the daemon task itself, so no
+	 * separate file reference is recorded (see check_autofs()). */
+	d.pipe_fd_id = CPT_NULL;
+
+	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_NAME;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_VOID;
+
+	cpt_open_object(NULL, ctx);
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(&d, sizeof(d), ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+}
+
+/* Arguments handed from cpt_dump_tmpfs() to the dumptmpfs() thread. */
+struct args_t
+{
+	int *pfd;	/* pipe: tar writes pfd[1], checkpointer reads pfd[0] */
+	char *path;	/* tmpfs mount point to archive */
+	envid_t veid;	/* VE to enter before exec'ing tar */
+};
+
+/* Kernel-thread body: enter the target VE, wire stdout to the pipe and
+ * stdin/stderr to /dev/null, close all other descriptors, then exec
+ * "tar -c -S --numeric-owner <path>" so the parent can read the tmpfs
+ * archive from the pipe.  Returns a wait-status-style 255 << 8 on any
+ * failure before/at exec.
+ */
+static int dumptmpfs(void *arg)
+{
+	int i;
+	struct args_t *args = arg;
+	int *pfd = args->pfd;
+	int fd0, fd2;
+	char *path = args->path;
+	char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
+
+	i = real_env_create(args->veid, VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump tmpfs\n");
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+
+	/* Route tar's stdout into the write end of the pipe. */
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+	set_fs(KERNEL_DS);
+	fd0 = sc_open("/dev/null", O_RDONLY, 0);
+	fd2 = sc_open("/dev/null", O_WRONLY, 0);
+	if (fd0 < 0 || fd2 < 0) {
+		eprintk("can not open /dev/null for tar: %d %d\n", fd0, fd2);
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+	if (fd0 != 0)
+		sc_dup2(fd0, 0);
+	if (fd2 != 2)
+		sc_dup2(fd2, 2);
+
+	/* Leak no descriptors (including the pipe fds) into tar. */
+	for (i = 3; i < current->files->fdt->max_fds; i++) {
+		sc_close(i);
+	}
+
+	/* Drop the module reference before exec replaces this thread. */
+	module_put(THIS_MODULE);
+
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return 255 << 8;
+}
+
+/* Archive a tmpfs mount into the image as a CPT_CONTENT_NAME object:
+ * spawn a kernel thread that execs tar inside the VE (see dumptmpfs())
+ * and stream its stdout into the dump file, then reap the child and
+ * translate its exit status.  Returns 0 or a negative errno.
+ */
+static int cpt_dump_tmpfs(char *path, struct cpt_context *ctx)
+{
+	int err;
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	int n;
+	loff_t saved_obj;
+	struct args_t args;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+	struct ve_struct *oldenv;
+	
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	args.pfd = pfd;
+	args.path = path;
+	args.veid = VEID(get_exec_env());
+	/* Block signals so the blocking reads/wait below are not
+	 * interrupted; restored before returning. */
+	ignore.sig[0] = CPT_SIG_IGNORE_MASK;
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	/* The helper thread must be spawned from VE0 context. */
+	oldenv = set_exec_env(get_ve0());
+	err = pid = local_kernel_thread(dumptmpfs, (void*)&args,
+			SIGCHLD | CLONE_VFORK, 0);
+	set_exec_env(oldenv);
+	if (err < 0) {
+		eprintk_ctx("tmpfs local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	/* Keep a struct file for the read end, then drop both raw fds.
+	 * NOTE(review): fget() result is not NULL-checked before the
+	 * f->f_op->read() below — presumably safe because pfd[0] was
+	 * just created by sc_pipe(); confirm.
+	 */
+	f = fget(pfd[0]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	/* Copy the tar stream from the pipe into the image. */
+	do {
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	fput(f);
+
+	/* Reap the helper and decode its wait status. */
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {
+		/* Normal exit: nonzero exit code means tar failed. */
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("tar exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		/* Killed by a signal. */
+		eprintk_ctx("tar terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	/* NUL-terminate the CPT_CONTENT_NAME payload. */
+	buf[0] = 0;
+	ctx->write(buf, 1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	/* A read error (n < 0) takes precedence over the exit status. */
+	return n ? : err;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+
+/* Return 1 if a mount of the same superblock appears before @mnt in
+ * its namespace's mount list (i.e. @mnt loops back onto an earlier
+ * mount), 0 if @mnt is the first such mount.
+ */
+static int loopy_root(struct vfsmount *mnt)
+{
+	struct vfsmount *m;
+
+	list_for_each_entry(m, &mnt->mnt_namespace->list, mnt_list) {
+		if (m == mnt)
+			return 0;
+		if (m->mnt_sb == mnt->mnt_sb)
+			return 1;
+	}
+	/* Unreachable: @mnt is always on its namespace's list. */
+	return 0;
+}
+
+/* Find an already-dumped mount of the same superblock from which
+ * @mnt's root is path-reachable, to serve as the bind-mount source
+ * on restore.  Returns the VFSMOUNT_REF object or NULL.
+ */
+static cpt_object_t *cpt_lookup_bind_source(struct vfsmount *mnt,
+		cpt_context_t *ctx)
+{
+	cpt_object_t *ref;
+
+	for_each_object(ref, CPT_OBJ_VFSMOUNT_REF) {
+		struct vfsmount *cand = ref->o_obj;
+
+		if (cand->mnt_sb != mnt->mnt_sb)
+			continue;
+		/* NOTE: relies on this kernel's d_path() accepting a NULL
+		 * buffer as a pure reachability probe. */
+		if (IS_ERR(d_path(mnt->mnt_root, cand, NULL, 0)))
+			continue;
+		return ref;
+	}
+	return NULL;
+}
+
+/* Dump one vfsmount as a CPT_OBJ_VFSMOUNT record: devname, mount path,
+ * fs type, then extra payload depending on the flags (bind source path,
+ * tmpfs tarball, or NFS/autofs remount data for delayed mounts).
+ * Takes a reference on @mnt, dropped later by cpt_finish_vfsmount_ref().
+ * Called under namespace_sem (read); temporarily drops it for tmpfs.
+ */
+static int dump_vfsmount(struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct cpt_vfsmount_image v;
+	loff_t saved_obj;
+	char *path_buf, *path;
+	cpt_object_t *obj, *bind_obj = NULL;
+
+	/* Automounted mounts are recreated on demand after restore. */
+	if (is_autofs_mount(mnt->mnt_parent))
+		return 0;
+
+	if (is_nfs_automount(mnt))
+		return 0;
+
+	obj = cpt_object_add(CPT_OBJ_VFSMOUNT_REF, mnt, ctx);
+	if (!obj)
+		return -ENOMEM;
+	/* Pin the mount until cpt_finish_vfsmount_ref(). */
+	mntget(mnt);
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+
+	/* -EINVAL from d_path means the mount is not visible from this
+	 * namespace root; skip it silently rather than failing. */
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		return PTR_ERR(path) == -EINVAL ? 0 : PTR_ERR(path);
+	}
+
+	cpt_open_object(obj, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_VFSMOUNT;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	v.cpt_mntflags = mnt->mnt_flags;
+	/* Classify the mount: external (owned by another beancounter),
+	 * delayed (NFS/autofs, re-mounted lazily on restore), or a bind
+	 * of another dumped mount. */
+	if (top_beancounter(slab_ub(mnt)) != top_beancounter(get_exec_ub())) {
+		v.cpt_mntflags |= CPT_MNT_EXT;
+	} else if (cpt_need_delayfs(mnt)) {
+		v.cpt_mntflags |= CPT_MNT_DELAYFS;
+		obj->o_flags |= CPT_VFSMOUNT_DELAYFS;
+	} else if (mnt->mnt_root != mnt->mnt_sb->s_root || loopy_root(mnt)) {
+		v.cpt_mntflags |= CPT_MNT_BIND;
+		bind_obj = cpt_lookup_bind_source(mnt, ctx);
+		if (!bind_obj) {
+			err = -ENODEV;
+			eprintk_ctx("bind mount source not found: %s\n", path);
+			goto out_err;
+		}
+	}
+
+	v.cpt_flags = mnt->mnt_sb->s_flags;
+	v.cpt_mnt_bind = bind_obj ? bind_obj->o_pos : CPT_NULL;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	/* Nested payload: devname, mount path, fs type name, then the
+	 * flag-dependent extras below. */
+	cpt_push_object(&saved_obj, ctx);
+	cpt_dump_string(mnt->mnt_devname ? : "none", ctx);
+	cpt_dump_string(path, ctx);
+	cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
+
+	if (v.cpt_mntflags & CPT_MNT_BIND)
+		err = cpt_dump_path(mnt->mnt_root, bind_obj->o_obj, 0, ctx);
+	else if (!(v.cpt_mntflags & CPT_MNT_EXT)) {
+
+		if (mnt->mnt_sb->s_type->fs_flags & FS_REQUIRES_DEV) {
+			eprintk_ctx("Checkpoint supports only nodev fs: %s\n",
+				    mnt->mnt_sb->s_type->name);
+			err = -EXDEV;
+		} else if (!strcmp(mnt->mnt_sb->s_type->name, "tmpfs")) {
+			/* cpt_dump_tmpfs() sleeps waiting for tar, so
+			 * drop namespace_sem around it; if the mount
+			 * vanished from the namespace meanwhile, fail. */
+			mntget(mnt);
+			up_read(&namespace_sem);
+			err = cpt_dump_tmpfs(path, ctx);
+			down_read(&namespace_sem);
+			if (!err && list_empty(&mnt->mnt_list))
+				err = -EBUSY;
+			mntput(mnt);
+		}
+	}
+	if (v.cpt_mntflags & CPT_MNT_DELAYFS) {
+		if (mnt->mnt_sb->s_magic == FSMAGIC_NFS) {
+			dump_nfs_mount_data(mnt, ctx);
+		} else if (is_autofs_mount(mnt)) {
+			dump_autofs_mount_data(mnt, ctx);
+		} else {
+			/* FIXME: dump sb show_options output for other
+			 * delayed filesystems. */
+			BUG();
+		}
+	}
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+out_err:
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+
+/* Dump one mount namespace: a CPT_OBJ_NAMESPACE header followed by a
+ * nested record per mount in the tree (see dump_vfsmount()).
+ */
+static int dump_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct namespace *n = obj->o_obj;
+	struct cpt_object_hdr v;
+	struct vfsmount *rootmnt, *p;
+	loff_t saved_obj;
+	int err = 0;
+
+	cpt_open_object(obj, ctx);
+
+	/* Use CPT_NULL like every other header in this file (the literal
+	 * -1 used previously has the same __u64 bit pattern). */
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAMESPACE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+
+	/* Walk the whole mount tree under namespace_sem. */
+	down_read(&namespace_sem);
+	rootmnt = n->root;
+	for (p = rootmnt; p; p = next_mnt(p, rootmnt)) {
+		err = dump_vfsmount(p, ctx);
+		if (err)
+			break;
+	}
+	up_read(&namespace_sem);
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+/* Dump every collected mount namespace into CPT_SECT_NAMESPACE. */
+int cpt_dump_namespace(struct cpt_context *ctx)
+{
+	cpt_object_t *ns;
+
+	cpt_open_section(ctx, CPT_SECT_NAMESPACE);
+
+	for_each_object(ns, CPT_OBJ_NAMESPACE) {
+		int err = dump_one_namespace(ns, ctx);
+
+		if (err)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Drop the mount references taken by dump_vfsmount() via mntget(). */
+void cpt_finish_vfsmount_ref(struct cpt_context *ctx)
+{
+	cpt_object_t *ref;
+
+	for_each_object(ref, CPT_OBJ_VFSMOUNT_REF)
+		mntput(ref->o_obj);
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_files.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_files.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_files.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_files.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,101 @@
+int cpt_collect_files(cpt_context_t *);
+int cpt_collect_fs(cpt_context_t *);
+int cpt_collect_namespace(cpt_context_t *);
+int cpt_collect_sysvsem_undo(cpt_context_t *);
+int cpt_collect_tty(struct file *, cpt_context_t *);
+int cpt_dump_files(struct cpt_context *ctx);
+int cpt_dump_files_struct(struct cpt_context *ctx);
+int cpt_dump_fs_struct(struct cpt_context *ctx);
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx);
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx);
+int cpt_dump_tty(cpt_object_t *, struct cpt_context *ctx);
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx);
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, unsigned flags, struct cpt_context *ctx);
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx);
+
+int rst_posix_locks(struct cpt_context *ctx);
+
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
+int rst_files(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_restore_fs(struct cpt_context *ctx);
+
+int cpt_collect_sysv(cpt_context_t *);
+int cpt_dump_sysvsem(struct cpt_context *ctx);
+int cpt_dump_sysvmsg(struct cpt_context *ctx);
+int rst_sysv_ipc(struct cpt_context *ctx);
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_dump_namespace(struct cpt_context *ctx);
+int rst_root_namespace(struct cpt_context *ctx);
+
+int rst_stray_files(struct cpt_context *ctx);
+int rst_tty_jobcontrol(struct cpt_context *ctx);
+
+void rst_flush_filejobs(struct cpt_context *);
+int rst_do_filejobs(struct cpt_context *);
+
+int rst_eventpoll(struct cpt_context *);
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx);
+int cpt_dump_epolldev(cpt_object_t *obj, struct cpt_context *);
+
+int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx);
+int cpt_get_dentry(struct dentry **dp, struct vfsmount **mp,
+		   loff_t *pos, struct cpt_context *ctx);
+
+int cpt_dump_inotify(cpt_object_t *obj, cpt_context_t *ctx);
+int rst_inotify(cpt_context_t *ctx);
+struct file *rst_open_inotify(struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx);
+
+struct dentry *cpt_fake_link(struct dentry *d, struct vfsmount *mnt,
+		struct inode *ino, struct cpt_context *ctx);
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 int verify, cpt_context_t *ctx);
+int rst_path_lookup_at(struct vfsmount *mnt, struct dentry *dentry,
+		const char *name, unsigned int flags, struct nameidata *nd);
+int rst_path_lookup(cpt_object_t *mntobj, const char *path,
+		unsigned int flags, struct nameidata *nd);
+
+/* Whitelist of filesystem types whose mounts can be checkpointed;
+ * evaluates to non-zero for an UNSUPPORTED type.  Note that @mnt is
+ * evaluated many times — pass only side-effect-free expressions.
+ */
+#define check_one_vfsmount(mnt) \
+	(strcmp(mnt->mnt_sb->s_type->name, "rootfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "vzfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "ext3") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "ext2") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "simfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "unionfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "tmpfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "nfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "autofs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "devpts") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "proc") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0 && \
+	 strcmp(mnt->mnt_sb->s_type->name, "binfmt_misc") != 0)
+
+/* True if @mnt is an autofs mount, identified by superblock magic. */
+#define is_autofs_mount(mnt) ((mnt)->mnt_sb->s_magic == FSMAGIC_AUTOFS)
+
+int cpt_page_is_zero(struct page * page);
+
+void cpt_finish_vfsmount_ref(struct cpt_context *ctx);
+void rst_finish_vfsmount_ref(struct cpt_context *ctx);
+
+struct vfsmount *rst_kern_mount(const char *fstype, int flags,
+		const char *name, void *data);
+
+cpt_object_t *cpt_lookup_vfsmount_obj(struct vfsmount *mnt,
+		struct cpt_context *ctx);
+
+int cpt_need_delayfs(struct vfsmount *mnt);
+extern struct file_system_type delayfs_type;
+struct file *rst_delayfs_screw(struct vfsmount *mnt, char *name, int flags, loff_t offset, unsigned int mode);
+struct vfsmount *rst_mount_delayfs(char *type, int flags,
+		char *name, void *data, cpt_context_t *ctx);
+int rst_freeze_delayfs(cpt_context_t *ctx);
+int rst_init_delayfs_daemon(cpt_context_t *ctx);
+int rst_delay_flock(struct file *, struct cpt_flock_image *, cpt_context_t *);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_fsmagic.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_fsmagic.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_fsmagic.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_fsmagic.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,19 @@
+/* Collected from kernel sources. */
+
+#define FSMAGIC_TMPFS	0x01021994
+#define FSMAGIC_PIPEFS	0x50495045
+#define FSMAGIC_SOCKFS	0x534F434B
+#define FSMAGIC_PFMFS	0xa0b4d889
+#define FSMAGIC_BDEV	0x62646576
+#define FSMAGIC_EPOLL	0x03111965
+#define FSMAGIC_FUTEX	0x0BAD1DEA
+#define FSMAGIC_INOTIFY	0x2BAD1DEA
+#define FSMAGIC_MQUEUE	0x19800202
+#define FSMAGIC_PROC	0x9fa0
+#define FSMAGIC_DEVPTS	0x1CD1
+#define FSMAGIC_AUTOFS	0x0187
+#define FSMAGIC_EXT2	0xEF53
+#define FSMAGIC_REISER	0x52654973
+#define FSMAGIC_VEFS    0x565a4653
+#define FSMAGIC_ANON	0x09041934
+#define FSMAGIC_NFS	0x6969
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_inotify.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_inotify.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_inotify.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_inotify.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,174 @@
+/*
+ *
+ *  kernel/cpt/cpt_inotify.c
+ *
+ *  Copyright (C) 2000-2007  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/inotify.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations inotify_fops;
+
+/* Dump a path for the object an inotify watch is attached to.  If the
+ * watched dentry is unlinked, a fake link is fabricated first so that
+ * restore can re-open the inode by name.  Returns 0 or negative errno.
+ */
+static int dump_watch_inode(struct inotify_watch *watch, cpt_context_t *ctx)
+{
+	int err;
+	struct dentry *d;
+
+	if (cpt_need_delayfs(watch->mnt)) {
+		eprintk_ctx("inotify migration for delayed mounts (NFS) is not "
+				"supported\n");
+		return -EINVAL;
+	}
+
+	d = watch->dentry;
+	if (IS_ROOT(d) || !d_unhashed(d))
+		goto dump_dir;
+
+	/* Unlinked target: fabricate a dentry with a dumpable path.
+	 * A NULL first argument tells cpt_fake_link() the inode has no
+	 * remaining on-disk names. */
+	d = cpt_fake_link(d->d_inode->i_nlink ? d : NULL,
+			watch->mnt, d->d_inode, ctx);
+
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+dump_dir:
+	err = cpt_dump_dir(d, watch->mnt, ctx);
+	/* Only drop the reference if cpt_fake_link() gave us a new one. */
+	if (d != watch->dentry)
+		dput(d);
+
+	return err;
+}
+
+/* Dump one inotify instance: a CPT_OBJ_INOTIFY header, then a nested
+ * CPT_OBJ_INOTIFY_WATCH record (with watched path) per watch, then a
+ * CPT_OBJ_INOTIFY_EVENT record per queued-but-undelivered event.
+ * Returns 0 or a negative errno.
+ */
+int cpt_dump_inotify(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file *file = obj->o_obj;
+	struct inotify_device *dev;
+	struct inotify_watch *watch;
+	struct inotify_kernel_event *kev;
+	struct cpt_inotify_image ii;
+
+	if (file->f_op != &inotify_fops) {
+		eprintk_ctx("bad inotify file\n");
+		return -EINVAL;
+	}
+
+	dev = file->private_data;
+
+	/* inotify_user.c does not protect open /proc/N/fd, silly.
+	 * Opener will get an invalid file with uninitialized private_data
+	 */
+	if (unlikely(dev == NULL)) {
+		eprintk_ctx("bad inotify dev\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	ii.cpt_next = CPT_NULL;
+	ii.cpt_object = CPT_OBJ_INOTIFY;
+	ii.cpt_hdrlen = sizeof(ii);
+	ii.cpt_content = CPT_CONTENT_ARRAY;
+	ii.cpt_file = obj->o_pos;
+	ii.cpt_user = dev->user->uid;
+	ii.cpt_max_events = dev->max_events;
+	ii.cpt_last_wd = dev->ih->last_wd;
+
+	ctx->write(&ii, sizeof(ii), ctx);
+
+	/* Watches: walked under the handle mutex to keep the list stable. */
+	mutex_lock(&dev->ih->mutex);
+	list_for_each_entry(watch, &dev->ih->watches, h_list) {
+		loff_t saved_obj;
+		loff_t saved_obj2;
+		struct cpt_inotify_wd_image wi;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		wi.cpt_next = CPT_NULL;
+		wi.cpt_object = CPT_OBJ_INOTIFY_WATCH;
+		wi.cpt_hdrlen = sizeof(wi);
+		wi.cpt_content = CPT_CONTENT_ARRAY;
+		wi.cpt_wd = watch->wd;
+		wi.cpt_mask = watch->mask;
+
+		ctx->write(&wi, sizeof(wi), ctx);
+
+		/* The watched path is nested inside the watch record. */
+		cpt_push_object(&saved_obj2, ctx);
+		err = dump_watch_inode(watch, ctx);
+		cpt_pop_object(&saved_obj2, ctx);
+		if (err)
+			break;
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	mutex_unlock(&dev->ih->mutex);
+
+	if (err)
+		return err;
+
+	/* Pending events: walked under the event-queue mutex. */
+	mutex_lock(&dev->ev_mutex);
+	list_for_each_entry(kev, &dev->events, list) {
+		loff_t saved_obj;
+		struct cpt_inotify_ev_image ei;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		ei.cpt_next = CPT_NULL;
+		ei.cpt_object = CPT_OBJ_INOTIFY_EVENT;
+		ei.cpt_hdrlen = sizeof(ei);
+		ei.cpt_content = CPT_CONTENT_NAME;
+		ei.cpt_wd = kev->event.wd;
+		ei.cpt_mask = kev->event.mask;
+		ei.cpt_cookie = kev->event.cookie;
+		ei.cpt_namelen = kev->name ? strlen(kev->name) : 0;
+
+		ctx->write(&ei, sizeof(ei), ctx);
+
+		/* Name payload includes the NUL terminator, then padding. */
+		if (kev->name) {
+			ctx->write(kev->name, ei.cpt_namelen+1, ctx);
+			ctx->align(ctx);
+		}
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	mutex_unlock(&dev->ev_mutex);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_kernel.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_kernel.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_kernel.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_kernel.c	2017-01-13 08:40:28.000000000 -0500
@@ -0,0 +1,198 @@
+/*
+ *
+ *  kernel/cpt/cpt_kernel.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#define __KERNEL_SYSCALLS__ 1
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/ve_proto.h>
+#ifdef CONFIG_X86
+#include <asm/cpufeature.h>
+#endif
+#include <linux/cpt_image.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+int debug_level = 1;
+
+#ifdef CONFIG_X86_32
+
+extern void kernel_thread_helper(void);
+
+/*
+ * Create a kernel thread
+ */
+int asm_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.ebx = (unsigned long) fn;
+	regs.edx = (unsigned long) arg;
+
+	regs.xds = __USER_DS;
+	regs.xes = __USER_DS;
+	regs.orig_eax = -1;
+	regs.eip = (unsigned long) kernel_thread_helper;
+#ifndef CONFIG_X86_XEN
+	regs.xcs = __KERNEL_CS;
+#else
+	regs.xcs = GET_KERNEL_CS();
+#endif
+	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+	/* Ok, create the new process.. */
+	return do_fork_pid(flags | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL, pid);
+}
+#endif
+
+#ifdef CONFIG_IA64
+pid_t
+asm_kernel_thread (int (*fn)(void *), void *arg, unsigned long flags, pid_t pid)
+{
+	extern void start_kernel_thread (void);
+	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
+	struct {
+		struct switch_stack sw;
+		struct pt_regs pt;
+	} regs;
+
+	memset(&regs, 0, sizeof(regs));
+	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
+	regs.pt.r1 = helper_fptr[1];		/* set GP */
+	regs.pt.r9 = (unsigned long) fn;	/* 1st argument */
+	regs.pt.r11 = (unsigned long) arg;	/* 2nd argument */
+	/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
+	regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+	regs.pt.cr_ifs = 1UL << 63;		/* mark as valid, empty frame */
+	regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+	regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+	regs.sw.pr = (1 << 2 /*PRED_KERNEL_STACK*/);
+	return do_fork_pid(flags | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL, pid);
+}
+#endif
+
+int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	pid_t ret;
+
+	if (current->fs == NULL) {
+		/* do_fork_pid() hates processes without fs, oopses. */
+		printk("CPT BUG: local_kernel_thread: current->fs==NULL\n");
+		return -EINVAL;
+	}
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+	while ((ret = asm_kernel_thread(fn, arg, flags, pid)) ==
+							-ERESTARTNOINTR) {
+		if (signal_pending(current))
+			break;
+		cond_resched();
+	}
+	if (ret < 0)
+		module_put(THIS_MODULE);
+	return ret;
+}
+
+#ifdef __i386__
+int __execve(const char *file, char **argv, char **envp)
+{
+	long res;
+	__asm__ volatile ("int $0x80"
+	: "=a" (res)
+	: "0" (__NR_execve),"b" ((long)(file)),"c" ((long)(argv)),
+		  "d" ((long)(envp)) : "memory");
+	return (int)res;
+}
+#endif
+
+int sc_execve(char *cmd, char **argv, char **env)
+{
+	int ret;
+#ifdef __i386__
+	ret = __execve(cmd, argv, env);
+#elif defined(__ia64__)
+	ret = -execve(cmd, argv, env);
+#else
+	ret = execve(cmd, argv, env);
+#endif
+	return ret;
+}
+
+unsigned int test_cpu_caps_and_features(void)
+{
+	unsigned int flags = 0;
+
+#ifdef CONFIG_X86
+	if (boot_cpu_has(X86_FEATURE_CMOV))
+		flags |= 1 << CPT_CPU_X86_CMOV;
+	if (cpu_has_fxsr)
+		flags |= 1 << CPT_CPU_X86_FXSR;
+	if (cpu_has_xmm)
+		flags |= 1 << CPT_CPU_X86_SSE;
+#ifndef CONFIG_X86_64
+	if (cpu_has_xmm2)
+#endif
+		flags |= 1 << CPT_CPU_X86_SSE2;
+	if (cpu_has_mmx)
+		flags |= 1 << CPT_CPU_X86_MMX;
+	if (boot_cpu_has(X86_FEATURE_3DNOW))
+		flags |= 1 << CPT_CPU_X86_3DNOW;
+	if (boot_cpu_has(X86_FEATURE_3DNOWEXT))
+		flags |= 1 << CPT_CPU_X86_3DNOW2;
+	if (boot_cpu_has(X86_FEATURE_SYSCALL))
+		flags |= 1 << CPT_CPU_X86_SYSCALL;
+#ifdef CONFIG_X86_64
+	if (boot_cpu_has(X86_FEATURE_SYSCALL) &&
+			boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		flags |= 1 << CPT_CPU_X86_SYSCALL32;
+#endif
+	if (boot_cpu_has(X86_FEATURE_SEP)
+#ifdef CONFIG_X86_64
+			&& boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
+#endif
+	   )
+		flags |= ((1 << CPT_CPU_X86_SEP) | (1 << CPT_CPU_X86_SEP32));
+#ifdef CONFIG_X86_64
+	flags |= 1 << CPT_CPU_X86_EMT64;
+#endif
+#endif
+#ifdef CONFIG_IA64
+	flags |= 1 << CPT_CPU_X86_IA64;
+	flags |= 1 << CPT_CPU_X86_FXSR;
+#endif
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_TEST, NULL) & NOTIFY_FAIL)
+		flags |= 1 << CPT_SLM_DMPRST;
+
+	if (ve_ipv6_ops_get() == NULL)
+		flags |= 1 << CPT_NO_IPV6;
+
+	return flags;
+}
+
+unsigned int test_kernel_config(void)
+{
+	unsigned int flags = 0;
+#ifdef CONFIG_X86
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+	flags |= 1 << CPT_KERNEL_CONFIG_PAE;
+#endif
+#endif
+	return flags;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_kernel.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_kernel.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_kernel.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_kernel.h	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,69 @@
+/* Interface to kernel vars which we had to _add_. */
+
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+#define TASK_TRACED TASK_STOPPED
+#define unix_peer(sk) ((sk)->sk_pair)
+#define page_mapcount(pg) ((pg)->mapcount)
+#else
+#define unix_peer(sk) (unix_sk(sk)->peer)
+#endif
+
+#ifdef CONFIG_X86_64
+#define cpu_has_fxsr 1
+#endif
+#ifdef CONFIG_IA64
+#define cpu_has_fxsr 1
+#endif
+
+#define CPT_SIG_IGNORE_MASK (\
+        (1 << (SIGCONT - 1)) | (1 << (SIGCHLD - 1)) | \
+	(1 << (SIGWINCH - 1)) | (1 << (SIGURG - 1)))
+
+static inline void do_gettimespec(struct timespec *ts)
+{
+	struct timeval tv;
+	do_gettimeofday(&tv);
+	ts->tv_sec = tv.tv_sec;
+	ts->tv_nsec = tv.tv_usec*1000;
+}
+
+int local_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+int asm_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+
+#if defined(CONFIG_VZFS_FS) || defined(CONFIG_VZFS_FS_MODULE)
+void vefs_track_force_stop(struct super_block *super);
+
+void vefs_track_notify(struct dentry *vdentry, int track_cow);
+
+struct dentry * vefs_replaced_dentry(struct dentry *de);
+int vefs_is_renamed_dentry(struct dentry *vde, struct dentry *pde);
+#else
+static inline void vefs_track_force_stop(struct super_block *super) { };
+
+static inline void vefs_track_notify(struct dentry *vdentry, int track_cow) { };
+#endif
+
+unsigned int test_cpu_caps_and_features(void);
+int rst_image_acceptable(unsigned long version);
+unsigned int test_kernel_config(void);
+
+#define test_one_flag_old(src, dst, flag, message, ret) \
+if (src & (1 << flag)) \
+	if (!(dst & (1 << flag))) { \
+		wprintk("Destination cpu does not have " message "\n"); \
+		ret = 1; \
+	}
+#define test_one_flag(src, dst, flag, message, ret) \
+if (src & (1 << flag)) \
+	if (!(dst & (1 << flag))) { \
+		eprintk_ctx("Destination cpu does not have " message "\n"); \
+		ret = 1; \
+	}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_mm.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_mm.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_mm.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_mm.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,919 @@
+/*
+ *
+ *  kernel/cpt/cpt_mm.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/ve.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#ifdef CONFIG_X86
+#include <asm/ldt.h>
+#endif
+#include <asm/mmu.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+#include "cpt_pagein.h"
+#endif
+#include "cpt_ubc.h"
+
+static int collect_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			       cpt_context_t *ctx)
+{
+	if (!list_empty(&aio_ctx->run_list)) {
+		/* This is impossible at least with kernel 2.6.8.1 or 2.6.16 */
+		eprintk_ctx("run list is not empty, cannot suspend AIO\n");
+		return -EBUSY;
+	}
+
+	/* Wait for pending IOCBs. Linux AIO is mostly _fake_.
+	 * It is actually synchronous, except for direct IO and
+	 * some funny raw USB things, which cannot happen inside VE.
+	 * However, we do this for future.
+	 *
+	 * Later note: in 2.6.16 we may allow O_DIRECT, so that
+	 * it is not meaningless code.
+	 */
+	wait_for_all_aios(aio_ctx);
+
+	if (!list_empty(&aio_ctx->run_list) ||
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {
+		eprintk_ctx("were not able to suspend AIO\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int collect_one_mm(struct mm_struct *mm, cpt_context_t * ctx)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file) {
+			if (cpt_object_add(CPT_OBJ_FILE, vma->vm_file, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+#ifdef CONFIG_USER_RESOURCE
+	if (cpt_add_ubc(mm->mm_ub, ctx) == NULL)
+		return -ENOMEM;
+#endif
+
+	if (mm->ioctx_list) {
+		struct kioctx *aio_ctx;
+		int err;
+
+		for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next)
+			if ((err = collect_one_aio_ctx(mm, aio_ctx, ctx)) != 0)
+				return err;
+	}
+
+	return 0;
+}
+
+int cpt_collect_mm(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	int err;
+	int index;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		if (tsk->mm && cpt_object_add(CPT_OBJ_MM, tsk->mm, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	index = 1;
+	for_each_object(obj, CPT_OBJ_MM) {
+		struct mm_struct *mm = obj->o_obj;
+		if (obj->o_count != atomic_read(&mm->mm_users)) {
+			eprintk_ctx("mm_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&mm->mm_users));
+			return -EAGAIN;
+		}
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if ((err = collect_one_mm(mm, ctx)) != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int zcnt, scnt, scnt0, ucnt;
+
+/* Function where_is_anon_page() returns the address of an anonymous page in the
+ * mm of an already dumped process. This happens e.g. after fork(). We do not use
+ * this right now, just keep statistics; it is difficult to restore such state,
+ * but the most direct use is to save space in the dumped image. */
+
+
+static inline unsigned long
+vma_address0(struct page *page, struct vm_area_struct *vma)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+		address |= 1;
+	return address;
+}
+
+int cpt_check_page(struct vm_area_struct *vma, unsigned long address,
+		   struct page *page, int wrprot)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+	int result;
+
+	pgd = pgd_offset(mm, address);
+	if (unlikely(!pgd_present(*pgd)))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (unlikely(!pmd_present(*pmd)))
+		return 0;
+
+	result = 0;
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return 0;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
+		result = 1;
+		if (wrprot < 0)
+			result = pte_write(*pte);
+		else if (wrprot)
+			ptep_set_wrprotect(mm, address, pte);
+	}
+	pte_unmap_unlock(pte, ptl);
+	return result;
+}
+
+static loff_t where_is_anon_page(cpt_object_t *mmobj, unsigned long mapaddr,
+				 struct page *page, cpt_context_t * ctx)
+{
+	loff_t mmptr = CPT_NULL;
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int idx = mmobj->o_index;
+
+	if (!PageAnon(page))
+		return CPT_NULL;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma)
+		return CPT_NULL;
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long addr = vma_address0(page, vma);
+		cpt_object_t *obj;
+
+		/* We do not try to support mremapped regions (addr != mapaddr),
+		 * only mmaps directly inherited via fork().
+		 * With this limitation we may check self-consistency of
+		 * vmas (vm_start, vm_pgoff, anon_vma) before
+		 * doing __copy_page_range() in rst_mm.
+		 */
+		if (mmobj->o_obj != vma->vm_mm && addr == mapaddr) {
+			obj = lookup_cpt_object(CPT_OBJ_MM, vma->vm_mm, ctx);
+			if (obj && obj->o_pos != CPT_NULL && obj->o_index < idx) {
+				if (cpt_check_page(vma, addr, page, 0)) {
+					mmptr = obj->o_pos;
+					idx = obj->o_index;
+				}
+			}
+		}
+	}
+	spin_unlock(&anon_vma->lock);
+
+	return mmptr;
+}
+
+struct page_area
+{
+	int type;
+	unsigned long start;
+	unsigned long end;
+	pgoff_t pgoff;
+	loff_t mm;
+	__u64 list[16];
+};
+
+struct page_desc
+{
+	int	type;
+	pgoff_t	index;
+	loff_t	mm;
+	int	shared;
+};
+
+enum {
+	PD_ABSENT,
+	PD_COPY,
+	PD_ZERO,
+	PD_CLONE,
+	PD_FUNKEY,
+	PD_LAZY,
+	PD_ITER,
+	PD_ITERYOUNG,
+};
+
+/* 0: page can be obtained from backstore, or is a still-unmapped anonymous page,
+      or something else which does not require a copy.
+   1: page requires copy
+   2: page requires copy but its content is zero. Quite useless.
+   3: wp page is shared after fork(). It is to be COWed when modified.
+   4: page is something unsupported... We copy it right now.
+ */
+
+
+
+static void page_get_desc(cpt_object_t *mmobj,
+			  struct vm_area_struct *vma, unsigned long addr,
+			  struct page_desc *pdesc, cpt_context_t * ctx)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	struct page *pg = NULL;
+	pgoff_t linear_index = (addr - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff;
+
+	pdesc->index = linear_index;
+	pdesc->shared = 0;
+	pdesc->mm = CPT_NULL;
+
+	if (vma->vm_flags & VM_IO) {
+		pdesc->type = PD_ABSENT;
+		return;
+	}
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto out_absent;
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto out_absent;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto out_absent;
+#ifdef CONFIG_X86
+	if (pmd_huge(*pmd)) {
+		eprintk_ctx("page_huge\n");
+		goto out_unsupported;
+	}
+#endif
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+retry:
+#endif
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	pte = *ptep;
+	pte_unmap(ptep);
+
+	if (pte_none(pte))
+		goto out_absent_unlock;
+
+	if (!pte_present(pte)) {
+		if (pte_file(pte)) {
+			pdesc->index = pte_to_pgoff(pte);
+			goto out_absent_unlock;
+		}
+		if (vma->vm_flags & VM_SHARED) {
+			/* It is impossible: shared mappings cannot be in swap */
+			eprintk_ctx("shared mapping is not present: %08lx@%Ld\n", addr, mmobj->o_pos);
+			goto out_unsupported_unlock;
+		}
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		/* Otherwise it is in swap. */
+		if (!ctx->lazy_vm) {
+			int err;
+			/* If lazy transfer is not enabled,
+			 * raise it from swap now, so that we
+			 * save at least when the page is shared.
+			 */
+			spin_unlock(ptl);
+			err = handle_mm_fault(mm, vma, addr, 0);
+			if (err == VM_FAULT_SIGBUS)
+				goto out_absent;
+			if (err == VM_FAULT_OOM)
+				goto out_absent;
+			err = 0;
+			goto retry;
+		}
+#endif
+		pdesc->type = PD_LAZY;
+		goto out_unlock;
+	}
+
+	if ((pg = vm_normal_page(vma, addr, pte)) == NULL) {
+		pdesc->type = PD_COPY;
+		goto out_unlock;
+	}
+
+	get_page(pg);
+	spin_unlock(ptl);
+
+	if (pg->mapping && !PageAnon(pg)) {
+		if (vma->vm_file == NULL) {
+			eprintk_ctx("pg->mapping!=NULL for fileless vma: %08lx\n", addr);
+			goto out_unsupported;
+		}
+		if (vma->vm_file->f_mapping != pg->mapping) {
+			eprintk_ctx("pg->mapping!=f_mapping: %08lx %p %p %Ld\n",
+				    addr, vma->vm_file->f_mapping, pg->mapping,
+				    mmobj->o_pos);
+			goto out_unsupported;
+		}
+		pdesc->index = (pg->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+		/* Page is in backstore. For us it is like
+		 * it is not present.
+		 */
+		goto out_absent;
+	}
+
+	if (PageReserved(pg)) {
+		/* Special case: ZERO_PAGE is used, when an
+		 * anonymous page is accessed but not written. */
+		if (pg == ZERO_PAGE(addr)) {
+			if (pte_write(pte)) {
+				eprintk_ctx("not funny already, writable ZERO_PAGE\n");
+				goto out_unsupported;
+			}
+			zcnt++;
+			goto out_absent;
+		}
+		eprintk_ctx("reserved page %lu at %08lx@%Ld\n", pg->index,
+			    addr, mmobj->o_pos);
+		goto out_unsupported;
+	}
+
+	if (pg == ZERO_PAGE(addr)) {
+		wprintk_ctx("that's how it works now\n");
+	}
+
+	if (!pg->mapping) {
+		eprintk_ctx("page without mapping at %08lx@%Ld\n", addr,
+			    mmobj->o_pos);
+		goto out_unsupported;
+	}
+
+	if (pg->mapping && page_mapcount(pg) > 1) {
+		pdesc->shared = 1;
+		pdesc->mm = where_is_anon_page(mmobj, addr, pg, ctx);
+		if (pdesc->mm != CPT_NULL) {
+			scnt0++;
+			pdesc->type = PD_CLONE;
+			goto out_put;
+		} else {
+			scnt++;
+		}
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	if (ctx->iter_done &&
+	    test_bit(PG_checkpointed, &pg->flags)) {
+		if (pte_write(pte)) {
+			wprintk_ctx("writable PG_checkpointed page\n");
+		}
+		pdesc->index = page_to_pfn(pg);
+		pdesc->type = pte_young(pte) ? PD_ITERYOUNG : PD_ITER;
+		goto out_put;
+	}
+#endif
+	pdesc->type = pte_young(pte) ? PD_COPY : PD_LAZY;
+
+out_put:
+	if (pg)
+		put_page(pg);
+	return;
+
+out_unlock:
+	spin_unlock(ptl);
+	goto out_put;
+
+out_absent_unlock:
+	spin_unlock(ptl);
+out_absent:
+	pdesc->type = PD_ABSENT;
+	goto out_put;
+
+out_unsupported_unlock:
+	spin_unlock(ptl);
+out_unsupported:
+	ucnt++;
+	pdesc->type = PD_FUNKEY;
+	goto out_put;
+}
+
+/* ATTN: We give "current" to get_user_pages(). This is wrong, but get_user_pages()
+ * does not really need this thing. It just stores some page fault stats there.
+ *
+ * BUG: some archs (e.g. sparc64, but not Intel*) require cache pages to be
+ * flushed before accessing the vma.
+ */
+void dump_pages(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, struct cpt_context *ctx)
+{
+#define MAX_PAGE_BATCH 16
+	struct page *pg[MAX_PAGE_BATCH];
+	int npages = (end - start)/PAGE_SIZE;
+	int count = 0;
+
+	while (count < npages) {
+		int copy = npages - count;
+		int n;
+
+		if (copy > MAX_PAGE_BATCH)
+			copy = MAX_PAGE_BATCH;
+		n = get_user_pages(current, vma->vm_mm, start, copy,
+				   0, 1, pg, NULL);
+		if (n == copy) {
+			int i;
+			for (i=0; i<n; i++) {
+				char *maddr = kmap(pg[i]);
+				ctx->write(maddr, PAGE_SIZE, ctx);
+				kunmap(pg[i]);
+			}
+		} else {
+			eprintk_ctx("get_user_pages fault");
+			for ( ; n > 0; n--)
+				page_cache_release(pg[n-1]);
+			return;
+		}
+		start += n*PAGE_SIZE;
+		count += n;
+		for ( ; n > 0; n--)
+			page_cache_release(pg[n-1]);
+	}
+	return;
+}
+
+int dump_page_block(struct vm_area_struct *vma, struct cpt_page_block *pgb,
+		    int copy,
+		    struct cpt_context *ctx)
+{
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb->cpt_object = (copy != PD_LAZY) ? CPT_OBJ_PAGES : CPT_OBJ_LAZYPAGES;
+	pgb->cpt_hdrlen = sizeof(*pgb);
+	pgb->cpt_content = (copy == PD_COPY || copy == PD_LAZY) ? CPT_CONTENT_DATA : CPT_CONTENT_VOID;
+
+	ctx->write(pgb, sizeof(*pgb), ctx);
+	if (copy == PD_COPY || copy == PD_LAZY)
+		dump_pages(vma, pgb->cpt_start, pgb->cpt_end, ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_remappage_block(struct vm_area_struct *vma, struct page_area *pa,
+			 struct cpt_context *ctx)
+{
+	struct cpt_remappage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_REMAPPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_pgoff = pa->pgoff - (pa->end-pa->start)/PAGE_SIZE + 1;
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_copypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			struct cpt_context *ctx)
+{
+	struct cpt_copypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_COPYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_source = pa->mm;
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_lazypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_lazypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_LAZYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	pgb.cpt_index = cpt_alloc_pgin_index(vma, pa->start,
+				     (pa->end-pa->start)/PAGE_SIZE, ctx);
+#endif
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_iterpage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_iterpage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = pa->type == PD_ITER ? CPT_OBJ_ITERPAGES :
+		CPT_OBJ_ITERYOUNGPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	ctx->write(&pgb, sizeof(pgb), ctx);
+
+	ctx->write(pa->list, 8*((pa->end-pa->start)/PAGE_SIZE), ctx);
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+
+static int can_expand(struct page_area *pa, struct page_desc *pd)
+{
+	if (pa->start == pa->end)
+		return 1;
+	if (pa->type != pd->type)
+		return 0;
+	if (pa->type == PD_ITER || pa->type == PD_ITERYOUNG) {
+		if (pa->end - pa->start >= PAGE_SIZE*16)
+			return 0;
+		pa->list[(pa->end - pa->start)/PAGE_SIZE] = pd->index;
+	}
+	if (pa->type == PD_ABSENT)
+		return pd->index == pa->pgoff + 1;
+	if (pa->type == PD_CLONE)
+		return pd->mm == pa->mm;
+	return 1;
+}
+
+static int dump_one_vma(cpt_object_t *mmobj,
+			struct vm_area_struct *vma, struct cpt_context *ctx)
+{
+	struct cpt_vma_image *v = cpt_get_buf(ctx);
+	unsigned long addr;
+	loff_t saved_object;
+	struct cpt_page_block pgb;
+	struct page_area pa;
+	int cloned_pages = 0;
+
+	cpt_push_object(&saved_object, ctx);
+
+	v->cpt_object = CPT_OBJ_VMA;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start = vma->vm_start;
+	v->cpt_end = vma->vm_end;
+	v->cpt_flags = vma->vm_flags;
+	if (vma->vm_flags&VM_HUGETLB) {
+		eprintk_ctx("huge TLB VMAs are still not supported\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_pgprot = vma->vm_page_prot.pgprot;
+	v->cpt_pgoff = vma->vm_pgoff;
+	v->cpt_file = CPT_NULL;
+#ifndef CONFIG_IA64
+	if ((void *)vma->vm_start == vma->vm_mm->context.vdso &&
+			vma->vm_ops == &vsyscall_vm_ops)
+		v->cpt_type = CPT_VMA_VDSO;
+	else
+#endif
+		v->cpt_type = CPT_VMA_TYPE_0;
+	v->cpt_anonvma = 0;
+
+	/* We have to remember what VMAs are bound to one anon_vma.
+	 * So, we store an identifier of group of VMAs. It is handy
+	 * to use absolute address of anon_vma as this identifier. */
+	v->cpt_anonvmaid = (unsigned long)vma->anon_vma;
+
+	if (vma->vm_file) {
+		struct file *filp;
+		cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_FILE, vma->vm_file, ctx);
+		if (obj == NULL) BUG();
+		filp = obj->o_obj;
+		if (filp->f_op &&
+		    filp->f_op->read == NULL &&
+		    filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_TMPFS)
+			v->cpt_type = CPT_VMA_TYPE_SHM;
+		v->cpt_file = obj->o_pos;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	if (v->cpt_type == CPT_VMA_VDSO)
+		goto out;
+
+	pa.type = PD_ABSENT;
+	pa.pgoff = vma->vm_pgoff;
+	pa.mm = CPT_NULL;
+	pa.start = vma->vm_start;
+	pa.end = vma->vm_start;
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+		struct page_desc pd;
+
+		page_get_desc(mmobj, vma, addr, &pd, ctx);
+		cloned_pages += pd.shared;
+
+		if (pd.type == PD_FUNKEY) {
+			eprintk_ctx("dump_one_vma: funkey page\n");
+			return -EINVAL;
+		}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		if (pd.type == PD_LAZY &&
+		    (ctx->lazy_vm == 0 || (vma->vm_flags&VM_LOCKED)))
+			pd.type = PD_COPY;
+#else
+		if (pd.type == PD_LAZY)
+			pd.type = PD_COPY;
+#endif
+
+		if (!can_expand(&pa, &pd)) {
+			if (pa.type == PD_COPY ||
+			    pa.type == PD_ZERO) {
+				pgb.cpt_start = pa.start;
+				pgb.cpt_end = pa.end;
+				dump_page_block(vma, &pgb, pa.type, ctx);
+			} else if (pa.type == PD_CLONE) {
+				dump_copypage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_LAZY) {
+				dump_lazypage_block(vma, &pa, ctx);
+			} else if (pa.type == PD_ITER || pa.type == PD_ITERYOUNG) {
+				dump_iterpage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_ABSENT &&
+				   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+				dump_remappage_block(vma, &pa, ctx);
+			}
+			pa.start = addr;
+		}
+		pa.type = pd.type;
+		pa.end = addr + PAGE_SIZE;
+		pa.pgoff = pd.index;
+		if (addr == pa.start)
+			pa.list[0] = pd.index;
+		pa.mm = pd.mm;
+	}
+
+	if (pa.end > pa.start) {
+		if (pa.type == PD_COPY ||
+		    pa.type == PD_ZERO) {
+			pgb.cpt_start = pa.start;
+			pgb.cpt_end = pa.end;
+			dump_page_block(vma, &pgb, pa.type, ctx);
+		} else if (pa.type == PD_CLONE) {
+			dump_copypage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_LAZY) {
+			dump_lazypage_block(vma, &pa, ctx);
+		} else if (pa.type == PD_ITER || pa.type == PD_ITERYOUNG) {
+			dump_iterpage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_ABSENT &&
+			   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+			dump_remappage_block(vma, &pa, ctx);
+		}
+	}
+
+	if (cloned_pages) {
+		__u32 anonvma = 1;
+		loff_t anonpos = ctx->current_object + offsetof(struct cpt_vma_image, cpt_anonvma);
+		ctx->pwrite(&anonvma, 4, ctx, anonpos);
+	}
+
+out:
+	cpt_close_object(ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+
+	return 0;
+}
+
+static int dump_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			    cpt_context_t *ctx)
+{
+	loff_t saved_object;
+	struct cpt_aio_ctx_image aimg;
+
+	if (!list_empty(&aio_ctx->run_list) ||
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {
+		eprintk_ctx("AIO is active after suspend\n");
+		return -EBUSY;
+	}
+
+	cpt_push_object(&saved_object, ctx);
+
+	aimg.cpt_next = CPT_ALIGN(sizeof(aimg));
+	aimg.cpt_object = CPT_OBJ_AIO_CONTEXT;
+	aimg.cpt_hdrlen = sizeof(aimg);
+	aimg.cpt_content = CPT_CONTENT_ARRAY;
+
+	aimg.cpt_max_reqs = aio_ctx->max_reqs;
+	aimg.cpt_ring_pages = aio_ctx->ring_info.nr_pages;
+	aimg.cpt_nr = aio_ctx->ring_info.nr;
+	aimg.cpt_tail = aio_ctx->ring_info.tail;
+	aimg.cpt_mmap_base = aio_ctx->ring_info.mmap_base;
+
+	ctx->write(&aimg, sizeof(aimg), ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+static int dump_one_mm(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = obj->o_obj;
+	struct vm_area_struct *vma;
+	struct cpt_mm_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_MM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start_code = mm->start_code;
+	v->cpt_end_code = mm->end_code;
+	v->cpt_start_data = mm->start_data;
+	v->cpt_end_data = mm->end_data;
+	v->cpt_start_brk = mm->start_brk;
+	v->cpt_brk = mm->brk;
+	v->cpt_start_stack = mm->start_stack;
+	v->cpt_start_arg = mm->arg_start;
+	v->cpt_end_arg = mm->arg_end;
+	v->cpt_start_env = mm->env_start;
+	v->cpt_end_env = mm->env_end;
+	v->cpt_def_flags = mm->def_flags;
+#ifdef CONFIG_USER_RESOURCE
+	v->cpt_mmub = cpt_lookup_ubc(mm->mm_ub, ctx);
+#endif
+	v->cpt_dumpable = mm->dumpable;
+	v->cpt_vps_dumpable = mm->vps_dumpable;
+	v->cpt_used_hugetlb = 0; /* not used */
+#ifndef CONFIG_IA64
+	v->cpt_vdso = (__u32)(unsigned long)mm->context.vdso;
+#endif
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+#ifdef CONFIG_X86
+	if (mm->context.size) {
+		loff_t saved_object;
+		struct cpt_obj_bits b;
+		int size, i;
+
+		dprintk_ctx("nontrivial LDT\n");
+
+		cpt_push_object(&saved_object, ctx);
+
+		cpt_open_object(NULL, ctx);
+		b.cpt_next = CPT_NULL;
+		b.cpt_object = CPT_OBJ_BITS;
+		b.cpt_hdrlen = sizeof(b);
+		b.cpt_content = CPT_CONTENT_MM_CONTEXT;
+		b.cpt_size = mm->context.size*LDT_ENTRY_SIZE;
+
+		ctx->write(&b, sizeof(b), ctx);
+
+		size = mm->context.size*LDT_ENTRY_SIZE;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_XEN) || \
+			LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
+		ctx->write(mm->context.ldt, size, ctx);
+#else
+		for (i = 0; i < size; i += PAGE_SIZE) {
+			int nr = i / PAGE_SIZE, bytes;
+			char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+			bytes = size - i;
+			if (bytes > PAGE_SIZE)
+				bytes = PAGE_SIZE;
+			ctx->write(kaddr, bytes, ctx);
+			kunmap(mm->context.ldt_pages[nr]);
+		}
+#endif
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_object, ctx);
+	}
+#endif
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int err;
+
+		if ((err = dump_one_vma(obj, vma, ctx)) != 0)
+			return err;
+	}
+
+	if (mm->ioctx_list) {
+		struct kioctx *aio_ctx;
+		int err;
+
+		for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next)
+			if ((err = dump_one_aio_ctx(mm, aio_ctx, ctx)) != 0)
+				return err;
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+int cpt_dump_vm(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	scnt = scnt0 = zcnt = 0;
+
+	cpt_open_section(ctx, CPT_SECT_MM);
+
+	for_each_object(obj, CPT_OBJ_MM) {
+		int err;
+
+		if ((err = dump_one_mm(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+
+	if (scnt)
+		dprintk_ctx("cpt_dump_vm: %d shared private anon pages\n", scnt);
+	if (scnt0)
+		dprintk_ctx("cpt_dump_vm: %d anon pages are cloned\n", scnt0);
+	if (zcnt)
+		dprintk_ctx("cpt_dump_vm: %d silly pages canceled\n", zcnt);
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_mm.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_mm.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_mm.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_mm.h	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,48 @@
+int cpt_collect_mm(cpt_context_t *);
+
+int cpt_dump_vm(struct cpt_context *ctx);
+
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_mm_prepare(unsigned long veid);
+
+int cpt_free_pgin_dir(struct cpt_context *);
+int cpt_start_pagein(struct cpt_context *);
+int rst_setup_pagein(struct cpt_context *);
+int rst_complete_pagein(struct cpt_context *, int);
+int rst_pageind(struct cpt_context *);
+int cpt_iteration(cpt_context_t *ctx);
+int rst_iteration(cpt_context_t *ctx);
+void rst_drop_iter_dir(cpt_context_t *ctx);
+int rst_iter(struct vm_area_struct *vma, u64 pfn,
+	     unsigned long addr, cpt_context_t * ctx);
+int rst_iter_chunk(struct file *file, loff_t pos,
+		   struct cpt_page_block * pgb,
+		   cpt_context_t *ctx);
+
+int rst_swapoff(struct cpt_context *);
+
+int cpt_check_page(struct vm_area_struct *vma, unsigned long address,
+		   struct page *page, int wrprot);
+int cpt_verify_wrprot(struct page * page, cpt_context_t * ctx);
+
+
+#ifdef CONFIG_X86_64
+extern void *syscall32_page;
+/* currently special_mapping_vmops are used by vdso map only */
+extern struct vm_operations_struct special_mapping_vmops;
+#define vsyscall_addr syscall32_page
+#define vsyscall_vm_ops special_mapping_vmops
+#define CPT_SYSENTER_RETURN VSYSCALL32_SYSEXIT
+#elif defined(CONFIG_X86_32)
+extern void *syscall_page;
+/* currently special_mapping_vmops are used by vdso map only */
+extern struct vm_operations_struct special_mapping_vmops;
+extern void SYSENTER_RETURN;
+#define vsyscall_addr syscall_page
+#define vsyscall_vm_ops special_mapping_vmops
+#define CPT_SYSENTER_RETURN (current->mm->context.vdso + \
+		(unsigned long)&SYSENTER_RETURN)
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_net.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_net.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_net.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_net.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,637 @@
+/*
+ *
+ *  kernel/cpt/cpt_net.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/nfcalls.h>
+#include <linux/ip.h>
+#include <linux/cpt_exports.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+/*
+ * cpt_dump_netstats - write a CPT_OBJ_NET_STATS object with the
+ * device's interface counters.  Devices without a get_stats hook are
+ * silently skipped.  The image is built in the shared context buffer
+ * obtained via cpt_get_buf()/cpt_release_buf().
+ */
+static void cpt_dump_netstats(struct net_device *dev, struct cpt_context * ctx)
+{
+	struct cpt_netstats_image *n;
+	struct net_device_stats *stats;
+
+	if (!dev->get_stats)
+		return;
+
+	n = cpt_get_buf(ctx);
+	/* NOTE(review): get_stats() result is dereferenced without a
+	 * NULL check -- drivers here presumably always return a valid
+	 * pointer; verify. */
+	stats = dev->get_stats(dev);
+	cpt_open_object(NULL, ctx);
+
+	n->cpt_next = CPT_NULL;
+	n->cpt_object = CPT_OBJ_NET_STATS;
+	n->cpt_hdrlen = sizeof(*n);
+	n->cpt_content = CPT_CONTENT_VOID;
+
+	/* Copy the counters field by field into the image layout. */
+	n->cpt_rx_packets = stats->rx_packets;
+	n->cpt_tx_packets = stats->tx_packets;
+	n->cpt_rx_bytes = stats->rx_bytes;
+	n->cpt_tx_bytes = stats->tx_bytes;
+	n->cpt_rx_errors = stats->rx_errors;
+	n->cpt_tx_errors = stats->tx_errors;
+	n->cpt_rx_dropped = stats->rx_dropped;
+	n->cpt_tx_dropped = stats->tx_dropped;
+	n->cpt_multicast = stats->multicast;
+	n->cpt_collisions = stats->collisions;
+	n->cpt_rx_length_errors = stats->rx_length_errors;
+	n->cpt_rx_over_errors = stats->rx_over_errors;
+	n->cpt_rx_crc_errors = stats->rx_crc_errors;
+	n->cpt_rx_frame_errors = stats->rx_frame_errors;
+	n->cpt_rx_fifo_errors = stats->rx_fifo_errors;
+	n->cpt_rx_missed_errors = stats->rx_missed_errors;
+	n->cpt_tx_aborted_errors = stats->tx_aborted_errors;
+	n->cpt_tx_carrier_errors = stats->tx_carrier_errors;
+	n->cpt_tx_fifo_errors = stats->tx_fifo_errors;
+	n->cpt_tx_heartbeat_errors = stats->tx_heartbeat_errors;
+	n->cpt_tx_window_errors = stats->tx_window_errors;
+	n->cpt_rx_compressed = stats->rx_compressed;
+	n->cpt_tx_compressed = stats->tx_compressed;
+
+	ctx->write(n, sizeof(*n), ctx);
+	cpt_close_object(ctx);
+	cpt_release_buf(ctx);
+	return;
+}
+
+/*
+ * cpt_dump_link - dump every network device of the VE into the
+ * CPT_SECT_NET_DEVICE section: a cpt_netdev_image header followed by
+ * the device-type specific image, the hardware address and the
+ * interface statistics.
+ *
+ * Called from cpt_dump_ifinfo() with rtnl_lock held, which keeps the
+ * dev_base list stable.  A device without cpt_ops cannot be
+ * checkpointed, so the whole dump is refused with -EBUSY.
+ */
+int cpt_dump_link(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+
+	cpt_open_section(ctx, CPT_SECT_NET_DEVICE);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct cpt_netdev_image v;
+		struct cpt_hwaddr_image hw;
+		loff_t saved_obj;
+
+		if (dev->cpt_ops == NULL) {
+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
+			cpt_close_section(ctx);
+			return -EBUSY;
+		}
+
+		cpt_open_object(NULL, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_NET_DEVICE;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_ARRAY;
+
+		v.cpt_index = dev->ifindex;
+		v.cpt_flags = dev->flags;
+		memcpy(v.cpt_name, dev->name, IFNAMSIZ);
+		ctx->write(&v, sizeof(v), ctx);
+
+		/* Nested objects follow; remember the position so the
+		 * parent object can be patched up afterwards. */
+		cpt_push_object(&saved_obj, ctx);
+
+		/* Device-type specific payload (veth, venet, ...). */
+		cpt_open_object(NULL, ctx);
+		dev->cpt_ops->dump(dev, &cpt_ops, ctx);
+		cpt_close_object(ctx);
+
+		/* Dump hardware address */
+		cpt_open_object(NULL, ctx);
+		hw.cpt_next = CPT_NULL;
+		hw.cpt_object = CPT_OBJ_NET_HWADDR;
+		hw.cpt_hdrlen = sizeof(hw);
+		hw.cpt_content = CPT_CONTENT_VOID;
+		BUG_ON(sizeof(hw.cpt_dev_addr) != sizeof(dev->dev_addr));
+		memcpy(hw.cpt_dev_addr, dev->dev_addr, sizeof(hw.cpt_dev_addr));
+		ctx->write(&hw, sizeof(hw), ctx);
+		cpt_close_object(ctx);
+		
+		cpt_dump_netstats(dev, ctx);
+
+		cpt_pop_object(&saved_obj, ctx);
+
+		cpt_close_object(ctx);
+
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/*
+ * cpt_suspend_network - stop network activity of the current VE for
+ * the duration of the dump.  synchronize_net() waits until all
+ * in-flight packet processing has seen the disable_net flag.
+ */
+int cpt_suspend_network(struct cpt_context *ctx)
+{
+	get_exec_env()->disable_net = 1;
+	synchronize_net();
+	return 0;
+}
+
+/*
+ * cpt_resume_network - re-enable networking of the VE identified by
+ * ctx->ve_id.  Looks the VE up by id (it may differ from the current
+ * exec env here) and drops the reference afterwards.
+ * Returns -ESRCH if the VE no longer exists.
+ */
+int cpt_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	env->disable_net = 0;
+	put_ve(env);
+	return 0;
+}
+
+/*
+ * cpt_dump_ifaddr - dump all IPv4 (and, when compiled in, IPv6)
+ * interface addresses of the VE into the CPT_SECT_NET_IFADDR section,
+ * one cpt_ifaddr_image per address.
+ *
+ * Called under rtnl_lock from cpt_dump_ifinfo(), so the device list
+ * is stable; per-device inet devices are pinned with
+ * in_dev_get()/in6_dev_get().
+ */
+int cpt_dump_ifaddr(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+
+	cpt_open_section(ctx, CPT_SECT_NET_IFADDR);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct in_device *idev = in_dev_get(dev);
+		struct in_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) {
+			struct cpt_ifaddr_image v;
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET;
+			v.cpt_masklen = ifa->ifa_prefixlen;
+			v.cpt_flags = ifa->ifa_flags;
+			v.cpt_scope = ifa->ifa_scope;
+			/* Address fields are wide enough for IPv6; zero
+			 * them and store the IPv4 words in slot 0. */
+			memset(&v.cpt_address, 0, sizeof(v.cpt_address));
+			memset(&v.cpt_peer, 0, sizeof(v.cpt_peer));
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			v.cpt_address[0] = ifa->ifa_local;
+			v.cpt_peer[0] = ifa->ifa_address;
+			v.cpt_broadcast[0] = ifa->ifa_broadcast;
+			memcpy(v.cpt_label, ifa->ifa_label, IFNAMSIZ);
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		in_dev_put(idev);
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct inet6_dev *idev = in6_dev_get(dev);
+		struct inet6_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+			struct cpt_ifaddr_image v;
+
+			/* Skip ::1/128 on loopback: it is recreated
+			 * automatically on restore. */
+			if (dev == &loopback_dev &&
+			    ifa->prefix_len == 128 &&
+			    ifa->addr.s6_addr32[0] == 0 &&
+			    ifa->addr.s6_addr32[1] == 0 &&
+			    ifa->addr.s6_addr32[2] == 0 &&
+			    ifa->addr.s6_addr32[3] == htonl(1))
+				continue;
+
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET6;
+			v.cpt_masklen = ifa->prefix_len;
+			v.cpt_flags = ifa->flags;
+			v.cpt_scope = ifa->scope;
+			v.cpt_valid_lft = ifa->valid_lft;
+			v.cpt_prefered_lft = ifa->prefered_lft;
+			memcpy(&v.cpt_address, &ifa->addr, 16);
+			/* NOTE(review): peer is filled with the local
+			 * address, not a peer address -- looks
+			 * intentional for IPv6 here; verify. */
+			memcpy(&v.cpt_peer, &ifa->addr, 16);
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			memcpy(v.cpt_label, dev->name, IFNAMSIZ);
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		__in6_dev_put(idev);
+	}
+#endif
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/*
+ * cpt_dump_route - dump the VE routing tables into the
+ * CPT_SECT_NET_ROUTE section as a raw array of netlink messages
+ * (CPT_CONTENT_NLMARRAY).
+ *
+ * Works by opening an in-kernel NETLINK_ROUTE socket, sending an
+ * RTM_GETROUTE dump request (AF_INET first, then AF_INET6 via the
+ * "restart" path) and copying every RTM_NEWROUTE reply verbatim into
+ * the image until NLMSG_DONE.
+ *
+ * On success control falls through the out_sock_pg/out_sock labels,
+ * which are shared cleanup, not error-only paths.
+ */
+static int cpt_dump_route(struct cpt_context * ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+	} req;
+	struct sockaddr_nl nladdr;
+	struct cpt_object_hdr v;
+	mm_segment_t oldfs;
+	char *pg;
+
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	/* Build the RTM_GETROUTE dump request for IPv4. */
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = RTM_GETROUTE;
+	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+	req.nlh.nlmsg_pid = 0;
+	req.g.rtgen_family = AF_INET;
+
+	iov.iov_base=&req;
+	iov.iov_len=sizeof(req);
+	msg.msg_name=&nladdr;
+	msg.msg_namelen=sizeof(nladdr);
+	msg.msg_iov=&iov;
+	msg.msg_iovlen=1;
+	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_flags=MSG_DONTWAIT;
+
+	/* Kernel-space buffers: temporarily lift the user-copy check. */
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = sock_sendmsg(sock, &msg, sizeof(req));
+	set_fs(oldfs);
+
+	if (err < 0)
+		goto out_sock;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_ROUTE);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_ROUTE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NLMARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+restart:
+#endif
+	/* Receive one page of netlink replies at a time and copy each
+	 * RTM_NEWROUTE message into the image. */
+	for (;;) {
+		struct nlmsghdr *h;
+
+		iov.iov_base = pg;
+		iov.iov_len = PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+		if (msg.msg_flags & MSG_TRUNC) {
+			err = -ENOBUFS;
+			goto out_sock_pg;
+		}
+
+		/* err now holds the number of bytes received and is
+		 * consumed by NLMSG_OK/NLMSG_NEXT as the remainder. */
+		h = (struct nlmsghdr*)pg;
+		while (NLMSG_OK(h, err)) {
+			if (h->nlmsg_type == NLMSG_DONE) {
+				err = 0;
+				goto done;
+			}
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *errm = (struct nlmsgerr*)NLMSG_DATA(h);
+				err = errm->error;
+				eprintk_ctx("NLMSG error: %d\n", errm->error);
+				goto done;
+			}
+			if (h->nlmsg_type != RTM_NEWROUTE) {
+				eprintk_ctx("NLMSG: %d\n", h->nlmsg_type);
+				err = -EINVAL;
+				goto done;
+			}
+			ctx->write(h, NLMSG_ALIGN(h->nlmsg_len), ctx);
+			h = NLMSG_NEXT(h, err);
+		}
+		if (err) {
+			/* Partial message left over: protocol violation. */
+			eprintk_ctx("!!!Remnant of size %d %d %d\n", err, h->nlmsg_len, h->nlmsg_type);
+			err = -EINVAL;
+			break;
+		}
+	}
+done:
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	/* After the IPv4 dump completed, repeat the request for IPv6. */
+	if (!err && req.g.rtgen_family == AF_INET) {
+		req.g.rtgen_family = AF_INET6;
+		iov.iov_base=&req;
+		iov.iov_len=sizeof(req);
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, sizeof(req));
+		set_fs(oldfs);
+
+		if (err > 0)
+			goto restart;
+	}
+#endif
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+/* Arguments handed to the dumpfn() kernel thread. */
+struct args_t
+{
+	int* pfd;	/* pipe fds: [0] read end, [1] write end */
+	envid_t veid;	/* VE to enter before exec'ing iptables-save */
+};
+
+/*
+ * dumpfn - body of the helper kernel thread spawned by
+ * cpt_dump_iptables().  Enters the target VE, redirects stdout to the
+ * pipe's write end, closes every other fd and execs iptables-save so
+ * the parent can capture the ruleset from the read end.
+ * Returns an exit status in wait()-style format (255 << 8 on failure).
+ */
+static int dumpfn(void *arg)
+{
+	int i;
+	struct args_t *args = arg;
+	int *pfd = args->pfd;
+	char *argv[] = { "iptables-save", "-c", NULL };
+
+	i = real_env_create(args->veid, VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump iptables\n");
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+
+	/* Make the pipe write end this thread's stdout. */
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+
+	/* Close everything except stdout before exec. */
+	for (i=0; i<current->files->fdt->max_fds; i++) {
+		if (i != 1)
+			sc_close(i);
+	}
+
+	/* Drop the module reference taken for this thread by the
+	 * caller before the thread is replaced by exec. */
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/sbin/iptables-save", argv, NULL);
+	if (i == -ENOENT)
+		i = sc_execve("/usr/sbin/iptables-save", argv, NULL);
+	eprintk("failed to exec iptables-save: %d\n", i);
+	return 255 << 8;
+}
+
+
+/*
+ * cpt_dump_iptables - capture the VE's iptables ruleset by running
+ * iptables-save inside the VE (via the dumpfn() kernel thread) and
+ * storing its stdout as a NUL-terminated CPT_OBJ_NAME object in the
+ * CPT_SECT_NET_IPTABLES section.  If iptables-save produced no
+ * output, the section is rolled back entirely.
+ *
+ * Compiled out (returns 0) without CONFIG_VE_IPTABLES or when the VE
+ * has no iptables modules loaded.
+ */
+static int cpt_dump_iptables(struct cpt_context * ctx)
+{
+	int err = 0;
+#ifdef CONFIG_VE_IPTABLES
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	loff_t pos;
+	int n;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+	struct args_t args;
+	struct ve_struct *oldenv;
+
+	if (!(get_exec_env()->_iptables_modules & VE_IP_IPTABLES_MOD))
+		return 0;
+
+	err = sc_pipe(pfd);
+	if (err < 0) {
+		eprintk_ctx("sc_pipe: %d\n", err);
+		return err;
+	}
+	args.pfd = pfd;
+	args.veid = VEID(get_exec_env());
+	/* Block the signals we do not want to take while waiting for
+	 * the child; restored before returning. */
+	ignore.sig[0] = CPT_SIG_IGNORE_MASK;
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	/* Spawn the helper from VE0 so it can enter the target VE. */
+	oldenv = set_exec_env(get_ve0());
+	err = pid = local_kernel_thread(dumpfn, (void*)&args,
+			SIGCHLD | CLONE_VFORK, 0);
+	set_exec_env(oldenv);
+	if (err < 0) {
+		eprintk_ctx("local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	/* Keep our own reference to the read end, then close both
+	 * numeric fds (the child already dup'ed the write end).
+	 * NOTE(review): fget() result is not checked for NULL before
+	 * f->f_op->read below -- verify it cannot fail here. */
+	f = fget(pfd[0]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	cpt_open_section(ctx, CPT_SECT_NET_IPTABLES);
+
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	/* Remember where the payload starts so we can detect "no
+	 * output" afterwards. */
+	pos = ctx->file->f_pos;
+	do {
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	if (n < 0)
+		eprintk_ctx("read: %d\n", n);
+
+	fput(f);
+
+	/* Reap the helper and translate its exit status. */
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("iptables-save exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("iptables-save terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	if (ctx->file->f_pos != pos) {
+		/* Got output: NUL-terminate the name object. */
+		buf[0] = 0;
+		ctx->write(buf, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+	} else {
+		/* Empty output: unwind the whole section from the image. */
+		pos = ctx->current_section;
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+		ctx->sections[CPT_SECT_NET_IPTABLES] = CPT_NULL;
+		ctx->file->f_pos = pos;
+	}
+	return n ? : err;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+#endif
+	return err;
+}
+
+/*
+ * fold_field - sum one SNMP counter across all possible CPUs.
+ * assumes mib[] points to two per-cpu MIB arrays (the usual
+ * kernel SNMP_MIB bh/user pair) -- TODO confirm against the
+ * ve_struct statistics layout, since callers pass &ve->_xxx_statistics
+ * cast to (void **).
+ */
+static unsigned long fold_field(void *mib[], int offt)
+{
+	unsigned long res = 0;
+	int i;
+
+	for_each_possible_cpu(i) {
+		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+	}
+	return res;
+}
+
+/*
+ * cpt_dump_snmp_stat - fold n counters of one MIB into an array of
+ * __u32 and write them as a CPT_OBJ_BITS/CPT_CONTENT_DATA object.
+ * assumes n * sizeof(__u32) fits in the shared buffer returned by
+ * cpt_get_buf() -- TODO confirm the buffer size covers the largest
+ * *_MIB_MAX used by cpt_dump_snmp().
+ */
+static void cpt_dump_snmp_stat(struct cpt_context *ctx, void *mib[], int n)
+{
+	int i;
+	struct cpt_object_hdr o;
+	__u32 *stats;
+
+	stats = cpt_get_buf(ctx);
+
+	cpt_open_object(NULL, ctx);
+
+	for (i = 0; i < n; i++)
+		stats[i] = fold_field(mib, i);
+
+ 	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_BITS;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_DATA;
+
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->write(stats, n * sizeof(*stats), ctx);
+	ctx->align(ctx);
+
+	cpt_close_object(ctx);
+
+	cpt_release_buf(ctx);
+}
+
+/*
+ * cpt_dump_snmp_stub - write an empty CPT_OBJ_BITS object in place of
+ * an SNMP table that is compiled out (IPv6-less kernels), keeping the
+ * number of objects in the section constant for the restore side.
+ */
+static void cpt_dump_snmp_stub(struct cpt_context *ctx)
+{
+	struct cpt_object_hdr o;
+
+	cpt_open_object(NULL, ctx);
+ 	o.cpt_next = CPT_NULL;
+	o.cpt_object = CPT_OBJ_BITS;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_VOID;
+	ctx->write(&o, sizeof(o), ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+}
+
+/*
+ * cpt_dump_snmp - dump the VE's SNMP statistics tables into the
+ * CPT_SECT_SNMP_STATS section.  The tables are written in a fixed
+ * order (net, ip, tcp, udp, icmp, icmpmsg, then the three IPv6
+ * tables); without IPv6 support the IPv6 slots are filled with empty
+ * stub objects so the restore side sees the same layout.
+ */
+static int cpt_dump_snmp(struct cpt_context *ctx)
+{
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+
+	cpt_open_section(ctx, CPT_SECT_SNMP_STATS);
+
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_net_statistics,
+				LINUX_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_ip_statistics,
+				IPSTATS_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_tcp_statistics,
+				TCP_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_udp_statistics,
+				UDP_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_icmp_statistics,
+				ICMP_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_icmpmsg_statistics,
+				ICMPMSG_MIB_MAX);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_ipv6_statistics,
+				IPSTATS_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_udp_stats_in6,
+				UDP_MIB_MAX);
+	cpt_dump_snmp_stat(ctx, (void **)&ve->_icmpv6_statistics,
+				ICMP6_MIB_MAX);
+#else
+	cpt_dump_snmp_stub(ctx);
+	cpt_dump_snmp_stub(ctx);
+	cpt_dump_snmp_stub(ctx);
+#endif
+	cpt_close_section(ctx);
+
+	return 0;
+}
+
+/*
+ * cpt_dump_ifinfo - top-level network dump: devices and addresses
+ * (under rtnl_lock), then routes, iptables and SNMP stats.  Stops at
+ * the first failing stage and returns its error.
+ */
+int cpt_dump_ifinfo(struct cpt_context * ctx)
+{
+	int err;
+
+	rtnl_lock();
+	err = cpt_dump_link(ctx);
+	if (!err)
+		err = cpt_dump_ifaddr(ctx);
+	rtnl_unlock();
+	/* The remaining stages sleep and must run without rtnl_lock. */
+	if (!err)
+		err = cpt_dump_route(ctx);
+	if (!err)
+		err = cpt_dump_iptables(ctx);
+	if (!err)
+		err = cpt_dump_snmp(ctx);
+	return err;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_net.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_net.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_net.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_net.h	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,7 @@
+int cpt_dump_ifinfo(struct cpt_context *ctx);
+int rst_restore_net(struct cpt_context *ctx);
+int cpt_suspend_network(struct cpt_context *ctx);
+int cpt_resume_network(struct cpt_context *ctx);
+int rst_resume_network(struct cpt_context *ctx);
+int cpt_dump_ip_conntrack(struct cpt_context *ctx);
+int rst_restore_ip_conntrack(struct cpt_context * ctx);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_obj.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_obj.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_obj.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_obj.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,162 @@
+/*
+ *
+ *  kernel/cpt/cpt_obj.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/*
+ * alloc_cpt_object - allocate and default-initialize one tracking
+ * object (refcount 1, no position/index/payload) and account it in
+ * ctx->objcount.  Returns NULL on allocation failure.
+ */
+cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = kmalloc(sizeof(cpt_object_t), gfp);
+	if (obj) {
+		INIT_LIST_HEAD(&obj->o_list);
+		INIT_LIST_HEAD(&obj->o_hash);
+		obj->o_count = 1;
+		obj->o_pos = CPT_NULL;
+		obj->o_lock = 0;
+		obj->o_parent = NULL;
+		obj->o_index = CPT_NOINDEX;
+		obj->o_obj = NULL;
+		obj->o_image = NULL;
+		obj->o_flags = 0;
+		ctx->objcount++;
+	}
+	return obj;
+}
+
+/*
+ * free_cpt_object - release an object and drop it from the context's
+ * accounting.  The caller must have unlinked it from any list.
+ */
+void free_cpt_object(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	kfree(obj);
+	ctx->objcount--;
+}
+
+/* intern_cpt_object - append an object to the per-type list tail. */
+void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_context_t *ctx)
+{
+	list_add_tail(&obj->o_list, &ctx->object_array[type]);
+}
+
+/* insert_cpt_object - insert an object right after @head, keeping
+ * related objects adjacent in the per-type list. */
+void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj,
+			cpt_object_t *head, cpt_context_t *ctx)
+{
+	list_add(&obj->o_list, &head->o_list);
+}
+
+/*
+ * __cpt_object_add - look up the object tracking kernel pointer @p of
+ * @type; if found, take an extra reference, otherwise allocate a new
+ * object with the given gfp mask and intern it.  Returns NULL only on
+ * allocation failure.
+ */
+cpt_object_t * __cpt_object_add(enum _cpt_object_type type, void *p,
+		unsigned gfp_mask, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(type, p, ctx);
+
+	if (obj) {
+		obj->o_count++;
+		return obj;
+	}
+
+	if ((obj = alloc_cpt_object(gfp_mask, ctx)) != NULL) {
+		if (p)
+			cpt_obj_setobj(obj, p, ctx);
+		intern_cpt_object(type, obj, ctx);
+		return obj;
+	}
+	return NULL;
+}
+
+/* cpt_object_add - __cpt_object_add() with GFP_KERNEL. */
+cpt_object_t * cpt_object_add(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	return __cpt_object_add(type, p, GFP_KERNEL, ctx);
+}
+
+/*
+ * cpt_object_get - like cpt_object_add() but never allocates: returns
+ * the existing object with an extra reference, or NULL if @p is not
+ * tracked yet.
+ */
+cpt_object_t * cpt_object_get(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(type, p, ctx);
+
+	if (obj)
+		obj->o_count++;
+
+	return obj;
+}
+
+/* cpt_object_init - initialize the empty per-type object lists. */
+int cpt_object_init(cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<CPT_OBJ_MAX; i++) {
+		INIT_LIST_HEAD(&ctx->object_array[i]);
+	}
+	return 0;
+}
+
+/*
+ * cpt_object_destroy - free every tracked object (and its cached
+ * image, if any) for all types.  Complains if the allocation counter
+ * does not drop back to zero, which would indicate a leak elsewhere.
+ */
+int cpt_object_destroy(cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<CPT_OBJ_MAX; i++) {
+		while (!list_empty(&ctx->object_array[i])) {
+			struct list_head *head = ctx->object_array[i].next;
+			cpt_object_t *obj = list_entry(head, cpt_object_t, o_list);
+			list_del(head);
+			if (obj->o_image)
+				kfree(obj->o_image);
+			free_cpt_object(obj, ctx);
+		}
+	}
+	if (ctx->objcount != 0)
+		eprintk_ctx("BUG: ctx->objcount=%d\n", ctx->objcount);
+	return 0;
+}
+
+/* lookup_cpt_object - linear search of the per-type list for the
+ * object tracking kernel pointer @p; NULL when not tracked.
+ * (The o_hash field exists but no hash table is populated yet.) */
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {
+		if (obj->o_obj == p)
+			return obj;
+	}
+	return NULL;
+}
+
+/* lookup_cpt_obj_bypos - find an object by its image-file position. */
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {
+		if (obj->o_pos == pos)
+			return obj;
+	}
+	return NULL;
+}
+
+/* lookup_cpt_obj_byindex - find an object by its assigned index. */
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, type) {
+		if (obj->o_index == index)
+			return obj;
+	}
+	return NULL;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_obj.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_obj.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_obj.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_obj.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,69 @@
+#ifndef __CPT_OBJ_H_
+#define __CPT_OBJ_H_ 1
+
+#undef ITER_DEBUG
+
+#include <linux/list.h>
+#include <linux/cpt_image.h>
+
+/*
+ * Tracking record that ties a live kernel object (task, mm, file,
+ * inode, ...) to its position in the checkpoint image.
+ */
+typedef struct _cpt_object
+{
+	struct list_head	o_list;		/* per-type list linkage */
+	struct list_head	o_hash;		/* reserved for a future hash table */
+	int			o_count;	/* references from the dump logic */
+	int			o_index;	/* image index, CPT_NOINDEX if none */
+	int			o_lock;
+	loff_t			o_pos;		/* object position in the image file */
+	loff_t			o_ppos;
+	void			*o_obj;		/* the kernel object being tracked */
+	void			*o_image;	/* optional cached image, kfree'd on destroy */
+	void			*o_parent;
+	unsigned int		o_flags;
+/* o_flags values; note 0x1 is reused per object type */
+#define CPT_INODE_HARDLINKED	0x1
+#define CPT_VFSMOUNT_DELAYFS	0x1
+#define CPT_FILE_DELAYFS	0x1
+#define CPT_FILE_SILLYRENAME	0x2
+} cpt_object_t;
+
+struct cpt_context;
+
+/* Both iterators expect a local `ctx` pointer in scope. */
+#define for_each_object(obj, type) list_for_each_entry(obj, &ctx->object_array[type], o_list)
+#define for_each_object_safe(obj, nobj, type) list_for_each_entry_safe(obj, nobj, &ctx->object_array[type], o_list)
+
+
+extern cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx);
+extern void free_cpt_object(cpt_object_t *obj, struct cpt_context *ctx);
+
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx);
+
+/* Record the image-file position of the object. */
+static inline void cpt_obj_setpos(cpt_object_t *cpt, loff_t pos, struct cpt_context *ctx)
+{
+	cpt->o_pos = pos;
+	/* Add to pos hash table */
+}
+
+/* Attach the kernel object this record tracks. */
+static inline void cpt_obj_setobj(cpt_object_t *cpt, void *ptr, struct cpt_context *ctx)
+{
+	cpt->o_obj = ptr;
+	/* Add to hash table */
+}
+
+/* Assign the object's index within the image. */
+static inline void cpt_obj_setindex(cpt_object_t *cpt, __u32 index, struct cpt_context *ctx)
+{
+	cpt->o_index = index;
+	/* Add to index hash table */
+}
+
+
+extern void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, struct cpt_context *ctx);
+extern void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_object_t *head, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_add(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+extern cpt_object_t *__cpt_object_add(enum _cpt_object_type type, void *p, unsigned int gfp_mask, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_get(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+
+extern int cpt_object_init(struct cpt_context *ctx);
+extern int cpt_object_destroy(struct cpt_context *ctx);
+
+#endif /* __CPT_OBJ_H_ */
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_proc.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_proc.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_proc.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_proc.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,643 @@
+/*
+ *
+ *  kernel/cpt/cpt_proc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+#include <linux/delay.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL");
+
+/* List of contexts and lock protecting the list */
+static struct list_head cpt_context_list;
+static spinlock_t cpt_context_lock;
+
+/*
+ * proc_read - /proc read_proc handler listing all live checkpoint
+ * contexts (pointer, id, VE and state), one per line.
+ *
+ * Uses the classic read_proc offset/begin windowing protocol to
+ * return only the [offset, offset+length) slice of the generated
+ * text.  assumes the whole listing fits the single page the procfs
+ * core passes in `buffer` -- TODO confirm for large context counts.
+ */
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	cpt_context_t *ctx;
+
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+
+		buffer[len++] = '\n';
+
+		/* Discard fully-before-offset output; stop once the
+		 * requested window has been filled. */
+		pos = begin+len;
+		if (pos < offset) {
+			len = 0;
+			begin = pos;
+		}
+		if (pos > offset+length)
+			goto done;
+	}
+	*eof = 1;
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+/*
+ * cpt_context_release - tear down a context whose refcount reached
+ * zero: unlink it, resume the VE if a dump was in progress, drop all
+ * held file references and free the structure.
+ *
+ * Locking: entered with cpt_context_lock held; the lock is dropped
+ * for the blocking cleanup work and re-taken before returning, so the
+ * caller's unlock still balances.
+ */
+void cpt_context_release(cpt_context_t *ctx)
+{
+	int i;
+
+	list_del(&ctx->ctx_list);
+	spin_unlock(&cpt_context_lock);
+
+	/* A positive state means the VE is still frozen by this
+	 * context -- let it run again before we disappear. */
+	if (ctx->ctx_state > 0)
+		cpt_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+	if (ctx->pgin_dir)
+		cpt_free_pgin_dir(ctx);
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+#endif
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	if (ctx->file)
+		fput(ctx->file);
+	/* Flush pending error text before closing the error file. */
+	cpt_flush_error(ctx);
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	for (i = 0; i < ctx->linkdirs_num; i++)
+		fput(ctx->linkdirs[i]);
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+	if (ctx->statusfile)
+		fput(ctx->statusfile);
+	if (ctx->lockfile)
+		fput(ctx->lockfile);
+	kfree(ctx);
+
+	spin_lock(&cpt_context_lock);
+}
+
+/* __cpt_context_put - drop one reference; destroys the context when
+ * it was the last.  Caller holds cpt_context_lock. */
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (!--ctx->refcount)
+		cpt_context_release(ctx);
+}
+
+/* cpt_context_put - locked wrapper around __cpt_context_put(). */
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);
+	__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+}
+
+/*
+ * cpt_context_open - allocate and initialize a new checkpoint context
+ * and link it into the global list.  The error-message page is
+ * optional: a failed allocation leaves error_msg NULL, which the
+ * users of error_msg must tolerate.  Returns NULL on OOM.
+ */
+cpt_context_t * cpt_context_open(void)
+{
+	cpt_context_t *ctx;
+
+	if ((ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)) != NULL) {
+		cpt_context_init(ctx);
+		spin_lock(&cpt_context_lock);
+		list_add_tail(&ctx->ctx_list, &cpt_context_list);
+		spin_unlock(&cpt_context_lock);
+		ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
+		if (ctx->error_msg != NULL)
+			ctx->error_msg[0] = 0;
+	}
+	return ctx;
+}
+
+/*
+ * cpt_context_lookup - find a context by its user-visible id and
+ * return it with an extra reference (drop with cpt_context_put()),
+ * or NULL if no such context exists.
+ */
+static cpt_context_t * cpt_context_lookup(unsigned int contextid)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->contextid == contextid) {
+			ctx->refcount++;
+			spin_unlock(&cpt_context_lock);
+			return ctx;
+		}
+	}
+	spin_unlock(&cpt_context_lock);
+	return NULL;
+}
+
+/*
+ * cpt_context_lookup_veid - report whether some context is actively
+ * checkpointing the given VE (state > 0).  No reference is taken;
+ * the answer is only a snapshot.
+ */
+int cpt_context_lookup_veid(unsigned int veid)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->ve_id == veid && ctx->ctx_state > 0) {
+			spin_unlock(&cpt_context_lock);
+			return 1;
+		}
+	}
+	spin_unlock(&cpt_context_lock);
+	return 0;
+}
+
+static int cpt_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+	int try;
+
+	unlock_kernel();
+
+	if (cmd == CPT_VMPREP) {
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		err = cpt_mm_prepare(arg);
+#else
+		err = -EINVAL;
+#endif
+		goto out_lock;
+	}
+
+	if (cmd == CPT_TEST_CAPS) {
+		unsigned int src_flags, dst_flags = arg;
+
+		err = 0;
+		src_flags = test_cpu_caps_and_features();
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		goto out_lock;
+	}
+
+	if (cmd == CPT_TEST_VERSION) {
+		err = CPT_CURRENT_VERSION;
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = cpt_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		if (ctx->contextid && ctx->contextid != contextid) {
+			err = -EINVAL;
+			goto out_nosem;
+		}
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state == CPT_CTX_DUMPING) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			err = -EBADF;
+			dfile = fget(arg);
+			if (dfile == NULL)
+				break;
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->write == NULL) {
+				fput(dfile);
+				break;
+			}
+			err = 0;
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+	case CPT_LINKDIR_ADD:
+		if (ctx->linkdirs_num >= CPT_MAX_LINKDIRS) {
+			err = -EMLINK;
+			break;
+		}
+
+		dfile = fget(arg);
+		if (!dfile) {
+			err = -EBADFD;
+			break;
+		}
+
+		if (!S_ISDIR(dfile->f_dentry->d_inode->i_mode)) {
+			err = -ENOTDIR;
+			fput(dfile);
+			break;
+		}
+
+		ctx->linkdirs[ctx->linkdirs_num++] = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	case CPT_SET_PAGEINFDIN:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_SET_LAZY:
+		ctx->lazy_vm = arg;
+		break;
+	case CPT_ITER:
+		err = cpt_iteration(ctx);
+		break;
+	case CPT_PAGEIND:
+		err = cpt_start_pagein(ctx);
+		break;
+#endif
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_SET_CPU_FLAGS:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->dst_cpu_flags = arg;
+		ctx->src_cpu_flags = test_cpu_caps_and_features();
+		break;
+	case CPT_SUSPEND:
+		if (cpt_context_lookup_veid(ctx->ve_id) ||
+		    ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+
+#ifdef ITER_DEBUG
+		cpt_iteration(ctx);
+#endif
+
+		ctx->ctx_state = CPT_CTX_SUSPENDING;
+		try = 0;
+		do {
+			err = cpt_vps_suspend(ctx);
+			if (err)
+				cpt_resume(ctx);
+			if (err == -EAGAIN)
+				msleep(1000);
+			try++;
+		} while (err == -EAGAIN && try < 3);
+		if (err) {
+			ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_SUSPENDED;
+		}
+		break;
+	case CPT_DUMP:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		if (!ctx->file) {
+			err = -EBADF;
+			break;
+		}
+		err = cpt_dump(ctx);
+		break;
+	case CPT_RESUME:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_TEST_VECAPS:
+	{
+		__u32 dst_flags = arg;
+		__u32 src_flags;
+
+		err = cpt_vps_caps(ctx, &src_flags);
+		if (err)
+			break;
+
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_EMT64, "emt64", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL, "syscall", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL32, "syscall32", err);
+		if (dst_flags & (1 << CPT_SLM_DMPRST)) {
+			eprintk_ctx("SLM is enabled on destination node, but slm_dmprst module is not loaded\n");
+			err = 1;
+		}
+
+		if (src_flags & CPT_UNSUPPORTED_MASK)
+			err = 2;
+
+		if ((dst_flags & (1 << CPT_NO_IPV6)) &&
+				!(src_flags & (1 << CPT_NO_IPV6))) {
+			eprintk_ctx("IPv6 not loaded on destination node\n");
+			err = 1;
+		}
+
+		break;
+	}
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);
+out_lock:
+	lock_kernel();
+	if (err == -ERESTARTSYS || err == -ERESTARTNOINTR ||
+	    err == -ERESTARTNOHAND || err == -ERESTART_RESTARTBLOCK)
+		err = -EINTR;
+	return err;
+}
+
+/*
+ * open() handler for the /proc/cpt control file.
+ * Pins the module so it cannot be unloaded while the file is open.
+ * The per-file cpt context is created lazily by the ioctl path, not here.
+ */
+static int cpt_open(struct inode *inode, struct file *file)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * release() handler for the /proc/cpt control file.
+ * Detaches the context bound to this file (if any) under
+ * cpt_context_lock and drops its reference, then releases the module
+ * reference taken in cpt_open().
+ */
+static int cpt_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	file->private_data = NULL;
+
+	if (ctx)
+		__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+
+/* File operations for /proc/cpt.  read/write/llseek are inherited from
+ * the default procfs handlers in init_cpt(); only open/release/ioctl
+ * are provided here. */
+static struct file_operations cpt_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = cpt_open,
+	.release = cpt_release,
+	.ioctl	 = cpt_ioctl,
+};
+
+/* The /proc/cpt entry created in init_cpt(). */
+static struct proc_dir_entry *proc_ent;
+
+/* Handle of the registered sysctl subtree (debug.cpt). */
+static struct ctl_table_header *ctl_header;
+
+/* sysctl debug.cpt: runtime-adjustable debug verbosity (debug_level). */
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9475,
+		.procname	= "cpt",
+		.data		= &debug_level,
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+/*
+ * Module init: register the debug.cpt sysctl, initialize the global
+ * context list/lock, and create /proc/cpt.  The generic procfs
+ * read/write/llseek handlers are copied into cpt_fops before the entry
+ * is switched over to it, so default behavior for those operations is
+ * preserved.  Returns 0 on success or a negative errno.
+ */
+static int __init init_cpt(void)
+{
+	int err;
+
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err_mon;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry_mod("cpt", 0600, NULL, THIS_MODULE);
+	if (!proc_ent)
+		goto err_out;
+
+	/* Inherit the default procfs handlers, then take over the entry. */
+	cpt_fops.read = proc_ent->proc_fops->read;
+	cpt_fops.write = proc_ent->proc_fops->write;
+	cpt_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &cpt_fops;
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+	return 0;
+
+err_out:
+	unregister_sysctl_table(ctl_header);
+err_mon:
+	return err;
+}
+module_init(init_cpt);
+
+/*
+ * Module exit: tear down /proc/cpt and the sysctl tree, then drain the
+ * global context list.  A "sticky" context owns an extra reference of
+ * its own; the refcount juggling below normalizes every context to
+ * exactly one remaining (list) reference before the final put, which
+ * frees it and unlinks it.  Any other count indicates a refcount bug
+ * (BUG_ON).
+ */
+static void __exit exit_cpt(void)
+{
+	remove_proc_entry("cpt", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+
+		/* Non-sticky contexts get a temporary ref; sticky ones
+		 * already hold one, which clearing 'sticky' releases. */
+		if (!ctx->sticky)
+			ctx->refcount++;
+		ctx->sticky = 0;
+
+		BUG_ON(ctx->refcount != 1);
+
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_cpt);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_process.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_process.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_process.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_process.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,1525 @@
+/*
+ *
+ *  kernel/cpt/cpt_process.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/compat.h>
+#include <linux/cpt_image.h>
+#include <linux/ptrace.h>
+#ifdef CONFIG_UTRACE
+#include <linux/utrace.h>
+#endif
+#include <linux/ve_nfs.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+#ifdef CONFIG_X86_32
+#undef task_pt_regs
+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
+#endif
+
+/*
+ * Check whether @tsk is in a state safe to checkpoint.
+ * On x86-64 a native (non-ia32) task must not be stopped inside the
+ * vsyscall page or the vdso, because the saved instruction pointer
+ * would reference a kernel-chosen address that may differ on the
+ * restore side.  Returns 0 if OK, -EAGAIN if the caller should retry.
+ *
+ * Fix: corrected the misspelled "checkpointied" in both error messages.
+ */
+int check_task_state(struct task_struct *tsk, struct cpt_context *ctx)
+{
+#ifdef CONFIG_X86_64
+	struct vm_area_struct *vma;
+	if (!(tsk->thread_info->flags&_TIF_IA32)) {
+		if (task_pt_regs(tsk)->rip >= VSYSCALL_START &&
+				task_pt_regs(tsk)->rip < VSYSCALL_END) {
+			eprintk_ctx(CPT_FID "cannot be checkpointed while vsyscall, try later\n", CPT_TID(tsk));
+			return -EAGAIN;
+		}
+		/* NOTE(review): the lookup uses current->mm, not tsk->mm,
+		 * while rip belongs to tsk — confirm this is intended
+		 * (tasks may share mm here, but verify). */
+		vma = find_vma(current->mm, task_pt_regs(tsk)->rip);
+		if (vma && vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) {
+			eprintk_ctx(CPT_FID "cannot be checkpointed while vdso, try later\n", CPT_TID(tsk));
+			return -EAGAIN;
+		}
+	}
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_X86
+
+/*
+ * Translate a hardware segment selector into an architecture-neutral
+ * CPT_SEG_* code for the image.  Only user-mode (RPL 3) selectors are
+ * meaningful; anything unexpected degrades to CPT_SEG_ZERO with a
+ * warning rather than failing the dump.
+ */
+static u32 encode_segment(u32 segreg)
+{
+	segreg &= 0xFFFF;
+
+	if (segreg == 0)
+		return CPT_SEG_ZERO;
+	if ((segreg & 3) != 3) {
+		wprintk("Invalid RPL of a segment reg %x\n", segreg);
+		return CPT_SEG_ZERO;
+	}
+
+	/* LDT descriptor, it is just an index to LDT array */
+	if (segreg & 4)
+		return CPT_SEG_LDT + (segreg >> 3);
+
+	/* TLS descriptor. */
+	if ((segreg >> 3) >= GDT_ENTRY_TLS_MIN &&
+	    (segreg >> 3) <= GDT_ENTRY_TLS_MAX)
+		return CPT_SEG_TLS1 + ((segreg>>3) - GDT_ENTRY_TLS_MIN);
+
+	/* One of standard desriptors */
+#ifdef CONFIG_X86_64
+	if (segreg == __USER32_DS)
+		return CPT_SEG_USER32_DS;
+	if (segreg == __USER32_CS)
+		return CPT_SEG_USER32_CS;
+	if (segreg == __USER_DS)
+		return CPT_SEG_USER64_DS;
+	if (segreg == __USER_CS)
+		return CPT_SEG_USER64_CS;
+#else
+	if (segreg == __USER_DS)
+		return CPT_SEG_USER32_DS;
+	if (segreg == __USER_CS)
+		return CPT_SEG_USER32_CS;
+#endif
+	wprintk("Invalid segment reg %x\n", segreg);
+	return CPT_SEG_ZERO;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Convert a 64-bit pt_regs of an ia32 (compat) task into the 32-bit
+ * register image layout.  ds/es are not part of pt_regs on x86-64 and
+ * are taken from the thread struct instead; segment selectors are
+ * translated via encode_segment().
+ */
+static void xlate_ptregs_64_to_32(struct cpt_x86_regs *d, struct pt_regs *s,
+		struct task_struct *tsk)
+{
+	d->cpt_ebp = s->rbp;
+	d->cpt_ebx = s->rbx;
+	d->cpt_eax = s->rax;
+	d->cpt_ecx = s->rcx;
+	d->cpt_edx = s->rdx;
+	d->cpt_esi = s->rsi;
+	d->cpt_edi = s->rdi;
+	d->cpt_orig_eax = s->orig_rax;
+	d->cpt_eip = s->rip;
+	d->cpt_xcs = encode_segment(s->cs);
+	d->cpt_eflags = s->eflags;
+	d->cpt_esp = s->rsp;
+	d->cpt_xss = encode_segment(s->ss);
+	d->cpt_xds = encode_segment(tsk->thread.ds);
+	d->cpt_xes = encode_segment(tsk->thread.es);
+}
+
+/*
+ * Dump user-visible CPU registers of @tsk into the image (x86-64).
+ * An ia32 task is written as a 32-bit register object via
+ * xlate_ptregs_64_to_32(); a native task is written as a 64-bit object
+ * whose GPR block is copied straight from pt_regs (cpt_r15 is assumed
+ * to mirror the pt_regs layout).  Always returns 0.
+ */
+static int dump_registers(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	cpt_open_object(NULL, ctx);
+
+	if (tsk->thread_info->flags&_TIF_IA32) {
+		struct cpt_x86_regs ri;
+		ri.cpt_next = sizeof(ri);
+		ri.cpt_object = CPT_OBJ_X86_REGS;
+		ri.cpt_hdrlen = sizeof(ri);
+		ri.cpt_content = CPT_CONTENT_VOID;
+
+		ri.cpt_debugreg[0] = tsk->thread.debugreg0;
+		ri.cpt_debugreg[1] = tsk->thread.debugreg1;
+		ri.cpt_debugreg[2] = tsk->thread.debugreg2;
+		ri.cpt_debugreg[3] = tsk->thread.debugreg3;
+		ri.cpt_debugreg[4] = 0;
+		ri.cpt_debugreg[5] = 0;
+		ri.cpt_debugreg[6] = tsk->thread.debugreg6;
+		ri.cpt_debugreg[7] = tsk->thread.debugreg7;
+		ri.cpt_fs = encode_segment(tsk->thread.fsindex);
+		ri.cpt_gs = encode_segment(tsk->thread.gsindex);
+
+		xlate_ptregs_64_to_32(&ri, task_pt_regs(tsk), tsk);
+
+		ctx->write(&ri, sizeof(ri), ctx);
+	} else {
+		struct cpt_x86_64_regs ri;
+		ri.cpt_next = sizeof(ri);
+		ri.cpt_object = CPT_OBJ_X86_64_REGS;
+		ri.cpt_hdrlen = sizeof(ri);
+		ri.cpt_content = CPT_CONTENT_VOID;
+
+		ri.cpt_fsbase = tsk->thread.fs;
+		ri.cpt_gsbase = tsk->thread.gs;
+		ri.cpt_fsindex = encode_segment(tsk->thread.fsindex);
+		ri.cpt_gsindex = encode_segment(tsk->thread.gsindex);
+		ri.cpt_ds = encode_segment(tsk->thread.ds);
+		ri.cpt_es = encode_segment(tsk->thread.es);
+		ri.cpt_debugreg[0] = tsk->thread.debugreg0;
+		ri.cpt_debugreg[1] = tsk->thread.debugreg1;
+		ri.cpt_debugreg[2] = tsk->thread.debugreg2;
+		ri.cpt_debugreg[3] = tsk->thread.debugreg3;
+		ri.cpt_debugreg[4] = 0;
+		ri.cpt_debugreg[5] = 0;
+		ri.cpt_debugreg[6] = tsk->thread.debugreg6;
+		ri.cpt_debugreg[7] = tsk->thread.debugreg7;
+
+		/* Bulk-copy GPRs; cpt_r15 must be first in the GPR block. */
+		memcpy(&ri.cpt_r15, task_pt_regs(tsk), sizeof(struct pt_regs));
+
+		ri.cpt_cs = encode_segment(task_pt_regs(tsk)->cs);
+		ri.cpt_ss = encode_segment(task_pt_regs(tsk)->ss);
+
+		ctx->write(&ri, sizeof(ri), ctx);
+
+	}
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+#else
+
+/*
+ * Dump user-visible CPU registers of @tsk into the image (i386).
+ * The GPR block is bulk-copied from pt_regs (cpt_ebx is assumed to be
+ * the first GPR field and to mirror the pt_regs layout); segment
+ * selectors are re-encoded afterwards.  Always returns 0.
+ *
+ * Fix: the cpt_xes line was indented with spaces; normalized to a tab
+ * per kernel coding style (whitespace-only change).
+ */
+static int dump_registers(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	struct cpt_x86_regs ri;
+
+	cpt_open_object(NULL, ctx);
+
+	ri.cpt_next = sizeof(ri);
+	ri.cpt_object = CPT_OBJ_X86_REGS;
+	ri.cpt_hdrlen = sizeof(ri);
+	ri.cpt_content = CPT_CONTENT_VOID;
+
+	ri.cpt_debugreg[0] = tsk->thread.debugreg[0];
+	ri.cpt_debugreg[1] = tsk->thread.debugreg[1];
+	ri.cpt_debugreg[2] = tsk->thread.debugreg[2];
+	ri.cpt_debugreg[3] = tsk->thread.debugreg[3];
+	ri.cpt_debugreg[4] = tsk->thread.debugreg[4];
+	ri.cpt_debugreg[5] = tsk->thread.debugreg[5];
+	ri.cpt_debugreg[6] = tsk->thread.debugreg[6];
+	ri.cpt_debugreg[7] = tsk->thread.debugreg[7];
+	ri.cpt_fs = encode_segment(tsk->thread.fs);
+	ri.cpt_gs = encode_segment(tsk->thread.gs);
+
+	memcpy(&ri.cpt_ebx, task_pt_regs(tsk), sizeof(struct pt_regs));
+
+	ri.cpt_xcs = encode_segment(task_pt_regs(tsk)->xcs);
+	ri.cpt_xss = encode_segment(task_pt_regs(tsk)->xss);
+	ri.cpt_xds = encode_segment(task_pt_regs(tsk)->xds);
+	ri.cpt_xes = encode_segment(task_pt_regs(tsk)->xes);
+
+	ctx->write(&ri, sizeof(ri), ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+#endif
+#endif
+
+#ifdef CONFIG_IA64
+
+/*
+   PMD?
+ */
+
+#define _C(x) do { if ((err = (x)) < 0) { printk("atm:" CPT_FID #x " %d\n", \
+						 CPT_TID(tsk), err); return -EINVAL; } } while (0) 
+
+/*
+ * Collect the preserved (callee-saved) IA-64 register state of a
+ * blocked task via the kernel unwinder: NaT bits, r4-r7, b1-b5,
+ * ar.ec/ar.lc and the preserved FP registers f2-f5, f16-f31.
+ * Any unwinder failure aborts with -EINVAL (via the _C macro above).
+ */
+static int ass_to_mouth(struct cpt_ia64_regs *r, struct task_struct *tsk,
+			struct cpt_context *ctx)
+{
+	int err;
+	struct unw_frame_info info;
+	struct ia64_fpreg fpval;
+	int i;
+
+	unw_init_from_blocked_task(&info, tsk);
+	_C(unw_unwind_to_user(&info));
+
+	/* NAT_BITS */
+	do {
+		unsigned long scratch_unat;
+
+		scratch_unat = info.sw->caller_unat;
+		if (info.pri_unat_loc)
+			scratch_unat = *info.pri_unat_loc;
+
+		r->nat[0] = ia64_get_scratch_nat_bits(task_pt_regs(tsk), scratch_unat);
+		/* Just to be on safe side. */
+		r->nat[0] &= 0xFFFFFFFFUL;
+	} while (0);
+
+	/* R4-R7 */
+	for (i = 4; i <= 7; i++) {
+		char nat = 0;
+		_C(unw_access_gr(&info, i, &r->gr[i], &nat, 0));
+		r->nat[0] |= (nat != 0) << i;
+	}
+
+	/* B1-B5 */
+	for (i = 1; i <= 5; i++) {
+		_C(unw_access_br(&info, i, &r->br[i], 0));
+	}
+
+	/* AR_EC, AR_LC */
+	_C(unw_access_ar(&info, UNW_AR_EC, &r->ar_ec, 0));
+	_C(unw_access_ar(&info, UNW_AR_LC, &r->ar_lc, 0));
+
+	/* F2..F5, F16..F31 */
+	for (i = 2; i <= 5; i++) {
+		_C(unw_get_fr(&info, i, &fpval));
+		memcpy(&r->fr[i*2], &fpval, 16);
+	}
+	for (i = 16; i <= 31; i++) {
+		_C(unw_get_fr(&info, i, &fpval));
+		memcpy(&r->fr[i*2], &fpval, 16);
+	}
+	return 0;
+}
+
+#undef _C
+
+/*
+ * Dump the complete IA-64 register state of @tsk into the image:
+ * preserved registers (via ass_to_mouth), scratch registers from
+ * pt_regs, FP state (forcing a lazy-FPU save if this CPU owns it),
+ * debug registers and the RSE backing store contents with NaT bits.
+ * Returns 0 on success or a negative errno.
+ *
+ * Fixes vs. original:
+ *  - the num_regs > 96 error path returned directly and leaked the
+ *    scratch page; it now goes through the common 'out:' cleanup;
+ *  - the dbr copy used sizeof(r->ibr); use sizeof(r->dbr).
+ */
+static int dump_registers(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	int err;
+	unsigned long pg;
+	struct cpt_ia64_regs *r;
+	struct ia64_psr *psr;
+	struct switch_stack *sw;
+	struct pt_regs *pt;
+	void *krbs = (void *)tsk + IA64_RBS_OFFSET;
+	unsigned long reg;
+
+	/* Dead tasks have no register state worth saving. */
+	if (tsk->exit_state)
+		return 0;
+
+	pt = task_pt_regs(tsk);
+
+	sw = (struct switch_stack *) (tsk->thread.ksp + 16);
+
+	if ((pg = __get_free_page(GFP_KERNEL)) == 0)
+		return -ENOMEM;
+
+	r = (void*)pg;
+	/* To catch if we forgot some register */
+	memset(r, 0xA5, sizeof(*r));
+
+	r->gr[0] = 0;
+	r->fr[0] = r->fr[1] = 0;
+	r->fr[2] = 0x8000000000000000UL;
+	r->fr[3] = 0xffff;
+
+	r->nat[0] = r->nat[1] = 0;
+
+	err = ass_to_mouth(r, tsk, ctx);
+	if (err) {
+		printk("ass_to_mouth error %d\n", err);
+		goto out;
+	}
+
+	/* gr 1,2-3,8-11,12-13,14,15,16-31 are on pt_regs */
+	memcpy(&r->gr[1], &pt->r1, 8*(2-1));
+	memcpy(&r->gr[2], &pt->r2, 8*(4-2));
+	memcpy(&r->gr[8], &pt->r8, 8*(12-8));
+	memcpy(&r->gr[12], &pt->r12, 8*(14-12));
+	memcpy(&r->gr[14], &pt->r14, 8*(15-14));
+	memcpy(&r->gr[15], &pt->r15, 8*(16-15));
+	memcpy(&r->gr[16], &pt->r16, 8*(32-16));
+
+	r->br[0] = pt->b0;
+	r->br[6] = pt->b6;
+	r->br[7] = pt->b7;
+
+	r->ar_bspstore = pt->ar_bspstore;
+	r->ar_unat = pt->ar_unat;
+	r->ar_pfs = pt->ar_pfs;
+	r->ar_ccv = pt->ar_ccv;
+	r->ar_fpsr = pt->ar_fpsr;
+	r->ar_csd = pt->ar_csd;
+	r->ar_ssd = pt->ar_ssd;
+	r->ar_rsc = pt->ar_rsc;
+
+	r->cr_iip = pt->cr_iip;
+	r->cr_ipsr = pt->cr_ipsr;
+
+	r->pr = pt->pr;
+
+	r->cfm = pt->cr_ifs;
+	r->ar_rnat = pt->ar_rnat;
+
+	/* fpregs 6..9,10..11 are in pt_regs */
+	memcpy(&r->fr[2*6], &pt->f6, 16*(10-6));
+	memcpy(&r->fr[2*10], &pt->f10, 16*(12-10));
+	/* fpreg 12..15 are on switch stack */
+	memcpy(&r->fr[2*12], &sw->f12, 16*(16-12));
+	/* fpregs 32...127: flush lazily-held high FP state first. */
+	psr = ia64_psr(task_pt_regs(tsk));
+	preempt_disable();
+	if (ia64_is_local_fpu_owner(tsk) && psr->mfh) {
+		psr->mfh = 0;
+		tsk->thread.flags |= IA64_THREAD_FPH_VALID;
+		ia64_save_fpu(&tsk->thread.fph[0]);
+	}
+	preempt_enable();
+	memcpy(&r->fr[32*2], tsk->thread.fph, 16*(128-32));
+
+	if (tsk->thread.flags & IA64_THREAD_DBG_VALID) {
+		memcpy(r->ibr, tsk->thread.ibr, sizeof(r->ibr));
+		memcpy(r->dbr, tsk->thread.dbr, sizeof(r->dbr));
+	} else {
+		memset(r->ibr, 0, sizeof(r->ibr));
+		memset(r->dbr, 0, sizeof(r->dbr));
+	}
+
+	r->loadrs = pt->loadrs;
+	r->num_regs = ia64_rse_num_regs(krbs, krbs + 8*(pt->loadrs >> 19));
+	if ((long)pt->cr_ifs > 0)
+		r->num_regs += (pt->cr_ifs & 0x7f);
+
+	if (r->num_regs > 96) {
+		eprintk_ctx(CPT_FID " too much RSE regs %lu\n",
+			    CPT_TID(tsk), r->num_regs);
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (reg = 0; reg < r->num_regs; reg++) {
+		unsigned long *ptr = ia64_rse_skip_regs(krbs, reg);
+		unsigned long *rnatp = ia64_rse_rnat_addr(ptr);
+
+		r->gr[32+reg] = *ptr;
+
+		if ((unsigned long)rnatp >= sw->ar_bspstore)
+			rnatp = &sw->ar_rnat;
+		if (*rnatp & (1UL<<ia64_rse_slot_num(ptr))) {
+			if (reg < 32)
+				r->nat[0] |= (1UL<<(reg+32));
+			else
+				r->nat[1] |= (1UL<<(reg-32));
+		}
+	}
+	if (r->nat[0] | r->nat[1])
+		wprintk_ctx(CPT_FID " nat bits %lx%016lx\n", CPT_TID(tsk),
+			    r->nat[1], r->nat[0]);
+
+	cpt_open_object(NULL, ctx);
+	r->cpt_next = sizeof(*r);
+	r->cpt_object = CPT_OBJ_IA64_REGS;
+	r->cpt_hdrlen = sizeof(*r);
+	r->cpt_content = CPT_CONTENT_VOID;
+	ctx->write(r, sizeof(*r), ctx);
+	cpt_close_object(ctx);
+	err = 0;
+
+out:
+	free_page(pg);
+	return err;
+}
+#endif
+
+/*
+ * Dump the used portion of the task's kernel stack as a CPT_OBJ_BITS
+ * object.  The span is from the current kernel stack pointer up to the
+ * stack base (arch-specific fields); on IA-64 the region between ksp
+ * and the end of pt_regs is saved.  Always returns 0.
+ */
+static int dump_kstack(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits hdr;
+	unsigned long size;
+	void *start;
+
+	cpt_open_object(NULL, ctx);
+
+#ifdef CONFIG_X86_64
+	size = tsk->thread.rsp0 - tsk->thread.rsp;
+	start = (void*)tsk->thread.rsp;
+#elif defined(CONFIG_X86_32)
+	size = tsk->thread.esp0 - tsk->thread.esp;
+	start = (void*)tsk->thread.esp;
+#elif defined(CONFIG_IA64)
+	size = (unsigned long)(task_pt_regs(tsk)+1) - tsk->thread.ksp;
+	start = (void*)tsk->thread.ksp;
+#else
+#error Arch is not supported
+#endif
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size);
+	hdr.cpt_object = CPT_OBJ_BITS;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_STACK;
+	hdr.cpt_size = size;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(start, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+#ifdef CONFIG_X86
+/* Formats of i387_fxsave_struct are the same for x86_64
+ * and i386. Plain luck. */
+
+/*
+ * Dump the task's FPU state as a CPT_OBJ_BITS object.  The fxsave
+ * layout is written when available; on i386 CPUs without FXSR the
+ * smaller fsave layout is written and tagged FPUSTATE_OLD so restore
+ * can tell them apart.  Always returns 0.
+ */
+static int dump_fpustate(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits hdr;
+	unsigned long size;
+	int type;
+
+	cpt_open_object(NULL, ctx);
+
+	type = CPT_CONTENT_X86_FPUSTATE;
+	size = sizeof(struct i387_fxsave_struct);
+#ifndef CONFIG_X86_64
+	if (!cpu_has_fxsr) {
+		size = sizeof(struct i387_fsave_struct);
+		type = CPT_CONTENT_X86_FPUSTATE_OLD;
+	}
+#endif
+
+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(size);
+	hdr.cpt_object = CPT_OBJ_BITS;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = type;
+	hdr.cpt_size = size;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(&tsk->thread.i387, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_IA64
+
+/* IA-64: FP state is saved together with the registers in
+ * dump_registers(), so there is nothing to do here. */
+static int dump_fpustate(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Pack a kernel siginfo_t into the image representation.  The image
+ * reuses the generic cpt_pid/cpt_uid/cpt_sigval fields for whatever
+ * the si_code class carries (e.g. timer id/overrun for __SI_TIMER,
+ * band/fd for __SI_POLL); pids are translated to in-VE virtual pids
+ * where applicable.  Always returns 0.
+ */
+static int encode_siginfo(struct cpt_siginfo_image *si, siginfo_t *info)
+{
+	si->cpt_signo = info->si_signo;
+	si->cpt_errno = info->si_errno;
+	si->cpt_code = info->si_code;
+
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:
+		si->cpt_pid = info->si_tid;
+		si->cpt_uid = info->si_overrun;
+		si->cpt_sigval = cpt_ptr_export(info->_sifields._timer._sigval.sival_ptr);
+		si->cpt_utime = info->si_sys_private;
+		break;
+	case __SI_POLL:
+		si->cpt_pid = info->si_band;
+		si->cpt_uid = info->si_fd;
+		break;
+	case __SI_FAULT:
+		si->cpt_sigval = cpt_ptr_export(info->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		si->cpt_pid = info->si_trapno;
+#endif
+		break;
+	case __SI_CHLD:
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_to_vpid(info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = info->si_status;
+		si->cpt_stime = info->si_stime;
+		si->cpt_utime = info->si_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_to_vpid(info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = cpt_ptr_export(info->si_ptr);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Dump every queued siginfo of a sigpending list as CPT_OBJ_SIGINFO
+ * records.  Returns 0 on success (including an empty queue) or
+ * -EINVAL if an entry cannot be encoded.
+ *
+ * NOTE(review): the -EINVAL return skips cpt_pop_object(), unlike the
+ * success path — confirm whether the ctx object stack must be unwound
+ * on this error path.
+ */
+static int dump_sigqueue(struct sigpending *list, struct cpt_context *ctx)
+{
+	struct sigqueue *q;
+	loff_t saved_obj;
+
+	if (list_empty(&list->list))
+		return 0;
+
+	cpt_push_object(&saved_obj, ctx);
+	list_for_each_entry(q, &list->list, list) {
+		struct cpt_siginfo_image si;
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		si.cpt_qflags = q->flags;
+		si.cpt_user = q->user->uid;
+
+		if (encode_siginfo(&si, &q->info))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+
+
+/*
+ * Dump one shared signal_struct as a CPT_OBJ_SIGNAL_STRUCT object:
+ * process group / session / old tty pgrp (each translated to virtual
+ * pids and classified NORMAL vs ORPHAN depending on whether the leader
+ * task still exists), controlling tty reference, shared pending
+ * signals, group-exit state, accounting counters and rlimits.
+ * Returns 0 on success or -EINVAL on inconsistent state.
+ */
+static int dump_one_signal_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct signal_struct *sig = obj->o_obj;
+	struct cpt_signal_image *v = cpt_get_buf(ctx);
+	struct task_struct *tsk;
+	int i;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SIGNAL_STRUCT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	/* Process group: mark ORPHAN if the pgrp leader task is gone. */
+	if (sig->pgrp <= 0) {
+		eprintk_ctx("bad pgid\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_pgrp_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->pgrp);
+	if (tsk == NULL)
+		v->cpt_pgrp_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_pgrp = pid_to_vpid(sig->pgrp);
+
+	/* Old tty pgrp: may be NORMAL, ORPHAN or STRAY (no vpid mapping). */
+	v->cpt_old_pgrp = 0;
+	if (sig->tty_old_pgrp < 0) {
+		eprintk_ctx("bad tty_old_pgrp\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	if (sig->tty_old_pgrp > 0) {
+		v->cpt_old_pgrp_type = CPT_PGRP_NORMAL;
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->tty_old_pgrp);
+		if (tsk == NULL) {
+			v->cpt_old_pgrp_type = CPT_PGRP_ORPHAN;
+			tsk = find_task_by_pid_type_ve(PIDTYPE_PGID, sig->tty_old_pgrp);
+		}
+		read_unlock(&tasklist_lock);
+		if (tsk == NULL) {
+			eprintk_ctx("tty_old_pgrp does not exist anymore\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_old_pgrp = _pid_to_vpid(sig->tty_old_pgrp);
+		if ((int)v->cpt_old_pgrp < 0) {
+			dprintk_ctx("stray tty_old_pgrp %d\n", sig->tty_old_pgrp);
+			v->cpt_old_pgrp = -1;
+			v->cpt_old_pgrp_type = CPT_PGRP_STRAY;
+		}
+	}
+
+	/* Session: same NORMAL/ORPHAN classification as the pgrp. */
+	if (sig->session <= 0) {
+		eprintk_ctx("bad session\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_session_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->session);
+	if (tsk == NULL)
+		v->cpt_session_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_session = pid_to_vpid(sig->session);
+
+	v->cpt_leader = sig->leader;
+	v->cpt_ctty = CPT_NULL;
+	if (sig->tty) {
+		cpt_object_t *cobj = lookup_cpt_object(CPT_OBJ_TTY, sig->tty, ctx);
+		if (cobj)
+			v->cpt_ctty = cobj->o_pos;
+		else {
+			eprintk_ctx("controlling tty is not found\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(&v->cpt_sigpending, &sig->shared_pending.signal, 8);
+
+	v->cpt_curr_target = 0;
+	if (sig->curr_target)
+		v->cpt_curr_target = virt_pid(sig->curr_target);
+	v->cpt_group_exit = ((sig->flags & SIGNAL_GROUP_EXIT) != 0);
+	v->cpt_group_exit_code = sig->group_exit_code;
+	v->cpt_group_exit_task = 0;
+	if (sig->group_exit_task)
+		v->cpt_group_exit_task = virt_pid(sig->group_exit_task);
+	v->cpt_notify_count = sig->notify_count;
+	v->cpt_group_stop_count = sig->group_stop_count;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
+	v->cpt_utime = sig->utime;
+	v->cpt_stime = sig->stime;
+	v->cpt_cutime = sig->cutime;
+	v->cpt_cstime = sig->cstime;
+	v->cpt_nvcsw = sig->nvcsw;
+	v->cpt_nivcsw = sig->nivcsw;
+	v->cpt_cnvcsw = sig->cnvcsw;
+	v->cpt_cnivcsw = sig->cnivcsw;
+	v->cpt_min_flt = sig->min_flt;
+	v->cpt_maj_flt = sig->maj_flt;
+	v->cpt_cmin_flt = sig->cmin_flt;
+	v->cpt_cmaj_flt = sig->cmaj_flt;
+
+	/* Build-time guard: image must have room for all kernel rlimits. */
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = sig->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = sig->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	/* NOTE(review): dump_sigqueue() return value is ignored here —
+	 * confirm whether an encode failure should abort the dump. */
+	dump_sigqueue(&sig->shared_pending, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+#ifdef CONFIG_UTRACE
+/*
+ * Locate the ptrace tracer of @tsk via its utrace engines.
+ * On success *tracer is the single ptrace_state attached (or NULL if
+ * untraced) and 0 is returned.  Fails with -EBUSY for non-ptrace
+ * utrace engines, tracing from outside the VE, or multiple tracers,
+ * and -EINVAL for inconsistent engine state — all of which make the
+ * task uncheckpointable.  No reference is taken on the tracer: the
+ * whole VE is frozen at this point.
+ */
+static int find_tracer(struct task_struct *tsk, struct ptrace_state **tracer,
+		       cpt_context_t *ctx)
+{
+	int err = 0;
+	struct utrace *utrace;
+	extern struct utrace_engine_ops ptrace_utrace_ops;
+
+	*tracer = NULL;
+
+	rcu_read_lock();
+	utrace = rcu_dereference(tsk->utrace);
+	smp_rmb();
+
+	if (utrace) {
+#ifndef CONFIG_PTRACE
+		err = -EBUSY;
+#else
+		struct utrace_attached_engine *engine;
+
+		spin_lock(&utrace->lock);
+		list_for_each_entry_rcu(engine, &utrace->engines, entry) {
+			struct ptrace_state *state;
+
+			if (engine->ops != &ptrace_utrace_ops) {
+				eprintk_ctx("task " CPT_FID " is utraced\n", CPT_TID(tsk));
+				err = -EBUSY;
+				break;
+			}
+			state = (struct ptrace_state *) engine->data;
+			if (!state) {
+				eprintk_ctx("task " CPT_FID " has null utrace engine\n", CPT_TID(tsk));
+				err = -EINVAL;
+				break;
+			}
+			if (!state->parent) {
+				eprintk_ctx("task " CPT_FID " is ptraced by nobody\n", CPT_TID(tsk));
+				err = -EINVAL;
+				break;
+			}
+			if (!lookup_cpt_object(CPT_OBJ_TASK, state->parent, ctx)) {
+				eprintk_ctx("task " CPT_FID " is ptraced from ve0\n", CPT_TID(tsk));
+				err = -EBUSY;
+				break;
+			}
+			if (*tracer) {
+				eprintk_ctx("task " CPT_FID " is ptraced twice\n", CPT_TID(tsk));
+				err = -EBUSY;
+				break;
+			}
+			/* No need to get this task, it is frozen. */
+			*tracer = state;
+		}
+		spin_unlock(&utrace->lock);
+#endif
+	}
+	rcu_read_unlock();
+	return err;
+}
+
+/*
+ * Fill the ptrace-related fields of the task image: tracer pid,
+ * CPT_PT_* flag word (translated from the tracer's PTRACE_O_* options)
+ * and event message.  Verifies that the recorded utrace flags of both
+ * the task and the engine match what restore will reconstruct, failing
+ * with -EBUSY on any mismatch.
+ *
+ * NOTE(review): this function sits inside the #ifdef CONFIG_UTRACE
+ * region opened above find_tracer(), so the #ifndef CONFIG_UTRACE
+ * branch below appears unreachable from this chunk — confirm against
+ * the full file.
+ */
+static int dump_ptrace(struct task_struct *tsk, struct cpt_task_image *v,
+		       cpt_context_t *ctx)
+{
+#ifndef CONFIG_UTRACE
+	v->cpt_ptrace = tsk->ptrace;
+	v->cpt_ptrace_message = tsk->ptrace_message;
+	if (tsk->parent &&
+	    tsk->parent != tsk->real_parent &&
+	    !lookup_cpt_object(CPT_OBJ_TASK, tsk->parent, ctx)) {
+		eprintk_ctx("task " CPT_FID " is ptraced from ve0\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#else
+	int err;
+	struct ptrace_state *tracer;
+
+	v->cpt_ptrace = 0;
+	v->cpt_ptrace_message = 0;
+	err = find_tracer(tsk, &tracer, ctx);
+	if (err)
+		return err;
+
+	if (tracer) {
+		unsigned long flags;
+
+		v->cpt_ppid = virt_pid(tracer->parent);
+		if (tracer->have_eventmsg)
+			v->cpt_ptrace_message = tracer->u.eventmsg;
+		v->cpt_ptrace = CPT_PT_PTRACED;
+		if (tracer->parent != tsk->parent)
+			v->cpt_ptrace |= CPT_PT_ATTACHED;
+		if (tracer->options & PTRACE_O_TRACESYSGOOD)
+			v->cpt_ptrace |= CPT_PT_TRACESYSGOOD;
+		if (tracer->options & PTRACE_O_TRACEFORK)
+			v->cpt_ptrace |= CPT_PT_TRACE_FORK;
+		if (tracer->options & PTRACE_O_TRACEVFORK)
+			v->cpt_ptrace |= CPT_PT_TRACE_VFORK;
+		if (tracer->options & PTRACE_O_TRACECLONE)
+			v->cpt_ptrace |= CPT_PT_TRACE_CLONE;
+		if (tracer->options & PTRACE_O_TRACEEXEC)
+			v->cpt_ptrace |= CPT_PT_TRACE_EXEC;
+		if (tracer->options & PTRACE_O_TRACEVFORKDONE)
+			v->cpt_ptrace |= CPT_PT_TRACE_VFORK_DONE;
+		if (tracer->options & PTRACE_O_TRACEEXIT)
+			v->cpt_ptrace |= CPT_PT_TRACE_EXIT;
+		if (tracer->cap_sys_ptrace)
+			v->cpt_ptrace |= CPT_PT_PTRACE_CAP;
+
+		/* Recompute the utrace flag set restore would install and
+		 * demand an exact match with the live state. */
+		flags = UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC) |
+			UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL) |
+			UTRACE_EVENT(CLONE) | UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP);
+		if (tracer->options & PTRACE_O_TRACEEXIT)
+			flags |= UTRACE_EVENT(EXIT);
+		if (tracer->options & PTRACE_O_TRACEVFORKDONE)
+			flags |= UTRACE_EVENT(VFORK_DONE);
+		if (tsk->thread_info->flags & _TIF_SYSCALL_TRACE)
+			flags |= UTRACE_EVENT_SYSCALL;
+		if (tsk->thread_info->flags & _TIF_SINGLESTEP)
+			flags |= UTRACE_ACTION_SINGLESTEP;
+		if (tsk->pn_state)
+			flags |= UTRACE_ACTION_QUIESCE;
+		if (tsk->utrace_flags != flags) {
+			eprintk_ctx("tsk->utrace_flags=%lx %lx\n", tsk->utrace_flags, tsk->utrace_flags^flags);
+			return -EBUSY;
+		}
+		if (tracer->engine->flags != flags) {
+			eprintk_ctx("tracer->engine->flags=%lx %lx\n", tracer->engine->flags, tracer->engine->flags^flags);
+			return -EBUSY;
+		}
+	}
+#endif
+	return 0;
+}
+#endif
+
+/*
+ * Dump the siginfo a traced task stopped on (if any) as a
+ * CPT_OBJ_LASTSIGINFO record.  Without utrace it comes straight from
+ * tsk->last_siginfo; with utrace it is taken from the tracer state and
+ * is only valid while the task is in TASK_TRACED with a pending
+ * notification.  Returns 0 on success or a negative errno.
+ *
+ * NOTE(review): the -EINVAL path after cpt_push_object() returns
+ * without cpt_pop_object() — confirm whether unwinding is required.
+ */
+static int dump_lastsiginfo(struct task_struct *tsk, cpt_context_t *ctx)
+{
+	struct siginfo * lsi;
+
+#ifndef CONFIG_UTRACE
+	lsi = tsk->last_siginfo;
+#else
+	int err;
+	struct ptrace_state *tracer;
+
+	err = find_tracer(tsk, &tracer, ctx);
+	if (err)
+		return err;
+	if (tracer == NULL || tracer->have_eventmsg)
+		return 0;
+	lsi = tracer->u.siginfo;
+	if (lsi && (tsk->state != TASK_TRACED || tsk->pn_state == 0)) {
+		eprintk_ctx(CPT_FID " traced task st=%lu %d", CPT_TID(tsk),
+			    tsk->state, tsk->pn_state);
+		return -EBUSY;
+	}
+#endif
+
+	if (lsi) {
+		struct cpt_siginfo_image si;
+		loff_t saved_obj;
+
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_LASTSIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		if (encode_siginfo(&si, lsi))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	return 0;
+}
+
+/*
+ * Reject tasks using kernel facilities the checkpointer cannot
+ * serialize: splice pipes, session keyrings, NUMA mempolicies and TUX.
+ * Returns 0 if the task is dumpable, -EBUSY otherwise.
+ */
+int cpt_check_unsupported(struct task_struct *tsk, cpt_context_t *ctx)
+{
+	if (tsk->splice_pipe) {
+		eprintk_ctx("splice is used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#ifdef CONFIG_KEYS
+	if (tsk->request_key_auth || tsk->thread_keyring) {
+		eprintk_ctx("keys are used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+#ifdef CONFIG_NUMA
+	if (tsk->mempolicy) {
+		eprintk_ctx("NUMA mempolicy is used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+#ifdef CONFIG_TUX
+	if (tsk->tux_info) {
+		eprintk_ctx("TUX is used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Return non-zero for tasks that must not be included in the dump:
+ * the checkpointing task itself and the in-kernel NFS lockd thread.
+ */
+int cpt_skip_task(struct task_struct *tsk)
+{
+	if (tsk == current)
+		return 1;
+
+	if (nlmsvc_pid == tsk->pid) /* FIXME - make it better */
+		return 1;
+
+	return 0;
+}
+
+static int dump_one_process(cpt_object_t *obj, struct cpt_context *ctx) /* write one cpt_task_image (+aux objects) for the task held by obj */
+{
+	struct task_struct *tsk = obj->o_obj;
+	int last_thread;
+	struct cpt_task_image *v = cpt_get_buf(ctx); /* scratch image buffer; must be released on every exit path */
+	cpt_object_t *tobj;
+	cpt_object_t *tg_obj;
+	loff_t saved_obj;
+	int i;
+	int err;
+	struct timespec delta;
+	struct mm_struct * tsk_mm;
+	struct files_struct * tsk_files;
+	struct fs_struct * tsk_fs;
+	struct namespace * tsk_ns;
+
+	if (cpt_skip_task(tsk))
+		return 0;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_signal = CPT_NULL; /* backpatched later via pwrite once signal_struct is dumped */
+	tg_obj = lookup_cpt_object(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx);
+	if (!tg_obj) BUG(); /* collected in cpt_collect_signals, must exist */
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_TASK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_state = tsk->state;
+	if (tsk->state == EXIT_ZOMBIE) { /* NOTE(review): ->state compared against EXIT_* constants — verify this matches 2.6.18 state encoding */
+		eprintk_ctx("invalid zombie state on" CPT_FID "\n", CPT_TID(tsk));
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	} else if (tsk->state == EXIT_DEAD) {
+		if (tsk->exit_state != EXIT_DEAD &&
+		    tsk->exit_state != EXIT_ZOMBIE) {
+			eprintk_ctx("invalid exit_state %ld on" CPT_FID "\n", tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	if (tsk->exit_state) { /* exiting tasks are stored with their exit_state as the image state */
+		v->cpt_state = tsk->exit_state;
+		if (tsk->state != EXIT_DEAD) {
+			eprintk_ctx("invalid tsk->state %ld/%ld on" CPT_FID "\n",
+				tsk->state, tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	if (cpt_check_unsupported(tsk, ctx)) {
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+
+	v->cpt_flags = tsk->flags & CPT_TASK_FLAGS_MASK;
+	v->cpt_prio = tsk->prio;
+	v->cpt_exit_code = tsk->exit_code;
+	v->cpt_exit_signal = tsk->exit_signal;
+	v->cpt_pdeath_signal = tsk->pdeath_signal;
+	v->cpt_static_prio = tsk->static_prio;
+	v->cpt_rt_priority = tsk->rt_priority;
+	v->cpt_policy = tsk->policy;
+	if (v->cpt_policy != SCHED_NORMAL) { /* RT/batch scheduling classes cannot be restored */
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	/* Unpleasant moment. When leader of thread group exits,
+	 * it remains in zombie state until all the group exits.
+	 * We save not-NULL pointers to process mm/files/fs, so
+	 * that we can restore this thread group.
+	 */
+	tsk_mm = tsk->mm;
+	tsk_files = tsk->files;
+	tsk_fs = tsk->fs;
+	tsk_ns = tsk->nsproxy ? tsk->nsproxy->namespace : NULL;
+
+	if (tsk->exit_state && !thread_group_empty(tsk) &&
+	    thread_group_leader(tsk)) { /* borrow mm/files/fs/ns from any live sibling thread */
+		struct task_struct * p = tsk;
+
+		read_lock(&tasklist_lock);
+		do {
+			if (p->mm)
+				tsk_mm = p->mm;
+			if (p->files)
+				tsk_files = p->files;
+			if (p->fs)
+				tsk_fs = p->fs;
+			if (p->nsproxy && p->nsproxy->namespace)
+				tsk_ns = p->nsproxy->namespace;
+			p = next_thread(p);
+		} while (p != tsk);
+		read_unlock(&tasklist_lock);
+	}
+
+	v->cpt_mm = CPT_NULL; /* the following refs are image-file positions of previously dumped objects */
+	if (tsk_mm) {
+		tobj = lookup_cpt_object(CPT_OBJ_MM, tsk_mm, ctx);
+		if (!tobj) BUG();
+		v->cpt_mm = tobj->o_pos;
+	}
+	v->cpt_files = CPT_NULL;
+	if (tsk_files) {
+		tobj = lookup_cpt_object(CPT_OBJ_FILES, tsk_files, ctx);
+		if (!tobj) BUG();
+		v->cpt_files = tobj->o_pos;
+	}
+	v->cpt_fs = CPT_NULL;
+	if (tsk_fs) {
+		tobj = lookup_cpt_object(CPT_OBJ_FS, tsk_fs, ctx);
+		if (!tobj) BUG();
+		v->cpt_fs = tobj->o_pos;
+	}
+	v->cpt_namespace = CPT_NULL;
+	if (tsk_ns) {
+		tobj = lookup_cpt_object(CPT_OBJ_NAMESPACE, tsk_ns, ctx);
+		if (!tobj) BUG();
+		v->cpt_namespace = tobj->o_pos;
+
+		if (tsk_ns != current->nsproxy->namespace) /* warning only — dump continues */
+			eprintk_ctx("namespaces are not supported:"
+					"process " CPT_FID "\n", CPT_TID(tsk));
+	}
+	v->cpt_sysvsem_undo = CPT_NULL;
+	if (tsk->sysvsem.undo_list && !tsk->exit_state) {
+		tobj = lookup_cpt_object(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx);
+		if (!tobj) BUG();
+		v->cpt_sysvsem_undo = tobj->o_pos;
+	}
+	v->cpt_sighand = CPT_NULL;
+	if (tsk->sighand) {
+		tobj = lookup_cpt_object(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx);
+		if (!tobj) BUG();
+		v->cpt_sighand = tobj->o_pos;
+	}
+	v->cpt_sigblocked = cpt_sigset_export(&tsk->blocked);
+	v->cpt_sigrblocked = cpt_sigset_export(&tsk->real_blocked);
+	v->cpt_sigsuspend_blocked = cpt_sigset_export(&tsk->saved_sigmask);
+
+	v->cpt_pid = virt_pid(tsk); /* all ids saved as VE-virtual, not host, values */
+	v->cpt_tgid = virt_tgid(tsk);
+	v->cpt_ppid = tsk->parent ? virt_pid(tsk->parent) : 0;
+#ifdef CONFIG_UTRACE
+	v->cpt_rppid = tsk->parent ? virt_pid(tsk->parent) : 0;
+#else
+	v->cpt_rppid = tsk->real_parent ? virt_pid(tsk->real_parent) : 0;
+#endif
+	v->cpt_pgrp = virt_pgid(tsk);
+	v->cpt_session = virt_sid(tsk);
+	v->cpt_old_pgrp = 0;
+	if (tsk->signal->tty_old_pgrp)
+		v->cpt_old_pgrp = _pid_to_vpid(tsk->signal->tty_old_pgrp);
+	v->cpt_leader = tsk->group_leader ? virt_pid(tsk->group_leader) : 0;
+	v->cpt_set_tid = (unsigned long)tsk->set_child_tid;
+	v->cpt_clear_tid = (unsigned long)tsk->clear_child_tid;
+	memcpy(v->cpt_comm, tsk->comm, 16);
+	v->cpt_user = tsk->user->uid;
+	v->cpt_uid = tsk->uid;
+	v->cpt_euid = tsk->euid;
+	v->cpt_suid = tsk->suid;
+	v->cpt_fsuid = tsk->fsuid;
+	v->cpt_gid = tsk->gid;
+	v->cpt_egid = tsk->egid;
+	v->cpt_sgid = tsk->sgid;
+	v->cpt_fsgid = tsk->fsgid;
+	v->cpt_ngids = 0;
+	if (tsk->group_info && tsk->group_info->ngroups != 0) {
+		int i = tsk->group_info->ngroups; /* shadows outer i intentionally */
+		if (i > 32) { /* only the small_block portion of group_info is saved */
+			/* Shame... I did a simplified version and _forgot_
+			 * about this. Later, later. */
+			eprintk_ctx("too many of groups " CPT_FID "\n", CPT_TID(tsk)); /* NOTE(review): leaks ctx buf — other error paths call cpt_release_buf() first */
+			return -EINVAL;
+		}
+		v->cpt_ngids = i;
+		for (i--; i>=0; i--)
+			v->cpt_gids[i] = tsk->group_info->small_block[i];
+	}
+	v->cpt_prctl_uac = 0;
+	v->cpt_prctl_fpemu = 0;
+	v->__cpt_pad1 = 0;
+#ifdef CONFIG_IA64
+	v->cpt_prctl_uac = (tsk->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT;
+	v->cpt_prctl_fpemu = (tsk->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT;
+#endif
+	memcpy(&v->cpt_ecap, &tsk->cap_effective, 8); /* capability sets copied as raw 64-bit blobs */
+	memcpy(&v->cpt_icap, &tsk->cap_inheritable, 8);
+	memcpy(&v->cpt_pcap, &tsk->cap_permitted, 8);
+	v->cpt_keepcap = tsk->keep_capabilities;
+
+	v->cpt_did_exec = tsk->did_exec;
+	v->cpt_exec_domain = -1;
+	v->cpt_thrflags = tsk->thread_info->flags & ~(1<<TIF_FREEZE); /* drop the freezer flag set by checkpointing itself */
+	v->cpt_64bit = 0;
+#ifdef CONFIG_X86_64
+	/* Clear x86_64 specific flags */
+	v->cpt_thrflags &= ~(_TIF_FORK|_TIF_IA32);
+	if (!(tsk->thread_info->flags & _TIF_IA32)) {
+		ctx->tasks64++;
+		v->cpt_64bit = 1;
+	}
+#endif
+#ifdef CONFIG_IA64
+	/* Clear ia64 specific flags */
+	//// v->cpt_thrflags &= ~(_TIF_FORK|_TIF_ABI_PENDING|_TIF_IA32);
+	if (!IS_IA32_PROCESS(task_pt_regs(tsk))) {
+		ctx->tasks64++;
+		v->cpt_64bit = 1;
+	}
+#endif
+	v->cpt_thrstatus = tsk->thread_info->status;
+	v->cpt_addr_limit = -1;
+
+	v->cpt_personality = tsk->personality;
+
+#ifdef CONFIG_X86
+	for (i=0; i<GDT_ENTRY_TLS_ENTRIES; i++) {
+		if (i>=3) { /* image format only has room for 3 TLS descriptors */
+			eprintk_ctx("too many tls descs\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+#ifndef CONFIG_X86_64
+		v->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b)<<32) + tsk->thread.tls_array[i].a; /* pack 32-bit desc halves into one u64 */
+#else
+		v->cpt_tls[i] = tsk->thread.tls_array[i];
+#endif
+	}
+#endif
+
+	v->cpt_restart.fn = CPT_RBL_0; /* default: no syscall-restart block pending */
+	if (tsk->thread_info->restart_block.fn != current->thread_info->restart_block.fn) {
+		ktime_t e;
+
+		if (tsk->thread_info->restart_block.fn != nanosleep_restart
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+		    && tsk->thread_info->restart_block.fn != compat_nanosleep_restart
+#endif
+		    ) { /* only nanosleep restarts are understood by restore */
+			eprintk_ctx("unknown restart block %p\n", tsk->thread_info->restart_block.fn);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_restart.fn = CPT_RBL_NANOSLEEP;
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+		if (tsk->thread_info->restart_block.fn == compat_nanosleep_restart)
+			v->cpt_restart.fn = CPT_RBL_COMPAT_NANOSLEEP;
+#endif
+
+		e.tv64 = ((u64)tsk->thread_info->restart_block.arg1 << 32) |
+			(u64) tsk->thread_info->restart_block.arg0; /* expiry was stashed as two 32-bit args */
+		e = ktime_sub(e, timespec_to_ktime(ctx->cpt_monotonic_time)); /* store relative to checkpoint time */
+		v->cpt_restart.arg0 = ktime_to_ns(e);
+		v->cpt_restart.arg1 = 0;
+		v->cpt_restart.arg2 = tsk->thread_info->restart_block.arg2;
+		v->cpt_restart.arg3 = tsk->thread_info->restart_block.arg3;
+		dprintk_ctx(CPT_FID " %Lu\n", CPT_TID(tsk), (unsigned long long)v->cpt_restart.arg0);
+	}
+
+	v->cpt_it_real_incr = 0;
+	v->cpt_it_prof_incr = 0;
+	v->cpt_it_virt_incr = 0;
+	v->cpt_it_real_value = 0;
+	v->cpt_it_prof_value = 0;
+	v->cpt_it_virt_value = 0;
+	if (thread_group_leader(tsk) && tsk->exit_state == 0) { /* itimers are per-group; save from live leader only */
+		ktime_t rem;
+
+		v->cpt_it_real_incr = ktime_to_ns(tsk->signal->it_real_incr);
+		v->cpt_it_prof_incr = tsk->signal->it_prof_incr;
+		v->cpt_it_virt_incr = tsk->signal->it_virt_incr;
+
+		rem = hrtimer_get_remaining(&tsk->signal->real_timer);
+
+		if (hrtimer_active(&tsk->signal->real_timer)) {
+			if (rem.tv64 <= 0) /* already expired: store minimal positive value so it re-fires */
+				rem.tv64 = NSEC_PER_USEC;
+			v->cpt_it_real_value = ktime_to_ns(rem);
+			dprintk("cpt itimer " CPT_FID " %Lu\n", CPT_TID(tsk), (unsigned long long)v->cpt_it_real_value);
+		}
+		v->cpt_it_prof_value = tsk->signal->it_prof_expires;
+		v->cpt_it_virt_value = tsk->signal->it_virt_expires;
+	}
+	v->cpt_used_math = (tsk_used_math(tsk) != 0);
+
+	if (tsk->notifier) { /* block-signal notifier cannot be serialized */
+		eprintk_ctx("task notifier is in use: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	v->cpt_utime = tsk->utime;
+	v->cpt_stime = tsk->stime;
+	delta = tsk->start_time;
+	set_normalized_timespec(&delta,
+			delta.tv_sec - get_exec_env()->start_timespec.tv_sec,
+			delta.tv_nsec - get_exec_env()->start_timespec.tv_nsec); /* start time relative to VE start */
+	v->cpt_starttime = cpt_timespec_export(&delta);
+	v->cpt_nvcsw = tsk->nvcsw;
+	v->cpt_nivcsw = tsk->nivcsw;
+	v->cpt_min_flt = tsk->min_flt;
+	v->cpt_maj_flt = tsk->maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+	v->cpt_cutime = tsk->cutime;
+	v->cpt_cstime = tsk->cstime;
+	v->cpt_cnvcsw = tsk->cnvcsw;
+	v->cpt_cnivcsw = tsk->cnivcsw;
+	v->cpt_cmin_flt = tsk->cmin_flt;
+	v->cpt_cmaj_flt = tsk->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n"); /* deliberate link/assemble failure if the image has too few rlimit slots */
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#else
+	v->cpt_cutime = tsk->signal->cutime; /* >=2.6.9: accounting and rlimits moved into signal_struct */
+	v->cpt_cstime = tsk->signal->cstime;
+	v->cpt_cnvcsw = tsk->signal->cnvcsw;
+	v->cpt_cnivcsw = tsk->signal->cnivcsw;
+	v->cpt_cmin_flt = tsk->signal->cmin_flt;
+	v->cpt_cmaj_flt = tsk->signal->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n"); /* deliberate link/assemble failure if the image has too few rlimit slots */
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->signal->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->signal->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+#ifdef CONFIG_USER_RESOURCE
+	if (tsk->mm) /* beancounter references, stored as image positions */
+		v->cpt_mm_ub = cpt_lookup_ubc(tsk->mm->mm_ub, ctx);
+	else
+		v->cpt_mm_ub = CPT_NULL;
+	v->cpt_task_ub = cpt_lookup_ubc(tsk->task_bc.task_ub, ctx);
+	v->cpt_exec_ub = cpt_lookup_ubc(tsk->task_bc.exec_ub, ctx);
+	v->cpt_fork_sub = cpt_lookup_ubc(tsk->task_bc.fork_sub, ctx);
+#endif
+
+	v->cpt_pn_state = tsk->pn_state;
+	v->cpt_stopped_state = tsk->stopped_state;
+	v->cpt_sigsuspend_state = 0;
+
+#ifdef CONFIG_X86_32
+	if (tsk->thread.vm86_info) { /* vm86 mode cannot be checkpointed */
+		eprintk_ctx("vm86 task is running\n");
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+#endif
+
+	v->cpt_sigpending = cpt_sigset_export(&tsk->pending.signal);
+
+	if (dump_ptrace(tsk, v, ctx)) {
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+
+	ctx->write(v, sizeof(*v), ctx); /* main task image written; aux objects follow */
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	dump_kstack(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	err = dump_registers(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	if (err)
+		return err;
+
+	if (tsk_used_math(tsk)) {
+		cpt_push_object(&saved_obj, ctx);
+		dump_fpustate(tsk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+
+	err = dump_lastsiginfo(tsk, ctx); /* NOTE(review): err is overwritten below without being checked */
+#ifndef CONFIG_UTRACE
+	if (tsk->last_siginfo) { /* non-utrace kernels: last ptrace siginfo saved directly */
+		struct cpt_siginfo_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_LASTSIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		if (encode_siginfo(&si, tsk->last_siginfo))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+#endif
+
+	if (tsk->sas_ss_size) { /* alternate signal stack */
+		struct cpt_sigaltstack_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGALTSTACK;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		si.cpt_stack = tsk->sas_ss_sp;
+		si.cpt_stacksize = tsk->sas_ss_size;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->robust_list
+#ifdef CONFIG_COMPAT
+	    || tsk->compat_robust_list
+#endif
+	    ) { /* robust futex list head (userspace address) */
+		struct cpt_task_aux_image ai;
+		cpt_push_object(&saved_obj, ctx);
+
+		ai.cpt_next = sizeof(ai);
+		ai.cpt_object = CPT_OBJ_TASK_AUX;
+		ai.cpt_hdrlen = sizeof(ai);
+		ai.cpt_content = CPT_CONTENT_VOID;
+
+		ai.cpt_robust_list = (unsigned long)tsk->robust_list;
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_COMPAT
+		if (tsk->thread_info->flags&_TIF_IA32)
+			ai.cpt_robust_list = (unsigned long)tsk->compat_robust_list;
+#endif
+#endif
+		ctx->write(&ai, sizeof(ai), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	dump_sigqueue(&tsk->pending, ctx);
+
+	last_thread = 1;
+	read_lock(&tasklist_lock);
+	do { /* single pass: only the immediate next thread is inspected */
+		struct task_struct * next = next_thread(tsk);
+		if (next != tsk && !thread_group_leader(next))
+			last_thread = 0;
+	} while (0);
+	read_unlock(&tasklist_lock);
+
+	if (last_thread) { /* dump signal_struct once per group, then backpatch every member's cpt_signal ref */
+		struct task_struct *prev_tsk;
+		int err;
+		loff_t pos = ctx->file->f_pos; /* position where the signal_struct image lands */
+
+		cpt_push_object(&saved_obj, ctx);
+		err = dump_one_signal_struct(tg_obj, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+		if (err)
+			return err;
+
+		prev_tsk = tsk;
+		for (;;) { /* walk backwards over already-dumped tasks of this tgid */
+			if (prev_tsk->tgid == tsk->tgid) {
+				loff_t tg_pos;
+
+				tg_pos = obj->o_pos + offsetof(struct cpt_task_image, cpt_signal);
+				ctx->pwrite(&pos, sizeof(pos), ctx, tg_pos); /* patch cpt_signal in the already-written image */
+				if (thread_group_leader(prev_tsk))
+					break; /* leader is the earliest group member; done */
+			}
+
+			if (obj->o_list.prev == &ctx->object_array[CPT_OBJ_TASK]) {
+				eprintk_ctx("bug: thread group leader is lost\n");
+				return -EINVAL;
+			}
+
+			obj = list_entry(obj->o_list.prev, cpt_object_t, o_list);
+			prev_tsk = obj->o_obj;
+		}
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_tasks(struct cpt_context *ctx) /* write the TASKS section: one image per collected task */
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_TASKS);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		int err;
+
+		if ((err = dump_one_process(obj, ctx)) != 0) /* NOTE(review): early return leaves the section unclosed */
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_collect_signals(cpt_context_t *ctx) /* register signal_struct/sighand_struct of every task; -EBUSY on posix timers */
+{
+	cpt_object_t *obj;
+
+	/* Collect process fd sets */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		if (tsk->signal && !list_empty(&tsk->signal->posix_timers)) { /* posix timers have no image format */
+			eprintk_ctx("task %d/%d(%s) uses posix timers\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			return -EBUSY;
+		}
+		if (tsk->signal && cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx) == NULL)
+			return -ENOMEM;
+		if (tsk->sighand && cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx) == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+
+static int dump_one_sighand_struct(cpt_object_t *obj, struct cpt_context *ctx) /* dump one sighand_struct plus its non-default handlers */
+{
+	struct sighand_struct *sig = obj->o_obj;
+	struct cpt_sighand_image *v = cpt_get_buf(ctx);
+	int i;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SIGHAND_STRUCT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	for (i=0; i< _NSIG; i++) {
+		if (sig->action[i].sa.sa_handler != SIG_DFL ||
+		    sig->action[i].sa.sa_flags) { /* only non-default dispositions are stored */
+			loff_t saved_obj;
+			struct cpt_sighandler_image *o = cpt_get_buf(ctx);
+
+			cpt_push_object(&saved_obj, ctx);
+			cpt_open_object(NULL, ctx);
+
+			o->cpt_next = CPT_NULL;
+			o->cpt_object = CPT_OBJ_SIGHANDLER;
+			o->cpt_hdrlen = sizeof(*o);
+			o->cpt_content = CPT_CONTENT_VOID;
+
+			o->cpt_signo = i;
+			o->cpt_handler = (unsigned long)sig->action[i].sa.sa_handler;
+			o->cpt_restorer = 0;
+#ifdef CONFIG_X86
+			o->cpt_restorer = (unsigned long)sig->action[i].sa.sa_restorer;
+#endif
+			o->cpt_flags = sig->action[i].sa.sa_flags;
+			memcpy(&o->cpt_mask, &sig->action[i].sa.sa_mask, 8); /* sigset copied as raw 64-bit blob */
+			ctx->write(o, sizeof(*o), ctx);
+			cpt_release_buf(ctx);
+			cpt_close_object(ctx);
+			cpt_pop_object(&saved_obj, ctx);
+		}
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_sighand(struct cpt_context *ctx) /* write the SIGHAND_STRUCT section for all collected sighands */
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_SIGHAND_STRUCT);
+
+	for_each_object(obj, CPT_OBJ_SIGHAND_STRUCT) {
+		int err;
+
+		if ((err = dump_one_sighand_struct(obj, ctx)) != 0) /* NOTE(review): early return leaves the section unclosed */
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_process.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_process.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_process.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_process.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,13 @@
+int cpt_collect_signals(cpt_context_t *);
+int cpt_dump_signal(struct cpt_context *);
+int cpt_dump_sighand(struct cpt_context *);
+int cpt_dump_tasks(struct cpt_context *);
+
+int rst_signal_complete(struct cpt_task_image *ti, int *exiting, struct cpt_context *ctx);
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int rst_restore_process(struct cpt_context *ctx);
+int rst_process_linkage(struct cpt_context *ctx);
+
+int check_task_state(struct task_struct *tsk, struct cpt_context *ctx);
+int cpt_skip_task(struct task_struct *tsk);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_socket.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_socket.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_socket.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_socket.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,852 @@
+/*
+ *
+ *  kernel/cpt/cpt_socket.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/tcp.h>
+#include <linux/mount.h>
+#include <linux/ve_nfs.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+#include <net/tcp.h>
+#include <net/netlink_sock.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+
+static int dump_rqueue(int owner, struct sock *sk, struct cpt_context *ctx);
+
+
+/* Sockets are quite different from other kinds of files.
+ * There is one simplification: only one struct file can refer to a socket,
+ * so we could store information about socket directly in section FILES as
+ * a description of a file and append f.e. array of not-yet-accepted
+ * connections of listening socket as array of auxiliary data.
+ *
+ * Complications are:
+ * 1. TCP sockets can be orphans. We have to relocate orphans as well,
+ *    so we have to create special section for orphans.
+ * 2. AF_UNIX sockets are distinguished objects: set of links between
+ *    AF_UNIX sockets is quite arbitrary.
+ *    A. Each socket can refer to many files due to FD passing.
+ *    B. Each socket except for connected ones can have in queue skbs
+ *       sent by any of sockets.
+ *
+ *    2A is relatively easy: after our tasks are frozen we make an additional
+ *    recursive pass through the set of collected files and collect the
+ *    FD passed files. After end of recursion, all the files are treated
+ *    in the same way. All they will be stored in section FILES.
+ *
+ *    2B. We have to resolve all those references at some point.
+ *    It is the place where pipe-like approach to image fails.
+ *
+ * All this makes socket checkpointing quite cumbersome.
+ * Right now we collect all the sockets and assign some numeric index value
+ * to each of them. The socket section is separate and put after section FILES,
+ * so section FILES refers to sockets by index, section SOCKET refers to FILES
+ * as usual by position in image. All the refs inside socket section are
+ * by index. When restoring we read socket section, create objects to hold
+ * mappings index <-> pos. At the second pass we open sockets (simultaneously
+ * with their pairs) and create FILE objects.
+ */ 
+
+
+/* ====== FD passing ====== */
+
+/* Almost nobody does FD passing via AF_UNIX sockets, nevertheless we
+ * have to implement this. A problem is that in general case we receive
+ * skbs from an unknown context, so new files can arrive to checkpointed
+ * set of processes even after they are stopped. Well, we are going just
+ * to ignore unknown fds while doing real checkpointing. It is fair because
+ * links outside checkpointed set are going to fail anyway.
+ *
+ * ATTN: the procedure is recursive. We linearize the recursion adding
+ * newly found files to the end of file list, so they will be analyzed
+ * in the same loop.
+ */
+
+static int collect_one_passedfd(struct file *file, cpt_context_t * ctx) /* register files FD-passed over one AF_UNIX socket's receive queue */
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct socket *sock;
+	struct sock *sk;
+	struct sk_buff *skb;
+
+	if (!S_ISSOCK(inode->i_mode))
+		return -ENOTSOCK;
+
+	sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket; /* inode and socket are co-allocated */
+
+	if (sock->ops->family != AF_UNIX) /* FD passing exists only on AF_UNIX */
+		return 0;
+
+	sk = sock->sk;
+
+	/* Subtle locking issue. skbs cannot be removed while
+	 * we are scanning, because all the processes are stopped.
+	 * They still can be added to tail of queue. Locking while
+	 * we dereference skb->next is enough to resolve this.
+	 * See above about collision with skbs added after we started
+	 * checkpointing.
+	 */
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) { /* list head sentinel terminates the walk */
+		if (UNIXCB(skb).fp && skb->sk &&
+		    (!sock_flag(skb->sk, SOCK_DEAD) || unix_peer(sk) == skb->sk)) { /* only skbs from live or directly-connected senders */
+			struct scm_fp_list *fpl = UNIXCB(skb).fp;
+			int i;
+
+			for (i = fpl->count-1; i >= 0; i--) {
+				if (cpt_object_add(CPT_OBJ_FILE, fpl->fp[i], ctx) == NULL) /* appended to file list; scanned later in same loop */
+					return -ENOMEM;
+			}
+		}
+
+		spin_lock_irq(&sk->sk_receive_queue.lock); /* lock only around the ->next dereference, per comment above */
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+}
+
+int cpt_collect_passedfds(cpt_context_t * ctx) /* scan every collected socket file for in-flight passed FDs */
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) { /* list grows during iteration, linearizing the recursion */
+		struct file *file = obj->o_obj;
+
+		if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
+			int err;
+
+			if ((err = collect_one_passedfd(file, ctx)) < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* ====== End of FD passing ====== */
+
+/* Must be called under bh_lock_sock() */
+
+void clear_backlog(struct sock *sk) /* drop and free every skb on sk's backlog; caller holds bh_lock_sock() */
+{
+	struct sk_buff *skb = sk->sk_backlog.head;
+
+	sk->sk_backlog.head = sk->sk_backlog.tail = NULL; /* detach whole list first, then free */
+	while (skb) {
+		struct sk_buff *next = skb->next;
+
+		skb->next = NULL;
+		kfree_skb(skb);
+		skb = next;
+	}
+}
+
+void release_sock_nobacklog(struct sock *sk) /* like release_sock() but discards the backlog instead of processing it */
+{
+	spin_lock_bh(&(sk->sk_lock.slock));
+	clear_backlog(sk);
+	sk->sk_lock.owner = NULL; /* mark socket unowned */
+        if (waitqueue_active(&(sk->sk_lock.wq)))
+		wake_up(&(sk->sk_lock.wq)); /* wake lock waiters, as release_sock() would */
+	spin_unlock_bh(&(sk->sk_lock.slock));
+}
+
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, /* dump one skb: header image, payload bits, passed-FD descriptors */
+		 struct sock *sk, struct cpt_context *ctx)
+{
+	struct cpt_skb_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	struct timeval tmptv;
+	int ret = 0;
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SKB;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_owner = owner; /* socket index, or queue-specific owner id */
+	v->cpt_queue = type; /* CPT_SKB_RQ / CPT_SKB_WQ */
+	skb_get_timestamp(skb, &tmptv);
+	v->cpt_stamp = cpt_timeval_export(&tmptv);
+	v->cpt_hspace = skb->data - skb->head; /* headroom */
+	v->cpt_tspace = skb->end - skb->tail; /* tailroom */
+	v->cpt_h = skb->h.raw - skb->head; /* transport/network/mac headers as offsets from head */
+	v->cpt_nh = skb->nh.raw - skb->head;
+	v->cpt_mac = skb->mac.raw - skb->head;
+	memset(v->cpt_cb, 0, sizeof(v->cpt_cb));
+
+	/*
+	 * IPv6 enabled 'tcp_skb_cb' does not fit into 'cpt_skb_image.cb'.
+	 * 'ack_seq' is missing, but hopefully it is not needed while
+	 * skb is in queue.
+	 * BUILD_BUG_ON(sizeof(v->cpt_cb) < sizeof(skb->cb));
+	 */
+	 BUILD_BUG_ON(sizeof(v->cpt_cb) != 40); /* pin image-format layout assumptions at compile time */
+	 BUILD_BUG_ON(sizeof(struct inet_skb_parm) != 16);
+	 BUILD_BUG_ON(sizeof(struct inet6_skb_parm) != 24);
+	 BUILD_BUG_ON(sizeof(*TCP_SKB_CB(skb)) -
+		sizeof(TCP_SKB_CB(skb)->header) != 20);
+#if !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE)
+	if (sk->sk_protocol == IPPROTO_TCP) {
+		/* Save control block according to tcp_skb_cb with IPv6 */
+
+		/*
+		 * IPv6 enabled 'tcp_skb_cb' does not fit into 'cpt_skb_image.cb'.
+		 * BUILD_BUG_ON(sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm) <
+		 *	sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
+		 */
+		memcpy(v->cpt_cb, skb->cb, sizeof(struct inet_skb_parm)); /* re-layout cb to the IPv6-enabled image format */
+		memcpy((void *)v->cpt_cb + sizeof(struct inet6_skb_parm),
+		       skb->cb + sizeof(struct inet_skb_parm),
+		       min(sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm),
+		       sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm)));
+	} else
+#endif
+		memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
+	if (sizeof(skb->cb) > sizeof(v->cpt_cb)) { /* warn if truncated cb bytes were actually in use */
+		int i;
+		for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) {
+			if (skb->cb[i]) {
+				wprintk_ctx("dirty skb cb");
+				break;
+			}
+		}
+	}
+	v->cpt_len = skb->len;
+	v->cpt_mac_len = skb->mac_len;
+	v->cpt_csum = skb->csum;
+	v->cpt_local_df = skb->local_df;
+	v->cpt_pkt_type = skb->pkt_type;
+	v->cpt_ip_summed = skb->ip_summed;
+	v->cpt_priority = skb->priority;
+	v->cpt_protocol = skb->protocol;
+	v->cpt_security = 0;
+	v->cpt_gso_segs = skb_shinfo(skb)->gso_segs;
+	v->cpt_gso_size = skb_shinfo(skb)->gso_size;
+	if (skb_shinfo(skb)->gso_type) { /* GSO/UFO skbs cannot be serialized */
+		eprintk_ctx("skb ufo is not supported\n");
+		cpt_release_buf(ctx);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (skb->len + (skb->data - skb->head) > 0) { /* payload: headroom + linear + paged data as one BITS object */
+		struct cpt_obj_bits ob;
+		loff_t saved_obj2;
+
+		cpt_push_object(&saved_obj2, ctx);
+		cpt_open_object(NULL, ctx);
+		ob.cpt_next = CPT_NULL;
+		ob.cpt_object = CPT_OBJ_BITS;
+		ob.cpt_hdrlen = sizeof(ob);
+		ob.cpt_content = CPT_CONTENT_DATA;
+		ob.cpt_size = skb->len + v->cpt_hspace;
+
+		ctx->write(&ob, sizeof(ob), ctx);
+
+		ctx->write(skb->head, (skb->data-skb->head) + (skb->len-skb->data_len), ctx); /* headroom + linear part */
+		if (skb->data_len) { /* nonlinear (paged) part copied out page-at-a-time */
+			int offset = skb->len - skb->data_len;
+			while (offset < skb->len) {
+				int copy = skb->len - offset;
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				(void)cpt_get_buf(ctx);
+				if (skb_copy_bits(skb, offset, ctx->tmpbuf, copy))
+					BUG();
+				ctx->write(ctx->tmpbuf, copy, ctx);
+				__cpt_release_buf(ctx);
+				offset += copy;
+			}
+		}
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj2, ctx);
+	}
+
+	if (skb->sk && skb->sk->sk_family == AF_UNIX) { /* FDs passed in this skb: one FILEDESC ref per file */
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+		if (fpl) {
+			int i;
+
+			for (i = 0; i < fpl->count; i++) {
+				struct cpt_fd_image v; /* shadows outer v intentionally */
+				cpt_object_t *obj;
+				loff_t saved_obj2;
+
+				obj = lookup_cpt_object(CPT_OBJ_FILE, fpl->fp[i], ctx);
+
+				if (!obj) { /* should have been registered by cpt_collect_passedfds */
+					eprintk_ctx("lost passed FD\n");
+					ret = -EINVAL;
+					goto out;
+				}
+
+				cpt_push_object(&saved_obj2, ctx);
+				cpt_open_object(NULL, ctx);
+				v.cpt_next = CPT_NULL;
+				v.cpt_object = CPT_OBJ_FILEDESC;
+				v.cpt_hdrlen = sizeof(v);
+				v.cpt_content = CPT_CONTENT_VOID;
+
+				v.cpt_fd = i;
+				v.cpt_file = obj->o_pos;
+				v.cpt_flags = 0;
+				ctx->write(&v, sizeof(v), ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_obj2, ctx);
+			}
+		}
+	}
+
+out:
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return ret;
+}
+
+static int dump_rqueue(int idx, struct sock *sk, struct cpt_context *ctx) /* dump sk's receive queue; idx is the owning socket's index */
+{
+	struct sk_buff *skb;
+	struct sock *sk_cache = NULL; /* memoizes last sender's socket-index lookup */
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) { /* list head sentinel terminates the walk */
+		int err;
+
+		if (sk->sk_family == AF_UNIX) { /* attribute each skb to its sender's index */
+			cpt_object_t *obj;
+			if (skb->sk != sk_cache) {
+				idx = -1;
+				sk_cache = NULL;
+				obj = lookup_cpt_object(CPT_OBJ_SOCKET, skb->sk, ctx);
+				if (obj) {
+					idx = obj->o_index;
+					sk_cache = skb->sk;
+				} else if (unix_peer(sk) != skb->sk)
+					goto next_skb; /* sender outside checkpointed set and not our peer: skip skb */
+			}
+		}
+
+		err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, sk, ctx);
+		if (err)
+			return err;
+
+next_skb:
+		spin_lock_irq(&sk->sk_receive_queue.lock); /* lock only around ->next; tasks are frozen, tail may still grow */
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+	return 0;
+}
+
+static int dump_wqueue(int idx, struct sock *sk, struct cpt_context *ctx) /* dump sk's write queue; idx is the owning socket's index */
+{
+	struct sk_buff *skb;
+
+	skb = skb_peek(&sk->sk_write_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_write_queue) { /* list head sentinel terminates the walk */
+		int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, sk, ctx);
+		if (err)
+			return err;
+
+		spin_lock_irq(&sk->sk_write_queue.lock); /* lock only around ->next, as in dump_rqueue */
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_write_queue.lock);
+	}
+	return 0;
+}
+
+void cpt_dump_sock_attr(struct sock *sk, cpt_context_t *ctx) /* dump auxiliary socket attributes: attached BPF filter and mcast filters */
+{
+	loff_t saved_obj;
+	if (sk->sk_filter) { /* attached socket filter program, saved as raw instructions */
+		struct cpt_obj_bits v;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_SKFILTER;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_DATA;
+		v.cpt_size = sk->sk_filter->len*sizeof(struct sock_filter);
+
+		ctx->write(&v, sizeof(v), ctx);
+		ctx->write(sk->sk_filter->insns, v.cpt_size, ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) { /* multicast membership filters for inet sockets */
+		cpt_push_object(&saved_obj, ctx);
+		cpt_dump_mcfilter(sk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+}
+
+static int cpt_dump_unix_mount(struct sock *sk, struct cpt_sock_image *v, /* record the vfsmount ref for a bound AF_UNIX socket */
+		cpt_context_t *ctx)
+{
+	cpt_object_t *mntobj;
+
+	mntobj = cpt_lookup_vfsmount_obj(unix_sk(sk)->mnt, ctx);
+	if (mntobj == NULL) { /* mount was not collected: cannot restore the bind path */
+		eprintk_ctx("can't get unix vfsmount\n");
+		return -EINVAL;
+	}
+
+	v->cpt_vfsmount_ref = mntobj->o_pos; /* image position of the mount object */
+	return 0;
+}
+
+static int cpt_dump_unix_socket(struct sock *sk, struct cpt_sock_image *v, cpt_context_t *ctx) /* fill AF_UNIX-specific fields: bind path, peer, parent listener */
+{
+	if (unix_sk(sk)->dentry) { /* socket is bound to a filesystem path */
+		struct dentry *d = unix_sk(sk)->dentry;
+
+		if (IS_ROOT(d) || !d_unhashed(d)) { /* path still present on disk */
+			int err = 0;
+			char *path;
+			unsigned long pg = __get_free_page(GFP_KERNEL);
+
+			if (!pg)
+				return -ENOMEM;
+
+			path = d_path(d, unix_sk(sk)->mnt, (char *)pg, PAGE_SIZE);
+
+			if (!IS_ERR(path)) {
+				int len = strlen(path);
+				if (len < 126) { /* sockaddr_un path limit; first 2 bytes hold the family */
+					strcpy(((char*)v->cpt_laddr)+2, path); 
+					v->cpt_laddrlen = len + 2;
+				} else {
+					wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
+				}
+				if (cpt_need_delayfs(unix_sk(sk)->mnt)) /* e.g. NFS: bind must be replayed after mount */
+					v->cpt_sockflags |= CPT_SOCK_DELAYED;
+
+				v->cpt_i_mode = d->d_inode->i_mode & S_IALLUGO; /* permission bits of the socket node */
+
+				err = cpt_dump_unix_mount(sk, v, ctx);
+			} else {
+				eprintk_ctx("cannot get path of an af_unix socket\n");
+				err = PTR_ERR(path);
+			}
+			free_page(pg);
+			if (err)
+				return err;
+		} else
+			v->cpt_sockflags |= CPT_SOCK_DELETED; /* bound path was unlinked */
+	}
+
+	/* If the socket is connected, find its peer. If peer is not
+	 * in our table, the socket is connected to external process
+	 * and we consider it disconnected.
+	 */
+	if (unix_peer(sk)) {
+		cpt_object_t *pobj;
+		pobj = lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(sk), ctx);
+		if (pobj)
+			v->cpt_peer = pobj->o_index;
+		else
+			v->cpt_shutdown = SHUTDOWN_MASK; /* external peer: mark fully shut down */
+
+		if (unix_peer(unix_peer(sk)) == sk) /* mutual peers: created via socketpair() */
+			v->cpt_socketpair = 1;
+	}
+
+	/* If the socket shares address with another socket it is
+	 * child of some listening socket. Find and record it. */
+	if (unix_sk(sk)->addr &&
+			atomic_read(&unix_sk(sk)->addr->refcnt) > 1 &&
+			sk->sk_state != TCP_LISTEN) {
+		cpt_object_t *pobj;
+		for_each_object(pobj, CPT_OBJ_SOCKET) { /* linear scan for the listener sharing this addr */
+			struct sock *psk = pobj->o_obj;
+			if (psk->sk_family == AF_UNIX &&
+					psk->sk_state == TCP_LISTEN &&
+					unix_sk(psk)->addr == unix_sk(sk)->addr) {
+				v->cpt_parent = pobj->o_index;
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Dump socket content */
+
+/* Dump one socket into the image as a CPT_OBJ_SOCKET record, followed
+ * by its attributes and queued data.
+ * @obj:    entry in the object table; NULL for anonymous sockets
+ *          (orphans, not-yet-accepted children) -- those have
+ *          sk->sk_socket == NULL, so @obj is never dereferenced then.
+ * @index:  index of this socket in the image (-1 when anonymous).
+ * @parent: index of the listening parent (-1 if none).
+ * Returns 0 or a negative errno. */
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx)
+{
+	struct cpt_sock_image *v = cpt_get_buf(ctx);
+	struct socket *sock;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SOCKET;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_file = CPT_NULL;
+	sock = sk->sk_socket;
+	if (sock && sock->file) {
+		cpt_object_t *tobj;
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, sock->file, ctx);
+		if (tobj)
+			v->cpt_file = tobj->o_pos;
+	}
+	v->cpt_index = index;
+	v->cpt_parent = parent;
+
+	/* Lock INET sockets for the duration of checkpointing; released
+	 * later by cpt_unlock_sockets()/cpt_kill_sockets(). */
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		if (sock && !obj->o_lock) {
+			lockdep_off();
+			lock_sock(sk);
+			lockdep_on();
+			obj->o_lock = 1;
+		}
+	}
+
+	/* Some bits stored in inode */
+	v->cpt_ssflags = sock ? sock->flags : 0;
+	v->cpt_sstate = sock ? sock->state : 0;
+	v->cpt_passcred = sock ? test_bit(SOCK_PASSCRED, &sock->flags) : 0;
+
+	/* Common data */
+	v->cpt_family = sk->sk_family;
+	v->cpt_type = sk->sk_type;
+	v->cpt_state = sk->sk_state;
+	v->cpt_reuse = sk->sk_reuse;
+	v->cpt_zapped = sock_flag(sk, SOCK_ZAPPED);
+	v->cpt_shutdown = sk->sk_shutdown;
+	v->cpt_userlocks = sk->sk_userlocks;
+	v->cpt_no_check = sk->sk_no_check;
+	/* bug fix: this line used to assign cpt_zapped a second time,
+	 * clobbering the SOCK_ZAPPED value stored above and losing
+	 * the SOCK_DBG flag entirely */
+	v->cpt_debug = sock_flag(sk, SOCK_DBG);
+	v->cpt_rcvtstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+	v->cpt_localroute = sock_flag(sk, SOCK_LOCALROUTE);
+	v->cpt_protocol = sk->sk_protocol;
+	v->cpt_err = sk->sk_err;
+	v->cpt_err_soft = sk->sk_err_soft;
+	v->cpt_max_ack_backlog = sk->sk_max_ack_backlog;
+	v->cpt_priority = sk->sk_priority;
+	v->cpt_rcvlowat = sk->sk_rcvlowat;
+	/* Timeouts: CPT_NULL means "infinite", otherwise clamp to INT_MAX */
+	v->cpt_rcvtimeo = CPT_NULL;
+	if (sk->sk_rcvtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_rcvtimeo = sk->sk_rcvtimeo > INT_MAX ? INT_MAX : sk->sk_rcvtimeo;
+	v->cpt_sndtimeo = CPT_NULL;
+	if (sk->sk_sndtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_sndtimeo = sk->sk_sndtimeo > INT_MAX ? INT_MAX : sk->sk_sndtimeo;
+	v->cpt_rcvbuf = sk->sk_rcvbuf;
+	v->cpt_sndbuf = sk->sk_sndbuf;
+	v->cpt_bound_dev_if = sk->sk_bound_dev_if;
+	v->cpt_flags = sk->sk_flags;
+	v->cpt_lingertime = CPT_NULL;
+	if (sk->sk_lingertime != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_lingertime = sk->sk_lingertime > INT_MAX ? INT_MAX : sk->sk_lingertime;
+	v->cpt_peer_pid = sk->sk_peercred.pid;
+	v->cpt_peer_uid = sk->sk_peercred.uid;
+	v->cpt_peer_gid = sk->sk_peercred.gid;
+	v->cpt_stamp = cpt_timeval_export(&sk->sk_stamp);
+
+	v->cpt_peer = -1;
+	v->cpt_socketpair = 0;
+	v->cpt_sockflags = 0;
+
+	/* Local name (getname peer=0): failure here is fatal. */
+	v->cpt_laddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_laddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_laddr, &alen, 0);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		v->cpt_laddrlen = alen;
+	}
+	/* Remote name (getname peer=2): not being connected is fine. */
+	v->cpt_raddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_raddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_raddr, &alen, 2);
+		if (!err)
+			v->cpt_raddrlen = alen;
+	}
+
+	if (sk->sk_family == AF_UNIX) {
+		int err;
+		
+		err = cpt_dump_unix_socket(sk, v, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+	}
+
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+		cpt_dump_socket_in(v, sk, ctx);
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_dump_sock_attr(sk, ctx);
+
+	dump_rqueue(index, sk, ctx);
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		dump_wqueue(index, sk, ctx);
+		cpt_dump_ofo_queue(index, sk, ctx);
+	}
+
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_synwait_queue(sk, index, ctx);
+
+	cpt_close_object(ctx);
+
+	/* Not-yet-accepted children are dumped as separate objects
+	 * after this record is closed. */
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_accept_queue(sk, index, ctx);
+
+	return 0;
+}
+
+/* Walk the TCP established hash and dump sockets that belong to the
+ * current VE but have no struct socket/file and were not collected:
+ * orphans still owned by the stack.  Each one is dumped with a faked
+ * lock owner, then terminated with tcp_done(); the chain is rescanned
+ * from the start because the hash lock was dropped meanwhile. */
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx)
+{
+	int i;
+
+	cpt_open_section(ctx, CPT_SECT_ORPHANS);
+
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+
+retry:
+		read_lock_bh(&tcp_hashinfo.ehash[i].lock);
+		sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) {
+
+			if (sk->owner_env != get_exec_env())
+				continue;
+			if (sk->sk_socket)
+				continue;
+			if (!sock_flag(sk, SOCK_DEAD))
+				continue;
+			if (lookup_cpt_object(CPT_OBJ_SOCKET, sk, ctx))
+				continue;
+			sock_hold(sk);
+			read_unlock_bh(&tcp_hashinfo.ehash[i].lock);
+
+			/* Pretend the socket is locked by a user context so
+			 * the dump path sees a consistent, quiescent socket. */
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("BUG: sk locked by whom?\n");
+			sk->sk_lock.owner = (void *)1;
+			bh_unlock_sock(sk);
+			local_bh_enable();
+
+			cpt_dump_socket(NULL, sk, -1, -1, ctx);
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			sk->sk_lock.owner = NULL;
+			clear_backlog(sk);
+			tcp_done(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+
+			goto retry;
+		}
+		read_unlock_bh(&tcp_hashinfo.ehash[i].lock);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Return 1 if @sk can be checkpointed.  A netlink socket with an
+ * active callback (a dump in progress) cannot be saved safely. */
+static int can_dump(struct sock *sk, cpt_context_t *ctx)
+{
+	switch (sk->sk_family) {
+	case AF_NETLINK:
+		if (((struct netlink_sock *)sk)->cb) {
+			eprintk_ctx("netlink socket has active callback\n");
+			return 0;
+		}
+		break;
+	}
+	return 1;
+}
+
+/* We are not going to block suspend when we have external AF_UNIX connections.
+ * But we cannot stop feed of new packets/connections to our environment
+ * from outside. Taking into account that it is intrinsically unreliable,
+ * we collect some amount of data, but when checkpointing/restoring we
+ * are going to drop everything, which does not make sense: skbs sent
+ * by outside processes, connections from outside etc. etc.
+ */
+
+/* The first pass. When we see socket referenced by a file, we just
+ * add it to socket table */
+int cpt_collect_socket(struct file *file, cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	struct socket *sock;
+	struct sock *sk;
+
+	if (!S_ISSOCK(file->f_dentry->d_inode->i_mode))
+		return -ENOTSOCK;
+	/* A socket inode is embedded in struct socket_alloc. */
+	sock = &container_of(file->f_dentry->d_inode, struct socket_alloc, vfs_inode)->socket;
+	sk = sock->sk;
+	if (!can_dump(sk, ctx))
+		return -EAGAIN;
+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sk, ctx)) == NULL)
+		return -ENOMEM;
+	obj->o_parent = file;
+
+	return 0;
+}
+
+/*
+ * We should end with table containing:
+ *  * all sockets opened by our processes in the table.
+ *  * all the sockets queued in listening queues on _our_ listening sockets,
+ *    which are connected to our opened sockets.
+ */
+
+/* Add to the object table the not-yet-accepted children queued on
+ * @obj's listening AF_UNIX socket whose peers are already collected.
+ * NOTE(review): only the skb->next step is done under the queue lock;
+ * the walk presumably relies on the VE being frozen -- confirm. */
+static int collect_one_unix_listening_sock(cpt_object_t *obj, cpt_context_t * ctx)
+{
+	struct sock *sk = obj->o_obj;
+	cpt_object_t *cobj;
+	struct sk_buff *skb;
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) {
+		struct sock *lsk = skb->sk;
+		if (unix_peer(lsk) &&
+		    lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(lsk), ctx)) {
+			if ((cobj = cpt_object_add(CPT_OBJ_SOCKET, lsk, ctx)) == NULL)
+				return -ENOMEM;
+			cobj->o_parent = obj->o_parent;
+		}
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+}
+
+/* Second pass over the socket table: pull in unaccepted children of
+ * AF_UNIX listeners, then give every socket an index and propagate
+ * that index to the file object that owns the socket. */
+int cpt_index_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	unsigned long index = 0;
+
+	/* Collect not-yet-accepted children of listening sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+
+		if (sk->sk_state != TCP_LISTEN)
+			continue;
+
+		if (sk->sk_family == AF_UNIX)
+			collect_one_unix_listening_sock(obj, ctx);
+	}
+
+	/* Assign indices to all the sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if (sk->sk_socket && sk->sk_socket->file) {
+			cpt_object_t *tobj;
+			tobj = lookup_cpt_object(CPT_OBJ_FILE, sk->sk_socket->file, ctx);
+			if (tobj)
+				cpt_obj_setindex(tobj, obj->o_index, ctx);
+		}
+	}
+
+	return 0;
+}
+
+/* Release every socket that cpt_dump_socket() locked (obj->o_lock). */
+void cpt_unlock_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	lockdep_off();
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk && obj->o_lock) {
+			if (sk->sk_socket)
+				release_sock(sk);
+		}
+	}
+	lockdep_on();
+}
+
+/* Forcibly shut down every locked socket, running in its owner VE's
+ * context, and release the lock without processing the backlog. */
+void cpt_kill_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk && obj->o_lock) {
+			struct ve_struct *old_env;
+			old_env = set_exec_env(sk->owner_env);
+			cpt_kill_socket(sk, ctx);
+			if (sk->sk_socket)
+				release_sock_nobacklog(sk);
+			set_exec_env(old_env);
+		}
+	}
+}
+
+/* Return the fd registered for SIGIO delivery on @file's socket,
+ * or (__u32)-1 if @file has no fasync entry. */
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct fasync_struct *fa;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct socket *sock;
+
+	sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket;
+
+	for (fa = sock->fasync_list; fa; fa = fa->fa_next) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;
+	}
+	return -1;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_socket.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_socket.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_socket.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_socket.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,51 @@
+struct sock;
+
+int cpt_collect_passedfds(cpt_context_t *);
+int cpt_index_sockets(cpt_context_t *);
+int cpt_collect_socket(struct file *, cpt_context_t *);
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx);
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int rst_sockets(struct cpt_context *ctx);
+int rst_sockets_complete(struct cpt_context *ctx);
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
+
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx);
+struct sk_buff * rst_skb(struct sock *sk, loff_t *pos_p, __u32 *owner,
+			 __u32 *queue, struct cpt_context *ctx);
+
+void cpt_unlock_sockets(cpt_context_t *);
+void cpt_kill_sockets(cpt_context_t *);
+
+
+int cpt_kill_socket(struct sock *, cpt_context_t *);
+int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
+int rst_listen_socket_in(struct sock *sk, struct cpt_sock_image *si,
+			 loff_t pos, struct cpt_context *ctx);
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct sock *sk,
+		 struct cpt_context *ctx);
+int cpt_dump_mcfilter(struct sock *sk, struct cpt_context *ctx);
+
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
+		       loff_t pos, cpt_context_t *ctx);
+int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v,
+			loff_t pos, cpt_context_t *ctx);
+
+int rst_delay_unix_bind(struct sock *,
+			struct cpt_sock_image *, cpt_context_t *);
+
+/* One deferred AF_UNIX bind, queued when the socket's filesystem is
+ * restored late (delayfs) and consumed by rebind_unix_socket().
+ * NOTE(review): path[128] mirrors the cpt_laddr buffer size in the
+ * sock image; confirm path_off semantics against rst_delay_unix_bind. */
+struct unix_bind_info {
+	struct sock *sk;
+	char path[128];
+	int path_off;
+	umode_t i_mode;
+	struct unix_bind_info *next;
+};
+
+int rebind_unix_socket(struct vfsmount *rmnt, struct unix_bind_info *bii,
+			int flags);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_socket_in.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_socket_in.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_socket_in.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_socket_in.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,450 @@
+/*
+ *
+ *  kernel/cpt/cpt_socket_in.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/igmp.h>
+#include <linux/ipv6.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+/* Export an absolute jiffies deadline as a signed delta from "now",
+ * so the value survives migration to a host with different jiffies. */
+static inline __u32 jiffies_export(unsigned long tmo)
+{
+	__s32 delta = (long)(tmo - jiffies);
+	return delta;
+}
+
+/* Same as jiffies_export(), but for TCP timestamps measured against
+ * tcp_time_stamp instead of jiffies. */
+static inline __u32 tcp_jiffies_export(__u32 tmo)
+{
+	__s32 delta = tmo - tcp_time_stamp;
+	return delta;
+}
+
+/* Dump a TCP socket's out-of-order receive queue as CPT_SKB_OFOQ
+ * records; a no-op for anything that is not a TCP stream socket. */
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff *skb;
+	struct tcp_sock *tp;
+
+	if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP)
+		return 0;
+
+	tp = tcp_sk(sk);
+
+	skb = skb_peek(&tp->out_of_order_queue);
+	while (skb && skb != (struct sk_buff*)&tp->out_of_order_queue) {
+		int err;
+
+		err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, sk, ctx);
+		if (err)
+			return err;
+
+		/* Only the list step is locked; the socket itself is
+		 * already locked by the dump path. */
+		spin_lock_irq(&tp->out_of_order_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&tp->out_of_order_queue.lock);
+	}
+	return 0;
+}
+
+/* True for an AF_INET6 socket currently operating over IPv4
+ * (v4-mapped): its connection af_ops use the IPv4 header length while
+ * keeping the IPv6 sockaddr size.  The BUILD_BUG_ONs guarantee those
+ * two sizes really discriminate between the address families. */
+static inline int sk_ipv6_mapped(struct sock *sk)
+{
+	const struct inet_connection_sock_af_ops *ops;
+
+	ops = inet_csk(sk)->icsk_af_ops;
+
+	BUILD_BUG_ON(sizeof(struct iphdr) == sizeof(struct ipv6hdr));
+	BUILD_BUG_ON(sizeof(struct sockaddr_in) == sizeof(struct sockaddr_in6));
+
+	return sk->sk_family == AF_INET6 &&
+		ops->net_header_len == sizeof(struct iphdr) &&
+		ops->sockaddr_len == sizeof(struct sockaddr_in6);
+}
+
+/* Serialize the full TCP connection state (tcp_sock plus the
+ * inet_connection_sock ack/retransmit machinery) into the image.
+ * Absolute jiffies values are exported as deltas so they can be
+ * rebased on restore.  Always returns 0. */
+static int cpt_dump_socket_tcp(struct cpt_sock_image *si, struct sock *sk,
+			       struct cpt_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	si->cpt_pred_flags = tp->pred_flags;
+	si->cpt_rcv_nxt = tp->rcv_nxt;
+	si->cpt_snd_nxt = tp->snd_nxt;
+	si->cpt_snd_una = tp->snd_una;
+	si->cpt_snd_sml = tp->snd_sml;
+	si->cpt_rcv_tstamp = tcp_jiffies_export(tp->rcv_tstamp);
+	si->cpt_lsndtime = tcp_jiffies_export(tp->lsndtime);
+	si->cpt_tcp_header_len = tp->tcp_header_len;
+	si->cpt_ack_pending = inet_csk(sk)->icsk_ack.pending;
+	si->cpt_quick = inet_csk(sk)->icsk_ack.quick;
+	si->cpt_pingpong = inet_csk(sk)->icsk_ack.pingpong;
+	si->cpt_blocked = inet_csk(sk)->icsk_ack.blocked;
+	si->cpt_ato = inet_csk(sk)->icsk_ack.ato;
+	si->cpt_ack_timeout = jiffies_export(inet_csk(sk)->icsk_ack.timeout);
+	si->cpt_lrcvtime = tcp_jiffies_export(inet_csk(sk)->icsk_ack.lrcvtime);
+	si->cpt_last_seg_size = inet_csk(sk)->icsk_ack.last_seg_size;
+	si->cpt_rcv_mss = inet_csk(sk)->icsk_ack.rcv_mss;
+	si->cpt_snd_wl1 = tp->snd_wl1;
+	si->cpt_snd_wnd = tp->snd_wnd;
+	si->cpt_max_window = tp->max_window;
+	si->cpt_pmtu_cookie = inet_csk(sk)->icsk_pmtu_cookie;
+	si->cpt_mss_cache = tp->mss_cache;
+	si->cpt_mss_cache_std = tp->mss_cache; /* FIXME: was tp->mss_cache_std */
+	si->cpt_mss_clamp = tp->rx_opt.mss_clamp;
+	si->cpt_ext_header_len = inet_csk(sk)->icsk_ext_hdr_len;
+	si->cpt_ext2_header_len = 0;
+	si->cpt_ca_state = inet_csk(sk)->icsk_ca_state;
+	si->cpt_retransmits = inet_csk(sk)->icsk_retransmits;
+	si->cpt_reordering = tp->reordering;
+	si->cpt_frto_counter = tp->frto_counter;
+	si->cpt_frto_highmark = tp->frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	// // si->cpt_adv_cong = tp->adv_cong;
+#endif
+	si->cpt_defer_accept = inet_csk(sk)->icsk_accept_queue.rskq_defer_accept;
+	si->cpt_backoff = inet_csk(sk)->icsk_backoff;
+	si->cpt_srtt = tp->srtt;
+	si->cpt_mdev = tp->mdev;
+	si->cpt_mdev_max = tp->mdev_max;
+	si->cpt_rttvar = tp->rttvar;
+	si->cpt_rtt_seq = tp->rtt_seq;
+	si->cpt_rto = inet_csk(sk)->icsk_rto;
+	si->cpt_packets_out = tp->packets_out;
+	si->cpt_left_out = tp->left_out;
+	si->cpt_retrans_out = tp->retrans_out;
+	si->cpt_lost_out = tp->lost_out;
+	si->cpt_sacked_out = tp->sacked_out;
+	si->cpt_fackets_out = tp->fackets_out;
+	si->cpt_snd_ssthresh = tp->snd_ssthresh;
+	si->cpt_snd_cwnd = tp->snd_cwnd;
+	si->cpt_snd_cwnd_cnt = tp->snd_cwnd_cnt;
+	si->cpt_snd_cwnd_clamp = tp->snd_cwnd_clamp;
+	si->cpt_snd_cwnd_used = tp->snd_cwnd_used;
+	si->cpt_snd_cwnd_stamp = tcp_jiffies_export(tp->snd_cwnd_stamp);
+	si->cpt_timeout = jiffies_export(inet_csk(sk)->icsk_timeout);
+	si->cpt_ka_timeout = 0;
+	si->cpt_rcv_wnd = tp->rcv_wnd;
+	si->cpt_rcv_wup = tp->rcv_wup;
+	si->cpt_write_seq = tp->write_seq;
+	si->cpt_pushed_seq = tp->pushed_seq;
+	si->cpt_copied_seq = tp->copied_seq;
+	si->cpt_tstamp_ok = tp->rx_opt.tstamp_ok;
+	si->cpt_wscale_ok = tp->rx_opt.wscale_ok;
+	si->cpt_sack_ok = tp->rx_opt.sack_ok;
+	si->cpt_saw_tstamp = tp->rx_opt.saw_tstamp;
+	si->cpt_snd_wscale = tp->rx_opt.snd_wscale;
+	si->cpt_rcv_wscale = tp->rx_opt.rcv_wscale;
+	si->cpt_nonagle = tp->nonagle;
+	si->cpt_keepalive_probes = tp->keepalive_probes;
+	si->cpt_rcv_tsval = tp->rx_opt.rcv_tsval;
+	si->cpt_rcv_tsecr = tp->rx_opt.rcv_tsecr;
+	si->cpt_ts_recent = tp->rx_opt.ts_recent;
+	si->cpt_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+	si->cpt_user_mss = tp->rx_opt.user_mss;
+	si->cpt_dsack = tp->rx_opt.dsack;
+	si->cpt_eff_sacks = tp->rx_opt.eff_sacks;
+	/* SACK blocks are flattened as start/end sequence pairs:
+	 * one duplicate-SACK block followed by four selective blocks. */
+	si->cpt_sack_array[0] = tp->duplicate_sack[0].start_seq;
+	si->cpt_sack_array[1] = tp->duplicate_sack[0].end_seq;
+	si->cpt_sack_array[2] = tp->selective_acks[0].start_seq;
+	si->cpt_sack_array[3] = tp->selective_acks[0].end_seq;
+	si->cpt_sack_array[4] = tp->selective_acks[1].start_seq;
+	si->cpt_sack_array[5] = tp->selective_acks[1].end_seq;
+	si->cpt_sack_array[6] = tp->selective_acks[2].start_seq;
+	si->cpt_sack_array[7] = tp->selective_acks[2].end_seq;
+	si->cpt_sack_array[8] = tp->selective_acks[3].start_seq;
+	si->cpt_sack_array[9] = tp->selective_acks[3].end_seq;
+	si->cpt_window_clamp = tp->window_clamp;
+	si->cpt_rcv_ssthresh = tp->rcv_ssthresh;
+	si->cpt_probes_out = inet_csk(sk)->icsk_probes_out;
+	si->cpt_num_sacks = tp->rx_opt.num_sacks;
+	si->cpt_advmss = tp->advmss;
+	si->cpt_syn_retries = inet_csk(sk)->icsk_syn_retries;
+	si->cpt_ecn_flags = tp->ecn_flags;
+	si->cpt_prior_ssthresh = tp->prior_ssthresh;
+	si->cpt_high_seq = tp->high_seq;
+	si->cpt_retrans_stamp = tp->retrans_stamp;
+	si->cpt_undo_marker = tp->undo_marker;
+	si->cpt_undo_retrans = tp->undo_retrans;
+	si->cpt_urg_seq = tp->urg_seq;
+	si->cpt_urg_data = tp->urg_data;
+	si->cpt_pending = inet_csk(sk)->icsk_pending;
+	si->cpt_urg_mode = tp->urg_mode;
+	si->cpt_snd_up = tp->snd_up;
+	si->cpt_keepalive_time = tp->keepalive_time;
+	si->cpt_keepalive_intvl = tp->keepalive_intvl;
+	si->cpt_linger2 = tp->linger2;
+
+	/* Keepalive timer is only meaningful on a live, keepalive-enabled
+	 * connection; export its deadline in that case. */
+	if (sk->sk_state != TCP_LISTEN &&
+	    sk->sk_state != TCP_CLOSE &&
+	    sock_flag(sk, SOCK_KEEPOPEN)) {
+		si->cpt_ka_timeout = jiffies_export(sk->sk_timer.expires);
+	}
+
+	if (sk_ipv6_mapped(sk))
+		si->cpt_mapped = 1;
+	return 0;
+}
+
+
+/* Serialize the INET (v4/v6) part of a socket: local/remote addresses,
+ * IP-level options, pending cork state, UDP and IPv6 specifics, and --
+ * for TCP stream sockets -- the full TCP state.  Always returns 0. */
+int cpt_dump_socket_in(struct cpt_sock_image *si, struct sock *sk,
+		       struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	/* Rebuild the local sockaddr from inet state rather than
+	 * relying on the earlier getname() result. */
+	if (sk->sk_family == AF_INET) {
+		struct sockaddr_in *sin = ((struct sockaddr_in*)si->cpt_laddr);
+		sin->sin_family = AF_INET;
+		sin->sin_port = inet->sport;
+		sin->sin_addr.s_addr = inet->rcv_saddr;
+		si->cpt_laddrlen = sizeof(*sin);
+	} else if (sk->sk_family == AF_INET6) {
+		struct sockaddr_in6 *sin6 = ((struct sockaddr_in6*)si->cpt_laddr);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = inet->sport;
+		memcpy(&sin6->sin6_addr, &np->rcv_saddr, 16);
+		si->cpt_laddrlen = sizeof(*sin6);
+	}
+	/* Not bound to a local port => no local address to restore */
+	if (!inet->num)
+		si->cpt_laddrlen = 0;
+
+	si->cpt_daddr = inet->daddr;
+	si->cpt_dport = inet->dport;
+	si->cpt_saddr = inet->saddr;
+	si->cpt_rcv_saddr = inet->rcv_saddr;
+	si->cpt_sport = inet->sport;
+	si->cpt_uc_ttl = inet->uc_ttl;
+	si->cpt_tos = inet->tos;
+	si->cpt_cmsg_flags = inet->cmsg_flags;
+	si->cpt_mc_index = inet->mc_index;
+	si->cpt_mc_addr = inet->mc_addr;
+	si->cpt_hdrincl = inet->hdrincl;
+	si->cpt_mc_ttl = inet->mc_ttl;
+	si->cpt_mc_loop = inet->mc_loop;
+	si->cpt_pmtudisc = inet->pmtudisc;
+	si->cpt_recverr = inet->recverr;
+	si->cpt_freebind = inet->freebind;
+	si->cpt_idcounter = inet->id;
+
+	/* Cork state: prefer the cached route's flow when present. */
+	si->cpt_cork_flags = inet->cork.flags;
+	si->cpt_cork_fragsize = 0;
+	si->cpt_cork_length = inet->cork.length;
+	si->cpt_cork_addr = inet->cork.addr;
+	si->cpt_cork_saddr = inet->cork.fl.fl4_src;
+	si->cpt_cork_daddr = inet->cork.fl.fl4_dst;
+	si->cpt_cork_oif = inet->cork.fl.oif;
+	if (inet->cork.rt) {
+		si->cpt_cork_fragsize = inet->cork.fragsize;
+		si->cpt_cork_saddr = inet->cork.rt->fl.fl4_src;
+		si->cpt_cork_daddr = inet->cork.rt->fl.fl4_dst;
+		si->cpt_cork_oif = inet->cork.rt->fl.oif;
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_sock *up = udp_sk(sk);
+		si->cpt_udp_pending  = up->pending;
+		si->cpt_udp_corkflag  = up->corkflag;
+		si->cpt_udp_encap  = up->encap_type;
+		si->cpt_udp_len  = up->len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		memcpy(si->cpt_saddr6, &np->saddr, 16);
+		memcpy(si->cpt_rcv_saddr6, &np->rcv_saddr, 16);
+		memcpy(si->cpt_daddr6, &np->daddr, 16);
+		si->cpt_flow_label6 = np->flow_label;
+		si->cpt_frag_size6 = np->frag_size;
+		si->cpt_hop_limit6 = np->hop_limit;
+		si->cpt_mcast_hops6 = np->mcast_hops;
+		si->cpt_mcast_oif6 = np->mcast_oif;
+		si->cpt_rxopt6 = np->rxopt.all;
+		si->cpt_mc_loop6 = np->mc_loop;
+		si->cpt_recverr6 = np->recverr;
+		si->cpt_sndflow6 = np->sndflow;
+		si->cpt_pmtudisc6 = np->pmtudisc;
+		si->cpt_ipv6only6 = np->ipv6only;
+		si->cpt_mapped = 0;
+	}
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		cpt_dump_socket_tcp(si, sk, ctx);
+
+	return 0;
+}
+
+/* Dump every established but not-yet-accepted child of listening
+ * socket @sk as an anonymous socket record with parent @index. */
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct request_sock *req;
+
+	for (req=inet_csk(sk)->icsk_accept_queue.rskq_accept_head; req; req=req->dl_next)
+		cpt_dump_socket(NULL, req->sk, -1, index, ctx);
+	return 0;
+}
+
+
+/* Dump one half-open (SYN_RECV) connection request of listening
+ * socket @sk as a CPT_OBJ_OPENREQ record. */
+static int dump_openreq(struct request_sock *req, struct sock *sk, int index,
+			struct cpt_context *ctx)
+{
+	struct cpt_openreq_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_OPENREQ;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_rcv_isn = tcp_rsk(req)->rcv_isn;
+	v->cpt_snt_isn = tcp_rsk(req)->snt_isn;
+	v->cpt_rmt_port = inet_rsk(req)->rmt_port;
+	v->cpt_mss = req->mss;
+	v->cpt_family = req->rsk_ops->family;
+	v->cpt_retrans = req->retrans;
+	v->cpt_snd_wscale = inet_rsk(req)->snd_wscale;
+	v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale;
+	v->cpt_tstamp_ok = inet_rsk(req)->tstamp_ok;
+	v->cpt_sack_ok = inet_rsk(req)->sack_ok;
+	v->cpt_wscale_ok = inet_rsk(req)->wscale_ok;
+	v->cpt_ecn_ok = inet_rsk(req)->ecn_ok;
+	v->cpt_acked = inet_rsk(req)->acked;
+	v->cpt_window_clamp = req->window_clamp;
+	v->cpt_rcv_wnd = req->rcv_wnd;
+	v->cpt_ts_recent = req->ts_recent;
+	v->cpt_expires = jiffies_export(req->expires);
+
+	/* Addresses: 4 bytes for IPv4 requests, 16 for IPv6 ones. */
+	if (v->cpt_family == AF_INET) {
+		memcpy(v->cpt_loc_addr, &inet_rsk(req)->loc_addr, 4);
+		memcpy(v->cpt_rmt_addr, &inet_rsk(req)->rmt_addr, 4);
+	} else {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		memcpy(v->cpt_loc_addr, &inet6_rsk(req)->loc_addr, 16);
+		memcpy(v->cpt_rmt_addr, &inet6_rsk(req)->rmt_addr, 16);
+		v->cpt_iif = inet6_rsk(req)->iif;
+#endif
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+/* Walk the SYN hash table of listening socket @sk and dump every
+ * pending (half-open) connection request. */
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	struct request_sock *req;
+	int i;
+
+	for (i=0; i<TCP_SYNQ_HSIZE; i++) {
+		for (req=lopt->syn_table[i]; req; req=req->dl_next) {
+			loff_t saved_obj;
+			cpt_push_object(&saved_obj, ctx);
+			dump_openreq(req, sk, index, ctx);
+			cpt_pop_object(&saved_obj, ctx);
+		}
+	}
+	return 0;
+}
+
+
+/* Terminate a TCP socket after it has been dumped: established
+ * sockets are forced straight to TCP_CLOSE, listening ones are
+ * disconnected.  Always returns 0. */
+int cpt_kill_socket(struct sock *sk, cpt_context_t * ctx)
+{
+	if (sk->sk_state != TCP_CLOSE &&
+	    (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) &&
+	    sk->sk_protocol == IPPROTO_TCP) {
+		if (sk->sk_state != TCP_LISTEN)
+			tcp_set_state(sk, TCP_CLOSE);
+		else
+			sk->sk_prot->disconnect(sk, 0);
+	}
+	return 0;
+}
+
+/* Dump the socket's multicast memberships (IPv4 mc_list and, for an
+ * AF_INET6 socket, ipv6_mc_list) as CPT_OBJ_SOCK_MCADDR records.
+ * Each record header advertises cpt_next = sizeof(header) + scnt,
+ * where scnt is the payload size in bytes: one 16-byte slot per
+ * source-filter address.  Always returns 0. */
+int cpt_dump_mcfilter(struct sock *sk, cpt_context_t *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_mc_socklist *iml;
+
+	for (iml = inet->mc_list; iml; iml = iml->next) {
+		struct cpt_sockmc_image smi;
+		int scnt = 0;
+		int i;
+
+		/* scnt is a byte count: 16 bytes per source address */
+		if (iml->sflist)
+			scnt = iml->sflist->sl_count*16;
+
+		smi.cpt_next = sizeof(smi) + scnt;
+		smi.cpt_object = CPT_OBJ_SOCK_MCADDR;
+		smi.cpt_hdrlen = sizeof(smi);
+		smi.cpt_content = CPT_CONTENT_DATA;
+
+		smi.cpt_family = AF_INET;
+		smi.cpt_mode = iml->sfmode;
+		smi.cpt_ifindex = iml->multi.imr_ifindex;
+		memset(&smi.cpt_mcaddr, 0, sizeof(smi.cpt_mcaddr));
+		smi.cpt_mcaddr[0] = iml->multi.imr_multiaddr.s_addr;
+
+		ctx->write(&smi, sizeof(smi), ctx);
+
+		/* bug fix: write exactly scnt payload bytes, i.e. scnt/16
+		 * slots of 16 bytes each.  The loop used to run scnt times,
+		 * emitting 16x more data than cpt_next advertised and
+		 * reading past the end of sl_addr[]. */
+		for (i = 0; i < scnt/16; i++) {
+			u32 addr[4];
+			memset(&addr, 0, sizeof(addr));
+			addr[0] = iml->sflist->sl_addr[i];
+			ctx->write(&addr, sizeof(addr), ctx);
+		}
+	}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_mc_socklist *mcl;
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		for (mcl = np->ipv6_mc_list; mcl; mcl = mcl->next) {
+			struct cpt_sockmc_image smi;
+			int scnt = 0;
+			int i;
+
+			if (mcl->sflist)
+				scnt = mcl->sflist->sl_count*16;
+
+			smi.cpt_next = sizeof(smi) + scnt;
+			smi.cpt_object = CPT_OBJ_SOCK_MCADDR;
+			smi.cpt_hdrlen = sizeof(smi);
+			smi.cpt_content = CPT_CONTENT_DATA;
+
+			smi.cpt_family = AF_INET6;
+			smi.cpt_mode = mcl->sfmode;
+			smi.cpt_ifindex = mcl->ifindex;
+			memcpy(&smi.cpt_mcaddr, &mcl->addr, sizeof(smi.cpt_mcaddr));
+
+			ctx->write(&smi, sizeof(smi), ctx);
+			/* bug fix: one 16-byte in6_addr per slot => scnt/16
+			 * iterations (was "i < scnt", same overrun as above) */
+			for (i = 0; i < scnt/16; i++)
+				ctx->write(&mcl->sflist->sl_addr[i], 16, ctx);
+		}
+	}
+#endif
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_syscalls.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_syscalls.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_syscalls.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_syscalls.h	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,100 @@
+#include <linux/unistd.h>
+#include <linux/syscalls.h>
+#include <asm/uaccess.h>
+
+/*
+ * Thin wrappers that let checkpoint/restore code invoke syscalls
+ * directly from kernel context.
+ */
+/* WRAP: forward to sys_<c> as-is. */
+#define WRAP(c, args) return sys_##c args
+/* WRAP2: same, but the syscall takes __user pointers, so widen the
+ * address limit to KERNEL_DS around the call. */
+#define WRAP2(c, args) int err; mm_segment_t oldfs; \
+	               oldfs = get_fs(); set_fs(KERNEL_DS); \
+                       err = sys_##c args ;\
+                       set_fs(oldfs); \
+                       return err
+
+static inline int sc_close(int fd)
+{
+	WRAP(close, (fd));
+}
+
+static inline int sc_dup2(int fd1, int fd2)
+{
+	WRAP(dup2, (fd1, fd2));
+}
+
+static inline int sc_unlink(char *name)
+{
+	WRAP2(unlink, (name));
+}
+
+static inline int sc_pipe(int *pfd)
+{
+	return do_pipe(pfd);
+}
+
+static inline int sc_mknod(char *name, int mode, int dev)
+{
+	WRAP2(mknod, (name, mode, dev));
+}
+
+static inline int sc_chmod(char *name, int mode)
+{
+	WRAP2(chmod, (name, mode));
+}
+
+static inline int sc_chown(char *name, int uid, int gid)
+{
+	WRAP2(chown, (name, uid, gid));
+}
+
+static inline int sc_mkdir(char *name, int mode)
+{
+	WRAP2(mkdir, (name, mode));
+}
+
+static inline int sc_rmdir(char *name)
+{
+	WRAP2(rmdir, (name));
+}
+
+/* NULL mntdev is mapped to the conventional "none" device name. */
+static inline int sc_mount(char *mntdev, char *mntpnt, char *type, unsigned long flags)
+{
+	WRAP2(mount, (mntdev ? : "none", mntpnt, type, flags, NULL));
+}
+
+static inline int sc_mprotect(unsigned long start, size_t len,
+			      unsigned long prot)
+{
+	WRAP(mprotect, (start, len, prot));
+}
+
+static inline int sc_mlock(unsigned long start, size_t len)
+{
+	WRAP(mlock, (start, len));
+}
+
+static inline int sc_munlock(unsigned long start, size_t len)
+{
+	WRAP(munlock, (start, len));
+}
+
+static inline int sc_remap_file_pages(unsigned long start, size_t len,
+				      unsigned long prot, unsigned long pgoff,
+				      unsigned long flags)
+{
+	WRAP(remap_file_pages, (start, len, prot, pgoff, flags));
+}
+
+static inline int sc_waitx(int pid, int opt, int *stat_addr)
+{
+	WRAP(wait4, (pid, stat_addr, opt, NULL));
+}
+
+static inline int sc_flock(int fd, int flags)
+{
+	WRAP(flock, (fd, flags));
+}
+
+static inline int sc_open(char* path, int flags, int mode)
+{
+	WRAP(open, (path, flags, mode));
+}
+
+extern int sc_execve(char *cms, char **argv, char **env);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_sysvipc.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_sysvipc.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_sysvipc.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_sysvipc.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,404 @@
+/*
+ *
+ *  kernel/cpt/cpt_sysvipc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {
+		struct file			*file;
+		struct cpt_sysvshm_image	*v;
+};
+
+static int dump_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *warg = arg;
+	struct cpt_sysvshm_image *v = (struct cpt_sysvshm_image *)warg->v;
+
+	if (shp->shm_file != warg->file)
+		return 0;
+
+	v->cpt_key = shp->shm_perm.key;
+	v->cpt_uid = shp->shm_perm.uid;
+	v->cpt_gid = shp->shm_perm.gid;
+	v->cpt_cuid = shp->shm_perm.cuid;
+	v->cpt_cgid = shp->shm_perm.cgid;
+	v->cpt_mode = shp->shm_perm.mode;
+	v->cpt_seq = shp->shm_perm.seq;
+
+	v->cpt_id = shp->id;
+	v->cpt_segsz = shp->shm_segsz;
+	v->cpt_atime = shp->shm_atim;
+	v->cpt_ctime = shp->shm_ctim;
+	v->cpt_dtime = shp->shm_dtim;
+	v->cpt_creator = shp->shm_cprid;
+	v->cpt_last = shp->shm_lprid;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	v->cpt_mlockuser = shp->mlock_user ? shp->mlock_user->uid : -1;
+#else
+	v->cpt_mlockuser = -1;
+#endif
+	return 1;
+}
+
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_sysvshm_image *v = cpt_get_buf(ctx);
+	struct _warg warg;
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SHM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	warg.file = file;
+	warg.v = v;
+	if (sysvipc_walk_shm(dump_one_shm, &warg) == 0) {
+		cpt_release_buf(ctx);
+		return -ESRCH;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+
+int match_sem(int id, struct sem_array *sema, void *arg)
+{
+	if (id != (unsigned long)arg)
+		return 0;
+	return sema->sem_nsems + 1;
+}
+
+static int get_sem_nsem(int id, cpt_context_t *ctx)
+{
+	int res;
+	res = sysvipc_walk_sem(match_sem, (void*)(unsigned long)id);
+	if (res > 0)
+		return res - 1;
+	eprintk_ctx("get_sem_nsem: SYSV semaphore %d not found\n", id);
+	return -ESRCH;
+}
+
+static int dump_one_semundo(struct sem_undo *su, struct cpt_context *ctx)
+{
+	struct cpt_sysvsem_undo_image v;
+	loff_t saved_obj;
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_SYSVSEM_UNDO_REC;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_SEMUNDO;
+	v.cpt_id = su->semid;
+	v.cpt_nsem = get_sem_nsem(su->semid, ctx);
+	if ((int)v.cpt_nsem < 0)
+		return -ESRCH;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	ctx->write(su->semadj, v.cpt_nsem*sizeof(short), ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+struct sem_warg {
+	int				last_id;
+	struct cpt_sysvsem_image	*v;
+};
+
+static int dump_one_sem(int id, struct sem_array *sma, void *arg)
+{
+	struct sem_warg * warg = (struct sem_warg *)arg;
+	struct cpt_sysvsem_image *v = warg->v;
+	int i;
+
+	if (warg->last_id != -1) {
+		if ((id % IPCMNI) <= warg->last_id)
+			return 0;
+	}
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SEM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_SEMARRAY;
+
+	v->cpt_key = sma->sem_perm.key;
+	v->cpt_uid = sma->sem_perm.uid;
+	v->cpt_gid = sma->sem_perm.gid;
+	v->cpt_cuid = sma->sem_perm.cuid;
+	v->cpt_cgid = sma->sem_perm.cgid;
+	v->cpt_mode = sma->sem_perm.mode;
+	v->cpt_seq = sma->sem_perm.seq;
+
+	v->cpt_id = id;
+	v->cpt_ctime = sma->sem_ctime;
+	v->cpt_otime = sma->sem_otime;
+
+	for (i=0; i<sma->sem_nsems; i++) {
+		struct {
+			__u32 semval;
+			__u32 sempid;
+		} *s = (void*)v + v->cpt_next;
+		if (v->cpt_next >= PAGE_SIZE - sizeof(*s))
+			return -EINVAL;
+		s->semval = sma->sem_base[i].semval;
+		s->sempid = sma->sem_base[i].sempid;
+		v->cpt_next += sizeof(*s);
+	}
+
+	warg->last_id = id % IPCMNI;
+	return 1;
+}
+
+
+int cpt_dump_sysvsem(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	struct sem_warg warg;
+
+	/* Dumping semaphores is quite tricky because we cannot
+	 * write to the dump file under lock inside sysvipc_walk_sem().
+	 */
+	cpt_open_section(ctx, CPT_SECT_SYSV_SEM);
+	warg.last_id = -1;
+	warg.v = cpt_get_buf(ctx);
+	for (;;) {
+		if (sysvipc_walk_sem(dump_one_sem, &warg) <= 0)
+			break;
+		ctx->write(warg.v, warg.v->cpt_next, ctx);
+	}
+	cpt_release_buf(ctx);
+	cpt_close_section(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_SYSVSEM_UNDO);
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+		struct sem_undo *su;
+		struct cpt_object_hdr v;
+		loff_t saved_obj;
+
+		cpt_open_object(obj, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_SYSVSEM_UNDO;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_ARRAY;
+
+		ctx->write(&v, sizeof(v), ctx);
+
+		cpt_push_object(&saved_obj, ctx);
+		for (su = semu->proc_list; su; su = su->proc_next) {
+			if (su->semid != -1) {
+				int err;
+				err = dump_one_semundo(su, ctx);
+				if (err < 0)
+					return err;
+			}
+		}
+		cpt_pop_object(&saved_obj, ctx);
+
+		cpt_close_object(ctx);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+struct msg_warg {
+	int				last_id;
+	struct msg_queue		*msq;
+	struct cpt_sysvmsg_image	*v;
+};
+
+static int dump_one_msg(int id, struct msg_queue *msq, void *arg)
+{
+	struct msg_warg * warg = (struct msg_warg *)arg;
+	struct cpt_sysvmsg_image *v = warg->v;
+
+	if (warg->last_id != -1) {
+		if ((id % IPCMNI) <= warg->last_id)
+			return 0;
+	}
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSVMSG;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_key = msq->q_perm.key;
+	v->cpt_uid = msq->q_perm.uid;
+	v->cpt_gid = msq->q_perm.gid;
+	v->cpt_cuid = msq->q_perm.cuid;
+	v->cpt_cgid = msq->q_perm.cgid;
+	v->cpt_mode = msq->q_perm.mode;
+	v->cpt_seq = msq->q_perm.seq;
+
+	v->cpt_id = id;
+	v->cpt_stime = msq->q_stime;
+	v->cpt_rtime = msq->q_rtime;
+	v->cpt_ctime = msq->q_ctime;
+	v->cpt_last_sender = msq->q_lspid;
+	v->cpt_last_receiver = msq->q_lrpid;
+	v->cpt_qbytes = msq->q_qbytes;
+
+	warg->msq = msq;
+	warg->last_id = id % IPCMNI;
+	return 1;
+}
+
+static int do_store(void * src, int len, int offset, void * data)
+{
+	cpt_context_t * ctx = data;
+	ctx->write(src, len, ctx);
+	return 0;
+}
+
+static void cpt_dump_one_sysvmsg(struct msg_msg *m, cpt_context_t * ctx)
+{
+	loff_t saved_obj;
+	struct cpt_sysvmsg_msg_image mv;
+			
+	cpt_open_object(NULL, ctx);
+	mv.cpt_next = CPT_NULL;
+	mv.cpt_object = CPT_OBJ_SYSVMSG_MSG;
+	mv.cpt_hdrlen = sizeof(mv);
+	mv.cpt_content = CPT_CONTENT_DATA;
+
+	mv.cpt_type = m->m_type;
+	mv.cpt_size = m->m_ts;
+
+	ctx->write(&mv, sizeof(mv), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	sysv_msg_store(m, do_store, m->m_ts, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	cpt_close_object(ctx);
+}
+
+int cpt_dump_sysvmsg(struct cpt_context *ctx)
+{
+	struct msg_warg warg;
+
+	/* Dumping msg queues is tricky because we cannot
+	 * write to the dump file under lock inside sysvipc_walk_msg().
+	 *
+	 * And even worse, we have to access the msg list in an unserialized
+	 * context. It is fragile, but the VE is still frozen, remember?
+	 */
+	cpt_open_section(ctx, CPT_SECT_SYSV_MSG);
+	warg.last_id = -1;
+	warg.v = cpt_get_buf(ctx);
+	for (;;) {
+		loff_t saved_obj;
+		struct msg_msg * m;
+
+		if (sysvipc_walk_msg(dump_one_msg, &warg) <= 0)
+			break;
+
+		cpt_open_object(NULL, ctx);
+
+		ctx->write(warg.v, warg.v->cpt_next, ctx);
+
+		cpt_push_object(&saved_obj, ctx);
+		list_for_each_entry(m, &warg.msq->q_messages, m_list) {
+			cpt_dump_one_sysvmsg(m, ctx);
+		}
+		cpt_pop_object(&saved_obj, ctx);
+
+		cpt_close_object(ctx);
+	}
+	cpt_release_buf(ctx);
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_collect_sysvsem_undo(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		if (tsk->exit_state) {
+			/* ipc/sem.c fails to clear tsk->sysvsem.undo_list
+			 * on exit, so skip tasks that are exiting. */
+			continue;
+		}
+		if (tsk->sysvsem.undo_list &&
+		    cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+
+		if (atomic_read(&semu->refcnt) != obj->o_count) {
+			eprintk_ctx("sem_undo_list is referenced outside %d %d\n", obj->o_count, atomic_read(&semu->refcnt));
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+static int collect_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	cpt_context_t *ctx = arg;
+
+	if (__cpt_object_add(CPT_OBJ_FILE, shp->shm_file, GFP_ATOMIC, ctx) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+int cpt_collect_sysvshm(cpt_context_t * ctx)
+{
+	int err;
+
+	err = sysvipc_walk_shm(collect_one_shm, ctx);
+
+	return err < 0 ? err : 0;
+}
+
+int cpt_collect_sysv(cpt_context_t * ctx)
+{
+	int err;
+
+	err = cpt_collect_sysvsem_undo(ctx);
+	if (err)
+		return err;
+	err = cpt_collect_sysvshm(ctx);
+	if (err)
+		return err;
+
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_tty.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_tty.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_tty.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_tty.c	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,216 @@
+/*
+ *
+ *  kernel/cpt/cpt_tty.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/tty.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* We must support at least N_TTY. */
+
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = file->private_data;
+	cpt_object_t *obj;
+	struct cpt_obj_ref o;
+	loff_t saved_pos;
+
+	obj = lookup_cpt_object(CPT_OBJ_TTY, tty, ctx);
+	if (!obj)
+		return -EINVAL;
+
+	cpt_push_object(&saved_pos, ctx);
+
+	o.cpt_next = sizeof(o);
+	o.cpt_object = CPT_OBJ_REF;
+	o.cpt_hdrlen = sizeof(o);
+	o.cpt_content = CPT_CONTENT_VOID;
+	o.cpt_pos = obj->o_pos;
+	ctx->write(&o, sizeof(o), ctx);
+
+	cpt_pop_object(&saved_pos, ctx);
+
+	return 0;
+}
+
+int cpt_collect_tty(struct file *file, cpt_context_t * ctx)
+{
+	struct tty_struct *tty = file->private_data;
+
+	if (tty) {
+		if (cpt_object_add(CPT_OBJ_TTY, tty, ctx) == NULL)
+			return -ENOMEM;
+		if (tty->link) {
+			cpt_object_t *obj;
+
+			obj = cpt_object_add(CPT_OBJ_TTY, tty->link, ctx);
+			if (obj == NULL)
+				return -ENOMEM;
+			/* Undo o_count, tty->link is not a reference */
+			obj->o_count--;
+		}
+	}
+	return 0;
+}
+
+int cpt_dump_tty(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = obj->o_obj;
+	struct cpt_tty_image *v;
+
+	if (tty->link) {
+		if (lookup_cpt_object(CPT_OBJ_TTY, tty->link, ctx) == NULL) {
+			eprintk_ctx("orphan pty %s %d\n", tty->name, tty->driver->subtype == PTY_TYPE_SLAVE);
+			return -EINVAL;
+		}
+		if (tty->link->link != tty) {
+			eprintk_ctx("bad pty pair\n");
+			return -EINVAL;
+		}
+		if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+		    tty->driver->subtype == PTY_TYPE_SLAVE &&
+		    tty->link->count)
+			obj->o_count++;
+	}
+	if (obj->o_count != tty->count) {
+		eprintk_ctx("tty %s is referenced outside %d %d\n", tty->name, obj->o_count, tty->count);
+		return -EBUSY;
+	}
+
+	cpt_open_object(obj, ctx);
+
+	v = cpt_get_buf(ctx);
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_TTY;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = tty->index;
+	v->cpt_link = -1;
+	if (tty->link)
+		v->cpt_link = tty->link->index;
+	v->cpt_drv_type = tty->driver->type;
+	v->cpt_drv_subtype = tty->driver->subtype;
+	v->cpt_drv_flags = tty->driver->flags;
+	v->cpt_packet = tty->packet;
+	v->cpt_stopped = tty->stopped;
+	v->cpt_hw_stopped = tty->hw_stopped;
+	v->cpt_flow_stopped = tty->flow_stopped;
+	v->cpt_flags = tty->flags;
+	v->cpt_ctrl_status = tty->ctrl_status;
+	v->cpt_canon_data = tty->canon_data;
+	v->cpt_canon_head = tty->canon_head - tty->read_tail;
+	v->cpt_canon_column = tty->canon_column;
+	v->cpt_column = tty->column;
+	v->cpt_erasing = tty->erasing;
+	v->cpt_lnext = tty->lnext;
+	v->cpt_icanon = tty->icanon;
+	v->cpt_raw = tty->raw;
+	v->cpt_real_raw = tty->real_raw;
+	v->cpt_closing = tty->closing;
+	v->cpt_minimum_to_wake = tty->minimum_to_wake;
+	v->cpt_pgrp = 0;
+	if (tty->pgrp > 0) {
+		v->cpt_pgrp = _pid_to_vpid(tty->pgrp);
+		if ((int)v->cpt_pgrp < 0) {
+			dprintk_ctx("cannot map tty->pgrp %d -> %d\n", tty->pgrp, (int)v->cpt_pgrp);
+			v->cpt_pgrp = -1;
+		}
+	}
+	v->cpt_session = 0;
+	if (tty->session > 0) {
+		v->cpt_session = _pid_to_vpid(tty->session);
+		if ((int)v->cpt_session < 0) {
+			eprintk_ctx("cannot map tty->session %d -> %d\n", tty->session, (int)v->cpt_session);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(v->cpt_name, tty->name, 64);
+	v->cpt_ws_row = tty->winsize.ws_row;
+	v->cpt_ws_col = tty->winsize.ws_col;
+	v->cpt_ws_prow = tty->winsize.ws_ypixel;
+	v->cpt_ws_pcol = tty->winsize.ws_xpixel;
+	if (tty->termios == NULL) {
+		eprintk_ctx("NULL termios");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_c_line = tty->termios->c_line;
+	v->cpt_c_iflag = tty->termios->c_iflag;
+	v->cpt_c_oflag = tty->termios->c_oflag;
+	v->cpt_c_cflag = tty->termios->c_cflag;
+	v->cpt_c_lflag = tty->termios->c_lflag;
+	memcpy(v->cpt_c_cc, tty->termios->c_cc, NCCS);
+	if (NCCS < 32)
+		memset(v->cpt_c_cc + NCCS, 255, 32 - NCCS);
+	memcpy(v->cpt_read_flags, tty->read_flags, sizeof(v->cpt_read_flags));
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (tty->read_buf && tty->read_cnt) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		loff_t saved_pos;
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = tty->read_cnt;
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+
+		if (tty->read_cnt) {
+			int n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+			ctx->write(tty->read_buf + tty->read_tail, n, ctx);
+			if (tty->read_cnt > n)
+				ctx->write(tty->read_buf, tty->read_cnt-n, ctx);
+			ctx->align(ctx);
+		}
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct * tty;
+	struct fasync_struct *fa;
+
+	tty = (struct tty_struct *)file->private_data;
+
+	for (fa = tty->fasync; fa; fa = fa->fa_next) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;
+	}
+	return -1;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_ubc.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_ubc.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_ubc.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_ubc.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,134 @@
+/*
+ *
+ *  kernel/cpt/cpt_ubc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = cpt_object_add(CPT_OBJ_UBC, bc, ctx);
+	if (obj != NULL) {
+		if (obj->o_count == 1)
+			get_beancounter(bc);
+		if (bc->parent != NULL && obj->o_parent == NULL)
+			obj->o_parent = cpt_add_ubc(bc->parent, ctx);
+	}
+	return obj;
+}
+
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = lookup_cpt_object(CPT_OBJ_UBC, bc, ctx);
+	if (obj == NULL) {
+		char buf[48];
+		print_ub_uid(bc, buf, sizeof(buf));
+		eprintk("CPT: unknown ub %s (%p)\n", buf, bc);
+		dump_stack();
+		return CPT_NULL;
+	}
+	return obj->o_pos;
+}
+
+static void dump_one_bc_parm(struct cpt_ubparm *dmp, struct ubparm *prm,
+		int held)
+{
+	dmp->barrier = (prm->barrier < UB_MAXVALUE ? prm->barrier : CPT_NULL);
+	dmp->limit = (prm->limit < UB_MAXVALUE ? prm->limit : CPT_NULL);
+	dmp->held = (held ? prm->held : CPT_NULL);
+	dmp->maxheld = prm->maxheld;
+	dmp->minheld = prm->minheld;
+	dmp->failcnt = prm->failcnt;
+}
+
+static int dump_one_bc(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc;
+	struct cpt_beancounter_image *v;
+	int i;
+
+	bc = obj->o_obj;
+	v = cpt_get_buf(ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_UBC;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if (obj->o_parent != NULL)
+		v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos;
+	else
+		v->cpt_parent = CPT_NULL;
+	v->cpt_id = (obj->o_parent != NULL) ? bc->ub_uid : 0;
+	v->cpt_ub_resources = UB_RESOURCES;
+	BUILD_BUG_ON(ARRAY_SIZE(v->cpt_parms) < UB_RESOURCES * 2);
+	for (i = 0; i < UB_RESOURCES; i++) {
+		dump_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
+		dump_one_bc_parm(v->cpt_parms + i * 2 + 1, bc->ub_store + i, 1);
+	}
+	memset(v->cpt_parms + UB_RESOURCES * 2, 0,
+			sizeof(v->cpt_parms)
+				- UB_RESOURCES * 2 * sizeof(v->cpt_parms[0]));
+
+	cpt_open_object(obj, ctx);
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_close_object(ctx);
+
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+int cpt_dump_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int skipped;
+	int top;
+
+	cpt_open_section(ctx, CPT_SECT_UBC);
+
+	do {
+		skipped = 0;
+		top = 0;
+		for_each_object(obj, CPT_OBJ_UBC) {
+			if (obj->o_parent == NULL)
+				top++;
+			if (obj->o_pos != CPT_NULL)
+				continue;
+			if (obj->o_parent != NULL &&
+			    ((cpt_object_t *)obj->o_parent)->o_pos == CPT_NULL)
+				skipped++;
+			else
+				dump_one_bc(obj, ctx);
+		}
+	} while (skipped && (top < 2));
+
+	cpt_close_section(ctx);
+	if (top > 1) {
+		eprintk_ctx("More than one top level ub exist");
+		return -EINVAL;
+	}
+		
+	return 0;
+}
+
+void cpt_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_UBC)
+		put_beancounter(obj->o_obj);
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_ubc.h kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_ubc.h
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_ubc.h	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_ubc.h	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,23 @@
+#ifdef CONFIG_USER_RESOURCE
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+int cpt_dump_ubc(struct cpt_context *ctx);
+
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx);
+int rst_undump_ubc(struct cpt_context *ctx);
+
+void cpt_finish_ubc(struct cpt_context *ctx);
+void rst_finish_ubc(struct cpt_context *ctx);
+void copy_one_ubparm(struct ubparm *from, struct ubparm *to, int bc_parm_id);
+void set_one_ubparm_to_max(struct ubparm *ubprm, int bc_parm_id);
+#else
+static int inline cpt_dump_ubc(struct cpt_context *ctx)
+{ return 0; }
+static int inline rst_undump_ubc(struct cpt_context *ctx)
+{ return 0; }
+static void inline cpt_finish_ubc(struct cpt_context *ctx)
+{ return; }
+static void inline rst_finish_ubc(struct cpt_context *ctx)
+{ return; }
+#endif
+
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_x8664.S kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_x8664.S
--- kernel-2.6.18-417.el5.orig/kernel/cpt/cpt_x8664.S	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/cpt_x8664.S	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,69 @@
+#define ASSEMBLY 1
+#include <linux/config.h>
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+
+	.code64
+
+	.macro FAKE_STACK_FRAME child_rip
+	/* push in order ss, rsp, eflags, cs, rip */
+	xorq %rax, %rax
+	pushq %rax /* ss */
+	pushq %rax /* rsp */
+	pushq $(1<<9) /* eflags - interrupts on */
+	pushq $__KERNEL_CS /* cs */
+	pushq \child_rip /* rip */
+	pushq	%rax /* orig rax */
+	.endm
+
+	.macro UNFAKE_STACK_FRAME
+	addq $8*6, %rsp
+	.endm
+
+ENTRY(asm_kernel_thread)
+	CFI_STARTPROC
+	FAKE_STACK_FRAME $child_rip
+	SAVE_ALL
+
+	# rdi: flags, rsi: usp, rdx: will be &pt_regs
+	movq %rdx,%rdi
+	orq  $0x00800000,%rdi
+	movq $-1, %rsi
+	movq %rsp, %rdx
+
+	xorl %r8d,%r8d
+	xorl %r9d,%r9d
+	pushq %rcx
+	call do_fork_pid
+	addq $8, %rsp
+	/* call do_fork */
+	movq %rax,RAX(%rsp)
+	xorl %edi,%edi
+	RESTORE_ALL
+	UNFAKE_STACK_FRAME
+	ret
+	CFI_ENDPROC
+ENDPROC(asm_kernel_thread)
+
+child_rip:
+	pushq $0		# fake return address
+	CFI_STARTPROC
+	movq %rdi, %rax
+	movq %rsi, %rdi
+	call *%rax
+	movq %rax, %rdi
+	call do_exit
+	CFI_ENDPROC
+ENDPROC(child_rip)
+
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/Makefile kernel-2.6.18-417.el5-028stab121/kernel/cpt/Makefile
--- kernel-2.6.18-417.el5.orig/kernel/cpt/Makefile	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/Makefile	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,56 @@
+#
+#
+#  kernel/cpt/Makefile
+#
+#  Copyright (C) 2000-2005  SWsoft
+#  All rights reserved.
+#
+#  Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-$(CONFIG_VZ_CHECKPOINT) += vzcpt.o vzrst.o
+
+vzcpt-objs := cpt_proc.o cpt_dump.o cpt_obj.o cpt_context.o cpt_process.o \
+	cpt_mm.o cpt_files.o cpt_kernel.o \
+	cpt_socket.o cpt_socket_in.o cpt_tty.o cpt_sysvipc.o cpt_net.o \
+	cpt_conntrack.o cpt_epoll.o
+
+vzrst-objs := rst_proc.o rst_undump.o rst_context.o rst_process.o \
+	rst_mm.o rst_files.o \
+	rst_socket.o rst_socket_in.o rst_tty.o rst_sysvipc.o rst_net.o \
+	rst_conntrack.o rst_epoll.o rst_delayfs.o
+
+ifeq ($(CONFIG_USER_RESOURCE), y)
+vzcpt-objs += cpt_ubc.o
+endif
+
+ifeq ($(CONFIG_USER_RESOURCE), y)
+vzrst-objs += rst_ubc.o
+endif
+
+ifeq ($(CONFIG_INOTIFY_USER), y)
+vzcpt-objs += cpt_inotify.o
+vzrst-objs += rst_inotify.o
+endif
+
+vzrst-objs += cpt_exports.o
+
+ifeq ($(CONFIG_VZ_CHECKPOINT), m)
+vzrst-objs += cpt_obj.o cpt_kernel.o
+endif
+
+ifeq ($(CONFIG_VZ_CHECKPOINT_ITER), y)
+vzcpt-objs += cpt_iterative.o
+vzrst-objs += rst_iterative.o
+endif
+
+ifeq ($(CONFIG_VZ_CHECKPOINT_LAZY), y)
+vzcpt-objs += cpt_pagein.o
+vzrst-objs += rst_pagein.o
+endif
+
+ifeq ($(CONFIG_X86_64), y)
+vzcpt-objs += cpt_x8664.o
+ifeq ($(CONFIG_VZ_CHECKPOINT), m)
+vzrst-objs += cpt_x8664.o
+endif
+endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_conntrack.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_conntrack.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_conntrack.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_conntrack.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,330 @@
+/*
+ *
+ *  kernel/cpt/rst_conntrack.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+
+#define ASSERT_READ_LOCK(x) do { } while (0)
+#define ASSERT_WRITE_LOCK(x) do { } while (0)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+struct ct_holder
+{
+	struct ct_holder *next;
+	struct ip_conntrack *ct;
+	int index;
+};
+
+static int decode_tuple(struct cpt_ipct_tuple *v,
+			 struct ip_conntrack_tuple *tuple, int dir,
+			 cpt_context_t *ctx)
+{
+	tuple->dst.ip = v->cpt_dst;
+	tuple->dst.u.all = v->cpt_dstport;
+	if (ctx->image_version < CPT_VERSION_16) {
+		/* In 2.6.9 kernel protonum has short type */
+		__u16 protonum = *(__u16 *)&v->cpt_protonum;
+		if (protonum > 0xff && protonum < 0xffff) {
+			eprintk_ctx("tuple: protonum > 255: %u\n", protonum);
+			return -EINVAL;
+		}
+		tuple->dst.protonum = protonum;
+		tuple->dst.dir = dir;
+	} else {
+		tuple->dst.protonum = v->cpt_protonum;
+		tuple->dst.dir = v->cpt_dir;
+		if (dir != tuple->dst.dir) {
+			eprintk_ctx("dir != tuple->dst.dir\n");
+			return -EINVAL;
+		}
+	}
+
+	tuple->src.ip = v->cpt_src;
+	tuple->src.u.all = v->cpt_srcport;
+	return 0;
+}
+
+
+static int undump_expect_list(struct ip_conntrack *ct,
+			      struct cpt_ip_conntrack_image *ci,
+			      loff_t pos, struct ct_holder *ct_list,
+			      cpt_context_t *ctx)
+{
+	loff_t end;
+	int err;
+
+	end = pos + ci->cpt_next;
+	pos += ci->cpt_hdrlen;
+	while (pos < end) {
+		struct cpt_ip_connexpect_image v;
+		struct ip_conntrack_expect *exp;
+		struct ip_conntrack *sibling;
+
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK_EXPECT, pos, &v, ctx);
+		if (err)
+			return err;
+
+		sibling = NULL;
+		if (v.cpt_sibling_conntrack) {
+			struct ct_holder *c;
+
+			for (c = ct_list; c; c = c->next) {
+				if (c->index == v.cpt_sibling_conntrack) {
+					sibling = c->ct;
+					break;
+				}
+			}
+			if (!sibling) {
+				eprintk_ctx("lost sibling of expectation\n");
+				return -EINVAL;
+			}
+		}
+
+		write_lock_bh(&ip_conntrack_lock);
+
+		/* This is possible: the helper module could have just been
+		 * unregistered; the expectation would then have been destroyed. */
+		if (ct->helper == NULL) {
+			write_unlock_bh(&ip_conntrack_lock);
+			dprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			continue;
+		}
+
+		exp = ip_conntrack_expect_alloc(NULL);
+		if (exp == NULL) {
+			write_unlock_bh(&ip_conntrack_lock);
+			return -ENOMEM;
+		}
+
+		if (decode_tuple(&v.cpt_tuple, &exp->tuple, 0, ctx) ||
+		    decode_tuple(&v.cpt_mask, &exp->mask, 0, ctx)) {
+			ip_conntrack_expect_put(exp);
+			write_unlock_bh(&ip_conntrack_lock);
+			return -EINVAL;
+		}
+
+		exp->master = ct;
+		ip_conntrack_expect_insert(exp);
+#if 0
+		if (sibling) {
+			exp->sibling = sibling;
+			sibling->master = exp;
+			LIST_DELETE(&ve_ip_conntrack_expect_list, exp);
+			ct->expecting--;
+			nf_conntrack_get(&master_ct(sibling)->infos[0]);
+		} else
+#endif
+		if (ct->helper->timeout) {
+			mod_timer(&exp->timeout, jiffies + v.cpt_timeout);
+		}
+		write_unlock_bh(&ip_conntrack_lock);
+
+		ip_conntrack_expect_put(exp);
+
+		pos += v.cpt_next;
+	}
+	return 0;
+}
+
+static int undump_one_ct(struct cpt_ip_conntrack_image *ci, loff_t pos,
+			 struct ct_holder **ct_list, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct ip_conntrack *conntrack;
+	struct ct_holder *c;
+	struct ip_conntrack_tuple orig, repl;
+
+	c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+	if (c == NULL)
+		return -ENOMEM;
+
+	if (decode_tuple(&ci->cpt_tuple[0], &orig, 0, ctx) ||
+	    decode_tuple(&ci->cpt_tuple[1], &repl, 1, ctx)) {
+		kfree(c);
+		return -EINVAL;
+	}
+
+	conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub);
+	if (!conntrack || IS_ERR(conntrack)) {
+		kfree(c);
+		return -ENOMEM;
+	}
+
+	c->ct = conntrack;
+	c->next = *ct_list;
+	*ct_list = c;
+	c->index = ci->cpt_index;
+
+	conntrack->status = ci->cpt_status;
+
+	memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
+	memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	conntrack->mark = ci->cpt_mark;
+#endif
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	conntrack->nat.masq_index = ci->cpt_masq_index;
+#endif
+	if (ci->cpt_initialized) {
+		conntrack->nat.info.seq[0].correction_pos = ci->cpt_nat_seq[0].cpt_correction_pos;
+		conntrack->nat.info.seq[0].offset_before = ci->cpt_nat_seq[0].cpt_offset_before;
+		conntrack->nat.info.seq[0].offset_after = ci->cpt_nat_seq[0].cpt_offset_after;
+		conntrack->nat.info.seq[1].correction_pos = ci->cpt_nat_seq[1].cpt_correction_pos;
+		conntrack->nat.info.seq[1].offset_before = ci->cpt_nat_seq[1].cpt_offset_before;
+		conntrack->nat.info.seq[1].offset_after = ci->cpt_nat_seq[1].cpt_offset_after;
+	}
+	if (conntrack->status & IPS_NAT_DONE_MASK)
+		ip_nat_hash_conntrack(conntrack);
+#endif
+
+	if (ci->cpt_ct_helper) {
+		conntrack->helper = ip_conntrack_helper_find_get(&conntrack->tuplehash[1].tuple);
+		if (conntrack->helper == NULL) {
+			eprintk_ctx("conntrack: cannot find helper, some module is not loaded\n");
+			err = -EINVAL;
+		}
+	}
+
+	ip_conntrack_hash_insert(conntrack);
+	conntrack->timeout.expires = jiffies + ci->cpt_timeout;
+
+	if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)
+		err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
+
+	if (conntrack->helper)
+		ip_conntrack_helper_put(conntrack->helper);
+
+	return err;
+}
+
+static void convert_conntrack_image(struct cpt_ip_conntrack_image *ci)
+{
+	struct cpt_ip_conntrack_image_compat img;
+
+	memcpy(&img, ci, sizeof(struct cpt_ip_conntrack_image_compat));
+	/* 
+	 * The size of cpt_help_data in the 2.6.9 kernel is 16 bytes;
+	 * in 2.6.18 the cpt_help_data size is 24 bytes, so zero the remaining 8 bytes
+	 */
+	memset(ci->cpt_help_data + 4, 0, 8);
+	ci->cpt_initialized = img.cpt_initialized;
+	ci->cpt_num_manips = img.cpt_num_manips;
+	memcpy(ci->cpt_nat_manips, img.cpt_nat_manips, sizeof(img.cpt_nat_manips));
+	memcpy(ci->cpt_nat_seq, img.cpt_nat_seq, sizeof(img.cpt_nat_seq));
+	ci->cpt_masq_index = img.cpt_masq_index;
+	/* Id will be assigned in ip_conntrack_hash_insert(), so make it 0 here */
+	ci->cpt_id = 0;
+	/* mark was not supported in 2.6.9, so set it to default 0 value */
+	ci->cpt_mark = 0;
+
+}
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	int err = 0;
+	loff_t sec = ctx->sections[CPT_SECT_NET_CONNTRACK];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ip_conntrack_image ci;
+	struct ct_holder *c;
+	struct ct_holder *ct_list = NULL;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	if (sizeof(ci.cpt_proto_data) != sizeof(union ip_conntrack_proto)) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_CONNTRACK || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx);
+		if (err)
+			break;
+		if (ctx->image_version < CPT_VERSION_16)
+			convert_conntrack_image(&ci);
+		err = undump_one_ct(&ci, sec, &ct_list, ctx);
+		if (err)
+			break;
+		sec += ci.cpt_next;
+	}
+
+	while ((c = ct_list) != NULL) {
+		ct_list = c->next;
+		if (c->ct)
+			add_timer(&c->ct->timeout);
+		kfree(c);
+	}
+
+	return err;
+}
+
+#else
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	if (ctx->sections[CPT_SECT_NET_CONNTRACK] != CPT_NULL)
+		return -EINVAL;
+	return 0;
+}
+
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_context.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_context.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_context.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_context.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,342 @@
+/*
+ *
+ *  kernel/cpt/rst_context.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+
+static ssize_t file_read(void *addr, size_t count, struct cpt_context *ctx)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->read(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	if (err != count)
+		return err >= 0 ? -EIO : err;
+	return 0;
+}
+
+static ssize_t file_pread(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	mm_segment_t oldfs;
+	ssize_t err = -EBADF;
+	struct file *file = ctx->file;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (file)
+		err = file->f_op->read(file, addr, count, &pos);
+	set_fs(oldfs);
+	if (err != count)
+		return err >= 0 ? -EIO : err;
+	return 0;
+}
+
+static void file_align(struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+
+	if (file)
+		file->f_pos = CPT_ALIGN(file->f_pos);
+}
+
+int rst_get_section(int type, struct cpt_context *ctx, loff_t *start, loff_t *end)
+{
+	struct cpt_section_hdr hdr;
+	int err;
+	loff_t pos;
+
+	pos = ctx->sections[type];
+	*start = *end = pos;
+
+	if (pos != CPT_NULL) {
+		if ((err = ctx->pread(&hdr, sizeof(hdr), ctx, pos)) != 0)
+			return err;
+		if (hdr.cpt_section != type || hdr.cpt_hdrlen < sizeof(hdr))
+			return -EINVAL;
+		*start = pos + hdr.cpt_hdrlen;
+		*end = pos + hdr.cpt_next;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(rst_get_section);
+
+void rst_context_init(struct cpt_context *ctx)
+{
+	int i;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->read = file_read;
+	ctx->pread = file_pread;
+	ctx->align = file_align;
+	for (i=0; i < CPT_SECT_MAX; i++)
+		ctx->sections[i] = CPT_NULL;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	init_completion(&ctx->pgin_notify);
+#endif
+	cpt_object_init(ctx);
+}
+
+static int parse_sections(loff_t start, loff_t end, cpt_context_t *ctx)
+{
+	struct cpt_section_hdr h;
+
+	while (start < end) {
+		int err;
+
+		err = ctx->pread(&h, sizeof(h), ctx, start);
+		if (err)
+			return err;
+		if (h.cpt_hdrlen < sizeof(h) ||
+		    h.cpt_next < h.cpt_hdrlen ||
+		    start + h.cpt_next > end)
+			return -EINVAL;
+		if (h.cpt_section >= CPT_SECT_MAX)
+			return -EINVAL;
+		ctx->sections[h.cpt_section] = start;
+		start += h.cpt_next;
+	}
+	return 0;
+}
+
+int rst_image_acceptable(unsigned long version)
+{
+	if (version > CPT_CURRENT_VERSION ||
+			(version < CPT_VERSION_18 &&
+			 CPT_VERSION_MINOR(version) > 2))
+		return 0;
+	else
+		return 1;
+}
+
+int rst_open_dumpfile(struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_major_tail *v;
+	struct cpt_major_hdr  h;
+	unsigned long size;
+
+	err = -EBADF;
+	if (!ctx->file)
+		goto err_out;
+
+	err = -ENOMEM;
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf == NULL)
+		goto err_out;
+	__cpt_release_buf(ctx);
+
+	size = ctx->file->f_dentry->d_inode->i_size;
+
+	if (size & 7) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (size < sizeof(struct cpt_major_hdr) +
+	    sizeof(struct cpt_major_tail)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	err = ctx->pread(&h, sizeof(h), ctx, 0);
+	if (err) {
+		eprintk_ctx("too short image 1 %d\n", err);
+		goto err_out;
+	}
+	if (h.cpt_signature[0] != CPT_SIGNATURE0 ||
+	    h.cpt_signature[1] != CPT_SIGNATURE1 ||
+	    h.cpt_signature[2] != CPT_SIGNATURE2 ||
+	    h.cpt_signature[3] != CPT_SIGNATURE3) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (h.cpt_hz != HZ) {
+		err = -EINVAL;
+		eprintk_ctx("HZ mismatch: %d != %d\n", h.cpt_hz, HZ);
+		goto err_out;
+	}
+	ctx->virt_jiffies64 = h.cpt_start_jiffies64;
+	ctx->start_time.tv_sec = h.cpt_start_sec;
+	ctx->start_time.tv_nsec = h.cpt_start_nsec;
+	ctx->kernel_config_flags = h.cpt_kernel_config[0];
+	ctx->iptables_mask = h.cpt_iptables_mask;
+	if (!rst_image_acceptable(h.cpt_image_version)) {
+		eprintk_ctx("Unknown image version: %x. Can't restore.\n",
+				h.cpt_image_version);
+		err = -EINVAL;
+		goto err_out;
+	}
+	ctx->image_version = h.cpt_image_version;
+	ctx->features = (__u64)((__u64)h.cpt_ve_features2<<32 | h.cpt_ve_features);
+	ctx->image_arch = h.cpt_os_arch;
+
+	v = cpt_get_buf(ctx);
+	err = ctx->pread(v, sizeof(*v), ctx, size - sizeof(*v));
+	if (err) {
+		eprintk_ctx("too short image 2 %d\n", err);
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if (v->cpt_signature[0] != CPT_SIGNATURE0 ||
+	    v->cpt_signature[1] != CPT_SIGNATURE1 ||
+	    v->cpt_signature[2] != CPT_SIGNATURE2 ||
+	    v->cpt_signature[3] != CPT_SIGNATURE3 ||
+	    v->cpt_nsect != CPT_SECT_MAX_INDEX) {
+		err = -EINVAL;
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if ((err = parse_sections(h.cpt_hdrlen, size - sizeof(*v) - sizeof(struct cpt_section_hdr), ctx)) < 0) {
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	ctx->lazypages = v->cpt_lazypages;
+#endif
+	ctx->tasks64 = v->cpt_64bit;
+	cpt_release_buf(ctx);
+	return 0;
+
+err_out:
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	return err;
+}
+
+void rst_close_dumpfile(struct cpt_context *ctx)
+{
+	if (ctx->file) {
+		fput(ctx->file);
+		ctx->file = NULL;
+	}
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+}
+
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_object_hdr *hdr = tmp;
+	err = ctx->pread(hdr, sizeof(struct cpt_object_hdr), ctx, pos);
+	if (err)
+		return err;
+	if (type > 0 && type != hdr->cpt_object)
+		return -EINVAL;
+	if (hdr->cpt_hdrlen > hdr->cpt_next)
+		return -EINVAL;
+	if (hdr->cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return -EINVAL;
+	if (size < sizeof(*hdr))
+		return -EINVAL;
+	if (size > hdr->cpt_hdrlen) {
+		memset((char *)tmp + hdr->cpt_hdrlen, 0, size - hdr->cpt_hdrlen);
+		size = hdr->cpt_hdrlen;
+	}
+	if (size > sizeof(*hdr))
+		err = ctx->pread(hdr+1, size - sizeof(*hdr),
+				 ctx, pos + sizeof(*hdr));
+	return err;
+}
+EXPORT_SYMBOL(_rst_get_object);
+
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	void *tmp;
+	struct cpt_object_hdr hdr;
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+	if (err)
+		return NULL;
+	if (type > 0 && type != hdr.cpt_object)
+		return NULL;
+	if (hdr.cpt_hdrlen > hdr.cpt_next)
+		return NULL;
+	if (hdr.cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return NULL;
+	tmp = kmalloc(hdr.cpt_hdrlen, GFP_KERNEL);
+	if (!tmp)
+		return NULL;
+	err = ctx->pread(tmp, hdr.cpt_hdrlen, ctx, pos);
+	if (!err)
+		return tmp;
+	kfree(tmp);
+	return NULL;
+}
+EXPORT_SYMBOL(__rst_get_object);
+
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_object_hdr hdr;
+	__u8 *name;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos_p, &hdr, ctx);
+	if (err)
+		return NULL;
+	if (hdr.cpt_next - hdr.cpt_hdrlen > PAGE_SIZE)
+		return NULL;
+	name = (void*)__get_free_page(GFP_KERNEL);
+	if (!name)
+		return NULL;
+	err = ctx->pread(name, hdr.cpt_next - hdr.cpt_hdrlen,
+		   ctx, *pos_p + hdr.cpt_hdrlen);
+	if (err) {
+		free_page((unsigned long)name);
+		return NULL;
+	}
+	*pos_p += hdr.cpt_next;
+	return name;
+}
+
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx)
+{
+	return __rst_get_name(&pos, ctx);
+}
+
+void rst_put_name(__u8 *name, struct cpt_context *ctx)
+{
+	unsigned long addr = (unsigned long)name;
+
+	if (addr)
+		free_page(addr&~(PAGE_SIZE-1));
+}
+
+struct cpt_ops rst_ops = {
+	.get_object = _rst_get_object,
+	.rst_file = rst_file,
+};
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_delayfs.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_delayfs.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_delayfs.c	2017-01-13 08:40:27.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_delayfs.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,1605 @@
+/*
+ *  kernel/cpt/rst_delayfs.c
+ *
+ *  Copyright (C) 2009 Parallels
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  TODO:
+ *	- handling of a case when top mount got broken
+ *	- FIXMEs below
+ *	- do_coredump (filp_open, do_truncate)
+ *	- check nfs submounts
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <linux/ve_proto.h>
+#include <linux/lockd/bind.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/nfs_mount.h>
+#include <linux/ve_nfs.h>
+#include <net/af_unix.h>
+#include <net/sock.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+#define DEBUG
+
+#define D(FMT, ...)	dprintk( \
+		"delayfs %d(%s) %s:%d " FMT "\n", \
+		current->pid, current->comm, \
+		__func__, __LINE__, ##__VA_ARGS__)
+
+enum {
+	SB_INITIAL = 0,
+	SB_LOCKED,
+	SB_FINISHED,
+	SB_BROKEN
+};
+
+struct delay_sb_info {
+	int state;
+	wait_queue_head_t blocked_tasks;
+
+	char *type;
+	void *data;
+	struct vfsmount *real;
+	spinlock_t file_lock;
+
+	struct unix_bind_info *bi_list;
+
+	int block_intr;
+	unsigned long delay_tmo;
+	void (*handle_mount_failure)(struct delay_sb_info *si);
+	void (*restore_mount_params)(struct delay_sb_info *si);
+
+	/* NFS original mount options */
+	int nfs_mnt_soft;
+	int nfs_delay_tmo;
+	int nfs_mnt_retrans;
+};
+
+static struct dentry_operations delay_dir_dops;
+
+#define FNAME(file) ((file)->f_dentry->d_name.name)
+
+/* mm */
+
+static struct page *delay_remmap(struct vm_area_struct *vma,
+		struct file* fake, struct file *real)
+{
+	struct address_space *mapping;
+
+	if (vma->vm_file != fake)
+		return NOPAGE_RESTART;
+
+	if (IS_ERR(real))
+		return NOPAGE_OOM;
+
+	unlink_file_vma(vma);
+	vma->vm_file = real;
+	if (real->f_op->mmap(real, vma)) {
+		vma->vm_file = fake;
+		mapping = fake->f_mapping;
+		spin_lock(&mapping->i_mmap_lock);
+		__vma_link_file(vma);
+		spin_unlock(&mapping->i_mmap_lock);
+		return NOPAGE_SIGBUS;
+	}
+
+	mapping = real->f_mapping;
+	spin_lock(&mapping->i_mmap_lock);
+	__vma_link_file(vma);
+	vma->vm_truncate_count = mapping->truncate_count;
+	spin_unlock(&mapping->i_mmap_lock);
+	get_file(real);
+	vma->vm_flags &= ~VM_DONTEXPAND;
+	fput(fake);
+
+	return NOPAGE_RESTART;
+}
+
+static struct page *delay_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
+{
+	struct delay_sb_info *si;
+	struct file *fake, **real;
+	static DEFINE_MUTEX(lock); /* protect cross-thread remmap */
+	struct page *ret;
+
+	mutex_lock(&lock);
+	if (vma->vm_ops->nopage != delay_nopage) {
+		mutex_unlock(&lock);
+		return NOPAGE_RESTART;	/* race with other thread */
+	}
+	fake = vma->vm_file;
+	get_file(fake);
+	mutex_unlock(&lock);
+
+	si = fake->f_dentry->d_sb->s_fs_info;
+	real = (struct file **)&fake->f_dentry->d_fsdata;
+
+	D("addr:%p mnt:%p file:%p(%s)", (void *)address, fake->f_vfsmnt, fake, FNAME(fake));
+	if (debug_level > 3)
+		dump_stack();
+
+	if (si->state == SB_INITIAL) {
+		ret = ZERO_PAGE(address);
+		get_page(ret);
+		goto out_put;
+	}
+
+	if (!wait_event_timeout(si->blocked_tasks, *real, si->delay_tmo)) {
+		ret = NOPAGE_SIGBUS;
+		goto out_put;
+	}
+
+	mutex_lock(&lock);
+	ret = delay_remmap(vma, fake, *real);
+	mutex_unlock(&lock);
+out_put:
+	fput(fake);
+	return ret;
+}
+
+static struct vm_operations_struct delay_vma_ops = {
+	.nopage = delay_nopage,
+};
+
+static int delay_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	D("mnt:%p file:%p(%s) offset:%lu range:%p-%p", file->f_vfsmnt, file,
+			FNAME(file), vma->vm_pgoff,
+			(void *)vma->vm_start, (void *)vma->vm_end);
+	vma->vm_ops = &delay_vma_ops;
+	vma->vm_flags |= VM_DONTEXPAND;
+	return 0;
+}
+
+/* switch */
+
+static void delay_switch_mm(struct mm_struct *mm, struct vfsmount *mnt)
+{
+	struct vm_area_struct *vma;
+	struct file *fake, *real;
+
+	down_write(&mm->mmap_sem);
+	for ( vma = mm->mmap ; vma ; vma = vma->vm_next ) {
+		fake = vma->vm_file;
+		if (!fake || fake->f_vfsmnt != mnt)
+			continue;
+		real = vma->vm_file->f_dentry->d_fsdata;
+		if (real)
+			delay_remmap(vma, fake, real);
+	}
+	up_write(&mm->mmap_sem);
+}
+
+struct delayed_flock_info {
+	struct file_lock *fl;
+	int svid;
+	struct delayed_flock_info *next;
+};
+
+static void delayed_flock(struct delayed_flock_info *dfi, struct file *file)
+{
+	int err;
+	struct file_lock *fl = dfi->fl;
+
+	err = nlmclnt_set_lockowner(file->f_dentry->d_inode, fl,
+			(fl_owner_t)file, dfi->svid);
+	if (err)
+		goto out;
+
+	fl->fl_file = file;
+	fl->fl_flags |= FL_LOCAL;
+
+	if (fl->fl_flags & FL_FLOCK)
+		err = file->f_op->flock(file, F_SETLK, fl);
+	else
+		err = file->f_op->lock(file, F_SETLK, fl);
+
+out:
+	locks_free_lock(fl);
+	dfi->fl = NULL;
+	kfree(dfi);
+
+	if (err)
+		eprintk("oh shit :( can't lock file back in %d:%s (%d)\n",
+				get_exec_env()->veid,
+				file->f_dentry->d_name.name, err);
+}
+
+static void apply_delayed_locks(struct file *fake, struct file *real)
+{
+	struct delayed_flock_info *dfi;
+
+	while (fake->private_data != NULL) {
+		dfi = fake->private_data;
+		fake->private_data = dfi->next;
+		delayed_flock(dfi, real);
+	}
+}
+
+static void delay_switch_fd(struct files_struct *files, struct vfsmount *mnt)
+{
+	struct fdtable *fdt;
+	int i;
+	struct file *fake, *real, *rel = NULL;
+
+	i = 0;
+restart:
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	for ( ; i < fdt->max_fdset && i < fdt->max_fds ; i++ ) {
+		fake = fdt->fd[i];
+		if (!fake || fake->f_vfsmnt != mnt)
+			continue;
+
+		real = fake->f_dentry->d_fsdata;
+		if (!real || IS_ERR(real))
+			continue;
+
+		get_file(real);
+		rcu_assign_pointer(fdt->fd[i], real);
+		spin_unlock(&files->file_lock);
+
+		apply_delayed_locks(fake, real);
+
+		fake->private_data = rel;
+		rel = fake;
+		goto restart;
+	}
+	spin_unlock(&files->file_lock);
+
+	synchronize_rcu(); /* wait till fget_light gets the reference */
+
+	while (rel != NULL) {
+		fake = rel;
+		rel = fake->private_data;
+		fake->private_data = NULL;
+		fput(fake);
+	}
+}
+
+static void delay_switch_fs(struct fs_struct *fs, struct vfsmount *mnt)
+{
+	struct file *filp;
+
+	if (fs->rootmnt == mnt) {
+		filp = fs->root->d_fsdata;
+		if (filp && !IS_ERR(filp))
+			set_fs_root(fs, filp->f_vfsmnt, filp->f_dentry);
+	}
+
+	if (fs->pwdmnt == mnt) {
+		filp = fs->pwd->d_fsdata;
+		if (filp && !IS_ERR(filp))
+			set_fs_pwd(fs, filp->f_vfsmnt, filp->f_dentry);
+	}
+}
+
+static void delay_switch_current(struct vfsmount *mnt)
+{
+	delay_switch_fs(current->fs, mnt);
+	delay_switch_fd(current->files, mnt);
+	delay_switch_mm(current->mm, mnt);
+}
+
+static void delay_switch_one(struct task_struct *p, struct vfsmount *mnt)
+{
+	struct files_struct *files;
+	struct fs_struct *fs;
+	struct mm_struct *mm;
+
+	D("mnt:%p task:%d(%s)", mnt, p->pid, p->comm);
+	task_lock(p);
+	fs = p->fs;
+	if (fs) {
+		atomic_inc(&fs->count);
+		task_unlock(p);
+		delay_switch_fs(fs, mnt);
+		put_fs_struct(fs);
+	} else
+		task_unlock(p);
+
+	files = get_files_struct(p);
+	if (files) {
+		delay_switch_fd(files, mnt);
+		put_files_struct(files);
+	}
+
+	mm = get_task_mm(p);
+	if (mm) {
+		delay_switch_mm(mm, mnt);
+		mmput(mm);
+	}
+}
+
+static void delayfs_switch_all(struct vfsmount *mnt)
+{
+	struct ve_struct *env;
+	struct task_struct *p;
+
+	env = get_exec_env();
+
+	/* see comment in cpt_mm_prepare */
+	write_lock_irq(&tasklist_lock);
+	do {
+		if (list_empty(&env->vetask_auxlist))
+			break;
+
+		p = list_entry(env->vetask_auxlist.prev,
+				struct task_struct, ve_task_info.aux_list);
+		list_del(&VE_TASK_INFO(p)->aux_list);
+		list_add(&VE_TASK_INFO(p)->aux_list, &env->vetask_auxlist);
+
+		get_task_struct(p);
+		write_unlock_irq(&tasklist_lock);
+
+		delay_switch_one(p, mnt);
+
+		put_task_struct(p);
+
+		cond_resched();
+
+		write_lock_irq(&tasklist_lock);
+	} while (p != env->init_entry);
+	write_unlock_irq(&tasklist_lock);
+}
+
+/* wait */
+
+static int delayfs_restart(void)
+{
+	if (signal_pending(current))
+		return -EINTR;
+
+	set_tsk_thread_flag(current, TIF_SIGPENDING);
+	return -ERESTARTSYS;
+}
+
+static int delayfs_wait_mnt(struct vfsmount *mnt)
+{
+	struct delay_sb_info *si = mnt->mnt_sb->s_fs_info;
+	int res;
+
+	if (si->state == SB_INITIAL) {
+		WARN_ON(1);
+		return -EDEADLK;
+	}
+
+	if (si->state == SB_BROKEN)
+		return -EIO;
+
+	D("si:%p from:%p", si, __builtin_return_address(0));
+	if (debug_level > 3)
+		dump_stack();
+
+	if (si->block_intr)
+		res = wait_event_timeout(si->blocked_tasks, 
+						si->state >= SB_FINISHED,
+						si->delay_tmo);
+	else
+		res = wait_event_interruptible_timeout(si->blocked_tasks,
+						si->state >= SB_FINISHED,
+						si->delay_tmo);
+
+	if (!res)
+		return -EIO;
+
+	delay_switch_current(mnt);
+
+	return delayfs_restart();
+}
+
+static int delayfs_preopen(struct file *fake, struct delay_sb_info *si);
+
+static int delayfs_wait_file(struct file *fake)
+{
+	struct delay_sb_info *si = fake->f_dentry->d_sb->s_fs_info;
+	struct file **real = (struct file **)&fake->f_dentry->d_fsdata;
+	int res;
+
+	if (si->state == SB_INITIAL) {
+		WARN_ON(1);
+		return -EDEADLK;
+	}
+
+	D("mnt:%p file:%p(%s) from:%p", fake->f_vfsmnt, fake, FNAME(fake),
+			__builtin_return_address(0));
+	if (debug_level > 3)
+		dump_stack();
+
+	if (si->block_intr)
+		res = wait_event_timeout(si->blocked_tasks, si->real,
+							si->delay_tmo);
+	else
+		res = wait_event_interruptible_timeout(si->blocked_tasks, si->real,
+							si->delay_tmo);
+
+	if (!res)
+		return -EIO;
+
+	if (!*real) {
+		if (delayfs_preopen(fake, si))
+			return -EIO;
+	}
+
+	delay_switch_current(fake->f_vfsmnt);
+
+	if (IS_ERR(*real))
+		return -EIO;
+
+	return delayfs_restart();
+}
+
+/* stubs */
+
+static int delay_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	return delayfs_wait_mnt(nd->mnt);
+}
+
+static int delay_getattr(struct vfsmount *mnt, struct dentry *d, struct kstat *stat)
+{
+	return delayfs_wait_mnt(mnt);
+}
+
+#ifdef DEBUG
+
+static int delay_create (struct inode *dir, struct dentry *dentry,
+		int mode, struct nameidata *nd)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static struct dentry *delay_lookup(struct inode *dir,
+			struct dentry *dentry, struct nameidata *nd)
+{
+	WARN_ON(1);
+	return ERR_PTR(-EIO);
+}
+
+static int delay_link (struct dentry *old_dentry, struct inode *dir,
+		struct dentry *dentry)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_unlink(struct inode *dir, struct dentry *dentry)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_symlink (struct inode *dir, struct dentry *dentry,
+		const char *symname)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_mkdir(struct inode *dir, struct dentry *dentry,
+			int mode)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_rmdir (struct inode *dir, struct dentry *dentry)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_mknod (struct inode *dir, struct dentry *dentry,
+			int mode, dev_t rdev)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_rename (struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static void delay_truncate (struct inode *inode)
+{
+	WARN_ON(1);
+}
+
+static int delay_setattr(struct dentry *dentry, struct iattr *attrs)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static ssize_t delay_getxattr(struct dentry *dentry, const char *name,
+			void *buffer, size_t size)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static ssize_t delay_listxattr(struct dentry *dentry, char *buffer,
+			size_t buffer_size)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static int delay_removexattr(struct dentry *dentry, const char *name)
+{
+	WARN_ON(1);
+	return -EIO;
+}
+
+static void delay_truncate_range(struct inode *inode, loff_t start, loff_t stop)
+{
+	WARN_ON(1);
+}
+
+#endif /* DEBUG */
+
+static struct inode_operations delay_dir_iops = {
+	/*
+	 * It's a hack - all the lookup happens with the
+	 * permission checks, thus we can safely freeze
+	 * the tasks in this call
+	 */
+	.permission = delay_permission,
+	.getattr = delay_getattr,
+#ifdef DEBUG
+	.create		= delay_create,
+	.lookup		= delay_lookup,
+	.link		= delay_link,
+	.unlink		= delay_unlink,
+	.symlink	= delay_symlink,
+	.mkdir		= delay_mkdir,
+	.rmdir		= delay_rmdir,
+	.mknod		= delay_mknod,
+	.rename		= delay_rename,
+	/* .readlink	- EINVAL on root and sleep on permissions */
+	/* .follow_link	- must be no-op
+	   .put_link	*/
+	.truncate	= delay_truncate,
+	.setattr	= delay_setattr,
+	.setxattr	= delay_setxattr,
+	.getxattr	= delay_getxattr,
+	.listxattr	= delay_listxattr,
+	.removexattr	= delay_removexattr,
+	.truncate_range = delay_truncate_range, /* exists only in shm */
+#endif /* DEBUG */
+};
+
+static long delay_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	return delayfs_wait_file(filp);
+}
+
+static loff_t delay_llseek(struct file *filp, loff_t offset, int origin)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_read(struct file *filp, char __user *buf,
+			size_t size, loff_t *ppos)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_write(struct file *filp, const char __user *buf,
+			size_t siz, loff_t *ppos)
+{
+	return delayfs_wait_file(filp);
+}
+
+static int delay_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return delayfs_wait_file(filp);
+}
+
+static int delay_fsync(struct file *filp, struct dentry *dentry,
+			int datasync)
+{
+	/* nothing to sync and there is no reason to block */
+	return 0;
+}
+
+static int delay_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_readv(struct file *filp, const struct iovec *iov,
+			unsigned long nr_segs, loff_t *ppos)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_writev(struct file *filp, const struct iovec *iov,
+			unsigned long nr_segs, loff_t *ppos)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_sendfile(struct file *filp, loff_t *ppos,
+			size_t count, read_actor_t actor, void *target)
+{
+	return delayfs_wait_file(filp);
+}
+
+/* see do_sendfile, generic_file_sendfile and file_send_actor*/
+static ssize_t delay_sendpage(struct file *filp, struct page *page,
+			int off, size_t len, loff_t *pos, int more)
+{
+	return delayfs_wait_file(filp);
+}
+
+static int delay_dir_notify(struct file *filp, unsigned long arg)
+{
+	return delayfs_wait_file(filp);
+}
+
+static int delay_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_splice_write(struct pipe_inode_info *pipe,
+			struct file *filp, loff_t *ppos, size_t len,
+			unsigned int flags)
+{
+	return delayfs_wait_file(filp);
+}
+
+static ssize_t delay_splice_read(struct file *filp, loff_t *ppos,
+			struct pipe_inode_info *pipe, size_t len,
+			unsigned int flags)
+{
+	return delayfs_wait_file(filp);
+}
+
+static int delay_release(struct inode *ino, struct file *f)
+{
+	struct delayed_flock_info *dfi;
+
+	while (f->private_data) {
+		dfi = f->private_data;
+		f->private_data = dfi->next;
+
+		if (dfi->fl)
+			locks_free_lock(dfi->fl);
+		kfree(dfi);
+	}
+	return 0;
+}
+
+static struct file_operations delay_dir_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl = delay_ioctl,
+	.compat_ioctl	= delay_ioctl,
+	.mmap = delay_mmap,
+	/* .open	= not required */
+	.release	= delay_release,
+	.llseek		= delay_llseek,
+	.read		= delay_read,
+	.write		= delay_write,
+	.readdir	= delay_readdir,
+	/* .poll	- not required. by default return DEFAULT_POLLMASK */
+	/* .flush	- not required */
+	.fsync		= delay_fsync, /* non-blocked */
+	/* .fasync	- not required */
+	.lock		= delay_lock,
+	.readv		= delay_readv,
+	.writev		= delay_writev,
+	.sendfile	= delay_sendfile,
+	.sendpage	= delay_sendpage,
+	/* .get_unmapped_area - not required. for NOMMU only? */
+	/* .check_flags		FIXME problem with O_NOATIME O_DIRECT in setfl */
+	.dir_notify	= delay_dir_notify,
+	.flock		= delay_flock,
+	.splice_write	= delay_splice_write,
+	.splice_read	= delay_splice_read,
+	/* .aio_read	- aio banned. sys_io_submit return -EINVAL
+	   .aio_write
+	   .aio_fsync	*/
+};
+
+static void delayfs_release_dentry(struct dentry *dentry)
+{
+	struct file *real = dentry->d_fsdata;
+
+	D("de:%p name:%s real:%p", dentry, dentry->d_name.name, real);
+	if (real && !IS_ERR(real))
+		fput(real);
+}
+
+static struct dentry_operations delay_dir_dops = {
+       .d_release = delayfs_release_dentry,
+};
+
+static struct super_operations delay_super_ops = {
+};
+
+static int delay_fill_sb(struct super_block *sb, void *data, int silent)
+{
+	struct inode *rinode;
+	struct delay_sb_info *si;
+
+	si = kzalloc(sizeof(struct delay_sb_info), GFP_KERNEL);
+	if (!si)
+		goto err;
+
+	init_waitqueue_head(&si->blocked_tasks);
+	spin_lock_init(&si->file_lock);
+
+	sb->s_fs_info = si;
+	sb->s_op = &delay_super_ops;
+
+	rinode = new_inode(sb);
+	if (!rinode)
+		goto err_free;
+
+	rinode->i_ino = 1;
+	rinode->i_mtime = rinode->i_atime = rinode->i_ctime = CURRENT_TIME;
+	rinode->i_blocks = 0;
+	rinode->i_uid = rinode->i_gid = 0;
+	rinode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
+	rinode->i_op = &delay_dir_iops;
+	rinode->i_fop = &delay_dir_fops;
+	rinode->i_nlink = 2;
+
+	sb->s_root = d_alloc_root(rinode);
+	if (!sb->s_root)
+		goto err_iput;
+
+	D("sb:%p si:%p ino:%p root:%p", sb, si, rinode, sb->s_root);
+	return 0;
+
+err_iput:
+	iput(rinode);
+err_free:
+	kfree(si);
+err:
+	return -ENOMEM;
+}
+
+static int delay_get_sb(struct file_system_type *type, int flags,
+		const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	D();
+	return get_sb_nodev(type, flags, data, delay_fill_sb, mnt);
+}
+
+static void delay_kill_sb(struct super_block *s)
+{
+	struct delay_sb_info *si = s->s_fs_info;
+
+	D("si:%p", si);
+	BUG_ON(waitqueue_active(&si->blocked_tasks));
+
+	while (si->bi_list) {
+		struct unix_bind_info *i;
+
+		i = si->bi_list;
+		si->bi_list = i->next;
+
+		sock_put(i->sk);
+		kfree(i);
+	}
+
+	mntput(si->real);
+	kfree(si->type);
+	free_page((unsigned long )si->data);
+	kfree(si);
+	kill_anon_super(s);
+}
+
+struct file_system_type delayfs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "delayfs",
+	.get_sb		= delay_get_sb,
+	.kill_sb	= delay_kill_sb,
+	.fs_flags	= FS_MANGLE_PROC | FS_VIRTUALIZED,
+};
+
+static int create_delayed_context(cpt_context_t *ctx)
+{
+	int i;
+	struct cpt_delayed_context *dctx;
+
+	if (ctx->dctx != NULL)
+		return 0;
+
+	dctx = kzalloc(sizeof(*dctx), GFP_KERNEL);
+	if (dctx == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < CPT_DOBJ_MAX; i++)
+		INIT_LIST_HEAD(&dctx->object_array[i]);
+	dctx->ve_id = ctx->ve_id;
+
+	ctx->dctx = dctx;
+	return 0;
+}
+
+#define DELAYFS_INITIAL_RETRY_TIMEOUT (16 * HZ)
+static int delay_max_timeout = 120 * HZ; 
+
+static void delayfs_nfs_handle_mount_failure(struct delay_sb_info *si)
+{
+	struct nfs_mount_data *mount_data = si->data;
+
+	if (si->delay_tmo < delay_max_timeout)
+		si->delay_tmo <<= 1;
+	if (mount_data->timeo < delay_max_timeout)
+		mount_data->timeo <<= 1;
+}
+
+static void delayfs_nfs_restore_mount_params(struct delay_sb_info *si)
+{
+	nfs_change_server_params(si->real->mnt_sb->s_fs_info, si->nfs_mnt_soft,
+		       			si->nfs_delay_tmo, si->nfs_mnt_retrans);
+}
+
+static void delayfs_prepare_for_remount_loop(struct delay_sb_info *si)
+{
+	if (!strcmp(si->type, "nfs")) {
+		struct nfs_mount_data *mount_data = si->data;
+
+		/*
+		 * Save real NFS mount parameters for further replacement.
+		 */
+		si->nfs_mnt_soft = mount_data->flags & NFS_MOUNT_SOFT;
+		si->nfs_delay_tmo = mount_data->timeo;
+		si->nfs_mnt_retrans = mount_data->retrans;
+		/*
+		 * Set DFS parameters used during remount procedure.
+		 */
+		si->delay_tmo = (si->nfs_mnt_soft ?
+				(si->nfs_delay_tmo * si->nfs_mnt_retrans * HZ) :
+				MAX_SCHEDULE_TIMEOUT);
+		si->block_intr = (mount_data->flags & NFS_MOUNT_INTR) ? 0 : 1;
+		si->handle_mount_failure = delayfs_nfs_handle_mount_failure;
+		si->restore_mount_params = delayfs_nfs_restore_mount_params;
+		/*
+		 * Hack NFS mount options to avoid hanging during remount.
+		 */
+		mount_data->flags |= NFS_MOUNT_SOFT;
+		mount_data->timeo = 1;
+		mount_data->retrans = 1;
+	} else {
+		si->block_intr = 0;
+		si->delay_tmo = MAX_SCHEDULE_TIMEOUT;
+		si->handle_mount_failure = NULL;
+		si->restore_mount_params = NULL;
+	}
+}
+
+/* first stage */
+
+struct vfsmount *rst_mount_delayfs(char *type, int flags,
+		char *name, void *data, cpt_context_t *ctx)
+{
+	struct vfsmount *mnt;
+	struct delay_sb_info *si;
+	int err;
+
+	if (check_fs_presence(type)) {
+		eprintk_ctx("failed to find %s file system\n", type);
+		err = -ENODEV;
+		goto out;
+	}
+
+	err = create_delayed_context(ctx);
+	if (err)
+		goto out;
+
+	mnt = vfs_kern_mount(&delayfs_type, flags, name, NULL);
+	err = PTR_ERR(mnt);
+	if (IS_ERR(mnt))
+		goto out;
+
+	si = mnt->mnt_sb->s_fs_info;
+
+	err = -ENOMEM;
+	si->data = (void *) __get_free_page(GFP_KERNEL);
+	if (!si->data)
+		goto out_put;
+	copy_page(si->data, data);
+
+	err = -ENOMEM;
+	si->type = kstrdup(type, GFP_KERNEL);
+	if (!si->type)
+		goto out_put;
+
+	delayfs_prepare_for_remount_loop(si);
+
+	return mnt;
+
+out_put:
+	kern_umount(mnt);
+out:
+	return ERR_PTR(err);
+}
+
+struct file *rst_delayfs_screw(struct vfsmount *mnt,
+		char *name, int flags, loff_t offset, unsigned int mode)
+{
+	struct dentry *dentry;
+	struct inode *inode = NULL;
+	struct file *filp;
+	int err;
+
+	err = -EFAULT;
+	if (mnt->mnt_sb->s_type != &delayfs_type)
+		goto out;
+
+	err = -ENOMEM;
+	inode = new_inode(mnt->mnt_sb);
+	if (!inode)
+		goto out;
+	inode->i_op = &delay_dir_iops;
+	inode->i_fop = &delay_dir_fops;
+	inode->i_mode = mode & S_IFMT;
+
+	dentry = d_alloc_name(mnt->mnt_root, name);
+	err = -ENOMEM;
+	if (!dentry)
+		goto out;
+
+	dentry->d_op = &delay_dir_dops;
+	d_instantiate(dentry, inode);
+	inode = NULL;
+
+	mntget(mnt);
+	filp = dentry_open(dentry, mnt, flags);
+	err = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out;
+
+	filp->f_pos = offset;
+	filp->f_heavy = 1;
+
+	D("mnt:%p file:%p de:%p ino:%p name:%s flags:%x offset:%lld",
+			mnt, filp, dentry, dentry->d_inode, name, flags, offset);
+	return filp;
+
+out:
+	D("mnt:%p name:%s flags:%x err:%d", mnt, name, flags, err);
+	iput(inode);
+	return ERR_PTR(err);
+}
+
+static int mknod_by_mntref(const char __user *filename, int mode,
+				unsigned dev, struct vfsmount *mnt)
+{
+	struct dentry * dentry;
+	struct nameidata nd;
+	int error = 0;
+
+	if (S_ISDIR(mode))
+		return -EPERM;
+
+	error = rst_path_lookup_at(mnt,  mnt->mnt_root, filename, LOOKUP_PARENT |
+			LOOKUP_DIVE, &nd);
+	if (error)
+		return error;
+
+	dentry = lookup_create(&nd, 0);
+	error = PTR_ERR(dentry);
+	
+	if (!IS_POSIXACL(nd.dentry->d_inode))
+		mode &= ~current->fs->umask;
+	if (!IS_ERR(dentry)) {
+		switch (mode & S_IFMT) {
+		case 0: case S_IFREG:
+			error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
+			break;
+		case S_IFCHR: case S_IFBLK:
+			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
+					new_decode_dev(dev));
+			break;
+		case S_IFIFO: case S_IFSOCK:
+			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
+			break;
+		case S_IFDIR:
+			error = -EPERM;
+			break;
+		default:
+			error = -EINVAL;
+		}
+		dput(dentry);
+	}
+	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	path_release(&nd);
+
+	return error;
+
+}
+
+/* second stage */
+int rebind_unix_socket(struct vfsmount *rmnt, struct unix_bind_info *bi,
+	       		int flags)
+{
+	int err;
+	struct nameidata nd;
+	char *name = ((char *)bi->path) + bi->path_off;
+
+	if (rst_path_lookup_at(rmnt,  rmnt->mnt_root, name, flags, &nd) < 0) {
+		err = mknod_by_mntref(name, S_IFSOCK | (bi->i_mode & S_IALLUGO),
+					0, rmnt);
+		if (err) {
+			printk("%s: mknod [%s] err %d\n", __func__, name, err);
+			return err;
+		}
+
+		err = rst_path_lookup_at(rmnt,  rmnt->mnt_root, name, flags, &nd);
+		if (err < 0) {
+			printk("%s: lookup [%s] err %d\n", __func__, name, err);
+			return err;
+		}
+	}
+
+	if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) {
+		printk("%s: not a socket dentry %s\n", __func__, name);
+		path_release(&nd);
+		return -EINVAL;
+	}
+
+	err = unix_bind_path(bi->sk, nd.dentry, nd.mnt);
+	if (err < 0)
+		printk("%s: bind-path [%s] err %d\n", __func__, name, err);
+
+	return err;
+}
+
+static void rebind_unix_sockets(struct vfsmount *rmnt,
+		struct delay_sb_info *si)
+{
+	struct unix_bind_info *bi;
+
+	while ((bi = si->bi_list) != NULL) {
+		si->bi_list = bi->next;
+
+		rebind_unix_socket(rmnt, bi, 0);
+
+		sock_put(bi->sk);
+		kfree(bi);
+	}
+}
+
+static int rst_remount_delayfs(struct vfsmount *mnt)
+{
+	struct delay_sb_info *si = mnt->mnt_sb->s_fs_info;
+	struct vfsmount *real_mnt;
+
+	if (si->real)
+		return -EBUSY;
+
+	real_mnt = rst_kern_mount(si->type, mnt->mnt_sb->s_flags | MS_CPTMOUNT,
+			mnt->mnt_devname, si->data);
+
+	if (IS_ERR(real_mnt))
+		return PTR_ERR(real_mnt);
+
+	D("fake: %p(%s)", mnt, mnt->mnt_sb->s_type->name);
+	D("real: %p(%s)", real_mnt, real_mnt->mnt_sb->s_type->name);
+	D("prnt: %p(%s)", mnt->mnt_parent, mnt->mnt_parent->mnt_sb->s_type->name);
+
+	si->real = mntget(real_mnt);
+
+	replace_mount(real_mnt, mnt);
+
+	rebind_unix_sockets(real_mnt, si);
+
+	return 0;
+}
+
+static int make_flags(struct file *filp)
+{
+	int flags = O_NOFOLLOW|O_NONBLOCK|O_NOCTTY;
+
+	switch (filp->f_mode &(FMODE_READ|FMODE_WRITE)) {
+		case FMODE_READ|FMODE_WRITE:
+			flags |= O_RDWR; break;
+		case FMODE_WRITE:
+			flags |= O_WRONLY; break;
+		case FMODE_READ:
+			flags |= O_RDONLY; break;
+		default: break;
+	}
+	flags |= filp->f_flags & ~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC);
+	return flags;
+}
+
+static int delayfs_preopen(struct file *fake, struct delay_sb_info *si)
+{
+	struct nameidata nd;
+	struct file *real;
+	int err, flags;
+
+	flags = make_flags(fake);
+
+	D("fake:%p(%s) flags:%d pos:%lld real_mnt:%p",
+			fake, FNAME(fake), flags,
+			(long long)fake->f_pos, si->real);
+
+	err = rst_path_lookup_at(si->real, si->real->mnt_root,
+			FNAME(fake), LOOKUP_FOLLOW, &nd);
+	if (err)
+		goto out;
+
+	real = dentry_open(nd.dentry, nd.mnt, flags);
+	err = PTR_ERR(real);
+	if (IS_ERR(real))
+		goto out;
+
+	D("real:%p mnt:%p de:%p ino:%p", real, real->f_vfsmnt, real->f_dentry,
+			real->f_dentry->d_inode);
+
+	if (fake->f_pos != real->f_pos) {
+		loff_t off;
+
+		off = vfs_llseek(real, fake->f_pos, 0);
+		if (off < 0) {
+			eprintk("%s llseek:%d\n", __func__, (int)off);
+			real->f_pos = fake->f_pos;
+		}
+	}
+
+	spin_lock(&si->file_lock);
+	if (!fake->f_dentry->d_fsdata) {
+		fake->f_dentry->d_fsdata = real;
+		real = NULL;
+	}
+	spin_unlock(&si->file_lock);
+
+	if (real)
+		fput(real);
+
+	err = 0;
+out:
+	D("file:%p(%s) err:%d", fake, fake->f_dentry->d_name.name, err);
+
+	return err;
+}
+
+static void delayfs_break(struct file *fake)
+{
+	fake->f_dentry->d_fsdata = ERR_PTR(-EIO);
+}
+
+static void delay_break_all(struct cpt_delayed_context *ctx)
+{
+	cpt_object_t *obj;
+	struct file *file;
+	struct vfsmount *mnt;
+	struct delay_sb_info *si;
+
+	for_each_object(obj, CPT_DOBJ_FILE) {
+		file = obj->o_obj;
+		if (file->f_dentry->d_fsdata == NULL)
+			delayfs_break(file);
+	}
+
+	for_each_object(obj, CPT_DOBJ_VFSMOUNT_REF) {
+		mnt = obj->o_obj;
+
+		si = mnt->mnt_sb->s_fs_info;
+		si->state = SB_BROKEN;
+		wake_up_all(&si->blocked_tasks);
+	}
+}
+
+static void dctx_release_objects(struct cpt_delayed_context *ctx)
+{
+	cpt_object_t *obj, *nobj;
+
+	for_each_object_safe(obj, nobj, CPT_DOBJ_VFSMOUNT_REF) {
+		list_del(&obj->o_list);
+		mntput(obj->o_obj);
+		kfree(obj->o_image);
+		kfree(obj);
+	}
+
+	for_each_object_safe(obj, nobj, CPT_DOBJ_FILE) {
+		list_del(&obj->o_list);
+		fput(obj->o_obj);
+		kfree(obj->o_image);
+		kfree(obj);
+	}
+}
+
+void destroy_delayed_context(struct cpt_delayed_context *dctx)
+{
+	delay_break_all(dctx);
+	dctx_release_objects(dctx);
+	kfree(dctx);
+}
+
+static int delayfs_sillyrename(struct file *fake)
+{
+	struct file *real = fake->f_dentry->d_fsdata;
+	int err;
+
+	if (!real || IS_ERR(real))
+		return -ENODEV;
+
+	dget(real->f_dentry); /* see nfs_unlink */
+	err = vfs_unlink(real->f_dentry->d_parent->d_inode, real->f_dentry);
+	dput(real->f_dentry);
+
+	D("file:%p(%s) ret:%d", fake, fake->f_dentry->d_name.name, err);
+	return err;
+}
+
+/* wire */
+
+int rst_freeze_delayfs(cpt_context_t *ctx)
+{
+	cpt_object_t *obj, *nobj;
+	struct vfsmount *mnt;
+	struct delay_sb_info *si;
+	/* dctx must not be NULL if any delayed object exists */
+	struct cpt_delayed_context *dctx = ctx->dctx;
+
+	for_each_object_safe(obj, nobj, CPT_OBJ_VFSMOUNT_REF) {
+		if (!(obj->o_flags & CPT_VFSMOUNT_DELAYFS))
+			continue;
+
+		list_move(&obj->o_list,
+				&dctx->object_array[CPT_DOBJ_VFSMOUNT_REF]);
+		ctx->objcount--;
+		mnt = obj->o_obj;
+		si = mnt->mnt_sb->s_fs_info;
+		si->state = SB_LOCKED;
+	}
+
+	for_each_object_safe(obj, nobj, CPT_OBJ_FILE)
+		if (obj->o_flags & CPT_FILE_DELAYFS) {
+			list_move(&obj->o_list,
+					&dctx->object_array[CPT_DOBJ_FILE]);
+			ctx->objcount--;
+		}
+	return 0;
+}
+
+static void delayfs_resume(struct cpt_delayed_context *ctx,
+		struct list_head *broken_mounts)
+{
+	int ret;
+	struct delay_sb_info *si;
+	cpt_object_t *obj, *nobj;
+	struct vfsmount *mnt;
+	struct file *file;
+
+	/* mount */
+	for_each_object_safe(obj, nobj, CPT_DOBJ_VFSMOUNT_REF) {
+		BUG_ON(!(obj->o_flags & CPT_VFSMOUNT_DELAYFS));
+
+		mnt = obj->o_obj;
+		si = mnt->mnt_sb->s_fs_info;
+		ret = rst_remount_delayfs(mnt);
+		if (ret) {
+			if (si->handle_mount_failure)
+				si->handle_mount_failure(si);
+			list_move(&obj->o_list, broken_mounts);
+		} else
+			wake_up_all(&si->blocked_tasks);
+	}
+
+	/* preopen */
+	for_each_object(obj, CPT_DOBJ_FILE) {
+		BUG_ON(!(obj->o_flags & CPT_FILE_DELAYFS));
+
+		file = obj->o_obj;
+		si = file->f_vfsmnt->mnt_sb->s_fs_info;
+		/* mount is broken or already reopened */
+		if (!si->real || file->f_dentry->d_fsdata != NULL)
+			continue;
+
+		ret = delayfs_preopen(file, si);
+		if (ret) {
+			printk("%s: preopen %s err %d\n", __func__,
+					FNAME(file), ret);
+			delayfs_break(file);
+		} else {
+			if (obj->o_flags & CPT_FILE_SILLYRENAME)
+				delayfs_sillyrename(file);
+		}
+	}
+
+	/* wakeup */
+	for_each_object(obj, CPT_DOBJ_VFSMOUNT_REF) {
+		mnt = obj->o_obj;
+
+		D("wakeup %p", mnt);
+
+		si = mnt->mnt_sb->s_fs_info;
+		if (si->restore_mount_params)
+			si->restore_mount_params(si);
+		si->state = SB_FINISHED;
+		wake_up_all(&si->blocked_tasks);
+	}
+
+	/**
+	 * all files preopened or broken -- now no one blocks the mmap_sem write lock
+	 */
+
+	/* switch */
+	for_each_object(obj, CPT_DOBJ_VFSMOUNT_REF) {
+		mnt = obj->o_obj;
+		delayfs_switch_all(mnt);
+	}
+}
+
+static int delay_first_timeout = 1 * HZ;
+
+struct ctl_table delayfs_table[] = {
+	{
+		.ctl_name	= 9486,
+		.procname	= "first_timeout",
+		.data		= &delay_first_timeout,
+		.maxlen		= sizeof(delay_first_timeout),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 9487,
+		.procname	= "max_timeout",
+		.data		= &delay_max_timeout,
+		.maxlen		= sizeof(delay_max_timeout),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+static int delayfs_resume_fn(void *d)
+{
+	struct cpt_delayed_context *dctx = d;
+	int retry_timeout = DELAYFS_INITIAL_RETRY_TIMEOUT;
+	unsigned long abort_timeout;
+	LIST_HEAD(broken_mounts);
+	LIST_HEAD(live_mounts);
+
+	dctx->dfs_daemon = current;
+
+	abort_timeout = (xprt_abort_timeout == RPC_MAX_ABORT_TIMEOUT ? 0 :
+		jiffies + (unsigned long)xprt_abort_timeout * HZ);
+
+	daemonize("dfs_resume/%d", dctx->ve_id);
+	ve_printk(VE_LOG_BOTH, "DFS%d: resuming daemon started\n", dctx->ve_id);
+
+	allow_signal(SIGKILL);
+
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	complete(&dctx->dfs_notify);
+	/* Waiting for delayed context to be filled by resume process */
+	schedule();
+
+	schedule_timeout_interruptible(delay_first_timeout);
+
+try_again:
+	if (signal_pending(current)) {
+		ve_printk(VE_LOG_BOTH, "DFS%d: Got kill signal\n", dctx->ve_id);
+		goto out_splice;
+	}
+
+	if (abort_timeout && time_after(jiffies, abort_timeout)) {
+		ve_printk(VE_LOG_BOTH, "DFS%d: Timed out\n", dctx->ve_id);
+		goto out_splice;
+	}
+
+	delayfs_resume(dctx, &broken_mounts);
+
+	list_splice_init(&dctx->object_array[CPT_DOBJ_VFSMOUNT_REF],
+			&live_mounts);
+
+	if (!list_empty(&broken_mounts)) {
+		list_splice_init(&broken_mounts,
+				&dctx->object_array[CPT_DOBJ_VFSMOUNT_REF]);
+
+		ve_printk(VE_LOG_BOTH, "DFS%d: Retrying delayed mount in %d seconds\n",
+					dctx->ve_id, retry_timeout / HZ);
+		schedule_timeout_interruptible(retry_timeout);
+		if (retry_timeout < delay_max_timeout)
+			retry_timeout <<= 1;
+
+		goto try_again;
+	}
+	ve_printk(VE_LOG_BOTH, "DFS%d: Delayed mounts successfully resumed\n", dctx->ve_id);
+out_splice:
+	list_splice(&live_mounts, &dctx->object_array[CPT_DOBJ_VFSMOUNT_REF]);
+	destroy_delayed_context(dctx);
+	module_put_and_exit(0);
+}
+
+int rst_init_delayfs_daemon(cpt_context_t *ctx)
+{
+	int pid;
+	struct cpt_delayed_context *dctx = ctx->dctx;
+
+	if (dctx == NULL)
+		return 0;
+
+	__module_get(THIS_MODULE);
+
+	init_completion(&dctx->dfs_notify);
+
+	pid = local_kernel_thread(delayfs_resume_fn, dctx,
+			CLONE_FS | CLONE_FILES | CLONE_VM | SIGCHLD, -1);
+	if (pid < 0) {
+		eprintk_ctx("%d: Failed to start delayfs daemon (err: %d)\n",
+			       	dctx->ve_id, pid);
+		destroy_delayed_context(dctx);
+		/* dctx already freed by destroy_delayed_context() above */
+		ctx->dctx = NULL;
+		module_put(THIS_MODULE);
+		return pid;
+	}
+
+	wait_for_completion(&dctx->dfs_notify);
+
+	return 0;
+}
+
+int rst_delay_flock(struct file *f, struct cpt_flock_image *fli,
+		cpt_context_t *ctx)
+{
+	int err;
+	struct delayed_flock_info *dfi;
+	struct file_lock *fl;
+
+	err = -EINVAL;
+	if (!cpt_object_has(fli, cpt_svid) ||
+			fli->cpt_svid == CPT_NOINDEX) {
+		eprintk_ctx("No SVID for flock\n");
+		goto out;
+	}
+
+	err = nlmclnt_reserve_pid(fli->cpt_svid);
+	if (err)
+		goto out;
+
+	err = -ENOMEM;
+	dfi = kmalloc(sizeof(*dfi), GFP_KERNEL);
+	if (dfi == NULL)
+		goto out;
+
+	err = -ENOMEM;
+	fl = locks_alloc_lock(1);
+	if (fl == NULL)
+		goto out1;
+
+	if (fli->cpt_flags & FL_FLOCK) {
+		fl->fl_flags = FL_FLOCK;
+		fl->fl_start = 0;
+		fl->fl_end = OFFSET_MAX;
+		fl->fl_pid = 0;
+		fl->fl_type = fli->cpt_type;
+	} else {
+		cpt_object_t *obj;
+
+		fl->fl_flags = fli->cpt_flags & ~FL_SLEEP;
+		fl->fl_end = fli->cpt_end;
+		fl->fl_start = fli->cpt_start;
+		fl->fl_type = fli->cpt_type;
+
+		err = -EINVAL;
+		obj = lookup_cpt_obj_byindex(CPT_OBJ_FILES,
+				fli->cpt_owner, ctx);
+		if (!obj) {
+			eprintk_ctx("unknown lock owner %d\n",
+					(int)fli->cpt_owner);
+			goto out2;
+		}
+		fl->fl_owner = obj->o_obj;
+		if (fl->fl_owner == NULL)
+			eprintk_ctx("no lock owner\n");
+
+		fl->fl_pid = vpid_to_pid(fli->cpt_pid);
+		if (fl->fl_pid < 0) {
+			eprintk_ctx("unknown lock pid %d\n", fl->fl_pid);
+			goto out2;
+		}
+	}
+
+	dfi->fl = fl;
+	dfi->svid = fli->cpt_svid;
+	dfi->next = f->private_data;
+
+	f->private_data = dfi;
+	return 0;
+
+out2:
+	locks_free_lock(fl);
+out1:
+	kfree(dfi);
+out:
+	return err;
+}
+
+int rst_delay_unix_bind(struct sock *sk, struct cpt_sock_image *v,
+		cpt_context_t *ctx)
+{
+	int err;
+	cpt_object_t *mntobj;
+	struct vfsmount *mnt;
+	struct super_block *sb;
+	struct unix_bind_info *dbi;
+	struct delay_sb_info *sbi;
+
+	BUG_ON(v->cpt_sockflags & CPT_SOCK_DELETED);
+
+	mntobj = lookup_cpt_obj_bypos(CPT_OBJ_VFSMOUNT_REF,
+			v->cpt_vfsmount_ref, ctx);
+	if (mntobj == NULL) {
+		eprintk_ctx("can't find vfsmount for unix socket\n");
+		return -EINVAL;
+	}
+
+	mnt = mntobj->o_obj;
+	sb = mnt->mnt_sb;
+	BUG_ON(sb->s_op != &delay_super_ops);
+
+	if (v->cpt_laddrlen - 2 <= mntobj->o_lock) {
+		eprintk_ctx("unix socket with too short name (%d %s)\n",
+				mntobj->o_lock, (char *)v->cpt_laddr);
+		return -EINVAL;
+	}
+
+	err = unix_attach_addr(sk, (struct sockaddr_un *)v->cpt_laddr,
+			v->cpt_laddrlen);
+	if (err) {
+		eprintk_ctx("can't attach unix address %d\n", err);
+		return err;
+	}
+
+	dbi = kzalloc(sizeof(*dbi), GFP_KERNEL);
+	if (dbi == NULL)
+		return -ENOMEM;
+
+	sock_hold(sk);
+	dbi->sk = sk;
+	strcpy(dbi->path, ((char *)v->cpt_laddr) + 2);
+	dbi->path_off = mntobj->o_lock;
+
+	if (cpt_object_has(v, cpt_i_mode))
+		dbi->i_mode = v->cpt_i_mode;
+
+	sbi = sb->s_fs_info;
+	dbi->next = sbi->bi_list;
+	sbi->bi_list = dbi;
+
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_epoll.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_epoll.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_epoll.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_epoll.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,175 @@
+/*
+ *
+ *  kernel/cpt/rst_epoll.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+/* These functions are static in fs/eventpoll.c */
+extern struct file_operations eventpoll_fops;
+extern int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+extern struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+extern void ep_release_epitem(struct epitem *epi);
+extern void clear_tfile_check_list(void);
+
+
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx)
+{
+	struct file *file;
+	int efd;
+
+	/* Argument "size" is ignored, use just 1 */
+	efd = sys_epoll_create(1);
+	if (efd < 0)
+		return ERR_PTR(efd);
+
+	file = fget(efd);
+	sys_close(efd);
+	return file;
+}
+
+static int restore_one_epoll(cpt_object_t *obj,
+			     loff_t pos,
+			     struct cpt_epoll_image *ebuf,
+			     cpt_context_t *ctx)
+{
+	int err = 0;
+	loff_t endpos;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	endpos = pos + ebuf->cpt_next;
+	pos += ebuf->cpt_hdrlen;
+	while (pos < endpos) {
+		struct cpt_epoll_file_image efi;
+		struct epoll_event epds;
+		
+		cpt_object_t *tobj;
+
+		err = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &efi, ctx);
+		if (err)
+			return err;
+		tobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, efi.cpt_file, ctx);
+		if (!tobj) {
+			eprintk_ctx("epoll file not found\n");
+			return -EINVAL;
+		}
+		epds.events = efi.cpt_events;
+		epds.data = efi.cpt_data;
+		down_write(&ep->sem);
+		err = ep_insert(ep, &epds, tobj->o_obj, efi.cpt_fd);
+		if (!err) {
+			struct epitem *epi;
+			epi = ep_find(ep, tobj->o_obj, efi.cpt_fd);
+			if (epi) {
+				epi->revents = efi.cpt_revents;
+				if (efi.cpt_ready) {
+					unsigned long flags;
+					write_lock_irqsave(&ep->lock, flags);
+					if (list_empty(&epi->rdllink))
+						list_add_tail(&epi->rdllink, &ep->rdllist);
+					write_unlock_irqrestore(&ep->lock, flags);
+				}
+				ep_release_epitem(epi);
+			}
+		}
+		clear_tfile_check_list();
+		up_write(&ep->sem);
+		if (err)
+			break;
+		pos += efi.cpt_next;
+	}
+	return err;
+}
+
+int rst_eventpoll(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_EPOLL];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_EPOLL || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_epoll_image *ebuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_EPOLL, sec, ebuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ebuf->cpt_file, ctx);
+		if (obj == NULL) {
+			eprintk_ctx("cannot find epoll file object\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		err = restore_one_epoll(obj, sec, ebuf, ctx);
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		sec += ebuf->cpt_next;
+	}
+
+	return 0;
+	
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_files.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_files.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_files.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_files.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,1984 @@
+/*
+ *
+ *  kernel/cpt/rst_files.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/namei.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+#include <linux/namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/signalfd.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+
+#include "cpt_syscalls.h"
+
+
+struct filejob {
+	struct filejob *next;
+	int	pid;
+	loff_t	fdi;
+};
+
+static int rst_filejob_queue(loff_t pos, cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	j = kmalloc(sizeof(*j), GFP_KERNEL);
+	if (j == NULL)
+		return -ENOMEM;
+	j->pid = current->pid;
+	j->fdi = pos;
+	j->next = ctx->filejob_queue;
+	ctx->filejob_queue = j;
+	return 0;
+}
+
+static void _anon_pipe_buf_release(struct pipe_inode_info *pipe,
+				  struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	/*
+	 * If nobody else uses this page, and we don't already have a
+	 * temporary page, let's keep track of it as a one-deep
+	 * allocation cache. (Otherwise just release our reference to it)
+	 */
+	if (page_count(page) == 1 && !pipe->tmp_page)
+		pipe->tmp_page = page;
+	else
+		page_cache_release(page);
+
+	module_put(THIS_MODULE);
+}
+
+static void *_anon_pipe_buf_map(struct pipe_inode_info *pipe,
+			   struct pipe_buffer *buf, int atomic)
+{
+	if (atomic) {
+		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
+		return kmap_atomic(buf->page, KM_USER0);
+	}
+
+	return kmap(buf->page);
+}
+
+static void _anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
+			    struct pipe_buffer *buf, void *map_data)
+{
+	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
+		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+		kunmap_atomic(map_data, KM_USER0);
+	} else
+		kunmap(buf->page);
+}
+
+static int _anon_pipe_buf_steal(struct pipe_inode_info *pipe,
+			   struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	if (page_count(page) == 1) {
+		lock_page(page);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void _anon_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	page_cache_get(buf->page);
+}
+
+static int _anon_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	return 0;
+}
+
+static struct pipe_buf_operations _anon_pipe_buf_ops = {
+	.can_merge = 1,
+	.map = _anon_pipe_buf_map,
+	.unmap = _anon_pipe_buf_unmap,
+	.release = _anon_pipe_buf_release,
+	.pin = _anon_pipe_buf_pin,
+	.get = _anon_pipe_buf_get,
+	.steal = _anon_pipe_buf_steal,
+};
+
+/* Sorta ugly... Multiple readers/writers of named pipe rewrite buffer
+ * many times. We need to mark it in CPT_OBJ_INODE table in some way.
+ */
+static int fixup_pipe_data(struct file *file, struct cpt_file_image *fi,
+			   struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct cpt_inode_image ii;
+	struct cpt_obj_bits b;
+	struct pipe_inode_info *info;
+	int err;
+	int count;
+
+	if (!S_ISFIFO(ino->i_mode)) {
+		eprintk_ctx("fixup_pipe_data: not a pipe %Ld\n", (long long)fi->cpt_inode);
+		return -EINVAL;
+	}
+	if (fi->cpt_inode == CPT_NULL)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return err;
+
+	if (ii.cpt_next <= ii.cpt_hdrlen)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_BITS, fi->cpt_inode + ii.cpt_hdrlen, &b, ctx);
+	if (err)
+		return err;
+
+	if (b.cpt_size == 0)
+		return 0;
+
+	mutex_lock(&ino->i_mutex);
+	info = ino->i_pipe;
+	if (info->nrbufs) {
+		mutex_unlock(&ino->i_mutex);
+		eprintk("pipe buffer is restored already\n");
+		return -EINVAL;
+	}
+	info->curbuf = 0;
+	count = 0;
+	while (count < b.cpt_size) {
+		struct pipe_buffer *buf = info->bufs + info->nrbufs;
+		void * addr;
+		int chars;
+
+		chars = b.cpt_size - count;
+		if (chars > PAGE_SIZE)
+			chars = PAGE_SIZE;
+		if (!try_module_get(THIS_MODULE)) {
+			err = -EBUSY;
+			break;
+		}
+
+		buf->page = alloc_page(GFP_HIGHUSER);
+		if (buf->page == NULL) {
+			err = -ENOMEM;
+			break;
+		}
+		buf->ops = &_anon_pipe_buf_ops;
+		buf->offset = 0;
+		buf->len = chars;
+		info->nrbufs++;
+		addr = kmap(buf->page);
+		err = ctx->pread(addr, chars, ctx,
+				 fi->cpt_inode + ii.cpt_hdrlen + b.cpt_hdrlen + count);
+		if (err)
+			break;
+		count += chars;
+	}
+	mutex_unlock(&ino->i_mutex);
+
+	return err;
+}
+
+static int make_flags(struct cpt_file_image *fi)
+{
+	int flags = O_NOFOLLOW;
+	switch (fi->cpt_mode&(FMODE_READ|FMODE_WRITE)) {
+	case FMODE_READ|FMODE_WRITE:
+		flags |= O_RDWR; break;
+	case FMODE_WRITE:
+		flags |= O_WRONLY; break;
+	case FMODE_READ:
+		flags |= O_RDONLY; break;
+	default: break;
+	}
+	flags |= fi->cpt_flags&~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC);
+	flags |= O_NONBLOCK|O_NOCTTY;
+	return flags;
+}
+
+static struct file *open_pipe(cpt_object_t *mntobj, char *name,
+			      struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct cpt_inode_image ii;
+	struct file *rf, *wf;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return ERR_PTR(err);
+
+	if (ii.cpt_sb == FSMAGIC_PIPEFS) {
+		int pfd[2];
+
+		if ((err = sc_pipe(pfd)) < 0)
+			return ERR_PTR(err);
+
+		rf = fcheck(pfd[0]);
+		wf = fcheck(pfd[1]);
+		get_file(rf);
+		get_file(wf);
+		sc_close(pfd[0]);
+		sc_close(pfd[1]);
+
+		if (fi->cpt_mode&FMODE_READ) {
+			struct file *tf;
+			tf = wf; wf = rf; rf = tf;
+		}
+	} else {
+		struct nameidata nd;
+
+		err = rst_path_lookup(mntobj, name, LOOKUP_FOLLOW, &nd);
+		if (err)
+			return ERR_PTR(err);
+
+		if (fi->cpt_mode&FMODE_READ) {
+			rf = dentry_open(nd.dentry, nd.mnt, flags);
+			if (IS_ERR(rf)) {
+				dprintk_ctx("filp_open\n");
+				return rf;
+			}
+			dprintk_ctx(CPT_FID "open RDONLY fifo ino %Ld %p %x\n", CPT_TID(current),
+				    (long long)fi->cpt_inode, rf, rf->f_dentry->d_inode->i_mode);
+			return rf;
+		}
+
+		dprintk_ctx(CPT_FID "open WRONLY fifo ino %Ld\n", CPT_TID(current), (long long)fi->cpt_inode);
+
+		rf = dentry_open(nd.dentry, nd.mnt, O_RDWR|O_NONBLOCK);
+		if (IS_ERR(rf))
+			return rf;
+		wf = dentry_open(dget(rf->f_dentry),
+				 mntget(rf->f_vfsmnt), flags);
+	}
+
+	/* Add pipe inode to obj table. */
+	obj = cpt_object_add(CPT_OBJ_INODE, wf->f_dentry->d_inode, ctx);
+	if (obj == NULL) {
+		fput(rf); fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	cpt_obj_setpos(obj, fi->cpt_inode, ctx);
+	obj->o_parent = rf;
+
+	/* Add the other side of the pipe to the obj table; it will not be used
+	 * (o_pos == CPT_NULL). Other processes opening the pipe will find the
+	 * inode and open it with dentry_open(). */
+	obj = cpt_object_add(CPT_OBJ_FILE, rf, ctx);
+	if (obj == NULL) {
+		fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	return wf;
+}
+
+static struct file *open_special(struct cpt_file_image *fi,
+				 unsigned flags,
+				 int deleted,
+				 struct cpt_context *ctx)
+{
+	struct cpt_inode_image *ii;
+	struct file *file;
+
+	/* Directories and named pipes are not special actually */
+	if (S_ISDIR(fi->cpt_i_mode) || S_ISFIFO(fi->cpt_i_mode))
+		return NULL;
+
+	/* No support for block devices at the moment. */
+	if (S_ISBLK(fi->cpt_i_mode))
+		return ERR_PTR(-EINVAL);
+
+	if (S_ISSOCK(fi->cpt_i_mode)) {
+		eprintk_ctx("bug: socket is not open\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Support only (some) character devices at the moment. */
+	if (!S_ISCHR(fi->cpt_i_mode))
+		return ERR_PTR(-EINVAL);
+
+	ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
+	if (ii == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Do not worry about this right now. /dev/null,zero,*random are here.
+	 * To prohibit at least /dev/mem?
+	 */
+	if (MAJOR(ii->cpt_rdev) == MEM_MAJOR) {
+		kfree(ii);
+		return NULL;
+	}
+
+	/* /dev/net/tun will be opened by caller */
+	if (fi->cpt_lflags & CPT_DENTRY_TUNTAP) {
+		kfree(ii);
+		return NULL;
+	}	
+
+	file = rst_open_tty(fi, ii, flags, ctx);
+	kfree(ii);
+	return file;
+}
+
+static int restore_posix_lock(struct file *file, struct cpt_flock_image *fli,
+		cpt_context_t *ctx)
+{
+	struct file_lock lock;
+	cpt_object_t *obj;
+
+	if (fli->cpt_flags & CPT_FLOCK_DELAYED)
+		return rst_delay_flock(file, fli, ctx);
+
+	memset(&lock, 0, sizeof(lock));
+	lock.fl_type = fli->cpt_type;
+	lock.fl_flags = fli->cpt_flags & ~FL_SLEEP;
+	lock.fl_start = fli->cpt_start;
+	lock.fl_end = fli->cpt_end;
+	obj = lookup_cpt_obj_byindex(CPT_OBJ_FILES, fli->cpt_owner, ctx);
+	if (!obj) {
+		eprintk_ctx("unknown lock owner %d\n", (int)fli->cpt_owner);
+		return -EINVAL;
+	}
+	lock.fl_owner = obj->o_obj;
+	lock.fl_pid = vpid_to_pid(fli->cpt_pid);
+	if (lock.fl_pid < 0) {
+		eprintk_ctx("unknown lock pid %d\n", lock.fl_pid);
+		return -EINVAL;
+	}
+	lock.fl_file = file;
+
+	if (lock.fl_owner == NULL)
+		eprintk_ctx("no lock owner\n");
+	return posix_lock_file(file, &lock);
+}
+
+static int restore_flock(struct file *file, struct cpt_flock_image *fli,
+		cpt_context_t *ctx)
+{
+	int cmd, err, fd;
+
+	if (fli->cpt_flags & CPT_FLOCK_DELAYED)
+		return rst_delay_flock(file, fli, ctx);
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		eprintk_ctx("BSD flock cannot be restored\n");
+		return fd;
+	}
+	get_file(file);
+	fd_install(fd, file);
+	if (fli->cpt_type == F_RDLCK) {
+		cmd = LOCK_SH;
+	} else if (fli->cpt_type == F_WRLCK) {
+		cmd = LOCK_EX;
+	} else {
+		eprintk_ctx("flock flavor is unknown: %u\n", fli->cpt_type);
+		sc_close(fd);
+		return -EINVAL;
+	}
+
+	err = sc_flock(fd, LOCK_NB | cmd);
+	sc_close(fd);
+	return err;
+}
+
+
+static int fixup_posix_locks(struct file *file,
+			     struct cpt_file_image *fi,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end;
+	struct cpt_flock_image fli;
+
+	end = pos + fi->cpt_next;
+	pos += fi->cpt_hdrlen;
+	while (pos < end) {
+		err = rst_get_object(-1, pos, &fli, ctx);
+		if (err)
+			return err;
+		if (fli.cpt_object == CPT_OBJ_FLOCK &&
+		    (fli.cpt_flags&FL_POSIX)) {
+			err = restore_posix_lock(file, &fli, ctx);
+			if (err)
+				return err;
+			dprintk_ctx("posix lock restored\n");
+		}
+		pos += fli.cpt_next;
+	}
+	return 0;
+}
+
+int rst_posix_locks(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		struct cpt_file_image fi;
+
+		if (obj->o_pos == CPT_NULL)
+			continue;
+
+		err = rst_get_object(CPT_OBJ_FILE, obj->o_pos, &fi, ctx);
+		if (err < 0)
+			return err;
+		if (fi.cpt_next > fi.cpt_hdrlen)
+			fixup_posix_locks(file, &fi, obj->o_pos, ctx);
+	}
+	return 0;
+}
+
+static int fixup_flocks(struct file *file,
+			struct cpt_file_image *fi,
+			loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end;
+	struct cpt_flock_image fli;
+
+	end = pos + fi->cpt_next;
+	pos += fi->cpt_hdrlen;
+	while (pos < end) {
+		err = rst_get_object(-1, pos, &fli, ctx);
+		if (err)
+			return err;
+		if (fli.cpt_object == CPT_OBJ_FLOCK &&
+		    (fli.cpt_flags&FL_FLOCK)) {
+			err = restore_flock(file, &fli, ctx);
+			if (err)
+				return err;
+			dprintk_ctx("bsd lock restored\n");
+		}
+		pos += fli.cpt_next;
+	}
+	return 0;
+}
+
+static int
+restore_data_chunk(struct file *file, loff_t pos, struct cpt_page_block * pgb,
+		   cpt_context_t *ctx)
+{
+	loff_t ipos = pos + pgb->cpt_hdrlen;
+	loff_t opos = pgb->cpt_start;
+	int count = pgb->cpt_end-pgb->cpt_start;
+	int err;
+
+	while (count > 0) {
+		mm_segment_t oldfs;
+		int copy = count;
+
+		if (copy > PAGE_SIZE)
+			copy = PAGE_SIZE;
+		(void)cpt_get_buf(ctx);
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+		set_fs(oldfs);
+		if (err) {
+			__cpt_release_buf(ctx);
+			return err;
+		}
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		ipos += copy;
+		err = file->f_op->write(file, ctx->tmpbuf, copy, &opos);
+		set_fs(oldfs);
+		__cpt_release_buf(ctx);
+		if (err != copy) {
+			eprintk_ctx("write() failure\n");
+			if (err >= 0)
+				err = -EIO;
+			return err;
+		}
+		count -= copy;
+	}
+	return 0;
+}
+
+
+static int fixup_reg_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_page_block pgb;
+
+	if (file->f_op->write == NULL) {
+		eprintk_ctx("no write method. Cannot restore contents of the file.\n");
+		return -EINVAL;
+	}
+
+	atomic_inc(&file->f_count);
+
+	while (pos < end) {
+		err = rst_get_object(-1, pos, &pgb, ctx);
+		if (err)
+			goto out;
+
+		dprintk_ctx("restoring file data block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+
+		err = -EINVAL;
+		switch (pgb.cpt_object) {
+		case CPT_OBJ_PAGES:
+			if (!(file->f_mode & FMODE_WRITE) ||
+			    (file->f_flags&O_DIRECT)) {
+				fput(file);
+				file = dentry_open(dget(file->f_dentry),
+						   mntget(file->f_vfsmnt),
+						   O_WRONLY | O_LARGEFILE);
+				if (IS_ERR(file))
+					return PTR_ERR(file);
+			}
+
+			err = restore_data_chunk(file, pos, &pgb, ctx);
+			if (err)
+				goto out;
+			break;
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+		case CPT_OBJ_ITERPAGES:
+		case CPT_OBJ_ITERYOUNGPAGES:
+			err = -EINVAL;
+			if (file->f_vfsmnt != get_exec_env()->shmem_mnt)
+				goto out;
+			err = rst_iter_chunk(file, pos, &pgb, ctx);
+			if (err)
+				goto out;
+			break;
+#endif
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+		pos += pgb.cpt_next;
+	}
+	err = 0;
+
+out:
+	fput(file);
+	return err;
+}
+
+
+/*
+ * fixup_file_content - restore a regular file's data and inode attributes.
+ * If *file_p is NULL (anonymous shared mapping case) a fresh shmem file is
+ * created and returned through @file_p.  Data chunks, if present in the
+ * image after the inode header, are replayed via fixup_reg_data(); then
+ * size, times, owner and mode are reapplied with notify_change() in two
+ * stages, mirroring do_truncate().  Returns 0 or negative errno.
+ */
+static int fixup_file_content(struct file **file_p, struct cpt_file_image *fi,
+			      struct cpt_inode_image *ii,
+			      struct cpt_context *ctx)
+{
+	int err;
+	struct file *file = *file_p;
+	struct iattr newattrs;
+
+	if (!S_ISREG(fi->cpt_i_mode))
+		return 0;
+
+	if (file == NULL) {
+		/* No backing file yet: synthesize a /dev/zero-style shmem
+		 * file of the saved size (MAP_ANON|MAP_SHARED support). */
+		file = shmem_file_setup("dev/zero", ii->cpt_size, 0);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+		*file_p = file;
+	}
+
+	if (ii->cpt_next > ii->cpt_hdrlen) {
+		struct cpt_object_hdr hdr;
+		err = ctx->pread(&hdr, sizeof(struct cpt_object_hdr), ctx, fi->cpt_inode+ii->cpt_hdrlen);
+		if (err)
+			return err;
+		if (hdr.cpt_object == CPT_OBJ_PAGES ||
+		    hdr.cpt_object == CPT_OBJ_ITERPAGES) {
+			err = fixup_reg_data(file, fi->cpt_inode+ii->cpt_hdrlen,
+					fi->cpt_inode+ii->cpt_next, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	/* stage 1 - update size like do_truncate does */
+	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+	newattrs.ia_size = ii->cpt_size;
+	cpt_timespec_import(&newattrs.ia_ctime, ii->cpt_ctime);
+	err = notify_change(file->f_dentry, &newattrs);
+	if (err)
+		goto out;
+
+	/* stage 2 - update times, owner and mode */
+	newattrs.ia_valid = ATTR_MTIME | ATTR_ATIME |
+		ATTR_ATIME_SET | ATTR_MTIME_SET |
+		ATTR_MODE | ATTR_UID | ATTR_GID;
+	newattrs.ia_uid = ii->cpt_uid;
+	newattrs.ia_gid = ii->cpt_gid;
+	/* Keep the on-disk file type bits, restore only permissions. */
+	newattrs.ia_mode = file->f_dentry->d_inode->i_mode & S_IFMT;
+	newattrs.ia_mode |= (ii->cpt_mode & ~S_IFMT);
+	cpt_timespec_import(&newattrs.ia_atime, ii->cpt_atime);
+	cpt_timespec_import(&newattrs.ia_mtime, ii->cpt_mtime);
+	err = notify_change(file->f_dentry, &newattrs);
+
+out:
+	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	return err;
+}
+
+/*
+ * fixup_file_flags - bring a freshly opened file in line with the saved
+ * cpt_file_image: position, ownership (SIGIO), f_mode/f_flags and FASYNC
+ * state.  @was_dentry_open tells whether the file came from dentry_open()
+ * (in which case FMODE_PREAD/FMODE_LSEEK may legitimately differ and are
+ * patched up silently).  Returns 0 or -EINVAL on unrecoverable mismatch.
+ */
+static int fixup_file_flags(struct file *file, struct cpt_file_image *fi,
+			    int was_dentry_open, loff_t pos,
+			    cpt_context_t *ctx)
+{
+	if (fi->cpt_pos != file->f_pos) {
+		int err = -ESPIPE;
+		if (file->f_op->llseek)
+			err = file->f_op->llseek(file, fi->cpt_pos, 0);
+		if (err < 0) {
+			/* llseek failed (or is absent): force the saved
+			 * position directly. */
+			dprintk_ctx("file %Ld lseek %Ld - %Ld\n",
+				    (long long)pos,
+				    (long long)file->f_pos,
+				    (long long)fi->cpt_pos);
+			file->f_pos = fi->cpt_pos;
+		}
+	}
+	file->f_uid = fi->cpt_uid;
+	file->f_gid = fi->cpt_gid;
+	file->f_owner.pid = 0;
+	if (fi->cpt_fown_pid) {
+		file->f_owner.pid = comb_vpid_to_pid(fi->cpt_fown_pid);
+		if (file->f_owner.pid == 0) {
+			/* NOTE(review): this prints file->f_owner.pid, which
+			 * is known to be 0 here; fi->cpt_fown_pid was likely
+			 * intended — confirm before relying on the message. */
+			wprintk_ctx("fixup_file_flags: owner %d does not exist anymore\n", file->f_owner.pid);
+			return -EINVAL;
+		}
+	}
+	file->f_owner.uid = fi->cpt_fown_uid;
+	file->f_owner.euid = fi->cpt_fown_euid;
+	file->f_owner.signum = fi->cpt_fown_signo;
+
+	if (file->f_mode != fi->cpt_mode) {
+		if (was_dentry_open &&
+		    ((file->f_mode^fi->cpt_mode)&(FMODE_PREAD|FMODE_LSEEK))) {
+			file->f_mode &= ~(FMODE_PREAD|FMODE_LSEEK);
+			file->f_mode |= fi->cpt_mode&(FMODE_PREAD|FMODE_LSEEK);
+		}
+		if (file->f_mode != fi->cpt_mode)
+			wprintk_ctx("file %ld mode mismatch %08x %08x\n", (long)pos, file->f_mode, fi->cpt_mode);
+	}
+	if (file->f_flags != fi->cpt_flags) {
+		if (!(fi->cpt_flags&O_NOFOLLOW))
+			file->f_flags &= ~O_NOFOLLOW;
+		if ((file->f_flags^fi->cpt_flags)&O_NONBLOCK) {
+			file->f_flags &= ~O_NONBLOCK;
+			file->f_flags |= fi->cpt_flags&O_NONBLOCK;
+		}
+		if (fi->cpt_flags&FASYNC) {
+			/* FASYNC must be re-armed through ->fasync so the
+			 * driver re-registers the SIGIO queue entry. */
+			if (fi->cpt_fown_fd == -1) {
+				wprintk_ctx("No fd for FASYNC\n");
+				return -EINVAL;
+			} else if (file->f_op && file->f_op->fasync) {
+				if (file->f_op->fasync(fi->cpt_fown_fd, file, 1) < 0) {
+					wprintk_ctx("FASYNC problem\n");
+					return -EINVAL;
+				} else {
+					file->f_flags |= FASYNC;
+				}
+			}
+		}
+		if (file->f_flags != fi->cpt_flags) {
+			eprintk_ctx("file %ld flags mismatch %08x %08x\n", (long)pos, file->f_flags, fi->cpt_flags);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/*
+ * open_deleted - recreate and open a file that was unlinked at dump time.
+ * The saved name (with its " (deleted)" decoration stripped) is recreated
+ * with the proper type (fifo/chr/dir/regular), opened, and immediately
+ * unlinked again so the restored file is once more anonymous.  On name
+ * collisions a time-derived suffix is appended (up to 1000 attempts); if
+ * creation in the original directory fails the whole procedure is retried
+ * once under /tmp ("change_dir" pass).  @name is modified in place.
+ * Returns the open file or ERR_PTR().
+ */
+static struct file *
+open_deleted(char *name, unsigned flags, struct cpt_file_image *fi,
+	     struct cpt_inode_image *ii, cpt_context_t *ctx)
+{
+	struct file * file;
+	char *suffix = NULL;
+	int attempt = 0;
+	int tmp_pass = 0;
+	mode_t mode = fi->cpt_i_mode;
+
+	/* Strip (deleted) part... */
+	if (strlen(name) > strlen(" (deleted)")) {
+		if (strcmp(name + strlen(name) - strlen(" (deleted)"), " (deleted)") == 0) {
+			suffix = &name[strlen(name) - strlen(" (deleted)")];
+			*suffix = 0;
+		} else if (memcmp(name, "(deleted) ", strlen("(deleted) ")) == 0) {
+			/* Older kernels put the marker at the front. */
+			memmove(name, name + strlen("(deleted) "), strlen(name) - strlen(" (deleted)") + 1);
+			suffix = name + strlen(name);
+		}
+	}
+
+try_again:
+	for (;;) {
+		if (attempt) {
+			if (attempt > 1000) {
+				eprintk_ctx("open_deleted: failed after %d attempts\n", attempt);
+				return ERR_PTR(-EEXIST);
+			}
+			if (suffix == NULL) {
+				eprintk_ctx("open_deleted: no suffix\n");
+				return ERR_PTR(-EEXIST);
+			}
+			/* Derive a pseudo-unique suffix from the clock plus
+			 * the attempt counter to dodge existing names. */
+			sprintf(suffix, ".%08x", (unsigned)((xtime.tv_nsec>>10)+attempt));
+		}
+		attempt++;
+
+		if (S_ISFIFO(mode)) {
+			int err;
+			err = sc_mknod(name, S_IFIFO|(mode&017777), 0);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = open_pipe(NULL, name, fi, flags, ctx);
+			sc_unlink(name);
+		} else if (S_ISCHR(mode)) {
+			int err;
+			err = sc_mknod(name, S_IFCHR|(mode&017777), new_encode_dev(ii->cpt_rdev));
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_unlink(name);
+		} else if (S_ISDIR(mode)) {
+			int err;
+			err = sc_mkdir(name, mode&017777);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_rmdir(name);
+		} else {
+			/* Regular file: O_CREAT|O_EXCL guarantees we never
+			 * open somebody else's existing file by mistake. */
+			file = filp_open(name, O_CREAT|O_EXCL|flags, mode&017777);
+			if (IS_ERR(file)) {
+				if (PTR_ERR(file) == -EEXIST)
+					continue;
+				if (!tmp_pass)
+					goto change_dir;
+			} else {
+				sc_unlink(name);
+			}
+		}
+		break;
+	}
+
+	if (IS_ERR(file)) {
+		eprintk_ctx("filp_open %s: %ld\n", name, PTR_ERR(file));
+		return file;
+	} else {
+		dprintk_ctx("deleted file created as %s, %p, %x\n", name, file, file->f_dentry->d_inode->i_mode);
+	}
+	return file;
+
+change_dir:
+	/* Original directory unusable (e.g. gone or read-only): retry the
+	 * whole creation under a per-process /tmp name. */
+	sprintf(name, "/tmp/rst%u", current->pid);
+	suffix = name + strlen(name);
+	attempt = 1;
+	tmp_pass = 1;
+	goto try_again;
+}
+
+#ifdef CONFIG_SIGNALFD
+/*
+ * open_signalfd - recreate a signalfd file from its saved sigmask
+ * (stored in fi->cpt_priv).  Creates a new fd via do_signalfd() under
+ * KERNEL_DS (the mask pointer is a kernel address), then detaches the
+ * struct file from the descriptor table.  Returns the file or ERR_PTR().
+ */
+static struct file *open_signalfd(struct cpt_file_image *fi, int flags, struct cpt_context *ctx)
+{
+	sigset_t mask;
+	mm_segment_t old_fs;
+	int fd;
+	struct file *file;
+
+	cpt_sigset_import(&mask, fi->cpt_priv);
+
+	old_fs = get_fs(); set_fs(KERNEL_DS);
+	fd = do_signalfd(-1, &mask, flags & (O_CLOEXEC | O_NONBLOCK));
+	set_fs(old_fs);
+
+	if (fd < 0)
+		return ERR_PTR(fd);
+
+	/* Keep only the struct file reference; the fd number itself is
+	 * irrelevant and is installed later by the caller. */
+	file = fget(fd);
+	sys_close(fd);
+
+	return file;
+}
+#else
+/* Kernel built without signalfd: restoring such a file is impossible. */
+static struct file *open_signalfd(struct cpt_file_image *fi, int flags, struct cpt_context *ctx)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif
+
+/*
+ * rst_file - central dispatcher that materializes one struct file from its
+ * image at offset @pos.  Handles, in order: files already restored (object
+ * table hit), files whose inode is already open (reopen via dentry_open),
+ * deleted files, special file types (epoll, inotify, signalfd, pipes,
+ * devices), dead-/proc/<pid> entries, delayed filesystems, and finally a
+ * plain path lookup + dentry_open.  @fd is the target descriptor, or -2
+ * when the file is being opened as a cwd/root directory (see the hack
+ * below).  Returns a referenced file, NULL for /proc files that must be
+ * delayed, or ERR_PTR().
+ */
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx)
+{
+	int err;
+	int was_dentry_open = 0;
+	cpt_object_t *obj;
+	cpt_object_t *iobj;
+	struct cpt_file_image fi;
+	__u8 *name = NULL;
+	struct file *file;
+	struct proc_dir_entry *proc_dead_file;
+	int flags;
+	loff_t pos2;
+	cpt_object_t *mntobj = NULL;
+	struct nameidata nd;
+
+	/* Already restored earlier: just take another reference. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
+	if (obj) {
+		file = obj->o_obj;
+		if (obj->o_index >= 0) {
+			dprintk_ctx("file is attached to a socket\n");
+			err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+			if (err < 0)
+				goto err_out;
+			fixup_file_flags(file, &fi, 0, pos, ctx);
+		}
+		get_file(file);
+		return file;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+	if (err < 0)
+		goto err_out;
+
+	flags = make_flags(&fi);
+
+	/* Easy way, inode has been already open. */
+	if (fi.cpt_inode != CPT_NULL &&
+	    !(fi.cpt_lflags & CPT_DENTRY_CLONING) &&
+	    (iobj = lookup_cpt_obj_bypos(CPT_OBJ_INODE, fi.cpt_inode, ctx)) != NULL &&
+	    iobj->o_parent) {
+		struct file *filp = iobj->o_parent;
+		file = dentry_open(dget(filp->f_dentry),
+				   mntget(filp->f_vfsmnt), flags);
+		dprintk_ctx("rst_file: file obtained by dentry_open\n");
+		was_dentry_open = 1;
+		goto map_file;
+	}
+
+	pos2 = pos + fi.cpt_hdrlen;
+	if (fi.cpt_next > fi.cpt_hdrlen)
+		name = __rst_get_name(&pos2, ctx);
+
+	if (!name) {
+		eprintk_ctx("no name for file?\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	/* Resolve the vfsmount this file lived on, if the image records it. */
+	if (cpt_object_has(&fi, cpt_vfsmount) && fi.cpt_vfsmount != CPT_NULL) {
+		mntobj = lookup_cpt_obj_bypos(CPT_OBJ_VFSMOUNT_REF,
+				fi.cpt_vfsmount, ctx);
+		if (!mntobj) {
+			eprintk_ctx("no vfsmount found for file: %s\n", name);
+			err = -ENODEV;
+			goto err_out;
+		}
+	}
+
+	if (fi.cpt_lflags & CPT_DENTRY_DELETED) {
+		struct cpt_inode_image ii;
+		if (fi.cpt_inode == CPT_NULL) {
+			eprintk_ctx("deleted file and no inode.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		err = rst_get_object(CPT_OBJ_INODE, fi.cpt_inode, &ii, ctx);
+		if (err)
+			goto err_out;
+
+		if (ii.cpt_next > ii.cpt_hdrlen) {
+			struct cpt_object_hdr hdr;
+			err = ctx->pread(&hdr, sizeof(hdr), ctx,
+					fi.cpt_inode + ii.cpt_hdrlen);
+			if (err)
+				goto err_out;
+			if (hdr.cpt_object == CPT_OBJ_NAME) {
+				/* A hard link to the inode still exists:
+				 * switch to that name and open normally. */
+				rst_put_name(name, ctx);
+				name = rst_get_name(fi.cpt_inode+ii.cpt_hdrlen,
+						ctx);
+				if (!name) {
+					eprintk_ctx("no name for link?\n");
+					err = -EINVAL;
+					goto err_out;
+				}
+				if (cpt_object_has(&ii, cpt_vfsmount) &&
+						ii.cpt_vfsmount != CPT_NULL) {
+					mntobj = lookup_cpt_obj_bypos(CPT_OBJ_VFSMOUNT_REF,
+							ii.cpt_vfsmount, ctx);
+					if (!mntobj) {
+						eprintk_ctx("no vfsmount found: %s\n", name);
+						err = -ENODEV;
+						goto err_out;
+					}
+				}
+				if ((fi.cpt_lflags & CPT_DENTRY_HARDLINKED) &&
+				    !ctx->hardlinked_on) {
+					eprintk_ctx("Open hardlinked is off\n");
+					err = -EPERM;
+					goto err_out;
+				}
+				goto open_file;
+			}
+		}
+
+		/* One very special case... */
+		if (S_ISREG(fi.cpt_i_mode) &&
+		    (!name[0] || strcmp(name, "/dev/zero (deleted)") == 0)) {
+			/* MAP_ANON|MAP_SHARED mapping.
+			 * kernel makes this damn ugly way, when file which
+			 * is passed to mmap by user does not match
+			 * file finally attached to VMA. Ok, rst_mm
+			 * has to take care of this. Otherwise, it will fail.
+			 */
+			file = NULL;
+		} else if (S_ISREG(fi.cpt_i_mode) ||
+			   S_ISCHR(fi.cpt_i_mode) ||
+			   S_ISFIFO(fi.cpt_i_mode) ||
+			   S_ISDIR(fi.cpt_i_mode)) {
+			if (S_ISCHR(fi.cpt_i_mode)) {
+				file = open_special(&fi, flags, 1, ctx);
+				if (file != NULL)
+					goto map_file;
+			}
+			file = open_deleted(name, flags, &fi, &ii, ctx);
+			if (IS_ERR(file))
+				goto out;
+		} else {
+			eprintk_ctx("not a regular deleted file.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		/* Refill data/attributes of the recreated file. */
+		err = fixup_file_content(&file, &fi, &ii, ctx);
+		if (err)
+			goto err_put;
+		goto map_file;
+	} else {
+open_file:
+		if (!name[0]) {
+			eprintk_ctx("empty name for file?\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+		/* Special file classes are tried in turn; each helper
+		 * returns NULL when the class does not apply. */
+		if ((fi.cpt_lflags & CPT_DENTRY_EPOLL) &&
+		    (file = cpt_open_epolldev(&fi, flags, ctx)) != NULL)
+			goto map_file;
+#ifdef CONFIG_INOTIFY_USER
+		if ((fi.cpt_lflags & CPT_DENTRY_INOTIFY) &&
+		    (file = rst_open_inotify(&fi, flags, ctx)) != NULL)
+			goto map_file;
+#else
+		if (fi.cpt_lflags & CPT_DENTRY_INOTIFY) {
+			err = -EINVAL;
+			goto err_out;
+		}
+#endif
+		if ((fi.cpt_lflags & CPT_DENTRY_SIGNALFD) &&
+			(file = open_signalfd(&fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (S_ISFIFO(fi.cpt_i_mode) &&
+		    (file = open_pipe(mntobj, name, &fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (!S_ISREG(fi.cpt_i_mode) &&
+		    (file = open_special(&fi, flags, 0, ctx)) != NULL)
+			goto map_file;
+	}
+
+	/* This hook is needed to open file /proc/<pid>/<somefile>
+	 * but there is no proccess with pid <pid>.
+	 */
+	proc_dead_file = NULL;
+	if (fi.cpt_lflags & CPT_DENTRY_PROCPID_DEAD) {
+		sprintf(name, "/proc/rst_dead_pid_file_%d", virt_pid(current));
+
+		proc_dead_file = create_proc_entry(name + 6, S_IRUGO|S_IWUGO,
+						   NULL);
+		if (!proc_dead_file) {
+			eprintk_ctx("can't create proc entry %s\n", name);
+			err = -ENOMEM;
+			goto err_out;
+		}
+#ifdef CONFIG_PROC_FS
+		proc_dead_file->proc_fops = &dummy_proc_pid_file_operations;
+#endif
+	}
+
+	if (mntobj && (mntobj->o_flags & CPT_VFSMOUNT_DELAYFS)) {
+		struct vfsmount *mnt = mntobj->o_obj;
+		char *rel_name;
+
+		/* o_lock holds the mountpoint path length; everything past
+		 * it is the mount-relative name. */
+		if (fi.cpt_lflags & CPT_DENTRY_ROOT)
+			rel_name = "";
+		else if (strlen(name) > mntobj->o_lock)
+			rel_name = name + mntobj->o_lock + 1;
+		else {
+			eprintk_ctx("name %s to short for mnt %d\n", name, mntobj->o_lock);
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		file = rst_delayfs_screw(mnt, rel_name, flags, fi.cpt_pos, fi.cpt_i_mode);
+		goto map_file;
+	}
+
+	err = rst_path_lookup(mntobj, name, LOOKUP_FOLLOW, &nd);
+	if (err)
+		goto err_out;
+	file = dentry_open(nd.dentry, nd.mnt, flags);
+
+	if (proc_dead_file) {
+		/* The stand-in proc entry served its purpose; drop it and
+		 * unhash the dentry so the fake name cannot be reused. */
+		remove_proc_entry(proc_dead_file->name, NULL);
+		if (!IS_ERR(file))
+			d_drop(file->f_dentry);
+	}
+map_file:
+	if (!IS_ERR(file)) {
+		fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
+
+		if (S_ISFIFO(fi.cpt_i_mode) && !was_dentry_open) {
+			err = fixup_pipe_data(file, &fi, ctx);
+			if (err)
+				goto err_put;
+		}
+
+		/* This is very special hack. Logically, cwd/root are
+		 * nothing but open directories. Nevertheless, this causes
+		 * failures of restores, when number of open files in VE
+		 * is close to limit. So, if it is rst_file() of cwd/root
+		 * (fd = -2) and the directory is not deleted, we skip
+		 * adding files to object table. If the directory is
+		 * not unlinked, this cannot cause any problems.
+		 */
+		if (fd != -2 ||
+		    !S_ISDIR(file->f_dentry->d_inode->i_mode) ||
+		    (fi.cpt_lflags & CPT_DENTRY_DELETED) ||
+		    (mntobj && (mntobj->o_flags & CPT_VFSMOUNT_DELAYFS))) {
+			obj = cpt_object_get(CPT_OBJ_FILE, file, ctx);
+			if (!obj) {
+				obj = cpt_object_add(CPT_OBJ_FILE, file, ctx);
+				if (obj)
+					get_file(file);
+			}
+			if (obj) {
+				cpt_obj_setpos(obj, pos, ctx);
+				if (mntobj && (mntobj->o_flags & CPT_VFSMOUNT_DELAYFS))
+					obj->o_flags |= CPT_FILE_DELAYFS;
+				if (fi.cpt_lflags & CPT_DENTRY_SILLYRENAME)
+					obj->o_flags |= CPT_FILE_SILLYRENAME;
+			}
+
+			obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+			if (obj) {
+				cpt_obj_setpos(obj, fi.cpt_inode, ctx);
+				if (!obj->o_parent || !(fi.cpt_lflags & CPT_DENTRY_DELETED))
+					obj->o_parent = file;
+			}
+		}
+
+		if (fi.cpt_next > fi.cpt_hdrlen) {
+			err = fixup_flocks(file, &fi, pos, ctx);
+			if (err)
+				goto err_put;
+		}
+	} else {
+		/* Live /proc/<pid> files cannot be opened until the target
+		 * task exists; signal "delay" to the caller with NULL. */
+		if ((fi.cpt_lflags & CPT_DENTRY_PROC) &&
+		    !(fi.cpt_lflags & CPT_DENTRY_PROCPID_DEAD)) {
+			dprintk_ctx("rst_file /proc delayed\n");
+			file = NULL;
+		} else if (name)
+			eprintk_ctx("can't open file %s\n", name);
+	}
+
+out:
+	if (name)
+		rst_put_name(name, ctx);
+	return file;
+
+err_put:
+	if (file)
+		fput(file);
+err_out:
+	if (name)
+		rst_put_name(name, ctx);
+	return ERR_PTR(err);
+}
+
+
+/*
+ * rst_files_flag - compute the clone flags to use when recreating a task:
+ * CLONE_FILES / CLONE_FS are set when the task shares its files_struct /
+ * fs_struct with one that has already been restored (or has none at all).
+ */
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+
+	if (ti->cpt_files == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx))
+		flag |= CLONE_FILES;
+	if (ti->cpt_fs == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx))
+		flag |= CLONE_FS;
+	return flag;
+}
+
+/*
+ * local_close_files - close every descriptor in @files and clear the
+ * open/close-on-exec bitmaps, walking the fd set one long word at a time.
+ * Used to empty the restoring task's own table before repopulating it
+ * from the image.  No locking: the table belongs to current only.
+ */
+static void local_close_files(struct files_struct * files)
+{
+	int i, j;
+
+	j = 0;
+	for (;;) {
+		unsigned long set;
+		/* i is the fd number of bit 0 in word j of the bitmap. */
+		i = j * __NFDBITS;
+		if (i >= files->fdt->max_fdset || i >= files->fdt->max_fds)
+			break;
+		set = files->fdt->open_fds->fds_bits[j];
+		while (set) {
+			if (set & 1) {
+				/* xchg detaches the file atomically before
+				 * filp_close drops the reference. */
+				struct file * file = xchg(&files->fdt->fd[i], NULL);
+				if (file)
+					filp_close(file, files);
+			}
+			i++;
+			set >>= 1;
+		}
+		files->fdt->open_fds->fds_bits[j] = 0;
+		files->fdt->close_on_exec->fds_bits[j] = 0;
+		j++;
+	}
+}
+
+extern int expand_fdtable(struct files_struct *files, int nr);
+
+
+/*
+ * rst_files - restore the task's files_struct from the image.  Reuses a
+ * previously restored table when the image says it was shared, otherwise
+ * empties current's table, expands it to the saved size, and opens every
+ * descriptor via rst_file().  Files that must be opened later (delayed
+ * /proc entries) are queued with rst_filejob_queue().  Returns 0/-errno.
+ */
+int rst_files(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct cpt_files_struct_image fi;
+	struct files_struct *f = current->files;
+	cpt_object_t *obj;
+	loff_t pos, endpos;
+	int err;
+
+	/* Kernel-thread style task: it had no files at all. */
+	if (ti->cpt_files == CPT_NULL) {
+		current->files = NULL;
+		if (f)
+			put_files_struct(f);
+		return 0;
+	}
+
+	/* Shared files_struct already restored by a sibling task. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			put_files_struct(f);
+			f = obj->o_obj;
+			atomic_inc(&f->count);
+			current->files = f;
+		}
+		return 0;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
+	if (err)
+		return err;
+
+	local_close_files(f);
+
+	if (fi.cpt_max_fds > f->fdt->max_fds) {
+		spin_lock(&f->file_lock);
+		err = expand_fdtable(f, fi.cpt_max_fds-1);
+		spin_unlock(&f->file_lock);
+		if (err)
+			return err;
+	}
+
+	pos = ti->cpt_files + fi.cpt_hdrlen;
+	endpos = ti->cpt_files + fi.cpt_next;
+	while (pos < endpos) {
+		struct cpt_fd_image fdi;
+		struct file *filp;
+
+		err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
+		if (err)
+			return err;
+
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp),
+				    (long long)fdi.cpt_file);
+			return PTR_ERR(filp);
+		}
+		/* NULL means "cannot open yet" (live /proc file): defer. */
+		if (filp == NULL) {
+			int err = rst_filejob_queue(pos, ctx);
+			if (err)
+				return err;
+		} else {
+			if (fdi.cpt_fd >= f->fdt->max_fds) BUG();
+			f->fdt->fd[fdi.cpt_fd] = filp;
+			FD_SET(fdi.cpt_fd, f->fdt->open_fds);
+			if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+				FD_SET(fdi.cpt_fd, f->fdt->close_on_exec);
+		}
+
+		pos += fdi.cpt_next;
+	}
+	f->next_fd = fi.cpt_next_fd;
+
+	/* Register the table so sharing tasks can find it later. */
+	obj = cpt_object_add(CPT_OBJ_FILES, f, ctx);
+	if (obj) {
+		cpt_obj_setpos(obj, ti->cpt_files, ctx);
+		cpt_obj_setindex(obj, fi.cpt_index, ctx);
+	}
+	return 0;
+}
+
+/*
+ * rst_do_filejobs - drain the deferred-open queue built by rst_files().
+ * Each job names a (pid, fd-image) pair whose file could not be opened
+ * at table-restore time (live /proc/<pid> entries); now that all tasks
+ * exist, open it and plant it in the target task's fd table.  Returns 0
+ * or the first failure (the remaining queue is left intact).
+ */
+int rst_do_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	while ((j = ctx->filejob_queue) != NULL) {
+		int err;
+		struct task_struct *tsk;
+		struct cpt_fd_image fdi;
+		struct file *filp;
+
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_ve(j->pid);
+		if (tsk)
+			get_task_struct(tsk);
+		read_unlock(&tasklist_lock);
+		if (!tsk)
+			return -EINVAL;
+
+		err = rst_get_object(CPT_OBJ_FILEDESC, j->fdi, &fdi, ctx);
+		if (err) {
+			put_task_struct(tsk);
+			return err;
+		}
+
+		if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
+		/* The slot must still be free — rst_files() left it empty. */
+		if (tsk->files->fdt->fd[fdi.cpt_fd] ||
+		    FD_ISSET(fdi.cpt_fd, tsk->files->fdt->open_fds)) {
+			eprintk_ctx("doing filejob %Ld: fd is busy\n", j->fdi);
+			put_task_struct(tsk);
+			return -EBUSY;
+		}
+
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_do_filejobs: 1: %ld %Lu\n", PTR_ERR(filp), (unsigned long long)fdi.cpt_file);
+			put_task_struct(tsk);
+			return PTR_ERR(filp);
+		}
+		if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
+		tsk->files->fdt->fd[fdi.cpt_fd] = filp;
+		FD_SET(fdi.cpt_fd, tsk->files->fdt->open_fds);
+		if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+			FD_SET(fdi.cpt_fd, tsk->files->fdt->close_on_exec);
+
+		dprintk_ctx("filejob %Ld done\n", j->fdi);
+
+		put_task_struct(tsk);
+		ctx->filejob_queue = j->next;
+		kfree(j);
+	}
+	return 0;
+}
+
+/*
+ * rst_flush_filejobs - discard any remaining deferred-open jobs without
+ * executing them (used on error/teardown paths).
+ */
+void rst_flush_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *j;
+
+	while ((j = ctx->filejob_queue) != NULL) {
+		ctx->filejob_queue = j->next;
+		kfree(j);
+	}
+}
+
+/*
+ * rst_fs_complete - attach the task's fs_struct.  As with files_struct,
+ * a shared fs_struct restored earlier is reused; otherwise the current
+ * one is registered in the object table.  Root/cwd themselves are fixed
+ * later (see rst_restore_fs) because the image stores absolute paths.
+ */
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct fs_struct *f = current->fs;
+	cpt_object_t *obj;
+
+	if (ti->cpt_fs == CPT_NULL) {
+		exit_fs(current);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			exit_fs(current);
+			f = obj->o_obj;
+			atomic_inc(&f->count);
+			current->fs = f;
+		}
+		return 0;
+	}
+
+	/* Do _not_ restore root. Image contains absolute pathnames.
+	 * So, we fix it in context of rst process.
+	 */
+
+	obj = cpt_object_add(CPT_OBJ_FS, f, ctx);
+	if (obj)
+		cpt_obj_setpos(obj, ti->cpt_fs, ctx);
+
+	return 0;
+}
+
+/*
+ * cpt_get_dentry - resolve a file image at *@pos into a referenced
+ * (dentry, vfsmount) pair, advancing *@pos past the object.  Normally
+ * goes through rst_file(); a symlink carrying an inotify watch cannot be
+ * opened as a file, so it is special-cased via path_lookup on the saved
+ * name.  Returns 0 or negative errno.
+ */
+int cpt_get_dentry(struct dentry **dp, struct vfsmount **mp,
+		   loff_t *pos, struct cpt_context *ctx)
+{
+	struct cpt_file_image fi;
+	struct file * file;
+	int err;
+
+	err = rst_get_object(CPT_OBJ_FILE, *pos, &fi, ctx);
+	if (err)
+		return err;
+
+	/* fd = -2: cwd/root-style open (may skip the object table). */
+	file = rst_file(*pos, -2, ctx);
+	if (IS_ERR(file)) {
+		if (PTR_ERR(file) == -EINVAL && S_ISLNK(fi.cpt_i_mode)) {
+			/* One special case: inotify on symlink */
+			struct nameidata nd;
+			__u8 *name = NULL;
+
+			if (fi.cpt_next > fi.cpt_hdrlen)
+				name = rst_get_name(*pos + fi.cpt_hdrlen, ctx);
+			if (!name) {
+				eprintk_ctx("can't get name for file\n");
+				return -EINVAL;
+			}
+			if ((err = path_lookup(name, 0, &nd)) != 0) {
+				eprintk_ctx("path_lookup %s: %d\n", name, err);
+				rst_put_name(name, ctx);
+				return -EINVAL;
+			}
+			*dp = nd.dentry;
+			*mp = nd.mnt;
+			*pos += fi.cpt_next;
+			rst_put_name(name, ctx);
+			return 0;
+		}
+		return PTR_ERR(file);
+	}
+
+	/* Keep only the path references; the file itself is not needed. */
+	*dp = dget(file->f_dentry);
+	*mp = mntget(file->f_vfsmnt);
+	*pos += fi.cpt_next;
+	fput(file);
+	return 0;
+}
+
+/*
+ * __set_fs_root - install (@mnt, @dentry) as @fs's root under fs->lock,
+ * dropping the old references afterwards.  Consumes the caller's
+ * references on @mnt/@dentry (mirrors the kernel's set_fs_root).
+ */
+static void __set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
+			  struct dentry *dentry)
+{
+	struct dentry *old_root;
+	struct vfsmount *old_rootmnt;
+	write_lock(&fs->lock);
+	old_root = fs->root;
+	old_rootmnt = fs->rootmnt;
+	fs->rootmnt = mnt;
+	fs->root = dentry;
+	write_unlock(&fs->lock);
+	/* Drop old references outside the lock. */
+	if (old_root) {
+		dput(old_root);
+		mntput(old_rootmnt);
+	}
+}
+
+/*
+ * __set_fs_pwd - install (@mnt, @dentry) as @fs's working directory under
+ * fs->lock; counterpart of __set_fs_root for the cwd.  Consumes the
+ * caller's references and releases the previous ones.
+ */
+static void __set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+			 struct dentry *dentry)
+{
+	struct dentry *old_pwd;
+	struct vfsmount *old_pwdmnt;
+
+	write_lock(&fs->lock);
+	old_pwd = fs->pwd;
+	old_pwdmnt = fs->pwdmnt;
+	fs->pwdmnt = mnt;
+	fs->pwd = dentry;
+	write_unlock(&fs->lock);
+
+	/* Drop old references outside the lock. */
+	if (old_pwd) {
+		dput(old_pwd);
+		mntput(old_pwdmnt);
+	}
+}
+
+
+/*
+ * rst_restore_fs - final pass over all restored fs_structs: reapply
+ * umask and re-resolve root, pwd and altroot (in that order, up to three
+ * path records per fs image) via cpt_get_dentry().  On lookup failure all
+ * references obtained so far for this fs are released and the error is
+ * returned.
+ */
+int rst_restore_fs(struct cpt_context *ctx)
+{
+	loff_t pos;
+	cpt_object_t *obj;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct cpt_fs_struct_image fi;
+		struct fs_struct *fs = obj->o_obj;
+		int i;
+		struct dentry *d[3];
+		struct vfsmount *m[3];
+
+		err = rst_get_object(CPT_OBJ_FS, obj->o_pos, &fi, ctx);
+		if (err)
+			return err;
+
+		fs->umask = fi.cpt_umask;
+
+		/* Slot 0 = root, 1 = pwd, 2 = altroot. */
+		pos = obj->o_pos + fi.cpt_hdrlen;
+		d[0] = d[1] = d[2] = NULL;
+		m[0] = m[1] = m[2] = NULL;
+		i = 0;
+		while (pos < obj->o_pos + fi.cpt_next && i<3) {
+			err = cpt_get_dentry(d+i, m+i, &pos, ctx);
+			if (err) {
+				eprintk_ctx("cannot get_dir: %d", err);
+				for (--i; i >= 0; i--) {
+					if (d[i])
+						dput(d[i]);
+					if (m[i])
+						mntput(m[i]);
+				}
+				return err;
+			}
+			i++;
+		}
+		if (d[0])
+			__set_fs_root(fs, m[0], d[0]);
+		if (d[1])
+			__set_fs_pwd(fs, m[1], d[1]);
+		if (d[2]) {
+			/* altroot swap done inline; same locking pattern as
+			 * the helpers above. */
+			struct dentry *olddentry;
+			struct vfsmount *oldmnt;
+			write_lock(&fs->lock);
+			oldmnt = fs->altrootmnt;
+			olddentry = fs->altroot;
+			fs->altrootmnt = m[2];
+			fs->altroot = d[2];
+			write_unlock(&fs->lock);
+
+			if (olddentry) {
+				dput(olddentry);
+				mntput(oldmnt);
+			}
+		}
+	}
+	return err;
+}
+
+/*
+ * rst_path_lookup_at - walk @path starting from an explicit (@mnt,
+ * @dentry) origin rather than from current's root/cwd.  Initializes the
+ * nameidata by hand and delegates to path_walk(), which consumes the
+ * references taken here.  Returns path_walk()'s result.
+ */
+int rst_path_lookup_at(struct vfsmount *mnt, struct dentry *dentry,
+		const char *path, unsigned int flags, struct nameidata *nd)
+{
+	nd->flags = flags;
+	nd->last_type = LAST_ROOT;
+	nd->depth = 0;
+	nd->dentry = dget(dentry);
+	nd->mnt = mntget(mnt);
+
+	return path_walk(path, nd);
+}
+
+/*
+ * rst_path_lookup - mount-aware path lookup.  With no mount object this
+ * is a plain path_lookup(); otherwise @path is an absolute image path
+ * whose first mntobj->o_lock bytes are the mountpoint prefix, so the
+ * remainder is walked from the recorded vfsmount's root (LOOKUP_DIVE
+ * keeps the walk inside that mount).  Returns 0 or negative errno.
+ */
+int rst_path_lookup(cpt_object_t *mntobj, const char *path,
+		unsigned int flags, struct nameidata *nd)
+{
+	struct vfsmount *mnt;
+
+	if (!mntobj)
+		return path_lookup(path, flags, nd);
+
+	if (strlen(path) < mntobj->o_lock) {
+		eprintk("path %s to short for mnt pos:%lu len:%d\n",
+				path, (unsigned long)mntobj->o_pos, mntobj->o_lock);
+		return -EINVAL;
+	}
+
+	mnt = mntobj->o_obj;
+	return rst_path_lookup_at(mnt, mnt->mnt_root,
+			path + mntobj->o_lock, flags | LOOKUP_DIVE, nd);
+}
+
+/*
+ * rst_finish_vfsmount_ref - drop the vfsmount references held by the
+ * object table once the whole restore is done.
+ */
+void rst_finish_vfsmount_ref(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_VFSMOUNT_REF)
+		mntput(obj->o_obj);
+}
+
+/*
+ * rst_kern_mount - kernel-internal mount of filesystem type @fstype.
+ * Thin wrapper around get_fs_type()/vfs_kern_mount() that releases the
+ * fs type reference in all cases.  Returns the mount or ERR_PTR().
+ */
+struct vfsmount *rst_kern_mount(const char *fstype, int flags,
+		const char *name, void *data)
+{
+	struct file_system_type *type = get_fs_type(fstype);
+	struct vfsmount *mnt;
+	if (!type)
+		return ERR_PTR(-ENODEV);
+	mnt = vfs_kern_mount(type, flags, name, data);
+	put_filesystem(type);
+	return mnt;
+}
+
+/*
+ * undumptmpfs - body of the kernel thread that unpacks a tmpfs image.
+ * @arg is a pipe fd pair: pfd[0] carries a tar stream written by
+ * rst_restore_tmpfs().  The thread wires the pipe to stdin, /dev/null
+ * to stdout/stderr (creating a transient /dev/null node if the VE lacks
+ * one), closes everything else and execs /bin/tar.  Returns a wait-style
+ * status (255 << 8) only on failure paths; on success exec never returns.
+ */
+static int undumptmpfs(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	int fd1, fd2, err;
+	char *argv[] = { "tar", "x", "-C", "/", "-S", NULL };
+
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+
+	set_fs(KERNEL_DS);
+	fd1 = sc_open("/dev/null", O_WRONLY, 0);
+	fd2 = sc_open("/dev/null", O_WRONLY, 0);
+try:
+	if (fd1 < 0 || fd2 < 0) {
+		if (fd1 == -ENOENT && fd2 == -ENOENT) {
+			/* VE has no /dev/null yet: create one, use it, and
+			 * unlink it so no artifact is left behind. */
+			err = sc_mknod("/dev/null", S_IFCHR|0666,
+					new_encode_dev((MEM_MAJOR<<MINORBITS)|3));
+			if (err < 0) {
+				eprintk("can't create /dev/null: %d\n", err);
+				module_put(THIS_MODULE);
+				return 255 << 8;
+			}
+			fd1 = sc_open("/dev/null", O_WRONLY, 0666);
+			fd2 = sc_open("/dev/null", O_WRONLY, 0666);
+			sc_unlink("/dev/null");
+			goto try;
+		}
+		eprintk("can not open /dev/null for tar: %d %d\n", fd1, fd2);
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+	if (fd1 != 1)
+		sc_dup2(fd1, 1);
+	if (fd2 != 2)
+		sc_dup2(fd2, 2);
+
+	/* Leave only stdin/stdout/stderr open for the exec'd tar. */
+	for (i = 3; i < current->files->fdt->max_fds; i++)
+		sc_close(i);
+
+	module_put(THIS_MODULE);
+
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return 255 << 8;
+}
+
+/*
+ * rst_restore_tmpfs - repopulate a freshly mounted tmpfs from the tar
+ * archive embedded in the image at *@pos.  Spawns undumptmpfs() as a
+ * kernel thread connected through a pipe, streams the archive into it in
+ * small chunks, then reaps the child and converts its exit status into
+ * an errno.  *@pos is advanced past the archive object.  SIGCHLD is
+ * temporarily unblocked from the ignore mask for the wait.
+ */
+static int rst_restore_tmpfs(loff_t *pos, struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	loff_t end;
+	int pid;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	ignore.sig[0] = CPT_SIG_IGNORE_MASK;
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	pid = err = local_kernel_thread(undumptmpfs, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("tmpfs local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	/* Keep the write end as a struct file; the raw fds are closed so
+	 * tar sees EOF when we fput() below. */
+	f = fget(pfd[1]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = *pos + v.cpt_hdrlen;
+	end = *pos + v.cpt_next;
+	*pos += v.cpt_next;
+	do {
+		char buf[16];
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))
+			break;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	/* Closing the pipe signals end-of-archive to tar. */
+	fput(f);
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {
+		/* Normal exit: non-zero exit code means tar failed. */
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("tar exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("tar terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	return err;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+
+/*
+ * rst_lookup_ext_mount - find an already-present ("external") mount in
+ * the current namespace whose mountpoint path and fs type name match the
+ * image record.  Returns a referenced vfsmount or ERR_PTR(-ENOENT).
+ * Scans under namespace_sem; paths are rendered via d_path into the
+ * context's scratch buffer.
+ */
+struct vfsmount *rst_lookup_ext_mount(char *mntpnt, char *mnttype, struct cpt_context *ctx)
+{
+	struct namespace *n = current->nsproxy->namespace;
+	struct list_head *p;
+	struct vfsmount *t, *mnt;
+	char *path, *path_buf;
+
+	mnt = ERR_PTR(-ENOENT);
+	path_buf = cpt_get_buf(ctx);
+	down_read(&namespace_sem);
+	list_for_each(p, &n->list) {
+		t = list_entry(p, struct vfsmount, mnt_list);
+		path = d_path(t->mnt_root, t, path_buf, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+		if (!strcmp(path, mntpnt) &&
+		    !strcmp(t->mnt_sb->s_type->name, mnttype)) {
+			mnt = mntget(t);
+			break;
+		}
+	}
+	up_read(&namespace_sem);
+	__cpt_release_buf(ctx);
+	return mnt;
+}
+
+/*
+ * rst_add_vfsmount - register @mnt in the object table as a
+ * CPT_OBJ_VFSMOUNT_REF keyed by its image position.  Takes ownership of
+ * the caller's mount reference: it is kept in the table (released later
+ * by rst_finish_vfsmount_ref) or dropped on error/duplicate.  o_lock is
+ * (ab)used to remember the mountpoint path length for later relative
+ * lookups.  Returns the object or ERR_PTR(-ENOMEM).
+ */
+static cpt_object_t *rst_add_vfsmount(struct vfsmount *mnt, char *path,
+		loff_t obj_pos, unsigned int obj_flags, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	obj = cpt_object_add(CPT_OBJ_VFSMOUNT_REF, mnt, ctx);
+	if (!obj) {
+		mntput(mnt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (obj->o_count > 1) {
+		/* Same mount seen twice: keep a single table reference. */
+		eprintk_ctx("duplicate vfsmount at %s\n", path);
+		mntput(mnt);
+	}
+
+	obj->o_lock = strlen(path);
+	cpt_obj_setpos(obj, obj_pos, ctx);
+	obj->o_flags = obj_flags;
+
+	return obj;
+}
+
+/*
+ * rst_restore_vfsmount - register @mnt in the object table and graft it
+ * into the namespace at @path via do_add_mount() (MNT_CPT marks it as a
+ * checkpoint-restored mount).  do_add_mount gets its own reference; the
+ * table keeps the original one.  Returns 0 or negative errno.
+ */
+static int rst_restore_vfsmount(struct vfsmount *mnt, char *path,
+		loff_t mntpos, int mnt_flags, int add_flags, cpt_context_t *ctx)
+{
+	struct nameidata nd;
+	int ret;
+	cpt_object_t *mntobj;
+
+	mntobj = rst_add_vfsmount(mnt, path, mntpos, add_flags, ctx);
+	if (IS_ERR(mntobj))
+		return PTR_ERR(mntobj);
+
+	ret = path_lookup(path, LOOKUP_FOLLOW, &nd);
+	if (ret) {
+		eprintk_ctx("Failed ot lookup path '%s'\n", path);
+		return ret;
+	}
+	ret = do_add_mount(mntget(mnt), &nd, mnt_flags | MNT_CPT, NULL);
+	path_release(&nd);
+	return ret;
+}
+
+/*
+ * restore_one_vfsmount - recreate the mounts described by one
+ * cpt_vfsmount_image.  For each record the (device, mountpoint, fstype
+ * and optionally bind-source / delayfs-data) names are read, then one of
+ * five strategies applies: delayed filesystem mount, reuse of an
+ * external host mount, skipping the non-external root, a bind mount from
+ * an earlier-restored mount, or a fresh kernel mount (with tmpfs content
+ * replayed from the embedded tar archive).  Returns 0 or the first error.
+ */
+int restore_one_vfsmount(struct cpt_vfsmount_image *mi, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t endpos;
+	loff_t mntpos = pos;
+	struct vfsmount *mnt;
+	cpt_object_t *mntobj, *bindobj;
+
+	endpos = pos + mi->cpt_next;
+	pos += mi->cpt_hdrlen;
+
+	while (pos < endpos) {
+		char *mntdev;
+		char *mntpnt;
+		char *mnttype;
+		char *mntbind;
+		char *mntdata;
+
+		mntdev = __rst_get_name(&pos, ctx);
+		mntpnt = __rst_get_name(&pos, ctx);
+		mnttype = __rst_get_name(&pos, ctx);
+
+		mntbind = NULL;
+		if (mi->cpt_mntflags & CPT_MNT_BIND)
+			mntbind = __rst_get_name(&pos, ctx);
+		/* "/" or empty bind source means "bind of itself": ignore. */
+		if (mntbind && (strcmp(mntbind, "/") == 0 || strcmp(mntbind, "") == 0)) {
+			rst_put_name(mntbind, ctx);
+			mntbind = NULL;
+		}
+		if (mntbind)
+			mi->cpt_flags |= MS_BIND;
+
+		mntdata = NULL;
+		if (mi->cpt_mntflags & CPT_MNT_DELAYFS)
+			mntdata = __rst_get_name(&pos, ctx);
+
+		bindobj = NULL;
+		if (cpt_object_has(mi, cpt_mnt_bind) &&
+				mi->cpt_mnt_bind != CPT_NULL) {
+			bindobj = lookup_cpt_obj_bypos(CPT_OBJ_VFSMOUNT_REF,
+					mi->cpt_mnt_bind, ctx);
+			if (!bindobj) {
+				eprintk_ctx("bind mount source not found: %s\n",
+						mntbind);
+				err = -ENODEV;
+				goto out_err;
+			}
+		}
+
+		err = -EINVAL;
+		if (mnttype && mntpnt) {
+			err = 0;
+			if (mi->cpt_mntflags & CPT_MNT_DELAYFS) {
+				mnt = rst_mount_delayfs(mnttype, mi->cpt_flags,
+						mntdev, mntdata, ctx);
+				err = PTR_ERR(mnt);
+				if (IS_ERR(mnt))
+					goto out_err;
+
+				err = rst_restore_vfsmount(mnt, mntpnt, mntpos,
+						mi->cpt_mntflags,
+						CPT_VFSMOUNT_DELAYFS, ctx);
+			} else if (mi->cpt_mntflags & CPT_MNT_EXT) {
+				mnt = rst_lookup_ext_mount(mntpnt, mnttype, ctx);
+				if (IS_ERR(mnt)) {
+					err = PTR_ERR(mnt);
+					eprintk_ctx("mount point is missing: %s\n", mntpnt);
+					goto out_err;
+				}
+
+				/* External mount already exists in the host
+				 * namespace: only record the reference. */
+				mntobj = rst_add_vfsmount(mnt, mntpnt,
+						mntpos, 0, ctx);
+				if (IS_ERR(mntobj))
+					err = PTR_ERR(mntobj);
+			} else if (!strcmp(mntpnt, "/")) {
+				/* non-external root-mount. skip it. */
+			} else if (mi->cpt_mntflags & CPT_MNT_BIND) {
+				struct nameidata nd;
+
+				err = rst_path_lookup(bindobj, mntbind,
+						LOOKUP_FOLLOW, &nd);
+				if (err)
+					goto out_err;
+
+				mnt = vfs_bind_mount(nd.mnt, nd.dentry);
+				path_release(&nd);
+				err = PTR_ERR(mnt);
+				if (IS_ERR(mnt))
+					goto out_err;
+
+				err = rst_restore_vfsmount(mnt, mntpnt, mntpos,
+						mi->cpt_mntflags, 0, ctx);
+			} else {
+				mnt = rst_kern_mount(mnttype, mi->cpt_flags,
+						mntdev, NULL);
+				err = PTR_ERR(mnt);
+				if (IS_ERR(mnt))
+					goto out_err;
+
+				err = rst_restore_vfsmount(mnt, mntpnt, mntpos,
+						mi->cpt_mntflags, 0, ctx);
+				if (err)
+					goto out_err;
+
+				/* tmpfs loses its contents across C/R; refill
+				 * it from the tar stream in the image. */
+				if (!strcmp(mnttype, "tmpfs"))
+					err = rst_restore_tmpfs(&pos, ctx);
+			}
+		}
+out_err:
+		if (mntdev)
+			rst_put_name(mntdev, ctx);
+		if (mntpnt)
+			rst_put_name(mntpnt, ctx);
+		if (mnttype)
+			rst_put_name(mnttype, ctx);
+		if (mntbind)
+			rst_put_name(mntbind, ctx);
+		if (mntdata)
+			rst_put_name(mntdata, ctx);
+		if (err) {
+			/* NOTE(review): mntdev/mnttype/mntpnt were released
+			 * by rst_put_name just above, so this message reads
+			 * freed buffers — verify rst_put_name semantics. */
+			eprintk_ctx("Failed to restore mount point: dev '%s', "
+					"type '%s', path '%s'\n",
+					mntdev, mnttype, mntpnt);
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * restore_one_namespace - iterate over the vfsmount records of one
+ * namespace image (@pos..@endpos) and restore each via
+ * restore_one_vfsmount().  Returns 0 or the first error.
+ */
+int restore_one_namespace(loff_t pos, loff_t endpos, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_vfsmount_image mi;
+
+	while (pos < endpos) {
+		err = rst_get_object(CPT_OBJ_VFSMOUNT, pos, &mi, ctx);
+		if (err)
+			return err;
+		err = restore_one_vfsmount(&mi, pos, ctx);
+		if (err)
+			return err;
+		pos += mi.cpt_next;
+	}
+	return 0;
+}
+
+/*
+ * rst_root_namespace - restore the mount namespace section of the image.
+ * Validates the section header, then restores the first (and only
+ * supported) namespace record; additional records are reported and
+ * skipped.  Returns 0 when the section is absent or restored, else a
+ * negative errno.
+ */
+int rst_root_namespace(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NAMESPACE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr sbuf;
+	int done = 0;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NAMESPACE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NAMESPACE, sec, &sbuf, ctx);
+		if (err)
+			return err;
+		if (done) {
+			eprintk_ctx("multiple namespaces are not supported\n");
+			break;
+		}
+		done++;
+		err = restore_one_namespace(sec+sbuf.cpt_hdrlen, sec+sbuf.cpt_next, ctx);
+		if (err)
+			return err;
+		sec += sbuf.cpt_next;
+	}
+
+	return 0;
+}
+
+/*
+ * rst_stray_files - final sweep over the files section for entries that
+ * no restored task referenced ("stray" files, e.g. SysV shared memory
+ * segments kept alive only by their inode).  Each unclaimed entry is
+ * materialized via rst_sysv_shm() and its transient file reference is
+ * dropped immediately.  Returns 0 or the first error.
+ */
+int rst_stray_files(struct cpt_context *ctx)
+{
+	int err = 0;
+	loff_t sec = ctx->sections[CPT_SECT_FILES];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_FILES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_object_hdr sbuf;
+		cpt_object_t *obj;
+
+		err = _rst_get_object(CPT_OBJ_FILE, sec, &sbuf, sizeof(sbuf), ctx);
+		if (err)
+			break;
+
+		/* Table hit means some task already restored this file. */
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, sec, ctx);
+		if (!obj) {
+			struct file *file;
+
+			dprintk_ctx("stray file %Ld\n", sec);
+
+			file = rst_sysv_shm(sec, ctx);
+
+			if (IS_ERR(file)) {
+				eprintk_ctx("rst_stray_files: %ld\n", PTR_ERR(file));
+				return PTR_ERR(file);
+			} else {
+				fput(file);
+			}
+		}
+		sec += sbuf.cpt_next;
+	}
+
+	return err;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_inotify.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_inotify.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_inotify.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_inotify.c	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,199 @@
+/*
+ *
+ *  kernel/cpt/rst_inotify.c
+ *
+ *  Copyright (C) 2000-2007  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/inotify.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations inotify_fops;
+
+struct file *rst_open_inotify(struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx)
+{
+	struct file *file;
+	int fd;
+
+	fd = sys_inotify_init();
+	if (fd < 0)
+		return ERR_PTR(fd);
+
+	file = fget(fd);
+	sys_close(fd);
+	return file;
+}
+
+static int restore_one_inotify(cpt_object_t *obj,
+			       loff_t pos,
+			       struct cpt_inotify_image *ibuf,
+			       cpt_context_t *ctx)
+{
+	int err = 0;
+	loff_t endpos;
+	struct file *file = obj->o_obj;
+	struct inotify_device *dev;
+
+	if (file->f_op != &inotify_fops) {
+		eprintk_ctx("bad inotify file\n");
+		return -EINVAL;
+	}
+
+	dev = file->private_data;
+
+	if (unlikely(dev == NULL)) {
+		eprintk_ctx("bad inotify device\n");
+		return -EINVAL;
+	}
+
+	endpos = pos + ibuf->cpt_next;
+	pos += ibuf->cpt_hdrlen;
+	while (pos < endpos) {
+		union {
+			struct cpt_inotify_wd_image wi;
+			struct cpt_inotify_ev_image ei;
+		} u;
+
+		err = rst_get_object(-1, pos, &u, ctx);
+		if (err) {
+			eprintk_ctx("rst_get_object: %d\n", err);
+			return err;
+		}
+		if (u.wi.cpt_object == CPT_OBJ_INOTIFY_WATCH) {
+			struct dentry *d;
+			struct vfsmount *mnt;
+			loff_t fpos = pos + u.wi.cpt_hdrlen;
+
+			err = cpt_get_dentry(&d, &mnt, &fpos, ctx);
+			if (err) {
+				eprintk_ctx("cpt_get_dentry: %d\n", err);
+				return err;
+			}
+
+			mutex_lock(&dev->up_mutex);
+			dev->ih->last_wd = u.wi.cpt_wd - 1;
+			err = inotify_create_watch(dev, d, mnt, u.wi.cpt_mask);
+			dev->ih->last_wd = ibuf->cpt_last_wd;
+			if (err != u.wi.cpt_wd) {
+				eprintk_ctx("wrong inotify descriptor %u %u\n", err, u.wi.cpt_wd);
+				if (err >= 0)
+					err = -EINVAL;
+			} else
+				err = 0;
+			mutex_unlock(&dev->up_mutex);
+			dput(d);
+			mntput(mnt);
+			if (err)
+				break;
+		} else if (u.wi.cpt_object == CPT_OBJ_INOTIFY_EVENT) {
+			struct inotify_user_watch dummy_watch;
+			struct inotify_watch *w;
+			char *name = NULL;
+
+			if (u.ei.cpt_namelen) {
+				name = kmalloc(u.ei.cpt_namelen+1, GFP_KERNEL);
+				if (name == NULL) {
+					err = -ENOMEM;
+					break;
+				}
+				name[u.ei.cpt_namelen] = 0;
+				err = ctx->pread(name, u.ei.cpt_namelen, ctx, pos + u.ei.cpt_hdrlen);
+				if (err) {
+					kfree(name);
+					break;
+				}
+			}
+
+			w = &dummy_watch.wdata;
+			dummy_watch.dev = dev;
+			atomic_set(&w->count, 2);
+
+			/* Trick to avoid destruction due to exit event */
+			if (u.ei.cpt_mask & (IN_IGNORED | IN_ONESHOT))
+				atomic_inc(&w->count);
+			dev->ih->in_ops->handle_event(w, u.ei.cpt_wd, u.ei.cpt_mask,
+						      u.ei.cpt_cookie, name, NULL);
+			if (name)
+				kfree(name);
+		} else {
+			eprintk_ctx("bad object: %u\n", u.wi.cpt_object);
+			err = -EINVAL;
+			break;
+		}
+		pos += u.wi.cpt_next;
+	}
+	return err;
+}
+
+int rst_inotify(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_INOTIFY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_INOTIFY || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_inotify_image ibuf;
+
+		err = rst_get_object(CPT_OBJ_INOTIFY, sec, &ibuf, ctx);
+		if (err)
+			return err;
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ibuf.cpt_file, ctx);
+		if (obj == NULL) {
+			eprintk_ctx("cannot find inotify file object\n");
+			return -EINVAL;
+		}
+		err = restore_one_inotify(obj, sec, &ibuf, ctx);
+		if (err)
+			return err;
+		sec += ibuf.cpt_next;
+	}
+
+	return 0;
+
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_mm.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_mm.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_mm.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_mm.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,1181 @@
+/*
+ *
+ *  kernel/cpt/rst_mm.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <linux/rmap.h>
+#include <linux/hash.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#ifdef CONFIG_X86
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#endif
+#include <asm/mmu_context.h>
+#include <linux/swapops.h>
+#include <linux/cpt_image.h>
+
+#ifdef CONFIG_VE
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#endif
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_ubc.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+#include "cpt_pagein.h"
+#endif
+
+#include "cpt_syscalls.h"
+
+#define __PAGE_NX (1ULL<<63)
+
+static unsigned long make_prot(struct cpt_vma_image *vmai)
+{
+	unsigned long prot = 0;
+
+	if (vmai->cpt_flags&VM_READ)
+		prot |= PROT_READ;
+	if (vmai->cpt_flags&VM_WRITE)
+		prot |= PROT_WRITE;
+	if (vmai->cpt_flags&VM_EXEC)
+		prot |= PROT_EXEC;
+	if (vmai->cpt_flags&VM_GROWSDOWN)
+		prot |= PROT_GROWSDOWN;
+	if (vmai->cpt_flags&VM_GROWSUP)
+		prot |= PROT_GROWSUP;
+	return prot;
+}
+
+static unsigned long make_flags(struct cpt_vma_image *vmai)
+{
+	unsigned long flags = MAP_FIXED;
+
+	if (vmai->cpt_flags&(VM_SHARED|VM_MAYSHARE))
+		flags |= MAP_SHARED;
+	else
+		flags |= MAP_PRIVATE;
+
+	if (vmai->cpt_file == CPT_NULL)
+		flags |= MAP_ANONYMOUS;
+	if (vmai->cpt_flags&VM_GROWSDOWN)
+		flags |= MAP_GROWSDOWN;
+#ifdef MAP_GROWSUP
+	if (vmai->cpt_flags&VM_GROWSUP)
+		flags |= MAP_GROWSUP;
+#endif
+	if (vmai->cpt_flags&VM_DENYWRITE)
+		flags |= MAP_DENYWRITE;
+	if (vmai->cpt_flags&VM_EXECUTABLE)
+		flags |= MAP_EXECUTABLE;
+	if (!(vmai->cpt_flags&VM_ACCOUNT))
+		flags |= MAP_NORESERVE;
+	return flags;
+}
+
+#ifdef CONFIG_X86
+#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) \
+				&& !defined(CONFIG_XEN)
+static int __alloc_ldt(mm_context_t *pc, int mincount)
+{
+	int oldsize, newsize, nr;
+
+	if (mincount <= pc->size)
+		return 0;
+	/*
+	 * LDT got larger - reallocate if necessary.
+	 */
+	oldsize = pc->size;
+	mincount = (mincount+511)&(~511);
+	newsize = mincount*LDT_ENTRY_SIZE;
+	for (nr = 0; nr * PAGE_SIZE < newsize; nr++) {
+		BUG_ON(nr * PAGE_SIZE >= 64*1024);
+		if (!pc->ldt_pages[nr]) {
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
+			if (!pc->ldt_pages[nr])
+				goto nomem;
+			clear_highpage(pc->ldt_pages[nr]);
+		}
+	}
+	pc->size = mincount;
+	return 0;
+
+nomem:
+	while (--nr >= 0)
+		__free_page(pc->ldt_pages[nr]);
+	pc->size = 0;
+	return -ENOMEM;
+}
+
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int i;
+	int err;
+	int size;
+
+	err = __alloc_ldt(&mm->context, li->cpt_size/LDT_ENTRY_SIZE);
+	if (err)
+		return err;
+
+	size = mm->context.size*LDT_ENTRY_SIZE;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int nr = i / PAGE_SIZE, bytes;
+		char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+		bytes = size - i;
+		if (bytes > PAGE_SIZE)
+			bytes = PAGE_SIZE;
+		err = ctx->pread(kaddr, bytes, ctx, pos + li->cpt_hdrlen + i);
+		kunmap(mm->context.ldt_pages[nr]);
+		if (err)
+			return err;
+	}
+
+	load_LDT(&mm->context);
+	return 0;
+}
+
+#else
+
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int oldsize = mm->context.size;
+	void *oldldt;
+	void *newldt;
+	int err;
+
+	if (li->cpt_size > PAGE_SIZE)
+		newldt = ub_vmalloc(li->cpt_size);
+	else
+		newldt = ub_kmalloc(li->cpt_size, GFP_KERNEL);
+
+	if (!newldt)
+		return -ENOMEM;
+
+	err = ctx->pread(newldt, li->cpt_size, ctx, pos + li->cpt_hdrlen);
+	if (err) {
+		if (li->cpt_size > PAGE_SIZE)
+			vfree(newldt);
+		else
+			kfree(newldt);
+		return err;
+	}
+
+	oldldt = mm->context.ldt;
+	mm->context.ldt = newldt;
+	mm->context.size = li->cpt_size/LDT_ENTRY_SIZE;
+
+	load_LDT(&mm->context);
+
+	if (oldsize) {
+		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(oldldt);
+		else
+			kfree(oldldt);
+	}
+	return 0;
+}
+#endif
+#endif
+
+static int
+restore_aio_ring(struct kioctx *aio_ctx, struct cpt_aio_ctx_image *aimg)
+{
+	struct aio_ring_info *info = &aio_ctx->ring_info;
+	unsigned nr_events = aio_ctx->max_reqs;
+	unsigned long size;
+	int nr_pages;
+
+	/* We recalculate parameters of the ring exactly like
+	 * fs/aio.c does and then compare calculated values
+	 * with ones, stored in dump. They must be the same. */
+
+	nr_events += 2;
+
+	size = sizeof(struct aio_ring);
+	size += sizeof(struct io_event) * nr_events;
+	nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	if (nr_pages != aimg->cpt_ring_pages)
+		return -EINVAL;
+
+	info->nr_pages = nr_pages;
+
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
+
+	if (nr_events != aimg->cpt_nr)
+		return -EINVAL;
+
+	info->nr = 0;
+	info->ring_pages = info->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+		if (!info->ring_pages)
+			return -ENOMEM;
+		memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
+	}
+
+	info->mmap_size = nr_pages * PAGE_SIZE;
+
+	/* This quirk is inherited from kernel aio.c, which mmap()s the
+	 * ring pages and then pins them with get_user_pages() — apparently
+	 * leftover machinery from an abandoned attempt to expose the ring
+	 * directly to user space. The result is odd: immediately after
+	 * the AIO context is created, the kernel shares those pages with
+	 * the user, who can read and even write them. But after the first
+	 * fork, the pages become COW, with the evident consequences.
+	 * (The first version of the mmapped packet socket made the same
+	 * mistake; fortunately that variant never reached the mainline
+	 * kernel.)
+	 *
+	 * What do we do here? Simulating this odd behaviour exactly is
+	 * not practical, so for now we simply take the pages from user
+	 * space. Alternatively, we could keep a kernel copy in the AIO
+	 * context image, which would be more correct.
+	 *
+	 * Known limitation: if the pages have been COWed, the ring is
+	 * transferred incorrectly.
+	 */
+	down_read(&current->mm->mmap_sem);
+	info->mmap_base = aimg->cpt_mmap_base;
+	info->nr_pages = get_user_pages(current, current->mm,
+					info->mmap_base, nr_pages, 
+					1, 0, info->ring_pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (unlikely(info->nr_pages != nr_pages)) {
+		int i;
+
+		for (i=0; i<info->nr_pages; i++)
+			put_page(info->ring_pages[i]);
+		if (info->ring_pages && info->ring_pages != info->internal_pages)
+			kfree(info->ring_pages);
+		return -EFAULT;
+	}
+
+	aio_ctx->user_id = info->mmap_base;
+
+	info->nr = nr_events;
+	info->tail = aimg->cpt_tail;
+
+	return 0;
+}
+
+static int do_rst_aio(struct cpt_aio_ctx_image *aimg, loff_t pos, cpt_context_t *ctx)
+{
+	int err;
+	struct kioctx *aio_ctx;
+	extern spinlock_t aio_nr_lock;
+
+	aio_ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
+	if (!aio_ctx)
+		return -ENOMEM;
+
+	memset(aio_ctx, 0, sizeof(*aio_ctx));
+	aio_ctx->max_reqs = aimg->cpt_max_reqs;
+
+	if ((err = restore_aio_ring(aio_ctx, aimg)) < 0) {
+		kmem_cache_free(kioctx_cachep, aio_ctx);
+		eprintk_ctx("AIO %Ld restore_aio_ring: %d\n", pos, err);
+		return err;
+	}
+
+	aio_ctx->mm = current->mm;
+	atomic_inc(&aio_ctx->mm->mm_count);
+	atomic_set(&aio_ctx->users, 1);
+	spin_lock_init(&aio_ctx->ctx_lock);
+	spin_lock_init(&aio_ctx->ring_info.ring_lock);
+	init_waitqueue_head(&aio_ctx->wait);
+	INIT_LIST_HEAD(&aio_ctx->active_reqs);
+	INIT_LIST_HEAD(&aio_ctx->run_list);
+	INIT_WORK(&aio_ctx->wq, aio_kick_handler, ctx);
+
+	spin_lock(&aio_nr_lock);
+	aio_nr += aio_ctx->max_reqs;
+	spin_unlock(&aio_nr_lock);
+
+	write_lock(&aio_ctx->mm->ioctx_list_lock);
+	aio_ctx->next = aio_ctx->mm->ioctx_list;
+	aio_ctx->mm->ioctx_list = aio_ctx;
+	write_unlock(&aio_ctx->mm->ioctx_list_lock);
+
+	return 0;
+}
+
+struct anonvma_map
+{
+	struct hlist_node	list;
+	struct anon_vma		*avma;
+	__u64			id;
+};
+
+static int verify_create_anonvma(struct mm_struct *mm,
+				 struct cpt_vma_image *vmai,
+				 cpt_context_t *ctx)
+{
+	struct anon_vma *avma = NULL;
+	struct anon_vma *new_avma;
+	struct vm_area_struct *vma;
+	int h;
+
+	if (!ctx->anonvmas) {
+		if (CPT_ANONVMA_HSIZE*sizeof(struct hlist_head) > PAGE_SIZE)
+			return -EINVAL;
+		if ((ctx->anonvmas = (void*)__get_free_page(GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++)
+			INIT_HLIST_HEAD(&ctx->anonvmas[h]);
+	} else {
+		struct anonvma_map *map;
+		struct hlist_node *elem;
+
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_for_each_entry(map, elem, &ctx->anonvmas[h], list) {
+			if (map->id == vmai->cpt_anonvmaid) {
+				avma = map->avma;
+				break;
+			}
+		}
+	}
+
+	down_read(&mm->mmap_sem);
+	if ((vma = find_vma(mm, vmai->cpt_start)) == NULL) {
+		up_read(&mm->mmap_sem);
+		return -ESRCH;
+	}
+	if (vma->vm_start != vmai->cpt_start) {
+		up_read(&mm->mmap_sem);
+		eprintk_ctx("vma start mismatch\n");
+		return -EINVAL;
+	}
+	if (vma->vm_pgoff != vmai->cpt_pgoff) {
+		dprintk_ctx("vma pgoff mismatch, fixing\n");
+		if (vma->vm_file || (vma->vm_flags&(VM_SHARED|VM_MAYSHARE))) {
+			eprintk_ctx("cannot fixup vma pgoff\n");
+			up_read(&mm->mmap_sem);
+			return -EINVAL;
+		}
+		vma->vm_pgoff = vmai->cpt_pgoff;
+	}
+
+	if (!vma->anon_vma) {
+		if (avma) {
+			vma->anon_vma = avma;
+			anon_vma_link(vma);
+		} else {
+			int err;
+
+			err = anon_vma_prepare(vma);
+
+			if (err) {
+				up_read(&mm->mmap_sem);
+				return err;
+			}
+		}
+	} else {
+		/* Note, we _can_ arrive to the situation, when two
+		 * different anonvmaid's point to one anon_vma, this happens
+		 * f.e. when mmap() merged new area to previous one and
+		 * they will share one anon_vma even if they did not on
+		 * original host.
+		 *
+		 * IT IS OK. To all that I understand, we may merge all
+		 * the anon_vma's and rmap can scan all the huge list of vmas
+		 * searching for page. It is just "suboptimal".
+		 *
+		 * Real disaster would happen, if vma already got an anon_vma
+		 * with different id. It is very rare case, kernel does the
+		 * best efforts to merge anon_vmas when some attributes are
+		 * different. In this case we will fall to copying memory.
+		 */
+		if (avma && vma->anon_vma != avma) {
+			up_read(&mm->mmap_sem);
+			wprintk_ctx("anon_vma mismatch\n");
+			return 0;
+		}
+	}
+
+	new_avma = vma->anon_vma;
+	up_read(&mm->mmap_sem);
+
+	if (!avma) {
+		struct anonvma_map *map;
+
+		if (!new_avma)
+			return -EINVAL;
+
+		if ((map = kmalloc(sizeof(*map), GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+
+		map->id = vmai->cpt_anonvmaid;
+		map->avma = new_avma;
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_add_head(&map->list, &ctx->anonvmas[h]);
+	}
+	return 0;
+}
+
+static int copy_mm_pages(struct mm_struct *src, unsigned long start,
+			 unsigned long end)
+{
+	int err;
+
+	for (; start < end; start += PAGE_SIZE) {
+		struct page *page;
+		struct page *spage;
+		void *maddr, *srcaddr;
+
+		err = get_user_pages(current, current->mm,
+				     start, 1, 1, 1, &page, NULL);
+		if (err == 0)
+			err = -EFAULT;
+		if (err < 0)
+			return err;
+
+		err = get_user_pages(current, src,
+				     start, 1, 0, 1, &spage, NULL);
+
+		if (err == 0)
+			err = -EFAULT;
+		if (err < 0) {
+			page_cache_release(page);
+			return err;
+		}
+
+		srcaddr = kmap(spage);
+		maddr = kmap(page);
+		memcpy(maddr, srcaddr, PAGE_SIZE);
+		set_page_dirty_lock(page);
+		kunmap(page);
+		kunmap(spage);
+		page_cache_release(page);
+		page_cache_release(spage);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_X86_32
+static inline int cpt_setup_vdso(unsigned long addr, int unused)
+{
+	return arch_setup_additional_pages(NULL, 0, addr);
+}
+#endif
+
+#ifdef CONFIG_X86_64
+extern int syscall32_setup_pages(struct linux_binprm *bprm, int exstack,
+				       unsigned long start_code, 
+				       unsigned long interp_map_address,
+				       unsigned long map_address);
+
+static int cpt_setup_vdso(unsigned long addr, int is64bit)
+{
+	if (is64bit)
+		return arch_setup_additional_pages(NULL, 0, addr);
+	else
+		return syscall32_setup_pages(NULL, 0, 0, 0, addr);
+}
+#endif
+
+static int do_rst_vma(struct cpt_vma_image *vmai, int is64bit,
+		loff_t vmapos, loff_t mmpos, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned long addr;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct file *file = NULL;
+	unsigned long prot;
+	int checked = 0;
+
+	if (vmai->cpt_type == CPT_VMA_VDSO) {
+		if (ctx->vdso == NULL
+#ifdef CONFIG_X86_64
+				      || !test_thread_flag(TIF_IA32)
+#endif
+				      				    ) {
+			err = cpt_setup_vdso(vmai->cpt_start, is64bit);
+			goto out;
+		}
+	}
+
+	prot = make_prot(vmai);
+
+	if (vmai->cpt_file != CPT_NULL) {
+		if (vmai->cpt_type == CPT_VMA_TYPE_0) {
+			file = rst_file(vmai->cpt_file, -1, ctx);
+			if (IS_ERR(file)) {
+				eprintk_ctx("do_rst_vma: rst_file: %Ld\n", (unsigned long long)vmai->cpt_file);
+				return PTR_ERR(file);
+			}
+		} else if (vmai->cpt_type == CPT_VMA_TYPE_SHM) {
+			file = rst_sysv_shm(vmai->cpt_file, ctx);
+			if (IS_ERR(file))
+				return PTR_ERR(file);
+		}
+	}
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap_pgoff(file, vmai->cpt_start,
+			     vmai->cpt_end-vmai->cpt_start,
+			     prot, make_flags(vmai),
+			     vmai->cpt_pgoff);
+
+	if (addr != vmai->cpt_start) {
+		up_write(&mm->mmap_sem);
+
+		err = -EINVAL;
+		if (IS_ERR((void*)addr))
+			err = addr;
+		goto out;
+	}
+
+	vma = find_vma(mm, vmai->cpt_start);
+	if (vma == NULL) {
+		up_write(&mm->mmap_sem);
+		eprintk_ctx("cannot find mmapped vma\n");
+		err = -ESRCH;
+		goto out;
+	}
+
+	/* do_mmap_pgoff() can merge new area to previous one (not to the next,
+	 * we mmap in order, the rest of mm is still unmapped). This can happen
+	 * f.e. if flags are to be adjusted later, or if we had different
+	 * anon_vma on two adjacent regions. Split it by brute force. */
+	if (vma->vm_start != vmai->cpt_start) {
+		dprintk_ctx("vma %Ld merged, split\n", vmapos);
+		err = split_vma(mm, vma, (unsigned long)vmai->cpt_start, 0);
+		if (err) {
+			up_write(&mm->mmap_sem);
+			eprintk_ctx("cannot split vma\n");
+			goto out;
+		}
+	}
+	up_write(&mm->mmap_sem);
+
+	if (vmai->cpt_anonvma && vmai->cpt_anonvmaid) {
+		err = verify_create_anonvma(mm, vmai, ctx);
+		if (err) {
+			eprintk_ctx("cannot verify_create_anonvma %Ld\n", vmapos);
+			goto out;
+		}
+	}
+
+	if (vmai->cpt_type == CPT_VMA_VDSO) {
+		struct page *page;
+		void *maddr;
+
+		err = get_user_pages(current, current->mm,
+				(unsigned long)vmai->cpt_start,
+				1, 1, 1, &page, NULL);
+		if (err == 0)
+			err = -EFAULT;
+		if (err < 0) {
+			eprintk_ctx("can't get vdso: get_user_pages: %d\n", err);
+			goto out;
+		}
+		err = 0;
+		maddr = kmap(page);
+		memcpy(maddr, ctx->vdso, PAGE_SIZE);
+		set_page_dirty_lock(page);
+		kunmap(page);
+		page_cache_release(page);
+		goto out;
+	}
+
+	if (vmai->cpt_next > vmai->cpt_hdrlen) {
+		loff_t offset = vmapos + vmai->cpt_hdrlen;
+
+		do {
+			union {
+				struct cpt_page_block pb;
+				struct cpt_remappage_block rpb;
+				struct cpt_copypage_block cpb;
+				struct cpt_lazypage_block lpb;
+				struct cpt_iterpage_block ipb;
+			} u;
+			loff_t pos;
+
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err) {
+				eprintk_ctx("vma fix object: %d\n", err);
+				goto out;
+			}
+			if (u.rpb.cpt_object == CPT_OBJ_REMAPPAGES) {
+				err = sc_remap_file_pages(u.rpb.cpt_start,
+							  u.rpb.cpt_end-u.rpb.cpt_start,
+							  0, u.rpb.cpt_pgoff, 0);
+				if (err < 0) {
+					eprintk_ctx("remap_file_pages: %d (%08x,%u,%u)\n", err,
+					       (__u32)u.rpb.cpt_start, (__u32)(u.rpb.cpt_end-u.rpb.cpt_start), 
+					       (__u32)u.rpb.cpt_pgoff);
+					goto out;
+				}
+				offset += u.rpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_LAZYPAGES) {
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+				unsigned long ptr = u.lpb.cpt_start;
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				while (ptr < u.lpb.cpt_end) {
+					err = rst_pagein(vma, u.lpb.cpt_index + (ptr-u.lpb.cpt_start)/PAGE_SIZE,
+							 ptr, ctx);
+					if (err)
+						break;
+					ptr += PAGE_SIZE;
+				}
+				up_read(&mm->mmap_sem);
+#else
+				err = -EINVAL;
+#endif
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_COPYPAGES) {
+				struct vm_area_struct *vma, *vma1;
+				struct mm_struct *src;
+				struct anon_vma *src_anon;
+				cpt_object_t *mobj;
+
+				if (!vmai->cpt_anonvmaid) {
+					err = -EINVAL;
+					eprintk_ctx("CPT_OBJ_COPYPAGES in !anonvma\n");
+					goto out;
+				}
+
+				mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, u.cpb.cpt_source, ctx);
+				if (!mobj) {
+					eprintk_ctx("lost mm_struct to clone pages from\n");
+					err = -ESRCH;
+					goto out;
+				}
+				src = mobj->o_obj;
+
+				down_read(&src->mmap_sem);
+				src_anon = NULL;
+				vma1 = find_vma(src, u.cpb.cpt_start);
+				if (vma1)
+					src_anon = vma1->anon_vma;
+				up_read(&src->mmap_sem);
+
+				if (!vma1) {
+					eprintk_ctx("lost src vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.cpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				if (!src_anon ||
+				    !vma->anon_vma ||
+				    vma->anon_vma != src_anon ||
+				    vma->vm_start - vma1->vm_start !=
+				    (vma->vm_pgoff - vma1->vm_pgoff) << PAGE_SHIFT) {
+					up_read(&mm->mmap_sem);
+					wprintk_ctx("anon_vma mismatch in vm_area_struct %Ld\n", vmapos);
+					err = copy_mm_pages(mobj->o_obj,
+							    u.cpb.cpt_start,
+							    u.cpb.cpt_end);
+				} else {
+					err = __copy_page_range(vma, vma1,
+								u.cpb.cpt_start,
+								u.cpb.cpt_end-u.cpb.cpt_start);
+					up_read(&mm->mmap_sem);
+				}
+				if (err) {
+					eprintk_ctx("clone_page_range: %d (%08x,%u,%ld)\n", err,
+						(__u32)u.cpb.cpt_start, (__u32)(u.cpb.cpt_end-u.cpb.cpt_start), 
+						(long)u.cpb.cpt_source);
+					goto out;
+				}
+
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.pb.cpt_object == CPT_OBJ_ITERPAGES ||
+				   u.pb.cpt_object == CPT_OBJ_ITERYOUNGPAGES
+				   ) {
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+				unsigned long ptr = u.lpb.cpt_start;
+				u64 page_pos[16];
+				pos = offset + sizeof(u.pb);
+
+				err = ctx->pread(&page_pos,
+						 8*(u.lpb.cpt_end-ptr)/PAGE_SIZE,
+						 ctx,
+						 pos);
+				if (err) {
+					eprintk_ctx("Oops\n");
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				while (ptr < u.lpb.cpt_end) {
+					err = rst_iter(vma,
+						       page_pos[(ptr-u.lpb.cpt_start)/PAGE_SIZE],
+						       ptr,
+						       ctx);
+					if (err)
+						break;
+					ptr += PAGE_SIZE;
+				}
+				if (u.pb.cpt_object == CPT_OBJ_ITERYOUNGPAGES) {
+					make_pages_present((unsigned long)u.lpb.cpt_start,
+							   (unsigned long)u.lpb.cpt_end);
+				}
+				up_read(&mm->mmap_sem);
+#else
+				err = -EINVAL;
+#endif
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			}
+			if (u.pb.cpt_object != CPT_OBJ_PAGES) {
+				eprintk_ctx("unknown vma fix object %d\n", u.pb.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			pos = offset + sizeof(u.pb);
+			if (!(vmai->cpt_flags&VM_ACCOUNT) && !(prot&PROT_WRITE)) {
+				/* I guess this is get_user_pages() messed things,
+				 * this happens f.e. when gdb inserts breakpoints.
+				 */
+				int i;
+				for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/PAGE_SIZE; i++) {
+					struct page *page;
+					void *maddr;
+					err = get_user_pages(current, current->mm,
+							     (unsigned long)u.pb.cpt_start + i*PAGE_SIZE,
+							     1, 1, 1, &page, NULL);
+					if (err == 0)
+						err = -EFAULT;
+					if (err < 0) {
+						eprintk_ctx("get_user_pages: %d\n", err);
+						goto out;
+					}
+					err = 0;
+					maddr = kmap(page);
+					if (u.pb.cpt_content == CPT_CONTENT_VOID) {
+						memset(maddr, 0, PAGE_SIZE);
+					} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
+						err = ctx->pread(maddr, PAGE_SIZE,
+								 ctx, pos + i*PAGE_SIZE);
+						if (err) {
+							kunmap(page);
+							goto out;
+						}
+					} else {
+						err = -EINVAL;
+						kunmap(page);
+						goto out;
+					}
+					set_page_dirty_lock(page);
+					kunmap(page);
+					page_cache_release(page);
+				}
+			} else {
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+				if (u.pb.cpt_content == CPT_CONTENT_VOID) {
+					int i;
+					for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/sizeof(unsigned long); i++) {
+						err = __put_user(0UL, ((unsigned long __user*)(unsigned long)u.pb.cpt_start) + i);
+						if (err) {
+							eprintk_ctx("__put_user 2 %d\n", err);
+							goto out;
+						}
+					}
+				} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
+					loff_t tpos = pos;
+					ssize_t res;
+
+					res = ctx->file->f_op->read(ctx->file,
+							cpt_ptr_import(u.pb.cpt_start),
+							u.pb.cpt_end-u.pb.cpt_start,
+							&tpos);
+					if (res != u.pb.cpt_end-u.pb.cpt_start) {
+						err = res < 0 ? res : -EIO;
+						goto out;
+					}
+				} else {
+					err = -EINVAL;
+					goto out;
+				}
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+			}
+			err = 0;
+			offset += u.pb.cpt_next;
+		} while (offset < vmapos + vmai->cpt_next);
+	}
+
+check:
+	do {
+		struct vm_area_struct *vma;
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, addr);
+		if (vma) {
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_READHINTMASK) {
+				VM_ClearReadHint(vma);
+				vma->vm_flags |= vmai->cpt_flags&VM_READHINTMASK;
+			}
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_LOCKED) {
+				dprintk_ctx("fixing up VM_LOCKED %Ld\n", vmapos);
+				up_read(&mm->mmap_sem);
+				if (vma->vm_flags&VM_LOCKED)
+					err = sc_munlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				else
+					err = sc_mlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				/* When mlock fails with EFAULT, it means
+				 * that it could not bring in pages.
+				 * It can happen after mlock() on unreadable
+				 * VMAs. But VMA is correctly locked,
+				 * so that this error can be ignored. */
+				if (err == -EFAULT)
+					err = 0;
+				if (err)
+					goto out;
+				goto check;
+			}
+			if ((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&~__PAGE_NX)
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+					    (unsigned long long)vma->vm_page_prot.pgprot,
+					    (unsigned long long)vmai->cpt_pgprot);
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+			if (((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&__PAGE_NX) &&
+			    (ctx->kernel_config_flags & (1 << CPT_KERNEL_CONFIG_PAE)))
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+				       (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
+#endif
+			if (vma->vm_flags != vmai->cpt_flags) {
+				unsigned long x = vma->vm_flags ^ vmai->cpt_flags;
+				if (x & VM_EXEC) {
+					/* Crap. On i386 this is OK.
+					 * It is impossible to make via mmap/mprotect
+					 * exec.c clears VM_EXEC on stack. */
+					vma->vm_flags &= ~VM_EXEC;
+				} else if ((x & VM_ACCOUNT) && !checked) {
+					checked = 1;
+					if (!(prot&PROT_WRITE)) {
+						up_read(&mm->mmap_sem);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+						goto check;
+					}
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				} else {
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				}
+			}
+		} else {
+			wprintk_ctx("no VMA for %08lx@%ld\n", addr, (long)vmapos);
+		}
+		up_read(&mm->mmap_sem);
+	} while (0);
+
+out:
+	if (file)
+		fput(file);
+	return err;
+}
+
+#ifndef CONFIG_IA64
+#define TASK_UNMAP_START	0
+#else
+/* On IA64 the first page is a special VM_IO|VM_RESERVED mapping
+ * used to accelerate speculative dereferences of NULL pointer. */
+#define TASK_UNMAP_START	PAGE_SIZE
+#endif
+
+/*
+ * Rebuild current->mm from the checkpointed MM image: tear down the old
+ * address space, reapply the saved layout fields, then replay every
+ * object packed after the cpt_mm_image header (VMAs, x86 LDT bits, AIO
+ * contexts).  Returns 0 on success or a negative error code.
+ */
+static int do_rst_mm(struct cpt_mm_image *vmi, struct cpt_task_image *ti,
+		int is64bit, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned int def_flags;
+	struct mm_struct *mm = current->mm;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *bc;
+#endif
+
+	down_write(&mm->mmap_sem);
+	/* Drop every existing mapping before grafting the saved ones. */
+	do_munmap(mm, TASK_UNMAP_START, TASK_SIZE-TASK_UNMAP_START);
+
+#ifdef CONFIG_USER_RESOURCE
+	/*
+	 * MM beancounter is usually correct from the fork time,
+	 * but not for init, for example.
+	 * Luckily, mm_ub can be changed for a completely empty MM.
+	 */
+	bc = rst_lookup_ubc(vmi->cpt_mmub, ctx);
+	err = virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_RSTMM, bc);
+	if (err & NOTIFY_FAIL) {
+		up_write(&mm->mmap_sem);
+		return -ECHRNG;
+	}
+	if ((err & VIRTNOTIFY_CHANGE) && bc != mm->mm_ub) {
+		struct user_beancounter *old_bc;
+
+		old_bc = mm->mm_ub;
+		mm->mm_ub = bc;
+		bc = old_bc;
+	}
+	err = 0;
+	/* After the swap above, this releases the previous beancounter;
+	 * otherwise it drops the lookup reference. */
+	put_beancounter(bc);
+#endif
+
+	mm->start_code = vmi->cpt_start_code;
+	mm->end_code = vmi->cpt_end_code;
+	mm->start_data = vmi->cpt_start_data;
+	mm->end_data = vmi->cpt_end_data;
+	mm->start_brk = vmi->cpt_start_brk;
+	mm->brk = vmi->cpt_brk;
+	mm->start_stack = vmi->cpt_start_stack;
+	mm->arg_start = vmi->cpt_start_arg;
+	mm->arg_end = vmi->cpt_end_arg;
+	mm->env_start = vmi->cpt_start_env;
+	mm->env_end = vmi->cpt_end_env;
+	mm->def_flags = 0;
+	/* Saved def_flags are applied only after all VMAs are restored,
+	 * see the tail of this function. */
+	def_flags = vmi->cpt_def_flags;
+
+#ifdef CONFIG_X86_64
+	if (!ti->cpt_64bit) {
+		set_thread_flag(TIF_IA32);
+		mm->free_area_cache = TASK_UNMAPPED_BASE;
+		arch_pick_mmap_layout(mm);
+	}
+#endif
+
+	mm->dumpable = vmi->cpt_dumpable;
+	mm->vps_dumpable = vmi->cpt_vps_dumpable;
+#ifndef CONFIG_IA64
+	if (ctx->image_version >= CPT_VERSION_9) {
+		mm->context.vdso = cpt_ptr_import(vmi->cpt_vdso);
+		current_thread_info()->sysenter_return = CPT_SYSENTER_RETURN;
+	}
+#endif
+
+#if 0 /* def CONFIG_HUGETLB_PAGE*/
+/* NB: ? */
+	int used_hugetlb;
+#endif
+	up_write(&mm->mmap_sem);
+
+	/* Walk the objects packed after the MM header, if any. */
+	if (vmi->cpt_next > vmi->cpt_hdrlen) {
+		loff_t offset = ti->cpt_mm + vmi->cpt_hdrlen;
+		do {
+			union {
+				struct cpt_vma_image vmai;
+				struct cpt_aio_ctx_image aioi;
+				struct cpt_obj_bits bits;
+			} u;
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err)
+				goto out;
+			if (u.vmai.cpt_object == CPT_OBJ_VMA) {
+#ifdef CONFIG_IA64
+				//// Later...
+				/* Unbraced: guards only the do_rst_vma() call. */
+				if (u.vmai.cpt_start)
+#endif
+				err = do_rst_vma(&u.vmai, is64bit, offset, ti->cpt_mm, ctx);
+				if (err)
+					goto out;
+#ifdef CONFIG_X86
+			} else if (u.bits.cpt_object == CPT_OBJ_BITS &&
+				   u.bits.cpt_content == CPT_CONTENT_MM_CONTEXT) {
+				err = do_rst_ldt(&u.bits, offset, ctx);
+				if (err)
+					goto out;
+#endif
+			} else if (u.aioi.cpt_object == CPT_OBJ_AIO_CONTEXT) {
+				err = do_rst_aio(&u.aioi, offset, ctx);
+				if (err)
+					goto out;
+			} else {
+				eprintk_ctx("unknown object %u in mm image\n", u.vmai.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			offset += u.vmai.cpt_next;
+		} while (offset < ti->cpt_mm + vmi->cpt_next);
+	}
+
+	down_write(&mm->mmap_sem);
+	mm->def_flags = def_flags;
+	up_write(&mm->mmap_sem);
+
+
+out:
+	return err;
+}
+
+extern void exit_mm(struct task_struct * tsk);
+
+/*
+ * Final MM restore step, run in the restored task's own context.
+ * Handles three cases: no MM in the image (drop current->mm), an MM
+ * already restored by another task (must match), or a fresh MM that is
+ * populated via do_rst_mm() and registered in the object table.
+ */
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err = 0;
+	cpt_object_t *mobj;
+	void *tmp = (void*)__get_free_page(GFP_KERNEL);
+	struct cpt_mm_image *vmi = (struct cpt_mm_image *)tmp;
+
+	if (!tmp)
+		return -ENOMEM;
+
+	if (ti->cpt_mm == CPT_NULL) {
+		/* Image says "no MM": become a kernel-thread-like task. */
+		if (current->mm) {
+			virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXIT,
+					current);
+			exit_mm(current);
+		}
+		goto out;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj) {
+		/* Shared MM was already restored; clone() must have
+		 * attached us to exactly that one. */
+		if (current->mm != mobj->o_obj) BUG();
+		goto out;
+	}
+
+	if (current->mm == NULL) {
+		struct mm_struct *mm = mm_alloc();
+		if (mm == NULL) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = init_new_context(current, mm);
+		if (err) {
+			mmdrop(mm);
+			goto out;
+		}
+		current->mm = mm;
+	}
+
+	if ((err = rst_get_object(CPT_OBJ_MM, ti->cpt_mm, vmi, ctx)) != 0)
+		goto out;
+	if ((err = do_rst_mm(vmi, ti, ti->cpt_64bit, ctx)) != 0) {
+		eprintk_ctx("do_rst_mm %Ld\n", (unsigned long long)ti->cpt_mm);
+		goto out;
+	}
+	err = -ENOMEM;
+	/* Register the restored MM so later tasks can share it. */
+	mobj = cpt_object_add(CPT_OBJ_MM, current->mm, ctx);
+	if (mobj != NULL) {
+		err = 0;
+		cpt_obj_setpos(mobj, ti->cpt_mm, ctx);
+	}
+
+out:
+	if (tmp)
+		free_page((unsigned long)tmp);
+	return err;
+}
+
+/* This is part of mm setup, done in the parent's context. Mostly, it is
+ * the place where we graft the mm of another process onto the child.
+ */
+
+/*
+ * Attach the already-restored shared MM (if any) to task @obj, or drop
+ * the task's MM entirely when the image has none.  Returns 0 always.
+ */
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct task_struct *tsk = obj->o_obj;
+	cpt_object_t *mobj;
+
+	/* Task without mm. Just get rid of this. */
+	if (ti->cpt_mm == CPT_NULL) {
+		if (tsk->mm) {
+			virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXIT,
+					tsk);
+			mmput(tsk->mm);
+			tsk->mm = NULL;
+		}
+		return 0;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj) {
+		struct mm_struct *newmm = mobj->o_obj;
+		/* Good, the MM is already created. */
+		if (newmm == tsk->mm) {
+			/* Already done by clone(). */
+			return 0;
+		}
+		/* NOTE(review): assumes tsk->mm != NULL here (the CPT_NULL
+		 * branch above handled mm-less images) -- confirm that the
+		 * caller always hands us a task with an MM. */
+		mmput(tsk->mm);
+		atomic_inc(&newmm->mm_users);
+		tsk->mm = newmm;
+		tsk->active_mm = newmm;
+	}
+	return 0;
+}
+
+/* We use CLONE_VM when mm of child is going to be shared with parent.
+ * Otherwise mm is copied.
+ */
+
+/*
+ * Return CLONE_VM when the child's MM should be shared with the parent
+ * (no MM in the image, or the MM object is already restored); 0 means
+ * the MM will be copied and rebuilt by rst_mm_complete().
+ */
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	if (ti->cpt_mm == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx))
+		return CLONE_VM;
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_net.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_net.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_net.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_net.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,756 @@
+/*
+ *
+ *  kernel/cpt/rst_net.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/addrconf.h>
+#include <linux/nfcalls.h>
+#include <linux/ip.h>
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_net.h"
+#include "cpt_files.h"
+
+#include "cpt_syscalls.h"
+
+extern struct in_ifaddr *inet_alloc_ifa(void);
+extern int inet_insert_ifa(struct in_ifaddr *ifa);
+
+/*
+ * Replay the CPT_SECT_NET_IFADDR section: re-add every saved IPv4 (and,
+ * when built in, IPv6) interface address to the device it belonged to.
+ * -EEXIST from the address layer is tolerated; other errors abort.
+ */
+int rst_restore_ifaddr(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IFADDR];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ifaddr_image di;
+	struct net_device *dev;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IFADDR || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int cindex = -1;
+		int err;
+		err = rst_get_object(CPT_OBJ_NET_IFADDR, sec, &di, ctx);
+		if (err)
+			return err;
+		cindex = di.cpt_index;
+		rtnl_lock();
+		dev = __dev_get_by_index(cindex);
+		if (dev && di.cpt_family == AF_INET) {
+			struct in_device *in_dev;
+			struct in_ifaddr *ifa;
+			if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
+				in_dev = inetdev_init(dev);
+			ifa = inet_alloc_ifa();
+			/* NOTE(review): allocation failure silently skips
+			 * this address instead of reporting -ENOMEM --
+			 * confirm this best-effort behavior is intended. */
+			if (ifa) {
+				ifa->ifa_local = di.cpt_address[0];
+				ifa->ifa_address = di.cpt_peer[0];
+				ifa->ifa_broadcast = di.cpt_broadcast[0];
+				ifa->ifa_prefixlen = di.cpt_masklen;
+				ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
+				ifa->ifa_flags = di.cpt_flags;
+				ifa->ifa_scope = di.cpt_scope;
+				memcpy(ifa->ifa_label, di.cpt_label, IFNAMSIZ);
+				in_dev_hold(in_dev);
+				ifa->ifa_dev   = in_dev;
+				err = inet_insert_ifa(ifa);
+				if (err && err != -EEXIST) {
+					rtnl_unlock();
+					eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+					return err;
+				}
+			}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		} else if (dev && di.cpt_family == AF_INET6) {
+			struct ve_struct *ve;
+			__u32 prefered_lft;
+			__u32 valid_lft;
+
+			ve = get_exec_env();
+			prefered_lft = (di.cpt_flags & IFA_F_DEPRECATED) ?
+				0 : di.cpt_prefered_lft;
+			valid_lft = (di.cpt_flags & IFA_F_PERMANENT) ?
+				0xFFFFFFFF : di.cpt_valid_lft;
+
+			/* IPv6 is restored through the VE's ipv6_ops hook. */
+			if (ve->ipv6_ops)
+				err = ve->ipv6_ops->addr_add(dev->ifindex,
+						(struct in6_addr *)di.cpt_address,
+						di.cpt_masklen,
+						prefered_lft,
+						valid_lft);
+			else
+				err = -EPFNOSUPPORT;
+
+			if (err && err != -EEXIST) {
+				rtnl_unlock();
+				eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+				return err;
+			}
+#endif
+		} else {
+			rtnl_unlock();
+			eprintk_ctx("unknown ifaddr 2 for %d\n", di.cpt_index);
+			return -EINVAL;
+		}
+		rtnl_unlock();
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+/*
+ * Classify a saved RTM_NEWROUTE message before replaying it.
+ * Returns 2 when the route must be skipped (IPv6 local/cloned/kernel-
+ * generated routes that the stack recreates itself), 1 when the route
+ * is RTPROT_KERNEL (the caller then tolerates -EEXIST), 0 otherwise.
+ */
+static int rewrite_rtmsg(struct nlmsghdr *nlh, struct cpt_context *ctx)
+{
+	int min_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	__u32 prefix0 = 0;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *rta = (void*)nlh + NLMSG_ALIGN(min_len);
+
+		/* Remember the destination prefix for the checks below. */
+		while (RTA_OK(rta, attrlen)) {
+			if (rta->rta_type == RTA_DST) {
+				prefix0 = *(__u32*)RTA_DATA(rta);
+			}
+			rta = RTA_NEXT(rta, attrlen);
+		}
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (rtm->rtm_family == AF_INET6) {
+		if (rtm->rtm_type == RTN_LOCAL)
+			return 2;
+		if (rtm->rtm_flags & RTM_F_CLONED)
+			return 2;
+		if (rtm->rtm_protocol == RTPROT_UNSPEC ||
+		    rtm->rtm_protocol == RTPROT_RA ||
+		    rtm->rtm_protocol == RTPROT_REDIRECT ||
+		    rtm->rtm_protocol == RTPROT_KERNEL)
+			return 2;
+		/* Multicast (ff00::/8) and link-local (fe80::/64) boot
+		 * routes are recreated automatically -- skip them too. */
+		if (rtm->rtm_protocol == RTPROT_BOOT &&
+		    ((rtm->rtm_dst_len == 8 && prefix0 == htonl(0xFF000000)) ||
+		     (rtm->rtm_dst_len == 64 && prefix0 == htonl(0xFE800000))))
+			return 2;
+	}
+#endif
+	return rtm->rtm_protocol == RTPROT_KERNEL;
+}
+
+/*
+ * Replay the CPT_SECT_NET_ROUTE section: feed each saved netlink route
+ * message into an in-kernel NETLINK_ROUTE socket and check the ack.
+ */
+int rst_restore_route(struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct sockaddr_nl nladdr;
+	mm_segment_t oldfs;
+	loff_t sec = ctx->sections[CPT_SECT_NET_ROUTE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr v;
+	char *pg;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_ROUTE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	if (h.cpt_hdrlen >= h.cpt_next)
+		return 0;
+
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NET_ROUTE, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	endsec = sec + v.cpt_next;
+	sec += v.cpt_hdrlen;
+
+	while (sec < endsec) {
+		struct nlmsghdr *n;
+		struct nlmsghdr nh;
+		int kernel_flag;
+
+		if (endsec - sec < sizeof(nh))
+			break;
+
+		/* Read the header first to learn the message length,
+		 * then re-read the whole message into the page buffer. */
+		err = ctx->pread(&nh, sizeof(nh), ctx, sec);
+		if (err)
+			goto out_sock_pg;
+		if (nh.nlmsg_len < sizeof(nh) || nh.nlmsg_len > PAGE_SIZE ||
+		    endsec - sec < nh.nlmsg_len) {
+			err = -EINVAL;
+			goto out_sock_pg;
+		}
+		err = ctx->pread(pg, nh.nlmsg_len, ctx, sec);
+		if (err)
+			goto out_sock_pg;
+
+		n = (struct nlmsghdr*)pg;
+		n->nlmsg_flags = NLM_F_REQUEST|NLM_F_APPEND|NLM_F_CREATE;
+
+		/* 2 = skip this route, 1 = kernel route (EEXIST is OK). */
+		err = rewrite_rtmsg(n, ctx);
+		if (err < 0)
+			goto out_sock_pg;
+		kernel_flag = err;
+
+		if (kernel_flag == 2)
+			goto do_next;
+
+		iov.iov_base=n;
+		iov.iov_len=nh.nlmsg_len;
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, nh.nlmsg_len);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+		err = 0;
+
+		/* Reuse the same msg/iov to receive the kernel's ack. */
+		iov.iov_base=pg;
+		iov.iov_len=PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+		if (err != -EAGAIN) {
+			if (err == NLMSG_LENGTH(sizeof(struct nlmsgerr)) &&
+			    n->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *e = NLMSG_DATA(n);
+				if (e->error != -EEXIST || !kernel_flag)
+					eprintk_ctx("NLMERR: %d\n", e->error);
+			} else {
+				eprintk_ctx("Res: %d %d\n", err, n->nlmsg_type);
+			}
+		}
+do_next:
+		err = 0;
+		sec += NLMSG_ALIGN(nh.nlmsg_len);
+	}
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+/*
+ * Re-enable networking for the VE after restore (rst_restore_netdev()
+ * set disable_net while devices were being rebuilt).
+ */
+int rst_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	env->disable_net = 0;
+	put_ve(env);
+	return 0;
+}
+
+/*
+ * Restore the saved interface statistics for @dev from the image at
+ * @pos.  The counters are written into the device's cpt_ops stats
+ * structure with preemption disabled.
+ *
+ * Fix: the original jumped to the common exit label when
+ * rst_get_object() failed, calling preempt_enable() without a matching
+ * preempt_disable() (the disable happens only later).  The error path
+ * now skips the preemption re-enable.
+ */
+static int rst_restore_netstats(loff_t pos, struct net_device *dev,
+			struct cpt_context * ctx)
+{
+	struct cpt_netstats_image *n;
+	struct net_device_stats *stats = NULL;
+	int err;
+
+	if (!dev->get_stats)
+		return 0;
+
+	n = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_NET_STATS, pos, n, ctx);
+	if (err)
+		goto out_buf;	/* preemption not disabled yet */
+	BUG_ON(sizeof(struct cpt_netstats_image) != n->cpt_hdrlen);
+	preempt_disable();
+
+	if (!dev->cpt_ops) {
+		err = -ENODEV;
+		eprintk_ctx("Network device %s is not supported\n", dev->name);
+		goto out;
+	}
+	stats = dev->cpt_ops->stats(dev);
+
+	stats->rx_packets = n->cpt_rx_packets;
+	stats->tx_packets = n->cpt_tx_packets;
+	stats->rx_bytes = n->cpt_rx_bytes;
+	stats->tx_bytes = n->cpt_tx_bytes;
+	stats->rx_errors = n->cpt_rx_errors;
+	stats->tx_errors = n->cpt_tx_errors;
+	stats->rx_dropped = n->cpt_rx_dropped;
+	stats->tx_dropped = n->cpt_tx_dropped;
+	stats->multicast = n->cpt_multicast;
+	stats->collisions = n->cpt_collisions;
+	stats->rx_length_errors = n->cpt_rx_length_errors;
+	stats->rx_over_errors = n->cpt_rx_over_errors;
+	stats->rx_crc_errors = n->cpt_rx_crc_errors;
+	stats->rx_frame_errors = n->cpt_rx_frame_errors;
+	stats->rx_fifo_errors = n->cpt_rx_fifo_errors;
+	stats->rx_missed_errors = n->cpt_rx_missed_errors;
+	stats->tx_aborted_errors = n->cpt_tx_aborted_errors;
+	stats->tx_carrier_errors = n->cpt_tx_carrier_errors;
+	stats->tx_fifo_errors = n->cpt_tx_fifo_errors;
+	stats->tx_heartbeat_errors = n->cpt_tx_heartbeat_errors;
+	stats->tx_window_errors = n->cpt_tx_window_errors;
+	stats->rx_compressed = n->cpt_rx_compressed;
+	stats->tx_compressed = n->cpt_tx_compressed;
+
+	if (dev->cpt_ops->post_restore_netstats)
+		dev->cpt_ops->post_restore_netstats(dev);
+out:
+	preempt_enable();
+out_buf:
+	cpt_release_buf(ctx);
+	return err;
+}
+
+/*
+ * Replay the CPT_SECT_NET_DEVICE section: recreate or find each saved
+ * network device, force its saved ifindex (swapping with any device
+ * that currently occupies it), restore flags, hardware address and
+ * statistics.  Networking is disabled for the VE for the duration.
+ */
+int rst_restore_netdev(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_DEVICE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_netdev_image di;
+	struct net_device *dev;
+
+	get_exec_env()->disable_net = 1;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_DEVICE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		loff_t pos;
+		struct dev_cpt_ops *ops;
+
+		struct net_device *dev_new;
+		err = rst_get_object(CPT_OBJ_NET_DEVICE, sec, &di, ctx);
+		if (err)
+			return err;
+
+		rtnl_lock();
+
+		pos = sec + di.cpt_hdrlen;
+		if (di.cpt_next > sizeof(di)) {
+			struct cpt_object_hdr hdr;
+			err = ctx->pread(&hdr, sizeof(struct cpt_object_hdr),
+					ctx, sec + di.cpt_hdrlen);
+			if (err)
+				goto out;
+			/*
+			 * Loopback and venet have no cpt ops of their own;
+			 * any other unknown object is caught later.
+			 */
+			ops = NULL;
+			while (1) {
+				ops = dev_cpt_ops_get(hdr.cpt_object, ops);
+				if (!ops)
+					break;
+
+				err = ops->restore(sec, &di, &rst_ops, ctx);
+				if (!err) {
+					pos += hdr.cpt_next;
+					break;
+				} else if (err < 0) {
+					eprintk_ctx("restore %s %s: %d\n",
+							ops->name,
+							di.cpt_name, err);
+					goto out;
+				}
+			}
+		}
+
+		dev = __dev_get_by_name(di.cpt_name);
+		if (dev) {
+			/* Force the saved ifindex back onto the device,
+			 * swapping indices with a current occupant. */
+			if (dev->ifindex != di.cpt_index) {
+				dev_new = __dev_get_by_index(di.cpt_index);
+				if (!dev_new) {
+					write_lock_bh(&dev_base_lock);
+					hlist_del(&dev->index_hlist);
+					if (dev->iflink == dev->ifindex)
+						dev->iflink = di.cpt_index;
+					dev->ifindex = di.cpt_index;
+					hlist_add_head(&dev->index_hlist,
+							dev_index_hash(dev->ifindex,
+								get_exec_env()));
+					write_unlock_bh(&dev_base_lock);
+				} else {
+					write_lock_bh(&dev_base_lock);
+					hlist_del(&dev->index_hlist);
+					hlist_del(&dev_new->index_hlist);
+					if (dev_new->iflink == dev_new->ifindex)
+						dev_new->iflink = dev->ifindex;
+					dev_new->ifindex = dev->ifindex;
+					if (dev->iflink == dev->ifindex)
+						dev->iflink = di.cpt_index;
+					dev->ifindex = di.cpt_index;
+					hlist_add_head(&dev->index_hlist,
+							dev_index_hash(dev->ifindex,
+								get_exec_env()));
+					hlist_add_head(&dev_new->index_hlist,
+							dev_index_hash(dev_new->ifindex,
+								get_exec_env()));
+					write_unlock_bh(&dev_base_lock);
+				}
+			}
+			if (di.cpt_flags^dev->flags) {
+				err = dev_change_flags(dev, di.cpt_flags);
+				if (err)
+					eprintk_ctx("dev_change_flags err: %d\n", err);
+			}
+			/* Trailing objects: hardware address and stats. */
+			while (pos < sec + di.cpt_next) {
+				struct cpt_object_hdr hdr;
+				err = ctx->pread(&hdr, sizeof(struct cpt_object_hdr),
+						ctx, pos);
+				if (err)
+					goto out;
+
+				if (hdr.cpt_object == CPT_OBJ_NET_HWADDR) {
+					/* Restore hardware address */
+					struct cpt_hwaddr_image hw;
+					err = rst_get_object(CPT_OBJ_NET_HWADDR,
+							pos, &hw, ctx);
+					if (err)
+						goto out;
+				BUG_ON(sizeof(hw.cpt_dev_addr) !=
+						sizeof(dev->dev_addr));
+					memcpy(dev->dev_addr, hw.cpt_dev_addr,
+							sizeof(hw.cpt_dev_addr));
+				} else if (hdr.cpt_object == CPT_OBJ_NET_STATS) {
+					err = rst_restore_netstats(pos, dev, ctx);
+					if (err) {
+						eprintk_ctx("rst stats %s: %d\n",
+								di.cpt_name, err);
+						goto out;
+					}
+				}
+				pos += hdr.cpt_next;
+			}
+		} else {
+			eprintk_ctx("unknown interface 2 %s\n", di.cpt_name);
+		}
+		rtnl_unlock();
+		sec += di.cpt_next;
+	}
+	return 0;
+out:
+	rtnl_unlock();
+	return err;
+}
+
+/*
+ * Kernel-thread body spawned by rst_restore_iptables(): wires the read
+ * end of the pipe to fd 0, closes all other fds and execs
+ * iptables-restore.  Returns 255 << 8 (exit-status encoding) only if
+ * the exec fails.
+ */
+static int dumpfn(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	char *argv[] = { "iptables-restore", "-c", NULL };
+
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+
+	for (i=1; i<current->files->fdt->max_fds; i++)
+		sc_close(i);
+
+	/* Balance the module reference held by the spawning thread. */
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	i = sc_execve("/sbin/iptables-restore", argv, NULL);
+	if (i == -ENOENT)
+		i = sc_execve("/usr/sbin/iptables-restore", argv, NULL);
+	eprintk("failed to exec iptables-restore: %d\n", i);
+	return 255 << 8;
+}
+
+/*
+ * Replay the saved iptables dump: spawn a kernel thread running
+ * iptables-restore with its stdin connected to a pipe, stream the
+ * NUL-terminated dump text from the image into the pipe, then wait for
+ * the child and decode its exit status.
+ */
+static int rst_restore_iptables(struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	struct cpt_section_hdr h;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IPTABLES];
+	loff_t end;
+	int pid;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IPTABLES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	if (h.cpt_hdrlen == h.cpt_next)
+		return 0;
+	if (h.cpt_hdrlen > h.cpt_next)
+		return -EINVAL;
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NAME, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	/* Block the usual signals while waiting for the child. */
+	ignore.sig[0] = CPT_SIG_IGNORE_MASK;
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	pid = err = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("iptables local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	/* NOTE(review): fget() result is not checked; f would be NULL if
+	 * pfd[1] were closed concurrently -- confirm this cannot happen
+	 * in the restore thread. */
+	f = fget(pfd[1]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = sec + v.cpt_hdrlen;
+	end = sec + v.cpt_next;
+	do {
+		char *p;
+		char buf[16];
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))
+			break;
+		/* The dump is NUL-terminated; stop at the terminator. */
+		if ((p = memchr(buf, 0, n)) != NULL)
+			n = p - buf;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	fput(f);
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {
+		/* Normal exit: translate the exit code. */
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("iptables-restore exited with %d\n", err);
+			eprintk_ctx("Most probably some iptables modules are not loaded\n");
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("iptables-restore terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	return err;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+
+/*
+ * Restore one SNMP counter array from a CPT_OBJ_BITS object at *ppos.
+ * Return contract: negative on error, 0 when this was the last object,
+ * 1 when the caller should continue with the next statistics block
+ * (also returned for a CPT_CONTENT_VOID placeholder).
+ */
+static int rst_restore_snmp_stat(struct cpt_context *ctx, void *mib[], int n,
+		loff_t *ppos, loff_t endpos)
+{
+	int err, in, i;
+	struct cpt_object_hdr o;
+	__u32 *stats;
+
+	err = rst_get_object(CPT_OBJ_BITS, *ppos, &o, ctx);
+	if (err)
+		return err;
+
+	in = o.cpt_next - o.cpt_hdrlen;
+	if (in >= PAGE_SIZE - 4) {
+		eprintk_ctx("Too long SNMP buf (%d)\n", in);
+		return -EINVAL;
+	}
+
+	if (o.cpt_content != CPT_CONTENT_DATA) {
+		if (o.cpt_content == CPT_CONTENT_VOID)
+			return 1;
+
+		eprintk_ctx("Corrupted SNMP stats\n");
+		return -EINVAL;
+	}
+
+	stats = cpt_get_buf(ctx);
+	err = ctx->pread(stats, in, ctx, (*ppos) + o.cpt_hdrlen);
+	if (err)
+		goto out;
+
+	in /= sizeof(*stats);
+	if (in > n)
+		wprintk_ctx("SNMP stats trimmed\n");
+	else
+		n = in;
+
+	/* All saved counters land in CPU 0's slot of mib[0] only; the
+	 * per-cpu readers sum the slots, so totals come out right. */
+	for (i = 0; i < n; i++)
+		*((unsigned long *)(per_cpu_ptr(mib[0], 0)) + i) = stats[i];
+
+	*ppos += o.cpt_next;
+	if (*ppos < endpos)
+		err = 1; /* go on restoring */
+out:
+	cpt_release_buf(ctx);
+	return err;
+}
+
+/*
+ * Restore all per-VE SNMP statistics from CPT_SECT_SNMP_STATS.  The
+ * blocks must appear in exactly this order (mirroring the dump side);
+ * rst_restore_snmp_stat() returns 1 to continue, 0/negative to stop.
+ */
+static int rst_restore_snmp(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SNMP_STATS];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct ve_struct *ve;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_SNMP_STATS || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	ve = get_exec_env();
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	if (sec >= endsec)
+		goto out;
+
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_net_statistics,
+			LINUX_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_ip_statistics,
+			IPSTATS_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_tcp_statistics,
+			TCP_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_udp_statistics,
+			UDP_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_icmp_statistics,
+			ICMP_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_icmpmsg_statistics,
+			ICMPMSG_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_ipv6_statistics,
+			IPSTATS_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_udp_stats_in6,
+			UDP_MIB_MAX, &sec, endsec);
+	if (err <= 0)
+		goto out;
+	err = rst_restore_snmp_stat(ctx, (void **)&ve->_icmpv6_statistics,
+			ICMP6_MIB_MAX, &sec, endsec);
+#endif
+	/* A trailing 1 ("more to read") after the last block is fine. */
+	if (err == 1)
+		err = 0;
+out:
+	return err;
+}
+
+/*
+ * Top-level network restore: devices first (this also disables VE
+ * networking), then addresses, routes, iptables, conntrack and SNMP
+ * counters.  Stops at the first failure.
+ */
+int rst_restore_net(struct cpt_context *ctx)
+{
+	int err;
+
+	err = rst_restore_netdev(ctx);
+	if (!err)
+		err = rst_restore_ifaddr(ctx);
+	if (!err)
+		err = rst_restore_route(ctx);
+	if (!err)
+		err = rst_restore_iptables(ctx);
+	if (!err)
+		err = rst_restore_ip_conntrack(ctx);
+	if (!err)
+		err = rst_restore_snmp(ctx);
+	return err;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_proc.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_proc.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_proc.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_proc.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,607 @@
+/*
+ *
+ *  kernel/cpt/rst_proc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL");
+
+/* Global list of restore contexts; cpt_context_lock protects both the
+ * list linkage and every context's refcount/sticky fields. */
+static struct list_head cpt_context_list;
+static spinlock_t cpt_context_lock;
+
+/*
+ * Legacy read_proc-style handler: render one line per restore context
+ * and window the output into the caller's (offset, length) slice using
+ * the classic begin/pos bookkeeping.
+ */
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	cpt_context_t *ctx;
+
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		len += pagein_info_printf(buffer+len, ctx);
+#endif
+
+		buffer[len++] = '\n';
+
+		pos = begin+len;
+		/* Discard everything before the requested offset... */
+		if (pos < offset) {
+			len = 0;
+			begin = pos;
+		}
+		/* ...and stop once the requested window is filled. */
+		if (pos > offset+length)
+			goto done;
+	}
+	*eof = 1;
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+/*
+ * Destroy a restore context once its refcount hit zero.  Called with
+ * cpt_context_lock held: the context is unlinked, the lock is dropped
+ * for the heavyweight teardown, and reacquired before returning so the
+ * caller's locking state is unchanged.
+ */
+void rst_context_release(cpt_context_t *ctx)
+{
+	list_del(&ctx->ctx_list);
+	spin_unlock(&cpt_context_lock);
+
+	if (ctx->ctx_state > 0)
+		rst_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+	rst_close_dumpfile(ctx);
+
+	/* Free the anonymous-VMA hash built during undump. */
+	if (ctx->anonvmas) {
+		int h;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++) {
+			while (!hlist_empty(&ctx->anonvmas[h])) {
+				struct hlist_node *elem = ctx->anonvmas[h].first;
+				hlist_del(elem);
+				kfree(elem);
+			}
+		}
+		free_page((unsigned long)ctx->anonvmas);
+	}
+	cpt_flush_error(ctx);
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	rst_drop_iter_dir(ctx);
+#endif
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+#endif
+	if (ctx->filejob_queue)
+		rst_flush_filejobs(ctx);
+	if (ctx->vdso)
+		free_page((unsigned long)ctx->vdso);
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	kfree(ctx);
+
+	spin_lock(&cpt_context_lock);
+}
+
+/* Drop one reference; caller must hold cpt_context_lock. */
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (!--ctx->refcount)
+		rst_context_release(ctx);
+}
+
+/* Locked wrapper around __cpt_context_put(). */
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);
+	__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+}
+
+/*
+ * Allocate and register a new restore context; returns NULL on OOM.
+ * The error-message page is best-effort (a NULL error_msg is legal).
+ * NOTE(review): initial refcount is presumably set by
+ * rst_context_init() -- confirm.
+ */
+cpt_context_t * rst_context_open(void)
+{
+	cpt_context_t *ctx;
+
+	if ((ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)) != NULL) {
+		rst_context_init(ctx);
+		spin_lock(&cpt_context_lock);
+		list_add_tail(&ctx->ctx_list, &cpt_context_list);
+		spin_unlock(&cpt_context_lock);
+		ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
+		if (ctx->error_msg != NULL)
+			ctx->error_msg[0] = 0;
+	}
+	return ctx;
+}
+
+/*
+ * Report a restore failure to the userspace watcher: write status code
+ * 7 (VZ_ENVCREATE_ERROR) into the registered status file and drop the
+ * context's reference to it.  No-op without a status file.
+ */
+void rst_report_error(int err, cpt_context_t *ctx)
+{
+	if (ctx->statusfile) {
+		mm_segment_t oldfs;
+		int status = 7 /* VZ_ENVCREATE_ERROR */;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		if (ctx->statusfile->f_op && ctx->statusfile->f_op->write)
+			ctx->statusfile->f_op->write(ctx->statusfile, (char*)&status, sizeof(status), &ctx->statusfile->f_pos);
+		set_fs(oldfs);
+		fput(ctx->statusfile);
+		ctx->statusfile = NULL;
+	}
+}
+
+
+/*
+ * Find a context by id.  On success the context is returned with its
+ * refcount elevated; the caller must drop it with cpt_context_put().
+ */
+static cpt_context_t * cpt_context_lookup(unsigned int ctxid)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->contextid == ctxid) {
+			ctx->refcount++;
+			spin_unlock(&cpt_context_lock);
+			return ctx;
+		}
+	}
+	spin_unlock(&cpt_context_lock);
+	return NULL;
+}
+
+/*
+ * ioctl entry point for the restore device.  Stateless commands
+ * (TEST_CAPS/TEST_VERSION) and context plumbing (JOIN/PUT/GET_CONTEXT)
+ * are handled first; all remaining commands run under the per-context
+ * main_sem with a temporary reference held.  The BKL is dropped for
+ * the duration and retaken before returning.
+ */
+static int rst_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+
+	unlock_kernel();
+
+	if (cmd == CPT_TEST_CAPS) {
+		err = test_cpu_caps_and_features();
+		goto out_lock;
+	}
+
+	if (cmd == CPT_TEST_VERSION) {
+		err = rst_image_acceptable(arg);
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		/* Swap the fd's context; drop the old one's reference
+		 * (and its sticky reference on PUT). */
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {
+		cpt_context_t *old_ctx;
+
+		/* Lazily create a context on first use, racing against
+		 * another thread that may have attached one meanwhile. */
+		err = -ENOMEM;
+		ctx = rst_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		err = -EINVAL;
+		if (ctx->contextid && ctx->contextid != contextid)
+			goto out_nosem;
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		/* A sticky context survives fd close until PUT_CONTEXT. */
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		/* NOTE(review): arg is unsigned long, so 'arg >= 0' is
+		 * always true here and in the cases below -- an fd of -1
+		 * cannot be used to clear a file.  Confirm intent. */
+		if (arg >= 0) {
+			err = -EBADF;
+			dfile = fget(arg);
+			if (dfile == NULL)
+				break;
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->read == NULL) {
+				fput(dfile);
+				break;
+			}
+			err = 0;
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	case CPT_SET_PAGEINFDIN:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_PAGEIND:
+		err = rst_pageind(ctx);
+		break;
+#endif
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	case CPT_ITER:
+		err = rst_iteration(ctx);
+		break;
+#endif
+	case CPT_SET_LOCKFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->lockfile)
+			fput(ctx->lockfile);
+		ctx->lockfile = dfile;
+		break;
+	case CPT_SET_STATUSFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->statusfile)
+			fput(ctx->statusfile);
+		ctx->statusfile = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if (arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+	case CPT_HARDLNK_ON:
+		ctx->hardlinked_on = 1;
+		break;
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_UNDUMP:
+		if (ctx->ctx_state > 0) {
+			err = -ENOENT;
+			break;
+		}
+		ctx->ctx_state = CPT_CTX_UNDUMPING;
+#ifdef ITER_DEBUG
+		rst_iteration(ctx);
+#endif
+		err = vps_rst_undump(ctx);
+		if (err) {
+			/* Failed restore: report and kill the half-built VE. */
+			rst_report_error(err, ctx);
+			if (rst_kill(ctx) == 0)
+				ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_UNDUMPED;
+		}
+		break;
+	case CPT_RESUME:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);
+out_lock:
+	lock_kernel();
+	/* Restart codes must not leak to userspace from ioctl. */
+	if (err == -ERESTARTSYS || err == -ERESTARTNOINTR ||
+	    err == -ERESTARTNOHAND || err == -ERESTART_RESTARTBLOCK)
+		err = -EINTR;
+	return err;
+}
+
+/* Open handler for the restore control file: pin this module so it cannot
+ * be unloaded while a descriptor is open.  The restore context is attached
+ * to file->private_data later (presumably by the ioctl path — see
+ * rst_release(), which detaches it). */
+static int rst_open(struct inode * inode, struct file * file)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Release handler: under cpt_context_lock, detach the restore context (if
+ * any) from the file and drop the reference it held, then undo the module
+ * pin taken in rst_open(). */
+static int rst_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *ctx;
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	file->private_data = NULL;
+	if (ctx)
+		__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+/* File operations for the "rst" proc entry.  .read/.write/.llseek are
+ * deliberately left unset here; init_rst() copies them from the generic
+ * proc_fops before switching the entry over to this table. */
+static struct file_operations rst_fops =
+{
+	.owner		= THIS_MODULE,
+	.ioctl		= rst_ioctl,
+	.open		= rst_open,
+	.release	= rst_release,
+};
+
+
+static struct proc_dir_entry *proc_ent;
+extern void *schedule_tail_p;
+extern void schedule_tail_hook(void);
+extern struct ctl_table delayfs_table[];
+
+static struct ctl_table_header *ctl_header;
+
+/* sysctl debug.* children: "rst" exposes debug_level as a plain integer
+ * (0644), "delayfs" hangs the externally defined delayfs_table subtree. */
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9476,
+		.procname	= "rst",
+		.data		= &debug_level,
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 9477,
+		.procname	= "delayfs",
+		.mode		= 0555,
+		.child		= delayfs_table,
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+/* Root of this module's sysctl tree: anchors debug_table under
+ * the standard CTL_DEBUG ("debug") directory. */
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+
+/* Module init: register the delayfs filesystem and the debug sysctl tree,
+ * initialize the global restore-context list, and create the /proc "rst"
+ * control entry.  Errors unwind in reverse order via the goto ladder. */
+static int __init init_rst(void)
+{
+	int err;
+
+	err = register_filesystem(&delayfs_type);
+	if (err)
+		goto err_fs;
+
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err_mon;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry_mod("rst", 0600, NULL, THIS_MODULE);
+	if (!proc_ent)
+		goto err_out;
+
+	/* Inherit the generic proc read/write/llseek entry points, then
+	 * switch the entry to our table (adds ioctl/open/release). */
+	rst_fops.read = proc_ent->proc_fops->read;
+	rst_fops.write = proc_ent->proc_fops->write;
+	rst_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &rst_fops;
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+	return 0;
+
+err_out:
+	unregister_sysctl_table(ctl_header);
+err_mon:
+	unregister_filesystem(&delayfs_type);
+err_fs:
+	return err;
+}
+module_init(init_rst);
+
+/* Module exit: remove the proc entry and sysctls, then drop every restore
+ * context still on the global list.  NOTE(review): a non-sticky context on
+ * the list gets an extra reference taken here before sticky is cleared —
+ * presumably so that after normalization exactly one reference remains;
+ * anything else trips the BUG_ON. */
+static void __exit exit_rst(void)
+{
+	remove_proc_entry("rst", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+
+		if (!ctx->sticky)
+			ctx->refcount++;
+		ctx->sticky = 0;
+
+		BUG_ON(ctx->refcount != 1);
+
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+	unregister_filesystem(&delayfs_type);
+}
+module_exit(exit_rst);
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_process.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_process.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_process.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_process.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,1964 @@
+/*
+ *
+ *  kernel/cpt/rst_process.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/kmem_cache.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#ifdef CONFIG_UTRACE
+#include <linux/utrace.h>
+#endif
+#include <linux/tty.h>
+#ifdef CONFIG_X86
+#include <asm/desc.h>
+#endif
+#include <asm/unistd.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_misc.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+
+#define HOOK_RESERVE	256
+
+/* Per-task resume trampoline state.  restore_registers() carves
+ * HOOK_RESERVE bytes below the rebuilt kernel stack and returns a pointer
+ * into that hole via *rip; @hook is the routine run on first resume and
+ * @hooks apparently selects the HOOK_* actions below (bit indices).
+ * @tid_ptrs feeds rst_child_tid() (clear_child_tid/set_child_tid). */
+struct resume_info
+{
+	asmlinkage void (*hook)(struct resume_info *);
+	unsigned long	hooks;
+#define HOOK_TID	0
+#define HOOK_CONT	1
+#define HOOK_TRACE	2
+#define HOOK_RESTART	3
+	unsigned long	tid_ptrs[2];
+#ifdef CONFIG_UTRACE
+	struct utrace_signal usignal;
+#endif
+	siginfo_t	last_siginfo;
+};
+
+#ifdef CONFIG_X86_32
+
+#define IN_SYSCALL(regs)	((long)(regs)->orig_eax >= 0)
+#define IN_ERROR(regs)		((long)(regs)->eax < 0)
+#define SYSCALL_ERRNO(regs)	(-(long)((regs)->eax))
+#define SYSCALL_RETVAL(regs)	((regs)->eax)
+#define SYSCALL_NR(regs)	((regs)->orig_eax)
+
+#define SYSCALL_SETRET(regs,val)	do { (regs)->eax = (val); } while (0)
+
+#define SYSCALL_RESTART2(regs,new)	do { (regs)->eax = (new); \
+					     (regs)->eip -= 2; } while (0) 
+
+#define syscall_is(tsk,regs,name)	(SYSCALL_NR(regs) == __NR_##name)
+
+/* In new kernels task_pt_regs() is defined to something inappropriate */
+#undef task_pt_regs
+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
+
+#elif defined(CONFIG_X86_64)
+
+#define IN_SYSCALL(regs)	((long)(regs)->orig_rax >= 0)
+#define IN_ERROR(regs)		((long)(regs)->rax < 0)
+#define SYSCALL_ERRNO(regs)	(-(long)((regs)->rax))
+#define SYSCALL_RETVAL(regs)	((regs)->rax)
+#define SYSCALL_NR(regs)	((regs)->orig_rax)
+
+#define SYSCALL_SETRET(regs,val)	do { (regs)->rax = (val); } while (0)
+
+#define SYSCALL_RESTART2(regs,new)	do { (regs)->rax = (new); \
+					     (regs)->rip -= 2; } while (0) 
+
+#define __NR32_restart_syscall	0
+#define __NR32_rt_sigtimedwait	177
+#define __NR32_pause		29
+#define __NR32_futex		240
+
+#define syscall_is(tsk,regs,name) ((!((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR_##name) || \
+				   (((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR32_##name))
+
+#elif defined (CONFIG_IA64)
+
+#define IN_SYSCALL(regs)	((long)(regs)->cr_ifs >= 0)
+#define IN_ERROR(regs)		((long)(regs)->r10 == -1)
+#define SYSCALL_ERRNO(regs)	((regs)->r10 == -1 ? (long)((regs)->r8) : 0)
+#define SYSCALL_RETVAL(regs)	((regs)->r8)
+#define SYSCALL_NR(regs)	((regs)->cr_ifs >= 0 ? (regs)->r15 : -1)
+
+#define SYSCALL_SETRET(regs,val)	do { (regs)->r8 = (val); } while (0)
+
+#define SYSCALL_RESTART2(regs,new)	do { (regs)->r15 = (new); \
+					     (regs)->r10 = 0; \
+					     ia64_decrement_ip(regs); } while (0) 
+
+#define syscall_is(tsk,regs,name)	(SYSCALL_NR(regs) == __NR_##name)
+
+#else
+
+#error This arch is not supported
+
+#endif
+
+#define SYSCALL_RESTART(regs) SYSCALL_RESTART2(regs, SYSCALL_NR(regs))
+
+
+/* Rebuild a kernel siginfo_t from its checkpointed image, dispatching on
+ * the __SI_* class encoded in cpt_code.  The image reuses generic fields
+ * (cpt_pid/cpt_uid/cpt_sigval/...) with per-class meanings, hence the
+ * cross-assignments below (e.g. si_overrun comes from cpt_uid). */
+static void decode_siginfo(siginfo_t *info, struct cpt_siginfo_image *si)
+{
+	memset(info, 0, sizeof(*info));
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:
+		info->si_tid = si->cpt_pid;
+		info->si_overrun = si->cpt_uid;
+		info->_sifields._timer._sigval.sival_ptr = cpt_ptr_import(si->cpt_sigval);
+		info->si_sys_private = si->cpt_utime;
+		break;
+	case __SI_POLL:
+		info->si_band = si->cpt_pid;
+		info->si_fd = si->cpt_uid;
+		break;
+	case __SI_FAULT:
+		info->si_addr = cpt_ptr_import(si->cpt_sigval);
+#ifdef __ARCH_SI_TRAPNO
+		info->si_trapno = si->cpt_pid;
+#endif
+		break;
+	case __SI_CHLD:
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_status = si->cpt_sigval;
+		info->si_stime = si->cpt_stime;
+		info->si_utime = si->cpt_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_ptr = cpt_ptr_import(si->cpt_sigval);
+		break;
+	}
+	info->si_signo = si->cpt_signo;
+	info->si_errno = si->cpt_errno;
+	info->si_code = si->cpt_code;
+}
+
+/* Repopulate @queue from the run of CPT_OBJ_SIGINFO records in the
+ * in-memory image span [start, end).  Each entry is charged to the saved
+ * user (alloc_uid) and to the execution beancounter; on any allocation
+ * failure returns -ENOMEM (entries already queued are left in place).
+ * Records advance by cpt_next, so non-SIGINFO objects are skipped. */
+static int restore_sigqueue(struct task_struct *tsk,
+			    struct sigpending *queue, unsigned long start,
+			    unsigned long end)
+{
+	while (start < end) {
+		struct cpt_siginfo_image *si = (struct cpt_siginfo_image *)start;
+		if (si->cpt_object == CPT_OBJ_SIGINFO) {
+			struct sigqueue *q = NULL;
+			struct user_struct *up;
+			up = alloc_uid(si->cpt_user);
+			if (!up)
+				return -ENOMEM;
+			q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
+			if (!q) {
+				free_uid(up);
+				return -ENOMEM;
+			}
+			if (ub_siginfo_charge(q, get_exec_ub())) {
+				kmem_cache_free(sigqueue_cachep, q);
+				free_uid(up);
+				return -ENOMEM;
+			}
+
+			INIT_LIST_HEAD(&q->list);
+			/* Preallocated elements (posix timers) are not
+			 * supported yet. It is safe to replace them with
+			 * a private one. */
+			q->flags = 0;
+			q->user = up;
+			atomic_inc(&q->user->sigpending);
+
+			decode_siginfo(&q->info, si);
+			list_add_tail(&q->list, &queue->list);
+		}
+		start += si->cpt_next;
+	}
+	return 0;
+}
+
+/* Late pass over all restored tasks: fix process-group, session and
+ * tty_old_pgrp linkage that could not be settled while the tasks were
+ * being created (the target pids may not have existed yet).  Returns 0,
+ * or -EINVAL when a saved vpid cannot be resolved or applied. */
+int rst_process_linkage(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (tsk == NULL) {
+			eprintk_ctx("task %u(%s) is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EINVAL;
+		}
+
+		/* Move the task into its saved process group. */
+		if (virt_pgid(tsk) != ti->cpt_pgrp) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_pgrp)) < 0) {
+				eprintk_ctx("illegal PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			if (tsk->signal->pgrp != pid && find_pid(pid)) {
+				detach_pid(tsk, PIDTYPE_PGID);
+				tsk->signal->pgrp = pid;
+				if (thread_group_leader(tsk)) {
+					attach_pid(tsk, PIDTYPE_PGID, pid);
+					set_virt_pgid(tsk, ti->cpt_pgrp);
+				}
+			}
+			write_unlock_irq(&tasklist_lock);
+			if (tsk->signal->pgrp != pid) {
+				eprintk_ctx("cannot set PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+		}
+		/* Likewise for the session. */
+		if (virt_sid(tsk) != ti->cpt_session) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_session)) < 0) {
+				eprintk_ctx("illegal SID " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			if (tsk->signal->session != pid && find_pid(pid)) {
+				detach_pid(tsk, PIDTYPE_SID);
+				tsk->signal->session = pid;
+				if (thread_group_leader(tsk)) {
+					attach_pid(tsk, PIDTYPE_SID, pid);
+					set_virt_sid(tsk, ti->cpt_session);
+				}
+			}
+			write_unlock_irq(&tasklist_lock);
+			if (tsk->signal->session != pid) {
+				eprintk_ctx("cannot set SID " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+		}
+		/* Restore tty_old_pgrp only if it is still unset. */
+		if (ti->cpt_old_pgrp > 0 && tsk->signal->tty_old_pgrp == 0) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_old_pgrp)) < 0) {
+				eprintk_ctx("illegal OLD_PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			tsk->signal->tty_old_pgrp = pid;
+		}
+	}
+
+	return 0;
+}
+
+/* Restore current->signal from the CPT_OBJ_SIGNAL_STRUCT image at
+ * ti->cpt_signal: process group, old tty pgrp, session, controlling tty,
+ * shared pending signals and group-exit bookkeeping.  *exiting receives
+ * the saved group_exit flag.  Returns 0 or a negative errno; the scratch
+ * buffer from cpt_get_buf() is released on every path. */
+static int
+restore_one_signal_struct(struct cpt_task_image *ti, int *exiting, cpt_context_t *ctx)
+{
+	int err;
+	struct cpt_signal_image *si = cpt_get_buf(ctx);
+
+	current->signal->tty = NULL;
+
+	err = rst_get_object(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, si, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	/* Re-establish the saved process group.  For an orphaned pgrp the
+	 * vpid must be allocated first; -EEXIST means someone else already
+	 * created it, which is fine. */
+	if (virt_pgid(current) != si->cpt_pgrp) {
+		int err;
+		struct pid *pid = NULL, *free = NULL;
+
+		if (si->cpt_pgrp_type == CPT_PGRP_ORPHAN) {
+			if (!is_virtual_pid(si->cpt_pgrp)) {
+				eprintk_ctx("external process group " CPT_FID, CPT_TID(current));
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			pid = alloc_pid();
+			if (pid == NULL) {
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_pgrp)) < 0) {
+				free_pid(pid);
+				pid = NULL;
+				if (err != -EEXIST) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+			free = pid;
+		}
+
+		write_lock_irq(&tasklist_lock);
+		if (pid || (pid = find_vpid(si->cpt_pgrp)) != NULL) {
+			if (current->signal->pgrp != pid->nr) {
+				detach_pid(current, PIDTYPE_PGID);
+				current->signal->pgrp = pid->nr;
+				if (thread_group_leader(current)) {
+					attach_pid(current, PIDTYPE_PGID, pid->nr);
+					set_virt_pgid(current, si->cpt_pgrp);
+					/* pid is now attached: keep it. */
+					free = NULL;
+				}
+			}
+		}
+		write_unlock_irq(&tasklist_lock);
+
+		if (free)
+			free_pid(free);
+	}
+
+	/* Saved tty_old_pgrp: a STRAY one just needs some unused pid number
+	 * (allocate-then-free reserves nothing); otherwise map the vpid. */
+	current->signal->tty_old_pgrp = 0;
+	if ((int)si->cpt_old_pgrp > 0) {
+		if (si->cpt_old_pgrp_type == CPT_PGRP_STRAY) {
+			current->signal->tty_old_pgrp = alloc_pidmap();
+			if (current->signal->tty_old_pgrp < 0) {
+				eprintk_ctx("failed to allocate stray tty_old_pgrp\n");
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			free_pidmap(current->signal->tty_old_pgrp);
+		} else {
+			current->signal->tty_old_pgrp = vpid_to_pid(si->cpt_old_pgrp);
+			if (current->signal->tty_old_pgrp < 0) {
+				dprintk_ctx("forward old tty PGID\n");
+				current->signal->tty_old_pgrp = 0;
+			}
+		}
+	}
+
+	/* Re-establish the saved session; mirrors the pgrp logic above. */
+	if (virt_sid(current) != si->cpt_session) {
+		int err;
+		struct pid *pid = NULL, *free = NULL;
+
+		if (si->cpt_session_type == CPT_PGRP_ORPHAN) {
+			if (!is_virtual_pid(si->cpt_session)) {
+				eprintk_ctx("external process session " CPT_FID, CPT_TID(current));
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			pid = alloc_pid();
+			if (pid == NULL) {
+				cpt_release_buf(ctx);
+				return -EINVAL;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_session)) < 0) {
+				free_pid(pid);
+				pid = NULL;
+				if (err != -EEXIST) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+			free = pid;
+		}
+		write_lock_irq(&tasklist_lock);
+		if (pid || (pid = find_vpid(si->cpt_session)) != NULL) {
+			if (current->signal->session != pid->nr) {
+				detach_pid(current, PIDTYPE_SID);
+				current->signal->session = pid->nr;
+				if (thread_group_leader(current)) {
+					attach_pid(current, PIDTYPE_SID, pid->nr);
+					set_virt_sid(current, si->cpt_session);
+					free = NULL;
+				}
+			}
+		}
+		write_unlock_irq(&tasklist_lock);
+
+		if (free)
+			free_pid(free);
+	}
+
+	cpt_sigset_import(&current->signal->shared_pending.signal, si->cpt_sigpending);
+	current->signal->leader = si->cpt_leader;
+	/* Reattach the controlling tty, but only if the tty is free or
+	 * already belongs to our session. */
+	if (si->cpt_ctty != CPT_NULL) {
+		cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, si->cpt_ctty, ctx);
+		if (obj) {
+			struct tty_struct *tty = obj->o_obj;
+			if (tty->session == 0 || tty->session == current->signal->session) {
+				tty->session = current->signal->session;
+				current->signal->tty = tty;
+			} else {
+				wprintk_ctx("tty session mismatch\n");
+			}
+		}
+	}
+
+	if (si->cpt_curr_target)
+		current->signal->curr_target = find_task_by_pid_ve(si->cpt_curr_target);
+	current->signal->flags = 0;
+	*exiting = si->cpt_group_exit;
+	current->signal->group_exit_code = si->cpt_group_exit_code;
+	if (si->cpt_group_exit_task) {
+		current->signal->group_exit_task = find_task_by_pid_ve(si->cpt_group_exit_task);
+		if (current->signal->group_exit_task == NULL) {
+			eprintk_ctx("oops, group_exit_task=NULL, pid=%u\n", si->cpt_group_exit_task);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	current->signal->notify_count = si->cpt_notify_count;
+	current->signal->group_stop_count = si->cpt_group_stop_count;
+
+	/* Trailing image data beyond the header holds the shared pending
+	 * sigqueue; read it into a temporary buffer and replay it. */
+	if (si->cpt_next > si->cpt_hdrlen) {
+		char *buf = kmalloc(si->cpt_next - si->cpt_hdrlen, GFP_KERNEL);
+		if (buf == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		err = ctx->pread(buf, si->cpt_next - si->cpt_hdrlen, ctx,
+				 ti->cpt_signal + si->cpt_hdrlen);
+		if (err) {
+			kfree(buf);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		restore_sigqueue(current,
+				 &current->signal->shared_pending, (unsigned long)buf,
+				 (unsigned long)buf + si->cpt_next - si->cpt_hdrlen);
+		kfree(buf);
+	}
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/* Restore current->sighand from the CPT_OBJ_SIGHAND_STRUCT image at
+ * ti->cpt_sighand: reset every action to SIG_DFL, then apply each saved
+ * CPT_OBJ_SIGHANDLER record in the image tail.  Returns 0 or an rst_get_object
+ * error. */
+int restore_one_sighand_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_sighand_image si;
+	int i;
+	loff_t pos, endpos;
+	
+	err = rst_get_object(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, &si, ctx);
+	if (err)
+		return err;
+
+	/* Start from a clean slate: all signals at their defaults. */
+	for (i=0; i<_NSIG; i++) {
+		current->sighand->action[i].sa.sa_handler = SIG_DFL;
+#ifndef CONFIG_IA64
+		current->sighand->action[i].sa.sa_restorer = 0;
+#endif
+		current->sighand->action[i].sa.sa_flags = 0;
+		memset(&current->sighand->action[i].sa.sa_mask, 0, sizeof(sigset_t));
+	}
+
+	/* Walk the per-signal handler records following the header. */
+	pos = ti->cpt_sighand + si.cpt_hdrlen;
+	endpos = ti->cpt_sighand + si.cpt_next;
+	while (pos < endpos) {
+		struct cpt_sighandler_image shi;
+
+		err = rst_get_object(CPT_OBJ_SIGHANDLER, pos, &shi, ctx);
+		if (err)
+			return err;
+		current->sighand->action[shi.cpt_signo].sa.sa_handler = (void*)(unsigned long)shi.cpt_handler;
+#ifndef CONFIG_IA64
+		current->sighand->action[shi.cpt_signo].sa.sa_restorer = (void*)(unsigned long)shi.cpt_restorer;
+#endif
+		current->sighand->action[shi.cpt_signo].sa.sa_flags = shi.cpt_flags;
+		cpt_sigset_import(&current->sighand->action[shi.cpt_signo].sa.sa_mask, shi.cpt_mask);
+		pos += shi.cpt_next;
+	}
+
+	return 0;
+}
+
+
+/* Compute the CLONE_THREAD/CLONE_SIGHAND flags to use when recreating
+ * this task: share signal/sighand with the creator when the corresponding
+ * image objects have already been restored (or, for sighand, were never
+ * saved). */
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+
+	if (lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx))
+		flag |= CLONE_THREAD;
+	if (ti->cpt_sighand == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx))
+		flag |= CLONE_SIGHAND;
+	return flag;
+}
+
+/* Finish signal/sighand restore for current.  If the image objects were
+ * already restored by another thread of the group, verify that current
+ * actually shares them (it must, given rst_signal_flag()); otherwise
+ * register current's structs under the image positions and fill them in. */
+int
+rst_signal_complete(struct cpt_task_image *ti, int * exiting, cpt_context_t *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	if (ti->cpt_signal == CPT_NULL || ti->cpt_sighand == CPT_NULL) {
+		return -EINVAL;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx);
+	if (obj) {
+		struct sighand_struct *sig = current->sighand;
+		if (obj->o_obj != sig) {
+			return -EINVAL;
+		}
+	} else {
+		obj = cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, current->sighand, ctx);
+		if (obj == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(obj, ti->cpt_sighand, ctx);
+		err = restore_one_sighand_struct(ti, ctx);
+		if (err)
+			return err;
+	}
+
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx);
+	if (obj) {
+		struct signal_struct *sig = current->signal;
+		if (obj->o_obj != sig) {
+			return -EINVAL;
+		}
+		/* Shared signal struct already restored: just sync the
+		 * virtual pgid/sid views for this thread. */
+		if (current->signal) {
+			set_virt_pgid(current, pid_to_vpid(current->signal->pgrp));
+			set_virt_sid(current, pid_to_vpid(current->signal->session));
+		}
+	} else {
+		obj = cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, current->signal, ctx);
+		if (obj == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(obj, ti->cpt_signal, ctx);
+		err = restore_one_signal_struct(ti, exiting, ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_X86
+static u32 decode_segment(u32 segid)
+{
+	if (segid == CPT_SEG_ZERO)
+		return 0;
+
+	/* TLS descriptors */
+	if (segid <= CPT_SEG_TLS3)
+		return ((GDT_ENTRY_TLS_MIN + segid-CPT_SEG_TLS1)<<3) + 3;
+
+	/* LDT descriptor, it is just an index to LDT array */
+	if (segid >= CPT_SEG_LDT)
+		return ((segid - CPT_SEG_LDT) << 3) | 7;
+
+	/* Check for one of standard descriptors */
+#ifdef CONFIG_X86_64
+	if (segid == CPT_SEG_USER32_DS)
+		return __USER32_DS;
+	if (segid == CPT_SEG_USER32_CS)
+		return __USER32_CS;
+	if (segid == CPT_SEG_USER64_DS)
+		return __USER_DS;
+	if (segid == CPT_SEG_USER64_CS)
+		return __USER_CS;
+#else
+	if (segid == CPT_SEG_USER32_DS)
+		return __USER_DS;
+	if (segid == CPT_SEG_USER32_CS)
+		return __USER_CS;
+#endif
+	wprintk("Invalid segment reg %d\n", segid);
+	return 0;
+}
+#endif
+
+#if defined (CONFIG_IA64)
+/* Step the IA-64 instruction pointer back one bundle slot, used by
+ * SYSCALL_RESTART2() to re-execute a syscall.  Crossing a bundle boundary
+ * rewinds cr_iip by 16 and lands on slot 2, except for MLX bundles (see
+ * the comment below). */
+void ia64_decrement_ip (struct pt_regs *regs)
+{
+	unsigned long w0, ri = ia64_psr(regs)->ri - 1;
+
+	if (ia64_psr(regs)->ri == 0) {
+		regs->cr_iip -= 16;
+		ri = 2;
+		get_user(w0, (char __user *) regs->cr_iip + 0);
+		if (((w0 >> 1) & 0xf) == 2) {
+			/*
+			 * rfi'ing to slot 2 of an MLX bundle causes
+			 * an illegal operation fault.  We don't want
+			 * that to happen...
+			 */
+			ri = 1;
+		}
+	}
+	ia64_psr(regs)->ri = ri;
+}
+#endif
+
+/* Resume hook (HOOK_TID): restore the clear/set child-tid user pointers
+ * saved in resume_info.tid_ptrs. */
+static void rst_child_tid(unsigned long *child_tids)
+{
+	dprintk("rct: " CPT_FID "\n", CPT_TID(current));
+	current->clear_child_tid = (void*)child_tids[0];
+	current->set_child_tid = (void*)child_tids[1];
+}
+
+#ifndef CONFIG_UTRACE
+/* Resume hook for a task that was checkpointed inside a ptrace stop
+ * (non-utrace kernels).  Replays the effect of leaving that stop, keyed
+ * by the saved pn_state: restart or fake-return the syscall, complete a
+ * traced exit, or re-deliver/requeue the pending signal the same way
+ * get_signal_to_deliver() would have. */
+static void rst_finish_trace(struct resume_info *ri)
+{
+	int signr;
+	siginfo_t *info = current->last_siginfo;
+	struct pt_regs *regs = task_pt_regs(current);
+	struct k_sigaction *ka;
+	int ptrace_id;
+
+	dprintk("rst_finish_trace: " CPT_FID "\n", CPT_TID(current));
+
+	spin_lock_irq(&current->sighand->siglock);
+	current->last_siginfo = NULL;
+	recalc_sigpending();
+
+	ptrace_id = current->pn_state;
+	clear_pn_state(current);
+
+	switch (ptrace_id) {
+	case PN_STOP_TF:
+	case PN_STOP_TF_RT:
+		/* frame_*signal */
+		dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld %lu %lu\n",
+		       virt_pid(current), current->pid, current->comm,
+		       info->si_signo, info->si_code,
+		       current->exit_code, SYSCALL_NR(regs),
+		       current->ptrace, current->ptrace_message);
+		goto out;
+	case PN_STOP_ENTRY:
+	case PN_STOP_LEAVE:
+		/* do_syscall_trace */
+		spin_unlock_irq(&current->sighand->siglock);
+		dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code);
+		if (current->exit_code) {
+			send_sig(current->exit_code, current, 1);
+			current->exit_code = 0;
+		}
+		if (IN_SYSCALL(regs)) {
+			if (ptrace_id == PN_STOP_ENTRY
+#ifdef CONFIG_X86
+			    && SYSCALL_ERRNO(regs) == ENOSYS
+#endif
+			    )
+				SYSCALL_RESTART(regs);
+			else if (IN_ERROR(regs) &&
+				 syscall_is(current, regs, rt_sigtimedwait) &&
+				 (SYSCALL_ERRNO(regs) == EAGAIN ||
+				  SYSCALL_ERRNO(regs) == EINTR))
+				SYSCALL_RESTART(regs);
+		}
+		return;
+	case PN_STOP_FORK:
+		/* fork */
+		SYSCALL_SETRET(regs, current->ptrace_message);
+		dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_VFORK:
+		/* after vfork */
+		SYSCALL_SETRET(regs, current->ptrace_message);
+		dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_SIGNAL:
+		/* normal case : dequeue signal */
+		break;
+	case PN_STOP_EXIT:
+		dprintk("ptrace exit caught\n");
+		current->ptrace &= ~PT_TRACE_EXIT;
+		spin_unlock_irq(&current->sighand->siglock);
+		module_put(THIS_MODULE);
+		complete_and_exit(NULL, current->ptrace_message);
+		BUG();
+	case PN_STOP_EXEC:
+		eprintk("ptrace after exec caught: must not happen\n");
+		BUG();
+	default:
+		eprintk("ptrace with unknown identity %d\n", ptrace_id);
+		BUG();
+	}
+
+	/* PN_STOP_SIGNAL path: exit_code is the signal the tracer chose
+	 * to deliver (0 = canceled). */
+	signr = current->exit_code;
+	if (signr == 0) {
+		dprintk("rst_finish_trace: canceled signal %d\n", info->si_signo);
+		goto out;
+	}
+	current->exit_code = 0;
+
+	if (signr != info->si_signo) {
+		info->si_signo = signr;
+		info->si_errno = 0;
+		info->si_code = SI_USER;
+		info->si_pid = virt_pid(current->parent);
+		info->si_uid = current->parent->uid;
+	}
+
+	/* If the (new) signal is now blocked, requeue it.  */
+	if (sigismember(&current->blocked, signr)) {
+		dprintk("going to requeue signal %d\n", signr);
+		goto out_resend_sig;
+	}
+
+	ka = &current->sighand->action[signr-1];
+	if (ka->sa.sa_handler == SIG_IGN) {
+		dprintk("going to resend signal %d (ignored)\n", signr);
+		goto out;
+	}
+	if (ka->sa.sa_handler != SIG_DFL) {
+		dprintk("going to resend signal %d (not SIG_DFL)\n", signr);
+		goto out_resend_sig;
+	}
+        if (signr == SIGCONT ||
+	    signr == SIGCHLD ||
+	    signr == SIGWINCH ||
+	    signr == SIGURG ||
+	    current->pid == 1)
+		goto out;
+
+	/* All the rest, which we cannot handle are requeued. */
+	dprintk("going to resend signal %d (sigh)\n", signr);
+out_resend_sig:
+	spin_unlock_irq(&current->sighand->siglock);
+	send_sig_info(signr, info, current);
+	return;
+
+out:
+	spin_unlock_irq(&current->sighand->siglock);
+}
+#else
+
+/* Find the ptrace engine attached to current via utrace and return its
+ * ptrace_state, or NULL if current is not ptraced.  Scans the engine list
+ * under utrace->lock, matching on ptrace_utrace_ops. */
+static struct ptrace_state * find_tracer(void)
+{
+	struct utrace *utrace;
+	struct ptrace_state *tracer;
+	extern struct utrace_engine_ops ptrace_utrace_ops;
+
+	tracer = NULL;
+
+	rcu_read_lock();
+	utrace = rcu_dereference(current->utrace);
+	smp_rmb();
+
+	if (utrace) {
+#ifdef CONFIG_PTRACE
+		struct utrace_attached_engine *engine;
+
+		spin_lock(&utrace->lock);
+		list_for_each_entry_rcu(engine, &utrace->engines, entry) {
+			struct ptrace_state *state = (struct ptrace_state *) engine->data;
+
+			if (engine->ops == &ptrace_utrace_ops) {
+				/* We cannot have two ptracers, this
+				 * should be BUG_ON() */
+				WARN_ON(tracer);
+				tracer = state;
+			}
+		}
+		spin_unlock(&utrace->lock);
+#endif
+	}
+	rcu_read_unlock();
+	return tracer;
+}
+
+/*
+ * This is the most tricky and not obvious part of restore.
+ *
+ * All the processes are restored in a context looking like
+ * exit from some syscall or interrupt. But there are a few contexts,
+ * where original process was blocked in the bowels of kernel.
+ * We have one simple place (do_signal_stop), handled by rst_finish_stopped()
+ * hook and one hairy place with processes stopped by ptrace.
+ *
+ * Essentially, we must do the same work which is normally done upon
+ * wakeup, i.e. in the path utrace_quiescent() -> utrace_get_signal() -> 
+ *  get_signal_to_deliver(). When we cannot do everything, we do some
+ * actions resulting in the same result from user's viewpoint and
+ * not hurting kernel intimate parts.
+ */
+
+/* utrace variant of rst_finish_trace(): same replay of a ptrace stop, but
+ * the pending siginfo lives in resume_info, the tracer state comes from
+ * find_tracer(), and fork/exit message values come from tracer->u.eventmsg
+ * instead of current->ptrace_message.  Bails out early if the tracer is
+ * gone. */
+static void rst_finish_trace(struct resume_info *ri)
+{
+	int signr;
+	siginfo_t *info = &ri->last_siginfo;
+	struct pt_regs *regs = task_pt_regs(current);
+	struct k_sigaction *ka;
+	int ptrace_id;
+	struct ptrace_state *tracer;
+	struct utrace_signal *usig = NULL;
+
+	dprintk("rst_finish_trace: " CPT_FID "\n", CPT_TID(current));
+
+	ptrace_id = current->pn_state;
+	clear_pn_state(current);
+
+	/* Detach the temporary utrace_signal we planted in resume_info. */
+	if (current->utrace &&
+	    current->utrace->u.live.signal == &ri->usignal) {
+		usig = &ri->usignal;
+		current->utrace->u.live.signal = NULL;
+	}
+
+	tracer = find_tracer();
+	if (tracer == NULL)
+		return;
+
+	spin_lock_irq(&current->sighand->siglock);
+	tracer->u.siginfo = NULL;
+	recalc_sigpending();
+
+	switch (ptrace_id) {
+	case PN_STOP_TF:
+	case PN_STOP_TF_RT:
+		/* frame_*signal */
+		dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld\n",
+		       virt_pid(current), current->pid, current->comm,
+		       info->si_signo, info->si_code,
+		       current->exit_code, SYSCALL_NR(regs));
+		goto out;
+	case PN_STOP_ENTRY:
+	case PN_STOP_LEAVE:
+		/* do_syscall_trace */
+		spin_unlock_irq(&current->sighand->siglock);
+		dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code);
+		if (current->exit_code) {
+			send_sig(current->exit_code, current, 1);
+			current->exit_code = 0;
+		}
+		if (IN_SYSCALL(regs)) {
+			if (ptrace_id == PN_STOP_ENTRY
+#ifdef CONFIG_X86
+			    && SYSCALL_ERRNO(regs) == ENOSYS
+#endif
+			    )
+				SYSCALL_RESTART(regs);
+			else if (IN_ERROR(regs) &&
+				 syscall_is(current, regs, rt_sigtimedwait) &&
+				 (SYSCALL_ERRNO(regs) == EAGAIN ||
+				  SYSCALL_ERRNO(regs) == EINTR))
+				SYSCALL_RESTART(regs);
+		}
+		return;
+	case PN_STOP_FORK:
+		/* fork */
+		SYSCALL_SETRET(regs, tracer->u.eventmsg);
+		dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_VFORK:
+		/* after vfork */
+		SYSCALL_SETRET(regs, tracer->u.eventmsg);
+		dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_SIGNAL:
+		/* normal case : dequeue signal */
+		break;
+	case PN_STOP_EXIT:
+		dprintk("ptrace exit caught\n");
+		//// current->ptrace &= ~PT_TRACE_EXIT;
+		spin_unlock_irq(&current->sighand->siglock);
+		module_put(THIS_MODULE);
+		complete_and_exit(NULL, tracer->u.eventmsg);
+		BUG();
+	case PN_STOP_EXEC:
+		eprintk("ptrace after exec caught: must not happen\n");
+		BUG();
+	default:
+		eprintk("ptrace with unknown identity %d\n", ptrace_id);
+		BUG();
+	}
+
+	/* PN_STOP_SIGNAL path: deliver the signal recorded in usignal. */
+	signr = usig ? usig->signr : 0;
+	if (signr == 0) {
+		dprintk("rst_finish_trace: canceled signal %d\n", info->si_signo);
+		goto out;
+	}
+
+	if (signr != info->si_signo) {
+		info->si_signo = signr;
+		info->si_errno = 0;
+		info->si_code = SI_USER;
+		info->si_pid = virt_pid(tracer->parent);
+		info->si_uid = tracer->parent->uid;
+	}
+
+	/* If the (new) signal is now blocked, requeue it.  */
+	if (sigismember(&current->blocked, signr)) {
+		dprintk("going to requeue signal %d\n", signr);
+		goto out_resend_sig;
+	}
+
+	ka = &current->sighand->action[signr-1];
+	if (ka->sa.sa_handler == SIG_IGN) {
+		dprintk("going to resend signal %d (ignored)\n", signr);
+		goto out;
+	}
+	if (ka->sa.sa_handler != SIG_DFL) {
+		dprintk("going to resend signal %d (not SIG_DFL)\n", signr);
+		goto out_resend_sig;
+	}
+        if (signr == SIGCONT ||
+	    signr == SIGCHLD ||
+	    signr == SIGWINCH ||
+	    signr == SIGURG ||
+	    current->pid == 1)
+		goto out;
+
+	/* All the rest, which we cannot handle are requeued. */
+	dprintk("going to resend signal %d (sigh)\n", signr);
+out_resend_sig:
+	spin_unlock_irq(&current->sighand->siglock);
+	send_sig_info(signr, info, current);
+	return;
+
+out:
+	spin_unlock_irq(&current->sighand->siglock);
+}
+#endif
+
+/* Resume hook for a task checkpointed while stopped in do_signal_stop():
+ * clear the stop state and exit code so the task resumes as if SIGCONT
+ * had been processed. */
+static void rst_finish_stop(void)
+{
+	/* ...
+	 * do_signal() ->
+	 *   get_signal_to_deliver() ->
+	 *     do_signal_stop() ->
+	 *       finish_stop()
+	 *
+	 * Normally after SIGCONT it will dequeue the next signal. If no signal
+	 * is found, do_signal restarts syscall unconditionally.
+	 * Otherwise signal handler is pushed on user stack.
+	 */
+
+	dprintk("rfs: " CPT_FID "\n", CPT_TID(current));
+
+	clear_stop_state(current);
+	current->exit_code = 0;
+}
+
+/* Resume hook (HOOK_RESTART): complete a syscall that was interrupted at
+ * checkpoint time.  Special-cases pause/sigsuspend (go back to sleep),
+ * rt_sigtimedwait and futex (restart on EAGAIN/EINTR), then applies the
+ * generic ERESTART* restart rules when no signal is pending. */
+static void rst_restart_sys(void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/* This hook is supposed to be executed, when we have
+	 * to complete some interrupted syscall.
+	 */
+	dprintk("rrs: " CPT_FID "\n", CPT_TID(current));
+
+	if (!IN_SYSCALL(regs) || !IN_ERROR(regs))
+		return;
+
+#ifdef __NR_pause
+	if (syscall_is(current,regs,pause)) {
+		if (SYSCALL_ERRNO(regs) == ERESTARTNOHAND) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule();
+		}
+	} else
+#else
+	/* On this arch pause() is simulated with sigsuspend(). */
+	if (syscall_is(current,regs,rt_sigsuspend)) {
+		if (SYSCALL_ERRNO(regs) == ERESTARTNOHAND) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule();
+		}
+	} else
+#endif
+	if (syscall_is(current,regs,rt_sigtimedwait)) {
+		if (SYSCALL_ERRNO(regs) == EAGAIN ||
+		    SYSCALL_ERRNO(regs) == EINTR) {
+			SYSCALL_RESTART(regs);
+		}
+	} else if (syscall_is(current,regs,futex)) {
+		if (SYSCALL_ERRNO(regs) == EINTR &&
+		    !signal_pending(current)) {
+			SYSCALL_RESTART(regs);
+		}
+	}
+
+	/* Generic restart semantics, mirroring signal-return handling. */
+	if (!signal_pending(current) &&
+	    !test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		if (SYSCALL_ERRNO(regs) == ERESTARTSYS ||
+		    SYSCALL_ERRNO(regs) == ERESTARTNOINTR ||
+		    SYSCALL_ERRNO(regs) == ERESTARTNOHAND) {
+			SYSCALL_RESTART(regs);
+		} else if (SYSCALL_ERRNO(regs) == ERESTART_RESTARTBLOCK) {
+			int new = __NR_restart_syscall;
+#ifdef CONFIG_X86_64
+			/* 32-bit compat task: use the ia32 syscall number. */
+			if (current->thread_info->flags&_TIF_IA32)
+				new = __NR32_restart_syscall;
+#endif
+			SYSCALL_RESTART2(regs, new);
+		}
+	}
+}
+
+#ifdef CONFIG_X86_32
+
+static int restore_registers(struct task_struct *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_x86_regs *b,
+			     struct resume_info **rip)
+{
+	extern char i386_ret_from_resume;
+
+	if (b->cpt_object != CPT_OBJ_X86_REGS)
+		return -EINVAL;
+
+	tsk->thread.esp = (unsigned long) regs;
+	tsk->thread.esp0 = (unsigned long) (regs+1);
+	tsk->thread.eip = (unsigned long) &i386_ret_from_resume;
+
+	tsk->thread.fs = decode_segment(b->cpt_fs);
+	tsk->thread.gs = decode_segment(b->cpt_gs);
+	tsk->thread.debugreg[0] = b->cpt_debugreg[0];
+	tsk->thread.debugreg[1] = b->cpt_debugreg[1];
+	tsk->thread.debugreg[2] = b->cpt_debugreg[2];
+	tsk->thread.debugreg[3] = b->cpt_debugreg[3];
+	tsk->thread.debugreg[4] = b->cpt_debugreg[4];
+	tsk->thread.debugreg[5] = b->cpt_debugreg[5];
+	tsk->thread.debugreg[6] = b->cpt_debugreg[6];
+	tsk->thread.debugreg[7] = b->cpt_debugreg[7];
+
+	memcpy(regs, &b->cpt_ebx, sizeof(struct pt_regs));
+
+	regs->xcs = decode_segment(b->cpt_xcs);
+	regs->xss = decode_segment(b->cpt_xss);
+	regs->xds = decode_segment(b->cpt_xds);
+	regs->xes = decode_segment(b->cpt_xes);
+
+	tsk->thread.esp -= HOOK_RESERVE;
+	memset((void*)tsk->thread.esp, 0, HOOK_RESERVE);
+	*rip = (void*)tsk->thread.esp;
+
+	return 0;
+}
+
+#elif defined(CONFIG_X86_64)
+
+static void xlate_ptregs_32_to_64(struct pt_regs *d, struct cpt_x86_regs *s)
+{
+	memset(d, 0, sizeof(struct pt_regs));
+	d->rbp = s->cpt_ebp;
+	d->rbx = s->cpt_ebx;
+	d->rax = (s32)s->cpt_eax;
+	d->rcx = s->cpt_ecx;
+	d->rdx = s->cpt_edx;
+	d->rsi = s->cpt_esi;
+	d->rdi = s->cpt_edi;
+	d->orig_rax = (s32)s->cpt_orig_eax;
+	d->rip = s->cpt_eip;
+	d->cs = s->cpt_xcs;
+	d->eflags = s->cpt_eflags;
+	d->rsp = s->cpt_esp;
+	d->ss = s->cpt_xss;
+}
+
+static int restore_registers(struct task_struct *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_obj_bits *hdr,
+			     struct resume_info **rip)
+{
+	if (hdr->cpt_object == CPT_OBJ_X86_64_REGS) {
+		struct cpt_x86_64_regs *b = (void*)hdr;
+
+		tsk->thread.rsp = (unsigned long) regs;
+		tsk->thread.rsp0 = (unsigned long) (regs+1);
+
+		tsk->thread.fs = b->cpt_fsbase;
+		tsk->thread.gs = b->cpt_gsbase;
+		tsk->thread.fsindex = decode_segment(b->cpt_fsindex);
+		tsk->thread.gsindex = decode_segment(b->cpt_gsindex);
+		tsk->thread.ds = decode_segment(b->cpt_ds);
+		tsk->thread.es = decode_segment(b->cpt_es);
+		tsk->thread.debugreg0 = b->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b->cpt_debugreg[7];
+
+		memcpy(regs, &b->cpt_r15, sizeof(struct pt_regs));
+
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b->cpt_cs);
+		regs->ss = decode_segment(b->cpt_ss);
+	} else if (hdr->cpt_object == CPT_OBJ_X86_REGS) {
+		struct cpt_x86_regs *b = (void*)hdr;
+
+		tsk->thread.rsp = (unsigned long) regs;
+		tsk->thread.rsp0 = (unsigned long) (regs+1);
+
+		tsk->thread.fs = 0;
+		tsk->thread.gs = 0;
+		tsk->thread.fsindex = decode_segment(b->cpt_fs);
+		tsk->thread.gsindex = decode_segment(b->cpt_gs);
+		tsk->thread.debugreg0 = b->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b->cpt_debugreg[7];
+
+		xlate_ptregs_32_to_64(regs, b);
+
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b->cpt_xcs);
+		regs->ss = decode_segment(b->cpt_xss);
+		tsk->thread.ds = decode_segment(b->cpt_xds);
+		tsk->thread.es = decode_segment(b->cpt_xes);
+	} else {
+		return -EINVAL;
+	}
+
+	tsk->thread.rsp -= HOOK_RESERVE;
+	memset((void*)tsk->thread.rsp, 0, HOOK_RESERVE);
+	*rip = (void*)tsk->thread.rsp;
+	return 0;
+}
+
+#elif defined(CONFIG_IA64)
+
+#define MASK(nbits)	((1UL << (nbits)) - 1)	/* mask with NBITS bits set */
+
+#define PUT_BITS(first, last, nat)					\
+	({								\
+		unsigned long bit = ia64_unat_pos(&pt->r##first);	\
+		unsigned long nbits = (last - first + 1);		\
+		unsigned long mask = MASK(nbits) << first;		\
+		long dist;						\
+		if (bit < first)					\
+			dist = 64 + bit - first;			\
+		else							\
+			dist = bit - first;				\
+		ia64_rotl(nat & mask, dist);				\
+	})
+
+unsigned long
+ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat)
+{
+	unsigned long scratch_unat;
+
+	/*
+	 * Registers that are stored consecutively in struct pt_regs
+	 * can be handled in parallel.  If the register order in
+	 * struct_pt_regs changes, this code MUST be updated.
+	 */
+	scratch_unat  = PUT_BITS( 1,  1, nat);
+	scratch_unat |= PUT_BITS( 2,  3, nat);
+	scratch_unat |= PUT_BITS(12, 13, nat);
+	scratch_unat |= PUT_BITS(14, 14, nat);
+	scratch_unat |= PUT_BITS(15, 15, nat);
+	scratch_unat |= PUT_BITS( 8, 11, nat);
+	scratch_unat |= PUT_BITS(16, 31, nat);
+
+	return scratch_unat;
+
+}
+
+static unsigned long
+ia64_put_saved_nat_bits (struct switch_stack *pt, unsigned long nat)
+{
+	unsigned long scratch_unat;
+
+	scratch_unat  = PUT_BITS( 4,  7, nat);
+
+	return scratch_unat;
+
+}
+
+#undef PUT_BITS
+
+
+static int restore_registers(struct task_struct *tsk, struct pt_regs *pt,
+			     struct cpt_task_image *ti,
+			     struct cpt_ia64_regs *r,
+			     struct resume_info **rip)
+{
+	extern char ia64_ret_from_resume;
+	struct switch_stack *sw;
+	struct resume_info *ri;
+	struct ia64_psr *psr = ia64_psr(pt);
+	void *krbs = (void *)tsk + IA64_RBS_OFFSET;
+	unsigned long reg;
+
+	if (r->cpt_object != CPT_OBJ_IA64_REGS)
+		return -EINVAL;
+
+	if (r->num_regs > 96) {
+		eprintk(CPT_FID " too much RSE regs %lu\n",
+			CPT_TID(tsk), r->num_regs);
+		return -EINVAL;
+	}
+
+	*rip = ri = ((void*)pt) - HOOK_RESERVE;
+	sw = ((struct switch_stack *) ri) - 1;
+
+	memmove(sw, (void*)tsk->thread.ksp + 16, sizeof(struct switch_stack));
+	memset(ri, 0, HOOK_RESERVE);
+
+	/* gr 1,2-3,8-11,12-13,14,15,16-31 are on pt_regs */
+	memcpy(&pt->r1,  &r->gr[1],  8*(2-1));
+	memcpy(&pt->r2,  &r->gr[2],  8*(4-2));
+	memcpy(&pt->r8,  &r->gr[8],  8*(12-8));
+	memcpy(&pt->r12, &r->gr[12], 8*(14-12));
+	memcpy(&pt->r14, &r->gr[14], 8*(15-14));
+	memcpy(&pt->r15, &r->gr[15], 8*(16-15));
+	memcpy(&pt->r16, &r->gr[16], 8*(32-16));
+
+	pt->b0 = r->br[0];
+	pt->b6 = r->br[6];
+	pt->b7 = r->br[7];
+
+	pt->ar_bspstore	= r->ar_bspstore;
+	pt->ar_unat	= r->ar_unat;
+	pt->ar_pfs	= r->ar_pfs;
+	pt->ar_ccv	= r->ar_ccv;
+	pt->ar_fpsr	= r->ar_fpsr;
+	pt->ar_csd	= r->ar_csd;
+	pt->ar_ssd	= r->ar_ssd;
+	pt->ar_rsc	= r->ar_rsc;
+
+	pt->cr_iip	= r->cr_iip;
+	pt->cr_ipsr	= r->cr_ipsr;
+
+	pt->pr = r->pr;
+
+	pt->cr_ifs = r->cfm;
+
+	/* fpregs 6..9,10..11 are in pt_regs */
+	memcpy(&pt->f6,  &r->fr[2*6],  16*(10-6));
+	memcpy(&pt->f10, &r->fr[2*10], 16*(12-10));
+	/* fpreg 12..15 are on switch stack */
+	memcpy(&sw->f12, &r->fr[2*12], 16*(16-12));
+	/* fpregs 32...127 */
+	tsk->thread.flags |= IA64_THREAD_FPH_VALID;
+	memcpy(tsk->thread.fph, &r->fr[32*2], 16*(128-32));
+	ia64_drop_fpu(tsk);
+	psr->dfh = 1;
+
+	memcpy(&sw->r4, &r->gr[4], 8*(8-4));
+	memcpy(&sw->b1, &r->br[1], 8*(6-1));
+	sw->ar_lc = r->ar_lc;
+
+	memcpy(&sw->f2, &r->fr[2*2], 16*(6-2));
+	memcpy(&sw->f16, &r->fr[2*16], 16*(32-16));
+
+	sw->caller_unat = 0;
+	sw->ar_fpsr = pt->ar_fpsr;
+	sw->ar_unat = 0;
+	if (r->nat[0] & 0xFFFFFF0FUL)
+		sw->caller_unat = ia64_put_scratch_nat_bits(pt, r->nat[0]);
+	if (r->nat[0] & 0xF0)
+		sw->ar_unat = ia64_put_saved_nat_bits(sw, r->nat[0]);
+
+	sw->ar_bspstore = (unsigned long)ia64_rse_skip_regs(krbs, r->num_regs);
+	memset(krbs, 0, (void*)sw->ar_bspstore - krbs);
+	sw->ar_rnat = 0;
+	sw->ar_pfs = 0;
+
+	/* This is tricky. When we are in syscall, we have frame
+	 * of output register (sometimes, plus one input reg sometimes).
+	 * It is not so easy to restore such frame, RSE optimizes
+	 * and does not fetch those regs from backstore. So, we restore
+	 * the whole frame as local registers, and then repartition it
+	 * in ia64_ret_from_resume().
+	 */
+	if ((long)pt->cr_ifs >= 0) {
+		unsigned long out = (r->cfm&0x7F) - ((r->cfm>>7)&0x7F);
+		sw->ar_pfs = out | (out<<7);
+	}
+	if (r->ar_ec)
+		sw->ar_pfs |= (r->ar_ec & 0x3F) << 52;
+
+	for (reg = 0; reg < r->num_regs; reg++) {
+		unsigned long *ptr = ia64_rse_skip_regs(krbs, reg);
+		unsigned long *rnatp;
+		unsigned long set_rnat = 0;
+
+		*ptr = r->gr[32+reg];
+
+		if (reg < 32)
+			set_rnat = (r->nat[0] & (1UL<<(reg+32)));
+		else
+			set_rnat = (r->nat[1] & (1UL<<(reg-32)));
+
+		if (set_rnat) {
+			rnatp = ia64_rse_rnat_addr(ptr);
+			if ((unsigned long)rnatp >= sw->ar_bspstore)
+				rnatp = &sw->ar_rnat;
+			*rnatp |= (1UL<<ia64_rse_slot_num(ptr));
+		}
+	}
+	
+	sw->b0 = (unsigned long) &ia64_ret_from_resume;
+	tsk->thread.ksp = (unsigned long) sw - 16;
+
+#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
+#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
+#define PRED_USER_STACK		3 /* returning to user-stacks? */
+#define PRED_SYSCALL		4 /* inside a system call? */
+#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
+
+	pt->loadrs = r->loadrs;
+	sw->pr = 0;
+	sw->pr &= ~(1UL << PRED_LEAVE_SYSCALL);
+	sw->pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_NON_SYSCALL));
+	sw->pr &= ~(1UL << PRED_KERNEL_STACK);
+	sw->pr |= (1UL << PRED_USER_STACK);
+	if ((long)pt->cr_ifs < 0) {
+		sw->pr |= (1UL << PRED_NON_SYSCALL);
+	} else {
+		sw->pr |= ((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL));
+	}
+
+	return 0;
+}
+#endif
+
+asmlinkage void rst_resume_work(struct resume_info *ri)
+{
+	if (ri->hooks & (1<<HOOK_TID))
+		rst_child_tid(ri->tid_ptrs);
+	if (ri->hooks & (1<<HOOK_CONT))
+		rst_finish_stop();
+	if (ri->hooks & (1<<HOOK_TRACE))
+		rst_finish_trace(ri);
+	if (ri->hooks & (1<<HOOK_RESTART))
+		rst_restart_sys();
+	module_put(THIS_MODULE);
+}
+
+#ifdef CONFIG_UTRACE
+static int rst_ptrace(struct task_struct *tsk, struct cpt_task_image *ti,
+		      struct resume_info *ri, cpt_context_t *ctx)
+{
+#ifndef CONFIG_PTRACE
+	eprintk_ctx("ptrace is not configured in kernel\n");
+	return -EINVAL;
+#else
+	struct task_struct *parent = tsk->parent;
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
+	unsigned long flags;
+
+	if (ti->cpt_ppid != ti->cpt_rppid) {
+		struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+		parent = find_task_by_pid_ve(ti->cpt_ppid);
+		set_exec_env(env);
+		if (parent == NULL) {
+			eprintk_ctx("cannot find ptracer for " CPT_FID "\n", CPT_TID(tsk));
+			return -ESRCH;
+		}
+	}
+
+	engine = utrace_attach(tsk, UTRACE_ATTACH_CREATE |
+			            UTRACE_ATTACH_EXCLUSIVE |
+			            UTRACE_ATTACH_MATCH_OPS,
+			       &ptrace_utrace_ops, 0);
+	if (IS_ERR(engine))
+		return PTR_ERR(engine);
+
+	state = kzalloc(sizeof(struct ptrace_state), GFP_KERNEL);
+	if (unlikely(state == NULL)) {
+		utrace_detach(tsk, engine);
+		return -ENOMEM;
+	}
+
+	state->engine = engine;
+	state->task = tsk;
+	state->parent = parent;
+	state->options = 0;
+	atomic_set(&state->refcnt, 1);
+#ifdef PTRACE_DEBUG
+	atomic_set(&state->check_dead, 1);
+#endif
+	if (ti->cpt_ptrace & CPT_PT_TRACESYSGOOD)
+		state->options |= PTRACE_O_TRACESYSGOOD;
+	if (ti->cpt_ptrace & CPT_PT_TRACE_FORK)
+		state->options |= PTRACE_O_TRACEFORK;
+	if (ti->cpt_ptrace & CPT_PT_TRACE_VFORK)
+		state->options |= PTRACE_O_TRACEVFORK;
+	if (ti->cpt_ptrace & CPT_PT_TRACE_CLONE)
+		state->options |= PTRACE_O_TRACECLONE;
+	if (ti->cpt_ptrace & CPT_PT_TRACE_EXEC)
+		state->options |= PTRACE_O_TRACEEXEC;
+	if (ti->cpt_ptrace & CPT_PT_TRACE_VFORK_DONE)
+		state->options |= PTRACE_O_TRACEVFORKDONE;
+	if (ti->cpt_ptrace & CPT_PT_TRACE_EXIT)
+		state->options |= PTRACE_O_TRACEEXIT;
+	state->cap_sys_ptrace = !!(ti->cpt_ptrace & CPT_PT_PTRACE_CAP);
+
+	if (ti->cpt_pn_state == PN_STOP_ENTRY ||
+	    ti->cpt_pn_state == PN_STOP_LEAVE)
+		state->syscall = 1;
+
+	task_lock(parent);
+	list_add_rcu(&state->entry, &state->parent->ptracees);
+	task_unlock(state->parent);
+
+	rcu_assign_pointer(engine->data, state);
+
+	if (ri->hooks & (1<<HOOK_TRACE)) {
+		state->u.siginfo = &ri->last_siginfo;
+		tsk->utrace->u.live.signal = &ri->usignal;
+		/* To avoid warning about changing "const" value ... */
+		*(struct siginfo**)&ri->usignal.info = &ri->last_siginfo;
+		ri->usignal.return_ka = NULL;
+		ri->usignal.signr = 0;
+	} else if (ti->cpt_ptrace_message) {
+		state->have_eventmsg = 1;
+		state->u.eventmsg = ti->cpt_ptrace_message;
+	}
+
+	flags = UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC) |
+		UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL) |
+		UTRACE_EVENT(CLONE) | UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP);
+	if (state->options & PTRACE_O_TRACEEXIT)
+		flags |= UTRACE_EVENT(EXIT);
+	if (state->options & PTRACE_O_TRACEVFORKDONE)
+		flags |= UTRACE_EVENT(VFORK_DONE);
+	if (ti->cpt_thrflags & _TIF_SYSCALL_TRACE)
+		flags |= UTRACE_EVENT_SYSCALL;
+	if (ti->cpt_thrflags & _TIF_SINGLESTEP)
+		flags |= UTRACE_ACTION_SINGLESTEP;
+	if (ti->cpt_state == TASK_TRACED)
+		flags |= UTRACE_ACTION_QUIESCE;
+
+	engine->flags = tsk->utrace_flags = flags;
+	return 0;
+#endif
+}
+#endif
+
+static void rst_apply_mxcsr_mask(struct task_struct *tsk)
+{
+#ifdef CONFIG_X86_32
+	unsigned int flags;
+
+	flags = test_cpu_caps_and_features();
+
+	/* if cpu does not support sse2 mask 6 bit (DAZ flag) and 16-31 bits
+	   in MXCSR to avoid general protection fault */
+	if (!(flags & (1 << CPT_CPU_X86_SSE2)))
+		tsk->thread.i387.fxsave.mxcsr &= 0x0000ffbf;
+#endif
+}
+
+#define RLIM_INFINITY32		0xffffffff
+#define RLIM_INFINITY64		(~0ULL)
+
+#ifdef CONFIG_X86_64
+#define rst_rlim_32_to_64(a, i, t, im)					\
+do {									\
+	if (im->cpt_rlim_##a[i] == RLIM_INFINITY32)			\
+		t->signal->rlim[i].rlim_##a = RLIM_INFINITY64;		\
+	else								\
+		t->signal->rlim[i].rlim_##a = im->cpt_rlim_##a[i];	\
+} while (0)
+#elif defined(CONFIG_X86_32)
+#define rst_rlim_64_to_32(a, i, t, im)					\
+do {									\
+	if (im->cpt_rlim_##a[i] == RLIM_INFINITY64)			\
+		t->signal->rlim[i].rlim_##a = RLIM_INFINITY32;		\
+	else if (im->cpt_rlim_##a[i] > RLIM_INFINITY32) {		\
+		eprintk_ctx("rlimit %Lu is too high for 32-bit task, "	\
+			    "dump file is corrupted\n",			\
+			    im->cpt_rlim_##a[i]);			\
+		return -EINVAL;						\
+	} else								\
+		t->signal->rlim[i].rlim_##a = im->cpt_rlim_##a[i];	\
+} while (0)
+#endif
+
+int rst_restore_process(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+		struct pt_regs * regs;
+		struct cpt_object_hdr *b;
+		struct group_info *gids, *ogids;
+		struct resume_info *ri = NULL;
+		int i;
+		int err = 0;
+#ifdef CONFIG_USER_RESOURCE
+		struct task_beancounter *tbc;
+		struct user_beancounter *new_bc, *old_bc;
+#endif
+
+		if (tsk == NULL) {
+			eprintk_ctx("oops, task %d/%s is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EFAULT;
+		}
+
+		wait_task_inactive(tsk);
+#ifdef CONFIG_USER_RESOURCE
+		tbc = &tsk->task_bc;
+		new_bc = rst_lookup_ubc(ti->cpt_exec_ub, ctx);
+		err = virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTTSK, new_bc);
+		if (err & NOTIFY_FAIL) {
+			put_beancounter(new_bc);
+			return -ECHRNG; 
+		}
+		old_bc = tbc->exec_ub;
+		if ((err & VIRTNOTIFY_CHANGE) && old_bc != new_bc) {
+			dprintk(" *** replacing ub %p by %p for %p (%d %s)\n",
+					old_bc, new_bc, tsk,
+					tsk->pid, tsk->comm);
+			tbc->exec_ub = new_bc;
+			new_bc = old_bc;
+		}
+		put_beancounter(new_bc);
+#endif
+		regs = task_pt_regs(tsk);
+
+		if (!tsk->exit_state) {
+			tsk->lock_depth = -1;
+#ifdef CONFIG_PREEMPT
+			tsk->thread_info->preempt_count--;
+#endif
+		}
+
+		if (tsk->static_prio != ti->cpt_static_prio)
+			set_user_nice(tsk, PRIO_TO_NICE((s32)ti->cpt_static_prio));
+
+		cpt_sigset_import(&tsk->blocked, ti->cpt_sigblocked);
+		cpt_sigset_import(&tsk->real_blocked, ti->cpt_sigrblocked);
+		cpt_sigset_import(&tsk->saved_sigmask, ti->cpt_sigsuspend_blocked);
+		cpt_sigset_import(&tsk->pending.signal, ti->cpt_sigpending);
+
+		tsk->uid = ti->cpt_uid;
+		tsk->euid = ti->cpt_euid;
+		tsk->suid = ti->cpt_suid;
+		tsk->fsuid = ti->cpt_fsuid;
+		tsk->gid = ti->cpt_gid;
+		tsk->egid = ti->cpt_egid;
+		tsk->sgid = ti->cpt_sgid;
+		tsk->fsgid = ti->cpt_fsgid;
+#ifdef CONFIG_IA64
+		SET_UNALIGN_CTL(tsk, ti->cpt_prctl_uac);
+		SET_FPEMU_CTL(tsk, ti->cpt_prctl_fpemu);
+#endif
+		memcpy(&tsk->cap_effective, &ti->cpt_ecap, sizeof(tsk->cap_effective));
+		memcpy(&tsk->cap_inheritable, &ti->cpt_icap, sizeof(tsk->cap_inheritable));
+		memcpy(&tsk->cap_permitted, &ti->cpt_pcap, sizeof(tsk->cap_permitted));
+		tsk->keep_capabilities = (ti->cpt_keepcap != 0);
+		tsk->did_exec = (ti->cpt_did_exec != 0);
+		gids = groups_alloc(ti->cpt_ngids);
+		ogids = tsk->group_info;
+		if (gids) {
+			int i;
+			for (i=0; i<32; i++)
+				gids->small_block[i] = ti->cpt_gids[i];
+			tsk->group_info = gids;
+		}
+		if (ogids)
+			put_group_info(ogids);
+		tsk->utime = ti->cpt_utime;
+		tsk->stime = ti->cpt_stime;
+		if (ctx->image_version == CPT_VERSION_8)
+			tsk->start_time = ns_to_timespec(ti->cpt_starttime*TICK_NSEC);
+		else
+			cpt_timespec_import(&tsk->start_time, ti->cpt_starttime);
+		set_normalized_timespec(&tsk->start_time,
+					tsk->start_time.tv_sec +
+					VE_TASK_INFO(tsk)->owner_env->start_timespec.tv_sec,
+					tsk->start_time.tv_nsec +
+					VE_TASK_INFO(tsk)->owner_env->start_timespec.tv_nsec);
+
+		tsk->nvcsw = ti->cpt_nvcsw;
+		tsk->nivcsw = ti->cpt_nivcsw;
+		tsk->min_flt = ti->cpt_min_flt;
+		tsk->maj_flt = ti->cpt_maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+		tsk->cutime = ti->cpt_cutime;
+		tsk->cstime = ti->cpt_cstime;
+		tsk->cnvcsw = ti->cpt_cnvcsw;
+		tsk->cnivcsw = ti->cpt_cnivcsw;
+		tsk->cmin_flt = ti->cpt_cmin_flt;
+		tsk->cmaj_flt = ti->cpt_cmaj_flt;
+
+		if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+			__asm__("undefined\n");
+
+		for (i=0; i<RLIM_NLIMITS; i++) {
+			tsk->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
+			tsk->rlim[i].rlim_max = ti->cpt_rlim_max[i];
+		}
+#else
+		if (thread_group_leader(tsk) && tsk->signal) {
+			tsk->signal->utime = ti->cpt_utime;
+			tsk->signal->stime = ti->cpt_stime;
+			tsk->signal->cutime = ti->cpt_cutime;
+			tsk->signal->cstime = ti->cpt_cstime;
+			tsk->signal->nvcsw = ti->cpt_nvcsw;
+			tsk->signal->nivcsw = ti->cpt_nivcsw;
+			tsk->signal->cnvcsw = ti->cpt_cnvcsw;
+			tsk->signal->cnivcsw = ti->cpt_cnivcsw;
+			tsk->signal->min_flt = ti->cpt_min_flt;
+			tsk->signal->maj_flt = ti->cpt_maj_flt;
+			tsk->signal->cmin_flt = ti->cpt_cmin_flt;
+			tsk->signal->cmaj_flt = ti->cpt_cmaj_flt;
+
+			if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+				__asm__("undefined\n");
+
+			for (i=0; i<RLIM_NLIMITS; i++) {
+#ifdef CONFIG_X86_64
+				if (ctx->image_arch == CPT_OS_ARCH_I386) {
+					rst_rlim_32_to_64(cur, i, tsk, ti);
+					rst_rlim_32_to_64(max, i, tsk, ti);
+				} else 
+#elif defined(CONFIG_X86_32)
+				if (ctx->image_arch == CPT_OS_ARCH_EMT64) {
+					rst_rlim_64_to_32(cur, i, tsk, ti);
+					rst_rlim_64_to_32(max, i, tsk, ti);
+				} else 
+#endif
+				{
+					tsk->signal->rlim[i].rlim_cur =
+						ti->cpt_rlim_cur[i];
+					tsk->signal->rlim[i].rlim_max =
+						ti->cpt_rlim_max[i];
+				}
+			}
+		}
+#endif
+
+#ifdef CONFIG_X86
+		for (i=0; i<3; i++) {
+			if (i >= GDT_ENTRY_TLS_ENTRIES) {
+				eprintk_ctx("too many tls descs\n");
+			} else {
+#ifndef CONFIG_X86_64
+				tsk->thread.tls_array[i].a = ti->cpt_tls[i]&0xFFFFFFFF;
+				tsk->thread.tls_array[i].b = ti->cpt_tls[i]>>32;
+#else
+				tsk->thread.tls_array[i] = ti->cpt_tls[i];
+#endif
+			}
+		}
+#endif
+
+		clear_stopped_child_used_math(tsk);
+
+		b = (void *)(ti+1);
+		while ((void*)b < ((void*)ti) + ti->cpt_next) {
+			/* Siginfo objects are at the end of obj array */
+			if (b->cpt_object == CPT_OBJ_SIGINFO) {
+				struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+				restore_sigqueue(tsk, &tsk->pending, (unsigned long)b, (unsigned long)ti + ti->cpt_next);
+				set_exec_env(env);
+				break;
+			}
+
+			switch (b->cpt_object) {
+#ifdef CONFIG_X86
+			case CPT_OBJ_BITS:
+				if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE &&
+				    cpu_has_fxsr) {
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fxsave_struct));
+					rst_apply_mxcsr_mask(tsk);
+					if (ti->cpt_used_math)
+						set_stopped_child_used_math(tsk);
+				}
+#ifndef CONFIG_X86_64
+				else if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD &&
+					 !cpu_has_fxsr) {		
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fsave_struct));
+					if (ti->cpt_used_math)
+						set_stopped_child_used_math(tsk);
+				}
+#endif
+				break;
+#endif
+			case CPT_OBJ_LASTSIGINFO:
+				if (ri &&
+				    ti->cpt_state == TASK_TRACED &&
+				    ti->cpt_pn_state) {
+					decode_siginfo(&ri->last_siginfo, (void*)b);
+					ri->hooks |= (1<<HOOK_TRACE);
+#ifndef CONFIG_UTRACE
+					tsk->last_siginfo = &ri->last_siginfo;
+#endif
+				}
+				break;
+			case CPT_OBJ_X86_REGS:
+			case CPT_OBJ_X86_64_REGS:
+			case CPT_OBJ_IA64_REGS:
+				if (restore_registers(tsk, regs, ti, (void*)b, &ri)) {
+					eprintk_ctx("cannot restore registers: image is corrupted\n");
+					return -EINVAL;
+				}
+				break;
+			case CPT_OBJ_SIGALTSTACK: {
+				struct cpt_sigaltstack_image *sas;
+				sas = (struct cpt_sigaltstack_image *)b;
+				tsk->sas_ss_sp = sas->cpt_stack;
+				tsk->sas_ss_size = sas->cpt_stacksize;
+				break;
+			    }
+			case CPT_OBJ_TASK_AUX: {
+				struct cpt_task_aux_image *ai;
+				ai = (struct cpt_task_aux_image *)b;
+				tsk->robust_list = cpt_ptr_import(ai->cpt_robust_list);
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_COMPAT
+				if (tsk->thread_info->flags&_TIF_IA32) {
+					tsk->robust_list = (void __user *)NULL;
+					tsk->compat_robust_list = cpt_ptr_import(ai->cpt_robust_list);
+				}
+#endif
+#endif
+				break;
+			    }
+			}
+			b = ((void*)b) + b->cpt_next;
+		}
+
+		if (ri == NULL && !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			eprintk_ctx("missing register info\n");
+			return -EINVAL;
+		}
+
+#ifndef CONFIG_UTRACE
+		if (ti->cpt_ppid != ti->cpt_rppid) {
+			struct task_struct *parent;
+			struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+			write_lock_irq(&tasklist_lock);
+			parent = find_task_by_pid_ve(ti->cpt_ppid);
+			if (parent && parent != tsk->parent) {
+				list_add(&tsk->ptrace_list, &tsk->parent->ptrace_children);
+				remove_parent(tsk);
+				tsk->parent = parent;
+				add_parent(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+			set_exec_env(env);
+		}
+
+		tsk->ptrace_message = ti->cpt_ptrace_message;
+		tsk->ptrace = ti->cpt_ptrace;
+#else
+		if (ti->cpt_ptrace & CPT_PT_PTRACED) {
+			err = rst_ptrace(tsk, ti, ri, ctx);
+			if (err)
+				return err;
+		}
+#endif
+		if (ti->cpt_state == TASK_TRACED)
+			tsk->pn_state = ti->cpt_pn_state;
+		if (ri && tsk->pn_state) {
+			/* ... -> ptrace_notify()
+			 * or
+			 * ... -> do_signal() -> get_signal_to_deliver() ->
+			 *   ptrace stop
+			 */
+			ri->hooks |= (1<<HOOK_TRACE);
+		}
+
+		tsk->stopped_state = ti->cpt_stopped_state;
+
+                /*
+		 * TIF_IA32 thread flag was restored early
+		 */
+		tsk->thread_info->flags &= _TIF_IA32;
+		tsk->thread_info->flags |= ti->cpt_thrflags;
+
+		/* The image was created with kernel < 2.6.16, while
+		 * task hanged in sigsuspend -> do_signal.
+		 *
+		 * FIXME! This needs more brain efforts...
+		 */
+		if (ti->cpt_sigsuspend_state) {
+			tsk->thread_info->flags |= _TIF_RESTORE_SIGMASK;
+		}
+
+#ifdef CONFIG_X86_64
+		tsk->thread_info->flags |= _TIF_FORK | _TIF_RESUME;
+#endif
+
+#ifdef CONFIG_X86_32
+		do {
+			if (regs->orig_eax == __NR__newselect && regs->edi) {
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->edi, &tv, 
+						sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm,
+					       regs->edi);
+					break;
+				}
+				dprintk_ctx("task %d/%d(%s): Old timeval in newselect: %ld.%ld\n",
+				       virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d/%d(%s): New timeval in newselect: %ld.%ld\n",
+					virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, regs->edi, &tv, 
+						sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm write: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm, regs->edi);
+				}
+				
+			} else if (regs->orig_eax == __NR_select && regs->edi) {
+				struct {
+					unsigned long n;
+					fd_set __user *inp, *outp, *exp;
+					struct timeval __user *tvp;
+				} a;
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->ebx, &a, 
+						sizeof(a), 0) != sizeof(a)) {
+					wprintk_ctx("task %d: Error 2 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				dprintk_ctx("task %d: Old timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d: New timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm write\n", tsk->pid);
+				}
+			}
+		} while (0);
+#endif
+
+		if (ri && IN_SYSCALL(regs) && IN_ERROR(regs)) {
+			switch (SYSCALL_ERRNO(regs)) {
+			case ERESTARTSYS:
+			case ERESTARTNOINTR:
+			case ERESTARTNOHAND:
+			case ERESTART_RESTARTBLOCK:
+			case EAGAIN:
+			case EINTR:
+				ri->hooks |= (1<<HOOK_RESTART);
+			}
+		}
+
+		tsk->flags = (tsk->flags & PF_USED_MATH) |
+			(ti->cpt_flags & CPT_TASK_FLAGS_MASK);
+		clear_tsk_thread_flag(tsk, TIF_FREEZE);
+		tsk->exit_signal = ti->cpt_exit_signal;
+
+		if (ri && tsk->stopped_state) {
+			dprintk_ctx("finish_stop\n");
+			if (ti->cpt_state != TASK_STOPPED)
+				eprintk_ctx("Hellooo, state is %u\n", (unsigned)ti->cpt_state);
+			ri->hooks |= (1<<HOOK_CONT);
+		}
+
+		if (ri && (ti->cpt_set_tid || ti->cpt_clear_tid)) {
+			ri->hooks |= (1<<HOOK_TID);
+			ri->tid_ptrs[0] = ti->cpt_clear_tid;
+			ri->tid_ptrs[1] = ti->cpt_set_tid;
+			dprintk_ctx("settids\n");
+		}
+
+		if (ri && ri->hooks &&
+		    !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			if (try_module_get(THIS_MODULE))
+				ri->hook = rst_resume_work;
+		}
+
+		if (ti->cpt_state == TASK_TRACED)
+			tsk->state = TASK_TRACED;
+		else if (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD)) {
+			tsk->signal->it_virt_expires = 0;
+			tsk->signal->it_prof_expires = 0;
+			if (tsk->state != EXIT_DEAD)
+				eprintk_ctx("oops, schedule() did not make us dead\n");
+		}
+
+		if (thread_group_leader(tsk) &&
+		    ti->cpt_it_real_value &&
+		    !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			ktime_t val;
+			s64 nsec;
+
+			nsec = ti->cpt_it_real_value;
+			val.tv64 = 0;
+
+			if (ctx->image_version < CPT_VERSION_9)
+				nsec *= TICK_NSEC;
+
+			val = ktime_add_ns(val, nsec);
+			if (val.tv64 <= 0)
+				val.tv64 = NSEC_PER_USEC;
+			dprintk("rst itimer " CPT_FID " +%Ld %Lu\n", CPT_TID(tsk),
+				(long long)val.tv64,
+				(unsigned long long)ti->cpt_it_real_value);
+
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (hrtimer_try_to_cancel(&tsk->signal->real_timer) >= 0) {
+				/* FIXME. Check!!!! */
+				hrtimer_start(&tsk->signal->real_timer, val, HRTIMER_REL);
+			} else {
+				wprintk_ctx("Timer clash. Impossible?\n");
+			}
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			dprintk_ctx("itimer " CPT_FID " +%Lu\n", CPT_TID(tsk),
+				    (unsigned long long)val.tv64);
+		}
+
+		module_put(THIS_MODULE);
+	}
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_socket.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_socket.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_socket.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_socket.c	2017-01-13 08:40:27.000000000 -0500
@@ -0,0 +1,1100 @@
+/*
+ *
+ *  kernel/cpt/rst_socket.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/mount.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+
+#include <ub/ub_mem.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+#include "cpt_syscalls.h"
+
+
+/*
+ * Apply the family-independent state saved in the checkpoint image @si
+ * to a freshly created socket @sk: socket flags/state, timeouts, buffer
+ * sizes, error state, peer credentials and timestamp.
+ * @pos/@ctx are unused here; kept for symmetry with other restore hooks.
+ * Always returns 0.
+ */
+static int setup_sock_common(struct sock *sk, struct cpt_sock_image *si,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	if (sk->sk_socket) {
+		sk->sk_socket->flags = si->cpt_ssflags;
+		sk->sk_socket->state = si->cpt_sstate;
+	}
+	sk->sk_reuse = si->cpt_reuse;
+	sk->sk_shutdown = si->cpt_shutdown;
+	sk->sk_userlocks = si->cpt_userlocks;
+	sk->sk_no_check = si->cpt_no_check;
+	/* Boolean flags are stored separately in the image: reset first,
+	 * then set only if the image recorded them as set. */
+	sock_reset_flag(sk, SOCK_DBG);
+	if (si->cpt_debug)
+		sock_set_flag(sk, SOCK_DBG);
+	sock_reset_flag(sk, SOCK_RCVTSTAMP);
+	if (si->cpt_rcvtstamp)
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+	sock_reset_flag(sk, SOCK_LOCALROUTE);
+	if (si->cpt_localroute)
+		sock_set_flag(sk, SOCK_LOCALROUTE);
+	sk->sk_protocol = si->cpt_protocol;
+	sk->sk_err = si->cpt_err;
+	sk->sk_err_soft = si->cpt_err_soft;
+	sk->sk_priority = si->cpt_priority;
+	sk->sk_rcvlowat = si->cpt_rcvlowat;
+	/* CPT_NULL in a timeout field is the image encoding of "infinite". */
+	sk->sk_rcvtimeo = si->cpt_rcvtimeo;
+	if (si->cpt_rcvtimeo == CPT_NULL)
+		sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_sndtimeo = si->cpt_sndtimeo;
+	if (si->cpt_sndtimeo == CPT_NULL)
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_rcvbuf = si->cpt_rcvbuf;
+	sk->sk_sndbuf = si->cpt_sndbuf;
+	sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+	sk->sk_flags = si->cpt_flags;
+	sk->sk_lingertime = si->cpt_lingertime;
+	if (si->cpt_lingertime == CPT_NULL)
+		sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_peercred.pid = si->cpt_peer_pid;
+	sk->sk_peercred.uid = si->cpt_peer_uid;
+	sk->sk_peercred.gid = si->cpt_peer_gid;
+	cpt_timeval_import(&sk->sk_stamp, si->cpt_stamp);
+	return 0;
+}
+
+/*
+ * Attach a struct file to @sock without keeping a file descriptor
+ * around: map the socket to an fd, take an extra reference on the
+ * resulting file, then close the fd again.  The caller receives the
+ * file (holding our reference) or an ERR_PTR on failure.
+ */
+static struct file *sock_mapfile(struct socket *sock)
+{
+	int fd = sock_map_fd(sock);
+
+	if (fd >= 0) {
+		struct file *file = sock->file;
+		get_file(file);
+		sc_close(fd);
+		return file;
+	}
+	return ERR_PTR(fd);
+}
+
+/* Assumption is that /tmp exists and is writable.
+ * In previous versions we assumed that listen() would autobind
+ * the socket. It does not do this for AF_UNIX for an evident reason:
+ * a socket in the abstract namespace is accessible, unlike a socket
+ * bound to a deleted FS object.
+ */
+
+/*
+ * Pick a random, currently nonexistent path of the form
+ * /tmp/SOCK.XXXXXXXX into @name, to be used as a temporary bind target
+ * for a checkpointed "deleted" unix socket.  Gives up after 100
+ * collisions and returns -ELOOP; returns 0 on success.
+ */
+static int
+select_deleted_name(char * name, cpt_context_t *ctx)
+{
+	int i;
+
+	for (i=0; i<100; i++) {
+		struct nameidata nd;
+		unsigned int rnd = net_random();
+
+		sprintf(name, "/tmp/SOCK.%08x", rnd);
+
+		/* A failed lookup means the name is free -- use it. */
+		if (path_lookup(name, 0, &nd) != 0)
+			return 0;
+
+		path_release(&nd);
+	}
+
+	eprintk_ctx("failed to allocate deleted socket inode\n");
+	return -ELOOP;
+}
+
+/*
+ * This function is used for backward compatibility with old image versions.
+ */
+/*
+ * Rebind a restored AF_UNIX socket through the filesystem path saved in
+ * the image (old-image compatibility path).
+ *
+ * Deleted sockets: bind to a fresh temporary name from
+ * select_deleted_name() and unlink it afterwards, recreating the
+ * "bound to a deleted inode" state.
+ * Normal sockets: if a socket inode already exists at @name it is
+ * unlinked first so bind() can recreate it, and its owner/mode are
+ * restored on the new inode after a successful bind.
+ */
+static int unix_bind_to_path(struct socket *sock, char *name,
+				struct sockaddr* addr, int addrlen,
+				struct cpt_sock_image *si, cpt_context_t *ctx)
+{
+	struct sockaddr_un sun;
+	int err;
+	struct nameidata nd;
+
+	nd.dentry = NULL;
+
+	if (name[0]) {
+		if (si->cpt_sockflags & CPT_SOCK_DELETED) {
+			/* Build a replacement sockaddr_un around a fresh
+			 * temporary name; sun_path starts 2 bytes in. */
+			addr = (struct sockaddr*)&sun;
+			addr->sa_family = AF_UNIX;
+			name = ((char*)addr) + 2;
+			err = select_deleted_name(name, ctx);
+			if (err) {
+				eprintk_ctx("%s: can't select name\n", __func__);
+				return err;
+			}
+			addrlen = 2 + strlen(name);
+		} else {
+			if (path_lookup(name, 0, &nd))
+				nd.dentry = NULL;
+			else {
+				if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) {
+					eprintk_ctx("%s: not a socket dentry %s\n",
+							__func__, name);
+					return -EINVAL;
+				}
+				/* Remove the stale socket inode so bind()
+				 * below can create it anew. */
+				sc_unlink(name);
+			}
+		}
+	}
+
+	err = sock->ops->bind(sock, addr, addrlen);
+	if (!err && name[0]) {
+		if (si->cpt_sockflags & CPT_SOCK_DELETED)
+			sc_unlink(name);
+		else if (nd.dentry) {
+			/* Carry the original inode's ownership and mode
+			 * over to the freshly created socket inode. */
+			sc_chown(name, nd.dentry->d_inode->i_uid,
+				 nd.dentry->d_inode->i_gid);
+			sc_chmod(name, nd.dentry->d_inode->i_mode);
+		}
+	}
+
+	if (nd.dentry)
+		path_release(&nd);
+
+	return err;
+}
+
+/*
+ * Rebind a restored AF_UNIX socket via its recorded vfsmount reference
+ * (new-image path, CPT_VERSION_18_4+): attach the saved address to the
+ * sock, then rebind it relative to the mount it was bound on, which also
+ * works for overmounted sockets.
+ */
+static int unix_bind_to_mntref(struct sock *sk, char *name,
+				struct sockaddr* addr, int addrlen,
+				struct cpt_sock_image *si, cpt_context_t *ctx)
+{
+	struct unix_bind_info bi;
+	int err;
+	cpt_object_t *mntobj;
+
+	err = unix_attach_addr(sk, (struct sockaddr_un *)addr,
+				addrlen);
+	if (err) {
+		eprintk_ctx("%s: can't attach unix address %d to %s\n",
+						__func__, err, name);
+		return err;
+	}
+
+	/* Abstract / autobound sockets have no path to rebind. */
+	if (!name[0])
+		return 0;
+
+	mntobj = lookup_cpt_obj_bypos(CPT_OBJ_VFSMOUNT_REF,
+			si->cpt_vfsmount_ref, ctx);
+	if (mntobj == NULL) {
+		eprintk_ctx("%s: can't find vfsmount for unix socket %s\n",
+				__func__, name);
+		return -EINVAL;
+	}
+
+	/* NOTE(review): mntobj->o_lock appears to be reused here as the
+	 * length of the mountpoint prefix inside @name -- confirm against
+	 * the vfsmount-ref restore code. */
+	if (strlen(name) < mntobj->o_lock) {
+		eprintk_ctx("%s: unix socket with too short name (%d %s)\n",
+			       	__func__, mntobj->o_lock, name);
+		return -EINVAL;
+	}
+
+	bi.sk = sk;
+	strcpy(bi.path, name);
+	bi.path_off = mntobj->o_lock;
+	bi.i_mode = 0;
+	if (cpt_object_has(si, cpt_i_mode))
+		bi.i_mode = si->cpt_i_mode;
+	bi.next = NULL;
+
+	return rebind_unix_socket(mntobj->o_obj, &bi, LOOKUP_DIVE);
+}
+
+/*
+ * Decide which rebind strategy applies: the mntref-based path needs an
+ * image of version >= CPT_VERSION_18_4 (older images carry no vfsmount
+ * reference) and does not handle deleted sockets.  Returns 1 when the
+ * mntref path can be used, 0 to fall back to the path-based rebind.
+ */
+static int can_be_rebound_by_mntref(struct socket *sock,
+	       				struct cpt_sock_image *si,
+					cpt_context_t *ctx)
+{
+	if (ctx->image_version < CPT_VERSION_18_4)
+		return 0;
+
+	if (si->cpt_sockflags & CPT_SOCK_DELETED)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * We use this special bind function instead of sock->ops->bind because
+ * overmounted sockets cannot be bound the generic way, and we want to
+ * have a single function for rebinding all kinds of sockets.
+ */
+/*
+ * Rebind a restored AF_UNIX socket to its checkpointed local address.
+ * Sockets with no real address (laddrlen <= 2, i.e. autobound/unnamed)
+ * need nothing; delayed sockets are queued via rst_delay_unix_bind();
+ * otherwise the mntref or path strategy is chosen by
+ * can_be_rebound_by_mntref().
+ */
+static int bind_unix_socket(struct socket *sock, struct cpt_sock_image *si,
+		 cpt_context_t *ctx)
+{
+	int err;
+	char *name;
+	struct sockaddr* addr;
+	int addrlen;
+
+	if ((addrlen = si->cpt_laddrlen) <= 2)
+		return 0;
+
+	if (si->cpt_sockflags & CPT_SOCK_DELAYED)
+		return rst_delay_unix_bind(sock->sk, si, ctx);
+
+	/* sun_path begins 2 bytes into the sockaddr (after sa_family). */
+	name = ((char*)si->cpt_laddr) + 2;
+	addr = (struct sockaddr *)si->cpt_laddr;
+
+	if (can_be_rebound_by_mntref(sock, si, ctx))
+		err = unix_bind_to_mntref(sock->sk, name, addr, addrlen, si, ctx);
+	else
+		err = unix_bind_to_path(sock, name, addr, addrlen, si, ctx);
+
+	if (err)
+		eprintk_ctx("%s: can't rebind unix socket %d\n", __func__, err);
+
+	return err;
+}
+
+/*
+ * Restore the local address of an AF_UNIX socket.  A socket without a
+ * recorded parent is rebound from its own image; a socket accepted from
+ * a listening parent instead shares the parent's unix address object
+ * (dropping any address of its own and taking a reference on the
+ * parent's).  Listening sockets are handled elsewhere.
+ */
+static int fixup_unix_address(struct socket *sock, struct cpt_sock_image *si,
+			      struct cpt_context *ctx)
+{
+	struct sock *sk = sock->sk;
+	cpt_object_t *obj;
+	struct sock *parent;
+
+	if (sk->sk_family != AF_UNIX || sk->sk_state == TCP_LISTEN)
+		return 0;
+
+	if (si->cpt_parent == -1)
+		return bind_unix_socket(sock, si, ctx);
+
+	obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+	if (!obj)
+		return 0;
+
+	parent = obj->o_obj;
+	if (unix_sk(parent)->addr) {
+		/* Release our own address (freeing it if we held the last
+		 * reference) and adopt the parent's instead. */
+		if (unix_sk(sk)->addr &&
+		    atomic_dec_and_test(&unix_sk(sk)->addr->refcnt))
+			kfree(unix_sk(sk)->addr);
+		atomic_inc(&unix_sk(parent)->addr->refcnt);
+		unix_sk(sk)->addr = unix_sk(parent)->addr;
+	}
+	return 0;
+}
+
+/*
+ * Restore the receive queue of a non-INET, non-UNIX socket (used for
+ * netlink and packet sockets) by walking the sub-objects that follow the
+ * socket image between cpt_hdrlen and cpt_next.  Objects that are not
+ * skbs (rst_skb() returns -EINVAL) are treated as socket attributes and
+ * fed to rst_sock_attr().  Only CPT_SKB_RQ skbs are requeued; other
+ * queue types are dropped with a warning.
+ */
+static int generic_restore_queues(struct sock *sk, struct cpt_sock_image *si,
+				  loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;
+	pos = pos + si->cpt_hdrlen;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		__u32 type;
+
+		skb = rst_skb(sk, &pos, NULL, &type, ctx);
+		if (IS_ERR(skb)) {
+			if (PTR_ERR(skb) == -EINVAL) {
+				int err;
+
+				/* Not an skb image: try to parse it as a
+				 * socket attribute (filter, mc list). */
+				err = rst_sock_attr(&pos, sk, ctx);
+				if (err)
+					return err;
+				continue;
+			}
+			return PTR_ERR(skb);
+		}
+
+		if (type == CPT_SKB_RQ) {
+			skb_set_owner_r(skb, sk);
+			skb_queue_tail(&sk->sk_receive_queue, skb);
+		} else {
+			wprintk_ctx("strange socket queue type %u\n", type);
+			kfree_skb(skb);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Recreate one non-listening socket from its checkpoint image @si and
+ * register it in the object table entry @obj.
+ *
+ * Phases:
+ *  1. create the kernel socket (and its socketpair peer, if any);
+ *  2. wrap socket(s) in struct file objects when the image says they
+ *     were attached to file descriptors;
+ *  3. family-specific restore: INET/INET6 (bind + rst_socket_in),
+ *     NETLINK and PACKET (bind/connect + queue restore);
+ *  4. unix address fixup for this socket and the peer;
+ *  5. re-attach accepted INET sockets to their listening parent;
+ *  6. orphan handling for file-less AF_INET sockets (e.g. TCP sockets
+ *     in FIN_WAIT that outlived their descriptor).
+ */
+static int open_socket(cpt_object_t *obj, struct cpt_sock_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct socket *sock2 = NULL;
+	struct file *file;
+	cpt_object_t *fobj;
+	cpt_object_t *pobj = NULL;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err)
+		return err;
+
+	if (si->cpt_socketpair) {
+		err = sock_create_kern(si->cpt_family, si->cpt_type,
+				       si->cpt_protocol, &sock2);
+		if (err)
+			goto err_out;
+
+		err = sock->ops->socketpair(sock, sock2);
+		if (err < 0)
+			goto err_out;
+
+		/* Socketpair with a peer outside our environment.
+		 * So, we create real half-open pipe and do not worry
+		 * about dead end anymore. */
+		if (si->cpt_peer == -1) {
+			sock_release(sock2);
+			sock2 = NULL;
+		}
+	}
+
+	cpt_obj_setobj(obj, sock->sk, ctx);
+
+	/* The image records the file position the socket's descriptor was
+	 * dumped at; CPT_NULL means the socket had no file attached. */
+	if (si->cpt_file != CPT_NULL) {
+		file = sock_mapfile(sock);
+		err = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto err_out;
+
+		err = -ENOMEM;
+
+		obj->o_parent = file;
+
+		if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+			goto err_out;
+		cpt_obj_setpos(fobj, si->cpt_file, ctx);
+		cpt_obj_setindex(fobj, si->cpt_index, ctx);
+	}
+
+	if (sock2) {
+		struct file *file2;
+
+		/* The peer's placeholder object was interned during pass 1
+		 * of rst_sockets(); it must exist and still be empty. */
+		pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_peer, ctx);
+		if (!pobj) BUG();
+		if (pobj->o_obj) BUG();
+		cpt_obj_setobj(pobj, sock2->sk, ctx);
+
+		if (pobj->o_ppos != CPT_NULL) {
+			file2 = sock_mapfile(sock2);
+			err = PTR_ERR(file2);
+			if (IS_ERR(file2))
+				goto err_out;
+
+			err = -ENOMEM;
+			if ((fobj = cpt_object_add(CPT_OBJ_FILE, file2, ctx)) == NULL)
+				goto err_out;
+			cpt_obj_setpos(fobj, pobj->o_ppos, ctx);
+			cpt_obj_setindex(fobj, si->cpt_peer, ctx);
+
+			pobj->o_parent = file2;
+		}
+	}
+
+	setup_sock_common(sock->sk, si, obj->o_pos, ctx);
+	if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) {
+		int saved_reuse = sock->sk->sk_reuse;
+
+		/* Temporarily force freebind/reuse so bind() succeeds even
+		 * if the address is not (yet) configured locally. */
+		inet_sk(sock->sk)->freebind = 1;
+		sock->sk->sk_reuse = 2;
+		if (si->cpt_laddrlen) {
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+			if (err) {
+				dprintk_ctx("binding failed: %d, do not worry\n", err);
+			}
+		}
+		sock->sk->sk_reuse = saved_reuse;
+		err = rst_socket_in(si, obj->o_pos, sock->sk, ctx);
+		if (err) {
+			eprintk_ctx("open_socket: Warning! socket restoring "
+					"failed: %d\n", err);
+			/*
+			 * For now we do not want to abort migration
+			 * due to a socket restoring failure.
+			 */
+		}
+	} else if (sock->sk->sk_family == AF_NETLINK) {
+		struct sockaddr_nl *nl = (struct sockaddr_nl *)&si->cpt_laddr;
+		if (nl->nl_pid) {
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+			if (err) {
+				eprintk_ctx("AF_NETLINK binding failed: %d\n", err);
+			}
+		}
+		if (si->cpt_raddrlen && nl->nl_pid) {
+			err = sock->ops->connect(sock, (struct sockaddr *)&si->cpt_raddr, si->cpt_raddrlen, O_NONBLOCK);
+			if (err) {
+				eprintk_ctx("oops, AF_NETLINK connect failed: %d\n", err);
+			}
+		}
+		generic_restore_queues(sock->sk, si, obj->o_pos, ctx);
+	} else if (sock->sk->sk_family == PF_PACKET) {
+		struct sockaddr_ll *ll = (struct sockaddr_ll *)&si->cpt_laddr;
+		if (ll->sll_protocol || ll->sll_ifindex) {
+			/* Pad a short saved address up to a full
+			 * sockaddr_ll before binding. */
+			int alen = si->cpt_laddrlen;
+			if (alen < sizeof(struct sockaddr_ll))
+				alen = sizeof(struct sockaddr_ll);
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, alen);
+			if (err) {
+				eprintk_ctx("AF_PACKET binding failed: %d\n", err);
+			}
+		}
+		generic_restore_queues(sock->sk, si, obj->o_pos, ctx);
+	}
+	fixup_unix_address(sock, si, ctx);
+
+	if (sock2) {
+		/* Re-read the image at the peer's position (reusing @si as
+		 * the buffer) and apply the same common setup to it. */
+		err = rst_get_object(CPT_OBJ_SOCKET, pobj->o_pos, si, ctx);
+		if (err)
+			goto err_out;
+		setup_sock_common(sock2->sk, si, pobj->o_pos, ctx);
+		fixup_unix_address(sock2, si, ctx);
+	}
+
+	/* Accepted-but-not-yet-restored INET children are handed back to
+	 * their listening parent's accept queue; on success the parent owns
+	 * the sock, so detach it from our struct socket. */
+	if ((sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+	    && (int)si->cpt_parent != -1) {
+		cpt_object_t *lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+		if (lobj && cpt_attach_accept(lobj->o_obj, sock->sk, ctx) == 0)
+			sock->sk = NULL;
+	}
+
+
+	/* File-less AF_INET sockets are restored as protocol orphans: the
+	 * sock is detached from the struct socket and accounted as an
+	 * orphan so the stack can finish its lifetime on its own. */
+	if (si->cpt_file == CPT_NULL && sock->sk &&
+	    sock->sk->sk_family == AF_INET) {
+		struct sock *sk = sock->sk;
+
+		if (sk) {
+			sock->sk = NULL;
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("oops, sock is locked by user\n");
+
+			sock_hold(sk);
+			sock_orphan(sk);
+			ub_inc_orphan_count(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+			dprintk_ctx("orphaning socket %p\n", sk);
+		}
+	}
+
+	/* No file and no sock left: the struct socket shell is unused. */
+	if (si->cpt_file == CPT_NULL && sock->sk == NULL)
+		sock_release(sock);
+
+	return 0;
+
+err_out:
+	if (sock2)
+		sock_release(sock2);
+	sock_release(sock);
+	return err;
+}
+
+/*
+ * Recreate a listening socket from its image: create, bind (unix via
+ * bind_unix_socket(), inet with freebind forced), listen with the saved
+ * backlog, attach a struct file, register socket and file in the object
+ * table, then restore common state and -- for INET -- the accept and
+ * syn-wait queues.  @pos is the image offset of this socket, used as its
+ * object position.
+ */
+static int open_listening_socket(loff_t pos, struct cpt_sock_image *si,
+				 struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct file *file;
+	cpt_object_t *obj, *fobj;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err) {
+		eprintk_ctx("open_listening_socket: sock_create_kern: %d\n", err);
+		return err;
+	}
+
+	/* Force address reuse so the rebind cannot clash with leftovers. */
+	sock->sk->sk_reuse = 2;
+	sock->sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+
+	if (sock->sk->sk_family == AF_UNIX) {
+		err = bind_unix_socket(sock, si, ctx);
+		if (err) {
+			eprintk_ctx("bind unix: %d\n", err);
+			goto err_out;
+		}
+	} else if (si->cpt_laddrlen) {
+		if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+			inet_sk(sock->sk)->freebind = 1;
+
+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+
+		if (err) {
+			eprintk_ctx("open_listening_socket: bind: %d\n", err);
+			goto err_out;
+		}
+	}
+
+	err = sock->ops->listen(sock, si->cpt_max_ack_backlog);
+	if (err) {
+		eprintk_ctx("open_listening_socket: listen: %d, %Ld, %x\n", err, pos, si->cpt_sockflags);
+		goto err_out;
+	}
+
+	/* Now we may access socket body directly and fixup all the things. */
+
+	file = sock_mapfile(sock);
+	err = PTR_ERR(file);
+	if (IS_ERR(file)) {
+		eprintk_ctx("open_listening_socket: map: %d\n", err);
+		goto err_out;
+	}
+
+	err = -ENOMEM;
+	if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+		goto err_out;
+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sock->sk, ctx)) == NULL)
+		goto err_out;
+	cpt_obj_setpos(obj, pos, ctx);
+	cpt_obj_setindex(obj, si->cpt_index, ctx);
+	obj->o_parent = file;
+	cpt_obj_setpos(fobj, si->cpt_file, ctx);
+	cpt_obj_setindex(fobj, si->cpt_index, ctx);
+
+	setup_sock_common(sock->sk, si, pos, ctx);
+
+	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6) {
+		rst_listen_socket_in(sock->sk, si, pos, ctx);
+		rst_restore_synwait_queue(sock->sk, si, pos, ctx);
+	}
+
+	return 0;
+
+err_out:
+	sock_release(sock);
+	return err;
+}
+
+/*
+ * Restore one multicast-filter attribute object found at *pos_p and
+ * advance *pos_p past it.  Dispatches on the saved address family to the
+ * IPv4 or (if compiled in) IPv6 handler; other families get
+ * -EAFNOSUPPORT.
+ */
+static int
+rst_sock_attr_mcfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	int err;
+	loff_t pos = *pos_p;
+	struct cpt_sockmc_image v;
+
+	err = rst_get_object(CPT_OBJ_SOCK_MCADDR, pos, &v, ctx);
+	if (err)
+		return err;
+
+	*pos_p += v.cpt_next;
+
+	if (v.cpt_family == AF_INET)
+		return rst_sk_mcfilter_in(sk, &v, pos, ctx);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (v.cpt_family == AF_INET6)
+		return rst_sk_mcfilter_in6(sk, &v, pos, ctx); 
+#endif
+	else
+		return -EAFNOSUPPORT;
+}
+
+
+/*
+ * Restore a socket (BPF) filter attribute found at *pos_p and advance
+ * *pos_p past it.  The image payload is a raw array of struct
+ * sock_filter; its size must be an exact multiple of the entry size.
+ * The new filter replaces any filter already installed on @sk, whose
+ * reference is then dropped.
+ */
+static int
+rst_sock_attr_skfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	int err;
+	struct sk_filter *fp, *old_fp; 
+	loff_t pos = *pos_p;
+	struct cpt_obj_bits v;
+
+	err = rst_get_object(CPT_OBJ_SKFILTER, pos, &v, ctx);
+	if (err)
+		return err;
+
+	*pos_p += v.cpt_next;
+
+	if (v.cpt_size % sizeof(struct sock_filter))
+		return -EINVAL;
+
+	/* Instructions are stored inline after the sk_filter header. */
+	fp = sock_kmalloc(sk, v.cpt_size+sizeof(*fp), GFP_KERNEL_UBC);
+	if (fp == NULL)
+		return -ENOMEM;
+	atomic_set(&fp->refcnt, 1);
+	fp->len = v.cpt_size/sizeof(struct sock_filter);
+
+	err = ctx->pread(fp->insns, v.cpt_size, ctx, pos+v.cpt_hdrlen);
+	if (err) {
+		sk_filter_release(sk, fp);
+		return err;
+	}
+
+	old_fp = sk->sk_filter;
+	sk->sk_filter = fp;
+	if (old_fp)
+		sk_filter_release(sk, old_fp);
+	return 0;
+}
+
+
+/*
+ * Restore one socket attribute object of unknown kind at *pos_p: first
+ * try it as a socket filter; if that parser failed without consuming the
+ * object (*pos_p unchanged), try it as a multicast filter.  On success
+ * *pos_p is advanced past the consumed object.
+ */
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	int err;
+	loff_t pos = *pos_p;
+
+	err = rst_sock_attr_skfilter(pos_p, sk, ctx);
+	if (err && pos == *pos_p)
+		err = rst_sock_attr_mcfilter(pos_p, sk, ctx);
+	return err;
+}
+
+/*
+ * Converters for the checkpointed tcp_skb_cb blob (cpt_skb_image.cpt_cb).
+ * The on-disk cb may be laid out for an IPv4-only kernel (16-byte
+ * inet_skb_parm header) or an IPv6-capable kernel (24-byte
+ * inet6_skb_parm header); the running kernel's tcp_skb_cb layout depends
+ * on CONFIG_IPV6.  Each variant copies the header part and the TCP part
+ * separately, shifting the TCP part to the layout the running kernel
+ * expects.
+ */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/* Running kernel has IPv6: widen a v4-format cb to the v6 layout. */
+static void rst_tcp_cb_from_v4(struct cpt_skb_image *v, struct sk_buff *skb)
+{
+	/*
+	 * sizeof(struct inet_skb_parm) == 16
+	 * sizeof(struct tcp_skb_cb) - sizeof(tcp_skb_cb.header) == 20
+	 *   => sizeof(struct tcp_skb_cb) == 36
+	 * sizeof(struct cpt_skb_image.cb) = 40
+	 *   => tcp_skb_cb in IPv4 format fits into cpt_skb_image.cb
+	 */
+	BUILD_BUG_ON(sizeof(skb->cb) - sizeof(struct inet6_skb_parm) <
+		sizeof(struct tcp_skb_cb) - sizeof(struct inet6_skb_parm));
+	memcpy(skb->cb, v->cpt_cb, sizeof(struct inet_skb_parm));
+	memcpy(skb->cb + sizeof(struct inet6_skb_parm),
+		(void *)v->cpt_cb + sizeof(struct inet_skb_parm),
+		sizeof(struct tcp_skb_cb) - sizeof(struct inet6_skb_parm));
+}
+/* Running kernel has IPv6: a v6-format cb copies straight through. */
+static void rst_tcp_cb_from_v6(struct cpt_skb_image *v, struct sk_buff *skb)
+{
+	memcpy(skb->cb, v->cpt_cb, sizeof(v->cpt_cb));
+}
+#else
+/* IPv4-only kernel: a v4-format cb copies straight through. */
+static void rst_tcp_cb_from_v4(struct cpt_skb_image *v, struct sk_buff *skb)
+{
+	memcpy(skb->cb, v->cpt_cb, sizeof(v->cpt_cb));
+}
+/* IPv4-only kernel: narrow a v6-format cb to the v4 layout. */
+static void rst_tcp_cb_from_v6(struct cpt_skb_image *v, struct sk_buff *skb)
+{
+	/*
+	 * sizeof(struct inet6_skb_parm) == 24
+	 * sizeof(struct tcp_skb_cb) - sizeof(tcp_skb_cb.header) == 20
+	 *   => sizeof(struct tcp_skb_cb) == 44
+	 * sizeof(struct cpt_skb_image.cb) = 40
+	 *   => tcp_skb_cb in IPv6 format does not fit into cpt_skb_image.cb,
+	 *      do not write more than sizeof(v->cpt_cb)
+	 */
+	BUILD_BUG_ON(sizeof(skb->cb) - sizeof(struct inet_skb_parm) <
+		sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
+	memcpy(skb->cb, v->cpt_cb, sizeof(struct inet_skb_parm));
+	memcpy(skb->cb + sizeof(struct inet_skb_parm),
+		(void *)v->cpt_cb + sizeof(struct inet6_skb_parm),
+		min(sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm),
+			sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm)));
+}
+#endif
+
+/*
+ * Mirror of tcp_skb_cb as laid out by an IPv6-capable kernel (header is
+ * the union of the v4 and v6 parm structs).  NOTE(review): not
+ * referenced in the code visible here; presumably kept for layout/size
+ * calculations on checkpointed cb data -- confirm against callers.
+ */
+struct tcp_skb_cb_ipv6 {
+	union {
+		struct inet_skb_parm	h4;
+		struct inet6_skb_parm	h6;
+	} header;
+	__u32		seq;
+	__u32		end_seq;
+	__u32		when;
+	__u8		flags;
+	__u8		sacked;
+	__u16		urg_ptr;
+	__u32		ack_seq;
+};
+
+/*
+ * Reconstruct one sk_buff from the CPT_OBJ_SKB image at *pos_p and
+ * advance *pos_p past the whole object (header + payload sub-objects).
+ * Optionally reports the saved owner socket index via @owner and the
+ * queue type (CPT_SKB_RQ/OFOQ/WQ) via @queue.  Returns the skb or an
+ * ERR_PTR; -EINVAL from rst_get_object() means "object at pos is not an
+ * skb" and is used by callers to fall back to rst_sock_attr().
+ */
+struct sk_buff * rst_skb(struct sock *sk, loff_t *pos_p, __u32 *owner,
+			 __u32 *queue, struct cpt_context *ctx)
+{
+	int err;
+	struct sk_buff *skb;
+	struct cpt_skb_image v;
+	loff_t pos = *pos_p;
+	struct scm_fp_list *fpl = NULL;
+	struct timeval tmptv;
+
+	err = rst_get_object(CPT_OBJ_SKB, pos, &v, ctx);
+	if (err)
+		return ERR_PTR(err);
+	*pos_p = pos + v.cpt_next;
+
+	if (owner)
+		*owner = v.cpt_owner;
+	if (queue)
+		*queue = v.cpt_queue;
+
+	/* Rebuild the buffer with the original head/tail room and restore
+	 * the transport/network/link header offsets. */
+	skb = alloc_skb(v.cpt_len + v.cpt_hspace + v.cpt_tspace, GFP_KERNEL);
+	if (skb == NULL)
+		return ERR_PTR(-ENOMEM);
+	skb_reserve(skb, v.cpt_hspace);
+	skb_put(skb, v.cpt_len);
+	skb->h.raw = skb->head + v.cpt_h;
+	skb->nh.raw = skb->head + v.cpt_nh;
+	skb->mac.raw = skb->head + v.cpt_mac;
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v.cpt_cb));
+	if (sk->sk_protocol == IPPROTO_TCP) {
+		/*
+		 * 1) 2.6.9-x VZ kernels did not have IPv6 support compiled in
+		 *    => if image_version < CPT_VERSION_9*
+		 *    cpt_skb_image.cpt_cb is in IPv4 format.
+		 * 2) 2.6.18-x kernels with image_version >= CPT_VERSION_18_2
+		 *    and 2.6.16-x >= 027stab029 create cpt_skb_image.cpt_cb
+		 *    in IPv6 format despite the kernel IPv6 support.
+		 * 3) 2.6.18-x kernels with image_version < CPT_VERSION_18_2
+		 *    and 2.6.16-x < 027stab029 create cpt_cb in IPv4 format
+		 *    in case IPv6 support was not compiled in and
+		 *    in IPv6 format otherwise.
+		 *    All PVC 2.6.1[68]-x kernels have IPv6 support => we assume
+		 *    any 2.6.1[68]-x kernel produces cpt_cb in IPv6 format.
+		 *    Those, who compile old 2.6.1[68]-x kernels without IPv6
+		 *    support - beware!
+		 */
+		if (ctx->image_version >= CPT_VERSION_16) {
+			/*
+			 * we assume cpt_skb_image.cpt_cb is in IPv6 format
+			 * despite the kernel IPv6 support
+			 */
+			rst_tcp_cb_from_v6(&v, skb);
+		} else {
+			/*
+			 * this case is for 2.6.9-x kernels which produce
+			 * cpt_skb_image.cpt_cb in IPv4 format
+			 */
+			rst_tcp_cb_from_v4(&v, skb);
+		}
+	} else
+		memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
+	skb->mac_len = v.cpt_mac_len;
+
+	skb->csum = v.cpt_csum;
+	skb->local_df = v.cpt_local_df;
+	skb->pkt_type = v.cpt_pkt_type;
+	skb->ip_summed = v.cpt_ip_summed;
+	skb->priority = v.cpt_priority;
+	skb->protocol = v.cpt_protocol;
+	cpt_timeval_import(&tmptv, v.cpt_stamp);
+	skb_set_timestamp(skb, &tmptv);
+
+	/* Version-0 images predate GSO fields; use safe defaults. */
+	skb_shinfo(skb)->gso_segs = v.cpt_gso_segs;
+	skb_shinfo(skb)->gso_size = v.cpt_gso_size;
+	if (ctx->image_version == 0) {
+		skb_shinfo(skb)->gso_segs = 1;
+		skb_shinfo(skb)->gso_size = 0;
+	}
+
+	/* Walk trailing sub-objects: CPT_OBJ_BITS carries the raw buffer
+	 * contents, CPT_OBJ_FILEDESC entries are SCM_RIGHTS files passed
+	 * in this skb (AF_UNIX fd passing). */
+	if (v.cpt_next > v.cpt_hdrlen) {
+		pos = pos + v.cpt_hdrlen;
+		while (pos < *pos_p) {
+			union {
+				struct cpt_obj_bits b;
+				struct cpt_fd_image f;
+			} u;
+
+			err = rst_get_object(-1, pos, &u, ctx);
+			if (err) {
+				kfree_skb(skb);
+				return ERR_PTR(err);
+			}
+			if (u.b.cpt_object == CPT_OBJ_BITS) {
+				if (u.b.cpt_size != v.cpt_hspace + skb->len) {
+					eprintk_ctx("invalid skb image %u != %u + %u\n", u.b.cpt_size, v.cpt_hspace, skb->len);
+					kfree_skb(skb);
+					return ERR_PTR(-EINVAL);
+				}
+
+				err = ctx->pread(skb->head, u.b.cpt_size, ctx, pos+u.b.cpt_hdrlen);
+				if (err) {
+					kfree_skb(skb);
+					return ERR_PTR(err);
+				}
+			} else if (u.f.cpt_object == CPT_OBJ_FILEDESC) {
+				if (!fpl) {
+					fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+					if (!fpl) {
+						kfree_skb(skb);
+						return ERR_PTR(-ENOMEM);
+					}
+					fpl->count = 0;
+					UNIXCB(skb).fp = fpl;
+				}
+				/* Failed file restores are silently skipped;
+				 * only successfully restored files count. */
+				fpl->fp[fpl->count] = rst_file(u.f.cpt_file, -1, ctx);
+				if (!IS_ERR(fpl->fp[fpl->count]))
+					fpl->count++;
+			}
+			pos += u.b.cpt_next;
+		}
+	}
+
+	return skb;
+}
+
+/*
+ * Refill the receive queue of an AF_UNIX socket @sk from the sub-objects
+ * following its image.  Each skb is charged to the socket that sent it
+ * (the saved owner index, or the current peer when owner == -1); skbs
+ * whose owner cannot be found are dropped.  For a listening socket the
+ * queued skbs are embryo connections: their temporary struct socket
+ * shells are detached and released here, leaving only the sock.
+ */
+static int restore_unix_rqueue(struct sock *sk, struct cpt_sock_image *si,
+			       loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;
+	pos = pos + si->cpt_hdrlen;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		struct sock *owner_sk;
+		__u32 owner;
+
+		skb = rst_skb(sk, &pos, &owner, NULL, ctx);
+		if (IS_ERR(skb)) {
+			if (PTR_ERR(skb) == -EINVAL) {
+				int err;
+
+				/* Not an skb: consume it as a sock attribute. */
+				err = rst_sock_attr(&pos, sk, ctx);
+				if (err)
+					return err;
+				continue;
+			}
+			return PTR_ERR(skb);
+		}
+
+		owner_sk = unix_peer(sk);
+		if (owner != -1) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, owner, ctx);
+			if (pobj == NULL) {
+				eprintk_ctx("orphan af_unix skb?\n");
+				kfree_skb(skb);
+				continue;
+			}
+			owner_sk = pobj->o_obj;
+		}
+		if (owner_sk == NULL) {
+			dprintk_ctx("orphan af_unix skb 2?\n");
+			kfree_skb(skb);
+			continue;
+		}
+		/* Charge write space to the sender, restore fd-passing
+		 * destructor if SCM files were attached, and requeue. */
+		skb_set_owner_w(skb, owner_sk);
+		if (UNIXCB(skb).fp)
+			skb->destructor = unix_destruct_fds;
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (sk->sk_state == TCP_LISTEN) {
+			struct socket *sock = skb->sk->sk_socket;
+			if (sock == NULL) BUG();
+			if (sock->file) BUG();
+			skb->sk->sk_socket = NULL;
+			skb->sk->sk_sleep = NULL;
+			sock->sk = NULL;
+			sock_release(sock);
+		}
+	}
+	return 0;
+}
+
+
+/* All the sockets are created before we start to open files */
+
+/*
+ * Restore the CPT_SECT_SOCKET section in two passes.
+ * Pass 1: walk the section; listening sockets are fully opened right
+ * away (children restored later must attach to them), all others only
+ * get a placeholder object recording index/position/file position.
+ * Pass 2: open every placeholder via open_socket().
+ * Returns 0 on success or a negative errno.
+ */
+int rst_sockets(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SOCKET];
+	loff_t endsec;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err) {
+		eprintk_ctx("rst_sockets: ctx->pread: %d\n", err);
+		return err;
+	}
+	if (h.cpt_section != CPT_SECT_SOCKET || h.cpt_hdrlen < sizeof(h)) {
+		eprintk_ctx("rst_sockets: hdr err\n");
+		return -EINVAL;
+	}
+
+	/* The first pass: we create socket index and open listening sockets. */
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (sbuf->cpt_state == TCP_LISTEN) {
+			err = open_listening_socket(sec, sbuf, ctx); 
+			cpt_release_buf(ctx);
+			if (err) {
+				eprintk_ctx("rst_sockets: open_listening_socket: %d\n", err);
+				return err;
+			}
+		} else {
+			/* NOTE(review): sbuf is dereferenced below after
+			 * cpt_release_buf(); this relies on the per-ctx
+			 * buffer staying valid until the next cpt_get_buf()
+			 * -- confirm against the buffer implementation. */
+			cpt_release_buf(ctx);
+			obj = alloc_cpt_object(GFP_KERNEL, ctx);
+			if (obj == NULL)
+				return -ENOMEM;
+			cpt_obj_setindex(obj, sbuf->cpt_index, ctx);
+			cpt_obj_setpos(obj, sec, ctx);
+			obj->o_ppos  = sbuf->cpt_file;
+			intern_cpt_object(CPT_OBJ_SOCKET, obj, ctx);
+		}
+		sec += sbuf->cpt_next;
+	}
+
+	/* Pass 2: really restore sockets */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		if (obj->o_obj != NULL)
+			continue;
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (sbuf->cpt_state == TCP_LISTEN) BUG();
+		err = open_socket(obj, sbuf, ctx); 
+		cpt_release_buf(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: open_socket: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Restore the CPT_SECT_ORPHANS section: sockets that had no file
+ * descriptor at checkpoint time (e.g. TCP sockets finishing shutdown).
+ * Each one is opened with a transient object that is freed immediately
+ * afterwards -- orphans are never entered into the object table.
+ */
+int rst_orphans(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_ORPHANS];
+	loff_t endsec;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_ORPHANS || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		if (obj == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		obj->o_pos = sec;
+		obj->o_ppos  = sbuf->cpt_file;
+		err = open_socket(obj, sbuf, ctx);
+		dprintk_ctx("Restoring orphan: %d\n", err);
+		free_cpt_object(obj, ctx);
+		/* NOTE(review): sbuf->cpt_next is read after
+		 * cpt_release_buf(); relies on the buffer staying valid
+		 * until the next cpt_get_buf() -- confirm. */
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		sec += sbuf->cpt_next;
+	}
+
+	return 0;
+}
+
+
+/* Pass 3: I understand, this is not funny already :-),
+ * but we have to do another pass to establish links between
+ * not-paired AF_UNIX SOCK_DGRAM sockets and to restore AF_UNIX
+ * skb queues with proper skb->sk links.
+ *
+ * This could be made at the end of rst_sockets(), but we defer
+ * restoring af_unix queues up to the end of restoring files to
+ * make restoring passed FDs cleaner.
+ */
+
+/*
+ * Pass 3 of socket restore (run after files are restored, see comment
+ * above): for every AF_UNIX socket, refill its receive queue from the
+ * image, and for still-unpaired SOCK_DGRAM sockets re-link unix_peer()
+ * to the restored peer.  Finishes by restoring orphan sockets.
+ * Errors from restore_unix_rqueue() and rst_orphans() are deliberately
+ * not propagated -- restore continues best-effort.
+ */
+int rst_sockets_complete(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		struct sock *sk = obj->o_obj;
+		struct sock *peer;
+
+		/* Every placeholder must have been filled in pass 2. */
+		if (!sk) BUG();
+
+		if (sk->sk_family != AF_UNIX)
+			continue;
+
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+
+		/* Trailing sub-objects present => queued skbs to restore. */
+		if (sbuf->cpt_next > sbuf->cpt_hdrlen)
+			restore_unix_rqueue(sk, sbuf, obj->o_pos, ctx);
+
+		cpt_release_buf(ctx);
+
+		if (sk->sk_type == SOCK_DGRAM && unix_peer(sk) == NULL) {
+			cpt_object_t *pobj;
+
+			sbuf = cpt_get_buf(ctx);
+			err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+			if (err) {
+				cpt_release_buf(ctx);
+				return err;
+			}
+
+			if (sbuf->cpt_peer != -1) {
+				pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, sbuf->cpt_peer, ctx);
+				if (pobj) {
+					peer = pobj->o_obj;
+					sock_hold(peer);
+					unix_peer(sk) = peer;
+				}
+			}
+			cpt_release_buf(ctx);
+		}
+	}
+
+	rst_orphans(ctx);
+
+	return 0;
+}
+
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_socket_in.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_socket_in.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_socket_in.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_socket_in.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,601 @@
+/*
+ *
+ *  kernel/cpt/rst_socket_in.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <linux/jhash.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/igmp.h>
+#include <net/addrconf.h>
+#include <net/inet6_connection_sock.h>
+#include <linux/ve_proto.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+/* Convert a checkpointed timeout, stored as a signed delta relative to
+ * the jiffies value at checkpoint time, back into an absolute jiffies
+ * value on the restoring host. */
+static inline unsigned long jiffies_import(__u32 tmo)
+{
+	__s32 delta = tmo;
+	return jiffies + (long)delta;
+}
+
+/* Same idea as jiffies_import(), but for TCP's 32-bit timestamp fields:
+ * rebase the saved delta onto the current (truncated) jiffies. */
+static inline __u32 tcp_jiffies_import(__u32 tmo)
+{
+	return ((__u32)jiffies) + tmo;
+}
+
+
+/*
+ * Re-read every skb stored after the socket image header at @pos and
+ * requeue it on @sk's receive, out-of-order or write queue, charging
+ * the appropriate (UB) accounting.  An rst_skb() result of -EINVAL
+ * means the next object is not an skb but a socket attribute record,
+ * which is consumed by rst_sock_attr() instead.
+ * Returns 0 on success or a negative errno.
+ */
+static int restore_queues(struct sock *sk, struct cpt_sock_image *si,
+			  loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;
+	pos = pos + si->cpt_hdrlen;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		__u32 type;
+
+		skb = rst_skb(sk, &pos, NULL, &type, ctx);
+		if (IS_ERR(skb)) {
+			if (PTR_ERR(skb) == -EINVAL) {
+				int err;
+
+				/* Not an skb object: try a socket
+				 * attribute record at this position. */
+				err = rst_sock_attr(&pos, sk, ctx);
+				if (err)
+					return err;
+				continue;
+			}
+			return PTR_ERR(skb);
+		}
+
+		if (sk->sk_type == SOCK_STREAM) {
+			/* TCP: charge receive/send buffer space forcibly,
+			 * since the socket was already accounted for at
+			 * checkpoint time. */
+			if (type == CPT_SKB_RQ) {
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_OFOQ) {
+				struct tcp_sock *tp = tcp_sk(sk);
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&tp->out_of_order_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				sk->sk_wmem_queued += skb->truesize;
+				sk->sk_forward_alloc -= skb->truesize;
+				ub_tcpsndbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_write_queue, skb);
+			} else {
+				wprintk_ctx("strange stream queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		} else {
+			if (type == CPT_SKB_RQ) {
+				skb_set_owner_r(skb, sk);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				struct inet_sock *inet = inet_sk(sk);
+				/* A datagram write-queue skb only makes
+				 * sense while a cork is in progress. */
+				if (inet->cork.fragsize) {
+					skb_set_owner_w(skb, sk);
+					skb_queue_tail(&sk->sk_write_queue, skb);
+				} else {
+					eprintk_ctx("cork skb is dropped\n");
+					kfree_skb(skb);
+				}
+			} else {
+				wprintk_ctx("strange dgram queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		}
+	}
+	return 0;
+}
+
+/* Scan all restored sockets for a listening IPv4/IPv6 socket bound to
+ * local port @sport (network byte order); used to re-attach child
+ * sockets whose checkpointed parent reference is unavailable.
+ * Returns the listener or NULL if none matches. */
+static struct sock *find_parent(__u16 sport, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk &&
+		    sk->sk_state == TCP_LISTEN &&
+		    (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) &&
+		    inet_sk(sk)->sport == sport)
+			return sk;
+	}
+	return NULL;
+}
+
+/*
+ * Restore the TCP-specific state of @sk from its checkpoint image @si:
+ * sequence numbers, ACK/congestion/RTT state, SACK blocks, window
+ * parameters, urgent data, keepalive settings; then recompute
+ * sk_send_head, rebind the local port, re-hash the socket and re-arm
+ * its timers.  Timestamps are rebased onto the current jiffies via
+ * jiffies_import()/tcp_jiffies_import().
+ * Always returns 0.
+ */
+static int rst_socket_tcp(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+			  struct cpt_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	tp->pred_flags = si->cpt_pred_flags;
+	tp->rcv_nxt = si->cpt_rcv_nxt;
+	tp->snd_nxt = si->cpt_snd_nxt;
+	tp->snd_una = si->cpt_snd_una;
+	tp->snd_sml = si->cpt_snd_sml;
+	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
+	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
+	tp->tcp_header_len = si->cpt_tcp_header_len;
+	inet_csk(sk)->icsk_ack.pending = si->cpt_ack_pending;
+	inet_csk(sk)->icsk_ack.quick = si->cpt_quick;
+	inet_csk(sk)->icsk_ack.pingpong = si->cpt_pingpong;
+	inet_csk(sk)->icsk_ack.blocked = si->cpt_blocked;
+	inet_csk(sk)->icsk_ack.ato = si->cpt_ato;
+	inet_csk(sk)->icsk_ack.timeout = jiffies_import(si->cpt_ack_timeout);
+	inet_csk(sk)->icsk_ack.lrcvtime = tcp_jiffies_import(si->cpt_lrcvtime);
+	inet_csk(sk)->icsk_ack.last_seg_size = si->cpt_last_seg_size;
+	inet_csk(sk)->icsk_ack.rcv_mss = si->cpt_rcv_mss;
+	tp->snd_wl1 = si->cpt_snd_wl1;
+	tp->snd_wnd = si->cpt_snd_wnd;
+	tp->max_window = si->cpt_max_window;
+	inet_csk(sk)->icsk_pmtu_cookie = si->cpt_pmtu_cookie;
+	tp->mss_cache = si->cpt_mss_cache;
+	tp->rx_opt.mss_clamp = si->cpt_mss_clamp;
+	inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len;
+	inet_csk(sk)->icsk_ca_state = si->cpt_ca_state;
+	inet_csk(sk)->icsk_retransmits = si->cpt_retransmits;
+	tp->reordering = si->cpt_reordering;
+	tp->frto_counter = si->cpt_frto_counter;
+	tp->frto_highmark = si->cpt_frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+	// // tp->adv_cong = si->cpt_adv_cong;
+#endif
+	inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
+	inet_csk(sk)->icsk_backoff = si->cpt_backoff;
+	tp->srtt = si->cpt_srtt;
+	tp->mdev = si->cpt_mdev;
+	tp->mdev_max = si->cpt_mdev_max;
+	tp->rttvar = si->cpt_rttvar;
+	tp->rtt_seq = si->cpt_rtt_seq;
+	inet_csk(sk)->icsk_rto = si->cpt_rto;
+	tp->packets_out = si->cpt_packets_out;
+	tp->left_out = si->cpt_left_out;
+	tp->retrans_out = si->cpt_retrans_out;
+	tp->lost_out = si->cpt_lost_out;
+	tp->sacked_out = si->cpt_sacked_out;
+	tp->fackets_out = si->cpt_fackets_out;
+	tp->snd_ssthresh = si->cpt_snd_ssthresh;
+	tp->snd_cwnd = si->cpt_snd_cwnd;
+	tp->snd_cwnd_cnt = si->cpt_snd_cwnd_cnt;
+	tp->snd_cwnd_clamp = si->cpt_snd_cwnd_clamp;
+	tp->snd_cwnd_used = si->cpt_snd_cwnd_used;
+	tp->snd_cwnd_stamp = tcp_jiffies_import(si->cpt_snd_cwnd_stamp);
+	inet_csk(sk)->icsk_timeout = tcp_jiffies_import(si->cpt_timeout);
+	tp->rcv_wnd = si->cpt_rcv_wnd;
+	tp->rcv_wup = si->cpt_rcv_wup;
+	tp->write_seq = si->cpt_write_seq;
+	tp->pushed_seq = si->cpt_pushed_seq;
+	tp->copied_seq = si->cpt_copied_seq;
+	tp->rx_opt.tstamp_ok = si->cpt_tstamp_ok;
+	tp->rx_opt.wscale_ok = si->cpt_wscale_ok;
+	tp->rx_opt.sack_ok = si->cpt_sack_ok;
+	tp->rx_opt.saw_tstamp = si->cpt_saw_tstamp;
+	tp->rx_opt.snd_wscale = si->cpt_snd_wscale;
+	tp->rx_opt.rcv_wscale = si->cpt_rcv_wscale;
+	tp->nonagle = si->cpt_nonagle;
+	tp->keepalive_probes = si->cpt_keepalive_probes;
+	tp->rx_opt.rcv_tsval = si->cpt_rcv_tsval;
+	tp->rx_opt.rcv_tsecr = si->cpt_rcv_tsecr;
+	tp->rx_opt.ts_recent = si->cpt_ts_recent;
+	tp->rx_opt.ts_recent_stamp = si->cpt_ts_recent_stamp;
+	tp->rx_opt.user_mss = si->cpt_user_mss;
+	tp->rx_opt.dsack = si->cpt_dsack;
+	tp->rx_opt.eff_sacks = si->cpt_num_sacks;
+	/* SACK blocks are serialized as a flat array of start/end pairs:
+	 * [0..1] duplicate SACK, [2..9] up to four selective ACK blocks. */
+	tp->duplicate_sack[0].start_seq = si->cpt_sack_array[0];
+	tp->duplicate_sack[0].end_seq = si->cpt_sack_array[1];
+	tp->selective_acks[0].start_seq = si->cpt_sack_array[2];
+	tp->selective_acks[0].end_seq = si->cpt_sack_array[3];
+	tp->selective_acks[1].start_seq = si->cpt_sack_array[4];
+	tp->selective_acks[1].end_seq = si->cpt_sack_array[5];
+	tp->selective_acks[2].start_seq = si->cpt_sack_array[6];
+	tp->selective_acks[2].end_seq = si->cpt_sack_array[7];
+	tp->selective_acks[3].start_seq = si->cpt_sack_array[8];
+	tp->selective_acks[3].end_seq = si->cpt_sack_array[9];
+
+	tp->window_clamp = si->cpt_window_clamp;
+	tp->rcv_ssthresh = si->cpt_rcv_ssthresh;
+	inet_csk(sk)->icsk_probes_out = si->cpt_probes_out;
+	tp->rx_opt.num_sacks = si->cpt_num_sacks;
+	tp->advmss = si->cpt_advmss;
+	inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries;
+	tp->ecn_flags = si->cpt_ecn_flags;
+	tp->prior_ssthresh = si->cpt_prior_ssthresh;
+	tp->high_seq = si->cpt_high_seq;
+	tp->retrans_stamp = si->cpt_retrans_stamp;
+	tp->undo_marker = si->cpt_undo_marker;
+	tp->undo_retrans = si->cpt_undo_retrans;
+	tp->urg_seq = si->cpt_urg_seq;
+	tp->urg_data = si->cpt_urg_data;
+	inet_csk(sk)->icsk_pending = si->cpt_pending;
+	tp->urg_mode = si->cpt_urg_mode;
+	tp->snd_up = si->cpt_snd_up;
+	tp->keepalive_time = si->cpt_keepalive_time;
+	tp->keepalive_intvl = si->cpt_keepalive_intvl;
+	tp->linger2 = si->cpt_linger2;
+
+	/* Recompute sk_send_head: the first skb in the write queue that
+	 * has not been sent yet (seq >= snd_nxt). */
+	sk->sk_send_head = NULL;
+	for (skb = skb_peek(&sk->sk_write_queue);
+	     skb && skb != (struct sk_buff*)&sk->sk_write_queue;
+	     skb = skb->next) {
+		if (!after(tp->snd_nxt, TCP_SKB_CB(skb)->seq)) {
+			sk->sk_send_head = skb;
+			break;
+		}
+	}
+
+	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) {
+		struct inet_sock *inet = inet_sk(sk);
+		/* Local port not bound yet: inherit it from the
+		 * checkpointed parent listener if one was recorded,
+		 * otherwise from any restored listener on the same port. */
+		if (inet->num == 0) {
+			cpt_object_t *lobj = NULL;
+
+			if ((int)si->cpt_parent != -1)
+				lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+
+			if (lobj && lobj->o_obj) {
+				inet->num = ntohs(inet->sport);
+				local_bh_disable();
+				__inet_inherit_port(&tcp_hashinfo, lobj->o_obj, sk);
+				local_bh_enable();
+				dprintk_ctx("port inherited from parent\n");
+			} else {
+				struct sock *lsk = find_parent(inet->sport, ctx);
+				if (lsk) {
+					inet->num = ntohs(inet->sport);
+					local_bh_disable();
+					__inet_inherit_port(&tcp_hashinfo, lsk, sk);
+					local_bh_enable();
+					dprintk_ctx("port inherited\n");
+				} else {
+					eprintk_ctx("we are kinda lost...\n");
+				}
+			}
+		}
+
+		sk->sk_prot->hash(sk);
+
+		/* Re-arm delayed-ACK, retransmit and keepalive timers. */
+		if (inet_csk(sk)->icsk_ack.pending&ICSK_ACK_TIMER)
+			sk_reset_timer(sk, &inet_csk(sk)->icsk_delack_timer, inet_csk(sk)->icsk_ack.timeout);
+		if (inet_csk(sk)->icsk_pending)
+			sk_reset_timer(sk, &inet_csk(sk)->icsk_retransmit_timer,
+				       inet_csk(sk)->icsk_timeout);
+		if (sock_flag(sk, SOCK_KEEPOPEN)) {
+			unsigned long expires = jiffies_import(si->cpt_ka_timeout);
+			/* An already-expired keepalive is pushed 1s ahead
+			 * so the timer fires after restore completes. */
+			if (time_after(jiffies, expires))
+				expires = jiffies + HZ;
+			sk_reset_timer(sk, &sk->sk_timer, expires);
+		}
+	}
+
+	if (sk->sk_family == AF_INET6)
+		sk->sk_gso_type = SKB_GSO_TCPV6;
+	else
+		sk->sk_gso_type = SKB_GSO_TCPV4;
+
+	return 0;
+}
+
+/* Restore the small TCP subset that is meaningful for a LISTEN socket:
+ * timestamps, header length, defer-accept, and the option fields that
+ * child sockets inherit at accept() time. */
+static void rst_listen_socket_tcp(struct cpt_sock_image *si, struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
+	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
+	tp->tcp_header_len = si->cpt_tcp_header_len;
+	inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
+
+	/* Next options are inherited by children */
+	tp->mss_cache = si->cpt_mss_cache;
+	inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len;
+	tp->reordering = si->cpt_reordering;
+	tp->nonagle = si->cpt_nonagle;
+	tp->keepalive_probes = si->cpt_keepalive_probes;
+	tp->rx_opt.user_mss = si->cpt_user_mss;
+	inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries;
+	tp->keepalive_time = si->cpt_keepalive_time;
+	tp->keepalive_intvl = si->cpt_keepalive_intvl;
+	tp->linger2 = si->cpt_linger2;
+}
+
+/*
+ * Restore inet-level options of a LISTEN socket from its image:
+ * IPv4 socket options, the IPv6 subset when applicable, and TCP
+ * listener state via rst_listen_socket_tcp().
+ * Always returns 0.  @pos and @ctx are currently unused.
+ */
+int rst_listen_socket_in( struct sock *sk, struct cpt_sock_image *si,
+			  loff_t pos, struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	lock_sock(sk);
+
+	inet->uc_ttl = si->cpt_uc_ttl;
+	inet->tos = si->cpt_tos;
+	inet->cmsg_flags = si->cpt_cmsg_flags;
+	inet->pmtudisc = si->cpt_pmtudisc;
+	inet->recverr = si->cpt_recverr;
+	inet->freebind = si->cpt_freebind;
+	inet->id = si->cpt_idcounter;
+
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		np->frag_size = si->cpt_frag_size6;
+		np->hop_limit = si->cpt_hop_limit6;
+
+		np->rxopt.all = si->cpt_rxopt6;
+		np->mc_loop = si->cpt_mc_loop6;
+		np->recverr = si->cpt_recverr6;
+		np->pmtudisc = si->cpt_pmtudisc6;
+		np->ipv6only = si->cpt_ipv6only6;
+	}
+
+	if (sk->sk_protocol == IPPROTO_TCP)
+		rst_listen_socket_tcp(si, sk);
+
+	release_sock(sk);
+	return 0;
+}
+
+/*
+ * Restore a non-listening inet socket from its checkpoint image @si at
+ * file position @pos: addresses/ports, IP options, cork state (with its
+ * route), UDP state, IPv6 state, queued skbs and, for TCP, the full
+ * protocol state via rst_socket_tcp().
+ * Called and returns with @sk unlocked; the socket lock is held for
+ * the duration of the restore.
+ * Returns 0 or a negative errno.
+ */
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+		  struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int err, ret_err = 0;
+
+	lock_sock(sk);
+
+	sk->sk_state = si->cpt_state;
+
+	inet->daddr = si->cpt_daddr;
+	inet->dport = si->cpt_dport;
+	inet->saddr = si->cpt_saddr;
+	inet->rcv_saddr = si->cpt_rcv_saddr;
+	inet->sport = si->cpt_sport;
+	inet->uc_ttl = si->cpt_uc_ttl;
+	inet->tos = si->cpt_tos;
+	inet->cmsg_flags = si->cpt_cmsg_flags;
+	inet->mc_index = si->cpt_mc_index;
+	inet->mc_addr = si->cpt_mc_addr;
+	inet->hdrincl = si->cpt_hdrincl;
+	inet->mc_ttl = si->cpt_mc_ttl;
+	inet->mc_loop = si->cpt_mc_loop;
+	inet->pmtudisc = si->cpt_pmtudisc;
+	inet->recverr = si->cpt_recverr;
+	inet->freebind = si->cpt_freebind;
+	inet->id = si->cpt_idcounter;
+
+	/* Pending cork: restore the flow key and re-resolve its route;
+	 * on route failure the cork is dropped rather than failing the
+	 * whole restore. */
+	inet->cork.flags = si->cpt_cork_flags;
+	inet->cork.fragsize = si->cpt_cork_fragsize;
+	inet->cork.length = si->cpt_cork_length;
+	inet->cork.addr = si->cpt_cork_addr;
+	inet->cork.fl.fl4_src = si->cpt_cork_saddr;
+	inet->cork.fl.fl4_dst = si->cpt_cork_daddr;
+	inet->cork.fl.oif = si->cpt_cork_oif;
+	if (inet->cork.fragsize) {
+		if (ip_route_output_key(&inet->cork.rt, &inet->cork.fl)) {
+			eprintk_ctx("failed to restore cork route\n");
+			inet->cork.fragsize = 0;
+		}
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_sock *up = udp_sk(sk);
+		up->pending = si->cpt_udp_pending;
+		up->corkflag = si->cpt_udp_corkflag;
+		up->encap_type = si->cpt_udp_encap;
+		up->len = si->cpt_udp_len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		memcpy(&np->saddr, si->cpt_saddr6, 16);
+		memcpy(&np->rcv_saddr, si->cpt_rcv_saddr6, 16);
+		memcpy(&np->daddr, si->cpt_daddr6, 16);
+		np->flow_label = si->cpt_flow_label6;
+		np->frag_size = si->cpt_frag_size6;
+		np->hop_limit = si->cpt_hop_limit6;
+		np->mcast_hops = si->cpt_mcast_hops6;
+		np->mcast_oif = si->cpt_mcast_oif6;
+		np->rxopt.all = si->cpt_rxopt6;
+		np->mc_loop = si->cpt_mc_loop6;
+		np->recverr = si->cpt_recverr6;
+		np->sndflow = si->cpt_sndflow6;
+		np->pmtudisc = si->cpt_pmtudisc6;
+		np->ipv6only = si->cpt_ipv6only6;
+
+		if (si->cpt_mapped) {
+			struct ve_struct *ve;
+
+			ve = get_exec_env();
+			if (ve->ipv6_ops == NULL) {
+				eprintk_ctx("ipv6 socket with no ipv6 module?!\n");
+				/* Fix: the original returned here while
+				 * still holding the socket lock taken by
+				 * lock_sock() above, leaking the lock. */
+				release_sock(sk);
+				return -EPFNOSUPPORT;
+			}
+
+			ve->ipv6_ops->make_sk_mapped(sk);
+		}
+	}
+
+	err = restore_queues(sk, si, pos, ctx);
+
+	/* NOTE(review): the restore_queues() error is propagated only
+	 * for TCP sockets; for other protocols it is discarded and the
+	 * restore is treated as best-effort. */
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP) {
+		ret_err = err;
+		rst_socket_tcp(si, pos, sk, ctx);
+	}
+
+	release_sock(sk);
+	return ret_err;
+}
+
+/*
+ * Attach an already-restored child socket @sk to listener @lsk's
+ * accept queue by allocating a minimal request_sock for it, so a later
+ * accept() in the container picks it up.
+ * Returns 0, -EINVAL if @lsk is not listening, or -ENOMEM.
+ */
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *ctx)
+{
+	struct request_sock *req;
+
+	if (lsk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	req = reqsk_alloc(&tcp_request_sock_ops);
+	if (!req)
+		return -ENOMEM;
+
+	/* Detach from any struct socket; accept() will re-attach. */
+	sk->sk_socket = NULL;
+	sk->sk_sleep = NULL;
+	inet_csk_reqsk_queue_add(lsk, req, sk);
+	return 0;
+}
+
+/* Hash a (remote addr, remote port) pair into a SYN-queue bucket index,
+ * salted with the listener's per-socket random value @rnd. */
+static __inline__ u32 __tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
+{
+	return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
+}
+
+/*
+ * Rebuild listener @sk's SYN-wait (half-open connection) queue from
+ * the CPT_OBJ_OPENREQ records stored after the socket image at @pos.
+ * A record that fails to parse as an openreq is retried as a socket
+ * attribute via rst_sock_attr().  IPv6 requests are delegated to the
+ * per-VE ipv6_ops hooks when the ipv6 module is present.
+ * Returns 0 or a negative errno.
+ */
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si,
+			      loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end = pos + si->cpt_next;
+
+	pos += si->cpt_hdrlen;
+
+	lock_sock(sk);
+	while (pos < end) {
+		struct cpt_openreq_image oi;
+
+		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
+		if (err) {
+			/* Not an openreq record: consume it as a socket
+			 * attribute instead (rst_sock_attr advances pos). */
+			err = rst_sock_attr(&pos, sk, ctx);
+			if (err) {
+				release_sock(sk);
+				return err;
+			}
+
+			continue;
+		}
+
+		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
+			struct request_sock *req;
+			struct ve_struct *ve;
+
+			ve = get_exec_env();
+
+			if (oi.cpt_family == AF_INET6 &&
+			    sk->sk_family != AF_INET6)
+				/* related to non initialized cpt_family bug */
+				goto next;
+
+			if (oi.cpt_family == AF_INET6) {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+				if (ve->ipv6_ops != NULL)
+					req = ve->ipv6_ops->reqsk_alloc();
+				else
+#endif
+				{
+					release_sock(sk);
+					return -EPFNOSUPPORT;
+				}
+			} else
+				req = reqsk_alloc(&tcp_request_sock_ops);
+
+			if (req == NULL) {
+				release_sock(sk);
+				return -ENOMEM;
+			}
+
+			tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
+			tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
+			inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
+			req->mss = oi.cpt_mss;
+			req->retrans = oi.cpt_retrans;
+			inet_rsk(req)->snd_wscale = oi.cpt_snd_wscale;
+			inet_rsk(req)->rcv_wscale = oi.cpt_rcv_wscale;
+			inet_rsk(req)->tstamp_ok = oi.cpt_tstamp_ok;
+			inet_rsk(req)->sack_ok = oi.cpt_sack_ok;
+			inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
+			inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
+			inet_rsk(req)->acked = oi.cpt_acked;
+			inet_rsk(req)->opt = NULL;
+			req->window_clamp = oi.cpt_window_clamp;
+			req->rcv_wnd = oi.cpt_rcv_wnd;
+			req->ts_recent = oi.cpt_ts_recent;
+			req->expires = jiffies_import(oi.cpt_expires);
+			req->sk = NULL;
+			req->secid = 0;
+			req->peer_secid = 0;
+
+			/* Queue the request onto the listener's SYN queue
+			 * with a fresh initial timeout. */
+			if (oi.cpt_family == AF_INET6) {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+				inet6_rsk(req)->pktopts = NULL;
+				memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16);
+				memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16);
+				inet6_rsk(req)->iif = oi.cpt_iif;
+				ve->ipv6_ops->reqsk_queue(sk, req, TCP_TIMEOUT_INIT);
+#endif
+			} else {
+				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
+				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
+				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+			}
+		}
+next:
+		pos += oi.cpt_next;
+	}
+	release_sock(sk);
+	return 0;
+}
+
+/*
+ * Restore one IPv4 multicast membership record for @sk by re-joining
+ * the group.  Only plain (IGMPv1/v2-style) memberships are supported:
+ * source-filtered (IGMPv3) records are rejected with -EINVAL.
+ */
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
+		       loff_t pos, cpt_context_t *ctx)
+{
+	struct ip_mreqn imr;
+
+	if (v->cpt_mode || v->cpt_next != v->cpt_hdrlen) {
+		eprintk_ctx("IGMPv3 is still not supported\n");
+		return -EINVAL;
+	}
+
+	memset(&imr, 0, sizeof(imr));
+	imr.imr_ifindex = v->cpt_ifindex;
+	imr.imr_multiaddr.s_addr = v->cpt_mcaddr[0];
+	return ip_mc_join_group(sk, &imr);
+}
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+/*
+ * IPv6 counterpart of rst_sk_mcfilter_in(): re-join a multicast group
+ * through the per-VE ipv6_ops hook.  Source-filtered (MLDv2-style)
+ * records are rejected with -EINVAL; -EPFNOSUPPORT if the ipv6 module
+ * is absent in this VE.
+ */
+int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v,
+			loff_t pos, cpt_context_t *ctx)
+{
+	struct ve_struct *ve;
+
+	if (v->cpt_mode || v->cpt_next != v->cpt_hdrlen) {
+		eprintk_ctx("IGMPv3 is still not supported\n");
+		return -EINVAL;
+	}
+
+	ve = get_exec_env();
+	if (ve->ipv6_ops == NULL)
+		return -EPFNOSUPPORT;
+
+	return ve->ipv6_ops->sock_mc_join(sk, v->cpt_ifindex,
+			(struct in6_addr*)v->cpt_mcaddr);
+}
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_sysvipc.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_sysvipc.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_sysvipc.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_sysvipc.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,584 @@
+/*
+ *
+ *  kernel/cpt/rst_sysvipc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/msg.h>
+/* FIXME. x86_64 has asm/ipc.h forgotten? */
+#include <asm-generic/ipc.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+/* Walker argument for fixup_one_shm(): segments are matched by their
+ * backing file, image data comes from @v. */
+struct _warg {
+		struct file		*file;
+		struct cpt_sysvshm_image	*v;
+};
+
+/*
+ * sysvipc_walk_shm() callback: if @shp is the segment backed by the
+ * file in @arg, copy ownership, mode and time stamps from the image.
+ * Returns 0 to keep walking, 1 when fixed (stops the walk), or
+ * -EEXIST if the segment already has attachments.
+ */
+static int fixup_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *warg = arg;
+
+	if (shp->shm_file != warg->file)
+		return 0;
+	if (shp->shm_nattch)
+		return -EEXIST;
+
+	shp->shm_perm.uid = warg->v->cpt_uid;
+	shp->shm_perm.gid = warg->v->cpt_gid;
+	shp->shm_perm.cuid = warg->v->cpt_cuid;
+	shp->shm_perm.cgid = warg->v->cpt_cgid;
+	shp->shm_perm.mode = warg->v->cpt_mode;
+
+	shp->shm_atim = warg->v->cpt_atime;
+	shp->shm_dtim = warg->v->cpt_dtime;
+	shp->shm_ctim = warg->v->cpt_ctime;
+	shp->shm_cprid = warg->v->cpt_creator;
+	shp->shm_lprid = warg->v->cpt_last;
+
+	/* TODO: fix shp->mlock_user? */
+	return 1;
+}
+
+/* Apply the checkpointed permissions/times in @v to the shm segment
+ * backed by @file; see fixup_one_shm() for return-value semantics. */
+static int fixup_shm(struct file *file, struct cpt_sysvshm_image *v)
+{
+	struct _warg warg;
+
+	warg.file = file;
+	warg.v = v;
+
+	return sysvipc_walk_shm(fixup_one_shm, &warg);
+}
+
+/*
+ * Copy one CPT_OBJ_PAGES chunk from the dump (starting at @pos plus
+ * the header) into @file at the chunk's recorded offset, one page at
+ * a time through ctx->tmpbuf.  set_fs(KERNEL_DS) is needed because
+ * both ctx->pread and the tmpfs ->write operate on kernel buffers.
+ * Returns 0 or a negative errno (short writes become -EIO).
+ */
+static int
+restore_data_chunk(struct file *file, loff_t pos, struct cpt_page_block * pgb,
+		   cpt_context_t *ctx)
+{
+	loff_t ipos = pos + pgb->cpt_hdrlen;
+	loff_t opos = pgb->cpt_start;
+	int count = pgb->cpt_end-pgb->cpt_start;
+	int err;
+
+	while (count > 0) {
+		mm_segment_t oldfs;
+		int copy = count;
+
+		if (copy > PAGE_SIZE)
+			copy = PAGE_SIZE;
+		(void)cpt_get_buf(ctx);
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+		set_fs(oldfs);
+		if (err) {
+			__cpt_release_buf(ctx);
+			return err;
+		}
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		ipos += copy;
+		err = file->f_dentry->d_inode->i_fop->write(file, ctx->tmpbuf,
+							    copy, &opos);
+		set_fs(oldfs);
+		__cpt_release_buf(ctx);
+		if (err != copy) {
+			eprintk_ctx("write() failure\n");
+			if (err >= 0)
+				err = -EIO;
+			return err;
+		}
+		count -= copy;
+	}
+	return 0;
+}
+
+/*
+ * Restore the data content of a SYSV shm segment: iterate the page
+ * blocks between @pos and @end and write each into @file (regular
+ * chunks via restore_data_chunk(), iterative-migration chunks via
+ * rst_iter_chunk() when compiled in).  Requires the tmpfs inode to
+ * expose a ->write op.  Returns 0 or a negative errno.
+ */
+static int fixup_shm_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	struct cpt_page_block pgb;
+
+	if (file->f_dentry->d_inode->i_fop->write == NULL) {
+		eprintk_ctx("No TMPFS? Cannot restore content of SYSV SHM\n");
+		return -EINVAL;
+	}
+
+	while (pos < end) {
+		int err;
+
+		err = rst_get_object(-1, pos, &pgb, ctx);
+		if (err)
+			return err;
+		dprintk_ctx("restoring SHM block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		err = -EINVAL;
+		switch (pgb.cpt_object) {
+		case CPT_OBJ_PAGES:
+			err = restore_data_chunk(file, pos, &pgb, ctx);
+			if (err)
+				return err;
+			break;
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+		case CPT_OBJ_ITERPAGES:
+		case CPT_OBJ_ITERYOUNGPAGES:
+			err = rst_iter_chunk(file, pos, &pgb, ctx);
+			if (err)
+				return err;
+			break;
+#endif
+		default:
+			return -EINVAL;
+		}
+		pos += pgb.cpt_next;
+	}
+	return 0;
+}
+
+/*
+ * Recreate a SYSV shm segment from the file image at @pos: read the
+ * file/inode/shm headers, set up the segment with its original key,
+ * id, size and mode, then fix up metadata and restore its data pages.
+ * Returns the segment's backing file or an ERR_PTR on failure.
+ */
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx)
+{
+	struct file *file;
+	int err;
+	loff_t dpos, epos;
+	union {
+		struct cpt_file_image		fi;
+		struct cpt_sysvshm_image	shmi;
+		struct cpt_inode_image 		ii;
+	} u;
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &u.fi, ctx);
+	if (err < 0)
+		goto err_out;
+	pos = u.fi.cpt_inode;
+	err = rst_get_object(CPT_OBJ_INODE, pos, &u.ii, ctx);
+	if (err < 0)
+		goto err_out;
+	/* Data pages (if any) follow the shm header inside the inode
+	 * object: [dpos, epos). */
+	dpos = pos + u.ii.cpt_hdrlen;
+	epos = pos + u.ii.cpt_next;
+	err = rst_get_object(CPT_OBJ_SYSV_SHM, pos + u.ii.cpt_hdrlen, &u.shmi, ctx);
+	if (err < 0)
+		goto err_out;
+	dpos += u.shmi.cpt_next;
+
+	file = sysvipc_setup_shm(u.shmi.cpt_key, u.shmi.cpt_id, 
+				 u.shmi.cpt_segsz, u.shmi.cpt_mode);
+	if (!IS_ERR(file)) {
+		/* -EEXIST means the segment is already attached, so its
+		 * content must not be overwritten.
+		 * NOTE(review): other fixup_shm() errors are not
+		 * propagated; the file is returned regardless. */
+		err = fixup_shm(file, &u.shmi);
+		if (err != -EEXIST && dpos < epos) {
+			err = fixup_shm_data(file, dpos, epos, ctx);
+			if (err)
+				goto err_put;
+		}
+	}
+
+	return file;
+
+err_put:
+	fput(file);
+err_out:
+	return ERR_PTR(err);
+}
+
+/*
+ * sysvipc_walk_sem() callback: when @semid matches the undo record in
+ * @arg, link the record into both the current task's undo list and
+ * the semaphore array's undo chain.  Returns 1 when attached (stops
+ * the walk), 0 to keep walking.
+ */
+static int attach_one_undo(int semid, struct sem_array *sma, void *arg)
+{
+	struct sem_undo *su = arg;
+	struct sem_undo_list *undo_list = current->sysvsem.undo_list;
+
+	if (semid != su->semid)
+		return 0;
+
+	su->proc_next = undo_list->proc_list;
+	undo_list->proc_list = su;
+
+	su->id_next = sma->undo;
+	sma->undo = su;
+
+	return 1;
+}
+
+/* Attach the undo record @su to its semaphore set; returns 1 on
+ * success, 0 if no set with su->semid exists. */
+static int attach_undo(struct sem_undo *su)
+{
+	return sysvipc_walk_sem(attach_one_undo, su);
+}
+
+/*
+ * Rebuild the current task's SYSV semaphore undo list from the image
+ * at @pos: allocate a fresh sem_undo_list, then read each
+ * CPT_OBJ_SYSVSEM_UNDO_REC, allocate a sem_undo with its semadj array
+ * appended, load the adjustments and attach it to the matching
+ * semaphore set.  Returns 0 or a negative errno.
+ */
+static int do_rst_semundo(struct cpt_object_hdr *sui, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *undo_list;
+
+	/* The restored task is expected to start with no undo list. */
+	if (current->sysvsem.undo_list) {
+		eprintk_ctx("Funny undo_list\n");
+		return 0;
+	}
+
+	undo_list = ub_kmalloc(sizeof(struct sem_undo_list), GFP_KERNEL);
+	if (undo_list == NULL)
+		return -ENOMEM;
+	memset(undo_list, 0, sizeof(struct sem_undo_list));
+	atomic_set(&undo_list->refcnt, 1);
+	spin_lock_init(&undo_list->lock);
+	current->sysvsem.undo_list = undo_list;
+
+	if (sui->cpt_next > sui->cpt_hdrlen) {
+		loff_t offset = pos + sui->cpt_hdrlen;
+		do {
+			struct sem_undo *new;
+			struct cpt_sysvsem_undo_image spi;
+			err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO_REC, offset, &spi, ctx);
+			if (err)
+				goto out;
+			/* semadj[] lives directly after the struct. */
+			new = ub_kmalloc(sizeof(struct sem_undo) +
+					 sizeof(short)*spi.cpt_nsem, GFP_KERNEL);
+			if (!new) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*spi.cpt_nsem);
+			new->semadj = (short *) &new[1];
+			new->semid = spi.cpt_id;
+			err = ctx->pread(new->semadj, spi.cpt_nsem*sizeof(short), ctx, offset + spi.cpt_hdrlen);
+			if (err) {
+				kfree(new);
+				goto out;
+			}
+			err = attach_undo(new);
+			if (err <= 0) {
+				if (err == 0)
+					err = -ENOENT;
+				kfree(new);
+				goto out;
+			}
+			offset += spi.cpt_next;
+		} while (offset < pos + sui->cpt_next);
+	}
+	err = 0;
+
+out:
+	return err;
+}
+
+/* Compute clone flags needed to share the semaphore undo list.
+ * The CLONE_SYSVSEM logic is currently compiled out (#if 0), so this
+ * always returns 0. */
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+
+#if 0
+	if (ti->cpt_sysvsem_undo == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo))
+		flag |= CLONE_SYSVSEM;
+#endif
+	return flag;
+}
+
+/*
+ * Finish restoring the current task's semaphore undo list.  Three
+ * cases: the task had none (drop the one inherited from the restore
+ * helper), the list was already restored for another task (share it),
+ * or it must be built now via do_rst_semundo() and registered so
+ * later tasks can share it.  Returns 0 or a negative errno.
+ */
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *f = current->sysvsem.undo_list;
+	cpt_object_t *obj;
+	struct cpt_object_hdr sui;
+
+	if (ti->cpt_sysvsem_undo == CPT_NULL) {
+		exit_sem(current);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, ctx);
+	if (obj) {
+		/* Already restored: share the existing list. */
+		if (obj->o_obj != f) {
+			exit_sem(current);
+			f = obj->o_obj;
+			atomic_inc(&f->refcnt);
+			current->sysvsem.undo_list = f;
+		}
+		return 0;
+	}
+
+	if ((err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, &sui, ctx)) != 0)
+		goto out;
+
+	if ((err = do_rst_semundo(&sui, ti->cpt_sysvsem_undo, ctx)) != 0)
+		goto out;
+
+	/* NOTE(review): if cpt_object_add() fails, err is set to -ENOMEM
+	 * but the function still returns 0 below. */
+	err = -ENOMEM;
+	obj = cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, f, ctx);
+	if (obj) {
+		err = 0;
+		cpt_obj_setpos(obj, ti->cpt_sysvsem_undo, ctx);
+	}
+
+	return 0;
+
+out:
+	return err;
+}
+
+/* Walker argument for fixup_one_sem(): the set is matched by @semid,
+ * metadata comes from @v, raw semaphore values from @arr. */
+struct _sarg {
+	int semid;
+	struct cpt_sysvsem_image	*v;
+	__u32				*arr;
+};
+
+/*
+ * sysvipc_walk_sem() callback: when @semid matches, restore ownership,
+ * mode, sequence and times from the image, and copy the raw semaphore
+ * state into sem_base (8 bytes per semaphore, matching the on-disk
+ * layout used by restore_sem()).  Returns 1 when fixed, 0 otherwise.
+ */
+static int fixup_one_sem(int semid, struct sem_array *sma, void *arg)
+{
+	struct _sarg *warg = arg;
+
+	if (semid != warg->semid)
+		return 0;
+
+	sma->sem_perm.uid = warg->v->cpt_uid;
+	sma->sem_perm.gid = warg->v->cpt_gid;
+	sma->sem_perm.cuid = warg->v->cpt_cuid;
+	sma->sem_perm.cgid = warg->v->cpt_cgid;
+	sma->sem_perm.mode = warg->v->cpt_mode;
+	sma->sem_perm.seq = warg->v->cpt_seq;
+
+	sma->sem_ctime = warg->v->cpt_ctime;
+	sma->sem_otime = warg->v->cpt_otime;
+	memcpy(sma->sem_base, warg->arr, sma->sem_nsems*8);
+	return 1;
+}
+
+/* Apply image metadata @v and raw values @arr to semaphore set @semid;
+ * see fixup_one_sem() for return-value semantics. */
+static int fixup_sem(int semid, struct cpt_sysvsem_image *v, __u32 *arr)
+{
+	struct _sarg warg;
+
+	warg.semid = semid;
+	warg.v = v;
+	warg.arr = arr;
+
+	return sysvipc_walk_sem(fixup_one_sem, &warg);
+}
+
+
+/*
+ * Recreate one SYSV semaphore set from the image at @pos: the payload
+ * after the header holds 8 bytes per semaphore, which are read into a
+ * temporary array, the set is created with its original key/id/mode,
+ * and the raw state applied via fixup_sem().  Returns 0 or a negative
+ * errno (-ESRCH if the newly created set cannot be found again).
+ */
+static int restore_sem(loff_t pos, struct cpt_sysvsem_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	__u32 *arr;
+	int nsems = (si->cpt_next - si->cpt_hdrlen)/8;
+
+	arr = kmalloc(nsems*8, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+
+	err = ctx->pread(arr, nsems*8, ctx, pos+si->cpt_hdrlen);
+	if (err)
+		goto out;
+	err = sysvipc_setup_sem(si->cpt_key, si->cpt_id, nsems, si->cpt_mode);
+	if (err < 0) {
+		eprintk_ctx("SEM 3\n");
+		goto out;
+	}
+	err = fixup_sem(si->cpt_id, si, arr);
+	if (err == 0)
+		err = -ESRCH;
+	if (err > 0)
+		err = 0;
+out:
+	kfree(arr);
+	return err;
+}
+
+/*
+ * Restore all SYSV semaphore sets: walk the CPT_SECT_SYSV_SEM section
+ * (if present) and rebuild each set via restore_sem().
+ * Returns 0 or the first error.
+ */
+static int rst_sysv_sem(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SYSV_SEM];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_sysvsem_image sbuf;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_SYSV_SEM || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;
+		err = rst_get_object(CPT_OBJ_SYSV_SEM, sec, &sbuf, ctx);
+		if (err)
+			return err;
+		err = restore_sem(sec, &sbuf, ctx);
+		if (err)
+			return err;
+		sec += sbuf.cpt_next;
+	}
+	return 0;
+}
+
+/* Walker argument for fixup_one_msg(): queue matched by @msqid,
+ * metadata from @v; @m receives the matched queue for later use. */
+struct _marg {
+	int				msqid;
+	struct cpt_sysvmsg_image	*v;
+	struct msg_queue		*m;
+};
+
+/*
+ * sysvipc_walk_msg() callback: when @msqid matches, restore ownership,
+ * mode, times and limits from the image and remember the queue in
+ * warg->m so fixup_msg() can append the messages.  Returns 1 when
+ * fixed (stops the walk), 0 to keep walking.
+ */
+static int fixup_one_msg(int msqid, struct msg_queue *msq, void *arg)
+{
+	struct _marg *warg = arg;
+
+	if (msqid != warg->msqid)
+		return 0;
+
+	msq->q_perm.uid = warg->v->cpt_uid;
+	msq->q_perm.gid = warg->v->cpt_gid;
+	msq->q_perm.cuid = warg->v->cpt_cuid;
+	msq->q_perm.cgid = warg->v->cpt_cgid;
+	msq->q_perm.mode = warg->v->cpt_mode;
+	msq->q_perm.seq = warg->v->cpt_seq;
+
+	msq->q_stime = warg->v->cpt_stime;
+	msq->q_rtime = warg->v->cpt_rtime;
+	msq->q_ctime = warg->v->cpt_ctime;
+	msq->q_lspid = warg->v->cpt_last_sender;
+	msq->q_lrpid = warg->v->cpt_last_receiver;
+	msq->q_qbytes = warg->v->cpt_qbytes;
+
+	warg->m = msq;
+	return 1;
+}
+
+/* Loader argument for do_load_msg(): dump context plus the base file
+ * position of the message body being read. */
+struct _larg
+{
+	cpt_context_t * ctx;
+	loff_t		pos;
+};
+
+/* sysv_msg_load() callback: read @len bytes of message body at
+ * @offset within the dump into @dst. */
+static int do_load_msg(void * dst, int len, int offset, void * data)
+{
+	struct _larg * arg = data;
+	return arg->ctx->pread(dst, len, arg->ctx, arg->pos + offset);
+}
+
+/*
+ * Fix up message queue @msqid from the image at @pos: restore queue
+ * metadata via fixup_one_msg(), then re-read every stored message,
+ * append it to the queue and update the IPC namespace accounting
+ * (msg_bytes/msg_hdrs).  Returns 1 on success, 0 if the queue was not
+ * found, or a negative errno.
+ */
+static int fixup_msg(int msqid, struct cpt_sysvmsg_image *v, loff_t pos,
+		     cpt_context_t * ctx)
+{
+	int err;
+	struct _marg warg;
+	loff_t endpos = pos + v->cpt_next;
+	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+
+	pos += v->cpt_hdrlen;
+
+	warg.msqid = msqid;
+	warg.v = v;
+
+	err = sysvipc_walk_msg(fixup_one_msg, &warg);
+	if (err <= 0)
+		return err;
+
+	while (pos < endpos) {
+		struct cpt_sysvmsg_msg_image mi;
+		struct msg_msg *m;
+		struct _larg data = {
+			.ctx = ctx
+		};
+
+		err = rst_get_object(CPT_OBJ_SYSVMSG_MSG, pos, &mi, ctx);
+		if (err)
+			return err;
+		data.pos = pos + mi.cpt_hdrlen;
+		m = sysv_msg_load(do_load_msg, mi.cpt_size, &data);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+		m->m_type = mi.cpt_type;
+		m->m_ts = mi.cpt_size;
+		list_add_tail(&m->m_list, &warg.m->q_messages);
+		warg.m->q_cbytes += m->m_ts;
+		warg.m->q_qnum++;
+		atomic_add(m->m_ts, &ns->msg_bytes);
+		atomic_inc(&ns->msg_hdrs);
+			
+		pos += mi.cpt_next;
+	}
+	return 1;
+}
+
+/*
+ * Recreate one SYSV message queue from the image at @pos: create it
+ * with its original key/id/mode, then restore metadata and messages
+ * via fixup_msg().  Returns 0 or a negative errno (-ESRCH if the new
+ * queue cannot be found again).
+ */
+static int restore_msg(loff_t pos, struct cpt_sysvmsg_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+
+	err = sysvipc_setup_msg(si->cpt_key, si->cpt_id, si->cpt_mode);
+	if (err < 0) {
+		eprintk_ctx("MSG 3\n");
+		goto out;
+	}
+	err = fixup_msg(si->cpt_id, si, pos, ctx);
+	if (err == 0)
+		err = -ESRCH;
+	if (err > 0)
+		err = 0;
+out:
+	return err;
+}
+
+/*
+ * Restore all SYSV message queues: walk the CPT_SECT_SYSV_MSG section
+ * (if present) and rebuild each queue via restore_msg().
+ * Returns 0 or the first error.
+ */
+static int rst_sysv_msg(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SYSV_MSG];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_sysvmsg_image sbuf;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_SYSV_MSG || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;
+		err = rst_get_object(CPT_OBJ_SYSVMSG, sec, &sbuf, ctx);
+		if (err)
+			return err;
+		err = restore_msg(sec, &sbuf, ctx);
+		if (err)
+			return err;
+		sec += sbuf.cpt_next;
+	}
+	return 0;
+}
+
+
+/* Restore all SYSV IPC state: semaphore sets first, then message
+ * queues (shared memory is restored per-file via rst_sysv_shm()).
+ * Returns 0 or the first error encountered. */
+int rst_sysv_ipc(struct cpt_context *ctx)
+{
+	int err;
+
+	err = rst_sysv_sem(ctx);
+	if (!err)
+		err = rst_sysv_msg(ctx);
+
+	return err;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_tty.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_tty.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_tty.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_tty.c	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,380 @@
+/*
+ *
+ *  kernel/cpt/rst_tty.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/vmalloc.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+/* Restore the saved state of one pty end from the image record at @pos:
+ * flags, termios, window size and line-discipline fields, plus any
+ * buffered-but-unread input appended after the record header.
+ * Returns 0 on success or a negative error code. */
+static int pty_setup(struct tty_struct *stty, loff_t pos,
+		     struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	unsigned long flags;
+
+	/* pgrp/session are fixed up later by rst_tty_jobcontrol() */
+	stty->pgrp = -1;
+	stty->session = 0;
+	stty->packet = pi->cpt_packet;
+	stty->stopped = pi->cpt_stopped;
+	stty->hw_stopped = pi->cpt_hw_stopped;
+	stty->flow_stopped = pi->cpt_flow_stopped;
+#define DONOT_CHANGE ((1<<TTY_CHARGED)|(1<<TTY_CLOSING)|(1<<TTY_LDISC))
+	/* keep the live tty's lifecycle bits, take the rest from the image */
+	flags = stty->flags & DONOT_CHANGE;
+	stty->flags = flags | (pi->cpt_flags & ~DONOT_CHANGE);
+	stty->ctrl_status = pi->cpt_ctrl_status;
+	stty->winsize.ws_row = pi->cpt_ws_row;
+	stty->winsize.ws_col = pi->cpt_ws_col;
+	stty->winsize.ws_ypixel = pi->cpt_ws_prow;
+	stty->winsize.ws_xpixel = pi->cpt_ws_pcol;
+	stty->canon_column = pi->cpt_canon_column;
+	stty->column = pi->cpt_column;
+	stty->raw = pi->cpt_raw;
+	stty->real_raw = pi->cpt_real_raw;
+	stty->erasing = pi->cpt_erasing;
+	stty->lnext = pi->cpt_lnext;
+	stty->icanon = pi->cpt_icanon;
+	stty->closing = pi->cpt_closing;
+	stty->minimum_to_wake = pi->cpt_minimum_to_wake;
+
+	stty->termios->c_iflag = pi->cpt_c_iflag;
+	stty->termios->c_oflag = pi->cpt_c_oflag;
+	stty->termios->c_lflag = pi->cpt_c_lflag;
+	stty->termios->c_cflag = pi->cpt_c_cflag;
+	memcpy(&stty->termios->c_cc, &pi->cpt_c_cc, NCCS);
+	memcpy(stty->read_flags, pi->cpt_read_flags, sizeof(stty->read_flags));
+
+	/* cpt_next > cpt_hdrlen means queued input follows the header */
+	if (pi->cpt_next > pi->cpt_hdrlen) {
+		int err;
+		struct cpt_obj_bits b;
+		err = rst_get_object(CPT_OBJ_BITS, pos + pi->cpt_hdrlen, &b, ctx);
+		if (err)
+			return err;
+		if (b.cpt_size == 0)
+			return 0;
+		/* The image is untrusted input: an oversized cpt_size used
+		 * to overflow the fixed-size n_tty read buffer here. */
+		if (b.cpt_size > N_TTY_BUF_SIZE)
+			return -EINVAL;
+		err = ctx->pread(stty->read_buf, b.cpt_size, ctx, pos + pi->cpt_hdrlen + b.cpt_hdrlen);
+		if (err)
+			return err;
+
+		spin_lock_irq(&stty->read_lock);
+		stty->read_tail = 0;
+		stty->read_cnt = b.cpt_size;
+		stty->read_head = b.cpt_size;
+		stty->canon_head = stty->read_tail + pi->cpt_canon_head;
+		stty->canon_data = pi->cpt_canon_data;
+		spin_unlock_irq(&stty->read_lock);
+	}
+
+	return 0;
+}
+
+/* Find slave/master tty in image, when we already know master/slave.
+ * It might be optimized, of course. */
+/* Find slave/master tty in image, when we already know master/slave.
+ * It might be optimized, of course.  On success the pair's state is
+ * applied to @stty and its image position is returned; CPT_NULL is
+ * returned when the pair is absent or the section is malformed. */
+static loff_t find_pty_pair(struct tty_struct *stty, loff_t pos, struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_tty_image *pibuf;
+	loff_t ret = CPT_NULL;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return CPT_NULL;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return CPT_NULL;
+	pibuf = kmalloc(sizeof(*pibuf), GFP_KERNEL);
+	if (pibuf == NULL) {
+		eprintk_ctx("cannot allocate buffer\n");
+		return CPT_NULL;
+	}
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		/* was: returned directly here, leaking pibuf */
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx))
+			break;
+		/* the pair shares the index, has the opposite DEVPTS flag
+		 * and is a different record than @pos itself */
+		if (pibuf->cpt_index == pi->cpt_index &&
+		    !((pi->cpt_drv_flags^pibuf->cpt_drv_flags)&TTY_DRIVER_DEVPTS_MEM) &&
+		    pos != sec) {
+			pty_setup(stty, sec, pibuf, ctx);
+			/* was: returned directly here too, same leak */
+			ret = sec;
+			break;
+		}
+		sec += pibuf->cpt_next;
+	}
+	kfree(pibuf);
+	return ret;
+}
+
+static int fixup_tty_attrs(struct cpt_inode_image *ii, struct file *master,
+			   struct cpt_context *ctx)
+{
+	int err;
+	struct iattr newattrs;
+	struct dentry *d = master->f_dentry;
+
+	newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE;
+	newattrs.ia_uid = ii->cpt_uid;
+	newattrs.ia_gid = ii->cpt_gid;
+	newattrs.ia_mode = ii->cpt_mode;
+
+	mutex_lock(&d->d_inode->i_mutex);
+	err = notify_change(d, &newattrs);
+	mutex_unlock(&d->d_inode->i_mutex);
+
+	return err;
+}
+
+/* NOTE: "portable", but ugly thing. To allocate /dev/pts/N, we open
+ * /dev/ptmx until we get pty with desired index.
+ */
+
+/* Open /dev/ptmx repeatedly until the kernel hands us the pty with the
+ * requested @index, parking every unwanted master in a page-sized array
+ * so its index stays busy; all parked files are released before return.
+ * Returns the matching file or an ERR_PTR (-EBUSY when a page worth of
+ * masters was not enough, -ENOMEM, or filp_open()'s error). */
+struct file *ptmx_open(int index, unsigned int flags)
+{
+	struct file **held = NULL;
+	struct file *filp;
+	int nheld = 0;
+
+	for (;;) {
+		struct tty_struct *tty;
+
+		filp = filp_open("/dev/ptmx", flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+		if (IS_ERR(filp))
+			break;
+		tty = filp->private_data;
+		if (tty->index == index)
+			break;
+
+		/* give up once the parking page is full */
+		if (nheld == PAGE_SIZE/sizeof(struct file *)) {
+			fput(filp);
+			filp = ERR_PTR(-EBUSY);
+			break;
+		}
+		if (held == NULL) {
+			held = (struct file **)__get_free_page(GFP_KERNEL);
+			if (!held) {
+				fput(filp);
+				filp = ERR_PTR(-ENOMEM);
+				break;
+			}
+		}
+		held[nheld++] = filp;
+	}
+	while (nheld > 0)
+		fput(held[--nheld]);
+	if (held)
+		free_page((unsigned long)held);
+	return filp;
+}
+
+
+/*
+ * Open (or re-open) the pty pair a checkpointed file referred to and
+ * return the end this file image describes.  If the tty object was
+ * already restored (obj->o_parent set), the existing end is reused or
+ * re-opened; otherwise both master and slave are opened fresh, their
+ * saved state is applied via pty_setup()/find_pty_pair(), and both are
+ * registered as CPT objects for later lookups.
+ * Returns a referenced struct file or an ERR_PTR.
+ */
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii,
+			   unsigned flags, struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct file *master, *slave;
+	struct tty_struct *stty;
+	struct cpt_tty_image *pi;
+	/* legacy BSD pty name alphabet: /dev/pty[p-e][0-f] */
+	static char *a = "pqrstuvwxyzabcde";
+	static char *b = "0123456789abcdef";
+	char pairname[16];
+	unsigned master_flags, slave_flags;
+
+	if (fi->cpt_priv == CPT_NULL)
+		return ERR_PTR(-EINVAL);
+
+	/* Fast path: the tty was already restored as the pair of a
+	 * previously handled file. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, fi->cpt_priv, ctx);
+	if (obj && obj->o_parent) {
+		dprintk_ctx("obtained pty as pair to existing\n");
+		master = obj->o_parent;
+		stty = master->private_data;
+
+		if (stty->driver->subtype == PTY_TYPE_MASTER &&
+		    (stty->driver->flags&TTY_DRIVER_DEVPTS_MEM)) {
+			wprintk_ctx("cloning ptmx\n");
+			get_file(master);
+			return master;
+		}
+
+		/* re-open the same dentry to get an independent file */
+		master = dentry_open(dget(master->f_dentry),
+				     mntget(master->f_vfsmnt), flags);
+		if (!IS_ERR(master)) {
+			stty = master->private_data;
+			if (stty->driver->subtype != PTY_TYPE_MASTER)
+				fixup_tty_attrs(ii, master, ctx);
+		}
+		return master;
+	}
+
+	pi = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_TTY, fi->cpt_priv, pi, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return ERR_PTR(err);
+	}
+
+	/* apply the caller's open flags only to the end being restored */
+	master_flags = slave_flags = 0;
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER)
+		master_flags = flags;
+	else
+		slave_flags = flags;
+
+	/*
+	 * Open pair master/slave.
+	 */
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) {
+		master = ptmx_open(pi->cpt_index, master_flags);
+	} else {
+		sprintf(pairname, "/dev/pty%c%c", a[pi->cpt_index/16], b[pi->cpt_index%16]);
+		master = filp_open(pairname, master_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	}
+	if (IS_ERR(master)) {
+		eprintk_ctx("filp_open master: %Ld %ld\n", (long long)fi->cpt_priv, PTR_ERR(master));
+		cpt_release_buf(ctx);
+		return master;
+	}
+	stty = master->private_data;
+	/* unlock the slave side so it can be opened below */
+	clear_bit(TTY_PTY_LOCK, &stty->flags);
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM)
+		sprintf(pairname, "/dev/pts/%d", stty->index);
+	else
+		sprintf(pairname, "/dev/tty%c%c", a[stty->index/16], b[stty->index%16]);
+	slave = filp_open(pairname, slave_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	if (IS_ERR(slave)) {
+		eprintk_ctx("filp_open slave %s: %ld\n", pairname, PTR_ERR(slave));
+		fput(master);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+
+	if (pi->cpt_drv_subtype != PTY_TYPE_MASTER)
+		fixup_tty_attrs(ii, slave, ctx);
+
+	/* register both ends so later files can find them by position */
+	cpt_object_add(CPT_OBJ_TTY, master->private_data, ctx);
+	cpt_object_add(CPT_OBJ_TTY, slave->private_data, ctx);
+	cpt_object_add(CPT_OBJ_FILE, master, ctx);
+	cpt_object_add(CPT_OBJ_FILE, slave, ctx);
+
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) {
+		loff_t pos;
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty, fi->cpt_priv, pi, ctx);
+
+		/* restore the slave end from its own image record */
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		pos = find_pty_pair(stty->link, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, slave, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(master);
+		cpt_release_buf(ctx);
+		return master;
+	} else {
+		loff_t pos;
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty->link, fi->cpt_priv, pi, ctx);
+
+		/* restore the master end from its own image record */
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		pos = find_pty_pair(stty, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, master, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(slave);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+}
+
+/*
+ * Second pass over the TTY section: reattach job-control state (pgrp
+ * and session) to every restored tty.  Runs after all processes exist,
+ * so saved virtual pids can be translated with vpid_to_pid().
+ * Returns 0 on success or a negative error code.
+ */
+int rst_tty_jobcontrol(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_tty_image *pibuf = cpt_get_buf(ctx);
+
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx)) {
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+
+		/* only ttys that were actually restored are fixed up */
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, sec, ctx);
+		if (obj) {
+			struct tty_struct *stty = obj->o_obj;
+			if ((int)pibuf->cpt_pgrp > 0) {
+				stty->pgrp = vpid_to_pid(pibuf->cpt_pgrp);
+				if (stty->pgrp == -1)
+					dprintk_ctx("unknown tty pgrp %d\n", pibuf->cpt_pgrp);
+			} else if (pibuf->cpt_pgrp) {
+				/* negative pgrp in the image: the group no
+				 * longer exists, reserve a fresh stray pid
+				 * number for it */
+				stty->pgrp = alloc_pidmap();
+				if (stty->pgrp < 0) {
+					eprintk_ctx("cannot allocate stray tty->pgrp");
+					cpt_release_buf(ctx);
+					return -EINVAL;
+				}
+				free_pidmap(stty->pgrp);
+			}
+			if ((int)pibuf->cpt_session > 0) {
+				int sess;
+				sess = vpid_to_pid(pibuf->cpt_session);
+				if (sess == -1) {
+					dprintk_ctx("unknown tty session %d\n", pibuf->cpt_session);
+				} else if (stty->session <= 0) {
+					stty->session = sess;
+				} else if (stty->session != sess) {
+					/* both pty ends carry a session;
+					 * they should agree */
+					wprintk_ctx("tty session mismatch 2\n");
+				}
+			}
+		}
+		sec += pibuf->cpt_next;
+		cpt_release_buf(ctx);
+	}
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_ubc.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_ubc.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_ubc.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_ubc.c	2017-01-13 08:40:26.000000000 -0500
@@ -0,0 +1,142 @@
+/*
+ *
+ *  kernel/cpt/rst_ubc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Look up the beancounter restored from image position @pos and return
+ * it with an extra reference.  Falls back to the current execution
+ * beancounter (still referenced) if the position is unknown. */
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx)
+{
+	cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, pos, ctx);
+
+	if (obj)
+		return get_beancounter(obj->o_obj);
+
+	eprintk("RST: unknown ub @%Ld\n", (long long)pos);
+	return get_beancounter(get_exec_ub());
+}
+
+/* Copy the barrier/limit pair of one resource from @from to @to. */
+void copy_one_ubparm(struct ubparm *from, struct ubparm *to, int bc_parm_id)
+{
+	struct ubparm *src = &from[bc_parm_id];
+	struct ubparm *dst = &to[bc_parm_id];
+
+	dst->barrier = src->barrier;
+	dst->limit = src->limit;
+}
+
+/* Lift both barrier and limit of one resource to UB_MAXVALUE,
+ * effectively making it unlimited. */
+void set_one_ubparm_to_max(struct ubparm *ubprm, int bc_parm_id)
+{
+	struct ubparm *p = &ubprm[bc_parm_id];
+
+	p->barrier = UB_MAXVALUE;
+	p->limit = UB_MAXVALUE;
+}
+
+/* Fill one live resource parameter from its dumped counterpart.
+ * CPT_NULL in the image stands for "unlimited" (UB_MAXVALUE); the
+ * current usage (held) is restored only when @held is set. */
+static void restore_one_bc_parm(struct cpt_ubparm *dmp, struct ubparm *prm,
+		int held)
+{
+	prm->barrier = dmp->barrier == CPT_NULL ? UB_MAXVALUE : dmp->barrier;
+	prm->limit = dmp->limit == CPT_NULL ? UB_MAXVALUE : dmp->limit;
+	prm->maxheld = dmp->maxheld;
+	prm->minheld = dmp->minheld;
+	prm->failcnt = dmp->failcnt;
+	if (held)
+		prm->held = dmp->held;
+}
+
+/*
+ * Recreate one beancounter from its image record and attach it to
+ * @obj.  Child counters are created under their already-restored
+ * parent; the top-level record maps onto the container's existing root
+ * beancounter.  Resource parameters are then loaded from the image.
+ * Returns 0 on success or a negative error code.
+ */
+static int restore_one_bc(struct cpt_beancounter_image *v,
+		cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc;
+	cpt_object_t *pobj;
+	int resources, i;
+
+	if (v->cpt_parent != CPT_NULL) {
+		/* parent must have been restored earlier in the section */
+		pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
+		if (pobj == NULL)
+			return -ESRCH;
+		bc = get_subbeancounter_byid(pobj->o_obj, v->cpt_id, 1);
+	} else {
+		/* top-level record: reuse the container's root counter */
+		bc = get_exec_ub();
+		while (bc->parent)
+			bc = bc->parent;
+		get_beancounter(bc);
+	}
+	if (bc == NULL)
+		return -ENOMEM;
+	obj->o_obj = bc;
+
+	/* NOTE(review): this mixes a major-version and a minor-version
+	 * test with &&; verify it really means "images older than 18.1
+	 * carry no parameter data" */
+	if (ctx->image_version < CPT_VERSION_18 &&
+			CPT_VERSION_MINOR(ctx->image_version) < 1)
+		goto out;
+
+	if (v->cpt_content == CPT_CONTENT_ARRAY)
+		resources = v->cpt_ub_resources;
+	else
+		resources = UB_RESOURCES_COMPAT;
+
+	if (resources > UB_RESOURCES)
+		return -EINVAL;
+
+	/* parms are dumped pairwise: [active, stored] per resource */
+	for (i = 0; i < resources; i++) {
+		restore_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
+		restore_one_bc_parm(v->cpt_parms + i * 2 + 1,
+				bc->ub_store + i, 1);
+	}
+
+out:
+	/* remember the root counter's limits so they can be reapplied
+	 * after restore temporarily lifts them */
+	if (!bc->parent)
+		for (i = 0; i < UB_RESOURCES; i++)
+			copy_one_ubparm(bc->ub_parms, ctx->saved_ubc, i);
+
+	return 0;
+}
+
+/* Recreate every user beancounter saved in the CPT_SECT_UBC section
+ * and intern each as a CPT_OBJ_UBC object for later lookup by image
+ * position.  References taken here are dropped by rst_finish_ubc().
+ * Returns 0 on success or a negative error code. */
+int rst_undump_ubc(struct cpt_context *ctx)
+{
+	loff_t start, end;
+	struct cpt_beancounter_image *v;
+	cpt_object_t *obj;
+	int err;
+
+	err = rst_get_section(CPT_SECT_UBC, ctx, &start, &end);
+	if (err)
+		return err;
+
+	while (start < end) {
+		loff_t next;
+
+		v = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_UBC, start, v, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		/* take the record length while the buffer is still held;
+		 * it used to be read after cpt_release_buf() */
+		next = v->cpt_next;
+
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		/* was dereferenced unchecked; allocation can fail */
+		if (obj == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
+
+		err = restore_one_bc(v, obj, ctx);
+
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+
+		start += next;
+	}
+	return 0;
+}
+
+/* Drop the reference rst_undump_ubc() took on every restored
+ * beancounter once restore is complete. */
+void rst_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *o;
+
+	for_each_object(o, CPT_OBJ_UBC)
+		put_beancounter(o->o_obj);
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpt/rst_undump.c kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_undump.c
--- kernel-2.6.18-417.el5.orig/kernel/cpt/rst_undump.c	2017-01-13 08:40:25.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpt/rst_undump.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,993 @@
+/*
+ *
+ *  kernel/cpt/rst_undump.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/smp_lock.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/compat.h>
+#include <linux/vzcalluser.h>
+#include <linux/posix-timers.h>
+#include <ub/beancounter.h>
+#ifdef CONFIG_X86
+#include <asm/desc.h>
+#endif
+#include <asm/unistd.h>
+#include <linux/nsproxy.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_socket.h"
+#include "cpt_net.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+#ifdef CONFIG_IA32_EMULATION
+extern struct linux_binfmt elf32_format;
+#endif
+
+static int rst_utsname(cpt_context_t *ctx);
+
+
+/* Handshake between the restore coordinator and the kernel thread
+ * created for each restored task (see hook()). */
+struct thr_context {
+	struct completion init_complete;	/* hook() parked itself */
+	struct completion task_done;		/* hook() finished restoring */
+	int error;				/* result reported by hook() */
+	struct cpt_context *ctx;
+	cpt_object_t	*tobj;			/* task object being restored */
+};
+
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx);
+
+/*
+ * Load the CPT_SECT_VEINFO record and apply the container-wide settings
+ * it carries: SysV IPC limits, clock deltas relative to the source
+ * node, last allocated vpid and randomize_va_space policy.
+ * Returns 0 on success or a negative error code.
+ */
+static int vps_rst_veinfo(struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_veinfo_image *i;
+	struct ve_struct *ve;
+	struct timespec delta;
+	loff_t start, end;
+	struct ipc_namespace *ns;
+
+	err = rst_get_section(CPT_SECT_VEINFO, ctx, &start, &end);
+	if (err)
+		goto out;
+
+	i = cpt_get_buf(ctx);
+	memset(i, 0, sizeof(*i));
+	err = rst_get_object(CPT_OBJ_VEINFO, start, i, ctx);
+	if (err)
+		goto out_rel;
+
+	ve = get_exec_env();
+	ns = ve->ve_ns->ipc_ns;
+
+	/* Damn. Fatal mistake, these two values are size_t! */
+	/* zero in the image means "unset": substitute the old default */
+	ns->shm_ctlall = i->shm_ctl_all ? : 0xFFFFFFFFU;
+	ns->shm_ctlmax = i->shm_ctl_max ? : 0xFFFFFFFFU;
+	ns->shm_ctlmni = i->shm_ctl_mni;
+
+	ns->msg_ctlmax = i->msg_ctl_max;
+	ns->msg_ctlmni = i->msg_ctl_mni;
+	ns->msg_ctlmnb = i->msg_ctl_mnb;
+
+	BUILD_BUG_ON(sizeof(ns->sem_ctls) != sizeof(i->sem_ctl_arr));
+	ns->sem_ctls[0] = i->sem_ctl_arr[0];
+	ns->sem_ctls[1] = i->sem_ctl_arr[1];
+	ns->sem_ctls[2] = i->sem_ctl_arr[2];
+	ns->sem_ctls[3] = i->sem_ctl_arr[3];
+
+	/* shift the container's start time back by the dumped delta so
+	 * relative timestamps keep their meaning on this node */
+	cpt_timespec_import(&delta, i->start_timespec_delta);
+	set_normalized_timespec(&ve->start_timespec,
+			ve->start_timespec.tv_sec - delta.tv_sec,
+			ve->start_timespec.tv_nsec - delta.tv_nsec);
+	ve->start_jiffies -= i->start_jiffies_delta;
+	// // FIXME: what???
+	// // ve->start_cycles -= (s64)i->start_jiffies_delta * cycles_per_jiffy;
+
+	ctx->last_vpid = i->last_pid;
+	if (i->rnd_va_space)
+		ve->_randomize_va_space = i->rnd_va_space - 1;
+	if (i->vpid_max && i->vpid_max < PID_MAX_LIMIT)
+		ve->vpid_max = i->vpid_max;
+
+	err = 0;
+out_rel:
+	cpt_release_buf(ctx);
+out:
+	return err;
+}
+
+/*
+ * Create the target container (VE) and compute the wall-clock and
+ * monotonic-clock deltas between checkpoint and restore time, which
+ * later code uses to rebase timers.  Called from the would-be init
+ * task (cpt_pid == 1), which real_env_create() reparents into the new
+ * container.  Returns 0 on success or a negative error code.
+ */
+static int vps_rst_reparent_root(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err;
+	struct env_create_param3 param;
+
+	do_posix_clock_monotonic_gettime(&ctx->cpt_monotonic_time);
+	do_gettimespec(&ctx->delta_time);
+
+	/* delta_time := now - checkpoint wall time */
+	set_normalized_timespec(&ctx->delta_time,
+				 ctx->delta_time.tv_sec - ctx->start_time.tv_sec,
+				 ctx->delta_time.tv_nsec - ctx->start_time.tv_nsec);
+	ctx->delta_nsec = (s64)ctx->delta_time.tv_sec*NSEC_PER_SEC + ctx->delta_time.tv_nsec;
+	if (ctx->delta_nsec < 0) {
+		wprintk_ctx("Wall time is behind source by %Ld ns, "
+			    "time sensitive applications can misbehave\n", (long long)-ctx->delta_nsec);
+	}
+
+	/* rebase the saved monotonic time into this node's clock */
+        set_normalized_timespec(&ctx->cpt_monotonic_time,
+                                 ctx->cpt_monotonic_time.tv_sec - ctx->delta_time.tv_sec,
+                                 ctx->cpt_monotonic_time.tv_nsec - ctx->delta_time.tv_nsec);
+
+	memset(&param, 0, sizeof(param));
+	param.iptables_mask = ctx->iptables_mask;
+	param.feature_mask = ctx->features;
+
+	/* feature_mask is set as required - pretend we know everything */
+	param.known_features = (ctx->image_version < CPT_VERSION_18) ?
+		VE_FEATURES_OLD : ~(__u64)0;
+
+	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK|VE_EXCLUSIVE, 2,
+			&param, sizeof(param));
+	if (err < 0)
+		eprintk_ctx("real_env_create: %d\n", err);
+
+	get_exec_env()->jiffies_fixup =
+		(ctx->delta_time.tv_sec < 0 ?
+		 0 : timespec_to_jiffies(&ctx->delta_time)) -
+		(unsigned long)(get_jiffies_64() - ctx->virt_jiffies64);
+	dprintk_ctx("JFixup %ld %Ld\n", get_exec_env()->jiffies_fixup,
+		    (long long)ctx->delta_nsec);
+	return err < 0 ? err : 0;
+}
+
+/*
+ * Body of every restored task's kernel thread.  Parks until woken by
+ * the coordinator, then restores the task's state from its image: mm,
+ * files, fs, semundo, signals, personality, restart block and interval
+ * timers; forks its children via rst_clone_children().  The first task
+ * (cpt_pid == 1) additionally creates the container itself and restores
+ * all container-wide state before and after the per-task work.
+ * The result is reported through thr_ctx->error / task_done.
+ */
+static int hook(void *arg)
+{
+	struct thr_context *thr_ctx = arg;
+	struct cpt_context *ctx;
+	cpt_object_t *tobj;
+	struct cpt_task_image *ti;
+	int err = 0;
+	int exiting = 0;
+
+	/* park until the coordinator finished rst_basic_init_task() */
+	current->state = TASK_UNINTERRUPTIBLE;
+	complete(&thr_ctx->init_complete);
+	schedule();
+
+	ctx = thr_ctx->ctx;
+	tobj = thr_ctx->tobj;
+	ti = tobj->o_image;
+
+	current->fs->umask = 0;
+
+	/* container-wide setup, done once by the init task */
+	if (ti->cpt_pid == 1) {
+#ifdef CONFIG_USER_RESOURCE
+		struct user_beancounter *bc;
+#endif
+
+		err = vps_rst_reparent_root(tobj, ctx);
+
+		if (err) {
+			rst_report_error(err, ctx);
+			goto out;
+		}
+
+		memcpy(&cap_bset, &ti->cpt_ecap, sizeof(kernel_cap_t));
+
+		if (ctx->statusfile) {
+			fput(ctx->statusfile);
+			ctx->statusfile = NULL;
+		}
+
+		/* handshake with userspace: reading one byte from the
+		 * lock fd confirms the other side is ready */
+		if (ctx->lockfile) {
+			char b;
+			mm_segment_t oldfs;
+			err = -EINVAL;
+
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			if (ctx->lockfile->f_op && ctx->lockfile->f_op->read)
+				err = ctx->lockfile->f_op->read(ctx->lockfile, &b, 1, &ctx->lockfile->f_pos);
+			set_fs(oldfs);
+			fput(ctx->lockfile);
+			ctx->lockfile = NULL;
+		}
+
+		if (err) {
+			eprintk_ctx("CPT: lock fd is closed incorrectly: %d\n", err);
+			goto out;
+		}
+		err = vps_rst_veinfo(ctx);
+		if (err) {
+			eprintk_ctx("rst_veinfo: %d\n", err);
+			goto out;
+		}
+
+		err = rst_utsname(ctx);
+		if (err) {
+			eprintk_ctx("rst_utsname: %d\n", err);
+			goto out;
+		}
+
+		err = rst_root_namespace(ctx);
+		if (err) {
+			eprintk_ctx("rst_namespace: %d\n", err);
+			goto out;
+		}
+
+		if ((err = rst_restore_net(ctx)) != 0) {
+			eprintk_ctx("rst_restore_net: %d\n", err);
+			goto out;
+		}
+
+		err = rst_sockets(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: %d\n", err);
+			goto out;
+		}
+		err = rst_sysv_ipc(ctx);
+		if (err) {
+			eprintk_ctx("rst_sysv_ipc: %d\n", err);
+			goto out;
+		}
+#ifdef CONFIG_USER_RESOURCE
+		/* lift the core limits for the duration of the restore;
+		 * the saved values are reapplied later from saved_ubc */
+		bc = get_exec_ub();
+		set_one_ubparm_to_max(bc->ub_parms, UB_KMEMSIZE);
+		set_one_ubparm_to_max(bc->ub_parms, UB_NUMPROC);
+		set_one_ubparm_to_max(bc->ub_parms, UB_NUMFILE);
+		set_one_ubparm_to_max(bc->ub_parms, UB_DCACHESIZE);
+#endif
+	}
+
+	/* switch to the uid recorded for this task */
+	do {
+		if (current->user->uid != ti->cpt_user) {
+			struct user_struct *u = alloc_uid(ti->cpt_user);
+			if (!u) {
+				eprintk_ctx("alloc_user\n");
+			} else {
+				switch_uid(u);
+			}
+		}
+	} while (0);
+
+	if ((err = rst_mm_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_mm: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_files(ti, ctx)) != 0) {
+		eprintk_ctx("rst_files: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_fs_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_fs: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_semundo_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_semundo: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_signal_complete(ti, &exiting, ctx)) != 0) {
+		eprintk_ctx("rst_signal: %d\n", err);
+		goto out;
+	}
+
+	if (ti->cpt_personality != 0)
+		__set_personality(ti->cpt_personality);
+
+#ifdef CONFIG_X86_64
+	if (!ti->cpt_64bit) {
+		/* 32bit app from 32bit OS, won't have PER_LINUX32 set... :/ */
+		__set_personality(PER_LINUX32);
+		/* Task forked from 64bit app and thus has wrong binfmt
+		 * pointer */
+		set_binfmt(&elf32_format);
+	}
+#endif
+
+	current->set_child_tid = NULL;
+	current->clear_child_tid = NULL;
+	current->flags &= ~(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->flags |= ti->cpt_flags&(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->exit_code = ti->cpt_exit_code;
+	current->pdeath_signal = ti->cpt_pdeath_signal;
+
+	/* rebuild the syscall restart block for interrupted nanosleep */
+	if (ti->cpt_restart.fn != CPT_RBL_0) {
+		if (ti->cpt_restart.fn != CPT_RBL_NANOSLEEP
+		    && ti->cpt_restart.fn != CPT_RBL_COMPAT_NANOSLEEP
+		    ) {
+			eprintk_ctx("unknown restart block\n");
+		} else {
+			ktime_t e;
+
+			e.tv64 = 0;
+
+			current->thread_info->restart_block.fn = nanosleep_restart;
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+			if (!ti->cpt_64bit)
+				current->thread_info->restart_block.fn = compat_nanosleep_restart;
+#endif
+			/* pre-V9 images stored the expiry in jiffies */
+			if (ctx->image_version >= CPT_VERSION_9)
+				e = ktime_add_ns(e, ti->cpt_restart.arg0);
+			else
+				e = ktime_add_ns(e, ti->cpt_restart.arg0*TICK_NSEC);
+			if (e.tv64 < 0)
+				e.tv64 = TICK_NSEC;
+			e = ktime_add(e, timespec_to_ktime(ctx->cpt_monotonic_time));
+			current->thread_info->restart_block.arg0 = e.tv64 & 0xFFFFFFFF;
+			current->thread_info->restart_block.arg1 = e.tv64 >> 32;
+			if (ctx->image_version >= CPT_VERSION_9) {
+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg2;
+				current->thread_info->restart_block.arg3 = ti->cpt_restart.arg3;	
+			} else {
+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg1;
+				current->thread_info->restart_block.arg3 = CLOCK_MONOTONIC;	
+			}
+		}
+	}
+
+	/* interval timers live in the shared signal struct: restore them
+	 * once, from the group leader */
+	if (thread_group_leader(current)) {
+		cputime_t virt_exp, prof_exp;
+
+		current->signal->it_real_incr.tv64 = 0;
+		if (ctx->image_version >= CPT_VERSION_9) {
+			current->signal->it_real_incr =
+			ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr);
+		} else {
+			current->signal->it_real_incr =
+			ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr*TICK_NSEC);
+		}
+		current->signal->it_prof_incr = ti->cpt_it_prof_incr;
+		current->signal->it_virt_incr = ti->cpt_it_virt_incr; 
+		current->signal->it_prof_expires = virt_exp = ti->cpt_it_prof_value;
+		current->signal->it_virt_expires = prof_exp = ti->cpt_it_virt_value;
+
+		if (!cputime_eq(virt_exp, cputime_zero))
+			set_process_cpu_timer(current, CPUCLOCK_VIRT, &virt_exp, NULL);
+
+		if (!cputime_eq(prof_exp, cputime_zero))
+			set_process_cpu_timer(current, CPUCLOCK_PROF, &prof_exp, NULL);
+	}
+
+	err = rst_clone_children(tobj, ctx);
+	if (err) {
+		eprintk_ctx("rst_clone_children\n");
+		goto out;
+	}
+
+	if (exiting)
+		current->signal->flags |= SIGNAL_GROUP_EXIT;
+
+	/* init runs last: finish the passes that need every task alive */
+	if (ti->cpt_pid == 1) {
+		if ((err = rst_process_linkage(ctx)) != 0) {
+			eprintk_ctx("rst_process_linkage: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_do_filejobs(ctx)) != 0) {
+			eprintk_ctx("rst_do_filejobs: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_eventpoll(ctx)) != 0) {
+			eprintk_ctx("rst_eventpoll: %d\n", err);
+			goto out;
+		}
+#ifdef CONFIG_INOTIFY_USER
+		if ((err = rst_inotify(ctx)) != 0) {
+			eprintk_ctx("rst_inotify: %d\n", err);
+			goto out;
+		}
+#endif
+		if ((err = rst_sockets_complete(ctx)) != 0) {
+			eprintk_ctx("rst_sockets_complete: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_stray_files(ctx)) != 0) {
+			eprintk_ctx("rst_stray_files: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_posix_locks(ctx)) != 0) {
+			eprintk_ctx("rst_posix_locks: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_tty_jobcontrol(ctx)) != 0) {
+			eprintk_ctx("rst_tty_jobcontrol: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_restore_fs(ctx)) != 0) {
+			eprintk_ctx("rst_restore_fs: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_init_delayfs_daemon(ctx)) != 0) {
+			eprintk_ctx("rst_init_delayfs_daemon: %d\n", err);
+			goto out;
+		}
+		if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RESTORE, ctx) & NOTIFY_FAIL) {
+			err = -ECHRNG;
+			eprintk_ctx("scp_restore failed\n");
+			if (ctx->dctx != NULL) {
+				send_sig(SIGKILL, ctx->dctx->dfs_daemon, 1);
+				wake_up_process(ctx->dctx->dfs_daemon);
+			}
+			goto out;
+		}
+		if (ctx->last_vpid)
+			get_exec_env()->last_vpid = ctx->last_vpid;
+	}
+
+out:
+	thr_ctx->error = err;
+	complete(&thr_ctx->task_done);
+
+	/* zombies finish dying; everyone else parks until resume */
+	if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+		current->flags |= PF_EXIT_RESTART;
+		do_exit(ti->cpt_exit_code);
+	} else {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+
+	schedule();
+
+	dprintk_ctx("leaked through %d/%d %p\n", current->pid, virt_pid(current), current->mm);
+
+	module_put(THIS_MODULE);
+	complete_and_exit(NULL, 0);
+	return 0;
+}
+
+#if 0
+/* Disabled: would point the current task's fork/exec beancounters at
+ * the ones restored from the image before children are cloned.  Kept
+ * compiled-out; the #if 0 call sites below reference it. */
+static void set_task_ubs(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct task_beancounter *tbc;
+
+	tbc = task_bc(current);
+
+	put_beancounter(tbc->fork_sub);
+	tbc->fork_sub = rst_lookup_ubc(ti->cpt_task_ub, ctx);
+	if (ti->cpt_mm_ub != CPT_NULL) {
+		put_beancounter(tbc->exec_ub);
+		tbc->exec_ub = rst_lookup_ubc(ti->cpt_mm_ub, ctx);
+	}
+}
+#endif
+
+/*
+ * Spawn the kernel thread that will become the container's init task
+ * (it runs hook() above) and bind the resulting task_struct to @obj.
+ * The new thread parks on init_complete until the caller wakes it.
+ * Returns 0 on success or a negative error code.
+ */
+static int create_root_task(cpt_object_t *obj, struct cpt_context *ctx,
+		struct thr_context *thr_ctx)
+{
+	struct task_struct *tsk;
+	int pid;
+
+	thr_ctx->ctx = ctx;
+	thr_ctx->error = 0;
+	init_completion(&thr_ctx->init_complete);
+	init_completion(&thr_ctx->task_done);
+#if 0
+	set_task_ubs(obj->o_image, ctx);
+#endif
+
+	pid = local_kernel_thread(hook, thr_ctx, 0, 0);
+	if (pid < 0)
+		return pid;
+	/* translate the pid back into a referenced task_struct */
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(obj, tsk, ctx);
+	thr_ctx->tobj = obj;
+	return 0;
+}
+
+/* Minimal early initialization of a freshly created restore thread:
+ * set its command name from the image and do the basic mm setup.
+ * Always returns 0. */
+static int rst_basic_init_task(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct cpt_task_image *ti = obj->o_image;
+	struct task_struct *tsk = obj->o_obj;
+
+	memcpy(tsk->comm, ti->cpt_comm, sizeof(tsk->comm));
+	rst_mm_basic(obj, ti, ctx);
+	return 0;
+}
+
+/*
+ * Clone one child or sibling thread of the current (already restored)
+ * task described by @pi.  The clone flags are derived from which
+ * resources the child shares with its parent in the image; threads of
+ * another leader additionally get CLONE_THREAD|CLONE_PARENT.  The new
+ * task runs hook() and this function waits for it to finish its own
+ * restore.  Returns hook()'s result, or a negative error code.
+ */
+static int make_baby(cpt_object_t *cobj,
+		     struct cpt_task_image *pi,
+		     struct cpt_context *ctx)
+{
+	unsigned long flags;
+	struct cpt_task_image *ci = cobj->o_image;
+	struct thr_context thr_ctx;
+	struct task_struct *tsk;
+	pid_t pid;
+	struct fs_struct *tfs = NULL;
+
+	flags = rst_mm_flag(ci, ctx) | rst_files_flag(ci, ctx)
+		| rst_signal_flag(ci, ctx) | rst_semundo_flag(ci, ctx);
+	if (ci->cpt_rppid != pi->cpt_pid) {
+		/* not our direct child: must be a thread of our group */
+		flags |= CLONE_THREAD|CLONE_PARENT;
+		if (ci->cpt_signal != pi->cpt_signal ||
+		    !(flags&CLONE_SIGHAND) ||
+		    (!(flags&CLONE_VM) && pi->cpt_mm != CPT_NULL)) {
+			eprintk_ctx("something is wrong with threads: %d %d %d %Ld %Ld %08lx\n",
+			       (int)ci->cpt_pid, (int)ci->cpt_rppid, (int)pi->cpt_pid,
+			       (long long)ci->cpt_signal, (long long)pi->cpt_signal, flags
+			       );
+			return -EINVAL;
+		}
+	}
+
+	thr_ctx.ctx = ctx;
+	thr_ctx.error = 0;
+	init_completion(&thr_ctx.init_complete);
+	init_completion(&thr_ctx.task_done);
+	thr_ctx.tobj = cobj;
+
+#if 0
+	set_task_ubs(ci, ctx);
+#endif
+
+	/* copy_process() needs current->fs; borrow init's temporarily
+	 * if ours was detached */
+	if (current->fs == NULL) {
+		tfs = get_exec_env()->init_entry->fs;
+		if (tfs == NULL)
+			return -EINVAL;
+		atomic_inc(&tfs->count);
+		current->fs = tfs;
+	}
+	pid = local_kernel_thread(hook, &thr_ctx, flags, ci->cpt_pid);
+	if (tfs) {
+		current->fs = NULL;
+		atomic_dec(&tfs->count);
+	}
+	if (pid < 0)
+		return pid;
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(cobj, tsk, ctx);
+	thr_ctx.tobj = cobj;
+	/* wait for the child to park, then initialize it while stopped */
+	wait_for_completion(&thr_ctx.init_complete);
+	wait_task_inactive(cobj->o_obj);
+	rst_basic_init_task(cobj, ctx);
+
+	/* clone() increases group_stop_count if it was not zero and
+	 * CLONE_THREAD was asked. Undo.
+	 */
+	if (current->signal->group_stop_count && (flags & CLONE_THREAD)) {
+		if (tsk->signal != current->signal) BUG();
+		current->signal->group_stop_count--;
+	}
+
+	wake_up_process(tsk);
+	wait_for_completion(&thr_ctx.task_done);
+	wait_task_inactive(tsk);
+
+	return thr_ctx.error;
+}
+
+/* Fork every task whose image marks it as a direct child of @obj's
+ * task (a group leader it spawned) or as a non-leader thread of its
+ * group.  Stops at the first make_baby() failure. */
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct cpt_task_image *ti = obj->o_image;
+	cpt_object_t *cobj;
+
+	for_each_object(cobj, CPT_OBJ_TASK) {
+		struct cpt_task_image *ci;
+		int is_child, is_thread, err;
+
+		if (cobj == obj)
+			continue;
+		ci = cobj->o_image;
+		is_child = ci->cpt_rppid == ti->cpt_pid &&
+			   ci->cpt_tgid == ci->cpt_pid;
+		is_thread = ci->cpt_leader == ti->cpt_pid &&
+			    ci->cpt_tgid != ci->cpt_pid && ci->cpt_pid != 1;
+		if (!is_child && !is_thread)
+			continue;
+
+		err = make_baby(cobj, ti, ctx);
+		if (err) {
+			eprintk_ctx("make_baby: %d\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+/* Load every record of the CPT_SECT_TASKS section into a CPT_OBJ_TASK
+ * object; obj->o_image holds the complete record (header included).
+ * Returns 0 on success or a negative error code. */
+static int read_task_images(struct cpt_context *ctx)
+{
+	int err;
+	loff_t start, end;
+
+	err = rst_get_section(CPT_SECT_TASKS, ctx, &start, &end);
+	if (err)
+		return err;
+
+	while (start < end) {
+		cpt_object_t *obj;
+		struct cpt_task_image *ti = cpt_get_buf(ctx);
+
+		err = rst_get_object(CPT_OBJ_TASK, start, ti, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (ti->cpt_pid != 1 && !__is_virtual_pid(ti->cpt_pid)) {
+			eprintk_ctx("BUG: pid %d is not virtual\n", ti->cpt_pid);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		/* cpt_next comes from the image: it must cover at least
+		 * the header, otherwise the pread size below underflows */
+		if (ti->cpt_next < sizeof(*ti)) {
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		/* was dereferenced unchecked; allocation can fail */
+		if (obj == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+		obj->o_image = kmalloc(ti->cpt_next, GFP_KERNEL);
+		if (obj->o_image == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		/* header from the buffer, the tail straight from the file */
+		memcpy(obj->o_image, ti, sizeof(*ti));
+		err = ctx->pread(obj->o_image + sizeof(*ti),
+				 ti->cpt_next - sizeof(*ti), ctx, start + sizeof(*ti));
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		start += ti->cpt_next;
+	}
+	return 0;
+}
+
+
+/*
+ * vps_rst_restore_tree - recreate the container's whole process tree.
+ *
+ * Loads the task images, restores user beancounters, lets the SCP
+ * (migration) notifier veto the restore, optionally sets up lazy
+ * page-in, and then creates the root (init) task.  Only the first
+ * CPT_OBJ_TASK object is used as the root -- the loop breaks after one
+ * iteration; the root task recreates its own children.
+ */
+static int vps_rst_restore_tree(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct thr_context thr_ctx_root;
+
+	err = read_task_images(ctx);
+	if (err)
+		return err;
+
+	err = rst_undump_ubc(ctx);
+	if (err)
+		return err;
+
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTCHECK, ctx) & NOTIFY_FAIL)
+		return -ECHRNG;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	err = rst_setup_pagein(ctx);
+	if (err)
+		return err;
+#endif
+	for_each_object(obj, CPT_OBJ_TASK) {
+		err = create_root_task(obj, ctx, &thr_ctx_root);
+		if (err)
+			return err;
+
+		/* the new task parks itself; set up its state while inactive */
+		wait_for_completion(&thr_ctx_root.init_complete);
+		wait_task_inactive(obj->o_obj);
+		rst_basic_init_task(obj, ctx);
+
+		/* let it run the restore body, then wait until it sleeps again */
+		wake_up_process(obj->o_obj);
+		wait_for_completion(&thr_ctx_root.task_done);
+		wait_task_inactive(obj->o_obj);
+		err = thr_ctx_root.error;
+		if (err)
+			return err;
+		break;
+	}
+
+	return err;
+}
+
+#ifndef CONFIG_IA64
+/*
+ * rst_read_vdso - load the dumped vDSO page from CPT_SECT_VSYSCALL.
+ *
+ * On success ctx->vdso is either NULL (no section in the image, or the
+ * dumped page is identical to the running kernel's vsyscall page, so no
+ * private copy is needed) or a one-page private copy of the dumped vDSO.
+ *
+ * Returns 0 on success or a negative error; on error ctx->vdso is NULL.
+ */
+int rst_read_vdso(struct cpt_context *ctx)
+{
+	int err;
+	loff_t start, end;
+	struct cpt_page_block *pgb;
+
+	ctx->vdso = NULL;
+	err = rst_get_section(CPT_SECT_VSYSCALL, ctx, &start, &end);
+	if (err)
+		return err;
+	if (start == CPT_NULL)
+		return 0;
+	/* the section must hold at least the header plus one full page */
+	if (end < start + sizeof(*pgb) + PAGE_SIZE)
+		return -EINVAL;
+
+	pgb = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_VSYSCALL, start, pgb, ctx);
+	if (err) {
+		goto err_buf;
+	}
+	ctx->vdso = (char*)__get_free_page(GFP_KERNEL);
+	if (ctx->vdso == NULL) {
+		err = -ENOMEM;
+		goto err_buf;
+	}
+	err = ctx->pread(ctx->vdso, PAGE_SIZE, ctx, start + sizeof(*pgb));
+	if (err)
+		goto err_page;
+	if (!memcmp(ctx->vdso, vsyscall_addr, PAGE_SIZE)) {
+		/* identical to the live page -- the private copy is redundant */
+		free_page((unsigned long)ctx->vdso);
+		ctx->vdso = NULL;
+	}
+
+	cpt_release_buf(ctx);
+	return 0;
+err_page:
+	free_page((unsigned long)ctx->vdso);
+	ctx->vdso = NULL;
+err_buf:
+	cpt_release_buf(ctx);
+	return err;
+}
+#endif
+
+/*
+ * vps_rst_undump - top-level entry point of the restore (undump) path.
+ *
+ * Opens the dump file, rejects 64-bit images on kernels that cannot run
+ * them, temporarily clears the caller's umask (so restored files get
+ * exactly the dumped modes), then restores the vDSO, the process tree
+ * and per-process state.  Any failure notifies the SCP hooks.
+ *
+ * Returns 0 on success or a negative error.
+ */
+int vps_rst_undump(struct cpt_context *ctx)
+{
+	int err;
+	unsigned long umask;
+
+	err = rst_open_dumpfile(ctx);
+	if (err)
+		return err;
+
+	if (ctx->tasks64) {
+#if defined(CONFIG_IA64)
+		if (ctx->image_arch != CPT_OS_ARCH_IA64)
+#elif defined(CONFIG_X86_64)
+		if (ctx->image_arch != CPT_OS_ARCH_EMT64)
+#else
+		if (1)
+#endif
+		{
+			eprintk_ctx("Cannot restore 64 bit container on this architecture\n");
+			return -EINVAL;
+		}
+	}
+
+	umask = current->fs->umask;
+	current->fs->umask = 0;
+
+	/* without CONFIG_VZ_CHECKPOINT_LAZY err still holds 0 from above */
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	err = rst_setup_pagein(ctx);
+#endif
+#ifndef CONFIG_IA64
+	if (err == 0)
+		err = rst_read_vdso(ctx);
+#endif
+	if (err == 0)
+		err = vps_rst_restore_tree(ctx);
+
+	if (err == 0)
+		err = rst_restore_process(ctx);
+
+	if (err)
+		virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTFAIL, ctx);
+
+	current->fs->umask = umask;
+
+        return err;
+}
+
+/*
+ * rst_unlock_ve - clear the VE's is_locked flag set for the restore.
+ *
+ * Taking op_sem for write serializes against management ioctls that may
+ * be operating on the VE.  Returns -ESRCH if the VE no longer exists.
+ */
+static int rst_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *env;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	down_write(&env->op_sem);
+	env->is_locked = 0;
+	up_write(&env->op_sem);
+	put_ve(env);
+	return 0;
+}
+
+/*
+ * rst_resume - let the freshly restored container run.
+ *
+ * Drops the restore-time references on dumped files, restores the saved
+ * beancounter limits, resumes the network, wakes up (or re-stops) every
+ * restored task, unlocks the VE and releases the restore context.
+ * Always returns 0.
+ */
+int rst_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *bc;
+#endif
+
+	rst_freeze_delayfs(ctx);
+
+	/* drop the references taken while the files were being restored */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+#ifdef CONFIG_USER_RESOURCE
+	/* put back the beancounter limits saved before the restore */
+	bc = get_beancounter_byuid(ctx->ve_id, 0);
+	BUG_ON(!bc);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_KMEMSIZE);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_NUMPROC);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_NUMFILE);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_DCACHESIZE);
+	put_beancounter(bc);
+#endif
+
+	rst_resume_network(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (!tsk)
+			continue;
+
+		if (ti->cpt_state == TASK_UNINTERRUPTIBLE) {
+			dprintk_ctx("task %d/%d(%s) is started\n", virt_pid(tsk), tsk->pid, tsk->comm);
+
+			/* Weird... If a signal is sent to a stopped task,
+			 * nobody calls recalc_sigpending(), so we do it by
+			 * hand here, right before wake_up_process(); doing
+			 * it any earlier would let a signal arrive before
+			 * the wakeup and leave the task stalled.
+			 */
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (!signal_pending(tsk))
+				recalc_sigpending_tsk(tsk);
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		} else {
+			/* dumped as stopped/traced: re-enter that state */
+			if (ti->cpt_state == TASK_STOPPED ||
+			    ti->cpt_state == TASK_TRACED) {
+				set_task_state(tsk, ti->cpt_state);
+			}
+		}
+		put_task_struct(tsk);	/* drop the restore-time reference */
+	}
+
+	rst_unlock_ve(ctx);
+
+	if (ctx->dctx != NULL)
+		wake_up_process(ctx->dctx->dfs_daemon);
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	rst_complete_pagein(ctx, 0);
+#endif
+
+	rst_finish_ubc(ctx);
+	rst_finish_vfsmount_ref(ctx);
+	cpt_object_destroy(ctx);
+
+        return err;
+}
+
+/*
+ * rst_kill - abort a (partially) restored container and kill its tasks.
+ *
+ * Counterpart of rst_resume() for the failure path: drops the file
+ * references, sends SIGKILL to every restored task that has not exited
+ * yet (unblocking everything except SIGKILL and unfreezing it so the
+ * signal is actually delivered), kills the delayfs daemon if any, and
+ * releases the restore context.  Always returns 0.
+ */
+int rst_kill(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		struct task_struct *tsk = obj->o_obj;
+
+		if (tsk == NULL)
+			continue;
+
+		if (tsk->exit_state == 0) {
+			send_sig(SIGKILL, tsk, 1);
+
+			/* make sure only SIGKILL is deliverable and the task
+			 * is not left frozen, or the kill would never land */
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigfillset(&tsk->blocked);
+			sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+			set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+			clear_tsk_thread_flag(tsk, TIF_FREEZE);
+			if (tsk->flags & PF_FROZEN)
+				tsk->flags &= ~PF_FROZEN;
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		}
+
+		put_task_struct(tsk);	/* drop the restore-time reference */
+	}
+
+	if (ctx->dctx && ctx->dctx->dfs_daemon) {
+		send_sig(SIGKILL, ctx->dctx->dfs_daemon, 1);
+		wake_up_process(ctx->dctx->dfs_daemon);
+	}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	rst_complete_pagein(ctx, 1);
+#endif
+
+	rst_finish_ubc(ctx);
+	rst_finish_vfsmount_ref(ctx);
+	cpt_object_destroy(ctx);
+
+        return err;
+}
+
+/*
+ * rst_utsname - restore the container's uts namespace from the image.
+ *
+ * Reads the CPT_SECT_UTSNAME section; its objects are, in order:
+ * 0 - nodename, 1 - domainname, 2 - release.  Each string is copied
+ * straight into the current VE's uts_namespace.
+ *
+ * NOTE(review): the strings are presumably dumped including the trailing
+ * NUL (len <= __NEW_UTS_LEN+1 is enforced) -- verify against the dumper.
+ *
+ * Returns 0 if the section is absent or fully parsed, negative error
+ * otherwise.
+ */
+static int rst_utsname(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_UTSNAME];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr o;
+	struct ve_struct *ve;
+	struct uts_namespace *ns;
+	int i;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_UTSNAME || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	ve = get_exec_env();
+	ns = ve->ve_ns->uts_ns;
+
+	i = 0;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int len;
+		char *ptr;
+		err = rst_get_object(CPT_OBJ_NAME, sec, &o, ctx);
+		if (err)
+			return err;
+		len = o.cpt_next - o.cpt_hdrlen;
+		if (len > __NEW_UTS_LEN+1)
+			return -ENAMETOOLONG;
+		/* object index determines which uts field is being restored */
+		switch (i) {
+		case 0:
+			ptr = ns->name.nodename; break;
+		case 1:
+			ptr = ns->name.domainname; break;
+		case 2:
+			ptr = ns->name.release; break;
+		default:
+			return -EINVAL;
+		}
+		err = ctx->pread(ptr, len, ctx, sec+o.cpt_hdrlen);
+		if (err)
+			return err;
+		i++;
+		sec += o.cpt_next;
+	}
+
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpu.c kernel-2.6.18-417.el5-028stab121/kernel/cpu.c
--- kernel-2.6.18-417.el5.orig/kernel/cpu.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpu.c	2017-01-13 08:40:28.000000000 -0500
@@ -28,6 +28,10 @@ static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
+#ifdef CONFIG_SCHED_VCPU
+#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU"
+#endif
+
 /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
 static struct task_struct *recursive;
 static int recursive_depth;
@@ -92,8 +96,8 @@ static inline void check_for_tasks(int c
 	struct task_struct *p;
 
 	write_lock_irq(&tasklist_lock);
-	for_each_process(p) {
-		if (task_cpu(p) == cpu &&
+	for_each_process_all(p) {
+		if (task_pcpu(p) == cpu &&
 		    (!cputime_eq(p->utime, cputime_zero) ||
 		     !cputime_eq(p->stime, cputime_zero)))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
@@ -108,6 +112,13 @@ struct take_cpu_down_param {
 	void *hcpu;
 };
 
+#ifdef CONFIG_SCHED_VCPU
+#error VCPU vs. HOTPLUG: fix hotplug code below
+/*
+ * What should be fixed:
+ * - check for if (idle_cpu()) yield()
+ */
+#endif
 /* Take this CPU down. */
 static int take_cpu_down(void *_param)
 {
diff -upr kernel-2.6.18-417.el5.orig/kernel/cpuset.c kernel-2.6.18-417.el5-028stab121/kernel/cpuset.c
--- kernel-2.6.18-417.el5.orig/kernel/cpuset.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/cpuset.c	2017-01-13 08:40:19.000000000 -0500
@@ -971,7 +971,7 @@ static int update_nodemask(struct cpuset
 	n = 0;
 
 	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		struct mm_struct *mm;
 
 		if (n >= ntasks) {
@@ -985,7 +985,7 @@ static int update_nodemask(struct cpuset
 		if (!mm)
 			continue;
 		mmarray[n++] = mm;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	write_unlock_irq(&tasklist_lock);
 
 	/*
@@ -1233,7 +1233,7 @@ static int attach_task(struct cpuset *cs
 	if (pid) {
 		read_lock(&tasklist_lock);
 
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_all(pid);
 		if (!tsk || tsk->flags & PF_EXITING) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -1703,13 +1703,13 @@ static int pid_array_load(pid_t *pidarra
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (p->cpuset == cs) {
 			pidarray[n++] = p->pid;
 			if (unlikely(n == npids))
 				goto array_full;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 array_full:
 	read_unlock(&tasklist_lock);
diff -upr kernel-2.6.18-417.el5.orig/kernel/exit.c kernel-2.6.18-417.el5-028stab121/kernel/exit.c
--- kernel-2.6.18-417.el5.orig/kernel/exit.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/exit.c	2017-01-13 08:40:41.000000000 -0500
@@ -14,6 +14,8 @@
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/namespace.h>
+#include <linux/nsproxy.h>
+#include <linux/virtinfo.h>
 #include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
@@ -39,9 +41,13 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
+#include <linux/grsecurity.h>
 #include <linux/task_io_accounting_ops.h>
 #include <trace/sched.h>
 
+#include <linux/ve_proto.h>
+#include <ub/ub_misc.h>
+
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
@@ -50,7 +56,7 @@
 extern void sem_exit (void);
 extern struct task_struct *child_reaper;
 
-static void exit_mm(struct task_struct * tsk);
+void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
 {
@@ -61,6 +67,10 @@ static void __unhash_process(struct task
 		detach_pid(p, PIDTYPE_SID);
 
 		list_del_rcu(&p->tasks);
+#ifdef CONFIG_VE
+		list_del_rcu(&p->ve_task_info.vetask_list);
+		list_del(&p->ve_task_info.aux_list);
+#endif
 		__get_cpu_var(process_counts)--;
 	}
 	list_del_rcu(&p->thread_group);
@@ -172,6 +182,8 @@ repeat:
 	BUG_ON(tracehook_check_released(p));
 	write_lock_irq(&tasklist_lock);
 	__exit_signal(p);
+	nr_zombie--;
+	atomic_inc(&nr_dead);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -210,6 +222,8 @@ repeat:
 	write_unlock_irq(&tasklist_lock);
 	proc_flush_task(p);
 	release_thread(p);
+	ub_task_uncharge(p);
+	pput_ve(p->ve_task_info.owner_env);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
 	p = leader;
@@ -234,14 +248,16 @@ int session_of_pgrp(int pgrp)
 	struct task_struct *p;
 	int sid = -1;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->signal->session > 0) {
 			sid = p->signal->session;
 			goto out;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-	p = find_task_by_pid(pgrp);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+	p = find_task_by_pid_ve(pgrp);
 	if (p)
 		sid = p->signal->session;
 out:
@@ -263,17 +279,19 @@ static int will_become_orphaned_pgrp(int
 	struct task_struct *p;
 	int ret = 1;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	WARN_ON(is_virtual_pid(pgrp));
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
-				|| p->parent->pid == 1)
+				|| virt_pid(p->parent) == 1)
 			continue;
 		if (process_group(p->parent) != pgrp
 			    && p->parent->signal->session == p->signal->session) {
 			ret = 0;
 			break;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return ret;	/* (sighing) "Often!" */
 }
 
@@ -281,6 +299,8 @@ int is_orphaned_pgrp(int pgrp)
 {
 	int retval;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
 	retval = will_become_orphaned_pgrp(pgrp, NULL);
 	read_unlock(&tasklist_lock);
@@ -293,13 +313,13 @@ static int has_stopped_jobs(int pgrp)
 	int retval = 0;
 	struct task_struct *p;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->state != TASK_STOPPED)
 			continue;
 
 		retval = 1;
 		break;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return retval;
 }
 
@@ -346,6 +366,9 @@ void __set_special_pids(pid_t session, p
 {
 	struct task_struct *curr = current->group_leader;
 
+	WARN_ON(__is_virtual_pid(pgrp));
+	WARN_ON(__is_virtual_pid(session));
+
 	if (curr->signal->session != session) {
 		detach_pid(curr, PIDTYPE_SID);
 		curr->signal->session = session;
@@ -364,6 +387,7 @@ void set_special_pids(pid_t session, pid
 	__set_special_pids(session, pgrp);
 	write_unlock_irq(&tasklist_lock);
 }
+EXPORT_SYMBOL(set_special_pids);
 
 /*
  * Let kernel threads use this to say that they
@@ -416,6 +440,8 @@ void daemonize(const char *name, ...)
 	struct fs_struct *fs;
 	sigset_t blocked;
 
+	(void)virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
+
 	va_start(args, name);
 	vsnprintf(current->comm, sizeof(current->comm), name, args);
 	va_end(args);
@@ -440,12 +466,14 @@ void daemonize(const char *name, ...)
 	/* Become as one with the init task */
 
 	exit_fs(current);	/* current->fs->count--; */
-	fs = init_task.fs;
+	fs = get_exec_env()->init_entry->fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
-	exit_namespace(current);
-	current->namespace = init_task.namespace;
-	get_namespace(current->namespace);
+
+	exit_task_namespaces(current);
+	current->nsproxy = init_task.nsproxy;
+	get_task_namespaces(current);
+
  	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
@@ -498,6 +526,7 @@ struct files_struct *get_files_struct(st
 
 	return files;
 }
+EXPORT_SYMBOL_GPL(get_files_struct);
 
 void fastcall put_files_struct(struct files_struct *files)
 {
@@ -572,6 +601,7 @@ void put_fs_struct(struct fs_struct *fs)
 {
 	__put_fs_struct(fs);
 }
+EXPORT_SYMBOL_GPL(put_fs_struct);
 
 static inline void __exit_fs(struct task_struct *tsk)
 {
@@ -596,13 +626,17 @@ EXPORT_SYMBOL_GPL(exit_fs);
  * Turn us into a lazy TLB process if we
  * aren't already..
  */
-static void exit_mm(struct task_struct * tsk)
+void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 
 	mm_release(tsk, mm);
 	if (!mm)
 		return;
+
+	if (test_tsk_thread_flag(tsk, TIF_MEMDIE))
+		mm->oom_killed = 1;
+
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_sem around checking core_waiters
@@ -631,6 +665,7 @@ static void exit_mm(struct task_struct *
 	task_unlock(tsk);
 	mmput(mm);
 }
+EXPORT_SYMBOL_GPL(exit_mm);
 
 static inline void
 choose_new_parent(struct task_struct *p, struct task_struct *reaper)
@@ -697,13 +732,12 @@ reparent_thread(struct task_struct *p, s
 static void
 forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *reaper = father;
+	struct task_struct *p, *tsk_reaper, *reaper = father;
 	struct list_head *_p, *_n;
 
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper;
 			break;
 		}
 	} while (reaper->exit_state);
@@ -715,8 +749,15 @@ forget_original_parent(struct task_struc
                  * first time slices from childs anymore */     
 		if (p->first_time_slice == father->pid)
 		    p->first_time_slice = 0;
-		    
-		choose_new_parent(p, reaper);
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p ||
+		    p->group_leader == VE_TASK_INFO(p)->owner_env->init_entry)
+#endif
+			tsk_reaper = child_reaper;
+		choose_new_parent(p, tsk_reaper);
 		reparent_thread(p, father);
 	}
 }
@@ -807,6 +848,10 @@ static void exit_notify(struct task_stru
 	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
 
+	if (tsk->exit_signal != -1 && t == child_reaper)
+		/* We dont want people slaying init. */
+		tsk->exit_signal = SIGCHLD;
+
 	if (!tracehook_notify_death(tsk, &noreap, &cookie)
 	    && tsk->exit_signal != -1 && thread_group_empty(tsk))
 		do_notify_parent(tsk, tsk->exit_signal);
@@ -815,6 +860,7 @@ static void exit_notify(struct task_stru
 	if (tsk->exit_signal == -1 && !noreap)
 		state = EXIT_DEAD;
 	tsk->exit_state = state;
+	nr_zombie++;
 
 	write_unlock_irq(&tasklist_lock);
 
@@ -825,6 +871,84 @@ static void exit_notify(struct task_stru
 		release_task(tsk);
 }
 
+#ifdef CONFIG_VE
+/*
+ * Handle exitting of init process, it's a special case for VE.
+ */
+static void do_initproc_exit(void)
+{
+	struct task_struct *tsk;
+	struct ve_struct *env;
+	struct siginfo info;
+	struct task_struct *g, *p;
+	long delay = 1L;
+
+	tsk = current;
+	env = VE_TASK_INFO(current)->owner_env;
+	if (env->init_entry != tsk)
+		return;
+
+	if (ve_is_super(env) && tsk->pid == 1)
+		panic("Attempted to kill init!");
+
+	memset(&info, 0, sizeof(info));
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = virt_pid(tsk);
+	info.si_uid = current->uid;
+	info.si_signo = SIGKILL;
+
+	/*
+	 * Here the VE changes its state into "not running".
+	 * op_sem taken for write is a barrier to all VE manipulations from
+	 * ioctl: it waits for operations currently in progress and blocks all
+	 * subsequent operations until is_running is set to 0 and op_sem is
+	 * released.
+	 */
+	down_write(&env->op_sem);
+	env->is_running = 0;
+	up_write(&env->op_sem);
+
+	ve_hook_iterate_fini(VE_INIT_EXIT_CHAIN, env);
+
+	/* send kill to all processes of VE */
+	read_lock(&tasklist_lock);
+	do_each_thread_ve(g, p) {
+		force_sig_info(SIGKILL, &info, p);
+	} while_each_thread_ve(g, p);
+	read_unlock(&tasklist_lock);
+
+	/* wait for all init childs exit */
+	while (atomic_read(&env->pcounter) > 1) {
+		if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
+			continue;
+		/* it was ENOCHLD or no more children somehow */
+		if (atomic_read(&env->pcounter) == 1)
+			break;
+
+		/* clear all signals to avoid wakeups */
+		if (signal_pending(tsk))
+			flush_signals(tsk);
+		/* we have child without signal sent */
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(delay);
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		read_lock(&tasklist_lock);
+		do_each_thread_ve(g, p) {
+			if (p != tsk)
+				force_sig_info(SIGKILL, &info, p);
+		} while_each_thread_ve(g, p);
+		read_unlock(&tasklist_lock);
+	}
+	env->init_entry = child_reaper;
+	write_lock_irq(&tasklist_lock);
+	remove_parent(tsk);
+	tsk->parent = child_reaper;
+	add_parent(tsk);
+	write_unlock_irq(&tasklist_lock);
+}
+#endif
+
 fastcall NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -840,8 +964,12 @@ fastcall NORET_TYPE void do_exit(long co
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
+#ifdef CONFIG_VE
+	do_initproc_exit();
+#else
 	if (unlikely(tsk == child_reaper))
 		panic("Attempted to kill init!");
+#endif
 
 	/*
 	 * If do_exit is called because this processes oopsed, it's possible
@@ -852,6 +980,8 @@ fastcall NORET_TYPE void do_exit(long co
 	 */
 	set_fs(USER_DS);
 
+	(void)virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
+
 	tracehook_report_exit(&code);
 
 	/*
@@ -933,7 +1063,7 @@ fastcall NORET_TYPE void do_exit(long co
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
-	exit_namespace(tsk);
+	exit_task_namespaces(tsk);
 	exit_thread();
 	cpuset_exit(tsk);
 	exit_keys(tsk);
@@ -946,8 +1076,15 @@ fastcall NORET_TYPE void do_exit(long co
 		module_put(tsk->binfmt->module);
 
 	tsk->exit_code = code;
-	proc_exit_connector(tsk);
-	exit_notify(tsk);
+	if (!(tsk->flags & PF_EXIT_RESTART)) {
+		proc_exit_connector(tsk);
+		exit_notify(tsk);
+	} else {
+		write_lock_irq(&tasklist_lock);
+		tsk->exit_state = EXIT_ZOMBIE;
+		nr_zombie++;
+		write_unlock_irq(&tasklist_lock);
+	}
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
@@ -1047,14 +1184,19 @@ asmlinkage void sys_exit_group(int error
 static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
 	if (pid > 0) {
-		if (p->pid != pid)
+		if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
 			return 0;
 	} else if (!pid) {
 		if (process_group(p) != process_group(current))
 			return 0;
 	} else if (pid != -1) {
-		if (process_group(p) != -pid)
-			return 0;
+		if (__is_virtual_pid(-pid)) {
+			if (virt_pgid(p) != -pid)
+				return 0;
+		} else {
+			if (process_group(p) != -pid)
+				return 0;
+		}
 	}
 
 	/*
@@ -1124,7 +1266,7 @@ static int wait_task_zombie(struct task_
 	int status;
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = get_task_pid(p);
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1254,7 +1396,7 @@ static int wait_task_zombie(struct task_
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(get_task_pid(p), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (retval) {
@@ -1262,7 +1404,7 @@ static int wait_task_zombie(struct task_
 		p->exit_state = EXIT_ZOMBIE;
 		return retval;
 	}
-	retval = p->pid;
+	retval = get_task_pid(p);
 	release_task(p);
 
 	BUG_ON(!retval);
@@ -1302,12 +1444,11 @@ static int wait_task_stopped(struct task
 	read_unlock(&tasklist_lock);
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = get_task_pid(p);
 		uid_t uid = p->uid;
 
 		exit_code = p->exit_code;
-		if (unlikely(!exit_code) ||
-		    unlikely(p->state & TASK_TRACED))
+		if (unlikely(!exit_code) || unlikely(p->exit_state))
 			goto bail_ref;
 		return wait_noreap_copyout(p, pid, uid, CLD_STOPPED,
 					   (exit_code << 8) | 0x7f,
@@ -1370,11 +1511,11 @@ bail_ref:
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(get_task_pid(p), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = get_task_pid(p);
 	put_task_struct(p);
 
 	BUG_ON(!retval);
@@ -1411,7 +1552,7 @@ static int wait_task_continued(struct ta
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	spin_unlock_irq(&p->sighand->siglock);
 
-	pid = p->pid;
+	pid = get_task_pid(p);
 	uid = p->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
@@ -1422,7 +1563,7 @@ static int wait_task_continued(struct ta
 		if (!retval && stat_addr)
 			retval = put_user(0xffff, stat_addr);
 		if (!retval)
-			retval = p->pid;
+			retval = get_task_pid(p);
 	} else {
 		retval = wait_noreap_copyout(p, pid, uid,
 					     CLD_CONTINUED, SIGCONT,
@@ -1637,6 +1778,7 @@ asmlinkage long sys_wait4(pid_t pid, int
 	prevent_tail_call(ret);
 	return ret;
 }
+EXPORT_SYMBOL(sys_wait4);
 
 #ifdef __ARCH_WANT_SYS_WAITPID
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/fairsched.c kernel-2.6.18-417.el5-028stab121/kernel/fairsched.c
--- kernel-2.6.18-417.el5.orig/kernel/fairsched.c	2017-01-13 08:40:28.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/fairsched.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,1489 @@
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ * All rights reserved.
+ * 
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * Start-tag scheduling follows the theory presented in
+ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/timex.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/fairsched.h>
+#include <linux/vsched.h>
+
+/* we need it for vsched routines in sched.c */
+spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FAIRSHED_DEBUG		" debug"
+
+
+/*********************************************************************/
+/*
+ * Special arithmetics
+ */
+/*********************************************************************/
+
+#define CYCLES_SHIFT (8)
+#define SCYCLES_TIME(time) \
+        ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1)  >> CYCLES_SHIFT})
+
+#define CYCLES_ZERO (0)
+static inline int CYCLES_BEFORE(cycles_t x, cycles_t y)
+{
+        return (__s64)(x-y) < 0;
+}
+static inline int CYCLES_AFTER(cycles_t x, cycles_t y)
+{
+        return (__s64)(y-x) < 0;
+}
+static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;}
+
+/*
+ * fairsched_schedule() can be called rarely than on each timer tick 
+ * due to main scheduler optimizations, so new abstract timeslice must
+ * be introduced. It can have arbitrary number ot cycles, but main
+ * scheduler mustn't exceed this value and call fairsched scheduler
+ * before this timeslice is expired on a node.
+ */
+static cycles_t cycles_per_timeslice;
+#define FSCHDUR_ZERO (0)
+#define TICK_DUR ((fschdur_t){cycles_per_timeslice})
+static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y)
+{
+	return (fschdur_t){x - y};
+}
+static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y)
+{
+	if (x.d < y.d) return -1;
+	if (x.d > y.d) return 1;
+	return 0;
+}
+static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y)
+{
+	return (fschdur_t){x.d - y.d};
+}
+
+#define FSCHTAG_ZERO ((fschtag_t){0})
+static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y)
+{
+	if (x.t < y.t) return -1;
+	if (x.t > y.t) return 1;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y)
+{
+	return x.t >= y.t ? x : y;
+}
+static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w)
+{
+	cycles_t new_tag;
+	new_tag = tag->t + (cycles_t)dur.d * w;
+	if (new_tag < tag->t)
+		return -1;
+	/* DEBUG */
+	if (new_tag >= (1ULL << 48))
+		return -1;
+	tag->t = new_tag;
+	return 0;
+}
+static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y)
+{
+	cycles_t new_tag;
+	new_tag = tag->t + y.t;
+	if (new_tag < tag->t)
+		return -1;
+	tag->t = new_tag;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y)
+{
+	return (fschtag_t){x.t - y.t};
+}
+
+#define FSCHVALUE_FMT "%Lu"
+#define FSCHVALUE_PRINT(x) ((x).v)
+#define FSCHVALUE_ZERO ((fschvalue_t){0})
+#define TICK_VALUE ((fschvalue_t)	\
+	{(cycles_t)cycles_per_timeslice << FSCHRATE_SHIFT})
+static inline fschvalue_t FSCHVALUE(unsigned long t)
+{
+	return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT};
+}
+static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y)
+{
+	if (x.v < y.v) return -1;
+	if (x.v > y.v) return 1;
+	return 0;
+}
+static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur,
+		unsigned rate)
+{
+	val->v += (cycles_t)dur.d * rate;
+}
+static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y)
+{
+	return (fschvalue_t){x.v - y.v};
+}
+static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate)
+{
+	unsigned long t;
+	/*
+	 * Here we lose precision to make the division 32-bit on IA-32.
+	 * The value is not greater than TICK_VALUE.
+	 * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long.
+	 */
+	t = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT;
+	return (cycles_t)((t + rate - 1) / rate) << FSCHRATE_SHIFT;
+}
+
+
+/*********************************************************************/
+/*
+ * Global data
+ */
+/*********************************************************************/
+
+/*
+ * Assertions.
+ * Called with preemption disabled.
+ */
+
+#define fsch_assert(x)							\
+	do {								\
+		static int count;					\
+		if (x)							\
+			break;						\
+		if (count++ > 10)					\
+			break;						\
+		__printk_no_wake++;					\
+		printk("fsch_assert " #x " failed\n");			\
+		__printk_no_wake--;					\
+	} while (0)
+
+#define fsch_validate(x, fmt...)					\
+	do {								\
+		static int count;					\
+		if (x)							\
+			break;						\
+		if (count++ > 10)					\
+			break;						\
+		__printk_no_wake++;					\
+		printk("fsch_assert " #x " failed\n");			\
+		printk("fsch_assert: " fmt);				\
+		__printk_no_wake--;					\
+	} while (0)
+
+/*
+ * Configurable parameters
+ */
+unsigned fairsched_max_latency = 25; /* jiffies */
+
+/*
+ * Parameters initialized at startup
+ */
+/* Number of online CPUs */
+unsigned fairsched_nr_cpus;
+/* Token Bucket depth (burst size) */
+static fschvalue_t max_value;
+
+struct fairsched_node fairsched_init_node = {
+	.id		= INT_MAX,
+#ifdef CONFIG_VE
+	.owner_env	= get_ve0(),
+#endif
+	.weight		= 1,
+};
+EXPORT_SYMBOL(fairsched_init_node);
+
+struct fairsched_node fairsched_idle_node = {
+	.id =			-1,
+};
+
+static int fairsched_nr_nodes;
+static LIST_HEAD(fairsched_node_head);
+static LIST_HEAD(fairsched_running_head);
+static LIST_HEAD(fairsched_delayed_head);
+
+DEFINE_PER_CPU(cycles_t, prev_schedule);
+static fschtag_t max_latency;
+
+static DEFINE_MUTEX(fairsched_mutex);
+
+/*********************************************************************/
+/*
+ * Small helper routines
+ */
+/*********************************************************************/
+
+/* this didn't proved to be very valuable statistics... */
+#define fairsched_inc_ve_strv(node, cycles)  do {} while(0)
+#define fairsched_dec_ve_strv(node, cycles)  do {} while(0)
+
+/*********************************************************************/
+/*
+ * Runlist management
+ */
+/*********************************************************************/
+
+/*
+ * Returns the start_tag of the first runnable node, or 0.
+ */
+static inline fschtag_t virtual_time(void)
+{
+	struct fairsched_node *p;
+
+	if (!list_empty(&fairsched_running_head)) {
+		p = list_first_entry(&fairsched_running_head,
+				struct fairsched_node, runlist);
+		return p->start_tag;	/* runlist is tag-sorted: head is minimal */
+	}
+	return FSCHTAG_ZERO;	/* no runnable nodes */
+}
+
+static void fairsched_recompute_max_latency(void)
+{
+	struct fairsched_node *p;
+	unsigned w;
+	fschtag_t tag;
+
+	w = FSCHWEIGHT_MAX;	/* find the smallest (most favoured) weight */
+	for_each_fairsched_node(p) {
+		if (p->weight < w)
+			w = p->weight;
+	}
+	tag = FSCHTAG_ZERO;	/* one tick scaled by cpus * max latency * min weight */
+	(void) FSCHTAG_DADD(&tag, TICK_DUR,
+				fairsched_nr_cpus * fairsched_max_latency * w);
+	max_latency = tag;
+}
+
+static void fairsched_reset_start_tags(void)
+{
+	struct fairsched_node *cnode;
+	fschtag_t min_tag;
+
+	min_tag = virtual_time();	/* current minimum tag, used as new origin */
+	for_each_fairsched_node(cnode) {
+		if (FSCHTAG_CMP(cnode->start_tag, min_tag) > 0)	/* rebase tags */
+			cnode->start_tag = FSCHTAG_SUB(cnode->start_tag,
+						       min_tag);
+		else
+			cnode->start_tag = FSCHTAG_ZERO;	/* clamp at origin */
+	}
+}
+
+static void fairsched_running_insert(struct fairsched_node *node)
+{
+	struct list_head *tmp;
+	struct fairsched_node *p;
+	fschtag_t start_tag_max;
+
+	if (!list_empty(&fairsched_running_head)) {
+		start_tag_max = virtual_time();	/* cap lag behind vtime at max_latency */
+		if (!FSCHTAG_ADD(&start_tag_max, max_latency) &&
+		    FSCHTAG_CMP(start_tag_max, node->start_tag) < 0)
+			node->start_tag = start_tag_max;
+	}
+
+	list_for_each(tmp, &fairsched_running_head) {
+		p = list_entry(tmp, struct fairsched_node, runlist);
+		if (FSCHTAG_CMP(node->start_tag, p->start_tag) <= 0)	/* keep sorted */
+			break;
+	}
+	/* insert node just before tmp */
+	list_add_tail(&node->runlist, tmp);
+}
+
+/*
+ * Insert a node that just became runnable.  Its start_tag is raised to
+ * at least the current virtual time so a long sleeper cannot claim a
+ * large backlog of CPU time on wakeup.
+ */
+static inline void fairsched_running_insert_fromsleep(
+		struct fairsched_node *node)
+{
+	node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time());
+	fairsched_running_insert(node);
+}
+
+
+/*********************************************************************/
+/*
+ * CPU limiting helper functions
+ *
+ * These functions compute rates, delays and manipulate with sleep
+ * lists and so on.
+ */
+/*********************************************************************/
+
+/*
+ * Insert a node into the list of nodes removed from scheduling,
+ * sorted by the time at which the node is allowed to run,
+ * historically called `delay'.
+ */
+static void fairsched_delayed_insert(struct fairsched_node *node)
+{
+	struct fairsched_node *p;
+	struct list_head *tmp;
+
+	list_for_each(tmp, &fairsched_delayed_head) {
+		p = list_entry(tmp, struct fairsched_node,
+				   runlist);
+		if (CYCLES_AFTER(p->delay, node->delay))
+			break;
+	}
+	/* insert node just before tmp */
+	list_add_tail(&node->runlist, tmp);
+}
+
+/*
+ * Credit @duration scaled by @rate into the node's bucket value,
+ * saturating at the global max_value (token-bucket style clamp).
+ */
+static inline void nodevalue_add(struct fairsched_node *node,
+		fschdur_t duration, unsigned rate)
+{
+	FSCHVALUE_DADD(&node->value, duration, rate);
+	if (FSCHVALUE_CMP(node->value, max_value) > 0)
+		node->value = max_value;
+}
+
+/*
+ * The node has been selected to run.
+ * This function accounts in advance for the time that the node will run.
+ * The advance not used by the node will be credited back.
+ */
+static void fairsched_ratelimit_charge_advance(
+		struct fairsched_node *node,
+		cycles_t time)
+{
+	fsch_assert(!node->delayed);
+	fsch_validate(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0,
+			"charge, value " FSCHVALUE_FMT
+			", tick " FSCHVALUE_FMT
+			", delay %Lu, time %Lu"
+			", lastupd %Lu, rate %u\n",
+			FSCHVALUE_PRINT(node->value),
+			FSCHVALUE_PRINT(TICK_VALUE),
+			node->delay, time,
+			node->last_updated_at, node->rate);
+
+	/*
+	 * Account for the time passed since last update.
+	 * It might be needed if the node has become runnable because of
+	 * a wakeup, but hasn't gone through other functions updating
+	 * the bucket value.
+	 */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/* charge for the full tick the node might be running */
+	node->value = FSCHVALUE_SUB(node->value, TICK_VALUE);
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
+		/*
+		 * Bucket exhausted: move the node to the delayed list
+		 * until enough value accumulates for another tick.
+		 */
+		list_del(&node->runlist);
+		node->delayed = 1;
+		node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+					FSCHVALUE_SUB(TICK_VALUE, node->value),
+					node->rate);
+		node->nr_ready = 0;
+		fairsched_delayed_insert(node);
+	}
+}
+
+/*
+ * The node is releasing the CPU after running for @duration.
+ * Credit back the part of the advance charge (one full tick, taken in
+ * fairsched_ratelimit_charge_advance()) that was not actually consumed,
+ * and update the node's delayed status accordingly.
+ */
+static void fairsched_ratelimit_credit_unused(
+		struct fairsched_node *node,
+		cycles_t time, fschdur_t duration)
+{
+	/* account for the time passed since last update */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/*
+	 * When the node was given this CPU, it was charged for 1 tick.
+	 * Credit back the unused time.
+	 */
+	if (FSCHDUR_CMP(duration, TICK_DUR) < 0)
+		nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration),
+			      1 << FSCHRATE_SHIFT);
+
+	/* check if the node is allowed to run */
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
+		/*
+		 * The node was delayed and remain such.
+		 * But since the bucket value has been updated,
+		 * update the delay time and move the node in the list.
+		 */
+		fsch_assert(node->delayed);
+		node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+					FSCHVALUE_SUB(TICK_VALUE, node->value),
+					node->rate);
+	} else if (node->delayed) {
+		/*
+		 * The node was delayed, but now it is allowed to run.
+		 * We do not manipulate with lists, it will be done by the
+		 * caller.
+		 */
+		node->nr_ready = node->nr_runnable;
+		node->delayed = 0;
+	}
+}
+
+/*
+ * Wake all delayed nodes whose delay period has expired by @time.
+ * The delayed list is sorted by wake time, so we stop at the first
+ * node whose delay is still in the future.
+ */
+static void fairsched_delayed_wake(cycles_t time)
+{
+	struct fairsched_node *p;
+
+	while (!list_empty(&fairsched_delayed_head)) {
+		p = list_entry(fairsched_delayed_head.next,
+				  struct fairsched_node,
+				  runlist);
+		if (CYCLES_AFTER(p->delay, time))
+			break;
+
+		/* ok, the delay period is completed */
+		/* account for the time passed since last update */
+		if (CYCLES_AFTER(time, p->last_updated_at)) {
+			nodevalue_add(p, FSCHDURATION(time, p->last_updated_at),
+					p->rate);
+			p->last_updated_at = time;
+		}
+
+		fsch_validate(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0,
+				"wake, value " FSCHVALUE_FMT
+				", tick " FSCHVALUE_FMT
+				", delay %Lu, time %Lu"
+				", lastupd %Lu, rate %u\n",
+				FSCHVALUE_PRINT(p->value),
+				FSCHVALUE_PRINT(TICK_VALUE),
+				p->delay, time,
+				p->last_updated_at, p->rate);
+		p->nr_ready = p->nr_runnable;
+		p->delayed = 0;
+		list_del_init(&p->runlist);
+		if (p->nr_ready)
+			fairsched_running_insert_fromsleep(p);
+	}
+}
+
+static struct fairsched_node *fairsched_find(unsigned int id);
+
+/*
+ * Report the online-CPU map of the virtual scheduler attached to the
+ * fairsched node @id; CPU_MASK_NONE if no such node exists.
+ */
+void fairsched_cpu_online_map(int id, cpumask_t *mask)
+{
+	struct fairsched_node *node;
+
+	mutex_lock(&fairsched_mutex);
+	node = fairsched_find(id);
+	if (node == NULL)
+		*mask = CPU_MASK_NONE;
+	else
+		vsched_cpu_online_map(node->vsched, mask);
+	mutex_unlock(&fairsched_mutex);
+}
+
+/*********************************************************************/
+/*
+ * The heart of the algorithm:
+ * fairsched_incrun, fairsched_decrun, fairsched_schedule
+ *
+ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore.
+ * However, nr_runnable, nr_ready and delayed are maintained in sync.
+ */
+/*********************************************************************/
+
+/*
+ * Called on a wakeup inside the node.
+ * Increments the runnable-task counters; the 0->1 transition of
+ * nr_ready puts a non-delayed node back on the running list.
+ */
+void fairsched_incrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !node->nr_ready++)
+		/* the node wasn't on the running list, insert */
+		fairsched_running_insert_fromsleep(node);
+	node->nr_runnable++;
+}
+
+/*
+ * Called from inside schedule() when a sleeping state is entered.
+ * Mirror of fairsched_incrun(): the 1->0 transition of nr_ready
+ * removes a non-delayed node from the running list.
+ */
+void fairsched_decrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !--node->nr_ready)
+		/* nr_ready changed 1->0, remove from the running list */
+		list_del_init(&node->runlist);
+	--node->nr_runnable;
+}
+
+/* A physical CPU starts executing tasks of this node. */
+void fairsched_inccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu++;
+	fairsched_dec_ve_strv(node, cycles);
+}
+
+static inline void __fairsched_deccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu--;
+	fairsched_inc_ve_strv(node, cycles);
+}
+
+/*
+ * A physical CPU stops executing tasks of this node.
+ * The idle node keeps no per-CPU accounting, so it is skipped.
+ */
+void fairsched_deccpu(struct fairsched_node *node)
+{
+	if (node == &fairsched_idle_node)
+		return;
+
+	__fairsched_deccpu(node);
+}
+
+/*
+ * Charge the CPU time just consumed to @node and reposition it on the
+ * appropriate list (running or delayed).  Called from fairsched_switch()
+ * for the node that is releasing the CPU.
+ */
+static void fairsched_account(struct fairsched_node *node,
+		cycles_t time)
+{
+	fschdur_t duration;
+
+	duration = FSCHDURATION(time, __get_cpu_var(prev_schedule));
+#ifdef CONFIG_VE
+	CYCLES_DADD(&node->owner_env->cpu_used_ve, duration);
+#endif
+
+	/*
+	 * The duration is not greater than TICK_DUR since
+	 * task->need_resched is always 1.
+	 */
+	if (FSCHTAG_DADD(&node->start_tag, duration, node->weight)) {
+		/* tag arithmetic overflowed: rebase all tags and retry */
+		fairsched_reset_start_tags();
+		(void) FSCHTAG_DADD(&node->start_tag, duration,
+					node->weight);
+	}
+
+	list_del_init(&node->runlist);
+	if (node->rate_limited)
+		fairsched_ratelimit_credit_unused(node, time, duration);
+	if (!node->delayed) {
+		if (node->nr_ready)
+			fairsched_running_insert(node);
+	} else
+		fairsched_delayed_insert(node);
+}
+
+/*
+ * Scheduling decision
+ *
+ * Updates CPU usage for the node releasing the CPU and selects a new node.
+ */
+
+/*
+ * Return the first runnable node (one with an active vsched list, or
+ * the current node itself) after waking any expired delayed nodes.
+ */
+struct fairsched_node *fairsched_first(struct fairsched_node *cur_node,
+		cycles_t time)
+{
+	struct fairsched_node *node;
+
+	fairsched_delayed_wake(time);
+	list_for_each_entry(node, &fairsched_running_head, runlist)
+		if (!list_empty(&node->vsched->active_list) || node == cur_node)
+			return node;
+	return NULL;
+}
+
+/* Continue the scan started by fairsched_first() from @node onward. */
+struct fairsched_node *fairsched_next(struct fairsched_node *cur_node,
+		struct fairsched_node *node)
+{
+	list_for_each_entry_continue(node, &fairsched_running_head, runlist)
+		if (!list_empty(&node->vsched->active_list) || node == cur_node)
+			return node;
+	return NULL;
+}
+
+/*
+ * Context switch between fairsched nodes: account the outgoing node,
+ * stamp the per-CPU switch time, and pre-charge the incoming node if
+ * it is rate limited.  The idle node is neither charged nor accounted.
+ */
+void fairsched_switch(struct fairsched_node *prev_node,
+		struct fairsched_node *next_node, cycles_t time)
+{
+	if (prev_node != &fairsched_idle_node)
+		fairsched_account(prev_node, time);
+	__get_cpu_var(prev_schedule) = time;
+	if (next_node != &fairsched_idle_node && next_node->rate_limited)
+		fairsched_ratelimit_charge_advance(next_node, time);
+}
+
+/*********************************************************************/
+/*
+ * System calls
+ *
+ * All do_xxx functions are called under fairsched semaphore and after
+ * capability check.
+ *
+ * The binary interfaces follow some other Fair Scheduler implementations
+ * (although some system call arguments are not needed for our implementation).
+ */
+/*********************************************************************/
+
+/*
+ * Linear lookup of a node by id; NULL if not found.
+ * Called under fairsched_mutex.
+ */
+static struct fairsched_node *fairsched_find(unsigned int id)
+{
+	struct fairsched_node *p;
+
+	for_each_fairsched_node(p) {
+		if (p->id == id)
+			return p;
+	}
+	return NULL;
+}
+
+/*
+ * Create a new fairsched node with the given weight and id.
+ * @parent is accepted for binary compatibility but unused here.
+ * Returns the new id on success or a negative errno.
+ * Called under fairsched_mutex.
+ */
+static int do_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid)
+{
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -EINVAL;
+	if (weight < 1 || weight > FSCHWEIGHT_MAX)
+		goto out;
+	/*
+	 * newid is unsigned, so the former "newid < 0" test was always
+	 * false; only the upper bound matters (the id is returned as a
+	 * non-negative int).
+	 */
+	if (newid > INT_MAX)
+		goto out;
+
+	retval = -EBUSY;
+	if (fairsched_find(newid) != NULL)
+		goto out;
+
+	retval = -ENOMEM;
+	/* kzalloc() replaces the former kmalloc() + memset() pair */
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		goto out;
+
+	node->weight = weight;
+	INIT_LIST_HEAD(&node->runlist);
+	node->id = newid;
+	node->vcpus = 0;
+#ifdef CONFIG_VE
+	node->owner_env = get_exec_env();
+#endif
+
+	spin_lock_irq(&fairsched_lock);
+	list_add(&node->nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	retval = newid;
+out:
+	return retval;
+}
+
+/*
+ * Syscall wrapper: privilege check plus fairsched_mutex around
+ * do_fairsched_mknod().
+ */
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_mknod(parent, weight, newid);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Remove an empty fairsched node.  Refuses to remove the init node or
+ * a node whose vsched still has tasks.  Called under fairsched_mutex.
+ */
+static int do_fairsched_rmnod(unsigned int id)
+{
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -EINVAL;
+	node = fairsched_find(id);
+	if (node == NULL)
+		goto out;
+	if (node == &fairsched_init_node)
+		goto out;
+
+	/* check if node is empty */
+	retval = -EBUSY;
+	if (vsched_taskcount(node->vsched))
+		goto out;
+
+	retval = vsched_destroy(node->vsched);
+	if (retval)
+		goto out;
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&node->runlist); /* required for delayed nodes */
+	list_del(&node->nodelist);
+	fairsched_nr_nodes--;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(node);
+	retval = 0;
+out:
+	return retval;
+}
+
+/* Syscall wrapper: privilege check plus mutex around do_fairsched_rmnod(). */
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_rmnod(id);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Change the weight of node @id (id 0 is reserved and rejected).
+ * The global max_latency bound depends on the minimum weight, so it
+ * is recomputed under fairsched_lock.  Called under fairsched_mutex.
+ */
+static int do_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	struct fairsched_node *node;
+
+	if (id == 0)
+		return -EINVAL;
+	if (weight < 1 || weight > FSCHWEIGHT_MAX)
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	spin_lock_irq(&fairsched_lock);
+	node->weight = weight;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	return 0;
+}
+
+/* Syscall wrapper: privilege check plus mutex around do_fairsched_chwt(). */
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_chwt(id, weight);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Set the number of virtual CPUs of node @id.  An out-of-range request
+ * is silently clamped to the number of online physical CPUs.  If the
+ * node already has a vsched, the change is applied to it immediately.
+ * Called under fairsched_mutex.
+ */
+static int do_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	struct fairsched_node *node;
+	int ret = 0;
+
+	if (id == 0)
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	if (vcpus < 1 || vcpus > num_online_cpus())
+		vcpus = num_online_cpus();
+
+	node->vcpus = vcpus;
+	if (node->vsched != NULL) {
+		ret = vsched_set_vcpus(node->vsched, vcpus);
+		/* FIXME: adjust rate ... */
+	}
+
+	return ret;
+}
+
+/* Syscall wrapper: privilege check plus mutex around do_fairsched_vcpus(). */
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_vcpus(id, vcpus);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Set, drop, or query the CPU rate limit of node @id.
+ * Returns the effective rate (SET/GET), 0 (DROP), or a negative errno.
+ * Called under fairsched_mutex; list and bucket state are manipulated
+ * under fairsched_lock with a single timestamp for consistency.
+ */
+static int do_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	struct fairsched_node *node;
+	cycles_t time;
+	int retval;
+
+	if (id == 0)
+		return -EINVAL;
+	if (op == FAIRSCHED_SET_RATE && (rate < 1 || rate >= (1UL << 31)))
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	retval = -EINVAL;
+	spin_lock_irq(&fairsched_lock);
+	time = get_cycles();
+	switch (op) {
+		case FAIRSCHED_SET_RATE:
+			/* clamp to the machine's total capacity */
+			node->rate = rate;
+			if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT))
+				node->rate =
+					fairsched_nr_cpus << FSCHRATE_SHIFT;
+			node->rate_limited = 1;
+			node->value = max_value;
+			/*
+			 * A delayed node gets a full bucket, so requeue
+			 * it for immediate wakeup under the new rate.
+			 */
+			if (node->delayed) {
+				list_del(&node->runlist);
+				node->delay = time;
+				fairsched_delayed_insert(node);
+				node->last_updated_at = time;
+				fairsched_delayed_wake(time);
+			}
+			retval = node->rate;
+			break;
+		case FAIRSCHED_DROP_RATE:
+			node->rate = 0; /* This assignment is not needed
+					   for the kernel code, and it should
+					   not rely on rate being 0 when it's
+					   unset.  This is a band-aid for some
+					   existing tools (don't know which one
+					   exactly).  --SAW */
+			node->rate_limited = 0;
+			node->value = max_value;
+			/* same immediate-wakeup dance as in SET_RATE */
+			if (node->delayed) {
+				list_del(&node->runlist);
+				node->delay = time;
+				fairsched_delayed_insert(node);
+				node->last_updated_at = time;
+				fairsched_delayed_wake(time);
+			}
+			retval = 0;
+			break;
+		case FAIRSCHED_GET_RATE:
+			if (node->rate_limited)
+				retval = node->rate;
+			else
+				retval = -ENODATA;
+			break;
+	}
+	spin_unlock_irq(&fairsched_lock);
+
+	return retval;
+}
+
+/* Syscall wrapper: privilege check plus mutex around do_fairsched_rate(). */
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_rate(id, op, rate);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * Called under fairsched_mutex.
+ * Move task @p into @node's virtual scheduler, lazily creating the
+ * vsched on first use.
+ */
+static int __do_fairsched_mvpr(struct task_struct *p,
+		struct fairsched_node *node)
+{
+	int retval;
+
+	if (node->vsched == NULL) {
+		retval = vsched_create(node->id, node);
+		if (retval < 0)
+			return retval;
+	}
+
+	/* no need to destroy vsched in case of mvpr failure */
+	return vsched_mvpr(p, node->vsched);
+}
+
+/*
+ * Look up the task by pid and move it to node @nodeid.  The task ref
+ * taken under tasklist_lock keeps it alive across the move.
+ * Called under fairsched_mutex.
+ */
+static int do_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	struct task_struct *p;
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -ENOENT;
+	node = fairsched_find(nodeid);
+	if (node == NULL)
+		goto out;
+
+	read_lock(&tasklist_lock);
+	retval = -ESRCH;
+	p = find_task_by_pid_all(pid);
+	if (p == NULL)
+		goto out_unlock;
+	get_task_struct(p);
+	read_unlock(&tasklist_lock);
+
+	retval = __do_fairsched_mvpr(p, node);
+	put_task_struct(p);
+	return retval;
+
+out_unlock:
+	read_unlock(&tasklist_lock);
+out:
+	return retval;
+}
+
+/* Syscall wrapper: privilege check plus mutex around do_fairsched_mvpr(). */
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_mvpr(pid, nodeid);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL(sys_fairsched_mvpr);
+
+/*
+ * Restrict node @nodeid to the physical CPUs in @mask.  The mask must
+ * intersect the online map; the vsched is created lazily if needed.
+ * Called under fairsched_mutex.
+ */
+static int do_fairsched_cpumask(unsigned int nodeid, cpumask_t *mask)
+{
+	struct fairsched_node *node;
+	int retval;
+
+	retval = -EINVAL;
+	if (nodeid == 0 || !cpus_intersects(*mask, cpu_online_map))
+		goto out;
+
+	retval = -ENOENT;
+	node = fairsched_find(nodeid);
+	if (node == NULL)
+		goto out;
+
+	if (node->vsched == NULL) {
+		retval = vsched_create(node->id, node);
+		if (retval < 0)
+			goto out;
+	}
+
+	spin_lock_irq(&fairsched_lock);
+	node->vsched->pcpu_allowed_map = *mask;
+	spin_unlock_irq(&fairsched_lock);
+
+	retval = 0;
+out:
+	return retval;
+}
+
+/*
+ * Syscall wrapper: copies the user cpumask in, then calls
+ * do_fairsched_cpumask() under the mutex.
+ */
+asmlinkage int sys_fairsched_cpumask(unsigned int id, unsigned int len,
+		unsigned long __user *user_mask_ptr)
+{
+	cpumask_t mask;
+	int retval;
+
+	if (!capable_setveid())
+		return -EPERM;
+
+	if (get_user_cpu_mask(user_mask_ptr, len, &mask))
+		return -EFAULT;
+
+	mutex_lock(&fairsched_mutex);
+	retval = do_fairsched_cpumask(id, &mask);
+	mutex_unlock(&fairsched_mutex);
+
+	return retval;
+}
+
+/*
+ * In-kernel helper: create node @id with @vcpus virtual CPUs and move
+ * the current task into it, undoing the node creation on any failure.
+ * Returns 0 on success or a negative errno.
+ */
+int fairsched_new_node(int id, unsigned int vcpus)
+{
+	int err;
+
+	mutex_lock(&fairsched_mutex);
+	/*
+	 * We refuse to switch to an already existing node since nodes
+	 * keep a pointer to their ve_struct...
+	 */
+	err = do_fairsched_mknod(0, 1, id);
+	if (err < 0) {
+		printk(KERN_WARNING "Can't create fairsched node %d\n", id);
+		goto out;
+	}
+	err = do_fairsched_vcpus(id, vcpus);
+	if (err) {
+		printk(KERN_WARNING "Can't set sched vcpus on node %d\n", id);
+		goto cleanup;
+	}
+	err = do_fairsched_mvpr(current->pid, id);
+	if (err) {
+		printk(KERN_WARNING "Can't switch to fairsched node %d\n", id);
+		goto cleanup;
+	}
+	mutex_unlock(&fairsched_mutex);
+	return 0;
+
+cleanup:
+	if (do_fairsched_rmnod(id))
+		printk(KERN_ERR "Can't clean fairsched node %d\n", id);
+out:
+	mutex_unlock(&fairsched_mutex);
+	return err;
+}
+EXPORT_SYMBOL(fairsched_new_node);
+
+/*
+ * In-kernel helper, inverse of fairsched_new_node(): move the current
+ * task back to the init node if needed, then remove node @id.
+ */
+void fairsched_drop_node(int id)
+{
+	mutex_lock(&fairsched_mutex);
+	if (task_vsched_id(current) == id)
+		if (do_fairsched_mvpr(current->pid, fairsched_init_node.id))
+			printk(KERN_WARNING "Can't leave sched node %d\n", id);
+	if (do_fairsched_rmnod(id))
+		printk(KERN_ERR "Can't remove fairsched node %d\n", id);
+	mutex_unlock(&fairsched_mutex);
+}
+EXPORT_SYMBOL(fairsched_drop_node);
+
+/*********************************************************************/
+/*
+ * proc interface
+ */
+/*********************************************************************/
+
+/* Stable snapshot of one node's state, taken for the /proc reports. */
+struct fairsched_node_dump {
+#ifdef CONFIG_VE
+	envid_t veid;
+#endif
+	int id;
+	unsigned weight;
+	unsigned rate;
+	unsigned rate_limited : 1,
+		 delayed : 1;
+	fschtag_t start_tag;
+	fschvalue_t value;
+	cycles_t delay;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int nr_tasks, nr_runtasks;
+};
+
+/* Snapshot of all visible nodes; nodes[] is a flexible-style tail array. */
+struct fairsched_dump {
+	int len, compat;
+	struct fairsched_node_dump nodes[0];
+};
+
+/*
+ * Take a consistent snapshot of all nodes visible to the caller's VE.
+ * The buffer is sized optimistically (+16 slack for the host VE) and
+ * the whole allocation is retried if nodes appeared in the meantime,
+ * since we cannot allocate while holding fairsched_lock.
+ * Returns a vmalloc'ed dump (freed by the caller) or NULL on OOM.
+ */
+static struct fairsched_dump *fairsched_do_dump(int compat)
+{
+	int nr_nodes;
+	int len, i;
+	struct fairsched_dump *dump;
+	struct fairsched_node *node;
+	struct fairsched_node_dump *p;
+	unsigned long flags;
+
+start:
+	nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1);
+	len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]);
+	dump = ub_vmalloc(len);
+	if (dump == NULL)
+		goto out;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	/* the node count may have grown since the unlocked sizing above */
+	if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes)
+		goto repeat;
+	p = dump->nodes;
+	list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) {
+		if ((char *)p - (char *)dump >= len)
+			break;
+		p->nr_tasks = 0;
+		p->nr_runtasks = 0;
+#ifdef CONFIG_VE
+		/* skip nodes belonging to VEs the caller may not see */
+		if (!ve_accessible(node->owner_env, get_exec_env()))
+			continue;
+		p->veid = node->owner_env->veid;
+		if (compat) {
+			p->nr_tasks = atomic_read(&node->owner_env->pcounter);
+			p->nr_runtasks += nr_running_vsched(node->vsched);
+			if (p->nr_runtasks < 0)
+				p->nr_runtasks = 0;
+		}
+#endif
+		p->id = node->id;
+		p->weight = node->weight;
+		p->rate = node->rate;
+		p->rate_limited = node->rate_limited;
+		p->delayed = node->delayed;
+		p->start_tag = node->start_tag;
+		p->value = node->value;
+		p->delay = node->delay;
+		p->nr_ready = node->nr_ready;
+		p->nr_runnable = node->nr_runnable;
+		p->nr_pcpu = node->nr_pcpu;
+		p++;
+	}
+	dump->len = p - dump->nodes;
+	dump->compat = compat;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+
+out:
+	return dump;
+
+repeat:
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+	vfree(dump);
+	goto start;
+}
+
+#define FAIRSCHED_PROC_HEADLINES 2
+
+#if defined(CONFIG_VE)
+/*
+ * File format is dictated by compatibility reasons.
+ *
+ * The iterator packs two pieces of information into the void *v cookie:
+ * the snapshot entry pointer (upper bits) and a 0..2 sub-row index in
+ * the low two bits -- each node is printed as up to three /proc lines.
+ */
+static int fairsched_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+	unsigned vid, nid, pid, r;
+
+	dump = m->private;
+	/* strip the 2-bit sub-row index from the cookie */
+	p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.6 debug\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "      veid "
+				       "        id "
+				       "    parent "
+				       "weight "
+				       " rate "
+  				       "tasks "
+				       "  run "
+				       "cpus"
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;
+		vid = nid = pid = 0;
+		r = (unsigned long)v & 3;
+		/* choose which of veid/node-id/parent-id this sub-row shows */
+		if (p == dump->nodes) {
+			if (r == 2)
+				nid = p->id;
+		} else {
+			if (!r)
+				nid = p->id;
+			else if (r == 1)
+				vid = pid = p->id;
+			else
+				vid = p->id, nid = 1;
+		}
+		seq_printf(m,
+			       "%10u "
+			       "%10u %10u %6u %5u %5u %5u %4u"
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+			       "\n",
+			       vid,
+			       nid,
+			       pid,
+			       p->weight,
+			       p->rate,
+			       p->nr_tasks,
+			       p->nr_runtasks,
+			       p->nr_pcpu,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       (unsigned long long)p->start_tag.t,
+			       (unsigned long long)p->value.v,
+			       (unsigned long long)p->delay
+			       );
+	}
+
+	return 0;
+}
+
+/* Map *pos to the packed entry-pointer/sub-row cookie described above. */
+static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+	unsigned long l;
+
+	dump = m->private;
+	/* 3 output lines per node, minus the shortened first entry */
+	if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	if (*pos < FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	/* guess why... */
+	l = (unsigned long)(dump->nodes +
+		((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
+	l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
+	return (void *)l;
+}
+static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return fairsched_seq_start(m, pos);
+}
+#endif
+
+/*
+ * /proc/fairsched2 formatter: one plain line per snapshot entry,
+ * preceded by FAIRSCHED_PROC_HEADLINES header lines.
+ */
+static int fairsched2_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+
+	dump = m->private;
+	p = v;
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "        id "
+				       "weight "
+				       " rate "
+				       "  run "
+				       "cpus"
+#ifdef FAIRSHED_DEBUG
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+#endif
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;
+		seq_printf(m,
+			       "%10u %6u %5u %5u %4u"
+#ifdef FAIRSHED_DEBUG
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+#endif
+			       "\n",
+			       p->id,
+			       p->weight,
+			       p->rate,
+			       p->nr_runnable,
+			       p->nr_pcpu
+#ifdef FAIRSHED_DEBUG
+			       ,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       (unsigned long long)p->start_tag.t,
+			       (unsigned long long)p->value.v,
+			       (unsigned long long)p->delay
+#endif
+			       );
+	}
+
+	return 0;
+}
+
+/* Straightforward seq_file iterator over the snapshot array. */
+static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+
+	dump = m->private;
+	if (*pos >= dump->len + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	return dump->nodes + *pos;
+}
+static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return fairsched2_seq_start(m, pos);
+}
+/* nothing to release per iteration; the dump lives until file release */
+static void fairsched2_seq_stop(struct seq_file *m, void *v)
+{
+}
+
+#ifdef CONFIG_VE
+static struct seq_operations fairsched_seq_op = {
+	.start		= fairsched_seq_start,
+	.next		= fairsched_seq_next,
+	.stop		= fairsched2_seq_stop,
+	.show		= fairsched_seq_show
+};
+#endif
+static struct seq_operations fairsched2_seq_op = {
+	.start		= fairsched2_seq_start,
+	.next		= fairsched2_seq_next,
+	.stop		= fairsched2_seq_stop,
+	.show		= fairsched2_seq_show
+};
+/*
+ * Both /proc/fairsched and /proc/fairsched2 share this open routine;
+ * the compat (2.6) format is chosen by the dentry name length, and the
+ * node snapshot is taken once at open time into m->private.
+ */
+static int fairsched_seq_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct seq_file *m;
+	int compat;
+
+#ifdef CONFIG_VE
+	compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
+	ret = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
+#else
+	compat = 0;
+	ret = seq_open(file, &fairsched2_seq_op);
+#endif
+	if (ret)
+		return ret;
+	m = file->private_data;
+	m->private = fairsched_do_dump(compat);
+	if (m->private == NULL) {
+		seq_release(inode, file);
+		ret = -ENOMEM;
+	}
+	return ret;
+}
+/* Free the snapshot taken at open time, then release the seq_file. */
+static int fairsched_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	struct fairsched_dump *dump;
+
+	m = file->private_data;
+	dump = m->private;
+	m->private = NULL;
+	vfree(dump);
+	seq_release(inode, file);
+	return 0;
+}
+static struct file_operations proc_fairsched_operations = {
+	.open		= fairsched_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= fairsched_seq_release
+};
+
+
+/*********************************************************************/
+/*
+ * Fairsched initialization
+ */
+/*********************************************************************/
+
+/*
+ * sysctl handler for the max-latency tunable: run the regular integer
+ * handler, then, only if the value was actually changed by a write,
+ * recompute the derived max_latency bound under fairsched_lock.
+ */
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (!write || *valp == val)
+		return ret;
+
+	spin_lock_irq(&fairsched_lock);
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+	return ret;
+}
+
+/*
+ * Derive the cycle-based constants (timeslice length, bucket ceiling)
+ * from the calibrated cycles_per_jiffy and the online CPU count.
+ */
+static void fairsched_calibrate(void)
+{
+	fairsched_nr_cpus = num_online_cpus();
+	cycles_per_timeslice = msecs_to_jiffies(FSCH_TIMESLICE)
+							* cycles_per_jiffy;
+	max_value = FSCHVALUE(cycles_per_timeslice * (fairsched_nr_cpus + 1));
+}
+
+/* Register the statically allocated init node as the first node. */
+void __init fairsched_init_early(void)
+{
+	fairsched_init_node.vcpus = num_online_cpus();
+	list_add(&fairsched_init_node.nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;
+}
+
+/*
+ * Note: this function is executed late in the initialization sequence.
+ * We ourselves need calibrated cycles and initialized procfs...
+ * The consequence of this late initialization is that start tags are
+ * effectively ignored and each node preempts others on insertion.
+ * But it isn't a problem (only init node can be runnable).
+ */
+void __init fairsched_init_late(void)
+{
+	struct proc_dir_entry *entry;
+
+	/* the scheduler is driven entirely by the TSC */
+	if (get_cycles() == 0)
+		panic("FAIRSCHED: no TSC!\n");
+	fairsched_calibrate();
+	fairsched_recompute_max_latency();
+
+	/* both files share one fops; the format is picked by file name */
+	entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+	entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+}
+
+
+#else /* CONFIG_FAIRSCHED */
+
+
+/*********************************************************************/
+/*
+ * No Fairsched
+ */
+/*********************************************************************/
+
+/*
+ * Stubs used when CONFIG_FAIRSCHED is not set: the syscall table
+ * entries still resolve, but every call reports -ENOSYS.
+ */
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_cpumask(unsigned int id, unsigned int len,
+		unsigned long __user *user_mask_ptr)
+{
+	return -ENOSYS;
+}
+
+/* nothing to set up without the fair scheduler */
+void __init fairsched_init_late(void)
+{
+}
+
+#endif /* CONFIG_FAIRSCHED */
diff -upr kernel-2.6.18-417.el5.orig/kernel/fork.c kernel-2.6.18-417.el5-028stab121/kernel/fork.c
--- kernel-2.6.18-417.el5.orig/kernel/fork.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/fork.c	2017-01-13 08:40:40.000000000 -0500
@@ -19,6 +19,7 @@
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
 #include <linux/namespace.h>
+#include <linux/file.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -27,7 +28,9 @@
 #include <linux/binfmts.h>
 #include <linux/mman.h>
 #include <linux/mmu_notifier.h>
+#include <linux/virtinfo.h>
 #include <linux/fs.h>
+#include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
@@ -63,11 +66,18 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+#include <ub/ub_misc.h>
+#include <ub/ub_oom.h>
+#include <linux/kmem_cache.h>
+#include <ub/ub_mem.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
 unsigned long total_forks;	/* Handle normal Linux uptimes. */
 int nr_threads; 		/* The idle threads do not count.. */
+EXPORT_SYMBOL(nr_threads);
 
 int max_threads;		/* tunable limit on nr_threads */
 
@@ -117,7 +127,7 @@ kmem_cache_t *files_cachep;
 kmem_cache_t *fs_cachep;
 
 /* SLAB cache for vm_area_struct structures */
-kmem_cache_t *vm_area_cachep;
+kmem_cache_t *__vm_area_cachep;
 
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static kmem_cache_t *mm_cachep;
@@ -137,11 +147,16 @@ void __put_task_struct(struct task_struc
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	ub_task_put(tsk);
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
 	delayacct_tsk_free(tsk);
 
+#ifdef CONFIG_VE
+	put_ve(VE_TASK_INFO(tsk)->owner_env);
+	atomic_dec(&nr_dead);
+#endif
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
@@ -159,7 +174,7 @@ void __init fork_init(unsigned long memp
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
 #endif
 
 	/*
@@ -199,7 +214,7 @@ static struct task_struct *dup_task_stru
 		return NULL;
 	}
 
-	aux = kmalloc(sizeof(*aux), GFP_KERNEL);
+	aux = kmalloc(sizeof(*aux), GFP_KERNEL_UBC);
 	if (!aux) {
 		free_thread_info(ti);
 		free_task_struct(tsk);
@@ -342,6 +357,10 @@ static inline int dup_mmap(struct mm_str
 	rb_parent = NULL;
 	pprev = &mm->mmap;
 
+	retval = ub_page_table_precharge(mm, oldmm->page_table_charged);
+	if (retval)
+		goto out;
+
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
 
@@ -352,14 +371,19 @@ static inline int dup_mmap(struct mm_str
 								-pages);
 			continue;
 		}
+
 		charge = 0;
+		if (ub_memory_charge(mm, mpnt->vm_end - mpnt->vm_start,
+					mpnt->vm_flags & ~VM_LOCKED,
+					mpnt->vm_file, UB_HARD))
+			goto fail_noch;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory(len))
 				goto fail_nomem;
 			charge = len;
 		}
-		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		tmp = allocate_vma(mm, SLAB_KERNEL);
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
@@ -399,7 +423,7 @@ static inline int dup_mmap(struct mm_str
 		rb_parent = &tmp->vm_rb;
 
 		mm->map_count++;
-		retval = copy_page_range(mm, oldmm, mpnt);
+		retval = copy_page_range(mm, oldmm, tmp, mpnt);
 
 		if (tmp->vm_ops && tmp->vm_ops->open)
 			tmp->vm_ops->open(tmp);
@@ -412,13 +436,17 @@ static inline int dup_mmap(struct mm_str
 #endif
 	retval = 0;
 out:
+	ub_page_table_commit(mm);
 	up_write(&mm->mmap_sem);
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
 fail_nomem_policy:
-	kmem_cache_free(vm_area_cachep, tmp);
+	free_vma(mm, tmp);
 fail_nomem:
+	ub_memory_uncharge(mm, mpnt->vm_end - mpnt->vm_start,
+			mpnt->vm_flags & ~VM_LOCKED, mpnt->vm_file);
+fail_noch:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
 	goto out;
@@ -444,12 +472,46 @@ static inline void mm_free_pgd(struct mm
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
+ EXPORT_SYMBOL(mmlist_lock);
+
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
+struct vm_area_struct *allocate_vma(struct mm_struct *mm, gfp_t gfp_flags)
+{
+	struct vm_area_struct *vma;
+
+	vma = kmem_cache_alloc(__vm_area_cachep, gfp_flags);
+	if (unlikely(!vma))
+		goto out;
+
+	local_irq_disable();
+	if (ub_kmemsize_charge(mm->mm_ub, CHARGE_SIZE(__vm_area_cachep->objuse),
+				(gfp_flags & __GFP_SOFT_UBC)?UB_SOFT:UB_HARD))
+		goto out_free;
+	local_irq_enable();
+
+	return vma;
+
+out_free:
+	local_irq_enable();
+	kmem_cache_free(__vm_area_cachep, vma);
+out:
+	return NULL;
+}
+
+void free_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	local_irq_disable();
+	ub_kmemsize_uncharge(mm->mm_ub, CHARGE_SIZE(__vm_area_cachep->objuse));
+	local_irq_enable();
+	kmem_cache_free(__vm_area_cachep, vma);
+}
+
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm,
+		struct task_struct *tsk)
 {
 	unsigned long mm_flags;
 
@@ -459,6 +521,8 @@ static struct mm_struct * mm_init(struct
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_waiters = 0;
 	mm->nr_ptes = 0;
+	mm->page_table_precharge = 0;
+	mm->page_table_charged = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
 	spin_lock_init(&mm->page_table_lock);
@@ -467,6 +531,7 @@ static struct mm_struct * mm_init(struct
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
 
+	set_mm_ub(mm, tsk);
 	mm_flags = get_mm_flags(current->mm);
 	if (mm_flags != default_dump_filter) {
 		if (unlikely(set_mm_flags(mm, mm_flags, 0) < 0))
@@ -482,6 +547,7 @@ static struct mm_struct * mm_init(struct
 	if (mm_flags != default_dump_filter)
 		free_mm_flags(mm);
 fail_nomem:
+	put_mm_ub(mm);
 	free_mm(mm);
 	return NULL;
 }
@@ -496,10 +562,11 @@ struct mm_struct * mm_alloc(void)
 	mm = allocate_mm();
 	if (mm) {
 		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm);
+		mm = mm_init(mm, NULL);
 	}
 	return mm;
 }
+EXPORT_SYMBOL_GPL(mm_alloc);
 
 /*
  * Called when the last reference to the mm
@@ -511,10 +578,14 @@ void fastcall __mmdrop(struct mm_struct 
 	BUG_ON(mm == &init_mm);
 	free_mm_flags(mm);
 	mm_free_pgd(mm);
+	BUG_ON(mm->page_table_precharge);
+	BUG_ON(mm->page_table_charged);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
+	put_mm_ub(mm);
 	free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);
 
 /*
  * Decrement the use count and release all resources for an mm.
@@ -532,6 +603,9 @@ void mmput(struct mm_struct *mm)
 			spin_unlock(&mmlist_lock);
 		}
 		put_swap_token(mm);
+		(void) virtinfo_gencall(VIRTINFO_EXITMMAP, mm);
+		if (mm->oom_killed)
+			ub_oom_task_dead(current);
 		mmdrop(mm);
 	}
 }
@@ -635,7 +709,7 @@ static struct mm_struct *dup_mm(struct t
 
 	memcpy(mm, oldmm, sizeof(*mm));
 
-	if (!mm_init(mm))
+	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
 	if (init_new_context(tsk, mm))
@@ -663,6 +737,7 @@ fail_nocontext:
 	 */
 	free_mm_flags(mm);
 	mm_free_pgd(mm);
+	put_mm_ub(mm);
 	free_mm(mm);
 	return NULL;
 }
@@ -925,7 +1000,9 @@ static int copy_files(unsigned long clon
 	if (!newf)
 		goto out;
 
+	task_lock(tsk);
 	tsk->files = newf;
+	task_unlock(tsk);
 	error = 0;
 out:
 	return error;
@@ -1077,7 +1154,7 @@ asmlinkage long sys_set_tid_address(int 
 {
 	current->clear_child_tid = tidptr;
 
-	return current->pid;
+	return virt_pid(current);
 }
 
 static inline void rt_mutex_init_task(struct task_struct *p)
@@ -1103,10 +1180,14 @@ static struct task_struct *copy_process(
 					unsigned long stack_size,
 					int __user *parent_tidptr,
 					int __user *child_tidptr,
-					int pid)
+					struct pid *pidp, int pid0)
 {
-	int retval;
+	int retval, vpid, vtgid;
 	struct task_struct *p = NULL;
+	pid_t pid = pidp ? pidp->nr : 0;
+
+	if (clone_flags & (CLONE_NAMESPACES_MASK & ~CLONE_NEWUTS))
+		return ERR_PTR(-EINVAL);
 
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1139,6 +1220,9 @@ static struct task_struct *copy_process(
 
 	p->tux_info = NULL;
 
+	if (ub_task_charge(current, p))
+		goto bad_fork_charge;
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
@@ -1147,7 +1231,7 @@ static struct task_struct *copy_process(
 	if (atomic_read(&p->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-				p->user != &root_user)
+				p->user->uid != 0)
 			goto bad_fork_free;
 	}
 
@@ -1173,9 +1257,18 @@ static struct task_struct *copy_process(
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
 	p->pid = pid;
+#ifdef CONFIG_VE
+	vpid = pid;
+	if (pidp) {
+		retval = alloc_vpid(pidp, pid0 ? : -1);
+		if (retval < 0)
+			goto bad_fork_cleanup_delays_binfmt;
+		vpid = retval;
+	}
+#endif
 	retval = -EFAULT;
 	if (clone_flags & CLONE_PARENT_SETTID)
-		if (put_user(p->pid, parent_tidptr))
+		if (put_user(vpid, parent_tidptr))
 			goto bad_fork_cleanup_delays_binfmt;
 
 	INIT_LIST_HEAD(&p->children);
@@ -1246,8 +1339,11 @@ static struct task_struct *copy_process(
 #endif
 
 	p->tgid = p->pid;
-	if (clone_flags & CLONE_THREAD)
+	vtgid = vpid;
+	if (clone_flags & CLONE_THREAD) {
 		p->tgid = current->tgid;
+		vtgid = virt_tgid(current);
+	}
 
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
@@ -1268,11 +1364,11 @@ static struct task_struct *copy_process(
 		goto bad_fork_cleanup_signal;
 	if ((retval = copy_keys(clone_flags, p)))
 		goto bad_fork_cleanup_mm;
-	if ((retval = copy_namespace(clone_flags, p)))
+	if ((retval = copy_namespaces(clone_flags, p)))
 		goto bad_fork_cleanup_keys;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
-		goto bad_fork_cleanup_namespace;
+		goto bad_fork_cleanup_namespaces;
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
@@ -1330,8 +1426,8 @@ static struct task_struct *copy_process(
 	 */
 	p->cpus_allowed = current->cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
+			!vcpu_online(task_cpu(p))))
+		set_task_cpu(p, task_cpu(current));
 
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
@@ -1352,11 +1448,11 @@ static struct task_struct *copy_process(
 	 * A fatal signal pending means that current will exit, so the new
 	 * thread can't slip out of an OOM kill (or normal SIGKILL).
  	 */
-	if (fork_recalc_sigpending()) {
+	if (fork_recalc_sigpending(pid0)) {
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_cleanup_namespace;
+		goto bad_fork_cleanup_namespaces;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1398,20 +1494,35 @@ static struct task_struct *copy_process(
 			attach_pid(p, PIDTYPE_SID, p->signal->session);
 
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
+#ifdef CONFIG_VE
+			list_add_tail_rcu(&p->ve_task_info.vetask_list,
+					&p->ve_task_info.owner_env->vetask_lh);
+			list_add_tail(&p->ve_task_info.aux_list,
+					&p->ve_task_info.owner_env->vetask_auxlist);
+#endif
 			__get_cpu_var(process_counts)++;
 		}
 		attach_pid(p, PIDTYPE_PID, p->pid);
+		set_virt_pid(p, vpid);
+		set_virt_tgid(p, vtgid);
+		set_virt_pgid(p, virt_pgid(current));
+		set_virt_sid(p, virt_sid(current));
 		nr_threads++;
 	}
+	get_ve(p->ve_task_info.owner_env);
+	pget_ve(p->ve_task_info.owner_env);
 
+#ifdef CONFIG_VE
+	seqcount_init(&p->ve_task_info.wakeup_lock);
+#endif
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	return p;
 
-bad_fork_cleanup_namespace:
-	exit_namespace(p);
+bad_fork_cleanup_namespaces:
+	exit_task_namespaces(p);
 bad_fork_cleanup_keys:
 	exit_keys(p);
 bad_fork_cleanup_mm:
@@ -1449,6 +1560,9 @@ bad_fork_cleanup_count:
 	atomic_dec(&p->user->processes);
 	free_uid(p->user);
 bad_fork_free:
+	ub_task_uncharge(p);
+	ub_task_put(p);
+bad_fork_charge:
 	free_task(p);
 fork_out:
 	/*
@@ -1472,7 +1586,7 @@ struct task_struct * __devinit fork_idle
 	struct task_struct *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, NULL, 0);
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 	init_idle(task, cpu);
@@ -1486,22 +1600,31 @@ struct task_struct * __devinit fork_idle
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long do_fork(unsigned long clone_flags,
+long do_fork_pid(unsigned long clone_flags,
 	      unsigned long stack_start,
 	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
-	      int __user *child_tidptr)
+	      int __user *child_tidptr,
+	      long pid0)
 {
 	struct task_struct *p;
-	struct pid *pid = alloc_pid();
+	struct pid *pid;
 	long nr;
 
-	if (!pid)
-		return -EAGAIN;
+	nr = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags);
+	if (nr)
+		return nr;
+
+	pid = alloc_pid();
+	if (!pid) {
+		nr = -EAGAIN;
+		goto out;
+	}
 	nr = pid->nr;
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr);
+	p = copy_process(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, pid, pid0);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1515,6 +1638,7 @@ long do_fork(unsigned long clone_flags,
 
 		trace_sched_process_fork(current, p);
 
+		nr = virt_pid(p);
 		if (clone_flags & CLONE_VFORK) {
 			task_aux(p)->vfork_done = &vfork;
 			init_completion(&vfork);
@@ -1526,6 +1650,7 @@ long do_fork(unsigned long clone_flags,
 
 		p->flags &= ~PF_STARTING;
 
+		(void)virtinfo_gencall(VIRTINFO_DOFORKRET, p);
 		if (clone_flags & CLONE_STOPPED) {
 			/*
 			 * We'll start up with an immediate SIGSTOP.
@@ -1549,6 +1674,8 @@ long do_fork(unsigned long clone_flags,
 		free_pid(pid);
 		nr = PTR_ERR(p);
 	}
+out:
+	(void)virtinfo_gencall(VIRTINFO_DOFORKPOST, (void *)(long)nr);
 	return nr;
 }
 
@@ -1561,31 +1688,47 @@ static void sighand_ctor(void *data, kme
 	struct sighand_struct *sighand = data;
 
 	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
-					SLAB_CTOR_CONSTRUCTOR)
+					SLAB_CTOR_CONSTRUCTOR) {
 		spin_lock_init(&sighand->siglock);
+		init_waitqueue_head(&sighand->signalfd_wqh);
+	}
+}
+
+EXPORT_SYMBOL(do_fork_pid);
+
+long do_fork(unsigned long clone_flags,
+		unsigned long stack_start,
+		struct pt_regs *regs,
+		unsigned long stack_size,
+		int __user *parent_tidptr,
+		int __user *child_tidptr)
+{
+	return do_fork_pid(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, 0);
 }
 
 void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|
+				SLAB_DESTROY_BY_RCU|SLAB_UBC,
 			sighand_ctor, NULL);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_with_aux_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	files_cachep = kmem_cache_create("files_cache", 
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	fs_cachep = kmem_cache_create("fs_cache", 
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-	vm_area_cachep = kmem_cache_create("vm_area_struct",
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
+	__vm_area_cachep = kmem_cache_create("vm_area_struct",
 			sizeof(struct vm_area_struct), 0,
 			SLAB_PANIC, NULL, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
 
 
@@ -1656,10 +1799,9 @@ static int unshare_fs(unsigned long unsh
  */
 static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
 {
-	struct namespace *ns = current->namespace;
+	struct namespace *ns = current->nsproxy->namespace;
 
-	if ((unshare_flags & CLONE_NEWNS) &&
-	    (ns && atomic_read(&ns->count) > 1)) {
+	if ((unshare_flags & CLONE_NEWNS) && ns) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
@@ -1731,6 +1873,16 @@ static int unshare_semundo(unsigned long
 	return 0;
 }
 
+#ifndef CONFIG_IPC_NS
+static inline int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns)
+{
+	if (flags & CLONE_NEWIPC)
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 /*
  * unshare allows a process to 'unshare' part of the process
  * context which was originally shared using clone.  copy_*
@@ -1748,14 +1900,22 @@ asmlinkage long sys_unshare(unsigned lon
 	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
+	struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+	struct uts_namespace *uts, *new_uts = NULL;
+	struct ipc_namespace *ipc, *new_ipc = NULL;
 
 	check_unshare_flags(&unshare_flags);
 
 	/* Return -EINVAL for all unsupported flags */
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC))
 		goto bad_unshare_out;
+#ifdef CONFIG_VE
+	if (unshare_flags & CLONE_NAMESPACES_MASK)
+		goto bad_unshare_out;
+#endif
 
 	if ((err = unshare_thread(unshare_flags)))
 		goto bad_unshare_out;
@@ -1771,11 +1931,32 @@ asmlinkage long sys_unshare(unsigned lon
 		goto bad_unshare_cleanup_vm;
 	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
 		goto bad_unshare_cleanup_fd;
+#ifndef CONFIG_VE
+	if ((err = unshare_utsname(unshare_flags, &new_uts)))
+		goto bad_unshare_cleanup_semundo;
+	if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
+		goto bad_unshare_cleanup_uts;
+#endif
+
+	if (new_ns || new_uts || new_ipc) {
+		old_nsproxy = current->nsproxy;
+		new_nsproxy = dup_namespaces(old_nsproxy);
+		if (!new_nsproxy) {
+			err = -ENOMEM;
+			goto bad_unshare_cleanup_ipc;
+		}
+	}
 
-	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
+				new_uts || new_ipc) {
 
 		task_lock(current);
 
+		if (new_nsproxy) {
+			current->nsproxy = new_nsproxy;
+			new_nsproxy = old_nsproxy;
+		}
+
 		if (new_fs) {
 			fs = current->fs;
 			current->fs = new_fs;
@@ -1783,8 +1964,8 @@ asmlinkage long sys_unshare(unsigned lon
 		}
 
 		if (new_ns) {
-			ns = current->namespace;
-			current->namespace = new_ns;
+			ns = current->nsproxy->namespace;
+			current->nsproxy->namespace = new_ns;
 			new_ns = ns;
 		}
 
@@ -1809,9 +1990,35 @@ asmlinkage long sys_unshare(unsigned lon
 			new_fd = fd;
 		}
 
+		if (new_uts) {
+			uts = current->nsproxy->uts_ns;
+			current->nsproxy->uts_ns = new_uts;
+			new_uts = uts;
+		}
+
+		if (new_ipc) {
+			ipc = current->nsproxy->ipc_ns;
+			current->nsproxy->ipc_ns = new_ipc;
+			new_ipc = ipc;
+		}
+
 		task_unlock(current);
 	}
 
+	if (new_nsproxy)
+		put_nsproxy(new_nsproxy);
+
+bad_unshare_cleanup_ipc:
+#ifndef CONFIG_VE
+	if (new_ipc)
+		put_ipc_ns(new_ipc);
+
+bad_unshare_cleanup_uts:
+	if (new_uts)
+		put_uts_ns(new_uts);
+
+bad_unshare_cleanup_semundo:
+#endif
 bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
diff -upr kernel-2.6.18-417.el5.orig/kernel/futex.c kernel-2.6.18-417.el5-028stab121/kernel/futex.c
--- kernel-2.6.18-417.el5.orig/kernel/futex.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/futex.c	2017-01-13 08:40:40.000000000 -0500
@@ -141,6 +141,7 @@ struct futex_q {
 	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
 	struct task_struct *task;
+	u32 bitset;
 };
 
 /*
@@ -443,7 +444,7 @@ static struct task_struct * futex_find_g
 	struct task_struct *p;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 
 	if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
 		p = ERR_PTR(-ESRCH);
@@ -549,7 +550,7 @@ lookup_pi_state(u32 uval, struct futex_h
 				 * Bail out if user space manipulated the
 				 * futex value.
 				 */
-				if (pid != pi_state->owner->pid)
+				if (pid != virt_pid(pi_state->owner))
 					return -EINVAL;
 			}
 
@@ -569,6 +570,10 @@ lookup_pi_state(u32 uval, struct futex_h
 	p = futex_find_get_task(pid);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
+	if (unlikely(p == current)) {
+		put_task_struct(p);
+		return -EDEADLK;
+	}
 
 	/*
 	 * We need to look at the task state flags to figure out,
@@ -590,6 +595,18 @@ lookup_pi_state(u32 uval, struct futex_h
 		return ret;
 	}
 
+	read_lock(&tasklist_lock);
+	/* To this moment p can go through do_exit and
+	 * clean its pi_state_list. We are going to recreate it
+	 * and it will leak. The most obvious solution is to take
+	 * tasklist_lock. Probably, we can use pi_lock for the
+	 * same purpose. _ANK_
+	 */
+	if (p->exit_state) {
+		read_unlock(&tasklist_lock);
+		put_task_struct(p);
+		return -ESRCH;
+	}
 	pi_state = alloc_pi_state();
 
 	/*
@@ -609,6 +626,7 @@ lookup_pi_state(u32 uval, struct futex_h
 	put_task_struct(p);
 
 	me->pi_state = pi_state;
+	read_unlock(&tasklist_lock);
 
 	return 0;
 }
@@ -675,7 +693,7 @@ static int wake_futex_pi(u32 __user *uad
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		int ret = 0;
-		newval = FUTEX_WAITERS | new_owner->pid;
+		newval = FUTEX_WAITERS | virt_pid(new_owner);
 
 		inc_preempt_count();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -748,7 +766,7 @@ double_lock_hb(struct futex_hash_bucket 
  * to this virtual address:
  */
 static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
-		      int nr_wake)
+		      int nr_wake, u32 bitset)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
@@ -756,6 +774,9 @@ static int futex_wake(u32 __user *uaddr,
 	union futex_key key;
 	int ret;
 
+	if (!bitset)
+		return -EINVAL;
+
 	if (fshared)
 		down_read(fshared);
 
@@ -773,6 +794,8 @@ static int futex_wake(u32 __user *uaddr,
 				ret = -EINVAL;
 				break;
 			}
+			if (!(this->bitset & bitset))
+				continue;
 			wake_futex(this);
 			if (++ret >= nr_wake)
 				break;
@@ -1137,7 +1160,7 @@ static void unqueue_me_pi(struct futex_q
 static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
 				struct futex_q *q, struct task_struct *newowner)
 {
-	u32 newtid = newowner->pid | FUTEX_WAITERS;
+	u32 newtid = virt_pid(newowner) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
 	struct task_struct *oldowner = pi_state->owner;
 	u32 uval, curval, newval;
@@ -1234,7 +1257,7 @@ handle_fault:
 }
 
 static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
-		      u32 val, ktime_t *abs_time)
+		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
 	struct task_struct *curr = current;
 	DECLARE_WAITQUEUE(wait, curr);
@@ -1245,7 +1268,11 @@ static int futex_wait(u32 __user *uaddr,
 	struct hrtimer_sleeper t;
 	int rem = 0;
 
+	if (!bitset)
+		return -EINVAL;
+
 	q.pi_state = NULL;
+	q.bitset = bitset;
  retry:
 	if (fshared)
 		down_read(fshared);
@@ -1328,7 +1355,10 @@ static int futex_wait(u32 __user *uaddr,
 		if (!abs_time)
 			schedule();
 		else {
-			hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_ABS);
+			hrtimer_init(&t.timer,
+				     clockrt ? CLOCK_REALTIME :
+				     CLOCK_MONOTONIC,
+				     HRTIMER_ABS);
 			hrtimer_init_sleeper(&t, current);
 			t.timer.expires = *abs_time;
 
@@ -1422,7 +1452,7 @@ static int futex_lock_pi(u32 __user *uad
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
 	 * the locks. It will most likely not succeed.
 	 */
-	newval = current->pid;
+	newval = virt_pid(current);
 
 	inc_preempt_count();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
@@ -1435,7 +1465,7 @@ static int futex_lock_pi(u32 __user *uad
 	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
 	 * situation and we return success to user space.
 	 */
-	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
+	if (unlikely((curval & FUTEX_TID_MASK) == virt_pid(current))) {
 		ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
@@ -1464,7 +1494,7 @@ static int futex_lock_pi(u32 __user *uad
 	 */
 	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
 		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+		newval = (curval & ~FUTEX_TID_MASK) | virt_pid(current);
 		ownerdied = 0;
 		lock_taken = 1;
 	}
@@ -1674,7 +1704,7 @@ retry:
 	/*
 	 * We release only a lock we actually own:
 	 */
-	if ((uval & FUTEX_TID_MASK) != current->pid)
+	if ((uval & FUTEX_TID_MASK) != virt_pid(current))
 		return -EPERM;
 	/*
 	 * First take all the futex related locks:
@@ -1697,7 +1727,7 @@ retry_unlocked:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		inc_preempt_count();
-		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, virt_pid(current), 0);
 		dec_preempt_count();
 	}
 
@@ -1707,7 +1737,7 @@ retry_unlocked:
 	 * Rare case: we managed to release the lock atomically,
 	 * no need to wake anyone else up:
 	 */
-	if (unlikely(uval == current->pid))
+	if (unlikely(uval == virt_pid(current)))
 		goto out_unlock;
 
 	/*
@@ -1938,7 +1968,7 @@ sys_get_robust_list(int pid, struct robu
 
 		ret = -ESRCH;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
@@ -1971,7 +2001,7 @@ retry:
 	if (get_user(uval, uaddr))
 		return -1;
 
-	if ((uval & FUTEX_TID_MASK) == curr->pid) {
+	if ((uval & FUTEX_TID_MASK) == virt_pid(curr)) {
 		/*
 		 * Ok, this dying thread is truly holding a futex
 		 * of interest. Set the OWNER_DIED bit atomically
@@ -1997,7 +2027,8 @@ retry:
 		 */
 		if (!pi) {
 			if (uval & FUTEX_WAITERS)
-				futex_wake(uaddr, &curr->mm->mmap_sem, 1);
+				futex_wake(uaddr, &curr->mm->mmap_sem, 1,
+						FUTEX_BITSET_MATCH_ANY);
 		}
 	}
 	return 0;
@@ -2091,19 +2122,33 @@ void exit_robust_list(struct task_struct
 long do_futex(u32 __user *uaddr, int op, u32 val, void *timeout,
 		u32 __user *uaddr2, u32 val2, u32 val3)
 {
-	int ret;
+	int clockrt, ret;
 	int cmd = op & FUTEX_CMD_MASK;
 	struct rw_semaphore *fshared = NULL;
+	static int warning_left = 10;
 
 	if (!(op & FUTEX_PRIVATE_FLAG))
 		fshared = &current->mm->mmap_sem;
 
+	if (op & FUTEX_CLOCK_REALTIME) {
+		if (cmd != FUTEX_WAIT_BITSET)
+			return -ENOSYS;
+	}
+
+	clockrt = op & FUTEX_CLOCK_REALTIME;
+	if (clockrt && cmd != FUTEX_WAIT_BITSET)
+		return -ENOSYS;
+
 	switch (cmd) {
 	case FUTEX_WAIT:
-		ret = futex_wait(uaddr, fshared, val, (ktime_t *)timeout);
+		val3 = FUTEX_BITSET_MATCH_ANY;
+	case FUTEX_WAIT_BITSET:
+		ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
 		break;
 	case FUTEX_WAKE:
-		ret = futex_wake(uaddr, fshared, val);
+		val3 = FUTEX_BITSET_MATCH_ANY;
+	case FUTEX_WAKE_BITSET:
+		ret = futex_wake(uaddr, fshared, val, val3);
 		break;
 	case FUTEX_FD:
 		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
@@ -2128,6 +2173,14 @@ long do_futex(u32 __user *uaddr, int op,
 		ret = futex_lock_pi(uaddr, fshared, 0, (ktime_t *)timeout, 1);
 		break;
 	default:
+		if (warning_left) {
+			printk(KERN_WARNING"Unsupported futex command: "
+					"%d Pid: %d comm: %s CT: %d\n",
+					cmd, current->pid, current->comm,
+					VEID(get_exec_env()));
+			WARN_ON(1);
+			warning_left--;
+		}
 		ret = -ENOSYS;
 	}
 	return ret;
@@ -2143,7 +2196,8 @@ asmlinkage long sys_futex(u32 __user *ua
 	u32 val2 = 0;
 	int cmd = op & FUTEX_CMD_MASK;
 
-	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
+	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+			cmd == FUTEX_WAIT_BITSET)) {
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
diff -upr kernel-2.6.18-417.el5.orig/kernel/futex_compat.c kernel-2.6.18-417.el5-028stab121/kernel/futex_compat.c
--- kernel-2.6.18-417.el5.orig/kernel/futex_compat.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/futex_compat.c	2017-01-13 08:40:40.000000000 -0500
@@ -136,7 +136,7 @@ compat_sys_get_robust_list(int pid, comp
 
 		ret = -ESRCH;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
@@ -166,7 +166,8 @@ asmlinkage long compat_sys_futex(u32 __u
 	int val2 = 0;
 	int cmd = op & FUTEX_CMD_MASK;
 
-	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
+	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+			cmd == FUTEX_WAIT_BITSET)) {
 		if (get_compat_timespec(&ts, utime))
 			return -EFAULT;
 		if (!timespec_valid(&ts))
diff -upr kernel-2.6.18-417.el5.orig/kernel/hrtimer.c kernel-2.6.18-417.el5-028stab121/kernel/hrtimer.c
--- kernel-2.6.18-417.el5.orig/kernel/hrtimer.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/hrtimer.c	2017-01-13 08:40:24.000000000 -0500
@@ -484,6 +484,21 @@ hrtimer_start(struct hrtimer *timer, kti
 }
 EXPORT_SYMBOL_GPL(hrtimer_start);
 
+/*
+ * schedule_hrtimer taken from 2.6.16 kernel
+ * needed by CPT
+ */
+ktime_t __sched
+schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode)
+{
+	hrtimer_start(timer, timer->expires, mode);
+	schedule();
+	hrtimer_cancel(timer);
+
+	return hrtimer_get_remaining(timer);
+}
+
+
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
  * @timer:	hrtimer to stop
@@ -724,7 +739,7 @@ static int __sched do_nanosleep(struct h
 	return t->task == NULL;
 }
 
-static long __sched nanosleep_restart(struct restart_block *restart)
+long __sched nanosleep_restart(struct restart_block *restart)
 {
 	struct hrtimer_sleeper t;
 	struct timespec __user *rmtp;
@@ -754,6 +769,7 @@ static long __sched nanosleep_restart(st
 	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL_GPL(nanosleep_restart);
 
 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
diff -upr kernel-2.6.18-417.el5.orig/kernel/hung_task.c kernel-2.6.18-417.el5-028stab121/kernel/hung_task.c
--- kernel-2.6.18-417.el5.orig/kernel/hung_task.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/hung_task.c	2017-01-13 08:40:41.000000000 -0500
@@ -27,12 +27,12 @@ unsigned long __read_mostly sysctl_hung_
  * is disabled during the critical section. It also controls the size of
  * the RCU grace period. So it needs to be upper-bound.
  */
-#define HUNG_TASK_BATCHING 1024
+#define HUNG_TASK_BATCHING 512
 
 /*
  * Zero means infinite timeout - no checking done:
  */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 300;
 
 unsigned long __read_mostly sysctl_hung_task_warnings = 10;
 
@@ -142,7 +142,7 @@ static void check_hung_uninterruptible_t
 		return;
 
 	rcu_read_lock();
-	do_each_thread(g, t) {
+	do_each_thread_all(g, t) {
 		if (!--max_count)
 			goto unlock;
 		if (!--batch_count) {
@@ -156,7 +156,7 @@ static void check_hung_uninterruptible_t
 		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
 		if (t->state == TASK_UNINTERRUPTIBLE)
 			check_hung_task(t, timeout);
-	} while_each_thread(g, t);
+	} while_each_thread_all(g, t);
  unlock:
 	rcu_read_unlock();
 }
diff -upr kernel-2.6.18-417.el5.orig/kernel/kallsyms.c kernel-2.6.18-417.el5-028stab121/kernel/kallsyms.c
--- kernel-2.6.18-417.el5.orig/kernel/kallsyms.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/kallsyms.c	2017-01-13 08:40:14.000000000 -0500
@@ -57,6 +57,11 @@ static inline int is_kernel_extratext(un
 
 static inline int is_kernel_text(unsigned long addr)
 {
+#ifdef CONFIG_X86_4G
+	extern unsigned char __start___entry_text, __end___entry_text;
+	if (addr >= (unsigned long)__start___entry_text && addr <= (unsigned long)__end___entry_text)
+		return 1;
+#endif
 	if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
 		return 1;
 	return in_gate_area_no_task(addr);
diff -upr kernel-2.6.18-417.el5.orig/kernel/Kconfig.fairsched kernel-2.6.18-417.el5-028stab121/kernel/Kconfig.fairsched
--- kernel-2.6.18-417.el5.orig/kernel/Kconfig.fairsched	2017-01-13 08:40:28.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/Kconfig.fairsched	2017-01-13 08:40:28.000000000 -0500
@@ -0,0 +1,22 @@
+config SCHED_VCPU
+	bool "VCPU scheduler support"
+	default y
+	help
+	  VCPU scheduler support adds additional layer of abstraction
+	  which allows to virtualize cpu notion and split physical cpus
+	  and virtual cpus. This support allows to use CPU fair scheduler,
+	  dynamically add/remove cpus to/from VPS and so on.
+
+config FAIRSCHED
+	bool "Fair CPU scheduler (EXPERIMENTAL)"
+	depends on SCHED_VCPU
+	default SCHED_VCPU
+	help
+	  Config option for Fair CPU scheduler (fairsched).
+	  This option allows grouping processes into scheduling nodes
+	  which receive CPU time proportional to their weight.
+	  This is a very important feature for process group isolation and
+	  QoS management.
+
+	  If unsure, say N.
+
diff -upr kernel-2.6.18-417.el5.orig/kernel/Kconfig.openvz kernel-2.6.18-417.el5-028stab121/kernel/Kconfig.openvz
--- kernel-2.6.18-417.el5.orig/kernel/Kconfig.openvz	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/Kconfig.openvz	2017-01-13 08:40:25.000000000 -0500
@@ -0,0 +1,89 @@
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "OpenVZ"
+
+config VE
+	bool "Virtual Environment support"
+	default y
+	select IPC_NS
+	select UTS_NS
+	help
+	  This option adds support for virtual Linux instances running on the
+	  original box with a fully supported virtual network driver, tty
+	  subsystem and configurable access to hardware and other resources.
+
+config VE_CALLS
+	tristate "VE calls interface"
+	depends on VE
+	select VZ_DEV
+	default m
+	help
+	  This option controls how to build the vzmon code containing VE calls.
+	  By default it is built as the module vzmon.o.
+
+config VZ_GENCALLS
+	bool
+	default y
+
+config VE_NETDEV
+	tristate "VE network device"
+	depends on VE_CALLS && NET
+	select VZ_DEV
+	default m
+	help
+	  This option controls whether to build the venet device. This is a
+	  common interface for networking in a VE.
+
+config VE_ETHDEV
+	tristate "Virtual ethernet device"
+	depends on VE_CALLS && NET
+	select VZ_DEV
+	default m
+	help
+	  This option controls whether to build the virtual ethernet device.
+
+config VZ_DEV
+	tristate "VE device"
+	default m
+	help
+	  This option adds support for the vzdev device, which is used by
+	  user-space applications to control Virtual Environments.
+
+config VE_IPTABLES
+	bool "VE netfiltering"
+	depends on VE && VE_NETDEV && INET && NETFILTER
+	default y
+	help
+	  This option controls whether to build VE netfiltering code.
+
+config VZ_WDOG
+	tristate "VE watchdog module"
+	depends on VE_CALLS
+	default m
+	help
+	  This option controls building of vzwdog module, which dumps
+	  a lot of useful system info on console periodically.
+ 
+config VZ_CHECKPOINT
+ 	tristate "Checkpointing & restoring Virtual Environments"
+ 	depends on VE_CALLS && INET
+ 	default m
+ 	help
+ 	  This option adds two modules, "cpt" and "rst", which allow
+ 	  to save a running Virtual Environment and restore it
+ 	  on another host (live migration) or on the same host (checkpointing).
+
+config VZ_EVENT
+ 	tristate "Enable sending notifications of the VE status change through the netlink socket"
+ 	depends on VE && VE_CALLS && NET
+ 	default m
+ 	help
+ 	  This option provides for sending notifications of VE
+ 	  events to interested user space applications through
+ 	  the netlink socket, just like the core kernel
+ 	  networking code does. For now, only notifications of
+ 	  essential VE status changes are sent.
+
+endmenu
diff -upr kernel-2.6.18-417.el5.orig/kernel/kmod.c kernel-2.6.18-417.el5-028stab121/kernel/kmod.c
--- kernel-2.6.18-417.el5.orig/kernel/kmod.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/kmod.c	2017-01-13 08:40:19.000000000 -0500
@@ -157,6 +157,12 @@ int request_module(const char *fmt, ...)
 	if (!req_info)
 		return -ENOMEM;
 
+	/* Don't allow request_module() inside VE.  req_info was already
+	 * allocated above, so it must be freed on this early-exit path
+	 * to avoid leaking it on every denied request. */
+	if (!ve_is_super(get_exec_env())) {
+		kfree(req_info);
+		return -EPERM;
+	}
+
 	va_start(args, fmt);
 	ret = vsnprintf(req_info->module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
@@ -416,6 +420,9 @@ int call_usermodehelper_keys(char *path,
 	};
 	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (!khelper_wq)
 		return -EBUSY;
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/kprobes.c kernel-2.6.18-417.el5-028stab121/kernel/kprobes.c
--- kernel-2.6.18-417.el5.orig/kernel/kprobes.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/kprobes.c	2017-01-13 08:40:24.000000000 -0500
@@ -38,6 +38,7 @@
 #include <linux/module.h>
 #include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
+#include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/sysctl.h>
 #include <asm-generic/sections.h>
diff -upr kernel-2.6.18-417.el5.orig/kernel/ksysfs.c kernel-2.6.18-417.el5-028stab121/kernel/ksysfs.c
--- kernel-2.6.18-417.el5.orig/kernel/ksysfs.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ksysfs.c	2017-01-13 08:40:22.000000000 -0500
@@ -26,7 +26,7 @@ static struct subsys_attribute _name##_a
 /* current uevent sequence number */
 static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
+	return sprintf(page, "%llu\n", (unsigned long long)ve_uevent_seqnum);
 }
 KERNEL_ATTR_RO(uevent_seqnum);
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/kthread.c kernel-2.6.18-417.el5-028stab121/kernel/kthread.c
--- kernel-2.6.18-417.el5.orig/kernel/kthread.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/kthread.c	2017-01-13 08:40:19.000000000 -0500
@@ -126,7 +126,7 @@ static void keventd_create_kthread(void 
 		struct sched_param param = { .sched_priority = 0 };
 		wait_for_completion(&create->started);
 		read_lock(&tasklist_lock);
-		create->result = find_task_by_pid(pid);
+		create->result = find_task_by_pid_all(pid);
 		read_unlock(&tasklist_lock);
 		/*
 		 * root may have changed our (kthread wq's) priority or CPU
diff -upr kernel-2.6.18-417.el5.orig/kernel/lockdep.c kernel-2.6.18-417.el5-028stab121/kernel/lockdep.c
--- kernel-2.6.18-417.el5.orig/kernel/lockdep.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/lockdep.c	2017-01-13 08:40:20.000000000 -0500
@@ -665,9 +665,9 @@ print_circular_bug_entry(struct lock_lis
 
 static void print_kernel_version(void)
 {
-	printk("%s %.*s\n", system_utsname.release,
-		(int)strcspn(system_utsname.version, " "),
-		system_utsname.version);
+	printk("%s %.*s\n", init_uts_ns.name.release,
+		(int)strcspn(init_uts_ns.name.version, " "),
+		init_uts_ns.name.version);
 }
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -3189,13 +3189,13 @@ retry:
 	if (count != 10)
 		printk(" locked it.\n");
 
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (p->lockdep_depth)
 			lockdep_print_held_locks(p);
 		if (!unlock)
 			if (read_trylock(&tasklist_lock))
 				unlock = 1;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	printk("\n");
 	printk("=============================================\n\n");
diff -upr kernel-2.6.18-417.el5.orig/kernel/Makefile kernel-2.6.18-417.el5-028stab121/kernel/Makefile
--- kernel-2.6.18-417.el5.orig/kernel/Makefile	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/Makefile	2017-01-13 08:40:28.000000000 -0500
@@ -2,16 +2,21 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fairsched.o \
+	    fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o rh_taint.o pm_qos_params.o
+	    hrtimer.o rwsem.o rh_taint.o pm_qos_params.o nsproxy.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
+obj-$(CONFIG_USER_RESOURCE) += ub/
+obj-$(CONFIG_VE) += ve/
+obj-$(CONFIG_VZ_CHECKPOINT) += cpt/
+
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_LOCKDEP) += lockdep.o
 ifeq ($(CONFIG_PROC_FS),y)
@@ -49,6 +54,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
diff -upr kernel-2.6.18-417.el5.orig/kernel/module.c kernel-2.6.18-417.el5-028stab121/kernel/module.c
--- kernel-2.6.18-417.el5.orig/kernel/module.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/module.c	2017-01-13 08:40:40.000000000 -0500
@@ -40,6 +40,7 @@
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/unwind.h>
+#include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 #include <asm/cacheflush.h>
@@ -396,6 +397,17 @@ static struct module *find_module(const 
 	return NULL;
 }
 
+struct module *find_module_by_name(const char *name)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	mod = find_module(name);
+	mutex_unlock(&module_mutex);
+	return mod;
+}
+EXPORT_SYMBOL_GPL(find_module_by_name);
+
 #ifdef CONFIG_SMP
 /* Number of blocks used and allocated. */
 static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -2300,6 +2312,8 @@ static void *m_start(struct seq_file *m,
 	loff_t n = 0;
 
 	mutex_lock(&module_mutex);
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
 	list_for_each(i, &modules) {
 		if (n++ == *pos)
 			break;
diff -upr kernel-2.6.18-417.el5.orig/kernel/mutex.c kernel-2.6.18-417.el5-028stab121/kernel/mutex.c
--- kernel-2.6.18-417.el5.orig/kernel/mutex.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/mutex.c	2017-01-13 08:40:16.000000000 -0500
@@ -208,7 +208,7 @@ mutex_lock_nested(struct mutex *lock, un
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
 }
 
-EXPORT_SYMBOL_GPL(mutex_lock_nested);
+EXPORT_SYMBOL(mutex_lock_nested);
 
 int __sched
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
@@ -217,7 +217,7 @@ mutex_lock_interruptible_nested(struct m
 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
 }
 
-EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
+EXPORT_SYMBOL(mutex_lock_interruptible_nested);
 #endif
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/kernel/nsproxy.c kernel-2.6.18-417.el5-028stab121/kernel/nsproxy.c
--- kernel-2.6.18-417.el5.orig/kernel/nsproxy.c	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/nsproxy.c	2017-01-13 08:40:15.000000000 -0500
@@ -0,0 +1,159 @@
+/*
+ *  Copyright (C) 2006 IBM Corporation
+ *
+ *  Author: Serge Hallyn <serue@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ *
+ *  Jun 2006 - namespaces support
+ *             OpenVZ, SWsoft Inc.
+ *             Pavel Emelianov <xemul@openvz.org>
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/nsproxy.h>
+#include <linux/namespace.h>
+#include <linux/utsname.h>
+
+void exit_task_namespaces(struct task_struct *p)
+{
+	struct nsproxy *ns = p->nsproxy;
+	if (ns) {
+		task_lock(p);
+		p->nsproxy = NULL;
+		task_unlock(p);
+		put_nsproxy(ns);
+	}
+}
+
+void get_task_namespaces(struct task_struct *tsk)
+{
+	struct nsproxy *ns = tsk->nsproxy;
+	if (ns) {
+		get_nsproxy(ns);
+	}
+}
+
+/*
+ * creates a copy of "orig" with refcount 1.
+ * This does not grab references to the contained namespaces,
+ * so that needs to be done by dup_namespaces.
+ */
+static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
+{
+	struct nsproxy *ns;
+
+	ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL);
+	if (ns) {
+		memcpy(ns, orig, sizeof(struct nsproxy));
+		atomic_set(&ns->count, 1);
+	}
+	return ns;
+}
+
+/*
+ * copies the nsproxy, setting refcount to 1, and grabbing a
+ * reference to all contained namespaces.  Called from
+ * sys_unshare()
+ */
+struct nsproxy *dup_namespaces(struct nsproxy *orig)
+{
+	struct nsproxy *ns = clone_namespaces(orig);
+
+	if (ns) {
+		if (ns->namespace)
+			get_namespace(ns->namespace);
+		if (ns->uts_ns)
+			get_uts_ns(ns->uts_ns);
+		if (ns->ipc_ns)
+			get_ipc_ns(ns->ipc_ns);
+	}
+
+	return ns;
+}
+
+/*
+ * called from clone.  This now handles copy for nsproxy and all
+ * namespaces therein.
+ */
+int copy_namespaces(int flags, struct task_struct *tsk)
+{
+	struct nsproxy *old_ns = tsk->nsproxy;
+	struct nsproxy *new_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+
+	get_nsproxy(old_ns);
+
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+		return 0;
+
+	new_ns = clone_namespaces(old_ns);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	tsk->nsproxy = new_ns;
+
+	err = copy_namespace(flags, tsk);
+	if (err)
+		goto out_ns;
+
+	err = copy_utsname(flags, tsk);
+	if (err)
+		goto out_uts;
+
+	err = copy_ipcs(flags, tsk);
+	if (err)
+		goto out_ipc;
+
+out:
+	put_nsproxy(old_ns);
+	return err;
+
+out_ipc:
+	if (new_ns->uts_ns)
+		put_uts_ns(new_ns->uts_ns);
+out_uts:
+	if (new_ns->namespace)
+		put_namespace(new_ns->namespace);
+out_ns:
+	tsk->nsproxy = old_ns;
+	kfree(new_ns);
+	goto out;
+}
+EXPORT_SYMBOL(copy_namespaces);
+
+void free_nsproxy(struct nsproxy *ns)
+{
+		if (ns->namespace)
+			put_namespace(ns->namespace);
+		if (ns->uts_ns)
+			put_uts_ns(ns->uts_ns);
+		if (ns->ipc_ns)
+			put_ipc_ns(ns->ipc_ns);
+		kfree(ns);
+}
+EXPORT_SYMBOL(free_nsproxy);
+
+struct namespace * get_task_mnt_ns(struct task_struct *tsk)
+{
+	struct namespace *mnt_ns = NULL;
+
+	task_lock(tsk);
+	if (tsk->nsproxy)
+		mnt_ns = tsk->nsproxy->namespace;
+	if (mnt_ns)
+		get_namespace(mnt_ns);
+	task_unlock(tsk);
+
+	return mnt_ns;
+}
+EXPORT_SYMBOL(get_task_mnt_ns);
diff -upr kernel-2.6.18-417.el5.orig/kernel/panic.c kernel-2.6.18-417.el5-028stab121/kernel/panic.c
--- kernel-2.6.18-417.el5.orig/kernel/panic.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/panic.c	2017-01-13 08:40:32.000000000 -0500
@@ -21,12 +21,14 @@
 #include <linux/debug_locks.h>
 
 int panic_on_oops = 1;
-int tainted;
+int tainted = 1;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 
 int panic_timeout;
+int kernel_text_csum_broken;
+EXPORT_SYMBOL(kernel_text_csum_broken);
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
@@ -183,7 +185,8 @@ const char *print_tainted(void)
 {
 	static char buf[40];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c-------------------- %c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c-------------------- %c%c",
+			kernel_text_csum_broken ? 'B' : ' ',
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
 			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
diff -upr kernel-2.6.18-417.el5.orig/kernel/pid.c kernel-2.6.18-417.el5-028stab121/kernel/pid.c
--- kernel-2.6.18-417.el5.orig/kernel/pid.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/pid.c	2017-01-13 08:40:31.000000000 -0500
@@ -22,11 +22,20 @@
 
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/kmem_cache.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 
+#ifdef CONFIG_VE
+int glob_virt_pids = 1;
+EXPORT_SYMBOL(glob_virt_pids);
+#endif
+
+#include <linux/kmem_cache.h>
+#include <ub/ub_mem.h>
+
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
@@ -41,26 +50,14 @@ EXPORT_SYMBOL_GPL(last_pid);
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-#define PIDMAP_ENTRIES		((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
-#define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
 #define mk_pid(map, off)	(((map) - pidmap_array)*BITS_PER_PAGE + (off))
+#define mk_vpid(ve, map, off)	(((map) - (ve)->ve_pidmap)*BITS_PER_PAGE +(off))
 #define find_next_offset(map, off)					\
 		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
-/*
- * PID-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way a low pid_max
- * value does not cause lots of bitmaps to be allocated, but
- * the scheme scales to up to 4 million PIDs, runtime.
- */
-typedef struct pidmap {
-	atomic_t nr_free;
-	void *page;
-} pidmap_t;
-
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
-	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -77,12 +74,15 @@ static pidmap_t pidmap_array[PIDMAP_ENTR
  */
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static fastcall void free_pidmap(int pid)
+fastcall void free_pidmap(int pid)
 {
 	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
 
-	clear_bit(offset, map->page);
+	BUG_ON(__is_virtual_pid(pid) || pid == 1);
+
+	if (test_and_clear_bit(offset, map->page) == 0)
+		BUG();
 	atomic_inc(&map->nr_free);
 }
 
@@ -123,33 +123,44 @@ static void set_last_pid(int *last, int 
 	} while ((prev != last_write) && (pid_before(base, last_write, pid)));
 }
 
-static int alloc_pidmap(void)
+static inline int alloc_pidmap_page(pidmap_t *map)
+{
+	if (unlikely(!map->page)) {
+		unsigned long page = get_zeroed_page(GFP_KERNEL);
+		/*
+		 * Free the page if someone raced with us
+		 * installing it:
+		 */
+		spin_lock_irq(&pidmap_lock);
+		if (map->page)
+			free_page(page);
+		else
+			map->page = (void *)page;
+		spin_unlock_irq(&pidmap_lock);
+		if (unlikely(!map->page))
+			return -ENOMEM;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(free_pidmap);
+
+int alloc_pidmap(void)
 {
 	int i, offset, max_scan, pid, last = last_pid;
 	pidmap_t *map;
 
 	pid = last + 1;
+	if (__is_virtual_pid(pid))
+		pid += VPID_DIV;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
 	offset = pid & BITS_PER_PAGE_MASK;
 	map = &pidmap_array[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
-		if (unlikely(!map->page)) {
-			unsigned long page = get_zeroed_page(GFP_KERNEL);
-			/*
-			 * Free the page if someone raced with us
-			 * installing it:
-			 */
-			spin_lock_irq(&pidmap_lock);
-			if (map->page)
-				free_page(page);
-			else
-				map->page = (void *)page;
-			spin_unlock_irq(&pidmap_lock);
-			if (unlikely(!map->page))
-				break;
-		}
+		if (unlikely(alloc_pidmap_page(map)))
+			break;
+
 		if (likely(atomic_read(&map->nr_free))) {
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
@@ -158,6 +169,8 @@ static int alloc_pidmap(void)
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
+				if (__is_virtual_pid(offset))
+					offset += VPID_DIV;
 				pid = mk_pid(map, offset);
 			/*
 			 * find_next_offset() found a bit, the pid from it
@@ -183,6 +196,107 @@ static int alloc_pidmap(void)
 	return -1;
 }
 
+#ifdef CONFIG_VE
+static int set_vpidmap(struct ve_struct *ve, int vpid)
+{
+	int offset;
+	pidmap_t *map;
+
+	offset = vpid & BITS_PER_PAGE_MASK;
+	map = ve->ve_pidmap + vpid / BITS_PER_PAGE;
+	ve->last_vpid = vpid;
+
+	if (unlikely(alloc_pidmap_page(map)))
+		return -ENOMEM;
+
+	if (test_and_set_bit(offset, map->page))
+		return -EEXIST;
+
+	atomic_dec(&map->nr_free);
+	return vpid;
+}
+
+static void free_vpidmap(struct ve_struct *ve, int vpid)
+{
+	int offset;
+	pidmap_t *map;
+
+	offset = vpid & BITS_PER_PAGE_MASK;
+	map = ve->ve_pidmap + vpid / BITS_PER_PAGE;
+
+	if (test_and_clear_bit(offset, map->page) == 0)
+		BUG();
+
+	atomic_inc(&map->nr_free);
+}
+
+static int alloc_vpidmap(struct ve_struct *ve)
+{
+	int i, offset, max_scan, vpid, last = ve->last_vpid;
+	pidmap_t *map;
+	int vpid_max = ve->vpid_max;
+
+	vpid = last + 1;
+	if (!__is_virtual_pid(vpid))
+		vpid += VPID_DIV;
+	if (vpid >= vpid_max)
+		vpid = VPID_DIV;
+	offset = vpid & BITS_PER_PAGE_MASK;
+	map = &ve->ve_pidmap[vpid/BITS_PER_PAGE];
+	max_scan = (vpid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
+	for (i = 0; i <= max_scan; ++i) {
+		if (unlikely(alloc_pidmap_page(map)))
+			break;
+
+		if (likely(atomic_read(&map->nr_free))) {
+			while (1) {
+				if (!test_and_set_bit(offset, map->page)) {
+					atomic_dec(&map->nr_free);
+					ve->last_vpid = vpid;
+					return vpid;
+				}
+
+				offset = find_next_offset(map, offset);
+				if (!__is_virtual_pid(offset))
+					offset += VPID_DIV;
+				if (offset > BITS_PER_PAGE)
+					break;
+				vpid = mk_vpid(ve, map, offset);
+			}
+		}
+		if (map < &ve->ve_pidmap[(vpid_max-1)/BITS_PER_PAGE]) {
+			++map;
+			offset = 0;
+		} else {
+			map = &ve->ve_pidmap[0];
+			offset = VPID_DIV;
+		}
+
+		vpid = mk_vpid(ve, map, offset);
+	}
+	return -1;
+}
+
+static int next_vpidmap(struct ve_struct *ve, int vnr)
+{
+	int offset;
+	pidmap_t *map;
+
+	offset = (vnr + 1) & BITS_PER_PAGE_MASK;
+	map = &ve->ve_pidmap[(vnr + 1) / BITS_PER_PAGE];
+
+	for (; map < &ve->ve_pidmap[PIDMAP_ENTRIES]; map++, offset = 0) {
+		if (unlikely(map->page == NULL))
+			continue;
+
+		offset = find_next_bit(map->page, BITS_PER_PAGE, offset);
+		if (offset < BITS_PER_PAGE)
+			return mk_vpid(ve, map, offset);
+	}
+	return -1;
+}
+#endif
+
 static int next_pidmap(unsigned int last)
 {
 	int offset;
@@ -202,6 +316,7 @@ static int next_pidmap(unsigned int last
 	}
 	return -1;
 }
+EXPORT_SYMBOL_GPL(alloc_pidmap);
 
 fastcall void put_pid(struct pid *pid)
 {
@@ -218,24 +333,41 @@ static void delayed_put_pid(struct rcu_h
 	put_pid(pid);
 }
 
-fastcall void free_pid(struct pid *pid)
+static fastcall void __free_pid(struct pid *pid, struct ve_struct *ve)
 {
 	/* We can be called with write_lock_irq(&tasklist_lock) held */
 	unsigned long flags;
 
 	spin_lock_irqsave(&pidmap_lock, flags);
 	hlist_del_rcu(&pid->pid_chain);
-	spin_unlock_irqrestore(&pidmap_lock, flags);
+#ifdef CONFIG_VE
+	if (pid->veid) {
+		BUG_ON(pid->veid != ve->veid);
+		hlist_del_rcu(&pid->vpid_chain);
+		free_vpidmap(ve, pid->vnr);
+	}
+#endif
+	spin_unlock(&pidmap_lock);
+	ub_kmemsize_uncharge(pid->ub, CHARGE_SIZE(pid_cachep->objuse));
+	local_irq_restore(flags);
 
 	free_pidmap(pid->nr);
+	put_beancounter(pid->ub);
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
+fastcall void free_pid(struct pid *pid)
+{
+	__free_pid(pid, get_exec_env());
+}
+EXPORT_SYMBOL_GPL(free_pid);
+
 struct pid *alloc_pid(void)
 {
 	struct pid *pid;
 	enum pid_type type;
 	int nr = -1;
+	struct user_beancounter *ub;
 
 	pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
 	if (!pid)
@@ -246,22 +378,41 @@ struct pid *alloc_pid(void)
 		goto out_free;
 
 	atomic_set(&pid->count, 1);
-	pid->nr = nr;
+	pid->nr = pid->vnr = nr;
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
+#ifdef CONFIG_VE
+	pid->vnr = nr;
+	pid->veid = 0;
+	INIT_HLIST_NODE(&pid->vpid_chain);
+#endif
+	local_irq_disable();
+#ifdef CONFIG_USER_RESOURCE
+	ub = get_exec_ub();
+	if (ub_kmemsize_charge(ub, CHARGE_SIZE(pid_cachep->objuse), UB_HARD))
+		goto out_free_map;
 
-	spin_lock_irq(&pidmap_lock);
+	pid->ub = get_beancounter(ub);
+#endif
+
+	spin_lock(&pidmap_lock);
 	hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
 	spin_unlock_irq(&pidmap_lock);
 
 out:
 	return pid;
 
+#ifdef CONFIG_USER_RESOURCE
+out_free_map:
+	local_irq_enable();
+	free_pidmap(nr);
+#endif
 out_free:
 	kmem_cache_free(pid_cachep, pid);
 	pid = NULL;
 	goto out;
 }
+EXPORT_SYMBOL_GPL(alloc_pid);
 
 struct pid * fastcall find_pid(int nr)
 {
@@ -275,6 +426,17 @@ struct pid * fastcall find_pid(int nr)
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(find_pid);
+
+static struct pid *__lookup_vpid_mapping(int vnr, int veid);
+
+struct pid * fastcall find_vpid(int nr)
+{
+	return (!is_virtual_pid(nr) ? find_pid(nr) :
+			__lookup_vpid_mapping(nr, VEID(get_exec_env())));
+}
+
+EXPORT_SYMBOL(find_vpid);
 
 int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
 {
@@ -290,6 +452,7 @@ int fastcall attach_pid(struct task_stru
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(attach_pid);
 
 void fastcall detach_pid(struct task_struct *task, enum pid_type type)
 {
@@ -307,8 +470,9 @@ void fastcall detach_pid(struct task_str
 		if (!hlist_empty(&pid->tasks[tmp]))
 			return;
 
-	free_pid(pid);
+	__free_pid(pid, task->ve_task_info.owner_env);
 }
+EXPORT_SYMBOL_GPL(detach_pid);
 
 struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 {
@@ -321,17 +485,56 @@ struct task_struct * fastcall pid_task(s
 	}
 	return result;
 }
+EXPORT_SYMBOL(pid_task);
 
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
+ * This function shouldn't be used, but proprietary ATI video driver uses it.
  */
 struct task_struct *find_task_by_pid_type(int type, int nr)
 {
-	return pid_task(find_pid(nr), type);
+	static int warning_first = 1;
+
+	if (warning_first) {
+		warning_first = 0;
+		printk(KERN_ERR "%s: deprecated find_task_by_pid function is "
+			"used. If it is called by the proprietary ATI video driver it is ok, "
+			"otherwise please report the case to users@openvz.org\n", current->comm);
+		dump_stack();
+	}
+	return find_task_by_pid_type_ve(type, nr);
 }
 
 EXPORT_SYMBOL(find_task_by_pid_type);
 
+struct task_struct *find_task_by_pid_type_all(int type, int nr)
+{
+	BUG_ON(nr != -1 && is_virtual_pid(nr));
+	return pid_task(find_pid(nr), type);
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type_all);
+
+#ifdef CONFIG_VE
+
+struct task_struct *find_task_by_pid_type_ve(int type, int nr)
+{
+	struct task_struct *tsk;
+	struct pid *pid;
+
+	pid = find_vpid(nr);
+	if (!pid)
+		return NULL;
+
+	tsk = pid_task(pid, type);
+	return (tsk != NULL && ve_accessible(VE_TASK_INFO(tsk)->owner_env,
+			get_exec_env()) ? tsk : NULL);
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type_ve);
+
+#endif
+
 struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
 {
 	struct task_struct *result;
@@ -354,15 +557,34 @@ struct pid *find_get_pid(pid_t nr)
 	return pid;
 }
 
+#ifdef CONFIG_VE
+static struct pid *find_ge_pid_ve(int nr, struct ve_struct *ve)
+{
+	struct pid *pid;
+
+	do {
+		pid = __lookup_vpid_mapping(nr, ve->veid);
+		if (pid)
+			break;
+		nr = next_vpidmap(ve, nr);
+	} while (nr > 0);
+
+	return pid;
+}
+#endif
+
 /*
  * Used by proc to find the first pid that is greater then or equal to nr.
  *
  * If there is a pid at nr this function is exactly the same as find_pid.
  */
-struct pid *find_ge_pid(int nr)
+struct pid *find_ge_pid(int nr, struct ve_struct *ve)
 {
 	struct pid *pid;
 
+	if (!ve_is_super(ve))
+		return find_ge_pid_ve(nr, ve);
+
 	do {
 		pid = find_pid(nr);
 		if (pid)
@@ -373,6 +595,229 @@ struct pid *find_ge_pid(int nr)
 	return pid;
 }
 
+#ifdef CONFIG_VE
+
+/* Virtual PID bits.
+ *
+ * At the moment all internal structures in kernel store real global pid.
+ * The only place, where virtual PID is used, is at user frontend. We
+ * remap virtual pids obtained from user to global ones (vpid_to_pid) and
+ * map globals to virtuals before showing them to user (virt_pid_type).
+ *
+ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy.
+ */
+
+pid_t _pid_to_vpid(pid_t pid)
+{
+	struct pid * p;
+
+	if (unlikely(is_virtual_pid(pid)))
+		return -1;
+
+	rcu_read_lock();
+	p = find_pid(pid);
+	pid = (p != NULL ? p->vnr : -1);
+	rcu_read_unlock();
+	return pid;
+}
+EXPORT_SYMBOL_GPL(_pid_to_vpid);
+
+pid_t pid_to_vpid(pid_t pid)
+{
+	int vpid;
+
+	if (unlikely(pid <= 0))
+		return pid;
+
+	BUG_ON(is_virtual_pid(pid));
+
+	if (ve_is_super(get_exec_env()))
+		return pid;
+
+	vpid = _pid_to_vpid(pid);
+	if (unlikely(vpid == -1))
+		/* It is allowed: global pid can be used everywhere.
+		 * This can happen, when kernel remembers stray pids:
+		 * signal queues, locks etc.
+		 */
+		vpid = pid;
+
+	return vpid;
+}
+EXPORT_SYMBOL_GPL(pid_to_vpid);
+
+/* To map virtual pids to global we maintain special hash table.
+ *
+ * Mapping entries are allocated when a process with non-trivial
+ * mapping is forked, which is possible only after VE migrated.
+ * Mappings are destroyed, when a global pid is removed from global
+ * pidmap, which means we do not need to refcount mappings.
+ */
+
+static struct hlist_head *vpid_hash;
+
+static inline int vpid_hashfn(int vnr, int veid)
+{
+	return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift);
+}
+
+static struct pid *__lookup_vpid_mapping(int vnr, int veid)
+{
+	struct hlist_node *elem;
+	struct pid *map;
+
+	hlist_for_each_entry_rcu(map, elem,
+			&vpid_hash[vpid_hashfn(vnr, veid)], vpid_chain) {
+		if (map->vnr == vnr && map->veid == veid)
+			return map;
+	}
+	return NULL;
+}
+
+
+static pid_t  __vpid_to_pid_ve(pid_t pid, struct ve_struct *env)
+{
+	struct pid *map;
+
+	if (!env->sparse_vpid) {
+		int init_pid;
+
+		init_pid = env->init_entry->pid;
+		if (pid == 1)
+			return init_pid;
+		if (pid == init_pid + VPID_DIV)
+			return -1; /* vpid of init is 1 */
+		return pid - VPID_DIV;
+	}
+
+	rcu_read_lock();
+	map = __lookup_vpid_mapping(pid, VEID(env));
+	pid = (map != NULL ? map->nr : -1);
+	rcu_read_unlock();
+	return pid;
+}
+
+pid_t vpid_to_pid_ve(pid_t pid, struct ve_struct *env)
+{
+	if (unlikely(ve_is_super(env)))
+		return -1;
+
+	read_lock(&tasklist_lock);
+	pid = __vpid_to_pid_ve(pid, env);
+	read_unlock(&tasklist_lock);
+
+	return pid;
+}
+
+/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used
+ * only under tasklist_lock. In some places we must use only this version
+ * (f.e. __kill_pg_info is called under write lock!)
+ *
+ * Caller should pass virtual pid. This function returns an error, when
+ * seeing a global pid.
+ */
+pid_t __vpid_to_pid(pid_t pid)
+{
+	if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env())))
+		return -1;
+	return  __vpid_to_pid_ve(pid, get_exec_env());
+}
+EXPORT_SYMBOL_GPL(__vpid_to_pid);
+
+pid_t vpid_to_pid(pid_t pid)
+{
+	/* User gave bad pid. It is his problem. */
+	if (unlikely(pid <= 0))
+		return pid;
+
+	if (!is_virtual_pid(pid))
+		return pid;
+
+	return __vpid_to_pid(pid);
+}
+EXPORT_SYMBOL_GPL(vpid_to_pid);
+
+pid_t alloc_vpid(struct pid *pid, pid_t virt_pid)
+{
+	int result;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	if (ve_is_super(ve))
+		return pid->vnr;
+
+	if (!ve->virt_pids)
+		return set_vpidmap(ve, pid->vnr);
+
+	if (!ve->sparse_vpid) {
+		result = pid->nr + VPID_DIV;
+
+		if (result > ve->vpid_max)
+			goto sparse;
+
+		if (virt_pid == -1)
+			goto out_set_map;
+
+		result = virt_pid;
+		if (virt_pid == 1 || virt_pid == pid->nr + VPID_DIV)
+			goto out_set_map;
+
+sparse:
+		ve->sparse_vpid = 1;
+	}
+
+	result = virt_pid;
+	if (unlikely(virt_pid != -1))
+		goto out_set_map;
+
+raced:
+	result = alloc_vpidmap(ve);
+	if (unlikely(result < 0))
+		return -ENOMEM;
+
+out_set:
+	pid->veid = ve->veid;
+	pid->vnr = result;
+
+	spin_lock_irq(&pidmap_lock);
+	hlist_add_head_rcu(&pid->vpid_chain,
+			&vpid_hash[vpid_hashfn(result, pid->veid)]);
+	spin_unlock_irq(&pidmap_lock);
+	return result;
+
+out_set_map:
+	result = set_vpidmap(ve, result);
+	if (unlikely(result < 0)) {
+		if (result == -EEXIST) {
+			if (virt_pid != -1)
+				return -EEXIST;
+
+			if (ve->sparse_vpid)
+				goto raced;
+		}
+
+		return result;
+	}
+	goto out_set;
+}
+EXPORT_SYMBOL(alloc_vpid);
+
+void free_vpid(struct pid * pid)
+{
+	if (pid->veid == 0)
+		return;
+
+	spin_lock_irq(&pidmap_lock);
+	hlist_del_rcu(&pid->vpid_chain);
+	spin_unlock_irq(&pidmap_lock);
+
+	free_vpidmap(get_exec_env(), pid->vnr);
+	pid->veid = 0;
+	pid->vnr = pid->nr;
+}
+EXPORT_SYMBOL(free_vpid);
+#endif
+
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -396,6 +841,14 @@ void __init pidhash_init(void)
 		panic("Could not alloc pidhash!\n");
 	for (i = 0; i < pidhash_size; i++)
 		INIT_HLIST_HEAD(&pid_hash[i]);
+ 
+#ifdef CONFIG_VE
+	vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
+	if (!vpid_hash)
+		panic("Could not alloc vpid_hash!\n");
+	for (i = 0; i < pidhash_size; i++)
+		INIT_HLIST_HEAD(&vpid_hash[i]);
+#endif
 }
 
 void __init pidmap_init(void)
diff -upr kernel-2.6.18-417.el5.orig/kernel/posix-cpu-timers.c kernel-2.6.18-417.el5-028stab121/kernel/posix-cpu-timers.c
--- kernel-2.6.18-417.el5.orig/kernel/posix-cpu-timers.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/posix-cpu-timers.c	2017-01-13 08:40:26.000000000 -0500
@@ -6,6 +6,7 @@
 #include <linux/posix-timers.h>
 #include <linux/errno.h>
 #include <asm/uaccess.h>
+#include <linux/module.h>
 
 static int check_clock(const clockid_t which_clock)
 {
@@ -20,7 +21,7 @@ static int check_clock(const clockid_t w
 		return 0;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
 		   p->tgid != current->tgid : p->tgid != pid)) {
 		error = -EINVAL;
@@ -303,7 +304,7 @@ int posix_cpu_clock_get(const clockid_t 
 		 */
 		struct task_struct *p;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 		if (p) {
 			if (CPUCLOCK_PERTHREAD(which_clock)) {
 				if (p->tgid == current->tgid) {
@@ -347,7 +348,7 @@ int posix_cpu_timer_create(struct k_itim
 		if (pid == 0) {
 			p = current;
 		} else {
-			p = find_task_by_pid(pid);
+			p = find_task_by_pid_ve(pid);
 			if (p && p->tgid != current->tgid)
 				p = NULL;
 		}
@@ -355,7 +356,7 @@ int posix_cpu_timer_create(struct k_itim
 		if (pid == 0) {
 			p = current->group_leader;
 		} else {
-			p = find_task_by_pid(pid);
+			p = find_task_by_pid_ve(pid);
 			if (p && p->tgid != pid)
 				p = NULL;
 		}
@@ -399,14 +400,21 @@ int posix_cpu_timer_del(struct k_itimer 
 			spin_unlock(&p->sighand->siglock);
 		}
 		read_unlock(&tasklist_lock);
-
-		if (!ret)
-			put_task_struct(p);
 	}
 
+	if (!ret)
+		posix_cpu_timer_cleanup(timer);
+
 	return ret;
 }
 
+void posix_cpu_timer_cleanup(struct k_itimer *timer)
+{
+	struct task_struct *p = timer->it.cpu.task;
+	if (likely(p != NULL))
+		put_task_struct(p);
+}
+
 /*
  * Clean out CPU timers still ticking when a thread exited.  The task
  * pointer is cleared, and the expiry time is replaced with the residual
@@ -1427,6 +1435,7 @@ void set_process_cpu_timer(struct task_s
 		process_timer_rebalance(tsk, clock_idx, expires, now);
 	}
 }
+EXPORT_SYMBOL(set_process_cpu_timer);
 
 static long posix_cpu_clock_nanosleep_restart(struct restart_block *);
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/posix-timers.c kernel-2.6.18-417.el5-028stab121/kernel/posix-timers.c
--- kernel-2.6.18-417.el5.orig/kernel/posix-timers.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/posix-timers.c	2017-01-13 08:40:19.000000000 -0500
@@ -31,6 +31,7 @@
  * POSIX clocks & timers
  */
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
@@ -49,6 +50,8 @@
 #include <linux/workqueue.h>
 #include <linux/module.h>
 
+#include <ub/beancounter.h>
+
 /*
  * Management arrays for POSIX timers.	 Timers are kept in slab memory
  * Timer ids are allocated by an external routine that keeps track of the
@@ -199,6 +202,10 @@ static int common_timer_create(struct k_
 	return 0;
 }
 
+static inline void common_timer_cleanup(struct k_itimer *timer)
+{
+}
+
 /*
  * Return nonzero if we know a priori this clockid_t value is bogus.
  */
@@ -242,7 +249,8 @@ static __init int init_posix_timers(void
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
-					sizeof (struct k_itimer), 0, 0, NULL, NULL);
+					sizeof (struct k_itimer), 0,
+					SLAB_UBC, NULL, NULL);
 	idr_init(&posix_timers_id);
 	return 0;
 }
@@ -298,6 +306,13 @@ void do_schedule_next_timer(struct sigin
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
+	int ret;
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
+
+	ve = set_exec_env(timr->it_process->ve_task_info.owner_env);
+	ub = set_exec_ub(timr->it_process->task_bc.task_ub);
+
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -318,11 +333,11 @@ int posix_timer_event(struct k_itimer *t
 
 	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
 		struct task_struct *leader;
-		int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+		ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
 					timr->it_process);
 
 		if (likely(ret >= 0))
-			return ret;
+			goto out;
 
 		timr->it_sigev_notify = SIGEV_SIGNAL;
 		leader = timr->it_process->group_leader;
@@ -330,8 +345,12 @@ int posix_timer_event(struct k_itimer *t
 		timr->it_process = leader;
 	}
 
-	return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+	ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
 				   timr->it_process);
+out:
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(ve);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
@@ -380,7 +399,7 @@ static struct task_struct * good_sigeven
 	struct task_struct *rtn = current->group_leader;
 
 	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
+		(!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
 		 rtn->tgid != current->tgid ||
 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
 		return NULL;
@@ -494,12 +513,12 @@ sys_timer_create(const clockid_t which_c
 	if (copy_to_user(created_timer_id,
 			 &new_timer_id, sizeof (new_timer_id))) {
 		error = -EFAULT;
-		goto out;
+		goto out_cleanup;
 	}
 	if (timer_event_spec) {
 		if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
 			error = -EFAULT;
-			goto out;
+			goto out_cleanup;
 		}
 		new_timer->it_sigev_notify = event.sigev_notify;
 		new_timer->it_sigev_signo = event.sigev_signo;
@@ -538,7 +557,7 @@ sys_timer_create(const clockid_t which_c
 		read_unlock(&tasklist_lock);
 		if (!process) {
 			error = -EINVAL;
-			goto out;
+			goto out_cleanup;
 		}
 	} else {
 		new_timer->it_sigev_notify = SIGEV_SIGNAL;
@@ -557,10 +576,12 @@ sys_timer_create(const clockid_t which_c
 	 * and may cease to exist at any time.  Don't use or modify
 	 * new_timer after the unlock call.
 	 */
+	return 0;
 
+out_cleanup:
+	CLOCK_DISPATCH(new_timer->it_clock, timer_cleanup, (new_timer));
 out:
-	if (error)
-		release_posix_timer(new_timer, it_id_set);
+	release_posix_timer(new_timer, it_id_set);
 
 	return error;
 }
diff -upr kernel-2.6.18-417.el5.orig/kernel/power/Kconfig kernel-2.6.18-417.el5-028stab121/kernel/power/Kconfig
--- kernel-2.6.18-417.el5.orig/kernel/power/Kconfig	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/power/Kconfig	2017-01-13 08:40:24.000000000 -0500
@@ -56,7 +56,7 @@ config PM_TRACE
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP))
+	depends on PM && SWAP && X86 || ((FRV || PPC32) && !SMP)
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need ACPI or APM.
diff -upr kernel-2.6.18-417.el5.orig/kernel/power/process.c kernel-2.6.18-417.el5-028stab121/kernel/power/process.c
--- kernel-2.6.18-417.el5.orig/kernel/power/process.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/power/process.c	2017-01-13 08:40:24.000000000 -0500
@@ -20,40 +20,20 @@
 
 #define TIMEOUT (120 * HZ)
 
+extern atomic_t global_suspend;
+
 static inline int freezeable(struct task_struct * p)
 {
 	if ((p == current) || 
 	    (p->flags & PF_NOFREEZE) ||
 	    (p->exit_state == EXIT_ZOMBIE) ||
 	    (p->exit_state == EXIT_DEAD) ||
-	    (p->state == TASK_STOPPED))
+	    (p->state == TASK_STOPPED) ||
+	    (p->state == TASK_TRACED))
 		return 0;
 	return 1;
 }
 
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
-	/* Hmm, should we be allowed to suspend when there are realtime
-	   processes around? */
-	long save;
-	save = current->state;
-	pr_debug("%s entered refrigerator\n", current->comm);
-	printk("=");
-
-	frozen_process(current);
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
-	while (frozen(current)) {
-		current->state = TASK_UNINTERRUPTIBLE;
-		schedule();
-	}
-	pr_debug("%s left refrigerator\n", current->comm);
-	current->state = save;
-}
-
 static inline void freeze_process(struct task_struct *p)
 {
 	unsigned long flags;
@@ -86,13 +66,14 @@ int freeze_processes(void)
 	unsigned long start_time;
 	struct task_struct *g, *p;
 
+	atomic_inc(&global_suspend);
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
 	user_frozen = 0;
 	do {
 		nr_user = todo = 0;
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			if (!freezeable(p))
 				continue;
 			if (frozen(p))
@@ -115,7 +96,7 @@ int freeze_processes(void)
 					freeze_process(p);
 				todo++;
 			}
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		todo += nr_user;
 		if (!user_frozen && !nr_user) {
@@ -128,6 +109,8 @@ int freeze_processes(void)
 			break;
 	} while(todo);
 
+	atomic_dec(&global_suspend);
+
 	/* This does not unfreeze processes that are already frozen
 	 * (we have slightly ugly calling convention in that respect,
 	 * and caller must call thaw_processes() if something fails),
@@ -139,16 +122,16 @@ int freeze_processes(void)
 			"after %d seconds (%d tasks remaining):\n",
 			TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			if (freezeable(p) && !frozen(p))
 				printk(KERN_ERR "  %s\n", p->comm);
 			cancel_freezing(p);
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
 	}
 
-	printk( "|\n" );
+	/* printk( "|\n" ); */
 	BUG_ON(in_atomic());
 	return 0;
 }
@@ -159,16 +142,14 @@ void thaw_processes(void)
 
 	printk( "Restarting tasks..." );
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (!freezeable(p))
 			continue;
 		if (!thaw_process(p))
 			printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 	schedule();
 	printk( " done\n" );
 }
-
-EXPORT_SYMBOL(refrigerator);
diff -upr kernel-2.6.18-417.el5.orig/kernel/power/snapshot.c kernel-2.6.18-417.el5-028stab121/kernel/power/snapshot.c
--- kernel-2.6.18-417.el5.orig/kernel/power/snapshot.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/power/snapshot.c	2017-01-13 08:40:15.000000000 -0500
@@ -504,7 +504,7 @@ static void init_header(struct swsusp_in
 	memset(info, 0, sizeof(struct swsusp_info));
 	info->version_code = LINUX_VERSION_CODE;
 	info->num_physpages = num_physpages;
-	memcpy(&info->uts, &system_utsname, sizeof(system_utsname));
+	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
 	info->cpus = num_online_cpus();
 	info->image_pages = nr_copy_pages;
 	info->pages = nr_copy_pages + nr_meta_pages + 1;
@@ -645,13 +645,13 @@ static int check_header(struct swsusp_in
 		reason = "kernel version";
 	if (info->num_physpages != num_physpages)
 		reason = "memory size";
-	if (strcmp(info->uts.sysname,system_utsname.sysname))
+	if (strcmp(info->uts.sysname,init_utsname()->sysname))
 		reason = "system type";
-	if (strcmp(info->uts.release,system_utsname.release))
+	if (strcmp(info->uts.release,init_utsname()->release))
 		reason = "kernel release";
-	if (strcmp(info->uts.version,system_utsname.version))
+	if (strcmp(info->uts.version,init_utsname()->version))
 		reason = "version";
-	if (strcmp(info->uts.machine,system_utsname.machine))
+	if (strcmp(info->uts.machine,init_utsname()->machine))
 		reason = "machine";
 	if (reason) {
 		printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
diff -upr kernel-2.6.18-417.el5.orig/kernel/printk.c kernel-2.6.18-417.el5-028stab121/kernel/printk.c
--- kernel-2.6.18-417.el5.orig/kernel/printk.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/printk.c	2017-01-13 08:40:22.000000000 -0500
@@ -32,8 +32,10 @@
 #include <linux/smp.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
+#include <linux/vzratelimit.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
+#include <linux/veprintk.h>
 
 #include <asm/uaccess.h>
 #ifndef __GENKSYMS__
@@ -60,6 +62,9 @@ int console_printk[4] = {
 
 EXPORT_UNUSED_SYMBOL(console_printk);  /*  June 2006  */
 
+struct printk_aligned printk_no_wake_var[NR_CPUS];
+EXPORT_SYMBOL(printk_no_wake_var);
+
 /*
  * Low lever drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
@@ -90,7 +95,7 @@ static int console_locked, console_suspe
  * It is also used in interesting ways to provide interlocking in
  * release_console_sem().
  */
-static DEFINE_SPINLOCK(logbuf_lock);
+DEFINE_SPINLOCK(logbuf_lock);
 
 #define LOG_BUF_MASK	(log_buf_len-1)
 #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
@@ -121,6 +126,7 @@ static int preferred_console = -1;
 
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
+int console_silence_loglevel;
 
 #ifdef CONFIG_PRINTK
 
@@ -137,6 +143,19 @@ void log_buf_kexec_setup(void)
 	SYMBOL(logged_chars);
 }
 
+static int __init setup_console_silencelevel(char *str)
+{
+	int level;
+
+	if (get_option(&str, &level) != 1)
+		return 0;
+
+	console_silence_loglevel = level;
+	return 1;
+}
+
+__setup("silencelevel=", setup_console_silencelevel);
+
 static int __init log_buf_len_setup(char *str)
 {
 	unsigned long size = memparse(str, &str);
@@ -234,6 +253,9 @@ int do_syslog(int type, char __user *buf
 	char c;
 	int error = 0;
 
+	if (!ve_is_super(get_exec_env()) && (type == 6 || type == 7))
+		goto out;
+
 	error = security_syslog(type);
 	if (error)
 		return error;
@@ -254,15 +276,15 @@ int do_syslog(int type, char __user *buf
 			error = -EFAULT;
 			goto out;
 		}
-		error = wait_event_interruptible(log_wait,
-							(log_start - log_end));
+		error = wait_event_interruptible(ve_log_wait,
+						(ve_log_start - ve_log_end));
 		if (error)
 			goto out;
 		i = 0;
 		spin_lock_irq(&logbuf_lock);
-		while (!error && (log_start != log_end) && i < len) {
-			c = LOG_BUF(log_start);
-			log_start++;
+		while (!error && (ve_log_start != ve_log_end) && i < len) {
+			c = VE_LOG_BUF(ve_log_start);
+			ve_log_start++;
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,buf);
 			buf++;
@@ -288,15 +310,17 @@ int do_syslog(int type, char __user *buf
 			error = -EFAULT;
 			goto out;
 		}
+		if (ve_log_buf == NULL)
+			goto out;
 		count = len;
-		if (count > log_buf_len)
-			count = log_buf_len;
+		if (count > ve_log_buf_len)
+			count = ve_log_buf_len;
 		spin_lock_irq(&logbuf_lock);
-		if (count > logged_chars)
-			count = logged_chars;
+		if (count > ve_logged_chars)
+			count = ve_logged_chars;
 		if (do_clear)
-			logged_chars = 0;
-		limit = log_end;
+			ve_logged_chars = 0;
+		limit = ve_log_end;
 		/*
 		 * __put_user() could sleep, and while we sleep
 		 * printk() could overwrite the messages
@@ -305,9 +329,9 @@ int do_syslog(int type, char __user *buf
 		 */
 		for (i = 0; i < count && !error; i++) {
 			j = limit-1-i;
-			if (j + log_buf_len < log_end)
+			if (j + ve_log_buf_len < ve_log_end)
 				break;
-			c = LOG_BUF(j);
+			c = VE_LOG_BUF(j);
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,&buf[count-1-i]);
 			cond_resched();
@@ -331,7 +355,7 @@ int do_syslog(int type, char __user *buf
 		}
 		break;
 	case 5:		/* Clear ring buffer */
-		logged_chars = 0;
+		ve_logged_chars = 0;
 		break;
 	case 6:		/* Disable logging to console */
 		console_loglevel = minimum_console_loglevel;
@@ -343,16 +367,19 @@ int do_syslog(int type, char __user *buf
 		error = -EINVAL;
 		if (len < 1 || len > 8)
 			goto out;
+		error = 0;
+		/* VE has no console, so return success */
+		if (!ve_is_super(get_exec_env()))
+			goto out;
 		if (len < minimum_console_loglevel)
 			len = minimum_console_loglevel;
 		console_loglevel = len;
-		error = 0;
 		break;
 	case 9:		/* Number of chars in the log buffer */
-		error = log_end - log_start;
+		error = ve_log_end - ve_log_start;
 		break;
 	case 10:	/* Size of the log buffer */
-		error = log_buf_len;
+		error = ve_log_buf_len;
 		break;
 	default:
 		error = -EINVAL;
@@ -463,16 +490,18 @@ static void call_console_drivers(unsigne
 
 static void emit_log_char(char c)
 {
-	LOG_BUF(log_end) = c;
-	log_end++;
-	if (log_end - log_start > log_buf_len)
-		log_start = log_end - log_buf_len;
-	if (log_end - con_start > log_buf_len)
-		con_start = log_end - log_buf_len;
-	if (logged_chars < log_buf_len)
-		logged_chars++;
+	VE_LOG_BUF(ve_log_end) = c;
+	ve_log_end++;
+	if (ve_log_end - ve_log_start > ve_log_buf_len)
+		ve_log_start = ve_log_end - ve_log_buf_len;
+	if (ve_is_super(get_exec_env()) && ve_log_end - con_start > ve_log_buf_len)
+		con_start = ve_log_end - ve_log_buf_len;
+	if (ve_logged_chars < ve_log_buf_len)
+		ve_logged_chars++;
 }
 
+static unsigned long do_release_console_sem(unsigned long *flags);
+
 /*
  * Zap console related locks when oopsing. Only zap at most once
  * every 10 seconds, to leave time for slow consoles to print a
@@ -550,6 +579,30 @@ static int have_callable_console(void)
  * See the vsnprintf() documentation for format string extensions over C99.
  */
 
+static inline int ve_log_init(void)
+{
+#ifdef CONFIG_VE
+	if (ve_log_buf != NULL)
+		return 0;
+
+	if (ve_is_super(get_exec_env())) {
+		ve0._log_wait = &log_wait;
+		ve0._log_start = &log_start;
+		ve0._log_end = &log_end;
+		ve0._logged_chars = &logged_chars;
+		ve0.log_buf = log_buf;
+		return 0;
+	}
+
+	ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC);
+	if (!ve_log_buf)
+		return -ENOMEM;
+
+	memset(ve_log_buf, 0, ve_log_buf_len);
+#endif
+	return 0;
+}
+
 asmlinkage int printk(const char *fmt, ...)
 {
 	va_list args;
@@ -574,13 +627,13 @@ static const char recursion_bug_msg [] =
 			KERN_CRIT "BUG: recent printk recursion!\n";
 static int recursion_bug;
 
-asmlinkage int vprintk(const char *fmt, va_list args)
+asmlinkage int __vprintk(const char *fmt, va_list args)
 {
 	static int log_level_unknown = 1;
 	static char printk_buf[1024];
 	unsigned long flags;
 	int printed_len = 0;
-	int this_cpu;
+	int this_cpu, err, need_wake;
 	char *p;
 
 	preempt_disable();
@@ -610,6 +663,14 @@ asmlinkage int vprintk(const char *fmt, 
 	spin_lock(&logbuf_lock);
 	printk_cpu = this_cpu;
 
+	err = ve_log_init();
+	if (err) {
+		spin_unlock(&logbuf_lock);
+		lockdep_on();
+		local_irq_restore(flags);
+		return err;
+	}
+
 	if (recursion_bug) {
 		recursion_bug = 0;
 		strcpy(printk_buf, recursion_bug_msg);
@@ -676,7 +737,32 @@ asmlinkage int vprintk(const char *fmt, 
 			log_level_unknown = 1;
 	}
 
-	if (!down_trylock(&console_sem)) {
+	if (!ve_is_super(get_exec_env())) {
+		need_wake = (ve_log_start != ve_log_end);
+		printk_cpu = UINT_MAX;
+		spin_unlock(&logbuf_lock);
+		lockdep_on();
+		local_irq_restore(flags);
+		if (!oops_in_progress && need_wake)
+			wake_up_interruptible(&ve_log_wait);
+	} else if (__printk_no_wake) {
+		/*
+		 * A difficult case, created by the console semaphore mess...
+		 * All wakeups are omitted.
+		 */
+		if (!atomic_add_negative(-1, &console_sem.count)) {
+			console_locked = 1;
+			console_may_schedule = 0;
+			do_release_console_sem(&flags);
+			console_locked = 0;
+			console_may_schedule = 0;
+		}
+		atomic_inc(&console_sem.count);
+		printk_cpu = UINT_MAX;
+		spin_unlock(&logbuf_lock);
+		lockdep_on();
+		local_irq_restore(flags);
+	} else if (!down_trylock(&console_sem)) {
 		/*
 		 * We own the drivers.  We can drop the spinlock and
 		 * let release_console_sem() print the text, maybe ...
@@ -720,6 +806,65 @@ out_restore_irqs:
 EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(vprintk);
 
+static struct timer_list conswakeup_timer;
+static void conswakeup_timer_call(unsigned long dumy)
+{
+	if (!down_trylock(&console_sem)) {
+		console_locked = 1;
+		console_may_schedule = 0;
+		release_console_sem();
+	}
+	mod_timer(&conswakeup_timer, jiffies + 5 * HZ);
+}
+
+static int __init conswakeup_init(void)
+{
+	init_timer(&conswakeup_timer);
+	conswakeup_timer.function = &conswakeup_timer_call;
+	conswakeup_timer.expires = jiffies + 5 * HZ;
+	add_timer(&conswakeup_timer);
+	return 0;
+}
+console_initcall(conswakeup_init);
+
+asmlinkage int vprintk(const char *fmt, va_list args)
+{
+	int i;
+	struct ve_struct *env;
+
+	env = set_exec_env(get_ve0());
+	i = __vprintk(fmt, args);
+	set_exec_env(env);
+	return i;
+}
+
+asmlinkage int ve_vprintk(int dst, const char *fmt, va_list args)
+{
+	int printed_len;
+	va_list args2;
+
+	printed_len = 0;
+	va_copy(args2, args);
+	if (ve_is_super(get_exec_env()) || (dst & VE0_LOG))
+		printed_len = vprintk(fmt, args);
+	if (!ve_is_super(get_exec_env()) && (dst & VE_LOG))
+		printed_len = __vprintk(fmt, args2);
+	return printed_len;
+}
+
+asmlinkage int ve_printk(int dst, const char *fmt, ...)
+{
+	va_list args;
+	int printed_len;
+
+	va_start(args, fmt);
+	printed_len = ve_vprintk(dst, fmt, args);
+	va_end(args);
+	return printed_len;
+}
+EXPORT_SYMBOL(ve_printk);
+
+
 #else
 
 asmlinkage long sys_syslog(int type, char __user *buf, int len)
@@ -776,6 +921,18 @@ static int __init console_setup(char *st
 }
 __setup("console=", console_setup);
 
+void wake_up_klogd(void)
+{
+	if (!oops_in_progress && waitqueue_active(&log_wait))
+		/*
+		 * If we printk from within the lock dependency code,
+		 * from within the scheduler code, then do not lock
+		 * up due to self-recursion:
+		 */
+		if (!lockdep_internal())
+			wake_up_interruptible(&log_wait);
+}
+
 /**
  * add_preferred_console - add a device to the list of preferred consoles.
  * @name: device name
@@ -883,43 +1040,45 @@ EXPORT_UNUSED_SYMBOL(is_console_locked);
  *
  * release_console_sem() may be called from any context.
  */
-void release_console_sem(void)
+static unsigned long do_release_console_sem(unsigned long *flags)
 {
-	unsigned long flags;
 	unsigned long _con_start, _log_end;
 	unsigned long wake_klogd = 0;
 
 	if (console_suspended) {
 		up(&secondary_console_sem);
-		return;
+		goto out;
 	}
 
 	console_may_schedule = 0;
 
 	for ( ; ; ) {
-		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
 		if (con_start == log_end)
 			break;			/* Nothing to print */
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock(&logbuf_lock);
+		spin_unlock_irqrestore(&logbuf_lock, *flags);
 		call_console_drivers(_con_start, _log_end);
-		local_irq_restore(flags);
+		spin_lock_irqsave(&logbuf_lock, *flags);
 	}
+out:
+	return wake_klogd;
+}
+
+void release_console_sem(void)
+{
+	unsigned long flags;
+	unsigned long wake_klogd;
+
+	spin_lock_irqsave(&logbuf_lock, flags);
+	wake_klogd = do_release_console_sem(&flags);
 	console_locked = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
-	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
-		/*
-		 * If we printk from within the lock dependency code,
-		 * from within the scheduler code, then do not lock
-		 * up due to self-recursion:
-		 */
-		if (!lockdep_internal())
-			wake_up_interruptible(&log_wait);
-	}
+	if (wake_klogd)
+		wake_up_klogd();
 }
 EXPORT_SYMBOL(release_console_sem);
 
@@ -1222,3 +1381,96 @@ bool printk_timed_ratelimit(unsigned lon
 	return false;
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
+
+/*
+ *	Rate limiting stuff.
+ */
+int vz_ratelimit(struct vz_rate_info *p)
+{
+	unsigned long cjif, djif;
+	unsigned long flags;
+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;
+		}
+		p->bucket++;
+	} else {
+		new_bucket = p->bucket - (djif / (unsigned)p->interval);
+		if (new_bucket < 0)
+			new_bucket = 0;
+		p->bucket = new_bucket + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
+}
+
+static cpumask_t nmi_show_regs_cpus = CPU_MASK_NONE;
+static unsigned long nmi_show_regs_timeout;
+
+void __attribute__((weak)) send_nmi_ipi_allbutself(void)
+{
+	cpus_clear(nmi_show_regs_cpus);
+}
+
+static void busted_show_regs(struct pt_regs *regs, int in_nmi)
+{
+	if (!regs || (in_nmi && spin_is_locked(&logbuf_lock)))
+		return;
+
+	bust_spinlocks(1);
+	printk("----------- IPI show regs -----------\n");
+	show_regs(regs);
+	bust_spinlocks(0);
+}
+
+void nmi_show_regs(struct pt_regs *regs, int in_nmi)
+{
+	if (cpus_empty(nmi_show_regs_cpus))
+		goto doit;
+
+	/* Previous request still in progress */
+	if (time_before(jiffies, nmi_show_regs_timeout))
+		return;
+
+	if (!in_nmi || !spin_is_locked(&logbuf_lock)) {
+		int cpu;
+
+		bust_spinlocks(1);
+		printk("previous show regs lost IPI to: ");
+		for_each_cpu_mask(cpu, nmi_show_regs_cpus)
+			printk("%d ", cpu);
+		printk("\n");
+		bust_spinlocks(0);
+	}
+
+doit:
+	nmi_show_regs_timeout = jiffies + HZ/10;
+	nmi_show_regs_cpus = cpu_online_map;
+	cpu_clear(raw_smp_processor_id(), nmi_show_regs_cpus);
+	busted_show_regs(regs, in_nmi);
+	send_nmi_ipi_allbutself();
+}
+
+/* call only from nmi handler */
+int do_nmi_show_regs(struct pt_regs *regs, int cpu)
+{
+	static DEFINE_SPINLOCK(nmi_show_regs_lock);
+
+	if (!cpu_isset(cpu, nmi_show_regs_cpus))
+		return 0;
+
+	spin_lock(&nmi_show_regs_lock);
+	busted_show_regs(regs, 1);
+	cpu_clear(cpu, nmi_show_regs_cpus);
+	spin_unlock(&nmi_show_regs_lock);
+
+	return 1;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ptrace.c kernel-2.6.18-417.el5-028stab121/kernel/ptrace.c
--- kernel-2.6.18-417.el5.orig/kernel/ptrace.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ptrace.c	2017-01-13 08:40:24.000000000 -0500
@@ -121,7 +121,6 @@ EXPORT_SYMBOL_GPL(access_process_vm);
 #define	END_CHECK	do { } while (0)
 #endif
 
-#define PTRACE_DEBUG 1
 #ifdef PTRACE_DEBUG
 #define CHECK_INIT(p)	atomic_set(&(p)->check_dead, 1)
 #define CHECK_DEAD(p)	BUG_ON(!atomic_dec_and_test(&(p)->check_dead))
@@ -130,39 +129,6 @@ EXPORT_SYMBOL_GPL(access_process_vm);
 #define CHECK_DEAD(p)	do { } while (0)
 #endif
 
-struct ptrace_state
-{
-	struct rcu_head rcu;
-	atomic_t refcnt;
-#ifdef PTRACE_DEBUG
-	atomic_t check_dead;
-#endif
-
-	/*
-	 * These elements are always available, even when the struct is
-	 * awaiting destruction at the next RCU callback point.
-	 */
-	struct utrace_attached_engine *engine;
-	struct task_struct *task; /* Target task.  */
-	struct task_struct *parent; /* Whom we report to.  */
-	struct list_head entry;	/* Entry on parent->ptracees list.  */
-
-	u8 options;		/* PTRACE_SETOPTIONS bits.  */
-	unsigned int syscall:1;	/* Reporting for syscall.  */
-#ifdef PTRACE_SYSEMU
-	unsigned int sysemu:1;	/* PTRACE_SYSEMU in progress. */
-#endif
-	unsigned int have_eventmsg:1; /* u.eventmsg valid. */
-	unsigned int cap_sys_ptrace:1; /* Tracer capable.  */
-
-	union
-	{
-		unsigned long eventmsg;
-		siginfo_t *siginfo;
-	} u;
-};
-
-static const struct utrace_engine_ops ptrace_utrace_ops; /* Initialized below. */
 
 /*
  * We use this bit in task_struct.exit_code of a ptrace'd task to indicate
@@ -435,6 +401,12 @@ static int ptrace_attach(struct task_str
 		goto bad;
 	if (!task->mm)		/* kernel threads */
 		goto bad;
+	task_lock(task);
+	if (task->mm && task->mm->vps_dumpable == 2) {
+		task_unlock(task);
+		goto bad;
+	}
+	task_unlock(task);
 
 	pr_debug("%d ptrace_attach %d state %lu exit_code %x\n",
 		 current->pid, task->pid, task->state, task->exit_code);
@@ -696,7 +668,7 @@ ptrace_induce_signal(struct task_struct 
 			info->si_signo = signr;
 			info->si_errno = 0;
 			info->si_code = SI_USER;
-			info->si_pid = current->pid;
+			info->si_pid = virt_pid(current);
 			info->si_uid = current->uid;
 		}
 
@@ -880,7 +852,7 @@ ptrace_start(long pid, long request,
 
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
@@ -1385,14 +1357,19 @@ ptrace_do_wait(struct task_struct *tsk,
 			continue;
 
 		if (pid > 0) {
-			if (p->pid != pid)
+			if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
 				continue;
 		} else if (!pid) {
 			if (process_group(p) != process_group(current))
 				continue;
 		} else if (pid != -1) {
-			if (process_group(p) != -pid)
-				continue;
+			if (__is_virtual_pid(-pid)) {
+				if (virt_pgid(p) != -pid)
+					continue;
+			} else {
+				if (process_group(p) != -pid)
+					continue;
+			}
 		}
 		if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
 		    && !(options & __WALL))
@@ -1550,7 +1527,7 @@ found:
 		if (!err)
 			err = put_user((short)why, &infop->si_code);
 		if (!err)
-			err = put_user(p->pid, &infop->si_pid);
+			err = put_user(get_task_pid(p), &infop->si_pid);
 		if (!err)
 			err = put_user(p->uid, &infop->si_uid);
 		if (!err)
@@ -1567,7 +1544,7 @@ found:
 			 * to its real parent now.
 			 */
 			detach_zombie(tsk, p, state);
-		err = p->pid;
+		err = get_task_pid(p);
 	}
 
 	put_task_struct(p);
@@ -1620,7 +1597,7 @@ do_notify(struct task_struct *tsk, struc
 
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = virt_pid(tsk);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1923,7 +1900,7 @@ ptrace_report_clone(struct utrace_attach
 
 	if (state->options & option) {
 		state->have_eventmsg = 1;
-		state->u.eventmsg = child->pid;
+		state->u.eventmsg = virt_pid(child);
 	}
 	else
 		event = 0;
@@ -2169,7 +2146,7 @@ ptrace_allow_access_process_vm(struct ut
 }
 
 
-static const struct utrace_engine_ops ptrace_utrace_ops =
+struct utrace_engine_ops ptrace_utrace_ops =
 {
 	.report_syscall_entry = ptrace_report_syscall_entry,
 	.report_syscall_exit = ptrace_report_syscall_exit,
@@ -2185,3 +2162,4 @@ static const struct utrace_engine_ops pt
 	.tracer_task = ptrace_tracer_task,
 	.allow_access_process_vm = ptrace_allow_access_process_vm,
 };
+EXPORT_SYMBOL(ptrace_utrace_ops);
diff -upr kernel-2.6.18-417.el5.orig/kernel/rtmutex-debug.c kernel-2.6.18-417.el5-028stab121/kernel/rtmutex-debug.c
--- kernel-2.6.18-417.el5.orig/kernel/rtmutex-debug.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/rtmutex-debug.c	2017-01-13 08:40:20.000000000 -0500
@@ -153,7 +153,7 @@ void debug_rt_mutex_print_deadlock(struc
 	if (!waiter->deadlock_lock || !rt_trace_on)
 		return;
 
-	task = find_task_by_pid(waiter->deadlock_task_pid);
+	task = find_task_by_pid_all(waiter->deadlock_task_pid);
 	if (!task)
 		return;
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/sched.c kernel-2.6.18-417.el5-028stab121/kernel/sched.c
--- kernel-2.6.18-417.el5.orig/kernel/sched.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/sched.c	2017-01-13 08:40:40.000000000 -0500
@@ -25,6 +25,7 @@
 #include <asm/uaccess.h>
 #include <linux/highmem.h>
 #include <linux/smp_lock.h>
+#include <linux/pagemap.h>
 #include <asm/mmu_context.h>
 #include <linux/interrupt.h>
 #include <linux/capability.h>
@@ -53,6 +54,8 @@
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
 #include <linux/hash.h>
+#include <linux/vsched.h>
+#include <linux/fairsched.h>
 #include <asm/tlb.h>
 #include <trace/sched.h>
 
@@ -139,7 +142,7 @@
 #ifdef CONFIG_SMP
 #define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
-			num_online_cpus())
+			vsched_num_online_vcpus(task_vsched(p)))
 #else
 #define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
@@ -204,6 +207,7 @@ struct prio_array {
  * (such as the load balancing or the thread migration code), lock
  * acquire operations must be ordered by ascending &runqueue.
  */
+typedef struct vcpu_struct *vcpu_t;
 struct rq {
 	spinlock_t lock;
 
@@ -224,11 +228,14 @@ struct rq {
 	 * one CPU and if it got migrated afterwards it may decrease
 	 * it on another CPU. Always updated under the runqueue lock:
 	 */
-	unsigned long nr_uninterruptible;
+	unsigned int nr_uninterruptible;
+
+	unsigned long nr_sleeping;
+	unsigned long nr_stopped;
 
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
-	struct task_struct *curr, *idle;
+	struct task_struct *curr;
 	struct mm_struct *prev_mm;
 	struct prio_array *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -239,12 +246,12 @@ struct rq {
 
 	/* For active balancing */
 	int active_balance;
-	int push_cpu;
-	int cpu;		/* cpu of this runqueue */
+#endif
+	vcpu_t push_cpu;
 
 	struct task_struct *migration_thread;
+	int migration_thread_init;
 	struct list_head migration_queue;
-#endif
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
@@ -265,34 +272,653 @@ struct rq {
 	unsigned long ttwu_cnt;
 	unsigned long ttwu_local;
 #endif
+#ifndef CONFIG_SCHED_VCPU
+	/*
+	 * with VCPU scheduler each rq is dynamic object
+	 * so assign a common static class to them and
+	 * use lock nesting rules in double_rq_lock etc
+	 */
 	struct lock_class_key rq_lock_key;
+#endif
 };
 
-static DEFINE_PER_CPU(struct rq, runqueues);
+/* virtual CPU description */
+struct vcpu_struct {
+	struct rq rq;
+#ifdef CONFIG_SCHED_VCPU
+	unsigned active : 1,
+		 running : 1;
+	struct list_head list;
+	struct vcpu_scheduler *vsched;
+	int last_pcpu;
+	unsigned long start_time;
+	unsigned long stop_time;
+#endif
+	int id;
+} ____cacheline_internodealigned_in_smp;
+
+/* physical CPU description */
+struct pcpu_info {
+	struct vcpu_scheduler *vsched;
+	struct vcpu_struct *vcpu;
+	struct task_struct *idle;
+#ifdef CONFIG_SMP
+	struct sched_domain *sd;
+#endif
+	int id;
+} ____cacheline_internodealigned_in_smp;
+
+struct pcpu_info pcpu_info[NR_CPUS];
+
+LIST_HEAD(vsched_list);
+EXPORT_SYMBOL(vsched_list);
+DEFINE_SPINLOCK(vsched_list_lock);
+EXPORT_SYMBOL(vsched_list_lock);
+
+#define pcpu(nr)		(&pcpu_info[nr])
+#define this_pcpu()		(pcpu(smp_processor_id()))
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_pdomain(sd, domain) \
+for (domain = rcu_dereference(sd); domain; domain = domain->parent)
+
+#define for_each_domain(cpu, __sd) \
+	for_each_pdomain(vcpu_rq(cpu)->sd, __sd)
+
+#ifdef CONFIG_SCHED_VCPU
+
+/* Used in find_idle_vsched() */
+static DEFINE_PER_CPU(int, find_busvs_last_pcpu);
+
+/*
+ * vcpu_timeslice - how many msec's runnable VCPU will stay on the same
+ * physical CPU. If vcpu_timeslice < 0, actual vcpu timeslice value will
+ * be calculated according to number of 'ready to run' vcpu's:
+ *
+ * vcpu_timeslice_actual = VCPU_TIMESLICE_MAX >>
+ *			((nr_runnable_vcpus - 1) / nr_pcpus)
+ */
+#define VCPU_TIMESLICE_MAX	FSCH_TIMESLICE
+int vcpu_timeslice_actual;
+unsigned int nr_online_pcpus = 1;	/* mustn't be 0, cause it's divisor */
+/*
+ * Set initial value to -1, to not subtract '-1' each time.
+ */
+unsigned int nr_runnable_vcpus = -1;
+
+u32 vcpu_sched_timeslice = 5;
+int vcpu_timeslice = -1;
+u32 vcpu_hot_timeslice = 4;	/* < 4 won't work for HZ=250 */
+EXPORT_SYMBOL(vcpu_sched_timeslice);
+EXPORT_SYMBOL(vcpu_timeslice);
+EXPORT_SYMBOL(vcpu_hot_timeslice);
+
+extern spinlock_t fairsched_lock;
+struct vcpu_scheduler default_vsched, idle_vsched;
+EXPORT_SYMBOL(default_vsched);
+EXPORT_SYMBOL(idle_vsched);
+
+static struct vcpu_struct boot_vcpu, boot_idle_vcpu;
+
+#define vsched_default_vsched()	(&default_vsched)
+#define vsched_default_vcpu(id)	(default_vsched.vcpu[id])
+
+/*
+ * All macros below can be used without locks if there are no
+ * strict ordering requirements, because we assume that:
+ *
+ * 1. VCPU could not disappear "on the fly" (FIXME)
+ *
+ * 2. p->vsched access is atomic.
+ */
+
+#define vsched_vcpu(vsched, id)	((vsched)->vcpu[id])
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		((p)->vcpu)
+
+#define vsched_id(vsched)	((vsched)->id)
+#define vsched_vcpu_online_map(vsched)	((vsched)->vcpu_online_map)
+#define vsched_num_online_vcpus(vsched)	((vsched)->num_online_vcpus)
+#define vsched_pcpu_running_map(vsched)	((vsched)->pcpu_running_map)
+
+#define vcpu_vsched(vcpu)	((vcpu)->vsched)
+#define vcpu_last_pcpu(vcpu)	((vcpu)->last_pcpu)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(!vcpu_isset(vcpu, \
+					vcpu_vsched(vcpu)->vcpu_online_map))
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id);
+
+#define vcpu_is_hot(vcpu)	(jiffies - (vcpu)->start_time \
+					< msecs_to_jiffies(vcpu_timeslice_actual))
+#else	/* CONFIG_SCHED_VCPU */
+
+static DEFINE_PER_CPU(struct vcpu_struct, vcpu_struct);
+
+#define task_vsched(p)		NULL
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		(vcpu(task_cpu(p)))
+
+#define vsched_vcpu(sched, id)	(vcpu(id))
+#define vsched_id(vsched)	0
+#define vsched_default_vsched()	NULL
+#define vsched_default_vcpu(id)	(vcpu(id))
+
+#define vsched_vcpu_online_map(vsched)	(cpu_online_map)
+#define vsched_num_online_vcpus(vsched)	(num_online_cpus())
+#define vsched_pcpu_running_map(vsched)	(cpu_online_map)
+
+#define vcpu(id)		(&per_cpu(vcpu_struct, id))
+
+#define vcpu_vsched(vcpu)	NULL
+#define vcpu_last_pcpu(vcpu)	((vcpu)->id)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(cpu_is_offline((vcpu)->id))
+
+#define vcpu_is_hot(vcpu)	(1)
+#endif	/* CONFIG_SCHED_VCPU */
+
+#define this_rq()		(vcpu_rq(this_vcpu()))
+#define task_rq(p)		(vcpu_rq(task_vcpu(p)))
+#define vcpu_rq(vcpu)		(&(vcpu)->rq)
+#define get_vcpu()		({ preempt_disable(); this_vcpu(); })
+#define put_vcpu()		({ put_cpu(); })
+#define rq_vcpu(__rq)		(container_of((__rq), struct vcpu_struct, rq))
 
 static inline int cpu_of(struct rq *rq)
 {
 #ifdef CONFIG_SMP
-	return rq->cpu;
+	return vcpu_last_pcpu(rq_vcpu(rq));
 #else
 	return 0;
 #endif
 }
 
+/**
+ * idle_task - return the idle task for a given cpu.
+ * @cpu: the processor in question.
+ */
+struct task_struct *idle_task(int cpu) 
+{
+	return pcpu(cpu)->idle;
+}
+
+#ifdef CONFIG_SMP
+static inline void update_rq_cpu_load(struct rq *this_rq)
+{
+	unsigned long this_load;
+	int i, scale;
+
+	if (unlikely(this_rq->nr_running == 0)) {
+		for (i = 0; i < 3; i++)
+			this_rq->cpu_load[i] = 0;
+		return;
+	}
+
+	this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
+	for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
+		unsigned long old_load, new_load;
+
+		old_load = this_rq->cpu_load[i];
+		new_load = this_load;
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale-1;
+		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
+	}
+}
+#else	/* CONFIG_SMP */
+static inline void update_rq_cpu_load(struct rq *this_rq)
+{
+}
+#endif	/* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+static inline void recalc_vcpu_timeslice(void)
+{
+	int val;
+
+	if (vcpu_timeslice < 0) {
+		val = nr_runnable_vcpus / nr_online_pcpus;
+		val = val > 31 ? 31 : val;
+		val = VCPU_TIMESLICE_MAX >> val;
+	} else
+		val = vcpu_timeslice;
+
+	/*
+	 * Optimization (?) - don't invalidate other CPU's cacheline
+	 * if vcpu_timeslice_actual is not changed.
+	 */
+	if (vcpu_timeslice_actual != val)
+		vcpu_timeslice_actual = val;
+}
+
+void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched,
+		cpumask_t *mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	*mask = vsched->vcpu_online_map;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+}
+
+static inline void set_task_vsched(struct task_struct *p,
+		struct vcpu_scheduler *vsched)
+{
+	/* NOTE: set_task_cpu() is required after every set_task_vsched()! */
+	p->vsched = vsched;
+	p->vsched_id = vsched_id(vsched);
+}
+
+inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id)
+{
+	p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id);
+	p->vcpu_id = vcpu_id;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	p->vcpu = vcpu;
+	p->vcpu_id = vcpu->id;
+}
+
+/* this is called when rq->nr_running changes from 0 to 1 */
+static void vcpu_attach(struct rq *rq)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	vsched = vcpu_vsched(vcpu);
+
+	BUG_ON(vcpu->active);
+	spin_lock(&fairsched_lock);
+	vcpu->active = 1;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->active_list);
+	else
+		set_tsk_need_resched(idle_task(task_pcpu(rq->curr)));
+
+	fairsched_incrun(vsched->node);
+	nr_runnable_vcpus++;
+	spin_unlock(&fairsched_lock);
+
+	recalc_vcpu_timeslice();
+}
+
+/* this is called when rq->nr_running changes from 1 to 0 */
+static void vcpu_detach(struct rq *rq)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	vsched = vcpu_vsched(vcpu);
+	BUG_ON(!vcpu->active);
+
+	spin_lock(&fairsched_lock);
+	fairsched_decrun(vsched->node);
+
+	vcpu->active = 0;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->idle_list);
+	nr_runnable_vcpus--;
+	spin_unlock(&fairsched_lock);
+
+	recalc_vcpu_timeslice();
+}
+
+static inline void __vcpu_get(vcpu_t vcpu)
+{
+	struct pcpu_info *pcpu;
+	struct vcpu_scheduler *vsched;
+
+	BUG_ON(!this_vcpu()->running);
+
+	pcpu = this_pcpu();
+	vsched = vcpu_vsched(vcpu);
+
+	pcpu->vcpu = vcpu;
+	pcpu->vsched = vsched;
+
+	fairsched_inccpu(vsched->node);
+
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	vcpu->start_time = jiffies;
+	vcpu->last_pcpu = pcpu->id;
+	vcpu->running = 1;
+	__set_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	__set_bit(pcpu->id, vsched->pcpu_running_map.bits);
+#ifdef CONFIG_SMP
+	vcpu_rq(vcpu)->sd = pcpu->sd;
+#endif
+}
+
+static void vcpu_put(vcpu_t vcpu)
+{
+	struct vcpu_scheduler *vsched;
+	struct pcpu_info *cur_pcpu;
+	struct rq *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+	cur_pcpu = this_pcpu();
+
+	BUG_ON(!vcpu->running);
+
+	spin_lock(&fairsched_lock);
+	vcpu->running = 0;
+	list_move_tail(&vcpu->list,
+		vcpu->active ? &vsched->active_list : &vsched->idle_list);
+	fairsched_deccpu(vsched->node);
+	__clear_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	if (vsched != this_vsched())
+		__clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits);
+
+	vcpu->stop_time = jiffies;
+	if (!rq->nr_running)
+		rq->expired_timestamp = 0;
+	/* from this point task_running(prev_rq, prev) will be 0 */
+	rq->curr = cur_pcpu->idle;
+	update_rq_cpu_load(rq);
+	spin_unlock(&fairsched_lock);
+}
+
 /*
- * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
- * See detach_destroy_domains: synchronize_sched for details.
+ * Find an idle VCPU in the given vsched. A VCPU that last ran on this
+ * pcpu is preferable. The idle VCPU must also be present in the *cpus mask.
+ */
+static vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched, cpumask_t *cpus)
+{
+	vcpu_t vcpu;
+	vcpu_t best_vcpu;
+	int this_pcpu = smp_processor_id();
+	unsigned long time;
+
+	time = jiffies - msecs_to_jiffies(vcpu_hot_timeslice);
+	best_vcpu = NULL;
+
+	spin_lock(&fairsched_lock);
+	if (!list_empty(&vsched->idle_list)) {
+		list_for_each_entry(vcpu, &vsched->idle_list, list) {
+			if (unlikely(vcpu_is_offline(vcpu)))
+				continue;
+			if (time_after_eq(vcpu->stop_time, time) &&
+					!cpu_isset(vcpu_last_pcpu(vcpu), *cpus))
+				continue;
+			best_vcpu = vcpu;
+			if (vcpu_last_pcpu(vcpu) == this_pcpu)
+				break;
+		}
+	}
+	spin_unlock(&fairsched_lock);
+	return best_vcpu;
+}
+
+/*
+ * find_busiest_vsched - find busiest vsched among running vsched's.
+ * An active vsched will be balanced when it becomes running.
  *
- * The domain tree of any CPU may only be accessed from within
- * preempt-disabled sections.
+ * This routine must be simple and fast.
  */
-#define for_each_domain(cpu, __sd) \
-	for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+static inline struct vcpu_scheduler *find_busiest_vsched(cpumask_t *cpus)
+{
+	vcpu_t vcpu;
+	int i, n;
+	cpumask_t mask, tmp_mask;
+	int step;
+
+	step = 0;
+
+	cpus_and(mask, *cpus, cpu_online_map);
+
+	/*
+	 * We implement simple round robin strategy to get 
+	 * PCPU id to start from. Last PCPU number is saved in 
+	 * per_cpu(find_busvs_last_pcpu).
+	 *
+	 * Assume the mask is 0x6789abcd and it's time to start
+	 * from PCPU #13:
+	 * 
+	 * 1) In the first pass we must use mask 0x6789a000:
+	 *
+	 *    ((0x6789abcd >> 13) << 13) => 0x6789a000
+	 *
+	 * 2) In the second pass we must use mask 0x00000bcd:
+	 *
+	 *      0x6789abcd ^ 0x6789a000  => 0x00000bcd
+	 */
+	n = per_cpu(find_busvs_last_pcpu, raw_smp_processor_id());
+
+	cpus_shift_right(tmp_mask, mask, n);
+	cpus_shift_left(tmp_mask, tmp_mask, n);
+restart:
+	for_each_cpu_mask(i, tmp_mask) {
+		vcpu = pcpu(i)->vcpu;
+		if (vcpu_is_offline(vcpu))
+			continue;
+		if (vcpu->vsched == &idle_vsched)
+			continue;
+		if (vcpu == this_vcpu())
+			continue;
+
+		/*
+		 * 'Busiest' means there are at least 2 tasks on this vsched.
+		 */
+		if (vcpu->rq.nr_running > 1) {
+			per_cpu(find_busvs_last_pcpu, raw_smp_processor_id())
+				= ++n % NR_CPUS;
+			return vcpu->vsched;
+		}
+	}
+	if (!step++) {
+		/* Second pass */
+		cpus_xor(tmp_mask, mask, tmp_mask);
+		goto restart;
+	}
+	return NULL;
+}
+
+/*
+ * Find idle VCPUs in a vsched, that can be balanced
+ */
+static inline vcpu_t find_idle_target(cpumask_t *cpus)
+{
+	vcpu_t vcpu;
+	struct vcpu_scheduler *vsched;
+
+	/*
+	 * First of all we have to find busiest vsched
+	 */
+	vsched = find_busiest_vsched(cpus);
+	if (vsched == NULL)
+		return NULL;
+
+	/*
+	 * Try to find an idle VCPU in the target vsched.
+	 * VCPU that was last running on this PCPU is preferred.
+	 */
+	vcpu = find_idle_vcpu(vsched, cpus);
+	if (!vcpu)
+		return NULL;
+	return vcpu;
+}
+
+static int idle_balance(vcpu_t this_cpu, struct rq *this_rq);
+
+static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	unsigned long time;
+	struct rq *rq;
+	int pcpu = raw_smp_processor_id();
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node, *prev_node, *cur_node;
+
+	prev_node = vcpu_vsched(cur_vcpu)->node;
+#endif
+
+	BUG_ON(!cur_vcpu->running);
+restart:
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		goto affine;
+
+	spin_lock(&fairsched_lock);
+#ifdef CONFIG_FAIRSCHED
+	cur_node = cur_vcpu->active ? prev_node : NULL;
+	node = fairsched_first(cur_node, cycles);
+	if (unlikely(node == NULL))
+		goto idle;
+
+	vsched = node->vsched;
+	while (unlikely(!cpu_isset(pcpu, vsched->pcpu_allowed_map))) {
+next_node:
+		node = fairsched_next(cur_node, node);
+		if (node == NULL)
+			goto idle;
+		vsched = node->vsched;
+	}
+#else
+	vsched = &default_vsched;
+#endif
+	/* FIXME: optimize vcpu switching; maybe we do not need to call
+	   fairsched_schedule() at all if the vcpu is still active and too
+	   little time has passed so far */
+	if (cur_vcpu->vsched == vsched && cur_vcpu->active &&
+	    jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) {
+		vcpu = cur_vcpu;
+		goto done;
+	}
+
+	/*
+	 * Ok, we are going to choose new VCPU now.
+	 */
+	time = jiffies - msecs_to_jiffies(vcpu_hot_timeslice);
+	/*
+	 * First vcpu in the list is more preferable, because it has waited
+	 * for CPU longer than others. If all vcpu's are hot, use the oldest
+	 * one.
+	 */
+	list_for_each_entry(vcpu, &vsched->active_list, list) {
+		/* Skip hot VCPU's that were running on another CPU's */
+		if (vcpu->stop_time > time && vcpu_last_pcpu(vcpu) != pcpu)
+			continue;
+
+		/* add it to running list */
+		__vcpu_get(vcpu);
+		goto done;
+	}
+
+	/*
+	 * Current vcpu is the one we need. We have not
+	 * put it yet, so it's not on the active_list.
+	 */
+	if (cur_vcpu->vsched == vsched && cur_vcpu->active) {
+		vcpu = cur_vcpu;
+		vcpu->start_time = jiffies;
+		goto done;
+	}
+
+	goto next_node;
+
+done:
+#ifdef CONFIG_FAIRSCHED
+	fairsched_switch(prev_node, node, cycles);
+#endif
+done_noswitch:
+	spin_unlock(&fairsched_lock);
+
+	rq = vcpu_rq(vcpu);
+	if (unlikely(vcpu != cur_vcpu)) {
+		spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+		spin_lock(&rq->lock);
+		if (unlikely(!rq->nr_running)) {
+			/* race with balancing? */
+			spin_unlock(&rq->lock);
+			vcpu_put(vcpu);
+			spin_lock(&vcpu_rq(cur_vcpu)->lock);
+			goto restart;
+		}
+	}
+	BUG_ON(!rq->nr_running);
+	return vcpu;
+
+idle:
+	fairsched_switch(prev_node, &fairsched_idle_node, cycles);
+	vcpu_rq(cur_vcpu)->sched_goidle++;
+	vcpu = task_vcpu(this_pcpu()->idle);
+	__vcpu_get(vcpu);
+	spin_unlock(&fairsched_lock);
+	spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+
+	spin_lock(&vcpu_rq(vcpu)->lock);
+	return vcpu;
+
+affine:
+	vcpu = vsched_vcpu(&default_vsched, pcpu);
+	/* current VCPU busy, continue */
+	if (cur_vcpu == vcpu && vcpu->active)
+		return cur_vcpu;
+	/* current is idle and nothing to run, keep idle */
+	if (vcpu_vsched(cur_vcpu) == &idle_vsched && !vcpu->active)
+		return cur_vcpu;
+
+	/* need to switch to idle... */
+	if (cur_vcpu == vcpu) {
+		spin_lock(&fairsched_lock);
+		goto idle;
+	}
+
+	/* ... and from idle */
+	spin_lock(&fairsched_lock);
+	__vcpu_get(vcpu);
+	goto done_noswitch;
+}
+
+int vcpu_online(int cpu)
+{
+	return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched()));
+}
+#else /* CONFIG_SCHED_VCPU */
+
+#define set_task_vsched(task, vsched)		do { } while (0)
+
+static inline void vcpu_attach(struct rq *rq)
+{
+}
+
+static inline void vcpu_detach(struct rq *rq)
+{
+}
+
+static inline void vcpu_put(vcpu_t vcpu)
+{
+}
+
+static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles)
+{
+	return prev_vcpu;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	set_task_pcpu(p, vcpu->id);
+}
+
+#endif /* CONFIG_SCHED_VCPU */
 
-#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		(&__get_cpu_var(runqueues))
-#define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next)	do { } while (0)
@@ -301,6 +927,34 @@ static inline int cpu_of(struct rq *rq)
 # define finish_arch_switch(prev)	do { } while (0)
 #endif
 
+#ifdef CONFIG_SMP
+static struct percpu_data kstat_lat_pcpu_stats;
+static struct percpu_data kstat_alat_pcpu_stats[KSTAT_ALLOCSTAT_NR];
+#endif
+static struct kstat_lat_pcpu_snap_struct kstat_lat_pcpu_stats_data[NR_CPUS];
+static struct kstat_lat_pcpu_snap_struct kstat_alat_pcpu_stats_data[KSTAT_ALLOCSTAT_NR][NR_CPUS];
+struct kernel_stat_glob kstat_glob;
+
+spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(kstat_glob);
+EXPORT_SYMBOL(kstat_glb_lock);
+
+static inline void finish_vsched_switch(struct rq *rq, vcpu_t prev_vcpu)
+{
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	if (prev_vcpu != vcpu) {
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+		local_irq_disable();
+		vcpu_put(prev_vcpu);
+		local_irq_enable();
+#else
+		vcpu_put(prev_vcpu);
+#endif
+	}
+}
+
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(struct rq *rq, struct task_struct *p)
 {
@@ -313,6 +967,7 @@ static inline void prepare_lock_switch(s
 
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
+	vcpu_t prev_vcpu;
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
@@ -324,7 +979,10 @@ static inline void finish_lock_switch(st
 	 */
 	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
 
-	spin_unlock_irq(&rq->lock);
+	prev_vcpu = task_vcpu(prev);
+	spin_unlock(&rq->lock);
+	finish_vsched_switch(rq, prev_vcpu);
+	local_irq_enable();
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -356,6 +1014,8 @@ static inline void prepare_lock_switch(s
 
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
+	/* vcpu_put() should be done before setting prev->oncpu = 0 */
+	finish_vsched_switch(rq, task_vcpu(prev));
 #ifdef CONFIG_SMP
 	/*
 	 * After ->oncpu is cleared, the task can be moved to a different CPU.
@@ -378,71 +1038,295 @@ static inline void finish_lock_switch(st
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
-	struct rq *rq;
+	struct rq *rq;
+
+repeat_lock_task:
+	rq = task_rq(p);
+	spin_lock(&rq->lock);
+	if (unlikely(rq != task_rq(p))) {
+		spin_unlock(&rq->lock);
+		goto repeat_lock_task;
+	}
+	return rq;
+}
+
+/*
+ * task_rq_lock - lock the runqueue a given task resides on and disable
+ * interrupts.  Note the ordering: we can safely lookup the task_rq without
+ * explicitly disabling preemption.
+ */
+static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+repeat_lock_task:
+	local_irq_save(*flags);
+	rq = task_rq(p);
+	spin_lock(&rq->lock);
+	if (unlikely(rq != task_rq(p))) {
+		spin_unlock_irqrestore(&rq->lock, *flags);
+		goto repeat_lock_task;
+	}
+	return rq;
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+	__releases(rq->lock)
+{
+	spin_unlock(&rq->lock);
+}
+
+static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+	__releases(rq->lock)
+{
+	spin_unlock_irqrestore(&rq->lock, *flags);
+}
+
+#ifdef CONFIG_VE
+
+#define cycles_after(a, b)	((long long)(b) - (long long)(a) < 0)
+
+static cycles_t __ve_sched_get_idle_time(struct ve_struct *ve,
+		vcpu_t vcpu, int cpu)
+{
+	struct ve_cpu_stats *ve_stat;
+	unsigned v;
+	cycles_t strt, ret, cycles;
+
+	ve_stat = VE_CPU_STATS(ve, cpu);
+	do {
+		v = read_seqcount_begin(&ve_stat->stat_lock);
+		ret = ve_stat->idle_time;
+		strt = ve_stat->strt_idle_time;
+		if (strt && vcpu &&
+				atomic_read(&vcpu_rq(vcpu)->nr_iowait) == 0) {
+			cycles = get_cycles();
+			if (cycles_after(cycles, strt))
+				ret += cycles - strt;
+		}
+	} while (read_seqcount_retry(&ve_stat->stat_lock, v));
+	return ret;
+}
+
+cycles_t ve_sched_get_idle_time(int cpu)
+{
+	return __ve_sched_get_idle_time(get_exec_env(),
+			vsched_vcpu(this_vsched(), cpu), cpu);
+}
+EXPORT_SYMBOL(ve_sched_get_idle_time);
+
+cycles_t ve_sched_get_idle_time_total(struct ve_struct *ve)
+{
+	struct vcpu_scheduler *vsched;
+	cycles_t res = 0;
+	int cpu;
+
+	spin_lock_irq(&fairsched_lock);
+
+	/**
+	 * VE init task very likely inside VE vsched,
+	 * except if abnormal VE shutdown in progress.
+	 * No reason to care about idle time accounting accuracy in this case.
+	 */
+	vsched = task_vsched(ve->init_entry);
+	if (vsched_id(vsched) == VEID(ve))
+		for_each_cpu_mask(cpu, vsched_vcpu_online_map(vsched))
+			res += __ve_sched_get_idle_time(ve,
+					vsched_vcpu(vsched, cpu), cpu);
+	else
+		for_each_online_cpu(cpu)
+			res += __ve_sched_get_idle_time(ve, NULL, cpu);
+
+	spin_unlock_irq(&fairsched_lock);
+
+	return res;
+}
+EXPORT_SYMBOL(ve_sched_get_idle_time_total);
+
+cycles_t ve_sched_get_iowait_time(int cpu)
+{
+	struct ve_struct *ve;
+	struct ve_cpu_stats *ve_stat;
+	unsigned v;
+	cycles_t strt, ret, cycles;
+	vcpu_t vcpu;
+
+	preempt_disable();
+	ret = 0;
+	vcpu = vsched_vcpu(this_vsched(), cpu);
+	if (!vcpu)
+		goto done;
+
+	ve = get_exec_env();
+	ve_stat = VE_CPU_STATS(ve, cpu);
+	do {
+		struct rq *rq;
+		rq = vcpu_rq(vcpu);
+		v = read_seqcount_begin(&ve_stat->stat_lock);
+		ret = ve_stat->iowait_time;
+		strt = ve_stat->strt_idle_time;
+		if (strt && atomic_read(&rq->nr_iowait) > 0) {
+			cycles = get_cycles();
+			if (cycles_after(cycles, strt))
+				ret += cycles - strt;
+		}
+	} while (read_seqcount_retry(&ve_stat->stat_lock, v));
+done:
+	preempt_enable();
+	return ret;
+}
+
+EXPORT_SYMBOL(ve_sched_get_iowait_time);
+
+static inline void ve_stop_idle(struct ve_struct *ve,
+		vcpu_t vcpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *ve_stat;
+
+	ve_stat = VE_CPU_STATS(ve, vcpu->id);
+
+	write_seqcount_begin(&ve_stat->stat_lock);
+	if (ve_stat->strt_idle_time) {
+		if (cycles_after(cycles, ve_stat->strt_idle_time)) {
+			if (atomic_read(&vcpu_rq(vcpu)->nr_iowait) == 0)
+				ve_stat->idle_time += cycles -
+					ve_stat->strt_idle_time;
+			else
+				ve_stat->iowait_time += cycles - 
+					ve_stat->strt_idle_time;
+		}
+		ve_stat->strt_idle_time = 0;
+	}
+	write_seqcount_end(&ve_stat->stat_lock);
+}
+
+static inline void ve_strt_idle(struct ve_struct *ve,
+		unsigned int cpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *ve_stat;
+
+	ve_stat = VE_CPU_STATS(ve, cpu);
+
+	write_seqcount_begin(&ve_stat->stat_lock);
+	ve_stat->strt_idle_time = cycles;
+	write_seqcount_end(&ve_stat->stat_lock);
+}
+
+void ve_sched_attach(struct ve_struct *envid)
+{
+	struct task_struct *tsk;
+	unsigned int cpu;
 
-repeat_lock_task:
-	rq = task_rq(p);
-	spin_lock(&rq->lock);
-	if (unlikely(rq != task_rq(p))) {
-		spin_unlock(&rq->lock);
-		goto repeat_lock_task;
-	}
-	return rq;
+	tsk = current;
+	preempt_disable();
+	cpu = task_cpu(tsk);
+	preempt_enable();
 }
+EXPORT_SYMBOL(ve_sched_attach);
 
-/*
- * task_rq_lock - lock the runqueue a given task resides on and disable
- * interrupts.  Note the ordering: we can safely lookup the task_rq without
- * explicitly disabling preemption.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
-	__acquires(rq->lock)
+static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc)
 {
-	struct rq *rq;
+	struct ve_task_info *ti;
 
-repeat_lock_task:
-	local_irq_save(*flags);
-	rq = task_rq(p);
-	spin_lock(&rq->lock);
-	if (unlikely(rq != task_rq(p))) {
-		spin_unlock_irqrestore(&rq->lock, *flags);
-		goto repeat_lock_task;
-	}
-	return rq;
+	ti = VE_TASK_INFO(p);
+	write_seqcount_begin(&ti->wakeup_lock);
+	ti->wakeup_stamp = cyc;
+	write_seqcount_end(&ti->wakeup_lock);
 }
 
-static inline void __task_rq_unlock(struct rq *rq)
-	__releases(rq->lock)
+static inline void update_sched_lat(struct task_struct *t, cycles_t cycles)
 {
-	spin_unlock(&rq->lock);
+	int cpu;
+	cycles_t ve_wstamp;
+
+	/* safe due to runqueue lock */
+	cpu = smp_processor_id();
+	ve_wstamp = t->ve_task_info.wakeup_stamp;
+
+	if (ve_wstamp && cycles > ve_wstamp) {
+		KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat,
+				cpu, cycles - ve_wstamp);
+		KSTAT_LAT_PCPU_ADD(&t->ve_task_info.exec_env->sched_lat_ve,
+				cpu, cycles - ve_wstamp);
+	}
 }
 
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
-	__releases(rq->lock)
+static inline void update_ve_task_info(struct task_struct *prev,
+		cycles_t cycles)
 {
-	spin_unlock_irqrestore(&rq->lock, *flags);
+	if (prev != this_pcpu()->idle) {
+		VE_CPU_STATS(prev->ve_task_info.owner_env,
+				smp_processor_id())->used_time +=
+			cycles - prev->ve_task_info.sched_time;
+
+		prev->ve_task_info.sched_time = cycles;
+	}
 }
+#else /* CONFIG_VE */
+#define update_ve_task_info(prev, cycles)	do { } while (0)
+#define ve_stop_idle(ve, vcpu, cycles)		do { } while (0)
+#define ve_strt_idle(ve, cpu, cycles)		do { } while (0)
+#endif /* CONFIG_VE */
+
+struct task_nrs_struct {
+	long nr_running;
+	long nr_unint;
+	long nr_stopped;
+	long nr_sleeping;
+	atomic_t nr_iowait;
+	long long nr_switches;
+} ____cacheline_aligned_in_smp;
 
+static struct task_nrs_struct glob_task_nrs[NR_CPUS];
+#define nr_running_inc(cpu)	do { glob_task_nrs[cpu].nr_running++; } while (0)
+#define nr_running_dec(cpu)	do { glob_task_nrs[cpu].nr_running--; } while (0)
+#define nr_unint_inc(cpu)	do { glob_task_nrs[cpu].nr_unint++; } while (0)
+#define nr_unint_dec(cpu)	do { glob_task_nrs[cpu].nr_unint--; } while (0)
+#define nr_stopped_inc(cpu)	do { glob_task_nrs[cpu].nr_stopped++; } while (0)
+#define nr_stopped_dec(cpu)	do { glob_task_nrs[cpu].nr_stopped--; } while (0)
+#define nr_sleeping_inc(cpu)	do { glob_task_nrs[cpu].nr_sleeping++; } while (0)
+#define nr_sleeping_dec(cpu)	do { glob_task_nrs[cpu].nr_sleeping--; } while (0)
+#define nr_iowait_inc(cpu)	do {				\
+		atomic_inc(&glob_task_nrs[cpu].nr_iowait);	\
+	} while (0)
+#define nr_iowait_dec(cpu)	do {				\
+		atomic_dec(&glob_task_nrs[cpu].nr_iowait);	\
+	} while (0)
+
+
+unsigned long nr_zombie = 0;   /* protected by tasklist_lock */
+EXPORT_SYMBOL(nr_zombie);
+
+atomic_t nr_dead = ATOMIC_INIT(0);
+EXPORT_SYMBOL(nr_dead);
+ 
 #ifdef CONFIG_SCHEDSTATS
+
 /*
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
 #define SCHEDSTAT_VERSION 12
 
-static int show_schedstat(struct seq_file *seq, void *v)
+static int show_schedstat_vsched(struct seq_file *seq,
+		struct vcpu_scheduler *vsched)
 {
 	int cpu;
 
-	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
-	seq_printf(seq, "timestamp %lu\n", jiffies);
-	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
+	seq_printf(seq, "vsched%d\n", vsched->id);
+
+	for_each_cpu_mask (cpu, vsched_vcpu_online_map(vsched)) {
+		vcpu_t vcpu;
+		struct rq *rq;
 #ifdef CONFIG_SMP
 		struct sched_domain *sd;
 		int dcnt = 0;
 #endif
 
+		vcpu = vsched_vcpu(vsched, cpu);
+		rq = vcpu_rq(vcpu);
+
 		/* runqueue-specific stats */
 		seq_printf(seq,
 		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
@@ -458,7 +1342,17 @@ static int show_schedstat(struct seq_fil
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
 		preempt_disable();
-		for_each_domain(cpu, sd) {
+
+		/*
+		 * We can't use vcpu_rq(cpu)->sd here, because it belongs
+		 * to the last used physical CPU. And you'll get different
+		 * domain statistics each time for the same VCPU,
+		 * so it will be impossible to analyze it and parse by any
+		 * tool.
+		 *
+		 * Just use physical domains here..
+		 */
+		for_each_pdomain(pcpu(cpu)->sd, sd) {
 			enum idle_type itype;
 			char mask_str[NR_CPUS];
 
@@ -488,6 +1382,20 @@ static int show_schedstat(struct seq_fil
 	return 0;
 }
 
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+	struct vcpu_scheduler *vsched;
+
+	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+	seq_printf(seq, "timestamp %lu\n", jiffies);
+
+	spin_lock_irq(&vsched_list_lock);
+	list_for_each_entry (vsched, &vsched_list, list)
+		show_schedstat_vsched(seq, vsched);
+	spin_unlock_irq(&vsched_list_lock);
+	return 0;
+}
+
 static int schedstat_open(struct inode *inode, struct file *file)
 {
 	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
@@ -645,17 +1553,18 @@ static inline void sched_info_depart(str
 static inline void
 __sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
-	struct rq *rq = task_rq(prev);
+	int cpu;
+	cpu = smp_processor_id();
 
 	/*
 	 * prev now departs the cpu.  It's not interesting to record
 	 * stats about how efficient we were at scheduling the idle
 	 * process, however.
 	 */
-	if (prev != rq->idle)
+	if (prev != idle_task(cpu))
 		sched_info_depart(prev);
 
-	if (next != rq->idle)
+	if (next != idle_task(cpu))
 		sched_info_arrive(next);
 }
 static inline void
@@ -844,12 +1753,25 @@ static int effective_prio(struct task_st
 static void __activate_task(struct task_struct *p, struct rq *rq)
 {
 	struct prio_array *target = rq->active;
-
+	cycles_t cycles;
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+
+	cycles = get_cycles();
+	write_wakeup_stamp(p, cycles);
+	p->ve_task_info.sleep_time += cycles;
+	ve = VE_TASK_INFO(p)->owner_env;
+#endif
 	trace_activate_task(p, rq);
 	if (batch_task(p))
 		target = rq->expired;
 	enqueue_task(p, target);
 	inc_nr_running(p, rq);
+	nr_running_inc(smp_processor_id());
+	if (rq->nr_running == 1) {
+		ve_stop_idle(ve, task_vcpu(p), cycles);
+		vcpu_attach(rq);
+	}
 }
 
 /*
@@ -984,12 +1906,52 @@ static void activate_task(struct task_st
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void deactivate_task(struct task_struct *p, struct rq *rq)
+static void __deactivate_task(struct task_struct *p, struct rq *rq)
 {
+	cycles_t cycles;
+	unsigned int pcpu;
+#ifdef CONFIG_VE
+	unsigned int cpu;
+	struct ve_struct *ve;
+
+	cycles = get_cycles();
+	cpu = task_cpu(p);
+
+	ve = p->ve_task_info.owner_env;
+
+	p->ve_task_info.sleep_time -= cycles;
+#endif
+	pcpu = smp_processor_id();
+
+	nr_running_dec(pcpu);
 	dec_nr_running(p, rq);
 	dequeue_task(p, p->array);
 	trace_deactivate_task(p, rq);
 	p->array = NULL;
+	if (rq->nr_running == 0) {
+		ve_strt_idle(ve, cpu, cycles);
+		vcpu_detach(rq);
+	}
+}
+
+static void deactivate_task(struct task_struct *p, struct rq *rq)
+{
+	unsigned int pcpu = smp_processor_id();
+
+	if (p->state == TASK_UNINTERRUPTIBLE) {
+		rq->nr_uninterruptible++;
+		nr_unint_inc(pcpu);
+	}
+	if (p->state == TASK_INTERRUPTIBLE) {
+		rq->nr_sleeping++;
+		nr_sleeping_inc(pcpu);
+	}
+	if (p->state == TASK_STOPPED) {
+		rq->nr_stopped++;
+		nr_stopped_inc(pcpu);
+	}
+
+	__deactivate_task(p, rq);
 }
 
 /*
@@ -1005,18 +1967,22 @@ static void deactivate_task(struct task_
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
 
+/* FIXME: need to add vsched arg */
 static void resched_task(struct task_struct *p)
 {
 	int cpu;
 
+#if 0
+	/* FIXME: this fails due to idle rq->curr == idle */
 	assert_spin_locked(&task_rq(p)->lock);
+#endif
 
 	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
 		return;
 
 	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if (cpu == smp_processor_id())
 		return;
 
@@ -1028,7 +1994,10 @@ static void resched_task(struct task_str
 #else
 static inline void resched_task(struct task_struct *p)
 {
+#if 0
+	/* FIXME: this fails due to idle rq->curr == idle */
 	assert_spin_locked(&task_rq(p)->lock);
+#endif
 	set_tsk_need_resched(p);
 }
 #endif
@@ -1039,21 +2008,40 @@ static inline void resched_task(struct t
  */
 inline int task_curr(const struct task_struct *p)
 {
-	return cpu_curr(task_cpu(p)) == p;
+	return task_rq(p)->curr == p;
 }
+EXPORT_SYMBOL(task_curr);
 
-/* Used instead of source_load when we know the type == 0 */
-unsigned long weighted_cpuload(const int cpu)
+/**
+ * idle_cpu - is a given cpu idle currently?
+ * @cpu: the processor in question.
+ */
+inline int idle_cpu(int cpu)
 {
-	return cpu_rq(cpu)->raw_weighted_load;
+#ifdef CONFIG_SCHED_VCPU
+	return pcpu(cpu)->vsched == &idle_vsched;
+#else
+	return vcpu_rq(pcpu(cpu)->vcpu)->curr == pcpu(cpu)->idle;
+#endif
 }
 
-#ifdef CONFIG_SMP
+EXPORT_SYMBOL_GPL(idle_cpu);
+
+static inline int idle_vcpu(vcpu_t cpu)
+{
+#ifdef CONFIG_SCHED_VCPU
+	return !cpu->active;
+#else
+	return idle_cpu(cpu->id);
+#endif
+}
+
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 struct migration_req {
 	struct list_head list;
 
 	struct task_struct *task;
-	int dest_cpu;
+	vcpu_t dest_cpu;
 
 	struct completion done;
 };
@@ -1063,7 +2051,7 @@ struct migration_req {
  * Returns true if you have to wait for migration thread.
  */
 static int
-migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
+migrate_task(struct task_struct *p, vcpu_t dest_cpu, struct migration_req *req)
 {
 	struct rq *rq = task_rq(p);
 
@@ -1071,8 +2059,13 @@ migrate_task(struct task_struct *p, int 
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
 	 */
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(task_vsched(p) == &idle_vsched);
+	BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched);
+#endif
 	if (!p->array && !task_running(rq, p)) {
-		set_task_cpu(p, dest_cpu);
+		set_task_vsched(p, vcpu_vsched(dest_cpu));
+		set_task_vcpu(p, dest_cpu);
 		return 0;
 	}
 
@@ -1113,6 +2106,7 @@ repeat:
 	}
 	task_rq_unlock(rq, &flags);
 }
+EXPORT_SYMBOL_GPL(wait_task_inactive);
 
 /***
  * kick_process - kick a running thread to enter/exit the kernel
@@ -1132,12 +2126,18 @@ void kick_process(struct task_struct *p)
 	int cpu;
 
 	preempt_disable();
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if ((cpu != smp_processor_id()) && task_curr(p))
+		/* FIXME: ??? think over */
+		/* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p),
+		   but with serialization of vcpu access... */
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
 
+#endif
+
+#ifdef CONFIG_SMP
 /*
  * Return a low guess at the load of a migration-source cpu weighted
  * according to the scheduling class and "nice" value.
@@ -1145,9 +2145,9 @@ void kick_process(struct task_struct *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long source_load(vcpu_t cpu, int type)
 {
-	struct rq *rq = cpu_rq(cpu);
+	struct rq *rq = vcpu_rq(cpu);
 
 	if (type == 0)
 		return rq->raw_weighted_load;
@@ -1159,9 +2159,9 @@ static inline unsigned long source_load(
  * Return a high guess at the load of a migration-target cpu weighted
  * according to the scheduling class and "nice" value.
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long target_load(vcpu_t cpu, int type)
 {
-	struct rq *rq = cpu_rq(cpu);
+	struct rq *rq = vcpu_rq(cpu);
 
 	if (type == 0)
 		return rq->raw_weighted_load;
@@ -1172,9 +2172,9 @@ static inline unsigned long target_load(
 /*
  * Return the average load per task on the cpu's run queue
  */
-static inline unsigned long cpu_avg_load_per_task(int cpu)
+static inline unsigned long cpu_avg_load_per_task(vcpu_t vcpu)
 {
-	struct rq *rq = cpu_rq(cpu);
+	struct rq *rq = vcpu_rq(vcpu);
 	unsigned long n = rq->nr_running;
 
 	return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
@@ -1185,33 +2185,35 @@ static inline unsigned long cpu_avg_load
  * domain.
  */
 static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, vcpu_t this_cpu)
 {
 	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long min_load = ULONG_MAX, this_load = 0;
 	int load_idx = sd->forkexec_idx;
 	int imbalance = 100 + (sd->imbalance_pct-100)/2;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	int this_pcpu;
 
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 	do {
 		unsigned long load, avg_load;
 		int local_group;
 		int i;
 
-		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
-			goto nextgroup;
-
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
+			vcpu = pcpu(i)->vcpu;
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = source_load(i, load_idx);
+				load = source_load(vcpu, load_idx);
 			else
-				load = target_load(i, load_idx);
+				load = target_load(vcpu, load_idx);
 
 			avg_load += load;
 		}
@@ -1228,7 +2230,6 @@ find_idlest_group(struct sched_domain *s
 			min_load = avg_load;
 			idlest = group;
 		}
-nextgroup:
 		group = group->next;
 	} while (group != sd->groups);
 
@@ -1237,26 +2238,45 @@ nextgroup:
 	return idlest;
 }
 
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(vcpu_t vcpu)
+{
+	return vcpu_rq(vcpu)->raw_weighted_load;
+}
+
 /*
  * find_idlest_queue - find the idlest runqueue among the cpus in group.
  */
-static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+static vcpu_t 
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, vcpu_t this_cpu)
 {
-	cpumask_t tmp;
 	unsigned long load, min_load = ULONG_MAX;
-	int idlest = -1;
+	cpumask_t vmask;
+	struct vcpu_scheduler *vsched;
+	vcpu_t idlest = (vcpu_t)-1;
+	vcpu_t vcpu;
 	int i;
+	unsigned long time;
+
+	time = jiffies - msecs_to_jiffies(vcpu_hot_timeslice);
+	vsched = vcpu_vsched(this_cpu);
+	BUG_ON(vsched != task_vsched(p));
+
+	cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+	for_each_cpu_mask(i, vmask) {
+		vcpu = vsched_vcpu(vsched, i);
 
-	/* Traverse only the allowed CPUs */
-	cpus_and(tmp, group->cpumask, p->cpus_allowed);
+		if (time_after_eq(vcpu->stop_time, time) &&
+				!cpu_isset(vcpu_last_pcpu(vcpu), group->cpumask))
+			continue;
+		if (vcpu_is_offline(vcpu))
+			continue;
 
-	for_each_cpu_mask(i, tmp) {
-		load = weighted_cpuload(i);
+		load = weighted_cpuload(vcpu);
 
-		if (load < min_load || (load == min_load && i == this_cpu)) {
+		if (load < min_load || (load == min_load && vcpu == this_cpu)) {
 			min_load = load;
-			idlest = i;
+			idlest = vcpu;
 		}
 	}
 
@@ -1274,7 +2294,7 @@ find_idlest_cpu(struct sched_group *grou
  *
  * preempt must be disabled.
  */
-static int sched_balance_self(int cpu, int flag)
+static vcpu_t sched_balance_self(vcpu_t cpu, int flag)
 {
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
@@ -1292,7 +2312,7 @@ static int sched_balance_self(int cpu, i
 	while (sd) {
 		cpumask_t span;
 		struct sched_group *group;
-		int new_cpu;
+		vcpu_t new_cpu;
 		int weight;
 
 		span = sd->span;
@@ -1301,7 +2321,7 @@ static int sched_balance_self(int cpu, i
 			goto nextlevel;
 
 		new_cpu = find_idlest_cpu(group, t, cpu);
-		if (new_cpu == -1 || new_cpu == cpu)
+		if (new_cpu == (vcpu_t)(-1) || new_cpu == cpu)
 			goto nextlevel;
 
 		/* Now try balancing at a lower domain level */
@@ -1335,10 +2355,11 @@ static inline int task_hot(struct task_s
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, struct task_struct *p)
+static vcpu_t wake_idle(vcpu_t cpu, struct task_struct *p)
 {
-	cpumask_t tmp;
+	cpumask_t vtmp;
 	struct sched_domain *sd;
+	struct vcpu_scheduler *vsched;
 	int i;
 	unsigned long long now;
 
@@ -1350,19 +2371,26 @@ static int wake_idle(int cpu, struct tas
 	 * pickup the extra load from this cpu and hence we need not check
 	 * sibling runqueue info. This will avoid the checks and cache miss
 	 * penalities associated with that.
+	 *
+	 * O_o huh?
 	 */
-	if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
+	if (idle_vcpu(cpu))
 		return cpu;
 
+	vsched = vcpu_vsched(cpu);
+	cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed);
 	now = sched_clock();
 	for_each_domain(cpu, sd) {
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, now, sd))) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
-			for_each_cpu_mask(i, tmp) {
-				if (idle_cpu(i))
-					return i;
+			for_each_cpu_mask(i, vtmp) {
+				vcpu_t vcpu;
+				vcpu = vsched_vcpu(vsched, i);
+				if (!cpu_isset(vcpu_last_pcpu(vcpu), sd->span))
+					continue;
+				if (idle_vcpu(vcpu))
+					return vcpu;
 			}
 		}
 		else
@@ -1371,7 +2399,7 @@ static int wake_idle(int cpu, struct tas
 	return cpu;
 }
 #else
-static inline int wake_idle(int cpu, struct task_struct *p)
+static inline vcpu_t wake_idle(vcpu_t cpu, struct task_struct *p)
 {
 	return cpu;
 }
@@ -1392,9 +2420,16 @@ int set_cpus_allowed_ptr(struct task_str
 	unsigned long flags;
 	struct rq *rq;
 	int ret = 0;
+	struct vcpu_scheduler *vsched;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(*new_mask, cpu_online_map)) {
+	vsched = task_vsched(p);
+	if (!cpus_intersects(*new_mask, vsched_vcpu_online_map(vsched))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (p->flags & PF_THREAD_BOUND) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1405,7 +2440,8 @@ int set_cpus_allowed_ptr(struct task_str
 	if (cpu_isset(task_cpu(p), *new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(*new_mask), &req)) {
+	if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(*new_mask)),
+				&req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -1435,15 +2471,17 @@ out:
  */
 static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 {
-	int cpu, this_cpu, success = 0;
+	vcpu_t cpu, this_cpu;
+	int success = 0;
 	unsigned long flags;
 	long old_state;
 	struct rq *rq;
 #ifdef CONFIG_SMP
 	struct sched_domain *sd, *this_sd = NULL;
 	unsigned long load, this_load;
-	int new_cpu;
+	vcpu_t new_cpu;
 #endif
+	cpu = NULL;
 
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
@@ -1453,8 +2491,8 @@ static int try_to_wake_up(struct task_st
 	if (p->array)
 		goto out_running;
 
-	cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
+	cpu = task_vcpu(p);
+	this_cpu = this_vcpu();
 
 #ifdef CONFIG_SMP
 	if (unlikely(task_running(rq, p)))
@@ -1463,20 +2501,25 @@ static int try_to_wake_up(struct task_st
 	new_cpu = cpu;
 
 	schedstat_inc(rq, ttwu_cnt);
+	/* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */
+	if (vcpu_vsched(cpu) != vcpu_vsched(this_cpu))
+		goto out_set_cpu;
 	if (cpu == this_cpu) {
 		schedstat_inc(rq, ttwu_local);
 		goto out_set_cpu;
 	}
 
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(cpu, sd->span)) {
+		if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) {
 			schedstat_inc(sd, ttwu_wake_remote);
 			this_sd = sd;
 			break;
 		}
 	}
 
-	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!vcpu_isset(this_cpu, p->cpus_allowed)))
+		goto out_set_cpu;
+	if (vcpu_is_offline(this_cpu))
 		goto out_set_cpu;
 
 	/*
@@ -1534,7 +2577,7 @@ static int try_to_wake_up(struct task_st
 out_set_cpu:
 	new_cpu = wake_idle(new_cpu, p);
 	if (new_cpu != cpu) {
-		set_task_cpu(p, new_cpu);
+		set_task_vcpu(p, new_cpu);
 		task_rq_unlock(rq, &flags);
 		/* might preempt at this point */
 		rq = task_rq_lock(p, &flags);
@@ -1544,13 +2587,20 @@ out_set_cpu:
 		if (p->array)
 			goto out_running;
 
-		this_cpu = smp_processor_id();
-		cpu = task_cpu(p);
+		this_cpu = this_vcpu();
+		cpu = task_vcpu(p);
 	}
 
 out_activate:
 #endif /* CONFIG_SMP */
-	if (old_state == TASK_UNINTERRUPTIBLE) {
+	if (old_state == TASK_INTERRUPTIBLE) {
+		nr_sleeping_dec(smp_processor_id());
+		rq->nr_sleeping--;
+	} else if (old_state == TASK_STOPPED) {
+		nr_stopped_dec(smp_processor_id());
+		rq->nr_stopped--;
+	} else if (old_state == TASK_UNINTERRUPTIBLE) {
+		nr_unint_dec(smp_processor_id());
 		rq->nr_uninterruptible--;
 		/*
 		 * Tasks on involuntary sleep don't earn
@@ -1605,17 +2655,45 @@ int fastcall wake_up_state(struct task_s
 }
 
 /*
+ * init is special, it is forked from swapper (idle_vsched) and should
+ * belong to default_vsched, so we have to change it's vsched/fairsched manually
+ */
+static void wake_up_init(struct task_struct *p)
+{
+	struct rq *rq;
+	unsigned long flags;
+
+	/* we should change both fairsched node and vsched here */
+	set_task_vsched(p, &default_vsched);
+	set_task_cpu(p, raw_smp_processor_id());
+
+	/*
+	 * can't call wake_up_new_task() directly here,
+	 * since it assumes that a child belongs to the same vsched
+	 */
+	p->state = TASK_RUNNING;
+	p->sleep_avg = 0;
+	p->prio = effective_prio(p);
+
+	rq = task_rq_lock(p, &flags);
+	__activate_task(p, rq);
+	task_rq_unlock(rq, &flags);
+}
+
+/*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  */
 void fastcall sched_fork(struct task_struct *p, int clone_flags)
 {
-	int cpu = get_cpu();
-
+	vcpu_t cpu;
+       
+	preempt_disable();
+	cpu = this_vcpu();
 #ifdef CONFIG_SMP
 	cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
 #endif
-	set_task_cpu(p, cpu);
+	set_task_vcpu(p, cpu);
 
 	/*
 	 * We mark the process as running here, but have not actually
@@ -1657,6 +2735,10 @@ void fastcall sched_fork(struct task_str
 	p->first_time_slice = current->pid;
 	current->time_slice >>= 1;
 	p->timestamp = sched_clock();
+#ifdef CONFIG_VE
+	/*cosmetic: sleep till wakeup below*/
+	p->ve_task_info.sleep_time -= get_cycles();
+#endif
 	if (unlikely(!current->time_slice)) {
 		/*
 		 * This case is rare, it happens when the parent has only
@@ -1667,7 +2749,7 @@ void fastcall sched_fork(struct task_str
 		scheduler_tick();
 	}
 	local_irq_enable();
-	put_cpu();
+	preempt_enable();
 }
 
 /*
@@ -1681,12 +2763,19 @@ void fastcall wake_up_new_task(struct ta
 {
 	struct rq *rq, *this_rq;
 	unsigned long flags;
-	int this_cpu, cpu;
+	vcpu_t this_cpu, cpu;
+
+	if (unlikely(p->pid == 1)) {
+		/* FIXME - fastpath */
+		wake_up_init(p);
+		return;
+	}
 
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
-	this_cpu = smp_processor_id();
-	cpu = task_cpu(p);
+	BUG_ON(task_vsched(current) != task_vsched(p));
+	this_cpu = this_vcpu();
+	cpu = task_vcpu(p);
 
 	/*
 	 * We decrease the sleep average of forking parents
@@ -1716,6 +2805,7 @@ void fastcall wake_up_new_task(struct ta
 				p->array = current->array;
 				p->array->nr_active++;
 				inc_nr_running(p, rq);
+				nr_running_inc(smp_processor_id());
 			}
 			set_need_resched();
 		} else
@@ -1729,7 +2819,7 @@ void fastcall wake_up_new_task(struct ta
 		 */
 		this_rq = rq;
 	} else {
-		this_rq = cpu_rq(this_cpu);
+		this_rq = vcpu_rq(this_cpu);
 
 		/*
 		 * Not the local CPU - must adjust timestamp. This should
@@ -1774,8 +2864,8 @@ void fastcall sched_exit(struct task_str
 	 */
 
         if (p->first_time_slice) {
-                creator = find_task_by_pid((pid_t)p->first_time_slice);
-                if (creator && task_cpu(p) == task_cpu(creator)) {
+                creator = find_task_by_pid_all((pid_t)p->first_time_slice);
+                if (creator && task_vcpu(p) == task_vcpu(creator)) {
                         rq = task_rq_lock(creator, &flags);
                         creator->time_slice += p->time_slice;
                         if (unlikely(creator->time_slice > task_timeslice(p)))
@@ -1989,6 +3079,7 @@ static inline void finish_task_switch(st
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 	fire_sched_in_preempt_notifiers(current);
+
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_task_flags & PF_DEAD)) {
@@ -2016,8 +3107,9 @@ asmlinkage void schedule_tail(struct tas
 	preempt_enable();
 #endif
 	if (current->set_child_tid)
-		put_user(current->pid, current->set_child_tid);
+		put_user(virt_pid(current), current->set_child_tid);
 }
+EXPORT_SYMBOL_GPL(schedule_tail);
 
 /*
  * context_switch - switch to the new MM and the new
@@ -2069,20 +3161,26 @@ context_switch(struct rq *rq, struct tas
  */
 unsigned long nr_running(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_online_cpu(i)
-		sum += cpu_rq(i)->nr_running;
+		sum += glob_task_nrs[i].nr_running;
+
+	if (unlikely((long)sum < 0))
+		sum = 0;
 
 	return sum;
 }
+EXPORT_SYMBOL(nr_running);
 
 unsigned long nr_uninterruptible(void)
 {
-	unsigned long i, sum = 0;
-
-	for_each_possible_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
+	unsigned long i, sum;
+	
+	sum = 0;
+	for_each_online_cpu(i)
+		sum += glob_task_nrs[i].nr_unint;
 
 	/*
 	 * Since we read the counters lockless, it might be slightly
@@ -2094,24 +3192,34 @@ unsigned long nr_uninterruptible(void)
 	return sum;
 }
 
+EXPORT_SYMBOL(nr_uninterruptible);
+
 unsigned long long nr_context_switches(void)
 {
 	int i;
-	unsigned long long sum = 0;
-
-	for_each_possible_cpu(i)
-		sum += cpu_rq(i)->nr_switches;
+	unsigned long long sum;
+	
+	sum = 0;
+	for_each_online_cpu(i)
+		sum += glob_task_nrs[i].nr_switches;
 
+	if (unlikely((long)sum < 0))
+		sum = 0;
 	return sum;
 }
 
+EXPORT_SYMBOL(nr_context_switches);
+
 unsigned long nr_iowait(void)
 {
-	unsigned long i, sum = 0;
-
-	for_each_possible_cpu(i)
-		sum += atomic_read(&cpu_rq(i)->nr_iowait);
+	unsigned long i, sum;
+	
+	sum = 0;
+	for_each_online_cpu(i)
+		sum += atomic_read(&glob_task_nrs[i].nr_iowait);
 
+	if (unlikely((long)sum < 0))
+		sum = 0;
 	return sum;
 }
 
@@ -2120,27 +3228,112 @@ unsigned long nr_active(void)
 	unsigned long i, running = 0, uninterruptible = 0;
 
 	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
+		running += glob_task_nrs[i].nr_running;
+		uninterruptible += glob_task_nrs[i].nr_unint;
 	}
 
 	if (unlikely((long)uninterruptible < 0))
 		uninterruptible = 0;
+	if (unlikely((long)running < 0))
+		running = 0;
 
 	return running + uninterruptible;
 }
 
-#ifdef CONFIG_SMP
+EXPORT_SYMBOL(nr_iowait);
 
-/*
- * Is this task likely cache-hot:
- */
-static inline int
-task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
+unsigned long nr_stopped(void)
 {
-	return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
+	unsigned long i, sum;
+
+	sum = 0;
+	for_each_online_cpu(i)
+		sum += glob_task_nrs[i].nr_stopped;
+
+	if (unlikely((long)sum < 0))
+		sum = 0;
+
+	return sum;
+}
+
+EXPORT_SYMBOL(nr_stopped);
+
+unsigned long nr_sleeping(void)
+{
+	unsigned long i, sum;
+
+	sum = 0;
+	for_each_online_cpu(i)
+		sum += glob_task_nrs[i].nr_sleeping;
+
+	if (unlikely((long)sum < 0))
+		sum = 0;
+
+	return sum;
+}
+
+EXPORT_SYMBOL(nr_sleeping);
+
+#ifdef CONFIG_VE
+unsigned long nr_running_vsched(struct vcpu_scheduler *vsched)
+{
+	unsigned long sum = 0;
+#ifdef CONFIG_SCHED_VCPU
+        int cpu;
+	for_each_cpu_mask(cpu, vsched_vcpu_online_map(vsched))
+		sum += vcpu_rq(vsched_vcpu(vsched, cpu))->nr_running;
+#endif
+	return sum;
+}
+
+EXPORT_SYMBOL(nr_running_vsched);
+
+unsigned int nr_unint_vsched(struct vcpu_scheduler *vsched)
+{
+	unsigned int sum = 0;
+#ifdef CONFIG_SCHED_VCPU
+        int cpu;
+	for_each_cpu_mask(cpu, vsched_vcpu_online_map(vsched))
+		sum += vcpu_rq(vsched_vcpu(vsched, cpu))->nr_uninterruptible;
+
+	sum += (unsigned int)atomic_read(&vsched->nr_unint_fixup);
+#endif
+	if (unlikely((int)sum < 0))
+		sum = 0;
+	return sum;
+}
+
+EXPORT_SYMBOL(nr_unint_vsched);
+
+unsigned long nr_iowait_ve(void)
+{
+	long sum = 0;
+
+#ifdef CONFIG_SCHED_VCPU
+	int i;
+	struct vcpu_scheduler *vsched;
+	vsched = this_vsched();
+	for_each_cpu_mask(i, vsched_vcpu_online_map(vsched)) {
+		struct rq *rq;
+
+		rq = vcpu_rq(vsched_vcpu(vsched, i));
+		sum += atomic_read(&rq->nr_iowait);
+	}
+#endif
+	return (unsigned long)(sum < 0 ? 0 : sum);
 }
 
+EXPORT_SYMBOL(nr_iowait_ve);
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
+/*
+ * This has already changed two times since 2.6.16 started, so
+ * let's keep generic rq_compare() to handle it next time.
+ * SCHED_VCPU has many rq-s so comparing of their ->cpu-s
+ * doesn't work as expected.
+ */
+#define rq_compare(rq1, rq2)	(rq1 < rq2)
 /*
  * double_rq_lock - safely lock two runqueues
  *
@@ -2151,60 +3344,36 @@ static void double_rq_lock(struct rq *rq
 	__acquires(rq1->lock)
 	__acquires(rq2->lock)
 {
+	BUG_ON(!irqs_disabled());
 	if (rq1 == rq2) {
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1 < rq2) {
+		if (rq_compare(rq1, rq2)) {
 			spin_lock(&rq1->lock);
 			spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
 		} else {
 			spin_lock(&rq2->lock);
 			spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
-		}
-	}
-}
-
-/*
- * double_rq_unlock - safely unlock two runqueues
- *
- * Note this does not restore interrupts like task_rq_unlock,
- * you need to do so manually after calling.
- */
-static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
-	__releases(rq1->lock)
-	__releases(rq2->lock)
-{
-	spin_unlock(&rq1->lock);
-	if (rq1 != rq2)
-		spin_unlock(&rq2->lock);
-	else
-		__release(rq2->lock);
-}
-
-/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
-		} else
-			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+		}
 	}
 }
 
-static void inline double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(busiest->lock)
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
 {
-	spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+	spin_unlock(&rq1->lock);
+	if (rq1 != rq2)
+		spin_unlock(&rq2->lock);
+	else
+		__release(rq2->lock);
 }
 
 /*
@@ -2213,15 +3382,15 @@ static void inline double_unlock_balance
  * allow dest_cpu, which will force the cpu onto dest_cpu.  Then
  * the cpu_allowed mask is restored.
  */
-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+static void sched_migrate_task(struct task_struct *p, vcpu_t dest_cpu)
 {
 	struct migration_req req;
 	unsigned long flags;
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(!vcpu_isset(dest_cpu, p->cpus_allowed)
+	    || vcpu_is_offline(dest_cpu)))
 		goto out;
 
 	/* force the process onto the specified CPU */
@@ -2240,6 +3409,48 @@ static void sched_migrate_task(struct ta
 out:
 	task_rq_unlock(rq, &flags);
 }
+#endif
+
+#ifdef CONFIG_SMP
+
+/*
+ * Is this task likely cache-hot:
+ */
+static inline int
+task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
+{
+	return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
+}
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work well under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+	if (unlikely(!spin_trylock(&busiest->lock))) {
+		if (rq_compare(busiest, this_rq)) {
+			spin_unlock(&this_rq->lock);
+			spin_lock(&busiest->lock);
+			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+		} else
+			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+	}
+}
+
+static void inline double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
 
 /*
  * sched_exec - execve() is a valuable balancing opportunity, because at
@@ -2247,9 +3458,12 @@ out:
  */
 void sched_exec(void)
 {
-	int new_cpu, this_cpu = get_cpu();
+	vcpu_t new_cpu, this_cpu;
+
+	preempt_disable();
+	this_cpu = this_vcpu();
 	new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
-	put_cpu();
+	preempt_enable();
 	if (new_cpu != this_cpu)
 		sched_migrate_task(current, new_cpu);
 }
@@ -2260,11 +3474,29 @@ void sched_exec(void)
  */
 static void pull_task(struct rq *src_rq, struct prio_array *src_array,
 		      struct task_struct *p, struct rq *this_rq,
-		      struct prio_array *this_array, int this_cpu)
+		      struct prio_array *this_array, vcpu_t this_cpu)
 {
+	cycles_t cycles;
+	int cpu;
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+
+	ve = VE_TASK_INFO(p)->owner_env;
+#endif
+	cycles = get_cycles();
+
 	dequeue_task(p, src_array);
 	dec_nr_running(p, src_rq);
-	set_task_cpu(p, this_cpu);
+	cpu = task_cpu(p);
+	if (src_rq->nr_running == 0) {
+		ve_strt_idle(ve, cpu, cycles);
+		vcpu_detach(src_rq);
+	}
+	set_task_vcpu(p, this_cpu);
+	if (this_rq->nr_running == 0) {
+		ve_stop_idle(ve, this_cpu, cycles);
+		vcpu_attach(this_rq);
+	}
 	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
@@ -2281,7 +3513,7 @@ static void pull_task(struct rq *src_rq,
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
+int can_migrate_task(struct task_struct *p, struct rq *rq, vcpu_t this_cpu,
 		     struct sched_domain *sd, enum idle_type idle,
 		     int *all_pinned)
 {
@@ -2291,7 +3523,7 @@ int can_migrate_task(struct task_struct 
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpu_isset(this_cpu, p->cpus_allowed))
+	if (!vcpu_isset(this_cpu, p->cpus_allowed))
 		return 0;
 	*all_pinned = 0;
 
@@ -2321,7 +3553,7 @@ int can_migrate_task(struct task_struct 
  *
  * Called with both runqueues locked.
  */
-static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+static int move_tasks(struct rq *this_rq, vcpu_t this_cpu, struct rq *busiest,
 		      unsigned long max_nr_move, unsigned long max_load_move,
 		      struct sched_domain *sd, enum idle_type idle,
 		      int *all_pinned)
@@ -2333,6 +3565,8 @@ static int move_tasks(struct rq *this_rq
 	struct task_struct *tmp;
 	long rem_load_move;
 
+	if (vcpu_is_offline(this_cpu))
+		goto out;
 	if (max_nr_move == 0 || max_load_move == 0)
 		goto out;
 
@@ -2445,7 +3679,7 @@ out:
  * should be moved to restore balance via the imbalance parameter.
  */
 static struct sched_group *
-find_busiest_group(struct sched_domain *sd, int this_cpu,
+find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu,
 		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
 		   cpumask_t *cpus)
 {
@@ -2461,6 +3695,11 @@ find_busiest_group(struct sched_domain *
 	unsigned long min_nr_running = ULONG_MAX;
 	struct sched_group *group_min = NULL, *group_leader = NULL;
 #endif
+	struct vcpu_scheduler *vsched;
+	int this_pcpu;
+
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 
 	max_load = this_load = total_load = total_pwr = 0;
 	busiest_load_per_task = busiest_nr_running = 0;
@@ -2473,32 +3712,31 @@ find_busiest_group(struct sched_domain *
 		load_idx = sd->idle_idx;
 
 	do {
+		cpumask_t tmp;
 		unsigned long load, group_capacity;
 		int local_group;
 		int i;
 		unsigned long sum_nr_running, sum_weighted_load;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
+		cpus_and(tmp, group->cpumask, vsched_pcpu_running_map(vsched));
+		cpus_and(tmp, tmp, *cpus);
 
-		for_each_cpu_mask(i, group->cpumask) {
-			struct rq *rq;
-
-			if (!cpu_isset(i, *cpus))
-				continue;
-
-			rq = cpu_rq(i);
+		for_each_cpu_mask(i, tmp) {
+			vcpu_t vcpu = pcpu(i)->vcpu;
+			struct rq *rq = vcpu_rq(vcpu);
 
 			if (*sd_idle && !idle_cpu(i))
 				*sd_idle = 0;
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i, load_idx);
+				load = target_load(vcpu, load_idx);
 			else
-				load = source_load(i, load_idx);
+				load = source_load(vcpu, load_idx);
 
 			avg_load += load;
 			sum_nr_running += rq->nr_running;
@@ -2588,6 +3826,8 @@ group_next:
 
 	if (!busiest || this_load >= max_load || busiest_nr_running == 0)
 		goto out_balanced;
+	if (!this)
+		this = busiest; /* this->cpu_power is needed below */
 
 	avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
 
@@ -2711,30 +3951,33 @@ ret:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static struct rq *
-find_busiest_queue(struct sched_group *group, enum idle_type idle,
-		   unsigned long imbalance, cpumask_t *cpus)
+static vcpu_t find_busiest_queue(vcpu_t this_vcpu, struct sched_group *group,
+		enum idle_type idle, unsigned long imbalance, cpumask_t *cpus)
 {
-	struct rq *busiest = NULL, *rq;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu, busiest = NULL;
+	struct rq *rq;
+	cpumask_t tmp;
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu_mask(i, group->cpumask) {
+	vsched = vcpu_vsched(this_vcpu);
+	cpus_and(tmp, group->cpumask, *cpus);
 
-		if (!cpu_isset(i, *cpus))
+	for_each_cpu_mask(i, vsched_vcpu_online_map(vsched)) {
+		vcpu = vsched_vcpu(vsched, i);
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), tmp))
 			continue;
 
-		rq = cpu_rq(i);
-
+		rq = vcpu_rq(vcpu);
 		if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
 			continue;
 
 		if (rq->raw_weighted_load > max_load) {
 			max_load = rq->raw_weighted_load;
-			busiest = rq;
+			busiest = vcpu;
 		}
 	}
-
 	return busiest;
 }
 
@@ -2755,11 +3998,12 @@ static inline unsigned long minus_1_or_z
  *
  * Called with this_rq unlocked.
  */
-static int load_balance(int this_cpu, struct rq *this_rq,
+static int load_balance(vcpu_t this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
 	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
+	vcpu_t busiest_vcpu, target_vcpu;
 	unsigned long imbalance;
 	struct rq *busiest;
 	cpumask_t cpus = CPU_MASK_ALL;
@@ -2771,6 +4015,37 @@ static int load_balance(int this_cpu, st
 	schedstat_inc(sd, lb_cnt[idle]);
 
 redo:
+#ifdef CONFIG_SCHED_VCPU
+	/*
+	 * The load_balance() routine can be called on busy or idle PCPU.
+	 *
+	 * 1) For idle PCPU, we are going to find an idle VCPU inside the
+	 *    busiest vsched, and:
+	 *    - if one is found - we'll try to move task(s) to this VCPU from
+	 *      the busiest VCPU below using the usual balancer algorithms
+	 *      (find_busiest_group() and friends);
+	 *    - if nothing is found - we assume that this busiest vsched
+	 *      will be rebalanced later on an appropriate non-idle
+	 *      rebalance_tick() (FIXME: we may skip some idle rebalance ticks)
+	 *
+	 * 2) For busy PCPU, we also need to look for an idle VCPU in a
+	 *    vsched, because find_busiest_group() (below) doesn't operate with
+	 *    detached (i.e. idle) VCPU's. So, if an idle VCPU:
+	 *    - is found (unlikely) - we'll use this VCPU as balancer target.
+	 *      It's quite rare case, because after each successful balancing
+	 *      the idle VCPU will become non-idle;
+	 *    - is not found - continue to use current VCPU (i.e. this_cpu) and
+	 *      try to balance tasks between busy PCPU's that belongs to the
+	 *      current vsched now (see find_busiest_group()).
+	 */
+	target_vcpu = find_idle_target(&cpus);
+	if (target_vcpu)
+		this_cpu = target_vcpu;
+	else if (vcpu_vsched(this_cpu) == &idle_vsched)
+		goto out_balanced;
+
+	this_rq = vcpu_rq(this_cpu);
+#endif
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
 							&cpus);
 	if (!group) {
@@ -2778,13 +4053,17 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, imbalance, &cpus);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, idle,
+			imbalance, &cpus);
+	if (!busiest_vcpu) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+	busiest = vcpu_rq(busiest_vcpu);
+
+	if (unlikely(busiest == this_rq))
+		goto out_balanced;
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
@@ -2822,7 +4101,7 @@ redo:
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
-			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			if (!vcpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
 				spin_unlock(&busiest->lock);
 				all_pinned = 1;
 				goto out_one_pinned;
@@ -2890,10 +4169,11 @@ out_one_pinned:
  * this_rq is locked.
  */
 static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
+load_balance_newidle(vcpu_t this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
 	struct sched_group *group;
-	struct rq *busiest = NULL;
+	struct rq *busiest;
+	vcpu_t busiest_vcpu;
 	unsigned long imbalance;
 	int nr_moved = 0;
 	int sd_idle = 0;
@@ -2911,14 +4191,13 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
-				&cpus);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE,
+				imbalance, &cpus);
+	if (!busiest_vcpu || busiest_vcpu == this_cpu) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
 	}
-
-	BUG_ON(busiest == this_rq);
+	busiest = vcpu_rq(busiest_vcpu);
 
 	schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
 
@@ -2960,8 +4239,11 @@ out_balanced:
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
+ *
+ * Returns whether to continue with another runqueue
+ * instead of switching to idle.
  */
-static void idle_balance(int this_cpu, struct rq *this_rq)
+static int idle_balance(vcpu_t this_cpu, struct rq *this_rq)
 {
 	struct sched_domain *sd;
 
@@ -2969,9 +4251,10 @@ static void idle_balance(int this_cpu, s
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			/* If we've pulled tasks over stop searching: */
 			if (load_balance_newidle(this_cpu, this_rq, sd))
-				break;
+				return 1;
 		}
 	}
+	return 0;
 }
 
 /*
@@ -2981,10 +4264,18 @@ static void idle_balance(int this_cpu, s
  * logical imbalances.
  *
  * Called with busiest_rq locked.
+ *
+ * In human terms: balancing of CPU load by moving tasks between CPUs is
+ * performed by 2 methods, push and pull.
+ * In certain places when CPU is found to be idle, it performs pull from busy
+ * CPU to current (idle) CPU.
+ * active_load_balance implements push method, with migration thread getting
+ * scheduled on a busy CPU (hence, making all running processes on this CPU sit
+ * in the queue) and selecting where to push and which task.
  */
-static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
+static void active_load_balance(struct rq *busiest_rq, vcpu_t busiest_cpu)
 {
-	int target_cpu = busiest_rq->push_cpu;
+	vcpu_t target_cpu = busiest_rq->push_cpu;
 	struct sched_domain *sd;
 	struct rq *target_rq;
 
@@ -2992,7 +4283,7 @@ static void active_load_balance(struct r
 	if (busiest_rq->nr_running <= 1)
 		return;
 
-	target_rq = cpu_rq(target_cpu);
+	target_rq = vcpu_rq(target_cpu);
 
 	/*
 	 * This condition is "impossible", if it occurs
@@ -3004,10 +4295,17 @@ static void active_load_balance(struct r
 	/* move a task from busiest_rq to target_rq */
 	double_lock_balance(busiest_rq, target_rq);
 
+	/*
+	 * Our main candidate where to push our tasks is busiest->push_cpu.
+	 * First, find the domain that spans over both that candidate CPU and
+	 * the current one.
+	 *
+	 * FIXME: make sure that push_cpu doesn't disappear before we get here.
+	 */
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-		    cpu_isset(busiest_cpu, sd->span))
+		    cpu_isset(vcpu_last_pcpu(busiest_cpu), sd->span))
 				break;
 	}
 
@@ -3040,31 +4338,19 @@ static inline unsigned long cpu_offset(i
 }
 
 static void
-rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
+rebalance_tick(vcpu_t this_cpu, struct rq *this_rq, enum idle_type idle)
 {
-	unsigned long this_load, interval, j = cpu_offset(this_cpu);
+	unsigned long j;
 	struct sched_domain *sd;
-	int i, scale;
 
-	this_load = this_rq->raw_weighted_load;
 
 	/* Update our load: */
-	for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
-		unsigned long old_load, new_load;
-
-		old_load = this_rq->cpu_load[i];
-		new_load = this_load;
-		/*
-		 * Round up the averaging division if load is increasing. This
-		 * prevents us from getting stuck on 9 if the load is 10, for
-		 * example.
-		 */
-		if (new_load > old_load)
-			new_load += scale-1;
-		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
-	}
+	update_rq_cpu_load(this_rq);
+	j = jiffies + cpu_offset(smp_processor_id());
 
 	for_each_domain(this_cpu, sd) {
+		unsigned long interval;
+
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
@@ -3094,18 +4380,20 @@ rebalance_tick(int this_cpu, struct rq *
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
+static inline void rebalance_tick(vcpu_t cpu, struct rq *rq, enum idle_type idle)
 {
 }
-static inline void idle_balance(int cpu, struct rq *rq)
+static inline int idle_balance(vcpu_t cpu, struct rq *rq)
 {
 }
 #endif
 
-static inline int wake_priority_sleeper(struct rq *rq)
+static inline int wake_priority_sleeper(struct rq *rq, struct task_struct *idle)
 {
 	int ret = 0;
 
+#ifndef CONFIG_SCHED_VCPU
+	/* FIXME: can we implement SMT priority sleeping for this? */
 #ifdef CONFIG_SCHED_SMT
 	spin_lock(&rq->lock);
 	/*
@@ -3113,11 +4401,13 @@ static inline int wake_priority_sleeper(
 	 * reasons reschedule the idle task to see if it can now run.
 	 */
 	if (rq->nr_running) {
-		resched_task(rq->idle);
+		/* FIXME */
+		resched_task(idle);
 		ret = 1;
 	}
 	spin_unlock(&rq->lock);
 #endif
+#endif
 	return ret;
 }
 
@@ -3175,6 +4465,15 @@ static inline int expired_starving(struc
 	return 0;
 }
 
+#ifdef CONFIG_VE
+#define update_ve_cpu_time(p, time, tick)	do {		\
+		VE_CPU_STATS((p)->ve_task_info.owner_env,	\
+			task_cpu(p))->time += tick;		\
+	} while (0)
+#else
+#define update_ve_cpu_time(p, time, tick)	do { } while (0)
+#endif
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -3190,10 +4489,13 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0) {
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
-	else
+		update_ve_cpu_time(p, nice, tmp);
+	} else {
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+		update_ve_cpu_time(p, user, tmp);
+	}
 }
 
 /*
@@ -3206,20 +4508,22 @@ void account_system_time(struct task_str
 			 cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	struct rq *rq = this_rq();
+	int this_pcpu = raw_smp_processor_id();
 	cputime64_t tmp;
 
 	p->stime = cputime_add(p->stime, cputime);
+	tmp = cputime_to_cputime64(cputime);
+
+	update_ve_cpu_time(p, system, tmp);
 
 	/* Add system time to cpustat. */
-	tmp = cputime_to_cputime64(cputime);
 	if (hardirq_count() - hardirq_offset)
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
+	else if (p != this_pcpu()->idle)
 		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
+	else if ((atomic_read(&glob_task_nrs[this_pcpu].nr_iowait) > 0))
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3238,7 +4542,7 @@ void account_steal_time(struct task_stru
 	cputime64_t tmp = cputime_to_cputime64(steal);
 	struct rq *rq = this_rq();
 
-	if (p == rq->idle) {
+	if (p == this_pcpu()->idle) {
 		p->stime = cputime_add(p->stime, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
@@ -3260,16 +4564,21 @@ void scheduler_tick(void)
 	unsigned long long now = sched_clock();
 	struct task_struct *p = current;
 	int cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
+	vcpu_t vcpu;
+	struct rq *rq;
 
+	vcpu = this_vcpu();
+	rq = vcpu_rq(vcpu);
 	update_cpu_clock(p, rq, now);
 
 	rq->timestamp_last_tick = now;
 
-	if (p == rq->idle) {
-		if (wake_priority_sleeper(rq))
+	set_tsk_need_resched(p); //FIXME
+
+	if (p == pcpu(cpu)->idle) {
+		if (wake_priority_sleeper(rq, pcpu(cpu)->idle))
 			goto out;
-		rebalance_tick(cpu, rq, SCHED_IDLE);
+		rebalance_tick(vcpu, rq, SCHED_IDLE);
 		return;
 	}
 
@@ -3345,10 +4654,14 @@ void scheduler_tick(void)
 out_unlock:
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(cpu, rq, NOT_IDLE);
+	rebalance_tick(vcpu, rq, NOT_IDLE);
 }
 
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU)
+/* FIXME: SMT scheduling
+ * rq->cpu is initialized with rq address if FAIRSCED is on
+ * this is not correct for SMT case
+ */
 static inline void wakeup_busy_runqueue(struct rq *rq)
 {
 	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
@@ -3359,7 +4672,7 @@ static inline void wakeup_busy_runqueue(
 /*
  * Called with interrupt disabled and this_rq's runqueue locked.
  */
-static void wake_sleeping_dependent(int this_cpu)
+static void wake_sleeping_dependent(vcpu_t this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	int i;
@@ -3405,7 +4718,7 @@ smt_slice(struct task_struct *p, struct 
  * need to be obeyed.
  */
 static int
-dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
+dependent_sleeper(vcpu_t this_cpu, struct task_struct *p)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	int ret = 0, i;
@@ -3468,11 +4781,11 @@ unlock:
 	return ret;
 }
 #else
-static inline void wake_sleeping_dependent(int this_cpu)
+static inline void wake_sleeping_dependent(vcpu_t this_cpu)
 {
 }
 static inline int
-dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
+dependent_sleeper(vcpu_t this_cpu, struct task_struct *p)
 {
 	return 0;
 }
@@ -3531,7 +4844,9 @@ asmlinkage void __sched schedule(void)
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
-	int cpu, idx, new_prio;
+	int idx, new_prio;
+	vcpu_t vcpu;
+	cycles_t cycles;
 	long *switch_count;
 	struct rq *rq;
 
@@ -3553,13 +4868,14 @@ need_resched:
 	prev = current;
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
+	cycles = get_cycles();
 	rq = this_rq();
 
 	/*
 	 * The idle thread is not allowed to schedule!
 	 * Remove this check after it has been exercised a bit.
 	 */
-	if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
+	if (unlikely(prev == this_pcpu()->idle) && prev->state != TASK_RUNNING) {
 		printk(KERN_ERR "bad: scheduling from the idle thread!\n");
 		dump_stack();
 	}
@@ -3590,25 +4906,38 @@ need_resched_nonpreemptible:
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
 		else {
-			if (prev->state == TASK_UNINTERRUPTIBLE)
-				rq->nr_uninterruptible++;
 			deactivate_task(prev, rq);
 		}
 	}
 
 	update_cpu_clock(prev, rq, now);
 
-	cpu = smp_processor_id();
+	prev->sleep_avg -= run_time;
+	if ((long)prev->sleep_avg <= 0)
+		prev->sleep_avg = 0;
+
+	vcpu = rq_vcpu(rq);
+	if (rq->nr_running && vcpu_is_hot(vcpu))
+		goto same_vcpu;
+
+	if (unlikely(!rq->nr_running))
+		idle_balance(vcpu, rq);
+	vcpu = schedule_vcpu(vcpu, cycles);
+	rq = vcpu_rq(vcpu);
+
 	if (unlikely(!rq->nr_running)) {
-		idle_balance(cpu, rq);
-		if (!rq->nr_running) {
-			next = rq->idle;
-			rq->expired_timestamp = 0;
-			wake_sleeping_dependent(cpu);
-			goto switch_tasks;
-		}
+		next = this_pcpu()->idle;
+		rq->expired_timestamp = 0;
+		wake_sleeping_dependent(vcpu);
+		/*
+		 * wake_sleeping_dependent() might have released
+		 * the runqueue, so break out if we got new
+		 * tasks meanwhile:
+		 */
+		goto switch_tasks;
 	}
 
+same_vcpu:
 	array = rq->active;
 	if (unlikely(!array->nr_active)) {
 		/*
@@ -3644,28 +4973,48 @@ need_resched_nonpreemptible:
 		}
 	}
 	next->sleep_type = SLEEP_NORMAL;
-	if (dependent_sleeper(cpu, rq, next))
-		next = rq->idle;
+	if (dependent_sleeper(vcpu, next))
+		next = this_pcpu()->idle;
+
 switch_tasks:
-	if (next == rq->idle)
-		schedstat_inc(rq, sched_goidle);
 	prefetch(next);
 	prefetch_stack(next);
 	clear_tsk_need_resched(prev);
-	rcu_qsctr_inc(task_cpu(prev));
+	rcu_qsctr_inc(task_pcpu(prev));
 
-	prev->sleep_avg -= run_time;
-	if ((long)prev->sleep_avg <= 0)
-		prev->sleep_avg = 0;
+	/* updated w/o rq->lock, which is ok due to after-read-checks */
 	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
+		cycles_t cycles;
+
+		/* current physical CPU id should be valid after switch */
+		set_task_vcpu(next, vcpu);
+		set_task_pcpu(next, task_pcpu(prev));
+		cycles = get_cycles();
 		next->timestamp = now;
 		rq->nr_switches++;
+		glob_task_nrs[smp_processor_id()].nr_switches++;
 		rq->curr = next;
 		++*switch_count;
 
+#ifdef CONFIG_VE
+		prev->ve_task_info.sleep_stamp = cycles;
+		if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle)
+			write_wakeup_stamp(prev, cycles);
+		update_sched_lat(next, cycles);
+
+		/* because next & prev are protected with
+		 * runqueue lock we may not worry about
+		 * wakeup_stamp and sched_time protection
+		 * (same thing in 'else' branch below)
+		 */
+		update_ve_task_info(prev, cycles);
+		next->ve_task_info.sched_time = cycles;
+		write_wakeup_stamp(next, 0);
+#endif
+
 		prepare_task_switch(rq, prev, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
@@ -3675,8 +5024,10 @@ switch_tasks:
 		 * frame will be invalid.
 		 */
 		finish_task_switch(this_rq(), prev);
-	} else
+	} else {
+		update_ve_task_info(prev, get_cycles());
 		spin_unlock_irq(&rq->lock);
+	}
 
 	prev = current;
 	if (unlikely(reacquire_kernel_lock(prev) < 0))
@@ -4318,30 +5669,12 @@ int task_nice(const struct task_struct *
 EXPORT_SYMBOL_GPL(task_nice);
 
 /**
- * idle_cpu - is a given cpu idle currently?
- * @cpu: the processor in question.
- */
-int idle_cpu(int cpu)
-{
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
-}
-
-/**
- * idle_task - return the idle task for a given cpu.
- * @cpu: the processor in question.
- */
-struct task_struct *idle_task(int cpu)
-{
-	return cpu_rq(cpu)->idle;
-}
-
-/**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
  */
 static inline struct task_struct *find_process_by_pid(pid_t pid)
 {
-	return pid ? find_task_by_pid(pid) : current;
+	return pid ? find_task_by_pid_ve(pid) : current;
 }
 
 /*
@@ -4414,7 +5747,7 @@ recheck:
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
-	if (!capable(CAP_SYS_NICE)) {
+	if (!capable(CAP_SYS_ADMIN)) {
 		/*
 		 * can't change policy, except between SCHED_NORMAL
 		 * and SCHED_BATCH:
@@ -4457,7 +5790,7 @@ recheck:
 	}
 	array = p->array;
 	if (array)
-		deactivate_task(p, rq);
+		__deactivate_task(p, rq);
 	oldprio = p->prio;
 	__setscheduler(p, policy, param->sched_priority);
 	if (array) {
@@ -4505,6 +5838,8 @@ do_sched_setscheduler(pid_t pid, int pol
 	return retval;
 }
 
+EXPORT_SYMBOL(do_sched_setscheduler);
+
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
  * @pid: the pid in question.
@@ -4640,7 +5975,7 @@ out_unlock:
 	return retval;
 }
 
-static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
+int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
 			     cpumask_t *new_mask)
 {
 	if (len < sizeof(cpumask_t)) {
@@ -4887,11 +6222,20 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct rq *rq = &__raw_get_cpu_var(runqueues);
+	struct rq *rq = this_rq();
+	int cpu;
+
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	ve = current->ve_task_info.owner_env;
+#endif
 
+	cpu = raw_smp_processor_id();
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	nr_iowait_inc(cpu);
 	schedule();
+	nr_iowait_dec(cpu);
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
 }
@@ -4899,12 +6243,21 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct rq *rq = &__raw_get_cpu_var(runqueues);
+	struct rq *rq = this_rq();
 	long ret;
+	int cpu;
+
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	ve = current->ve_task_info.owner_env;
+#endif
 
+	cpu = raw_smp_processor_id();
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	nr_iowait_inc(cpu);
 	ret = schedule_timeout(timeout);
+	nr_iowait_dec(cpu);
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
 	return ret;
@@ -5029,15 +6382,9 @@ void sched_show_task(struct task_struct 
 	printk("%-13.13s %c", p->comm,
 		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if (BITS_PER_LONG == 32)
-	if (state == TASK_RUNNING)
-		printk(" running ");
-	else
-		printk(" %08lX ", thread_saved_pc(p));
+	printk(" %08lX ", (unsigned long)p);
 #else
-	if (state == TASK_RUNNING)
-		printk("  running task   ");
-	else
-		printk(" %016lx ", thread_saved_pc(p));
+	printk(" %016lx ", (unsigned long)p);
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
@@ -5075,21 +6422,21 @@ void show_state(void)
 #if (BITS_PER_LONG == 32)
 	printk("\n"
 	       "                                               sibling\n");
-	printk("  task             PC      pid father child younger older\n");
+	printk("  task       taskaddr      pid father child younger older\n");
 #else
 	printk("\n"
 	       "                                                       sibling\n");
-	printk("  task                 PC          pid father child younger older\n");
+	printk("  task           taskaddr          pid father child younger older\n");
 #endif
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take alot of time:
 		 */
 		touch_nmi_watchdog();
 		sched_show_task(p);
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	touch_all_softlockup_watchdogs();
 
@@ -5097,6 +6444,23 @@ void show_state(void)
 	debug_show_all_locks();
 }
 
+#ifdef CONFIG_SCHED_VCPU
+static void init_boot_vcpus(long cpu)
+{
+	if (vsched_vcpu(&idle_vsched, cpu) != NULL)
+		return;
+
+	if (__add_vcpu(&idle_vsched, cpu) != 0)
+		panic("Can't create idle vcpu %ld\n", cpu);
+
+	/* Also create vcpu for default_vsched */
+	if (__add_vcpu(&default_vsched, cpu) != 0)
+		panic("Can't create default vcpu %ld\n", cpu);
+
+	cpu_set(cpu, idle_vsched.pcpu_running_map);
+}
+#endif
+
 /**
  * init_idle - set up an idle thread for a given CPU
  * @idle: task in question
@@ -5107,22 +6471,51 @@ void show_state(void)
  */
 void __cpuinit init_idle(struct task_struct *idle, int cpu)
 {
-	struct rq *rq = cpu_rq(cpu);
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	struct rq *rq;
 	unsigned long flags;
 
+#ifdef CONFIG_SCHED_VCPU
+	init_boot_vcpus(cpu);
+	vsched = &idle_vsched;
+#else
+	vsched = NULL;
+#endif
+	vcpu = vsched_vcpu(vsched, cpu);
+	rq = vcpu_rq(vcpu);
+
 	idle->timestamp = sched_clock();
 	idle->sleep_avg = 0;
 	idle->array = NULL;
 	idle->prio = idle->normal_prio = MAX_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	set_task_vsched(idle, &idle_vsched);
 	set_task_cpu(idle, cpu);
 
 	spin_lock_irqsave(&rq->lock, flags);
-	rq->curr = rq->idle = idle;
+	pcpu(cpu)->idle = idle;
+	rq->curr = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	idle->oncpu = 1;
 #endif
+	set_task_pcpu(idle, cpu);
+	set_task_vsched(idle, vsched);
+	set_task_vcpu(idle, vcpu);
+#ifdef CONFIG_SCHED_VCPU
+	/* the following code is very close to vcpu_get */
+	spin_lock(&fairsched_lock);
+	pcpu(cpu)->vcpu = vcpu;
+	pcpu(cpu)->vsched = vcpu->vsched;
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	__set_bit(cpu, vsched->vcpu_running_map.bits);
+	__set_bit(cpu, vsched->pcpu_running_map.bits);
+	vcpu->running = 1;
+	spin_unlock(&fairsched_lock);
+#else
+	pcpu(cpu)->vcpu = vcpu;
+#endif
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
@@ -5142,7 +6535,6 @@ void __cpuinit init_idle(struct task_str
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
-#ifdef CONFIG_SMP
 /*
  * This is how migration works:
  *
@@ -5159,6 +6551,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  * 7) we wake up and the migration is done.
  */
 
+#ifdef CONFIG_SMP
 /*
  * Change a given task's CPU affinity. Migrate the thread to a
  * proper CPU and schedule it away if the CPU it's executing on
@@ -5174,9 +6567,16 @@ int set_cpus_allowed(struct task_struct 
 	unsigned long flags;
 	struct rq *rq;
 	int ret = 0;
+	struct vcpu_scheduler *vsched;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(new_mask, cpu_online_map)) {
+	vsched = task_vsched(p);
+	if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (p->flags & PF_THREAD_BOUND) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -5186,7 +6586,8 @@ int set_cpus_allowed(struct task_struct 
 	if (cpu_isset(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(new_mask), &req)) {
+	if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)),
+								&req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -5200,6 +6601,7 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed);
+#endif
 
 /*
  * Move (not current) task off this cpu, onto dest cpu.  We're doing
@@ -5212,26 +6614,46 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
  *
  * Returns non-zero if task was successfully migrated.
  */
-static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+static int __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu)
 {
 	struct rq *rq_dest, *rq_src;
 	int ret = 0;
 
-	if (unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(vcpu_is_offline(dest_cpu)))
 		return ret;
 
-	rq_src = cpu_rq(src_cpu);
-	rq_dest = cpu_rq(dest_cpu);
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched);
+#endif
+	rq_src = vcpu_rq(src_cpu);
+	rq_dest = vcpu_rq(dest_cpu);
 
+again:
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
-	if (task_cpu(p) != src_cpu)
-		goto out;
+	if (task_rq(p) != rq_src) {
+		if (vcpu_vsched(src_cpu) == vcpu_vsched(dest_cpu) ||
+				task_vsched(p) != vcpu_vsched(src_cpu))
+			goto out;
+		double_rq_unlock(rq_src, rq_dest);
+		rq_src = task_rq(p);
+		goto again;
+	}
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!vcpu_isset(dest_cpu, p->cpus_allowed))
 		goto out;
 
-	set_task_cpu(p, dest_cpu);
+	if (task_running(rq_src, p)) {
+		if (vcpu_vsched(src_cpu) != vcpu_vsched(dest_cpu))
+			/* Bad Luck */
+			goto out;
+		else
+			BUG();
+	}
+
+	BUG_ON(p->flags & PF_THREAD_BOUND);
+	set_task_vsched(p, vcpu_vsched(dest_cpu));
+	set_task_vcpu(p, dest_cpu);
 	if (p->array) {
 		/*
 		 * Sync timestamp with rq_dest's before activating.
@@ -5241,7 +6663,7 @@ static int __migrate_task(struct task_st
 		 */
 		p->timestamp = p->timestamp - rq_src->timestamp_last_tick
 				+ rq_dest->timestamp_last_tick;
-		deactivate_task(p, rq_src);
+		__deactivate_task(p, rq_src);
 		__activate_task(p, rq_dest);
 		if (TASK_PREEMPTS_CURR(p, rq_dest))
 			resched_task(rq_dest->curr);
@@ -5257,13 +6679,21 @@ out:
  * thread migration by bumping thread off CPU then 'pushing' onto
  * another runqueue.
  */
+#if defined (CONFIG_HOTPLUG_CPU) || defined (CONFIG_SCHED_VCPU)
+static void migrate_live_tasks(vcpu_t src_cpu);
+static void migrate_dead_tasks(vcpu_t dead_cpu);
+#endif
 static int migration_thread(void *data)
 {
-	int cpu = (long)data;
 	struct rq *rq;
+	vcpu_t cpu = (vcpu_t)data;
 
-	rq = cpu_rq(cpu);
+	rq = vcpu_rq(cpu);
 	BUG_ON(rq->migration_thread != current);
+	BUG_ON(!rq->migration_thread_init);
+
+	/* migration thread startup has complete */
+	rq->migration_thread_init = 0;
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
@@ -5274,15 +6704,17 @@ static int migration_thread(void *data)
 
 		spin_lock_irq(&rq->lock);
 
-		if (cpu_is_offline(cpu)) {
+		if (vcpu_is_offline(cpu)) {
 			spin_unlock_irq(&rq->lock);
 			goto wait_to_die;
 		}
 
+#ifdef CONFIG_SMP
 		if (rq->active_balance) {
 			active_load_balance(rq, cpu);
 			rq->active_balance = 0;
 		}
+#endif
 
 		head = &rq->migration_queue;
 
@@ -5301,8 +6733,7 @@ static int migration_thread(void *data)
 
 		complete(&req->done);
 	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
+	goto die;
 
 wait_to_die:
 	/* Wait for kthread_stop */
@@ -5311,20 +6742,33 @@ wait_to_die:
 		schedule();
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
+die:
 	__set_current_state(TASK_RUNNING);
+#if defined (CONFIG_HOTPLUG_CPU) || defined (CONFIG_SCHED_VCPU)
+	migrate_live_tasks(cpu);
+	spin_lock_irq(&rq->lock);
+	migrate_dead_tasks(cpu);
+	spin_unlock_irq(&rq->lock);
+#endif
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-/* Figure out where task on dead CPU should go, use force if neccessary. */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_SCHED_VCPU)
+/*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ * NOTE: interrupts should be disabled by the caller
+ */
+static void move_task_off_dead_cpu(vcpu_t dead_cpu, struct task_struct *p)
 {
 	unsigned long flags;
-	cpumask_t mask;
 	struct rq *rq;
+	struct vcpu_scheduler *vsched;
+	cpumask_t mask;
 	int dest_cpu;
 
 restart:
+#ifndef CONFIG_SCHED_VCPU
+#error "FIXME: wrong code"
 	/* On same node? */
 	mask = node_to_cpumask(cpu_to_node(dead_cpu));
 	cpus_and(mask, mask, p->cpus_allowed);
@@ -5351,7 +6795,23 @@ restart:
 			       "longer affine to cpu%d\n",
 			       p->pid, p->comm, dead_cpu);
 	}
-	if (!__migrate_task(p, dead_cpu, dest_cpu))
+#else
+	vsched = vcpu_vsched(dead_cpu);
+	cpus_and(mask, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+	dest_cpu = any_online_cpu(mask);
+
+	/* On any allowed CPU? */
+	if (dest_cpu == NR_CPUS) {
+		rq = task_rq_lock(p, &flags);
+		cpus_setall(p->cpus_allowed);
+		task_rq_unlock(rq, &flags);
+		dest_cpu = any_online_cpu(vsched_vcpu_online_map(vsched));
+	}
+	/* this can happen only when non-empty node is removed... */
+	if (dest_cpu == NR_CPUS)
+		printk("BUG: no where to move task %s(%d)\n", p->comm, p->pid);
+#endif
+	if (!__migrate_task(p, dead_cpu, vsched_vcpu(vsched, dest_cpu)))
 		goto restart;
 }
 
@@ -5359,40 +6819,38 @@ restart:
  * While a dead CPU has no uninterruptible tasks queued at this point,
  * it might still have a nonzero ->nr_uninterruptible counter, because
  * for performance reasons the counter is not stricly tracking tasks to
- * their home CPUs. So we just add the counter to another CPU's counter,
- * to keep the global sum constant after CPU-down:
+ * their home CPUs. So we just add the counter to a fixup variable on
+ * a vcpu scheduler to keep the global per-CT sum constant after CPU-down.
  */
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
-	unsigned long flags;
-
-	local_irq_save(flags);
-	double_rq_lock(rq_src, rq_dest);
-	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
+	atomic_add((int)rq_src->nr_uninterruptible,
+			&(vcpu_vsched(rq_vcpu(rq_src)))->nr_unint_fixup);
 	rq_src->nr_uninterruptible = 0;
-	double_rq_unlock(rq_src, rq_dest);
-	local_irq_restore(flags);
 }
 
 /* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
+static void migrate_live_tasks(vcpu_t src_cpu)
 {
 	struct task_struct *p, *t;
 
+	BUG_ON(vcpu_isset(src_cpu, vsched_vcpu_online_map(vcpu_vsched(src_cpu))));
 	write_lock_irq(&tasklist_lock);
 
-	do_each_thread(t, p) {
+	do_each_thread_all(t, p) {
 		if (p == current)
 			continue;
+		if (p == vcpu_rq(src_cpu)->migration_thread)
+			continue;
 
-		if (task_cpu(p) == src_cpu)
+		if (task_vcpu(p) == src_cpu)
 			move_task_off_dead_cpu(src_cpu, p);
-	} while_each_thread(t, p);
+	} while_each_thread_all(t, p);
 
 	write_unlock_irq(&tasklist_lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /* Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible and adding it to
  * the _front_ of the runqueue. Used by CPU offline code.
@@ -5416,6 +6874,9 @@ void sched_idle_next(void)
 	__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 
 	/* Add idle task to the _front_ of its priority queue: */
+#ifdef CONFIG_SCHED_VCPU
+#error "FIXME: VCPU vs. HOTPLUG: fix the code below"
+#endif
 	__activate_idle_task(p, rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
@@ -5435,10 +6896,12 @@ void idle_task_exit(void)
 		switch_mm(mm, &init_mm, current);
 	mmdrop(mm);
 }
+#endif /* CONFIG_HOTPLUG_CPU */
 
-static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
+/* called under rq->lock with disabled interrupts */
+static void migrate_dead(vcpu_t dead_cpu, struct task_struct *p)
 {
-	struct rq *rq = cpu_rq(dead_cpu);
+	struct rq *rq = vcpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
 	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
@@ -5452,88 +6915,152 @@ static void migrate_dead(unsigned int de
 	 * Drop lock around migration; if someone else moves it,
 	 * that's OK.  No task can be added to this CPU, so iteration is
 	 * fine.
+	 * NOTE: interrupts should be left disabled  --dev@
 	 */
-	spin_unlock_irq(&rq->lock);
+	spin_unlock(&rq->lock);
 	move_task_off_dead_cpu(dead_cpu, p);
-	spin_lock_irq(&rq->lock);
+	spin_lock(&rq->lock);
+
+	put_task_struct(p);
+}
+
+/* release_task() removes task from tasklist, so we won't find dead tasks. */
+static void migrate_dead_tasks(vcpu_t dead_cpu)
+{
+	struct rq *rq = vcpu_rq(dead_cpu);
+	unsigned int arr, i;
+
+	for (arr = 0; arr < 2; arr++) {
+		for (i = 0; i < MAX_PRIO; i++) {
+			struct list_head *list = &rq->arrays[arr].queue[i];
+			struct task_struct *tsk;
+restart:
+			list_for_each_entry(tsk, list, run_list) {
+				if (tsk == rq->migration_thread)
+					continue;
+				migrate_dead(dead_cpu, tsk);
+				goto restart;
+			}
+		}
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_SCHED_VCPU */
+
+static void migration_thread_bind(struct task_struct *k, vcpu_t cpu)
+{
+	BUG_ON(k->state != TASK_INTERRUPTIBLE);
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	wait_task_inactive(k);
 
-	put_task_struct(p);
+	set_task_vsched(k, vcpu_vsched(cpu));
+	set_task_vcpu(k, cpu);
+	k->cpus_allowed = cpumask_of_cpu(cpu->id);
+	k->flags |= PF_THREAD_BOUND;
 }
 
-/* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
+static void migration_thread_stop(struct rq *rq)
 {
-	struct rq *rq = cpu_rq(dead_cpu);
-	unsigned int arr, i;
+	struct task_struct *thread;
 
-	for (arr = 0; arr < 2; arr++) {
-		for (i = 0; i < MAX_PRIO; i++) {
-			struct list_head *list = &rq->arrays[arr].queue[i];
+	thread = rq->migration_thread;
+	if (thread == NULL)
+		return;
 
-			while (!list_empty(list))
-				migrate_dead(dead_cpu, list_entry(list->next,
-					     struct task_struct, run_list));
-		}
-	}
+	/*
+	 * Wait until migration thread has really started, i.e.
+	 * migration_thread() function has been called. It's important,
+	 * because migration thread can be still sleeping after creation, but
+	 * its vcpu is already marked as online, and tasks can migrate to this
+	 * cpu. If we kill non-started migration thread now, migration_thread()
+	 * function will not be called at all (see how kthread() works).
+	 * And if migration_thread() is not called, there is no way to move 
+	 * tasks away from thread's vcpu. So, rq->nr_running will be != 0 even
+	 * after migration thread is dead.
+	 */
+	while (rq->migration_thread_init)
+		yield();
+
+	get_task_struct(thread);
+	if (kthread_stop(thread) == -EINTR)
+		/*
+		 * Somebody else has called kthread_stop() without 
+		 * waiting for migration thread init has complete.
+		 */
+		BUG_ON(1);
+
+	/* We MUST ensure, that the do_exit of the migration thread is
+	 * completed and it will never be scheduled again before vsched_destroy.
+	 * The task with flag PF_DEAD if unscheduled will never receive
+	 * CPU again. */
+	while (!(thread->flags & PF_DEAD) || task_running(rq, thread))
+		yield();
+	put_task_struct(thread);
+
+	rq->migration_thread = NULL;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
  */
-static int __cpuinit
-migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int vmigration_call(struct notifier_block *nfb, unsigned long action,
+			  void *hcpu)
 {
 	struct task_struct *p;
-	int cpu = (long)hcpu;
+	vcpu_t cpu = (vcpu_t)hcpu;
 	unsigned long flags;
 	struct rq *rq;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+		p = kthread_create(migration_thread, hcpu, "migration/%d/%d", 
+			vsched_id(vcpu_vsched(cpu)), cpu->id);
 		if (IS_ERR(p))
 			return NOTIFY_BAD;
 		p->flags |= PF_NOFREEZE;
-		kthread_bind(p, cpu);
-		/* Must be high prio: stop_machine expects to yield to it. */
+
+		migration_thread_bind(p, cpu);
 		rq = task_rq_lock(p, &flags);
+		/* Must be high prio: stop_machine expects to yield to it. */
 		__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 		task_rq_unlock(rq, &flags);
-		cpu_rq(cpu)->migration_thread = p;
+		vcpu_rq(cpu)->migration_thread = p;
+		vcpu_rq(cpu)->migration_thread_init = 1;
+		cpu_set(cpu->id, vsched_vcpu_online_map(vcpu_vsched(cpu)));
 		break;
 
 	case CPU_ONLINE:
 		/* Strictly unneccessary, as first user will wake it. */
-		wake_up_process(cpu_rq(cpu)->migration_thread);
+		wake_up_process(vcpu_rq(cpu)->migration_thread);
 		break;
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU)
+#error "FIXME: CPU down code doesn't work yet with VCPUs"
+#endif
 	case CPU_UP_CANCELED:
-		if (!cpu_rq(cpu)->migration_thread)
+		if (!vcpu_rq(cpu)->migration_thread)
 			break;
 		/* Unbind it from offline cpu so it can run.  Fall thru. */
-		kthread_bind(cpu_rq(cpu)->migration_thread,
-			     any_online_cpu(cpu_online_map));
-		kthread_stop(cpu_rq(cpu)->migration_thread);
-		cpu_rq(cpu)->migration_thread = NULL;
+		migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu());
+		migration_thread_stop(vcpu_rq(cpu));
 		break;
 
 	case CPU_DEAD:
-		migrate_live_tasks(cpu);
-		rq = cpu_rq(cpu);
-		kthread_stop(rq->migration_thread);
-		rq->migration_thread = NULL;
+		rq = vcpu_rq(cpu);
+		migration_thread_stop(rq);
+
+		spin_lock_irqsave(&rq->lock, flags);
+		migrate_nr_uninterruptible(rq);
+		spin_unlock_irqrestore(&rq->lock, flags);
+#ifdef CONFIG_HOTPLUG_CPU
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
-		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
-		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
+#endif
 
 		/* No need to migrate the tasks: it was best-effort if
 		 * they didn't do lock_cpu_hotplug().  Just wake up
@@ -5549,15 +7076,25 @@ migration_call(struct notifier_block *nf
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
-#endif
 	}
 	return NOTIFY_OK;
 }
 
+static int migration_call(struct notifier_block *nfb, unsigned long action,
+			  void *hcpu)
+{
+#ifdef CONFIG_SCHED_VCPU
+	if (action == CPU_UP_PREPARE)
+		init_boot_vcpus((long)hcpu);
+#endif
+	/* we need to translate pcpu to vcpu */
+	return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu));
+}
+
 /* Register at highest priority so that task migration (migrate_all_tasks)
  * happens before everything else.
  */
-static struct notifier_block __cpuinitdata migration_notifier = {
+static struct notifier_block migration_notifier = {
 	.notifier_call = migration_call,
 	.priority = 10
 };
@@ -5573,7 +7110,6 @@ int __init migration_init(void)
 
 	return 0;
 }
-#endif
 
 #ifdef CONFIG_SMP
 #undef SCHED_DOMAIN_DEBUG
@@ -5601,7 +7137,7 @@ static void sched_domain_debug(struct sc
 		printk(KERN_DEBUG);
 		for (i = 0; i < level + 1; i++)
 			printk(" ");
-		printk("domain %d: ", level);
+		printk("domain %d, flags %x: ", level, sd->flags);
 
 		if (!(sd->flags & SD_LOAD_BALANCE)) {
 			printk("does not load-balance\n");
@@ -5726,7 +7262,7 @@ sd_parent_degenerate(struct sched_domain
  */
 static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 {
-	struct rq *rq = cpu_rq(cpu);
+	struct rq *rq = vcpu_rq(vsched_default_vcpu(cpu));
 	struct sched_domain *tmp;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
@@ -5743,6 +7279,7 @@ static void cpu_attach_domain(struct sch
 
 	sched_domain_debug(sd, cpu);
 
+	rcu_assign_pointer(pcpu(cpu)->sd, sd);
 	rcu_assign_pointer(rq->sd, sd);
 }
 
@@ -5921,7 +7458,7 @@ static unsigned long domain_distance(int
 	unsigned long distance = 0;
 	struct sched_domain *sd;
 
-	for_each_domain(cpu1, sd) {
+	for_each_pdomain(pcpu(cpu1)->sd, sd) {
 		WARN_ON(!cpu_isset(cpu1, sd->span));
 		if (cpu_isset(cpu2, sd->span))
 			return distance;
@@ -6243,7 +7780,7 @@ static void calibrate_migration_costs(co
 	 */
 	for_each_cpu_mask(cpu, *cpu_map) {
 		distance = 0;
-		for_each_domain(cpu, sd) {
+		for_each_pdomain(pcpu(cpu)->sd, sd) {
 			sd->cache_hot_time = migration_cost[distance];
 			distance++;
 		}
@@ -6919,6 +8456,9 @@ static int arch_init_sched_domains(const
 
 	err = build_sched_domains(&cpu_default_map);
 
+#ifdef CONFIG_SCHED_VCPU
+	nr_online_pcpus = num_online_cpus();
+#endif
 	return err;
 }
 
@@ -7123,45 +8663,495 @@ int in_sched_functions(unsigned long add
 		&& addr < (unsigned long)__sched_text_end);
 }
 
-void __init sched_init(void)
+static void init_rq(struct rq *rq, int cpu)
 {
-	int i, j, k;
-
-	for_each_possible_cpu(i) {
-		struct prio_array *array;
-		struct rq *rq;
+	int j, k;
+	struct prio_array *array;
 
-		rq = cpu_rq(i);
-		spin_lock_init(&rq->lock);
-		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
-		rq->nr_running = 0;
-		rq->active = rq->arrays;
-		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+	kstat_glob.sched_lat.cur = static_percpu_ptr(&kstat_lat_pcpu_stats,
+			kstat_lat_pcpu_stats_data);
+	for ( j = 0 ; j < KSTAT_ALLOCSTAT_NR ; j++)
+		kstat_glob.alloc_lat[j].cur = static_percpu_ptr(
+				&kstat_alat_pcpu_stats[j],
+				kstat_alat_pcpu_stats_data[j]);
+	spin_lock_init(&rq->lock);
+	rq->nr_running = 0;
+	rq->nr_uninterruptible = 0;
+#ifndef CONFIG_SCHED_VCPU
+	lockdep_set_class(&rq->lock, &rq->rq_lock_key);
+#endif
+	rq->active = rq->arrays;
+	rq->expired = rq->arrays + 1;
+	rq->best_expired_prio = MAX_PRIO;
 
 #ifdef CONFIG_SMP
-		rq->sd = NULL;
-		for (j = 1; j < 3; j++)
-			rq->cpu_load[j] = 0;
-		rq->active_balance = 0;
-		rq->push_cpu = 0;
-		rq->cpu = i;
-		rq->migration_thread = NULL;
-		INIT_LIST_HEAD(&rq->migration_queue);
-#endif
-		atomic_set(&rq->nr_iowait, 0);
-
-		for (j = 0; j < 2; j++) {
-			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
-				INIT_LIST_HEAD(array->queue + k);
-				__clear_bit(k, array->bitmap);
-			}
-			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+	rq->sd = NULL;
+	for (j = 0; j < 3; j++)
+		rq->cpu_load[j] = 0;
+	rq->active_balance = 0;
+#endif
+	rq->push_cpu = NULL;
+	rq->migration_thread = NULL;
+	INIT_LIST_HEAD(&rq->migration_queue);
+	atomic_set(&rq->nr_iowait, 0);
+
+	for (j = 0; j < 2; j++) {
+		array = rq->arrays + j;
+		for (k = 0; k < MAX_PRIO; k++) {
+			INIT_LIST_HEAD(array->queue + k);
+			__clear_bit(k, array->bitmap);
+		}
+		// delimiter for bitsearch
+		__set_bit(MAX_PRIO, array->bitmap);
+	}
+}
+
+static void init_vcpu(vcpu_t vcpu, int id)
+{
+	memset(vcpu, 0, sizeof(struct vcpu_struct));
+	vcpu->id = id;
+	vcpu_last_pcpu(vcpu) = id;
+	init_rq(vcpu_rq(vcpu), id);
+
+	/* ->curr can be dereferenced in try_to_wake_up(), so let it be idle */
+	vcpu_rq(vcpu)->curr = &init_task;
+}
+
+#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED)
+/* both rq and vsched lock should be taken */
+static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu)
+{
+	int id;
+
+	id = vcpu->id;
+	vcpu->vsched = vsched;
+	vsched->vcpu[id] = vcpu;
+	vcpu_last_pcpu(vcpu) = id;
+	wmb();
+	/* FIXME: probably locking should be reworked, e.g.
+	   we don't have corresponding rmb(), so we need to update mask
+	   only after quiescent state */
+	/* init_boot_vcpu() should be remade if RCU is used here */
+	list_add(&vcpu->list, &vsched->idle_list);
+	vsched->num_online_vcpus++;
+}
+
+static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched)
+{
+	struct rq *rq;
+	unsigned long flags;
+	int res = 0;
+
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);
+	spin_lock(&fairsched_lock);
+
+	if (vsched->vcpu[vcpu->id] != NULL)
+		res = -EBUSY;
+	else
+		__install_vcpu(vsched, vcpu);
+
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	return res;
+}
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id)
+{
+	vcpu_t vcpu;
+	int res;
+
+	res = -ENOMEM;
+	vcpu = kmalloc(sizeof(struct vcpu_struct), GFP_KERNEL);
+	if (vcpu == NULL)
+		goto out;
+
+	init_vcpu(vcpu, id);
+	res = install_vcpu(vcpu, vsched);
+	if (res < 0)
+		goto out_free;
+	return 0;
+
+out_free:
+	kfree(vcpu);
+out:
+	return res;
+}
+
+void vsched_init(struct vcpu_scheduler *vsched, int id)
+{
+	unsigned long flags;
+
+	memset(vsched, 0, sizeof(*vsched));
+
+	INIT_LIST_HEAD(&vsched->idle_list);
+	INIT_LIST_HEAD(&vsched->active_list);
+	INIT_LIST_HEAD(&vsched->running_list);
+	vsched->num_online_vcpus = 0;
+	vsched->vcpu_online_map = CPU_MASK_NONE;
+	vsched->vcpu_running_map = CPU_MASK_NONE;
+	vsched->pcpu_running_map = CPU_MASK_NONE;
+	vsched->pcpu_allowed_map = CPU_MASK_ALL;
+	vsched->id = id;
+	atomic_set(&vsched->nr_unint_fixup, 0);
+
+	spin_lock_irqsave(&vsched_list_lock, flags);
+	list_add(&vsched->list, &vsched_list);
+	spin_unlock_irqrestore(&vsched_list_lock, flags);
+}
+
+#ifdef CONFIG_FAIRSCHED
+int scale_vcpu_frequency = 1;
+EXPORT_SYMBOL(scale_vcpu_frequency);
+
+unsigned long ve_scale_khz(unsigned long khz)
+{
+	struct fairsched_node *node;
+	int cpus;
+	unsigned long rate;
+
+	if (!scale_vcpu_frequency)
+		return khz;
+
+	rate = fairsched_nr_cpus << FSCHRATE_SHIFT;
+
+	/*
+	 * Ideally fairsched node should be taken from the current ve_struct.
+	 * However, to simplify the code and locking, it is taken from current
+	 * (currently fairsched_node can be changed only for a sleeping task).
+	 * That means that VE0 processes moved to some special node will get
+	 * fake CPU speed, but that shouldn't be a big problem.
+	 */
+	preempt_disable();
+	node = current->vsched->node;
+	cpus = node->vcpus;
+	if (node->rate_limited)
+		rate = node->rate;
+	preempt_enable();
+
+	return min((unsigned long long)khz,
+		((unsigned long long)khz * (rate / cpus)) >> FSCHRATE_SHIFT);
+}
+
+/* No locks supposed to be held */
+static void vsched_del_vcpu(vcpu_t vcpu, int empty);
+static int vsched_add_vcpu(struct vcpu_scheduler *vsched)
+{
+	int res, err;
+	vcpu_t vcpu;
+	int id;
+	static DECLARE_MUTEX(id_mutex);
+
+	down(&id_mutex);
+	id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS);
+	if (id >= NR_CPUS) {
+		err = -EBUSY;
+		goto out_up;
+	}
+
+	err = __add_vcpu(vsched, id);
+	if (err < 0)
+		goto out_up;
+	memset(VE_CPU_STATS(vsched->node->owner_env, id), 0,
+			sizeof(struct ve_cpu_stats));
+	/* Kick idle time collecting logic */
+	ve_strt_idle(vsched->node->owner_env, id, get_cycles());
+
+	vcpu = vsched_vcpu(vsched, id);
+	err = -ENOMEM;
+
+	res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_del_up;
+
+	res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_cancel_del_up;
+
+	err = 0;
+
+out_up:
+	up(&id_mutex);
+	return err;
+
+out_cancel_del_up:
+	vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu);
+out_del_up:
+	vsched_del_vcpu(vcpu, 0);
+	goto out_up;
+}
+
+static void vsched_del_vcpu(vcpu_t vcpu, int empty)
+{
+	struct vcpu_scheduler *vsched;
+	struct rq *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+
+	spin_lock_irq(&rq->lock);
+	spin_lock(&fairsched_lock);
+	cpu_clear(vcpu->id, vsched->vcpu_online_map);
+	vsched->num_online_vcpus--;
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irq(&rq->lock);
+
+	/* no need to synchronize, if no tasks at all */
+	if (!empty)
+		synchronize_sched();
+
+	/*
+        * FIXME: ideas for VCPU hotplug:
+        *
+        * - push_cpu should be checked/cleanuped
+        * - serialization
+        */
+
+	/*
+	 * all tasks should migrate from this VCPU somewhere,
+	 * also, since this moment VCPU is offline, so migration_thread
+	 * won't accept any new tasks...
+	 */
+	vmigration_call(&migration_notifier, CPU_DEAD, vcpu);
+	BUG_ON(rq->nr_running != 0);
+
+	/* vcpu_put() is called after deactivate_task. This loop makes sure
+	 * that vcpu_put() was finished and vcpu can be freed */
+	while ((volatile int)vcpu->running)
+		yield();
+
+	BUG_ON(vcpu->active);	/* should be in idle_list */
+	BUG_ON(vcpu_rq(vcpu)->prev_mm != NULL);
+	if (vcpu_rq(vcpu)->nr_uninterruptible)
+		printk(KERN_ERR "nr_uninterruptible=%u on vcpu death\n",
+			vcpu_rq(vcpu)->nr_uninterruptible);
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&vcpu->list);
+	vsched_vcpu(vsched, vcpu->id) = NULL;
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(vcpu);
+}
+
+int vsched_set_vcpus(struct vcpu_scheduler *vsched, unsigned int vcpus)
+{
+	int i, ret = 0;
+	vcpu_t vcpu;
+
+	if (vsched->num_online_vcpus < vcpus) {
+		/* need to add more VCPUs */
+		for (i = vcpus - vsched->num_online_vcpus; i > 0; i--) {
+			ret = vsched_add_vcpu(vsched);
+			if (ret < 0)
+				break;
 		}
+	} else if (vsched->num_online_vcpus > vcpus) {
+		/* remove some VCPUs */
+		while (vcpus != vsched->num_online_vcpus) {
+			vcpu = vsched_vcpu(vsched, vsched->num_online_vcpus - 1);
+			BUG_ON(!vcpu);
+			vsched_del_vcpu(vcpu, 0);
+		}
+	}
+#ifdef CONFIG_FAIRSCHED
+	vsched->node->vcpus = vsched->num_online_vcpus;
+#endif
+	return ret;
+}
+
+int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched)
+{
+	vcpu_t dest_vcpu;
+	int id;
+
+	id = first_cpu(vsched->vcpu_online_map);
+	if (id >= NR_CPUS)
+		goto err;
+
+	dest_vcpu = vsched_vcpu(vsched, id);
+	set_cpus_allowed(p, CPU_MASK_ALL);
+	sched_migrate_task(p, dest_vcpu);
+
+	if (task_vsched_id(p) != vsched_id(vsched)) {
+		/* race: probably someone changed cpus_allowed? */
+		printk("vsched_mvpr: failed to move task\n");
+		goto err;
 	}
 
+	return 0;
+
+err:
+	return -EINVAL;
+}
+
+void vsched_fairsched_link(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	vsched->node = node;
+	node->vsched = vsched;
+}
+
+void vsched_fairsched_unlink(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	vsched->node = NULL;
+	node->vsched = NULL;
+}
+
+int vsched_create(int id, struct fairsched_node *node)
+{
+	struct vcpu_scheduler *vsched;
+	int res, cpus;
+
+	vsched = kmalloc(sizeof(*vsched), GFP_KERNEL);
+	if (vsched == NULL)
+		return -ENOMEM;
+
+	vsched_init(vsched, node->id);
+	vsched_fairsched_link(vsched, node);
+
+	cpus = node->vcpus ? : num_online_cpus();
+	res = vsched_set_vcpus(vsched, cpus);
+	if (res < 0)
+		goto err_add;
+
+	return 0;
+
+err_add:
+	vsched_destroy(vsched);
+	return res;
+}
+
+int vsched_taskcount(struct vcpu_scheduler *vsched)
+{
+	struct task_struct *g, *t;
+	int count = 0;
+
+	if (vsched == NULL)
+		return 0;
+
+	read_lock(&tasklist_lock);
+	do_each_thread_all(g, t) {
+		/* task->vcpu->rq can't point to stale memory, since
+		   both this code and fairsched_set_vcpus() are called under mutex */
+		if (t != task_rq(t)->migration_thread) {
+			if (vsched == t->vsched)
+				count++;
+		}
+	} while_each_thread_all(g, t);
+	read_unlock(&tasklist_lock);
+
+	return count;
+}
+
+int vsched_destroy(struct vcpu_scheduler *vsched)
+{
+	if (vsched == NULL)
+		return 0;
+
+	vsched_set_vcpus(vsched, 0);
+
+	spin_lock_irq(&fairsched_lock);
+	if (vsched->num_online_vcpus ||
+	    !list_empty(&vsched->running_list) ||
+	    !list_empty(&vsched->active_list) ||
+	    !list_empty(&vsched->idle_list))
+		goto err_busy;
+
+	vsched_fairsched_unlink(vsched, vsched->node);
+	spin_unlock_irq(&fairsched_lock);
+
+	spin_lock_irq(&vsched_list_lock);
+	list_del(&vsched->list);
+	spin_unlock_irq(&vsched_list_lock);
+
+	kfree(vsched);
+	return 0;
+
+err_busy:
+	oops_in_progress = 1;
+	printk(KERN_ERR "BUG in vsched_destroy, id %d: n%d r%d a%d i%d\n",
+			vsched->id,
+			vsched->num_online_vcpus,
+			!list_empty(&vsched->running_list),
+			!list_empty(&vsched->active_list),
+			!list_empty(&vsched->idle_list));
+	spin_unlock_irq(&fairsched_lock);
+	oops_in_progress = 0;
+	return -EBUSY;
+	
+}
+#endif /* defined(CONFIG_FAIRSCHED) */
+
+static void init_boot_vcpu(void)
+{
+	int res;
+
+	/*
+	 * We set up boot_vcpu and its runqueue until init_idle() happens
+	 * on cpu0. This is required since timer interrupts can happen
+	 * between sched_init() and init_idle().
+	 */
+	init_vcpu(&boot_idle_vcpu, raw_smp_processor_id());
+	vcpu_rq(&boot_idle_vcpu)->curr = current;
+	res = install_vcpu(&boot_idle_vcpu, &idle_vsched);
+	if (res < 0)
+		panic("Can't install boot idle vcpu");
+
+	init_vcpu(&boot_vcpu, raw_smp_processor_id());
+	vcpu_rq(&boot_vcpu)->curr = current;
+	res = install_vcpu(&boot_vcpu, &default_vsched);
+	if (res < 0)
+		panic("Can't install boot vcpu");
+
+	cpu_set(boot_vcpu.id, default_vsched.vcpu_online_map);
+
+	this_pcpu()->vcpu = &boot_idle_vcpu;
+	this_pcpu()->vsched = &idle_vsched;
+}
+#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */
+
+static void init_pcpu(int id)
+{
+	struct pcpu_info *pcpu;
+
+	pcpu = pcpu(id);
+	pcpu->id = id;
+#ifdef CONFIG_SMP
+	pcpu->sd = NULL;
+#endif
+
+#ifndef CONFIG_SCHED_VCPU
+	init_vcpu(vcpu(id), id);
+#endif
+}
+
+static void init_pcpus(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; i++)
+		init_pcpu(i);
+}
+
+void __init sched_init(void)
+{
+	init_pcpus();
+#if defined(CONFIG_SCHED_VCPU)
+	vsched_init(&idle_vsched, -1);
+	vsched_init(&default_vsched, 0);
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+	vsched_fairsched_link(&idle_vsched, &fairsched_idle_node);
+	vsched_fairsched_link(&default_vsched, &fairsched_init_node);
+#endif
+	init_boot_vcpu();
+#else
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+#endif
+#endif
+
 	set_load_weight(&init_task);
 
 #ifdef CONFIG_RT_MUTEXES
@@ -7186,6 +9176,155 @@ void __init sched_init(void)
 	init_idle(current, smp_processor_id());
 }
 
+#ifdef CONFIG_SCHED_VCPU
+static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh)
+{
+	cpumask_t m;
+	vcpu_t vcpu;
+	int i;
+
+	cpus_clear(m);
+	list_for_each_entry(vcpu, lh, list)
+		cpu_set(vcpu->id, m);
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_isset(i, m))
+			printk("%d ", i);
+}
+
+#define PRINT(s, sz, fmt...)				\
+	do {						\
+		int __out;				\
+		__out = scnprintf(*s, *sz, fmt);	\
+		*s += __out;				\
+		*sz -= __out;				\
+	} while(0)
+
+static void show_rq_array(struct prio_array *array, char *header, char **s, int *sz)
+{
+	struct list_head *list;
+	struct task_struct *p;
+	int k, h;
+
+	h = 0;
+	for (k = 0; k < MAX_PRIO; k++) {
+		list = array->queue + k;
+		if (list_empty(list))
+			continue;
+
+		if (!h) {
+			PRINT(s, sz, header);
+			h = 1;
+		}
+
+		PRINT(s, sz, " prio %d (", k);
+		list_for_each_entry(p, list, run_list)
+			PRINT(s, sz, "%s[%d] ", p->comm, p->pid);
+		PRINT(s, sz, ")");
+	}
+	if (h)
+		PRINT(s, sz, "\n");
+}
+
+static void show_vcpu(vcpu_t vcpu)
+{
+	struct rq *rq;
+	char buf[1024], *s;
+	unsigned long flags;
+	int sz;
+	unsigned long nr_running, cpu_load[3];
+	unsigned long long nr_switches;
+	struct sched_domain *sd;
+	struct task_struct *curr;
+
+	if (vcpu == NULL)
+		return;
+
+	printk("  vcpu %d: last_pcpu %d, state %s%s\n",
+			vcpu->id, vcpu->last_pcpu,
+			vcpu->active ? "A" : "",
+			vcpu->running ? "R" : "");
+	s = buf;
+	sz = sizeof(buf) - 1;
+
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);
+	nr_running = rq->nr_running;
+#ifdef CONFIG_SMP
+	cpu_load[0] = rq->cpu_load[0];
+	cpu_load[1] = rq->cpu_load[1];
+	cpu_load[2] = rq->cpu_load[2];
+	sd = rq->sd;
+#else
+	cpu_load[0] = cpu_load[1] = cpu_load[2] = 0;
+	sd = NULL;
+#endif
+	nr_switches = rq->nr_switches;
+	curr = rq->curr;
+
+	show_rq_array(rq->active, "      active:", &s, &sz);
+	show_rq_array(rq->expired, "      expired:", &s, &sz);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	*s = 0;
+
+	printk("    rq: running %lu, load {%lu,%lu,%lu}, sw %Lu, sd %p, curr %p\n",
+		nr_running, cpu_load[0], cpu_load[1], cpu_load[2], nr_switches,
+		sd, curr);
+
+	printk("%s", buf);
+}
+
+static inline void fairsched_show_node(struct vcpu_scheduler *vsched)
+{
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+
+	node = vsched->node;
+	printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n",
+			node->nr_ready, node->nr_runnable, node->nr_pcpu,
+			node->vsched, smp_processor_id());
+#endif
+}
+
+static void __show_vsched(struct vcpu_scheduler *vsched)
+{
+	char mask[NR_CPUS + 1];
+	int i;
+
+	spin_lock(&fairsched_lock);
+	printk("vsched id=%d\n", vsched_id(vsched));
+	fairsched_show_node(vsched);
+
+	printk("  idle cpus ");
+	show_vcpu_list(vsched, &vsched->idle_list);
+	printk("; active cpus ");
+	show_vcpu_list(vsched, &vsched->active_list);
+	printk("; running cpus ");
+	show_vcpu_list(vsched, &vsched->running_list);
+	printk("\n");
+
+	cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map);
+	printk("  num_online_cpus=%d, mask=%s (w=%d)\n",
+			vsched->num_online_vcpus, mask,
+			cpus_weight(vsched->vcpu_online_map));
+	spin_unlock(&fairsched_lock);
+
+	for (i = 0; i < NR_CPUS; i++)
+		show_vcpu(vsched->vcpu[i]);
+}
+
+void show_vsched(void)
+{
+	struct vcpu_scheduler *vsched;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vsched_list_lock, flags);
+	list_for_each_entry (vsched, &vsched_list, list)
+		__show_vsched(vsched);
+	spin_unlock_irqrestore(&vsched_list_lock, flags);
+}
+#endif /* CONFIG_SCHED_VCPU */
+
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 void __might_sleep(char *file, int line)
 {
@@ -7217,7 +9356,7 @@ void normalize_rt_tasks(void)
 	struct rq *rq;
 
 	read_lock_irq(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (!rt_task(p))
 			continue;
 
@@ -7226,7 +9365,7 @@ void normalize_rt_tasks(void)
 
 		array = p->array;
 		if (array)
-			deactivate_task(p, task_rq(p));
+			__deactivate_task(p, task_rq(p));
 		__setscheduler(p, SCHED_NORMAL, 0);
 		if (array) {
 			__activate_task(p, task_rq(p));
@@ -7260,7 +9399,7 @@ void normalize_rt_tasks(void)
  */
 struct task_struct *curr_task(int cpu)
 {
-	return cpu_curr(cpu);
+	return vcpu_rq(pcpu(cpu)->vcpu)->curr;
 }
 
 /**
@@ -7280,7 +9419,7 @@ struct task_struct *curr_task(int cpu)
  */
 void set_curr_task(int cpu, struct task_struct *p)
 {
-	cpu_curr(cpu) = p;
+	vcpu_rq(pcpu(cpu)->vcpu)->curr = p;
 }
 
 #endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/signal.c kernel-2.6.18-417.el5-028stab121/kernel/signal.c
--- kernel-2.6.18-417.el5.orig/kernel/signal.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/signal.c	2017-01-13 08:40:40.000000000 -0500
@@ -22,11 +22,14 @@
 #include <linux/syscalls.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/signalfd.h>
+#include <linux/kmem_cache.h>
 #include <linux/capability.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
+#include <ub/ub_misc.h>
 #include "audit.h"	/* audit_signal_info() */
 #include <trace/signal.h>
 
@@ -34,8 +37,25 @@
  * SLAB caches for signal bits.
  */
 
-static kmem_cache_t *sigqueue_cachep;
+kmem_cache_t *sigqueue_cachep;
+EXPORT_SYMBOL_GPL(sigqueue_cachep);
 
+static int sig_ve_ignored(int sig, struct siginfo *info, struct task_struct *t)
+{
+	struct ve_struct *ve;
+
+	/* always allow signals from the kernel */
+	if (info == SEND_SIG_FORCED ||
+		       (!is_si_special(info) && SI_FROMKERNEL(info)))
+		return 0;
+
+	ve = current->ve_task_info.owner_env;
+	if (ve->init_entry != t)
+		return 0;
+	if (ve_is_super(get_exec_env()))
+		return 0;
+	return !sig_user_defined(t, sig) || sig_kernel_only(sig);
+}
 
 static int sig_ignored(struct task_struct *t, int sig)
 {
@@ -91,7 +111,7 @@ static inline int has_pending_signals(si
 
 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
 
-static int recalc_sigpending_tsk(struct task_struct *t)
+int recalc_sigpending_tsk(struct task_struct *t)
 {
 	if (t->signal->group_stop_count > 0 ||
 	    (freezing(t)) ||
@@ -108,6 +128,7 @@ static int recalc_sigpending_tsk(struct 
 	 */
 	return 0;
 }
+EXPORT_SYMBOL_GPL(recalc_sigpending_tsk);
 
 /*
  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
@@ -126,13 +147,13 @@ void recalc_sigpending(void)
 
 }
 
-int fork_recalc_sigpending(void)
+int fork_recalc_sigpending(int pid0)
 {
 	struct task_struct *tsk = current;
 	int pending;
 
 	recalc_sigpending();
-	if (likely(!signal_pending(tsk)))
+	if (likely(!signal_pending(tsk) || pid0))
 		return 0;
 
 	pending = 1;
@@ -153,7 +174,7 @@ int fork_recalc_sigpending(void)
 
 /* Given the mask, find the first available signal that should be serviced. */
 
-static int
+int
 next_signal(struct sigpending *pending, sigset_t *mask)
 {
 	unsigned long i, *s, *m, x;
@@ -202,8 +223,13 @@ static struct sigqueue *__sigqueue_alloc
 	atomic_inc(&user->sigpending);
 	if (override_rlimit ||
 	    atomic_read(&user->sigpending) <=
-			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
+			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
+		if (q && ub_siginfo_charge(q, get_task_ub(t))) {
+			kmem_cache_free(sigqueue_cachep, q);
+			q = NULL;
+		}
+	}
 	if (unlikely(q == NULL)) {
 		atomic_dec(&user->sigpending);
 	} else {
@@ -220,6 +246,7 @@ static void __sigqueue_free(struct sigqu
 		return;
 	atomic_dec(&q->user->sigpending);
 	free_uid(q->user);
+	ub_siginfo_uncharge(q);
 	kmem_cache_free(sigqueue_cachep, q);
 }
 
@@ -354,7 +381,16 @@ static int __dequeue_signal(struct sigpe
 {
 	int sig = 0;
 
-	sig = next_signal(pending, mask);
+	/* SIGKILL must have priority, otherwise it is quite easy
+	 * to create an unkillable process, sending sig < SIGKILL
+	 * to self */
+	if (unlikely(sigismember(&pending->signal, SIGKILL))) {
+		if (!sigismember(mask, SIGKILL))
+			sig = SIGKILL;
+	}
+
+	if (likely(!sig))
+		sig = next_signal(pending, mask);
 	if (sig) {
 		if (current->notifier) {
 			if (sigismember(current->notifier_mask, sig)) {
@@ -448,6 +484,7 @@ void signal_wake_up(struct task_struct *
 	if (!wake_up_state(t, mask))
 		kick_process(t);
 }
+EXPORT_SYMBOL_GPL(signal_wake_up);
 
 /*
  * Remove signals in mask from the pending set and queue.
@@ -667,7 +704,7 @@ static int send_signal(int sig, struct s
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = current->pid;
+			q->info.si_pid = virt_pid(current);
 			q->info.si_uid = current->uid;
 			break;
 		case (unsigned long) SEND_SIG_PRIV:
@@ -700,6 +737,7 @@ static int send_signal(int sig, struct s
 	}
 
 out_set:
+	signalfd_notify(t, sig);
 	sigaddset(&signals->signal, sig);
 	return ret;
 }
@@ -1035,7 +1073,8 @@ int group_send_sig_info(int sig, struct 
 	if (!ret && sig) {
 		ret = -ESRCH;
 		if (lock_task_sighand(p, &flags)) {
-			ret = __group_send_sig_info(sig, info, p);
+			ret = sig_ve_ignored(sig, info, p) ? 0 :
+				__group_send_sig_info(sig, info, p);
 			unlock_task_sighand(p, &flags);
 		}
 	}
@@ -1056,13 +1095,18 @@ int __kill_pg_info(int sig, struct sigin
 	if (pgrp <= 0)
 		return -EINVAL;
 
+	/* Use __vpid_to_pid(). This function is used under write_lock
+	 * tasklist_lock. */
+	if (is_virtual_pid(pgrp))
+		pgrp = __vpid_to_pid(pgrp);
+
 	success = 0;
 	retval = -ESRCH;
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		int err = group_send_sig_info(sig, info, p);
 		success |= !err;
 		retval = err;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return success ? 0 : retval;
 }
 
@@ -1090,7 +1134,7 @@ kill_proc_info(int sig, struct siginfo *
 		read_lock(&tasklist_lock);
 		acquired_tasklist_lock = 1;
 	}
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	error = -ESRCH;
 	if (p)
 		error = group_send_sig_info(sig, info, p);
@@ -1111,7 +1155,7 @@ int kill_proc_info_as_uid(int sig, struc
 		return ret;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p) {
 		ret = -ESRCH;
 		goto out_unlock;
@@ -1153,8 +1197,8 @@ static int kill_something_info(int sig, 
 		struct task_struct * p;
 
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
-			if (p->pid > 1 && p->tgid != current->tgid) {
+		for_each_process_ve(p) {
+			if (virt_pid(p) > 1 && p->tgid != current->tgid) {
 				int err = group_send_sig_info(sig, info, p);
 				++count;
 				if (err != -EPERM)
@@ -1287,20 +1331,19 @@ struct sigqueue *sigqueue_alloc(void)
 void sigqueue_free(struct sigqueue *q)
 {
 	unsigned long flags;
+	spinlock_t *lock = &current->sighand->siglock;
+
 	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
 	/*
 	 * If the signal is still pending remove it from the
-	 * pending queue.
+	 * pending queue. We must hold ->siglock while testing
+	 * q->list to serialize with collect_signal().
 	 */
-	if (unlikely(!list_empty(&q->list))) {
-		spinlock_t *lock = &current->sighand->siglock;
-		read_lock(&tasklist_lock);
-		spin_lock_irqsave(lock, flags);
-		if (!list_empty(&q->list))
-			list_del_init(&q->list);
-		spin_unlock_irqrestore(lock, flags);
-		read_unlock(&tasklist_lock);
-	}
+	spin_lock_irqsave(lock, flags);
+	if (!list_empty(&q->list))
+		list_del_init(&q->list);
+	spin_unlock_irqrestore(lock, flags);
+
 	q->flags &= ~SIGQUEUE_PREALLOC;
 	__sigqueue_free(q);
 }
@@ -1343,6 +1386,7 @@ int send_sigqueue(int sig, struct sigque
 		goto out;
 	}
 
+	signalfd_notify(p, sig);
 	list_add_tail(&q->list, &p->pending.list);
 	sigaddset(&p->pending.signal, sig);
 	if (!sigismember(&p->blocked, sig))
@@ -1428,9 +1472,17 @@ void do_notify_parent(struct task_struct
 
 	BUG_ON(tsk->group_leader != tsk || !thread_group_empty(tsk));
 
+#ifdef CONFIG_VE
+	/* Only allow SIGCHLD to be sent across a VE boundary */
+	if (sig != SIGCHLD &&
+			tsk->ve_task_info.owner_env != 
+			tsk->parent->ve_task_info.owner_env)
+		sig = SIGCHLD;
+#endif
+
 	info.si_signo = sig;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, tsk->parent->ve_task_info.owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1486,9 +1538,10 @@ void do_notify_parent_cldstop(struct tas
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
 
+	parent = tsk->group_leader->parent;
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1545,8 +1598,10 @@ finish_stop(int stop_count)
 		read_unlock(&tasklist_lock);
 	}
 
+	set_stop_state(current);
 	schedule();
 	utrace_finish_stop();
+	clear_stop_state(current);
 	/*
 	 * Now we don't run again until continued.
 	 */
@@ -1648,14 +1703,48 @@ static int handle_group_stop(void)
 	return 1;
 }
 
+atomic_t global_suspend = ATOMIC_INIT(0);
+
+/* The refrigerator is the place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+	/* Hmm, should we be allowed to suspend when there are realtime
+	   processes around? */
+	long save;
+	save = current->state;
+	current->state = TASK_UNINTERRUPTIBLE;
+	/* printk("="); */
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (test_and_clear_thread_flag(TIF_FREEZE)) {
+		recalc_sigpending(); /* We sent fake signal, clean it up */
+		if (atomic_read(&global_suspend) ||
+		    atomic_read(&get_exec_env()->suspend)) {
+			current->flags |= PF_FROZEN;
+		} else {
+			current->state = save;
+		}
+	} else {
+		/* The freeze request may have been canceled before we
+		 * entered refrigerator(); in that case do nothing. */
+		current->state = save;
+	}
+	spin_unlock_irq(&current->sighand->siglock);
+
+	while (current->flags & PF_FROZEN) {
+		schedule();
+		current->state = TASK_UNINTERRUPTIBLE;
+	}
+	current->state = save;
+}
+EXPORT_SYMBOL(refrigerator);
+
 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 			  struct pt_regs *regs, void *cookie)
 {
 	sigset_t *mask = &current->blocked;
 	int signr = 0;
 
-	try_to_freeze();
-
 relock:
 	spin_lock_irq(&current->sighand->siglock);
 	for (;;) {
@@ -1922,6 +2011,8 @@ int copy_siginfo_to_user(siginfo_t __use
 	/*
 	 * If you change siginfo_t structure, please be sure
 	 * this code is fixed accordingly.
+	 * Please remember to update the signalfd_copyinfo() function
+	 * inside fs/signalfd.c too, in case siginfo_t changes.
 	 * It should never copy any pad contained in the structure
 	 * to avoid security leaks, but must copy the generic
 	 * 3 ints plus the relevant union member.
@@ -2057,7 +2148,7 @@ sys_kill(int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_USER;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	return kill_something_info(sig, &info, pid);
@@ -2075,7 +2166,7 @@ static int do_tkill(int tgid, int pid, i
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	rcu_read_lock();
@@ -2083,8 +2174,8 @@ static int do_tkill(int tgid, int pid, i
 		read_lock(&tasklist_lock);
 		acquired_tasklist_lock = 1;
 	}
-	p = find_task_by_pid(pid);
-	if (p && (tgid <= 0 || p->tgid == tgid)) {
+	p = find_task_by_pid_ve(pid);
+	if (p && (tgid <= 0 || virt_tgid(p) == tgid)) {
 		error = check_kill_permission(sig, &info, p);
 		/*
 		 * The null signal is a permissions and process existence
@@ -2095,8 +2186,10 @@ static int do_tkill(int tgid, int pid, i
 		 * signal is private anyway.
 		 */
 		if (!error && sig && lock_task_sighand(p, &flags)) {
-			handle_stop_signal(sig, p);
-			error = specific_send_sig_info(sig, &info, p);
+			if (!sig_ve_ignored(sig, &info, p)) {
+				handle_stop_signal(sig, p);
+				error = specific_send_sig_info(sig, &info, p);
+			}
 			unlock_task_sighand(p, &flags);
 		}
 	}
@@ -2467,5 +2560,5 @@ void __init signals_init(void)
 		kmem_cache_create("sigqueue",
 				  sizeof(struct sigqueue),
 				  __alignof__(struct sigqueue),
-				  SLAB_PANIC, NULL, NULL);
+				  SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
diff -upr kernel-2.6.18-417.el5.orig/kernel/softirq.c kernel-2.6.18-417.el5-028stab121/kernel/softirq.c
--- kernel-2.6.18-417.el5.orig/kernel/softirq.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/softirq.c	2017-01-13 08:40:19.000000000 -0500
@@ -13,12 +13,15 @@
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
+#include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <trace/irq.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/irq.h>
 /*
    - No shared variables, all the data are CPU local.
@@ -46,6 +49,13 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
 
 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup);
+static int ksoftirqd_stat[NR_CPUS];
+
+char *softirq_to_name[NR_SOFTIRQS] = {
+	"HI_SOFTIRQ", "TIMER_SOFTIRQ", "NET_TX_SOFTIRQ", "NET_RX_SOFTIRQ",
+	"BLOCK_SOFTIRQ", "TASKLET_SOFTIRQ"
+};
 
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
@@ -56,7 +66,7 @@ static DEFINE_PER_CPU(struct task_struct
 static inline void wakeup_softirqd(void)
 {
 	/* Interrupts are disabled: no need to stop preemption */
-	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+	struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup);
 
 	if (tsk && tsk->state != TASK_RUNNING)
 		wake_up_process(tsk);
@@ -206,10 +216,14 @@ EXPORT_SYMBOL(local_bh_enable_ip);
 
 asmlinkage void __do_softirq(void)
 {
+	struct user_beancounter *ub;
 	struct softirq_action *h;
 	__u32 pending;
 	int max_restart = MAX_SOFTIRQ_RESTART;
 	int cpu;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(get_ve0());
 
 	pending = local_softirq_pending();
 	account_system_vtime(current);
@@ -226,8 +240,10 @@ restart:
 
 	h = softirq_vec;
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		if (pending & 1) {
+			kstat_inc_softirqs(h - softirq_vec);
 			trace_irq_softirq_entry(h, softirq_vec);
 			h->action(h);
 			trace_irq_softirq_exit(h, softirq_vec);
@@ -236,6 +252,7 @@ restart:
 		h++;
 		pending >>= 1;
 	} while (pending);
+	(void)set_exec_ub(ub);
 
 	local_irq_disable();
 
@@ -249,6 +266,7 @@ restart:
 	trace_softirq_exit();
 
 	account_system_vtime(current);
+	(void)set_exec_env(envid);
 	_local_bh_enable();
 }
 
@@ -289,6 +307,7 @@ void irq_exit(void)
 {
 	account_system_vtime(current);
 	trace_hardirq_exit();
+	restore_context();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
@@ -612,6 +631,52 @@ static int __cpuinit cpu_callback(struct
 	return NOTIFY_OK;
 }
 
+static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, cpu;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!write)
+		return ret;
+
+	for_each_online_cpu(cpu) {
+		per_cpu(ksoftirqd_wakeup, cpu) =
+			ksoftirqd_stat[cpu] ? per_cpu(ksoftirqd, cpu) : NULL;
+	}
+	return ret;
+}
+
+static int sysctl_ksoftirqd(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	return -EINVAL;
+}
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 1246,
+		.procname	= "ksoftirqd",
+		.data		= ksoftirqd_stat,
+		.maxlen		= sizeof(ksoftirqd_stat),
+		.mode		= 0644,
+		.proc_handler	= &proc_ksoftirqd,
+		.strategy	= &sysctl_ksoftirqd
+	},
+	{0}
+};
+
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table
+	},
+	{0}
+};
+
 static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
@@ -622,6 +687,7 @@ __init int spawn_ksoftirqd(void)
 	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
+	register_sysctl_table(root_table, 0);
 	return 0;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/kernel/stop_machine.c kernel-2.6.18-417.el5-028stab121/kernel/stop_machine.c
--- kernel-2.6.18-417.el5.orig/kernel/stop_machine.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/stop_machine.c	2017-01-13 08:40:28.000000000 -0500
@@ -89,12 +89,12 @@ static int stop_machine(void)
 	init_completion(&migration_done);
 
 	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
+		if (i == task_cpu(current))
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
 			break;
-		set_cpus_allowed(find_task_by_pid(ret), cpumask_of_cpu(i));
+		set_cpus_allowed(find_task_by_pid_all(ret), cpumask_of_cpu(i));
 		stopmachine_num_threads++;
 	}
 
@@ -169,7 +169,7 @@ struct task_struct *__stop_machine_run(i
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = raw_smp_processor_id();
+		cpu = task_cpu(current);
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
diff -upr kernel-2.6.18-417.el5.orig/kernel/sys.c kernel-2.6.18-417.el5-028stab121/kernel/sys.c
--- kernel-2.6.18-417.el5.orig/kernel/sys.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/sys.c	2017-01-13 08:40:41.000000000 -0500
@@ -10,6 +10,7 @@
 #include <linux/mman.h>
 #include <linux/smp_lock.h>
 #include <linux/notifier.h>
+#include <linux/virtinfo.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
 #include <linux/highuid.h>
@@ -107,7 +108,7 @@ static BLOCKING_NOTIFIER_HEAD(reboot_not
  *	are layered on top of these, with appropriate locking added.
  */
 
-static int notifier_chain_register(struct notifier_block **nl,
+int notifier_chain_register(struct notifier_block **nl,
 		struct notifier_block *n)
 {
 	while ((*nl) != NULL) {
@@ -120,7 +121,7 @@ static int notifier_chain_register(struc
 	return 0;
 }
 
-static int notifier_chain_unregister(struct notifier_block **nl,
+int notifier_chain_unregister(struct notifier_block **nl,
 		struct notifier_block *n)
 {
 	while ((*nl) != NULL) {
@@ -435,6 +436,102 @@ int unregister_reboot_notifier(struct no
 
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
+DECLARE_MUTEX(virtinfo_sem);
+EXPORT_SYMBOL(virtinfo_sem);
+static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];
+
+void __virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{
+	struct vnotifier_block **p;
+
+	for (p = &virtinfo_chain[type];
+	     *p != NULL && nb->priority < (*p)->priority;
+	     p = &(*p)->next);
+	nb->next = *p;
+	smp_wmb();
+	*p = nb;
+}
+
+EXPORT_SYMBOL(__virtinfo_notifier_register);
+
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{
+	down(&virtinfo_sem);
+	__virtinfo_notifier_register(type, nb);
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_register);
+
+struct virtinfo_cnt_struct {
+	volatile unsigned long exit[NR_CPUS];
+	volatile unsigned long entry;
+};
+static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
+
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb)
+{
+	struct vnotifier_block **p;
+	int entry_cpu, exit_cpu;
+	unsigned long cnt, ent;
+
+	down(&virtinfo_sem);
+	for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next);
+	*p = nb->next;
+	smp_mb();
+
+	for_each_cpu_mask(entry_cpu, cpu_possible_map) {
+		while (1) {
+			cnt = 0;
+			for_each_cpu_mask(exit_cpu, cpu_possible_map)
+				cnt +=
+				    per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
+			smp_rmb();
+			ent = per_cpu(virtcnt, entry_cpu).entry;
+			if (cnt == ent)
+				break;
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(HZ / 100);
+		}
+	}
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_unregister);
+
+int virtinfo_notifier_call(int type, unsigned long n, void *data)
+{
+	int ret;
+	int entry_cpu, exit_cpu;
+	struct vnotifier_block *nb;
+
+	entry_cpu = get_cpu();
+	per_cpu(virtcnt, entry_cpu).entry++;
+	smp_wmb();
+	put_cpu();
+
+	nb = virtinfo_chain[type];
+	ret = NOTIFY_DONE;
+	while (nb)
+	{
+		ret = nb->notifier_call(nb, n, data, ret);
+		if(ret & NOTIFY_STOP_MASK) {
+			ret &= ~NOTIFY_STOP_MASK;
+			break;
+		}
+		nb = nb->next;
+	}
+
+	exit_cpu = get_cpu();
+	smp_wmb();
+	per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;
+	put_cpu();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_call);
+
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
 	int no_nice;
@@ -480,17 +577,19 @@ asmlinkage long sys_setpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p)
 				error = set_one_prio(p, niceval, error);
 			break;
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				error = set_one_prio(p, niceval, error);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -500,10 +599,10 @@ asmlinkage long sys_setpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p)
 				if (p->uid == who)
 					error = set_one_prio(p, niceval, error);
-			while_each_thread(g, p);
+			while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* For find_user() */
 			break;
@@ -533,8 +632,11 @@ asmlinkage long sys_getpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			if (!ve_is_super(get_exec_env()) &&
+			    !is_virtual_pid(who))
+				break;
+			p = find_task_by_pid_ve(who);
 			if (p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
@@ -544,11 +646,13 @@ asmlinkage long sys_getpriority(int whic
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
 					retval = niceval;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -558,13 +662,13 @@ asmlinkage long sys_getpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p)
 				if (p->uid == who) {
 					niceval = 20 - task_nice(p);
 					if (niceval > retval)
 						retval = niceval;
 				}
-			while_each_thread(g, p);
+			while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* for find_user() */
 			break;
@@ -695,6 +799,26 @@ asmlinkage long sys_reboot(int magic1, i
 	                magic2 != LINUX_REBOOT_MAGIC2C))
 		return -EINVAL;
 
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env()))
+		switch (cmd) {
+		case LINUX_REBOOT_CMD_RESTART:
+		case LINUX_REBOOT_CMD_RESTART2:
+			set_bit(VE_REBOOT, &get_exec_env()->flags);
+
+		case LINUX_REBOOT_CMD_HALT:
+		case LINUX_REBOOT_CMD_POWER_OFF:
+			force_sig(SIGKILL, get_exec_env()->init_entry);
+
+		case LINUX_REBOOT_CMD_CAD_ON:
+		case LINUX_REBOOT_CMD_CAD_OFF:
+			return 0;
+
+		default:
+			return -EINVAL;
+		}
+#endif
+
 	/* Instead of trying to make the power_off code look like
 	 * halt when pm_power_off is not set do it the easy way.
 	 */
@@ -884,7 +1008,7 @@ asmlinkage long sys_setgid(gid_t gid)
 	return 0;
 }
   
-static int set_user(uid_t new_ruid, int dumpclear)
+int set_user(uid_t new_ruid, int dumpclear)
 {
 	struct user_struct *new_user;
 
@@ -894,7 +1018,7 @@ static int set_user(uid_t new_ruid, int 
 
 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
-			new_user != &root_user) {
+			new_ruid != 0) {
 		free_uid(new_user);
 		return -EAGAIN;
 	}
@@ -909,6 +1033,7 @@ static int set_user(uid_t new_ruid, int 
 	current->uid = new_ruid;
 	return 0;
 }
+EXPORT_SYMBOL(set_user);
 
 /*
  * Unprivileged users may change the real uid to the effective uid
@@ -1197,8 +1322,27 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+#ifdef CONFIG_VE
+unsigned long long ve_relative_clock(struct timespec * ts)
+{
+	unsigned long long offset = 0;
+
+	if (ts->tv_sec > get_exec_env()->start_timespec.tv_sec ||
+	    (ts->tv_sec == get_exec_env()->start_timespec.tv_sec &&
+	     ts->tv_nsec >= get_exec_env()->start_timespec.tv_nsec))
+		offset = (unsigned long long)(ts->tv_sec -
+			get_exec_env()->start_timespec.tv_sec) * NSEC_PER_SEC
+			+ ts->tv_nsec -	get_exec_env()->start_timespec.tv_nsec;
+	return nsec_to_clock_t(offset);
+}
+#endif
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
+#ifdef CONFIG_VE
+	struct timespec now;
+#endif
+
 	/*
 	 *	In the SMP world we might just be unlucky and have one of
 	 *	the times increment as we use it. Since the value is an
@@ -1232,7 +1376,13 @@ asmlinkage long sys_times(struct tms __u
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
+#ifndef CONFIG_VE
 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
+#else
+	/* Compare to calculation in fs/proc/array.c */
+	do_posix_clock_monotonic_gettime(&now);
+	return ve_relative_clock(&now);
+#endif
 }
 
 /*
@@ -1253,21 +1403,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	struct task_struct *p;
 	struct task_struct *group_leader = current->group_leader;
 	int err = -EINVAL;
+	int _pgid;
 
 	if (!pid)
-		pid = group_leader->pid;
+		pid = virt_pid(group_leader);
 	if (!pgid)
 		pgid = pid;
 	if (pgid < 0)
 		return -EINVAL;
 
+	_pgid = vpid_to_pid(pgid);
+
 	/* From this point forward we keep holding onto the tasklist lock
 	 * so that our parent does not change from under us. -DaveM
 	 */
 	write_lock_irq(&tasklist_lock);
 
 	err = -ESRCH;
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p)
 		goto out;
 
@@ -1292,25 +1445,29 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	if (p->signal->leader)
 		goto out;
 
-	if (pgid != pid) {
+	pgid = virt_pid(p);
+	if (_pgid != p->pid) {
 		struct task_struct *p;
 
-		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (p->signal->session == group_leader->signal->session)
+		do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
+			if (p->signal->session == group_leader->signal->session) {
+				pgid = virt_pgid(p);
 				goto ok_pgid;
-		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
+			}
+		} while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
 		goto out;
 	}
 
 ok_pgid:
-	err = security_task_setpgid(p, pgid);
+	err = security_task_setpgid(p, _pgid);
 	if (err)
 		goto out;
 
-	if (process_group(p) != pgid) {
+	if (process_group(p) != _pgid) {
 		detach_pid(p, PIDTYPE_PGID);
-		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, pgid);
+		p->signal->pgrp = _pgid;
+		attach_pid(p, PIDTYPE_PGID, _pgid);
+		set_virt_pgid(p, pgid);
 	}
 
 	err = 0;
@@ -1323,19 +1480,19 @@ out:
 asmlinkage long sys_getpgid(pid_t pid)
 {
 	if (!pid) {
-		return process_group(current);
+		return virt_pgid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getpgid(p);
 			if (!retval)
-				retval = process_group(p);
+				retval = get_task_pgid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1347,7 +1504,7 @@ asmlinkage long sys_getpgid(pid_t pid)
 asmlinkage long sys_getpgrp(void)
 {
 	/* SMP - assuming writes are word atomic this is fine */
-	return process_group(current);
+	return virt_pgid(current);
 }
 
 #endif
@@ -1355,19 +1512,19 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid) {
-		return current->signal->session;
+		return virt_sid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if(p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = p->signal->session;
+				retval = get_task_sid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1395,14 +1552,17 @@ asmlinkage long sys_setsid(void)
 	 * session id and so the check will always fail and make it so
 	 * init cannot successfully call setsid.
 	 */
-	if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
+	if (session > 1 && find_task_by_pid_type_ve(PIDTYPE_PGID, session))
 		goto out;
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(session, session);
+	set_virt_pgid(group_leader, virt_pid(group_leader));
+	set_virt_sid(group_leader, virt_pid(group_leader));
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
-	err = process_group(group_leader);
+
+	err = virt_pgid(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
 	mutex_unlock(&tty_mutex);
@@ -1675,7 +1835,7 @@ asmlinkage long sys_newuname(struct new_
 	int errno = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name, utsname(), sizeof *name))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1686,15 +1846,15 @@ asmlinkage long sys_sethostname(char __u
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.nodename, tmp, len);
-		system_utsname.nodename[len] = 0;
+		memcpy(utsname()->nodename, tmp, len);
+		utsname()->nodename[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -1710,11 +1870,11 @@ asmlinkage long sys_gethostname(char __u
 	if (len < 0)
 		return -EINVAL;
 	down_read(&uts_sem);
-	i = 1 + strlen(system_utsname.nodename);
+	i = 1 + strlen(utsname()->nodename);
 	if (i > len)
 		i = len;
 	errno = 0;
-	if (copy_to_user(name, system_utsname.nodename, i))
+	if (copy_to_user(name, utsname()->nodename, i))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1731,7 +1891,7 @@ asmlinkage long sys_setdomainname(char _
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
@@ -1739,8 +1899,8 @@ asmlinkage long sys_setdomainname(char _
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.domainname, tmp, len);
-		system_utsname.domainname[len] = 0;
+		memcpy(utsname()->domainname, tmp, len);
+		utsname()->domainname[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -2206,7 +2366,11 @@ asmlinkage long sys_getcpu(unsigned __us
 	   		   struct getcpu_cache __user *cache)
 {
 	int err = 0;
+#ifdef CONFIG_SCHED_VCPU
+	int cpu = task_cpu(current);
+#else
 	int cpu = raw_smp_processor_id();
+#endif
 	if (cpup)
 		err |= put_user(cpu, cpup);
 	if (nodep)
diff -upr kernel-2.6.18-417.el5.orig/kernel/sysctl.c kernel-2.6.18-417.el5-028stab121/kernel/sysctl.c
--- kernel-2.6.18-417.el5.orig/kernel/sysctl.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/sysctl.c	2017-01-13 08:40:41.000000000 -0500
@@ -24,6 +24,8 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
+#include <linux/ve_task.h>
 #include <linux/capability.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
@@ -45,6 +47,7 @@
 #include <linux/syscalls.h>
 #include <linux/nfs_fs.h>
 #include <linux/acpi.h>
+#include <linux/pid.h>
 #ifndef __GENKSYMS__
 #include <linux/key.h>
 #endif
@@ -60,6 +63,8 @@ extern int proc_nr_files(ctl_table *tabl
 #endif
 
 #if defined(CONFIG_SYSCTL)
+extern int gr_handle_sysctl_mod(const char *dirname, const char *name,
+				const int op);
 
 /* External variables not in a header file. */
 extern int C_A_D;
@@ -70,6 +75,7 @@ extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
 extern int suid_dumpable;
+extern int sysctl_at_vsyscall;
 extern char core_pattern[];
 extern int cad_pid;
 extern int pid_max;
@@ -86,6 +92,9 @@ extern int blk_iopoll_enabled;
 extern int vm_devzero_optimized;
 extern int vm_dirty_bytes;
 extern int dirty_background_bytes;
+extern unsigned relatime_interval; /* fs/inode.c */
+extern int ve_area_access_check; /* fs/namei.c */
+extern unsigned int sysctl_ve_mount_nr;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -100,7 +109,8 @@ int proc_dointvec_minmax_softlockup(
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
-extern unsigned int vdso_enabled, vdso_populate;
+extern int vdso_enabled;
+extern unsigned int vdso_populate;
 
 int exec_shield = (1<<0);
 /* exec_shield is a bitmask:
@@ -127,6 +137,10 @@ __setup("exec-shield=", setup_exec_shiel
 static int sixty = 60;
 static int threehundred = 300;
 #endif
+#ifdef CONFIG_VE
+int glob_ve_meminfo = 0;
+EXPORT_SYMBOL(glob_ve_meminfo);
+#endif
 
 static int zero;
 static int one = 1;
@@ -139,6 +153,14 @@ static int min_percpu_pagelist_fract = 8
 
 static int ngroups_max = NGROUPS_MAX;
 
+int ve_allow_kthreads = 1;
+int snapapi_enable = 1;
+EXPORT_SYMBOL(snapapi_enable);
+EXPORT_SYMBOL(ve_allow_kthreads);
+#ifdef CONFIG_MAGIC_SYSRQ
+extern int sysrq_key_scancode;
+#endif
+
 #ifdef CONFIG_KMOD
 extern char modprobe_path[];
 extern int modules_disabled;
@@ -147,13 +169,8 @@ extern int modules_disabled;
 extern int sg_big_buff;
 #endif
 #ifdef CONFIG_SYSVIPC
-extern size_t shm_ctlmax;
-extern size_t shm_ctlall;
-extern int shm_ctlmni;
-extern int msg_ctlmax;
-extern int msg_ctlmnb;
-extern int msg_ctlmni;
-extern int sem_ctls[];
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 #ifdef __sparc__
@@ -161,6 +178,13 @@ extern char reboot_command [];
 extern int stop_a_enabled;
 extern int scons_pwroff;
 #endif
+#ifdef CONFIG_SCHED_VCPU
+extern u32 vcpu_sched_timeslice;
+extern int vcpu_timeslice;
+extern u32 vcpu_hot_timeslice;
+#endif
+
+extern int alloc_fail_warn;
 
 #ifdef __hppa__
 extern int pwrsw_enabled;
@@ -176,6 +200,7 @@ extern int spin_retry;
 #endif
 
 extern int sysctl_hz_timer;
+int decode_call_traces = 1;
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 extern int acct_parm[];
@@ -192,10 +217,32 @@ extern int max_lock_depth;
 static int proc_dmesg_restrict(ctl_table *table, int write, struct file *filp,
 				void __user *buffer, size_t *lenp, loff_t *ppos);
 
+#ifdef CONFIG_FAIRSCHED
+extern int fairsched_max_latency;
+extern int scale_vcpu_frequency;
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+		        void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context);
+
+static int proc_dointvec_ve(ctl_table *ctl, int write, struct file * filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
+static int sysctl_strategy_ve(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context);
+
+static int proc_pid_max(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
+static int sysctl_pid_max(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context);
 
 static ctl_table root_table[];
 static struct ctl_table_header root_table_header =
@@ -241,6 +288,8 @@ static void register_proc_table(ctl_tabl
 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
 #endif
 
+extern struct new_utsname virt_utsname;
+
 /* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */
 #define CTL_UNNUMBERED          -2
 
@@ -300,49 +349,109 @@ static ctl_table kern_table[] = {
 		.extra1		= &sched_interactive_min,
 		.extra2		= &sched_interactive_max,
 	},
+#ifndef CONFIG_UTS_NS
 	{
 		.ctl_name	= KERN_OSTYPE,
 		.procname	= "ostype",
-		.data		= system_utsname.sysname,
-		.maxlen		= sizeof(system_utsname.sysname),
+		.data		= init_uts_ns.name.sysname,
+		.maxlen		= sizeof(init_uts_ns.name.sysname),
 		.mode		= 0444,
-		.proc_handler	= &proc_doutsstring,
-		.strategy	= &sysctl_string,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_OSRELEASE,
 		.procname	= "osrelease",
-		.data		= system_utsname.release,
-		.maxlen		= sizeof(system_utsname.release),
+		.data		= init_uts_ns.name.release,
+		.maxlen		= sizeof(init_uts_ns.name.release),
 		.mode		= 0444,
-		.proc_handler	= &proc_doutsstring,
-		.strategy	= &sysctl_string,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_VERSION,
 		.procname	= "version",
-		.data		= system_utsname.version,
-		.maxlen		= sizeof(system_utsname.version),
+		.data		= init_uts_ns.name.version,
+		.maxlen		= sizeof(init_uts_ns.name.version),
 		.mode		= 0444,
-		.proc_handler	= &proc_doutsstring,
-		.strategy	= &sysctl_string,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_NODENAME,
 		.procname	= "hostname",
-		.data		= system_utsname.nodename,
-		.maxlen		= sizeof(system_utsname.nodename),
+		.data		= init_uts_ns.name.nodename,
+		.maxlen		= sizeof(init_uts_ns.name.nodename),
 		.mode		= 0644,
-		.proc_handler	= &proc_doutsstring,
-		.strategy	= &sysctl_string,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_DOMAINNAME,
 		.procname	= "domainname",
-		.data		= system_utsname.domainname,
-		.maxlen		= sizeof(system_utsname.domainname),
+		.data		= init_uts_ns.name.domainname,
+		.maxlen		= sizeof(init_uts_ns.name.domainname),
 		.mode		= 0644,
-		.proc_handler	= &proc_doutsstring,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+	},
+#else  /* !CONFIG_UTS_NS */
+	{
+		.ctl_name	= KERN_OSTYPE,
+		.procname	= "ostype",
+		.data		= NULL,
+		/* could maybe use __NEW_UTS_LEN here? */
+		.maxlen		= FIELD_SIZEOF(struct new_utsname, sysname),
+		.mode		= 0444,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+	},
+	{
+		.ctl_name	= KERN_OSRELEASE,
+		.procname	= "osrelease",
+		.data		= NULL,
+		.maxlen		= FIELD_SIZEOF(struct new_utsname, release),
+		.mode		= 0444,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+	},
+	{
+		.ctl_name	= KERN_VERSION,
+		.procname	= "version",
+		.data		= NULL,
+		.maxlen		= FIELD_SIZEOF(struct new_utsname, version),
+		.mode		= 0444,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+	},
+	{
+		.ctl_name	= KERN_NODENAME,
+		.procname	= "hostname",
+		.data		= NULL,
+		.maxlen		= FIELD_SIZEOF(struct new_utsname, nodename),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+	},
+	{
+		.ctl_name	= KERN_DOMAINNAME,
+		.procname	= "domainname",
+		.data		= NULL,
+		.maxlen		= FIELD_SIZEOF(struct new_utsname, domainname),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_uts_string,
+		.strategy	= &sysctl_uts_string,
+		.virt_handler	= 1,
+	},
+#endif /* !CONFIG_UTS_NS */
+	{
+		.ctl_name	= KERN_VIRT_OSRELEASE,
+		.procname	= "virt_osrelease",
+		.data		= virt_utsname.release,
+		.maxlen		= sizeof(virt_utsname.release),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_uts_string,
 		.strategy	= &sysctl_string,
 	},
 	{
@@ -435,10 +544,11 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_CAP_BSET,
 		.procname	= "cap-bound",
-		.data		= &cap_bset,
+		.data		= NULL,
 		.maxlen		= sizeof(kernel_cap_t),
 		.mode		= 0600,
 		.proc_handler	= &proc_dointvec_bset,
+		.strategy	= &sysctl_strategy_bset,
 	},
 #ifdef CONFIG_BLK_DEV_INITRD
 	{
@@ -477,6 +587,22 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+	{
+		.ctl_name	= KERN_SILENCE_LEVEL,
+		.procname	= "silence-level",
+		.data		= &console_silence_loglevel,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= KERN_ALLOC_FAIL_WARN,
+		.procname	= "alloc_fail_warn",
+		.data		= &alloc_fail_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 #ifdef __hppa__
 	{
 		.ctl_name	= KERN_HPPA_PWRSW,
@@ -509,7 +635,8 @@ static ctl_table kern_table[] = {
 		.data		= &console_loglevel,
 		.maxlen		= 4*sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_dointvec_ve_immutable,
+		.virt_handler	= 1,
 	},
 #ifdef CONFIG_KMOD
 	{
@@ -540,8 +667,10 @@ static ctl_table kern_table[] = {
 		.data		= &uevent_helper,
 		.maxlen		= UEVENT_HELPER_PATH_LEN,
 		.mode		= 0644,
-		.proc_handler	= &proc_dostring,
-		.strategy	= &sysctl_string,
+		.proc_handler	= &proc_dostring_ve_immutable,
+		.strategy	= &sysctl_string_ve_immutable,
+		.extra1		= "",
+		.virt_handler	= 1,
 	},
 #endif
 #ifdef CONFIG_CHR_DEV_SG
@@ -568,58 +697,65 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_SHMMAX,
 		.procname	= "shmmax",
-		.data		= &shm_ctlmax,
+		.data		= NULL,
 		.maxlen		= sizeof (size_t),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_SHMALL,
 		.procname	= "shmall",
-		.data		= &shm_ctlall,
+		.data		= NULL,
 		.maxlen		= sizeof (size_t),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_SHMMNI,
 		.procname	= "shmmni",
-		.data		= &shm_ctlmni,
+		.data		= NULL,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_MSGMAX,
 		.procname	= "msgmax",
-		.data		= &msg_ctlmax,
+		.data		= NULL,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_MSGMNI,
 		.procname	= "msgmni",
-		.data		= &msg_ctlmni,
+		.data		= NULL,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_MSGMNB,
 		.procname	=  "msgmnb",
-		.data		= &msg_ctlmnb,
+		.data		= NULL,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= KERN_SEM,
 		.procname	= "sem",
-		.data		= &sem_ctls,
+		.data		= NULL,
 		.maxlen		= 4*sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_do_ipc_string,
+		.virt_handler	= 1,
 	},
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -631,6 +767,22 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= KERN_VCPU_HOT_TIMESLICE,
+		.procname	= "vcpu_hot_timeslice",
+		.data		= &vcpu_hot_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_SYSRQ_KEY_SCANCODE,
+		.procname	= "sysrq-key",
+		.data		= &sysrq_key_scancode,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= KERN_CADPID,
@@ -714,17 +866,64 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_SCHED_VCPU
+	{
+		.ctl_name	= KERN_VCPU_SCHED_TIMESLICE,
+		.procname	= "vcpu_sched_timeslice",
+		.data		= &vcpu_sched_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VCPU_TIMESLICE,
+		.procname	= "vcpu_timeslice",
+		.data		= &vcpu_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_FAIRSCHED
+	{
+		.ctl_name	= KERN_FAIRSCHED_MAX_LATENCY,
+		.procname	= "fairsched-max-latency",
+		.data		=  &fairsched_max_latency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &fsch_sysctl_latency
+	},
+#endif
 	{
 		.ctl_name	= KERN_PIDMAX,
 		.procname	= "pid_max",
 		.data		= &pid_max,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= sysctl_intvec,
+		.proc_handler	= proc_pid_max,
+		.strategy	= sysctl_pid_max,
 		.extra1		= &pid_max_min,
 		.extra2		= &pid_max_max,
+		.virt_handler	= 1,
 	},
+#ifdef CONFIG_VE
+	{
+		.ctl_name	= KERN_VIRT_PIDS,
+		.procname	= "virt_pids",
+		.data		= &glob_virt_pids,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VE_MEMINFO,
+		.procname	= "ve_meminfo",
+		.data		= &glob_ve_meminfo,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{
 		.ctl_name	= KERN_PANIC_ON_OOPS,
 		.procname	= "panic_on_oops",
@@ -816,10 +1015,14 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_RANDOMIZE,
 		.procname	= "randomize_va_space",
-		.data		= &randomize_va_space,
+		.data		= &_randomize_va_space,
+		.extra1		= (void *)offsetof(struct ve_struct,
+							_randomize_va_space),
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_dointvec_ve,
+		.strategy	= &sysctl_strategy_ve,
+		.virt_handler	= 1,
 	},
 #endif
 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
@@ -950,6 +1153,14 @@ static ctl_table kern_table[] = {
 		.mode		= 0555,
 		.child		= key_sysctls,
 	},
+	{
+		.ctl_name	= KERN_SCALE_VCPU_FREQUENCY,
+		.procname	= "scale_vcpu_frequency",
+		.data		= &scale_vcpu_frequency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{ .ctl_name = 0 }
 };
@@ -1016,6 +1227,7 @@ static ctl_table vm_table[] = {
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &mmap_min_addr_handler,
+		.virt_handler	= 1,
 	},
 	{
 		.ctl_name	= VM_DIRTY_WB_CS,
@@ -1313,6 +1525,14 @@ static ctl_table vm_table[] = {
 
 static ctl_table fs_table[] = {
 	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "relatime_interval",
+		.data		= &relatime_interval,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
 		.ctl_name	= FS_NRINODE,
 		.procname	= "inode-nr",
 		.data		= &inodes_stat,
@@ -1435,6 +1655,30 @@ static ctl_table fs_table[] = {
 #endif	
 #endif
 	{
+		.ctl_name	= FS_ODIRECT,
+		.procname	= "odirect_enable",
+		.data		= &odirect_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= FS_SNAPAPI,
+		.procname	= "snapapi_enable",
+		.data		= &snapapi_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= FS_LSYSCALL,
+		.procname	= "lsyscall_enable",
+		.data		= &lsyscall_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
 		.ctl_name	= KERN_SETUID_DUMPABLE,
 		.procname	= "suid_dumpable",
 		.data		= &suid_dumpable,
@@ -1442,6 +1686,22 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= FS_AT_VSYSCALL,
+		.procname	= "vsyscall",
+		.data		= &sysctl_at_vsyscall,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "ve-mount-nr",
+		.data		= &sysctl_ve_mount_nr,
+		.maxlen		= sizeof(sysctl_ve_mount_nr),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -1451,6 +1711,14 @@ extern int proc_kprobes_optimization_han
 					     void __user *buffer,
 					     size_t *length, loff_t *ppos);
 static ctl_table debug_table[] = {
+	{
+		.ctl_name	= DBG_DECODE_CALLTRACES,
+		.procname	= "decode_call_traces",
+		.data		= &decode_call_traces,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 #if defined(CONFIG_OPTPROBES)
 	{
 		.ctl_name	= DEBUG_KPROBES_OPTIMIZE,
@@ -1527,6 +1795,7 @@ int do_sysctl(int __user *name, int nlen
 {
 	struct list_head *tmp;
 	int error = -ENOTDIR;
+	struct ve_struct *ve;
 
 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
 		return -ENOTDIR;
@@ -1535,13 +1804,24 @@ int do_sysctl(int __user *name, int nlen
 		if (!oldlenp || get_user(old_len, oldlenp))
 			return -EFAULT;
 	}
+	ve = get_exec_env();
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	tmp = ve->sysctl_lh.next;
+#else
 	tmp = &root_table_header.ctl_entry;
+#endif
 	do {
-		struct ctl_table_header *head =
-			list_entry(tmp, struct ctl_table_header, ctl_entry);
+		struct ctl_table_header *head;
 		void *context = NULL;
 
+#ifdef CONFIG_VE
+		if (tmp == &ve->sysctl_lh)
+			/* second pass over global variables */
+			tmp = &root_table_header.ctl_entry;
+#endif
+
+		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
 		if (!use_table(head))
 			continue;
 
@@ -1595,10 +1875,17 @@ static int test_perm(int mode, int op)
 static inline int ctl_perm(ctl_table *table, int op)
 {
 	int error;
+	int mode = table->mode;
+
+	if (table->de && gr_handle_sysctl_mod(table->de->parent->name, table->de->name, op))
+		return -EACCES;
 	error = security_sysctl(table, op);
 	if (error)
 		return error;
-	return test_perm(table->mode, op);
+	if (!ve_accessible(table->owner_env, get_exec_env()) &&
+			!table->virt_handler)
+		mode &= ~0222; /* disable write access */
+	return test_perm(mode, op);
 }
 
 static int parse_table(int __user *name, int nlen,
@@ -1640,6 +1927,36 @@ repeat:
 	return -ENOTDIR;
 }
 
+int __do_sysctl_strategy (void  *data, ctl_table *table,
+			int __user *name, int nlen,
+			void __user *oldval, size_t __user *oldlenp,
+			void __user *newval, size_t newlen, void **context) {
+	size_t len;
+
+	if (oldval && oldlenp) {
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, data, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	if (newval && newlen) {
+		len = newlen;
+		if (len > table->maxlen)
+			len = table->maxlen;
+		if (copy_from_user(data, newval, len))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
 /* Perform the actual read/write of a sysctl table entry. */
 int do_sysctl_strategy (ctl_table *table, 
 			int __user *name, int nlen,
@@ -1647,7 +1964,6 @@ int do_sysctl_strategy (ctl_table *table
 			void __user *newval, size_t newlen, void **context)
 {
 	int op = 0, rc;
-	size_t len;
 
 	if (oldval)
 		op |= 004;
@@ -1667,27 +1983,10 @@ int do_sysctl_strategy (ctl_table *table
 
 	/* If there is no strategy routine, or if the strategy returns
 	 * zero, proceed with automatic r/w */
-	if (table->data && table->maxlen) {
-		if (oldval && oldlenp) {
-			if (get_user(len, oldlenp))
-				return -EFAULT;
-			if (len) {
-				if (len > table->maxlen)
-					len = table->maxlen;
-				if(copy_to_user(oldval, table->data, len))
-					return -EFAULT;
-				if(put_user(len, oldlenp))
-					return -EFAULT;
-			}
-		}
-		if (newval && newlen) {
-			len = newlen;
-			if (len > table->maxlen)
-				len = table->maxlen;
-			if(copy_from_user(table->data, newval, len))
-				return -EFAULT;
-		}
-	}
+	if (table->data && table->maxlen)
+		return __do_sysctl_strategy (table->data, table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+
 	return 0;
 }
 
@@ -1764,6 +2063,8 @@ struct ctl_table_header *register_sysctl
 					       int insert_at_head)
 {
 	struct ctl_table_header *tmp;
+	struct list_head *lh;
+
 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
 	if (!tmp)
 		return NULL;
@@ -1772,17 +2073,73 @@ struct ctl_table_header *register_sysctl
 	tmp->used = 0;
 	tmp->unregistering = NULL;
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	lh = &get_exec_env()->sysctl_lh;
+#else
+	lh = &root_table_header.ctl_entry;
+#endif
 	if (insert_at_head)
-		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add(&tmp->ctl_entry, lh);
 	else
-		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add_tail(&tmp->ctl_entry, lh);
 	spin_unlock(&sysctl_lock);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	register_proc_table(table, get_exec_env()->proc_sys_root, tmp);
+#else
 	register_proc_table(table, proc_sys_root, tmp);
 #endif
+#endif
 	return tmp;
 }
 
+void free_sysctl_clone(ctl_table *clone)
+{
+	int i;
+
+	for (i = 0; clone[i].ctl_name != 0; i++)
+		if (clone[i].child != NULL)
+			free_sysctl_clone(clone[i].child);
+
+	kfree(clone);
+}
+
+ctl_table *clone_sysctl_template(ctl_table *tmpl)
+{
+	int i, nr;
+	ctl_table *clone;
+
+	nr = 0;
+	while (tmpl[nr].ctl_name != 0)
+		nr++;
+	nr++;
+
+	clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
+	if (clone == NULL)
+		return NULL;
+
+	memcpy(clone, tmpl, nr * sizeof(ctl_table));
+	for (i = 0; i < nr; i++) {
+		clone[i].owner_env = get_exec_env();
+		clone[i].de = NULL;
+		if (tmpl[i].child == NULL)
+			continue;
+
+		clone[i].child = clone_sysctl_template(tmpl[i].child);
+		if (clone[i].child == NULL)
+			goto unroll;
+	}
+	return clone;
+
+unroll:
+	for (i--; i >= 0; i--)
+		if (clone[i].child != NULL)
+			free_sysctl_clone(clone[i].child);
+
+	kfree(clone);
+	return NULL;
+}
+
 /**
  * unregister_sysctl_table - unregister a sysctl table hierarchy
  * @header: the header returned from register_sysctl_table
@@ -1796,8 +2153,12 @@ void unregister_sysctl_table(struct ctl_
 	spin_lock(&sysctl_lock);
 	start_unregistering(header);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
+#else
 	unregister_proc_table(header->ctl_table, proc_sys_root);
 #endif
+#endif
 	spin_unlock(&sysctl_lock);
 	kfree(header);
 }
@@ -1841,6 +2202,9 @@ static void register_proc_table(ctl_tabl
 			/* If the subdir exists already, de is non-NULL */
 		}
 
+		if (table->virt_handler)
+			mode |= S_ISVTX;
+
 		if (!de) {
 			de = create_proc_entry(table->procname, mode, root);
 			if (!de)
@@ -1883,11 +2247,6 @@ static void unregister_proc_table(ctl_ta
 		 * its fields.  We are under sysctl_lock here.
 		 */
 		de->data = NULL;
-
-		/* Don't unregister proc entries that are still being used.. */
-		if (atomic_read(&de->count))
-			continue;
-
 		table->de = NULL;
 		remove_proc_entry(table->procname, root);
 	}
@@ -1975,14 +2334,14 @@ static ssize_t proc_writesys(struct file
  *
  * Returns 0 on success.
  */
-int proc_dostring(ctl_table *table, int write, struct file *filp,
+static int _proc_dostring(void *data, int maxlen, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	size_t len;
 	char __user *p;
 	char c;
 	
-	if (!table->data || !table->maxlen || !*lenp ||
+	if (!data || !maxlen || !*lenp ||
 	    (*ppos && !write)) {
 		*lenp = 0;
 		return 0;
@@ -1998,20 +2357,20 @@ int proc_dostring(ctl_table *table, int 
 				break;
 			len++;
 		}
-		if (len >= table->maxlen)
-			len = table->maxlen-1;
-		if(copy_from_user(table->data, buffer, len))
+		if (len >= maxlen)
+			len = maxlen-1;
+		if(copy_from_user(data, buffer, len))
 			return -EFAULT;
-		((char *) table->data)[len] = 0;
+		((char *) data)[len] = 0;
 		*ppos += *lenp;
 	} else {
-		len = strlen(table->data);
-		if (len > table->maxlen)
-			len = table->maxlen;
+		len = strlen(data);
+		if (len > maxlen)
+			len = maxlen;
 		if (len > *lenp)
 			len = *lenp;
 		if (len)
-			if(copy_to_user(buffer, table->data, len))
+			if(copy_to_user(buffer, data, len))
 				return -EFAULT;
 		if (len < *lenp) {
 			if(put_user('\n', ((char __user *) buffer) + len))
@@ -2024,12 +2383,20 @@ int proc_dostring(ctl_table *table, int 
 	return 0;
 }
 
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return _proc_dostring(table->data, table->maxlen, write,
+			filp, buffer, lenp, ppos);
+}
+
 /*
  *	Special case of dostring for the UTS structure. This has locks
  *	to observe. Should this be in kernel/sys.c ????
  */
  
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+#ifndef CONFIG_UTS_NS
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int r;
@@ -2046,6 +2413,100 @@ static int proc_doutsstring(ctl_table *t
 	return r;
 }
 
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	int r;
+
+	if (newval && newlen) {
+		down_write(&uts_sem);
+		r = sysctl_string(table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+		up_write(&uts_sem);
+	} else {
+		down_read(&uts_sem);
+		r = sysctl_string(table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+		up_read(&uts_sem);
+	}
+	return r;
+}
+#else /* !CONFIG_UTS_NS */
+static char *choose_uts_string(int ctl_name)
+{
+	struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
+
+	switch (ctl_name) {
+	case KERN_OSTYPE:
+		return uts_ns->name.sysname;
+	case KERN_NODENAME:
+		return uts_ns->name.nodename;
+	case KERN_VERSION:
+		return uts_ns->name.version;
+	case KERN_DOMAINNAME:
+		return uts_ns->name.domainname;
+	case KERN_VIRT_OSRELEASE:
+		if (!ve_is_super(get_exec_env()) || uts_ns == &init_uts_ns)
+			return virt_utsname.release;
+		/* else - tune the release */
+	case KERN_OSRELEASE:
+		return uts_ns->name.release;
+	default:
+		return NULL;
+	}
+}
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int r;
+	char* which;
+
+	which = choose_uts_string(table->ctl_name);
+	if (!which)
+		return -EINVAL;
+
+	if (!write) {
+		down_read(&uts_sem);
+		r=_proc_dostring(which,table->maxlen,0,filp,buffer,lenp, ppos);
+		up_read(&uts_sem);
+	} else {
+		down_write(&uts_sem);
+		r=_proc_dostring(which,table->maxlen,1,filp,buffer,lenp, ppos);
+		up_write(&uts_sem);
+	}
+	return r;
+}
+
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	ctl_table tmp_table;
+	char *which;
+	int r;
+
+	which = choose_uts_string(table->ctl_name);
+	if (!which)
+		return -EINVAL;
+
+	tmp_table = *table;
+	tmp_table.data = which;
+	if (newval && newlen) {
+		down_write(&uts_sem);
+		r = sysctl_string(&tmp_table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+		up_write(&uts_sem);
+	} else {
+		down_read(&uts_sem);
+		r = sysctl_string(&tmp_table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+		up_read(&uts_sem);
+	}
+	return r;
+}
+#endif /* !CONFIG_UTS_NS */
+
 static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
 				 int *valp,
 				 int write, void *data)
@@ -2065,8 +2526,9 @@ static int do_proc_dointvec_conv(int *ne
 	return 0;
 }
 
-static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos,
+static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
+		  int write, struct file *filp, void __user *buffer,
+		  size_t *lenp, loff_t *ppos,
 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
 			      int write, void *data),
 		  void *data)
@@ -2079,13 +2541,13 @@ static int do_proc_dointvec(ctl_table *t
 	char buf[TMPBUFLEN], *p;
 	char __user *s = buffer;
 	
-	if (!table->data || !table->maxlen || !*lenp ||
+	if (!tbl_data || !table->maxlen || !*lenp ||
 	    (*ppos && !write)) {
 		*lenp = 0;
 		return 0;
 	}
 	
-	i = (int *) table->data;
+	i = (int *) tbl_data;
 	vleft = table->maxlen / sizeof(*i);
 	left = *lenp;
 
@@ -2174,6 +2636,16 @@ static int do_proc_dointvec(ctl_table *t
 #undef TMPBUFLEN
 }
 
+static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos,
+		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+			      int write, void *data),
+		  void *data)
+{
+	return __do_proc_dointvec(table->data, table, write, filp,
+			buffer, lenp, ppos, conv, data);
+}
+
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -2241,13 +2713,23 @@ int proc_dointvec_bset(ctl_table *table,
 {
 	int op;
 
-	if (write && !capable(CAP_SYS_MODULE)) {
+	if (write && !capable(CAP_SYS_MODULE))
 		return -EPERM;
-	}
 
 	op = (current->pid == 1) ? OP_SET : OP_AND;
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
-				do_proc_dointvec_bset_conv,&op);
+	return __do_proc_dointvec(&cap_bset, table, write, filp,
+			buffer, lenp, ppos, do_proc_dointvec_bset_conv, &op);
+}
+
+int sysctl_strategy_bset(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context) {
+
+	if (newval && !capable(CAP_SYS_MODULE))
+		return -EPERM;
+
+	return __do_sysctl_strategy (&cap_bset, table, name, nlen,
+			oldval, oldlenp, newval, newlen, context);
 }
 
 static int proc_dmesg_restrict(ctl_table *table, int write, struct file *filp,
@@ -2336,7 +2818,7 @@ int proc_dointvec_minmax_softlockup(
 }
 #endif
 
-static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
 				     struct file *filp,
 				     void __user *buffer,
 				     size_t *lenp, loff_t *ppos,
@@ -2350,13 +2832,13 @@ static int do_proc_doulongvec_minmax(ctl
 	char buf[TMPBUFLEN], *p;
 	char __user *s = buffer;
 	
-	if (!table->data || !table->maxlen || !*lenp ||
+	if (!data || !table->maxlen || !*lenp ||
 	    (*ppos && !write)) {
 		*lenp = 0;
 		return 0;
 	}
 	
-	i = (unsigned long *) table->data;
+	i = (unsigned long *) data;
 	min = (unsigned long *) table->extra1;
 	max = (unsigned long *) table->extra2;
 	vleft = table->maxlen / sizeof(unsigned long);
@@ -2441,6 +2923,17 @@ static int do_proc_doulongvec_minmax(ctl
 #undef TMPBUFLEN
 }
 
+static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+				     struct file *filp,
+				     void __user *buffer,
+				     size_t *lenp, loff_t *ppos,
+				     unsigned long convmul,
+				     unsigned long convdiv)
+{
+	return __do_proc_doulongvec_minmax(table->data, table, write,
+			filp, buffer, lenp, ppos, convmul, convdiv);
+}
+
 /**
  * proc_doulongvec_minmax - read a vector of long integers with min/max values
  * @table: the sysctl table
@@ -2896,6 +3389,49 @@ int proc_do_large_bitmap(struct ctl_tabl
 	}
 }
 
+#ifdef CONFIG_SYSVIPC
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	void *data;
+	struct ipc_namespace *ns;
+
+	ns = current->nsproxy->ipc_ns;
+
+	switch (table->ctl_name) {
+	case KERN_SHMMAX:
+		data = &ns->shm_ctlmax;
+		goto proc_minmax;
+	case KERN_SHMALL:
+		data = &ns->shm_ctlall;
+		goto proc_minmax;
+	case KERN_SHMMNI:
+		data = &ns->shm_ctlmni;
+		break;
+	case KERN_MSGMAX:
+		data = &ns->msg_ctlmax;
+		break;
+	case KERN_MSGMNI:
+		data = &ns->msg_ctlmni;
+		break;
+	case KERN_MSGMNB:
+		data = &ns->msg_ctlmnb;
+		break;
+	case KERN_SEM:
+		data = &ns->sem_ctls;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return __do_proc_dointvec(data, table, write, filp, buffer,
+			lenp, ppos, NULL, NULL);
+proc_minmax:
+	return __do_proc_doulongvec_minmax(data, table, write, filp, buffer,
+			lenp, ppos, 1l, 1l);
+}
+#endif
+
 #else /* CONFIG_PROC_FS */
 
 int proc_dostring(ctl_table *table, int write, struct file *filp,
@@ -2904,12 +3440,27 @@ int proc_dostring(ctl_table *table, int 
 	return -ENOSYS;
 }
 
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
 {
 	return -ENOSYS;
 }
 
+#ifdef CONFIG_SYSVIPC
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+#endif
+
 int proc_dointvec(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -3106,6 +3657,115 @@ int sysctl_ms_jiffies(ctl_table *table, 
 	return 1;
 }
 
+static int proc_pid_max(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ve_struct *ve = get_exec_env();
+	ctl_table tmp;
+
+	if (ve_is_super(ve))
+		return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	tmp = *table;
+	tmp.data = &ve->vpid_max;
+
+	return proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+}
+
+static int sysctl_pid_max(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	struct ve_struct *ve = get_exec_env();
+	ctl_table tmp;
+
+	if (ve_is_super(ve))
+		return sysctl_intvec(table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+	tmp = *table;
+	tmp.data = &ve->vpid_max;
+
+	return sysctl_intvec(&tmp, name, nlen,
+			oldval, oldlenp, newval, newlen, context);
+}
+
+/*
+ * in VE0 work like normal proc_dostring and sysctl_string
+ * in VE ignore writes, reads take data from .extra1
+ */
+int proc_dostring_ve_immutable(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (ve_is_super(get_exec_env()))
+		return proc_dostring(table, write, filp, buffer, lenp, ppos);
+	if (write)
+		return 0;
+	return _proc_dostring(table->extra1, table->maxlen, 0,
+			filp, buffer, lenp, ppos);
+}
+
+int sysctl_string_ve_immutable(ctl_table *table, int __user *name, int nlen,
+		  void __user *oldval, size_t __user *oldlenp,
+		  void __user *newval, size_t newlen, void **context)
+{
+	ctl_table tmp;
+
+	if (ve_is_super(get_exec_env()))
+		return sysctl_string(table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+	tmp = *table;
+	tmp.data = tmp.extra1;
+	return sysctl_string(&tmp, name, nlen,
+			oldval, oldlenp, NULL, 0, context);
+}
+
+int proc_dointvec_ve_immutable(ctl_table *ctl, int write, struct file * filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (!ve_is_super(get_exec_env()) && write)
+		return 0;
+
+	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+}
+
+#ifdef CONFIG_VE
+static int proc_dointvec_ve(ctl_table *ctl, int write, struct file * filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp;
+
+	tmp = *ctl;
+	tmp.data = (char *)get_exec_env() + (unsigned long)ctl->extra1;
+
+	return proc_dointvec(&tmp, write, filp, buffer, lenp, ppos);
+}
+
+static int sysctl_strategy_ve(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	void *data;
+
+	data = (char *)get_exec_env() + (unsigned long)table->extra1;
+
+	return __do_sysctl_strategy(data, table, name, nlen,
+			oldval, oldlenp, newval, newlen, context) ?: 1;
+}
+#else
+static int proc_dointvec_ve(ctl_table *ctl, int write, struct file * filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+}
+
+static int sysctl_strategy_ve(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	return __do_sysctl_strategy(table->data, table, name, nlen,
+			oldval, oldlenp, newval, newlen, context) ?: 1;
+}
+#endif /* CONFIG_VE */
 #else /* CONFIG_SYSCTL */
 
 
@@ -3208,6 +3868,14 @@ void unregister_sysctl_table(struct ctl_
 {
 }
 
+/* !CONFIG_SYSCTL stub: template cloning is unavailable, report failure. */
+ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr)
+{
+	return NULL;
+}
+
+/* Matching stub: clone_sysctl_template() never allocates, nothing to free. */
+void free_sysctl_clone(ctl_table *tmpl)
+{
+}
 #endif /* CONFIG_SYSCTL */
 
 /*
@@ -3228,3 +3896,5 @@ EXPORT_SYMBOL(sysctl_jiffies);
 EXPORT_SYMBOL(sysctl_ms_jiffies);
 EXPORT_SYMBOL(sysctl_string);
 EXPORT_SYMBOL(unregister_sysctl_table);
+EXPORT_SYMBOL(clone_sysctl_template);
+EXPORT_SYMBOL(free_sysctl_clone);
diff -upr kernel-2.6.18-417.el5.orig/kernel/sys_ni.c kernel-2.6.18-417.el5-028stab121/kernel/sys_ni.c
--- kernel-2.6.18-417.el5.orig/kernel/sys_ni.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/sys_ni.c	2017-01-13 08:40:40.000000000 -0500
@@ -26,6 +26,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
+cond_syscall(sys_accept4);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
@@ -50,6 +51,8 @@ cond_syscall(compat_sys_get_robust_list)
 cond_syscall(sys_epoll_create);
 cond_syscall(sys_epoll_ctl);
 cond_syscall(sys_epoll_wait);
+cond_syscall(sys_epoll_pwait);
+cond_syscall(compat_sys_epoll_pwait);
 cond_syscall(sys_semget);
 cond_syscall(sys_semop);
 cond_syscall(sys_semtimedop);
@@ -85,6 +88,7 @@ cond_syscall(sys_keyctl);
 cond_syscall(compat_sys_keyctl);
 cond_syscall(compat_sys_socketcall);
 cond_syscall(sys_inotify_init);
+cond_syscall(sys_inotify_init1);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);
 cond_syscall(sys_migrate_pages);
@@ -143,3 +147,14 @@ cond_syscall(compat_sys_move_pages);
 
 /* New file descriptors */
 cond_syscall(sys_eventfd);
+
+cond_syscall(sys_getluid);
+cond_syscall(sys_setluid);
+cond_syscall(sys_setublimit);
+cond_syscall(compat_sys_setublimit);
+cond_syscall(sys_ubstat);
+
+cond_syscall(sys_signalfd);
+cond_syscall(sys_signalfd4);
+cond_syscall(compat_sys_signalfd);
+cond_syscall(compat_sys_signalfd4);
diff -upr kernel-2.6.18-417.el5.orig/kernel/taskstats.c kernel-2.6.18-417.el5-028stab121/kernel/taskstats.c
--- kernel-2.6.18-417.el5.orig/kernel/taskstats.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/taskstats.c	2017-01-13 08:40:22.000000000 -0500
@@ -182,7 +182,7 @@ static int fill_pid(pid_t pid, struct ta
 
 	if (!pidtsk) {
 		read_lock(&tasklist_lock);
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_ve(pid);
 		if (!tsk) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -215,7 +215,7 @@ static int fill_pid_v4(pid_t pid, struct
 	struct taskstats stats;
 
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_pid_ve(pid);
 	if (!tsk) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
@@ -268,7 +268,7 @@ static int fill_tgid(pid_t tgid, struct 
 	first = tgidtsk;
 	if (!first) {
 		read_lock(&tasklist_lock);
-		first = find_task_by_pid(tgid);
+		first = find_task_by_pid_ve(tgid);
 		if (!first) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -300,7 +300,7 @@ static int fill_tgid(pid_t tgid, struct 
 		 */
 		delayacct_add_tsk(stats, tsk);
 
-	} while_each_thread(first, tsk);
+	} while_each_thread_all(first, tsk);
 	read_unlock(&tasklist_lock);
 	stats->version = TASKSTATS_VERSION;
 
@@ -413,7 +413,7 @@ static int taskstats_user_cmd(struct sk_
 {
 	int rc = 0;
 	struct sk_buff *rep_skb;
-	struct taskstats stats;
+	struct taskstats_v4 stats;
 	void *reply;
 	size_t size;
 	struct nlattr *na;
@@ -434,9 +434,9 @@ static int taskstats_user_cmd(struct sk_
 	/*
 	 * Size includes space for nested attributes
 	 */
-	size = nla_total_size(sizeof(struct taskstats));
-	if (info->attrs[TASKSTATS_CMD_ATTR_PID_V4]) {
-		size = nla_total_size(sizeof(struct taskstats_v4));
+	size = nla_total_size(sizeof(stats));
+	if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
+		size = nla_total_size(sizeof(struct taskstats));
 	} else
 		memset(&stats, 0, sizeof(stats));
 
@@ -448,28 +448,28 @@ static int taskstats_user_cmd(struct sk_
 
 	if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
 		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
-		rc = fill_pid(pid, NULL, &stats);
+		rc = fill_pid_v4(pid, &stats);
 		if (rc < 0)
 			goto err;
 
 		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
 		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
-		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
+		NLA_PUT_TYPE(rep_skb, struct taskstats_v4, TASKSTATS_TYPE_STATS,
 				stats);
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_PID_V4]) {
-		struct taskstats_v4 stats_v4;
-
-		memset(&stats_v4, 0, sizeof(stats_v4));
 		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID_V4]);
-		rc = fill_pid_v4(pid, &stats_v4);
+		rc = fill_pid_v4(pid, &stats);
 		if (rc < 0)
 			goto err;
 
 		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
 		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
 		NLA_PUT_TYPE(rep_skb, struct taskstats_v4, TASKSTATS_TYPE_STATS_V4,
-				stats_v4);
+				stats);
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
+		struct taskstats stats;
+
+		memset(&stats, 0, sizeof(stats));
 		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
 		rc = fill_tgid(tgid, NULL, &stats);
 		if (rc < 0)
diff -upr kernel-2.6.18-417.el5.orig/kernel/time.c kernel-2.6.18-417.el5-028stab121/kernel/time.c
--- kernel-2.6.18-417.el5.orig/kernel/time.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/time.c	2017-01-13 08:40:41.000000000 -0500
@@ -610,6 +610,7 @@ void set_normalized_timespec(struct time
 	ts->tv_sec = sec;
 	ts->tv_nsec = nsec;
 }
+EXPORT_SYMBOL(set_normalized_timespec);
 
 /**
  * ns_to_timespec - Convert nanoseconds to timespec
diff -upr kernel-2.6.18-417.el5.orig/kernel/timer.c kernel-2.6.18-417.el5-028stab121/kernel/timer.c
--- kernel-2.6.18-417.el5.orig/kernel/timer.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/timer.c	2017-01-13 08:40:28.000000000 -0500
@@ -34,8 +34,11 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
+#include <linux/virtinfo.h>
+#include <linux/ve_proto.h>
 #include <linux/timex.h>
 #include <linux/clocksource.h>
+#include <linux/vsched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -464,6 +467,7 @@ void add_timer_on(struct timer_list *tim
 	spin_unlock_irqrestore(&base->lock, flags);
 }
 
+EXPORT_SYMBOL(add_timer_on);
 
 /***
  * mod_timer - modify a timer's timeout
@@ -655,9 +659,13 @@ static inline void __run_timers(tvec_bas
 			spin_unlock_irq(&base->lock);
 			{
 				int preempt_count = preempt_count();
+				struct ve_struct *ve;
+
+				ve = set_exec_env(get_ve0());
 				trace_timer_expire_entry(timer);
 				fn(data);
 				trace_timer_expire_exit(timer);
+				(void)set_exec_env(ve);
 				if (preempt_count != preempt_count()) {
 					printk(KERN_WARNING "huh, entered %p "
 					       "with preempt_count %08x, exited"
@@ -674,7 +682,7 @@ static inline void __run_timers(tvec_bas
 	spin_unlock_irq(&base->lock);
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) && !defined(CONFIG_SCHED_VCPU)
 /*
  * Find out when the next timer event is due to happen. This
  * is used on S/390 to stop all activity when a cpus is idle.
@@ -777,6 +785,11 @@ found:
 
 	return expires;
 }
+#else
+/*
+ * Stub used when CONFIG_SCHED_VCPU disables the real timer scan above.
+ * Returning the current jiffies means "a timer is due now", so callers
+ * never extend their idle sleep.  NOTE(review): conservative by design.
+ */
+unsigned long next_timer_interrupt(void)
+{
+	return jiffies;
+}
 #endif
 
 /******************************************************************/
@@ -1432,6 +1445,57 @@ EXPORT_SYMBOL(avenrun);
  * calc_load - given tick count, update the avenrun load estimates.
  * This is called while holding a write_lock on xtime_lock.
  */
+
+
+#ifdef CONFIG_VE
+/*
+ * calc_load_ve - update per-VE load averages and the global average of
+ * uninterruptible tasks.  Invoked from calc_load() on each load tick,
+ * i.e. while the caller holds the xtime_lock write side.
+ */
+static void calc_load_ve(void)
+{
+	unsigned long flags, nr_unint, nr_active;
+	struct ve_struct *ve;
+	struct vcpu_scheduler *vsched;
+	struct fairsched_node *node;
+
+	spin_lock_irqsave(&vsched_list_lock, flags);
+	list_for_each_entry (vsched, &vsched_list, list) {
+		/* fairsched_idle_node does not have owner_env set */
+		if (vsched == &idle_vsched)
+			continue;
+
+		/* If vsched_destroy() is in progress, fairsched node may be
+		 * unlinked at any time (cause fairsched_lock is not taken)
+		 * but if node != NULL at the moment, node won't be destroyed
+		 * until we release vsched_list_lock
+		 */
+		node = vsched->node;
+		if (node == NULL)
+			continue;
+
+		ve = node->owner_env;
+		if (ve_is_super(ve))
+			continue;
+
+		/* active = runnable + uninterruptible, in FIXED_1 units */
+		nr_active = nr_running_vsched(vsched) + nr_unint_vsched(vsched);
+		nr_active *= FIXED_1;
+
+		/* This works fine while we have exactly 1 vsched per CT */
+		CALC_LOAD(ve->avenrun[0], EXP_1, nr_active);
+		CALC_LOAD(ve->avenrun[1], EXP_5, nr_active);
+		CALC_LOAD(ve->avenrun[2], EXP_15, nr_active);
+	}
+	spin_unlock_irqrestore(&vsched_list_lock, flags);
+
+	/* system-wide uninterruptible-task average, kept in kstat_glob */
+	nr_unint = nr_uninterruptible() * FIXED_1;
+	spin_lock_irqsave(&kstat_glb_lock, flags);
+	CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+
+}
+#else
+#define calc_load_ve()	do { } while (0)
+#endif
+
 static inline void calc_load(unsigned long ticks)
 {
 	unsigned long active_tasks; /* fixed-point */
@@ -1444,6 +1508,7 @@ static inline void calc_load(unsigned lo
 		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+		calc_load_ve();
 	}
 }
 
@@ -1540,7 +1605,7 @@ asmlinkage unsigned long sys_alarm(unsig
  */
 asmlinkage long sys_getpid(void)
 {
-	return current->tgid;
+	return virt_tgid(current);
 }
 
 /*
@@ -1554,7 +1619,7 @@ asmlinkage long sys_getppid(void)
 	int pid;
 
 	rcu_read_lock();
-	pid = rcu_dereference(current->parent)->tgid;
+	pid = virt_tgid(rcu_dereference(current->parent));
 	rcu_read_unlock();
 
 	return pid;
@@ -1687,7 +1752,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterru
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-	return current->pid;
+	return virt_pid(current);
 }
 
 /*
@@ -1699,11 +1764,12 @@ asmlinkage long sys_sysinfo(struct sysin
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
 	unsigned long seq;
+	unsigned long *__avenrun;
+	struct timespec tp;
 
 	memset((char *)&val, 0, sizeof(struct sysinfo));
 
 	do {
-		struct timespec tp;
 		seq = read_seqbegin(&xtime_lock);
 
 		/*
@@ -1720,18 +1786,34 @@ asmlinkage long sys_sysinfo(struct sysin
 			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
 			tp.tv_sec++;
 		}
-		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	} while (read_seqretry(&xtime_lock, seq));
 
+	if (ve_is_super(get_exec_env())) {
+		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+		__avenrun = &avenrun[0];
 		val.procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	}
+#ifdef CONFIG_VE
+	else {
+		struct ve_struct *ve;
+		ve = get_exec_env();
+		__avenrun = &ve->avenrun[0];
+		val.procs = atomic_read(&ve->pcounter);
+		val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
+	}
+#endif
+	val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
 	si_meminfo(&val);
 	si_swapinfo(&val);
 
+#ifdef CONFIG_USER_RESOURCE
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_SYSINFO, &val)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
+#endif
 	/*
 	 * If the sum of all the available memory (i.e. ram + swap)
 	 * is less than can be stored in a 32 bit unsigned long then
diff -upr kernel-2.6.18-417.el5.orig/kernel/tracepoint.c kernel-2.6.18-417.el5-028stab121/kernel/tracepoint.c
--- kernel-2.6.18-417.el5.orig/kernel/tracepoint.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/tracepoint.c	2017-01-13 08:40:41.000000000 -0500
@@ -555,9 +555,9 @@ void syscall_regfunc(void)
 	mutex_lock(&regfunc_mutex);
 	if (!sys_tracepoint_refcount) {
 		read_lock_irqsave(&tasklist_lock, flags);
-		do_each_thread(g, t) {
+		do_each_thread_ve(g, t) {
 			set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
-		} while_each_thread(g, t);
+		} while_each_thread_ve(g, t);
 		read_unlock_irqrestore(&tasklist_lock, flags);
 	}
 	sys_tracepoint_refcount++;
@@ -574,9 +574,9 @@ void syscall_unregfunc(void)
 	sys_tracepoint_refcount--;
 	if (!sys_tracepoint_refcount) {
 		read_lock_irqsave(&tasklist_lock, flags);
-		do_each_thread(g, t) {
+		do_each_thread_ve(g, t) {
 			clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
-		} while_each_thread(g, t);
+		} while_each_thread_ve(g, t);
 		read_unlock_irqrestore(&tasklist_lock, flags);
 	}
 	mutex_unlock(&regfunc_mutex);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/beancounter.c kernel-2.6.18-417.el5-028stab121/kernel/ub/beancounter.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/beancounter.c	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/beancounter.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,747 @@
+/*
+ *  linux/kernel/ub/beancounter.c
+ *
+ *  Copyright (C) 1998  Alan Cox
+ *                1998-2000  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2000-2005 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - more intelligent limit check in mremap(): currently the new size is
+ *     charged and _then_ old size is uncharged
+ *     (almost done: !move_vma case is completely done,
+ *      move_vma in its current implementation requires too many conditions to
+ *      do things right, because it may be not only expansion, but shrinking
+ *      also, plus do_munmap will require an additional parameter...)
+ *   - problem: bad pmd page handling
+ *   - consider /proc redesign
+ *   - TCP/UDP ports
+ *   + consider whether __charge_beancounter_locked should be inline
+ *
+ * Changes:
+ *   1999/08/17  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- Set "barrier" and "limit" parts of limits atomically.
+ *   1999/10/06  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- setublimit system call.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+#include <ub/proc.h>
+#include <ub/io_prio.h>
+
+/* slab cache for struct user_beancounter; created in ub_init_late() */
+static kmem_cache_t *ub_cachep;
+/* template copied into every newly created top-level beancounter */
+static struct user_beancounter default_beancounter;
+/* the host (VE0) beancounter, set up early in ub_init_early() */
+struct user_beancounter ub0;
+EXPORT_SYMBOL(ub0);
+
+/* Resource names indexed by UB_* resource id; entries past UB_RESOURCES
+ * name the auxiliary counters reported by bc_verify_held(). */
+const char *ub_rnames[] = {
+	"kmemsize",	/* 0 */
+	"lockedpages",
+	"privvmpages",
+	"shmpages",
+	"dummy",
+	"numproc",	/* 5 */
+	"physpages",
+	"vmguarpages",
+	"oomguarpages",
+	"numtcpsock",
+	"numflock",	/* 10 */
+	"numpty",
+	"numsiginfo",
+	"tcpsndbuf",
+	"tcprcvbuf",
+	"othersockbuf",	/* 15 */
+	"dgramrcvbuf",
+	"numothersock",
+	"dcachesize",
+	"numfile",
+	"dummy",	/* 20 */
+	"dummy",
+	"dummy",
+	"numiptent",
+	"swappages",
+	"unused_privvmpages",	/* UB_RESOURCES */
+	"tmpfs_respages",
+	"held_pages",
+};
+
+static void init_beancounter_struct(struct user_beancounter *ub);
+static void init_beancounter_store(struct user_beancounter *ub);
+static void init_beancounter_nolimits(struct user_beancounter *ub);
+
+/*
+ * print_ub_uid - format a beancounter id into buf.
+ * Two-level beancounters print as "<parent>.<child>", top-level ones
+ * as a single uid.  Returns the snprintf() result.
+ */
+int print_ub_uid(struct user_beancounter *ub, char *buf, int size)
+{
+	if (ub->parent != NULL)
+		return snprintf(buf, size, "%u.%u",
+				ub->parent->ub_uid, ub->ub_uid);
+	else
+		return snprintf(buf, size, "%u", ub->ub_uid);
+}
+EXPORT_SYMBOL(print_ub_uid);
+
+/* hash of the uid itself for top-level beancounters ... */
+#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))
+/* ... and of parent uid mixed with the sub-id for second-level ones */
+#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
+struct hlist_head ub_hash[UB_HASH_SIZE];
+DEFINE_SPINLOCK(ub_hash_lock);
+LIST_HEAD(ub_list_head); /* protected by ub_hash_lock */
+EXPORT_SYMBOL(ub_hash);
+EXPORT_SYMBOL(ub_hash_lock);
+EXPORT_SYMBOL(ub_list_head);
+
+/*
+ *	Per user resource beancounting. Resources are tied to their luid.
+ *	The resource structure itself is tagged both to the process and
+ *	the charging resources (a socket doesn't want to have to search for
+ *	things at irq time for example). Reference counters keep things in
+ *	hand.
+ *
+ *	The case where a user creates resource, kills all his processes and
+ *	then starts new ones is correctly handled this way. The refcounters
+ *	will mean the old entry is still around with resource tied to it.
+ */
+
+/* Release the memory behind a beancounter: percpu area, private data
+ * and the object itself.  Tolerates NULL. */
+static inline void free_ub(struct user_beancounter *ub)
+{
+	if (ub == NULL)
+		return;
+	free_percpu(ub->ub_percpu);
+	kfree(ub->private_data2);
+	kmem_cache_free(ub_cachep, ub);
+}
+
+/* Look up a beancounter by (uid, parent) within one hash chain and take
+ * a reference on it.  Caller must hold ub_hash_lock.  NULL if absent. */
+static inline struct user_beancounter *bc_lookup_hash(struct hlist_head *hash,
+		uid_t uid, struct user_beancounter *parent)
+{
+	struct user_beancounter *ub;
+	struct hlist_node *ptr;
+
+	hlist_for_each_entry (ub, ptr, hash, ub_hash)
+		if (ub->ub_uid == uid && ub->parent == parent)
+			return get_beancounter(ub);
+
+	return NULL;
+}
+
+/* number of top-level beancounters; children are counted per parent in
+ * ub_childs.  Both counters are protected by ub_hash_lock. */
+int ub_count;
+
+/* next two must be called under ub_hash_lock */
+static inline void ub_count_inc(struct user_beancounter *ub)
+{
+	if (ub->parent)
+		ub->parent->ub_childs++;
+	else
+		ub_count++;
+}
+
+static inline void ub_count_dec(struct user_beancounter *ub)
+{
+	if (ub->parent)
+		ub->parent->ub_childs--;
+	else
+		ub_count--;
+}
+
+/*
+ * get_beancounter_byuid - find a top-level beancounter by luid, creating
+ * it when 'create' is set.
+ *
+ * Allocation happens outside ub_hash_lock: drop the lock, allocate, then
+ * jump back to 'retry' and re-check; whichever task wins the race inserts
+ * its copy, the loser frees its own.  Returns a referenced beancounter
+ * or NULL (not found, or allocation failure).
+ */
+struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct hlist_head *hash;
+
+	hash = &ub_hash[ub_hash_fun(uid)];
+	new_ub = NULL;
+retry:
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = bc_lookup_hash(hash, uid, NULL);
+	if (ub != NULL) {
+		/* fold references accumulated on the fast path into the
+		 * percpu counter while we still hold the hash lock */
+		ub_percpu_add(ub, fast_refcount,
+				atomic_xchg(&ub->ub_fastcount, 0));
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+		/* lost the creation race: discard our spare copy */
+		if (new_ub != NULL)
+			free_ub(new_ub);
+		return ub;
+	}
+
+	if (!create) {
+		/* no ub found */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+
+	if (new_ub != NULL) {
+		/* second pass with an allocated ub: publish it */
+		list_add_rcu(&new_ub->ub_list, &ub_list_head);
+		hlist_add_head(&new_ub->ub_hash, hash);
+		ub_count_inc(new_ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, 
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating ub %p\n", new_ub);
+	/* start from the system-wide default limits, then reset the
+	 * structural fields (locks, lists, refcounts) */
+	memcpy(new_ub, &default_beancounter, sizeof(*new_ub));
+	init_beancounter_struct(new_ub);
+	new_ub->ub_percpu = alloc_percpu(struct ub_percpu_struct);
+	if (new_ub->ub_percpu == NULL)
+		goto fail_free;
+	new_ub->ub_uid = uid;
+	goto retry;
+
+fail_free:
+	kmem_cache_free(ub_cachep, new_ub);
+	return NULL;
+}
+EXPORT_SYMBOL(get_beancounter_byuid);
+
+/*
+ * get_subbeancounter_byid - find or create a second-level beancounter
+ * with id 'id' under parent 'p'.
+ *
+ * Same lock-drop/retry pattern as get_beancounter_byuid().  'create' is
+ * a flag word: UB_CREATE_ATOMIC selects GFP_ATOMIC allocation.  A newly
+ * created child starts with no limits and holds a reference on its
+ * parent.  Returns a referenced beancounter or NULL.
+ */
+struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
+		int id, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct hlist_head *hash;
+
+	hash = &ub_hash[ub_subhash_fun(p, id)];
+	new_ub = NULL;
+retry:
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = bc_lookup_hash(hash, id, p);
+	if (ub != NULL) {
+		/* fold fast-path references under the hash lock */
+		ub_percpu_add(ub, fast_refcount,
+				atomic_xchg(&ub->ub_fastcount, 0));
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+		/* lost the race: drop the parent ref our copy took */
+		if (new_ub != NULL) {
+			put_beancounter(new_ub->parent);
+			free_ub(new_ub);
+		}
+		return ub;
+	}
+
+	if (!create) {
+		/* no ub found */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+
+	if (new_ub != NULL) {
+		/* second pass: publish the freshly allocated child */
+		list_add_rcu(&new_ub->ub_list, &ub_list_head);
+		hlist_add_head(&new_ub->ub_hash, hash);
+		ub_count_inc(new_ub);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
+			create & UB_CREATE_ATOMIC ? GFP_ATOMIC : GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating sub %p\n", new_ub);
+	memset(new_ub, 0, sizeof(*new_ub));
+	init_beancounter_nolimits(new_ub);
+	init_beancounter_store(new_ub);
+	init_beancounter_struct(new_ub);
+	if (create & UB_CREATE_ATOMIC)
+		new_ub->ub_percpu = alloc_percpu_atomic(struct ub_percpu_struct);
+	else
+		new_ub->ub_percpu = alloc_percpu(struct ub_percpu_struct);
+	if (new_ub->ub_percpu == NULL)
+		goto fail_free;
+	new_ub->ub_uid = id;
+	new_ub->parent = get_beancounter(p);
+	goto retry;
+
+fail_free:
+	kmem_cache_free(ub_cachep, new_ub);
+	return NULL;
+}
+EXPORT_SYMBOL(get_subbeancounter_byid);
+
+/* Complain about a refcount underflow detected on put; the formatted
+ * beancounter id makes the log line attributable to a container. */
+static void put_warn(struct user_beancounter *ub)
+{
+	char id[64];
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_ERR "UB: Bad refcount (%d) on put of %s (%p)\n",
+			atomic_read(&ub->ub_refcount), id, ub);
+}
+
+#ifdef CONFIG_UBC_KEEP_UNUSED
+#define release_beancounter(ub)	do { } while (0)
+#else
+/* Report a resource still charged at destruction time.  Returns 1 when
+ * the held value is clean (zero), 0 otherwise. */
+static int verify_res(struct user_beancounter *ub, int resource,
+		unsigned long held)
+{
+	char id[64];
+
+	if (likely(held == 0))
+		return 1;
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_WARNING "Ub %s helds %lu in %s on put\n",
+			id, held, ub_rnames[resource]);
+	return 0;
+}
+
+/* Check every resource (plus the auxiliary counters) is fully uncharged
+ * before the beancounter is freed; dump a trace if any leak is found. */
+static inline void bc_verify_held(struct user_beancounter *ub)
+{
+	int i, clean;
+
+	clean = 1;
+	for (i = 0; i < UB_RESOURCES; i++)
+		clean &= verify_res(ub, i, ub->ub_parms[i].held);
+
+	clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
+	clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
+	clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
+
+	ub_debug_trace(!clean, 5, 60*HZ);
+}
+
+/* RCU callback: actually free the beancounter after a grace period. */
+static void bc_free_rcu(struct rcu_head *rcu)
+{
+	struct user_beancounter *ub;
+
+	ub = container_of(rcu, struct user_beancounter, rcu);
+	free_ub(ub);
+}
+
+/*
+ * delayed_release_beancounter - workqueue function that tears down a
+ * beancounter whose last regular reference was dropped.
+ *
+ * It must reconcile the lockless "fast" references (ub_fastcount plus
+ * the percpu fast_refcount fields) with ub_refcount before the object
+ * can be freed, while racing against get_beancounter_byuid() reviving
+ * the entry.  On success the parent is released too (goto again), so a
+ * whole chain can unwind from one work item.
+ */
+static void delayed_release_beancounter(void *data)
+{
+	struct user_beancounter *ub, *parent;
+	unsigned long flags;
+	int cpu, refcount;
+
+	ub = (struct user_beancounter *)data;
+again:
+	if (atomic_read(&ub->ub_refcount) > 1) {
+		/* raced with get_beancounter_byuid */
+		__put_beancounter(ub);
+		return;
+	}
+
+	/* Park a large bias in ub_fastcount so concurrent fast puts cannot
+	 * drive it to zero while we drain the percpu counters; the
+	 * synchronize_sched() lets in-flight fast-path users finish.
+	 * NOTE(review): exact fast-path protocol lives elsewhere — the
+	 * accounting below assumes percpu fast_refcount offsets the bias. */
+	refcount = INT_MAX/2;
+	atomic_add(refcount, &ub->ub_fastcount);
+	synchronize_sched();
+
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	if (atomic_read(&ub->ub_refcount) > 1) {
+		/* revived meanwhile: undo the bias and back off */
+		ub_percpu_sub(ub, fast_refcount, refcount);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		__put_beancounter(ub);
+		return;
+	}
+	for_each_possible_cpu(cpu) {
+		refcount -= per_cpu_ptr(ub->ub_percpu, cpu)->fast_refcount;
+		per_cpu_ptr(ub->ub_percpu, cpu)->fast_refcount = 0;
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* someone else still owes a put; they will finish the release */
+	if (!atomic_sub_and_test(refcount, &ub->ub_fastcount))
+		return;
+
+	local_irq_save(flags);
+	if (!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock)) {
+		/* raced with get_beancounter_byuid */
+		local_irq_restore(flags);
+		return;
+	}
+
+	/* last reference gone for real: unhash and destroy */
+	hlist_del(&ub->ub_hash);
+	list_del_rcu(&ub->ub_list);
+	ub_count_dec(ub);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	ub_flush_held_pages(ub);
+	bc_verify_held(ub);
+	ub_free_counters(ub);
+#ifdef CONFIG_UBC_IO_PRIO
+ 	bc_fini_ioprio(&ub->iopriv);
+#endif
+	parent = ub->parent;
+
+	call_rcu(&ub->rcu, bc_free_rcu);
+	/* releasing the child drops one ref on the parent: recurse up */
+	if (parent) {
+		ub = parent;
+		goto again;
+	}
+}
+
+/* Defer the heavy teardown to process context via the shared workqueue
+ * (put may happen in atomic context). */
+static inline void release_beancounter(struct user_beancounter *ub)
+{
+	struct execute_work *ew;
+
+	ew = &ub->cleanup;
+	INIT_WORK(&ew->work, delayed_release_beancounter, ub);
+	schedule_work(&ew->work);
+}
+#endif
+
+/*
+ * __put_beancounter - drop one reference; when the count reaches zero
+ * schedule the (possibly sleeping) release work.  ub0 is immortal: a
+ * final put on it is reported and ignored.
+ */
+void __put_beancounter(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	/* equivalent to atomic_dec_and_lock_irqsave() */
+	local_irq_save(flags);
+	if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
+		if (unlikely(atomic_read(&ub->ub_refcount) < 0))
+			put_warn(ub);
+		local_irq_restore(flags);
+		return;
+	}
+
+	if (unlikely(ub == get_ub0())) {
+		printk(KERN_ERR "Trying to put ub0\n");
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return;
+	}
+
+	/* prevent get_beancounter_byuid + put_beancounter() reentrance */
+	atomic_inc(&ub->ub_refcount);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	release_beancounter(ub);
+}
+EXPORT_SYMBOL(__put_beancounter);
+
+/* Put that first waits out an RCU grace period, for callers whose last
+ * use of the beancounter may still be visible to RCU readers. */
+void put_beancounter_safe(struct user_beancounter *ub)
+{
+	synchronize_rcu();
+	__put_beancounter(ub);
+}
+EXPORT_SYMBOL(put_beancounter_safe);
+
+/*
+ *	Generic resource charging stuff
+ */
+
+/*
+ * __charge_beancounter_locked - charge 'val' units of 'resource' to one
+ * beancounter.  Caller holds ub->ub_lock.
+ *
+ * Severity semantics (cases deliberately fall through; 'break' means
+ * the corresponding threshold was exceeded → failure):
+ *   UB_HARD  - fail above the barrier,
+ *   UB_SOFT  - fail above the limit (and rate-limited log),
+ *   UB_FORCE - always succeed.
+ * Returns 0 on success, -ENOMEM on failure (charge rolled back,
+ * failcnt bumped).
+ */
+int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict)
+{
+	ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	/*
+	 * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
+	 * at the moment is possible so an overflow is impossible.  
+	 */
+	ub->ub_parms[resource].held += val;
+
+	switch (strict) {
+		case UB_HARD:
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].barrier)
+				break;
+			/* fallthrough */
+		case UB_SOFT:
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].limit)
+				break;
+			/* fallthrough */
+		case UB_FORCE:
+			ub_adjust_maxheld(ub, resource);
+			return 0;
+		default:
+			BUG();
+	}
+
+	if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
+		printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n",
+		       ub_rnames[resource], ub->ub_uid);
+	ub->ub_parms[resource].failcnt++;
+	ub->ub_parms[resource].held -= val;
+	return -ENOMEM;
+}
+
+/*
+ * charge_beancounter - charge a resource to ub and every ancestor.
+ * On failure at any level the charges already made below that level are
+ * unrolled, so the operation is all-or-nothing.  Returns 0 or -ENOMEM
+ * (-EINVAL for val > UB_MAXVALUE).
+ */
+int charge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict)
+{
+	int retval;
+	struct user_beancounter *p, *q;
+	unsigned long flags;
+
+	retval = -EINVAL;
+	if (val > UB_MAXVALUE)
+		goto out;
+
+	local_irq_save(flags);
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		retval = __charge_beancounter_locked(p, resource, val, strict);
+		spin_unlock(&p->ub_lock);
+		if (retval)
+			goto unroll;
+	}
+out_restore:
+	local_irq_restore(flags);
+out:
+	return retval;
+
+unroll:
+	/* back out the charges from ub up to (excluding) the failed level */
+	for (q = ub; q != p; q = q->parent) {
+		spin_lock(&q->ub_lock);
+		__uncharge_beancounter_locked(q, resource, val);
+		spin_unlock(&q->ub_lock);
+	}
+	goto out_restore;
+}
+
+EXPORT_SYMBOL(charge_beancounter);
+
+/*
+ * __charge_beancounter_notop - forced charge applied to ub and every
+ * ancestor except the topmost one (note p->parent != NULL condition);
+ * UB_FORCE means it cannot fail.
+ */
+void __charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	struct user_beancounter *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for (p = ub; p->parent != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		__charge_beancounter_locked(p, resource, val, UB_FORCE);
+		spin_unlock(&p->ub_lock);
+	}
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(__charge_beancounter_notop);
+
+/* Log an attempt to uncharge more than is held (accounting bug). */
+void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held)
+{
+	char id[64];
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
+			val, held, ub_rnames[resource], id);
+	ub_debug_trace(1, 10, 10*HZ);
+}
+
+/* Uncharge one level; clamps val to the held amount (with a warning)
+ * so 'held' can never underflow.  Caller holds ub->ub_lock. */
+void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	if (ub->ub_parms[resource].held < val) {
+		uncharge_warn(ub, resource,
+				val, ub->ub_parms[resource].held);
+		val = ub->ub_parms[resource].held;
+	}
+	ub->ub_parms[resource].held -= val;
+}
+
+/* Uncharge ub and every ancestor (mirror of charge_beancounter). */
+void uncharge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	unsigned long flags;
+	struct user_beancounter *p;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock_irqsave(&p->ub_lock, flags);
+		__uncharge_beancounter_locked(p, resource, val);
+		spin_unlock_irqrestore(&p->ub_lock, flags);
+	}
+}
+
+EXPORT_SYMBOL(uncharge_beancounter);
+
+/* Uncharge every level except the topmost (mirror of
+ * __charge_beancounter_notop). */
+void __uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	struct user_beancounter *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for (p = ub; p->parent != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		__uncharge_beancounter_locked(p, resource, val);
+		spin_unlock(&p->ub_lock);
+	}
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(__uncharge_beancounter_notop);
+
+
+/*
+ *	Rate limiting stuff.
+ */
+/*
+ * ub_ratelimit - token-bucket style rate limiter for UB log messages.
+ * Allows at most 'burst' events per 'interval' jiffies; the bucket
+ * drains by one token per elapsed interval.  Returns 1 if the caller
+ * may emit the message, 0 if it should be suppressed.
+ */
+int ub_ratelimit(struct ub_rate_info *p)
+{
+	unsigned long cjif, djif;
+	unsigned long flags;
+	/* one global lock is fine: this is a slow, warning-only path */
+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;
+		}
+		p->bucket++;
+	} else {
+		/* drain one token per full interval elapsed, floor at 0 */
+		new_bucket = p->bucket - (djif / (unsigned)p->interval);
+		if (new_bucket < 0)
+			new_bucket = 0;
+		p->bucket = new_bucket + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
+}
+EXPORT_SYMBOL(ub_ratelimit);
+
+
+/*
+ *	Initialization
+ *
+ *	struct user_beancounter contains
+ *	 - limits and other configuration settings,
+ *	   with a copy stored for accounting purposes,
+ *	 - structural fields: lists, spinlocks and so on.
+ *
+ *	Before these parts are initialized, the structure should be memset
+ *	to 0 or copied from a known clean structure.  That takes care of a lot
+ *	of fields not initialized explicitly.
+ */
+
+/* Initialize the structural (non-limit) fields: magic, refcounts, lock,
+ * socket lists, io priority.  Expects the object pre-zeroed or copied
+ * from a clean template (see the comment block above). */
+static void init_beancounter_struct(struct user_beancounter *ub)
+{
+	ub->ub_magic = UB_MAGIC;
+	ub->ub_cookie = get_random_int();
+	atomic_set(&ub->ub_refcount, 1);
+	atomic_set(&ub->ub_fastcount, 0);
+	spin_lock_init(&ub->ub_lock);
+	INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
+	INIT_LIST_HEAD(&ub->ub_other_sk_list);
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	INIT_LIST_HEAD(&ub->ub_cclist);
+#endif
+#ifdef CONFIG_UBC_IO_PRIO
+	bc_init_ioprio(&ub->iopriv);
+#endif
+}
+
+/* Snapshot the current limits into ub_store (the accounting copy). */
+static void init_beancounter_store(struct user_beancounter *ub)
+{
+	int k;
+
+	for (k = 0; k < UB_RESOURCES; k++) {
+		memcpy(&ub->ub_store[k], &ub->ub_parms[k],
+				sizeof(struct ubparm));
+	}
+}
+
+/* Set every barrier/limit to "unlimited" (UB_MAXVALUE). */
+static void init_beancounter_nolimits(struct user_beancounter *ub)
+{
+	int k;
+
+	for (k = 0; k < UB_RESOURCES; k++) {
+		ub->ub_parms[k].limit = UB_MAXVALUE;
+		/* FIXME: whether this is right for physpages and guarantees? */
+		ub->ub_parms[k].barrier = UB_MAXVALUE;
+	}
+
+	/* FIXME: set unlimited rate? */
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+/* Conservative default limits for the creation template, scaled from
+ * the machine's RAM (num_physpages) and max_threads.  Barriers are set
+ * equal to limits. */
+static void init_beancounter_syslimits(struct user_beancounter *ub)
+{
+	unsigned long mp;
+	extern int max_threads;
+	int k;
+
+	mp = num_physpages;
+	/* kmemsize: 1/6 of RAM, capped at 32MB on boxes above 192MB */
+	ub->ub_parms[UB_KMEMSIZE].limit = 
+		mp > (192*1024*1024 >> PAGE_SHIFT) ?
+				32*1024*1024 : (mp << PAGE_SHIFT) / 6;
+	ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
+	ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
+	ub->ub_parms[UB_SHMPAGES].limit = 64;
+	ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
+	ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
+	ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */
+	ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
+	ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */
+	ub->ub_parms[UB_NUMFLOCK].limit = 1024;
+	ub->ub_parms[UB_NUMPTY].limit = 16;
+	ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
+	ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
+	ub->ub_parms[UB_NUMFILE].limit = 1024;
+	ub->ub_parms[UB_SWAPPAGES].limit = UB_MAXVALUE;
+
+	for (k = 0; k < UB_RESOURCES; k++)
+		ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
+
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+#ifdef CONFIG_SMP
+static struct percpu_data ub0_percpu;
+#endif
+static struct ub_percpu_struct ub0_percpu_data[NR_CPUS];
+
+/*
+ * ub_init_early - set up ub0 (the host beancounter) before the slab
+ * allocator exists: uses statically allocated percpu storage, attaches
+ * the boot task and init_mm to ub0, and hashes it (uid 0 lands in
+ * bucket 0 via the direct ub_hash[ub->ub_uid] index).
+ */
+void __init ub_init_early(void)
+{
+	struct user_beancounter *ub;
+
+	init_cache_counters();
+	ub = get_ub0();
+	memset(ub, 0, sizeof(*ub));
+	ub->ub_uid = 0;
+	init_beancounter_nolimits(ub);
+	init_beancounter_store(ub);
+	init_beancounter_struct(ub);
+	/* too early for alloc_percpu(): use the static area */
+	ub->ub_percpu = static_percpu_ptr(&ub0_percpu, ub0_percpu_data);
+#ifdef CONFIG_UBC_IO_PRIO
+	ub->iopriv.ioprio = (UB_IOPRIO_BASE + UB_IOPRIO_MAX) / 2;
+#endif
+
+	/* attach the boot task (and init_mm) to ub0 */
+	memset(&current->task_bc, 0, sizeof(struct task_beancounter));
+	(void)set_exec_ub(ub);
+	current->task_bc.task_ub = get_beancounter(ub);
+	__charge_beancounter_locked(ub, UB_NUMPROC, 1, UB_FORCE);
+	current->task_bc.fork_sub = get_beancounter(ub);
+	ub_init_task_bc(&current->task_bc);
+	init_mm.mm_ub = get_beancounter(ub);
+
+	hlist_add_head(&ub->ub_hash, &ub_hash[ub->ub_uid]);
+	list_add(&ub->ub_list, &ub_list_head);
+	ub_count_inc(ub);
+}
+
+/*
+ * ub_init_late - once slab is up, create the beancounter cache and fill
+ * in the creation template (unlimited or system-scaled limits depending
+ * on CONFIG_UBC_UNLIMITED).
+ */
+void __init ub_init_late(void)
+{
+	ub_cachep = kmem_cache_create("user_beancounters",
+			sizeof(struct user_beancounter),
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+
+	memset(&default_beancounter, 0, sizeof(default_beancounter));
+#ifdef CONFIG_UBC_UNLIMITED
+	init_beancounter_nolimits(&default_beancounter);
+#else
+	init_beancounter_syslimits(&default_beancounter);
+#endif
+	init_beancounter_store(&default_beancounter);
+	init_beancounter_struct(&default_beancounter);
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/io_acct.c kernel-2.6.18-417.el5-028stab121/kernel/ub/io_acct.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/io_acct.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/io_acct.c	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,231 @@
+/*
+ *  kernel/ub/io_acct.c
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/mempool.h>
+#include <linux/proc_fs.h>
+#include <linux/virtinfo.h>
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/writeback.h>
+
+#include <ub/beancounter.h>
+#include <ub/io_acct.h>
+#include <ub/ub_vmpages.h>
+#include <ub/proc.h>
+
+/* under write lock mapping->tree_lock */
+
+void ub_io_account_dirty(struct address_space *mapping, int pages)
+{
+	struct user_beancounter *ub = mapping->dirtied_ub;
+
+	WARN_ON_ONCE(!radix_tree_tagged(&mapping->page_tree,
+				PAGECACHE_TAG_DIRTY));
+
+	if (!ub)
+		ub = mapping->dirtied_ub = get_beancounter(get_io_ub());
+
+	ub_stat_add(ub, dirty_pages, pages);
+}
+
+void ub_io_account_clean(struct address_space *mapping, int pages, int cancel)
+{
+	struct user_beancounter *ub = mapping->dirtied_ub;
+
+	if (unlikely(!ub)) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ub_stat_sub(ub, dirty_pages, pages);
+
+	if (cancel)
+		ub_percpu_add(ub, async_write_canceled, pages);
+	else {
+		size_t bytes = pages << PAGE_SHIFT;
+
+		ub_percpu_add(ub, async_write_complete, pages);
+		ub = set_exec_ub(ub);
+		virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_ACCOUNT, &bytes);
+		ub = set_exec_ub(ub);
+	}
+
+	if (!radix_tree_tagged(&mapping->page_tree, PAGECACHE_TAG_DIRTY)) {
+		mapping->dirtied_ub = NULL;
+		__put_beancounter(ub);
+	}
+}
+
+int ub_dirty_limits(long *pdirty, struct user_beancounter *ub)
+{
+	int dirty_ratio, unmapped_ratio;
+	unsigned long available_memory;
+
+	available_memory = ub->ub_parms[UB_PHYSPAGES].limit;
+	if (available_memory == UB_MAXVALUE)
+		available_memory = ub->ub_mem_size;
+	if (!available_memory)
+		return 0;
+
+	/* math taken from get_dirty_limits */
+	unmapped_ratio = 100 - (100 * ub_mapped_pages(ub)) / available_memory;
+
+	dirty_ratio = vm_dirty_ratio;
+	if ((dirty_ratio > unmapped_ratio / 2) && (dirty_ratio != 100))
+		dirty_ratio = unmapped_ratio / 2;
+
+	if (dirty_ratio < 5)
+		dirty_ratio = 5;
+
+	*pdirty = (dirty_ratio * available_memory) / 100;
+
+	return 1;
+}
+
+#ifdef CONFIG_PROC_FS
+#define in_flight(var)	(var > var##_done ? var - var##_done : 0)
+
+static int bc_ioacct_show(struct seq_file *f, void *v)
+{
+	int i;
+	unsigned long long read, write, cancel;
+	unsigned long sync, sync_done;
+	unsigned long fsync, fsync_done;
+	unsigned long fdsync, fdsync_done;
+	unsigned long frsync, frsync_done;
+	unsigned long reads, writes;
+	unsigned long long rchar, wchar;
+	struct user_beancounter *ub;
+	unsigned long dirty_pages;
+	unsigned long long dirtied;
+
+	ub = seq_beancounter(f);
+
+	read = write = cancel = 0;
+	sync = sync_done = fsync = fsync_done =
+		fdsync = fdsync_done = frsync = frsync_done = 0;
+	reads = writes = 0;
+	rchar = wchar = 0;
+	dirty_pages = __ub_stat_get(ub, dirty_pages);
+	for_each_online_cpu(i) {
+		struct ub_percpu_struct *ub_percpu;
+		ub_percpu = per_cpu_ptr(ub->ub_percpu, i);
+
+		read += ub_percpu->sync_read_bytes;
+		write += ub_percpu->sync_write_bytes;
+
+		dirty_pages += ub_percpu->dirty_pages;
+		write += (u64)ub_percpu->async_write_complete << PAGE_SHIFT;
+		cancel += (u64)ub_percpu->async_write_canceled << PAGE_SHIFT;
+
+		sync += ub_percpu->sync;
+		fsync += ub_percpu->fsync;
+		fdsync += ub_percpu->fdsync;
+		frsync += ub_percpu->frsync;
+		sync_done += ub_percpu->sync_done;
+		fsync_done += ub_percpu->fsync_done;
+		fdsync_done += ub_percpu->fdsync_done;
+		frsync_done += ub_percpu->frsync_done;
+
+		reads += ub_percpu->read;
+		writes += ub_percpu->write;
+		rchar += ub_percpu->rchar;
+		wchar += ub_percpu->wchar;
+	}
+
+	if ((long)dirty_pages < 0)
+		dirty_pages = 0;
+
+	dirtied = write + cancel;
+	dirtied += (u64)dirty_pages << PAGE_SHIFT;
+
+	seq_printf(f, bc_proc_llu_fmt, "read", read);
+	seq_printf(f, bc_proc_llu_fmt, "write", write);
+	seq_printf(f, bc_proc_llu_fmt, "dirty", dirtied);
+	seq_printf(f, bc_proc_llu_fmt, "cancel", cancel);
+	seq_printf(f, bc_proc_llu_fmt, "missed", 0ull);
+
+	seq_printf(f, bc_proc_lu_lfmt, "syncs_total", sync);
+	seq_printf(f, bc_proc_lu_lfmt, "fsyncs_total", fsync);
+	seq_printf(f, bc_proc_lu_lfmt, "fdatasyncs_total", fdsync);
+	seq_printf(f, bc_proc_lu_lfmt, "range_syncs_total", frsync);
+
+	seq_printf(f, bc_proc_lu_lfmt, "syncs_active", in_flight(sync));
+	seq_printf(f, bc_proc_lu_lfmt, "fsyncs_active", in_flight(fsync));
+	seq_printf(f, bc_proc_lu_lfmt, "fdatasyncs_active", in_flight(fdsync));
+	seq_printf(f, bc_proc_lu_lfmt, "range_syncs_active", in_flight(frsync));
+
+	seq_printf(f, bc_proc_lu_lfmt, "vfs_reads", reads);
+	seq_printf(f, bc_proc_llu_fmt, "vfs_read_chars", rchar);
+	seq_printf(f, bc_proc_lu_lfmt, "vfs_writes", writes);
+	seq_printf(f, bc_proc_llu_fmt, "vfs_write_chars", wchar);
+
+	seq_printf(f, bc_proc_lu_lfmt, "io_pbs", dirty_pages);
+	return 0;
+}
+
+static struct bc_proc_entry bc_ioacct_entry = {
+	.name = "ioacct",
+	.u.show = bc_ioacct_show,
+};
+
+static int bc_ioacct_notify(struct vnotifier_block *self,
+		unsigned long event, void *arg, int old_ret)
+{
+	struct user_beancounter *ub;
+	struct ub_percpu_struct *ub_pcpu;
+	unsigned long *vm_events;
+	unsigned long long bin, bout;
+	int i;
+
+	if (event != VIRTINFO_VMSTAT)
+		return old_ret;
+
+	ub = top_beancounter(get_exec_ub());
+	if (ub == get_ub0())
+		return old_ret;
+
+	/* Think over: do we need to account here bytes_dirty_missed? */
+	bout = 0;
+	bin = 0;
+	for_each_online_cpu(i) {
+		ub_pcpu = per_cpu_ptr(ub->ub_percpu, i);
+		bout += (u64)ub_pcpu->async_write_complete << PAGE_SHIFT;
+		bout += ub_pcpu->sync_write_bytes;
+		bin += ub_pcpu->sync_read_bytes;
+	}
+
+	/* convert to Kbytes */
+	bout >>= 10;
+	bin >>= 10;
+
+	vm_events = ((unsigned long *)arg) + NR_VM_ZONE_STAT_ITEMS;
+	vm_events[PGPGOUT] = (unsigned long)bout;
+	vm_events[PGPGIN] = (unsigned long)bin;
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block bc_ioacct_nb = {
+	.notifier_call = bc_ioacct_notify,
+};
+
+static int __init bc_ioacct_init(void)
+{
+	bc_register_proc_entry(&bc_ioacct_entry);
+
+	virtinfo_notifier_register(VITYPE_GENERAL, &bc_ioacct_nb);
+	return 0;
+}
+
+late_initcall(bc_ioacct_init);
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/io_prio.c kernel-2.6.18-417.el5-028stab121/kernel/ub/io_prio.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/io_prio.c	2017-01-13 08:40:18.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/io_prio.c	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,630 @@
+/*
+ *  kernel/ub/io_prio.c
+ *
+ *  Copyright (C) 2007 SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Vasily Tarasov <vtaras@openvz.org>
+ *
+ */
+
+#include <linux/mm.h>
+#include <ub/io_prio.h>
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <linux/module.h>
+#include <ub/io_acct.h>
+#include <linux/blkdev.h>
+#include <ub/proc.h>
+
+#define BC_MAX_RATIO	100
+
+/* bc bandwidth inversely proportional coefficient per ioprio */
+static int bc_ioprio_ratio[CFQ_PRIO_LISTS] = {100, 87, 77, 70, 63, 58, 53, 50};
+
+struct cfq_bc_data *__find_cfq_bc(struct ub_iopriv *iopriv,
+							struct cfq_data *cfqd)
+{
+	struct cfq_bc_data *cfq_bc;
+
+	list_for_each_entry(cfq_bc, &iopriv->cfq_bc_head, cfq_bc_list)
+		if (cfq_bc->cfqd == cfqd)
+			return cfq_bc;
+
+	return NULL;
+}
+
+struct cfq_bc_data *bc_findcreate_cfq_bc(struct ub_iopriv *iopriv,
+					struct cfq_data *cfqd, gfp_t gfp_mask)
+{
+	struct cfq_bc_data *cfq_bc_new;
+	struct cfq_bc_data *cfq_bc;
+	unsigned long flags;
+
+	read_lock_irqsave(&iopriv->cfq_bc_list_lock, flags);
+	cfq_bc = __find_cfq_bc(iopriv, cfqd);
+	read_unlock_irqrestore(&iopriv->cfq_bc_list_lock, flags);
+
+	if (cfq_bc)
+		return cfq_bc;
+
+	cfq_bc_new = kzalloc(sizeof(*cfq_bc_new), gfp_mask);
+	if (!cfq_bc_new)
+		return NULL;
+
+	cfq_init_cfq_bc(cfq_bc_new);
+	cfq_bc_new->cfqd = cfqd;
+	cfq_bc_new->ub_iopriv = iopriv;
+
+	write_lock_irqsave(&iopriv->cfq_bc_list_lock, flags);
+	cfq_bc = __find_cfq_bc(iopriv, cfqd);
+	if (cfq_bc)
+		kfree(cfq_bc_new);
+	else {
+		list_add_tail(&cfq_bc_new->cfq_bc_list,
+					&iopriv->cfq_bc_head);
+		cfq_bc = cfq_bc_new;
+	}
+	write_unlock_irqrestore(&iopriv->cfq_bc_list_lock, flags);
+
+	return cfq_bc;
+}
+
+void bc_init_ioprio(struct ub_iopriv *iopriv)
+{
+	INIT_LIST_HEAD(&iopriv->cfq_bc_head);
+	rwlock_init(&iopriv->cfq_bc_list_lock);
+	iopriv->ioprio = UB_IOPRIO_BASE;
+}
+
+static void inline bc_cfq_bc_check_empty(struct cfq_bc_data *cfq_bc)
+{
+	int i;
+
+	for (i = 0; i < CFQ_PRIO_LISTS; i++)
+		BUG_ON(!list_empty(&cfq_bc->rr_list[i]));
+
+	BUG_ON(!list_empty(&cfq_bc->cur_rr));
+	BUG_ON(!list_empty(&cfq_bc->busy_rr));
+	BUG_ON(!list_empty(&cfq_bc->idle_rr));
+}
+
+static void bc_release_cfq_bc(struct cfq_bc_data *cfq_bc)
+{
+	struct cfq_data *cfqd;
+	struct cfq_queue *cfqq, *tmp;
+	elevator_t *eq;
+
+	cfqd = cfq_bc->cfqd;
+	eq = cfqd->queue->elevator;
+
+	__cfq_put_async_queues(cfq_bc);
+
+	list_for_each_entry_safe(cfqq, tmp, &cfq_bc->empty_list, cfq_list) {
+		list_del_init(&cfqq->cfq_list);
+		cfqq->cfq_bc = NULL;
+	}
+
+	/* 
+	 * Note: this cfq_bc is already not in active list,
+	 * but can be still pointed from cfqd as active.
+	 */
+	if (cfqd->active_cfq_bc == cfq_bc)
+		cfqd->active_cfq_bc = NULL;
+
+	bc_cfq_bc_check_empty(cfq_bc);
+	list_del(&cfq_bc->cfq_bc_list);
+	kfree(cfq_bc);
+}
+
+void bc_fini_ioprio(struct ub_iopriv *iopriv)
+{
+	struct cfq_bc_data *cfq_bc;
+	struct cfq_bc_data *cfq_bc_tmp;
+	unsigned long flags;
+	spinlock_t *queue_lock;
+
+	/* 
+	 * Don't get cfq_bc_list_lock since ub is already dead,
+	 * but async cfqqs are still in hash list, consequently
+	 * queue_lock should be hold.
+	 */
+	list_for_each_entry_safe(cfq_bc, cfq_bc_tmp,
+			&iopriv->cfq_bc_head, cfq_bc_list) {
+		queue_lock = cfq_bc->cfqd->queue->queue_lock;
+		spin_lock_irqsave(queue_lock, flags);
+		bc_release_cfq_bc(cfq_bc);
+		spin_unlock_irqrestore(queue_lock, flags);
+	}
+}
+
+void bc_cfq_exit_queue(struct cfq_data *cfqd)
+{
+	struct cfq_bc_data *cfq_bc;
+	struct user_beancounter *ub;
+
+	local_irq_disable();
+	for_each_beancounter(ub) {
+		write_lock(&ub->iopriv.cfq_bc_list_lock);
+		cfq_bc = __find_cfq_bc(&ub->iopriv, cfqd);
+		if (!cfq_bc) {
+			write_unlock(&ub->iopriv.cfq_bc_list_lock);
+			continue;
+		}
+		bc_release_cfq_bc(cfq_bc);
+		write_unlock(&ub->iopriv.cfq_bc_list_lock);
+	}
+	local_irq_enable();
+}
+
+int bc_expired(struct cfq_data *cfqd)
+{
+	return time_after(jiffies, cfqd->slice_end) ?  1 : 0;
+}
+
+static inline int bc_empty(struct cfq_bc_data *cfq_bc)
+{
+	/*
+	 * consider BC as empty only if there is no requests
+	 * in elevator _and_ in driver
+	 */
+	if (cfq_bc->rqnum)
+		return 0;
+
+	if (cfq_bc->on_dispatch && cfq_bc->cfqd->cfq_ub_isolate)
+		return 0;
+
+	return 1;
+}
+
+static void bc_wait_start(struct cfq_bc_data *cfq_bc, unsigned long now)
+{
+	write_seqcount_begin(&cfq_bc->stat_lock);
+	cfq_bc->wait_start = now;
+	write_seqcount_end(&cfq_bc->stat_lock);
+}
+
+static void bc_wait_stop(struct cfq_bc_data *cfq_bc, unsigned long now)
+{
+	write_seqcount_begin(&cfq_bc->stat_lock);
+	cfq_bc->wait_time += now - cfq_bc->wait_start;
+	cfq_bc->wait_start = 0;
+	write_seqcount_end(&cfq_bc->stat_lock);
+}
+
+static unsigned int bc_wait_time(struct cfq_bc_data *cfq_bc, unsigned long now)
+{
+	unsigned long res;
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&cfq_bc->stat_lock);
+		res = cfq_bc->wait_time + now - (cfq_bc->wait_start ?: now);
+	} while (read_seqcount_retry(&cfq_bc->stat_lock, seq));
+
+	return jiffies_to_msecs(res);
+}
+
+static inline s64 cmp_iotime(u64 a, u64 b)
+{
+	return (s64)a - (s64)b;
+}
+
+static inline u64 max_iotime(u64 a, u64 b)
+{
+	return (cmp_iotime(a, b) > 0) ? a : b;
+}
+
+static inline u64 min_iotime(u64 a, u64 b)
+{
+	return (cmp_iotime(a, b) < 0) ? a : b;
+}
+
+/* cfq bc queue rb_tree helper function */
+static void bc_insert(struct cfq_data *cfqd, struct cfq_bc_data *cfq_bc)
+{
+	struct rb_node **p = &cfqd->cfq_bc_queue.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfq_bc_data *__cfq_bc;
+
+	while (*p) {
+		parent = *p;
+		__cfq_bc = rb_entry(parent, struct cfq_bc_data, cfq_bc_node);
+		/* important: if equal push right */
+		if (cmp_iotime(__cfq_bc->iotime, cfq_bc->iotime) > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&cfq_bc->cfq_bc_node, parent, p);
+	rb_insert_color(&cfq_bc->cfq_bc_node, &cfqd->cfq_bc_queue);
+}
+
+static void bc_remove(struct cfq_data *cfqd, struct cfq_bc_data *cfq_bc)
+{
+	rb_erase(&cfq_bc->cfq_bc_node, &cfqd->cfq_bc_queue);
+}
+
+static struct cfq_bc_data *bc_pick_next(struct cfq_data *cfqd,
+		struct cfq_bc_data *prev)
+{
+	struct cfq_bc_data *next;
+
+	if (prev && !prev->rqnum)
+		prev = NULL;
+
+	if (RB_EMPTY_ROOT(&cfqd->cfq_bc_queue))
+		return prev;
+
+	next = rb_entry(rb_first(&cfqd->cfq_bc_queue),
+			struct cfq_bc_data, cfq_bc_node);
+
+	if (prev && cmp_iotime(prev->iotime, next->iotime) < 0)
+		next = prev;
+
+	return next;
+}
+
+static void bc_enqueue(struct cfq_data *cfqd, struct cfq_bc_data *cfq_bc)
+{
+	unsigned long slice;
+
+	slice = cfqd->cfq_ub_slice * bc_ioprio_ratio[cfq_bc->ub_iopriv->ioprio];
+
+	/* fix (im)possible overlap at wakeup after extralong sleep */
+	cfq_bc->iotime = min_iotime(cfq_bc->iotime, cfqd->max_iotime + slice * 100);
+
+	/* put at the end of queue, minus gift up to slice */
+	cfq_bc->iotime = max_iotime(cfq_bc->iotime, cfqd->max_iotime - slice);
+
+	if (cfq_bc != cfqd->active_cfq_bc) {
+		bc_insert(cfqd, cfq_bc);
+		bc_wait_start(cfq_bc, jiffies);
+	}
+}
+
+static void bc_dequeue(struct cfq_data *cfqd, struct cfq_bc_data *cfq_bc)
+{
+	if (cfq_bc != cfqd->active_cfq_bc)
+		bc_remove(cfqd, cfq_bc);
+	if (cfq_bc->wait_start)
+		bc_wait_stop(cfq_bc, jiffies);
+}
+
+/* update bc iotime */
+static void bc_update(struct cfq_data *cfqd, struct cfq_bc_data *cfq_bc,
+		unsigned long delta)
+{
+	int ioprio;
+
+	ioprio = cfq_bc->ub_iopriv->ioprio;
+	delta *= bc_ioprio_ratio[ioprio];
+	cfq_bc->iotime += delta;
+
+	cfqd->max_iotime = max_iotime(cfqd->max_iotime, cfq_bc->iotime);
+}
+
+int bc_allow_preempt(struct cfq_data *cfqd, struct cfq_bc_data *new)
+{
+	struct cfq_bc_data *cur = cfqd->active_cfq_bc;
+	int cur_ioprio, new_ioprio;
+	unsigned long slice, delta;
+	unsigned long now = jiffies;
+
+	if (!cur)
+		return 1;
+
+	cur_ioprio = cur->ub_iopriv->ioprio;
+	new_ioprio = new->ub_iopriv->ioprio;
+
+	delta = (now - cfqd->slice_begin) * bc_ioprio_ratio[cur_ioprio];
+	slice = cfqd->cfq_ub_slice * bc_ioprio_ratio[new_ioprio] *
+		(UB_IOPRIO_MAX + cur_ioprio - new_ioprio) / (UB_IOPRIO_MAX * 2);
+	/* allow preempting only if new have iotime gap */
+	if (cmp_iotime(cur->iotime + delta, new->iotime + slice) < 0)
+		return 0;
+
+	return 1;
+}
+
+void bc_set_active(struct cfq_data *cfqd, struct cfq_bc_data *next_cfq_bc)
+{
+	struct cfq_bc_data *cfq_bc;
+	unsigned long now = jiffies;
+	unsigned long used_slice;
+
+	used_slice = now - cfqd->slice_begin;
+
+	/* update iotime of last active bc according to used time */
+	cfq_bc = cfqd->active_cfq_bc;
+	if (cfq_bc)
+		bc_update(cfqd, cfq_bc, used_slice);
+
+	/* peek first bc from queue if not forced */
+	if (!next_cfq_bc)
+		next_cfq_bc = bc_pick_next(cfqd, cfq_bc);
+
+	if (cfq_bc && cfq_bc != next_cfq_bc && cfq_bc->rqnum)
+		bc_insert(cfqd, cfq_bc);
+
+	if (cfq_bc) {
+		write_seqcount_begin(&cfq_bc->stat_lock);
+		if (cfq_bc != next_cfq_bc && cfq_bc->rqnum)
+			cfq_bc->wait_start = now;
+		cfq_bc->used_time += used_slice;
+		cfqd->slice_begin = 0;
+		write_seqcount_end(&cfq_bc->stat_lock);
+	}
+
+	cfqd->active_cfq_bc = next_cfq_bc;
+
+	if (!next_cfq_bc)
+		return;
+
+	next_cfq_bc->activations_count++;
+
+	cfqd->slice_begin = now;
+	cfqd->slice_end = now + cfqd->cfq_ub_slice;
+
+	if (next_cfq_bc != cfq_bc && next_cfq_bc->rqnum) {
+		bc_remove(cfqd, next_cfq_bc);
+		bc_wait_stop(next_cfq_bc, now);
+	}
+}
+
+void bc_schedule_active(struct cfq_data *cfqd)
+{
+	if (bc_expired(cfqd) || !cfqd->active_cfq_bc ||
+				bc_empty(cfqd->active_cfq_bc))
+		bc_set_active(cfqd, NULL);
+}
+
+void bc_inc_rqnum(struct cfq_queue *cfqq)
+{
+	struct cfq_bc_data *cfq_bc;
+
+	cfq_bc = cfqq->cfq_bc;
+
+	if (!cfq_bc->rqnum)
+		bc_enqueue(cfq_bc->cfqd, cfq_bc);
+
+	cfq_bc->rqnum++;
+}
+
+void bc_dec_rqnum(struct cfq_queue *cfqq)
+{
+	struct cfq_bc_data *cfq_bc;
+
+	cfq_bc = cfqq->cfq_bc;
+
+	cfq_bc->rqnum--;
+
+	if (!cfq_bc->rqnum)
+		bc_dequeue(cfq_bc->cfqd, cfq_bc);
+}
+
+unsigned long bc_set_ioprio(int ubid, int ioprio)
+{
+	struct user_beancounter *ub;
+
+	if (ioprio < UB_IOPRIO_MIN || ioprio >= UB_IOPRIO_MAX)
+		return -ERANGE;
+
+	ub = get_beancounter_byuid(ubid, 0);
+	if (!ub)
+		return -ESRCH;
+
+	ub->iopriv.ioprio = ioprio;
+	put_beancounter(ub);
+
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+static int bc_ioprio_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *bc;
+
+	bc = seq_beancounter(f);
+	seq_printf(f, "prio: %u\n", bc->iopriv.ioprio);
+
+	return 0;
+}
+
+static struct bc_proc_entry bc_ioprio_entry = {
+	.name = "ioprio",
+	.u.show = bc_ioprio_show,
+};
+
+static int bc_ioprio_queue_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *bc;
+	struct cfq_bc_data *cfq_bc;
+	unsigned long now = jiffies;
+
+	bc = seq_beancounter(f);
+
+	read_lock_irq(&bc->iopriv.cfq_bc_list_lock);
+	list_for_each_entry(cfq_bc, &bc->iopriv.cfq_bc_head, cfq_bc_list) {
+		struct cfq_data *cfqd;
+		struct kobject *parent;
+
+		cfqd = cfq_bc->cfqd;
+		parent = cfqd->queue->kobj.parent;
+		seq_printf(f, "\t%-10s%6lu %c%c %10u\n",
+				/*
+				 * this per-bc -> queue-data -> queue -> device
+				 * access is safe w/o additional locks, since
+				 * all the stuff above dies in the order shown
+				 * and we're holding the first element
+				 */
+				parent ? kobject_name(parent) : "?",
+				cfq_bc->rqnum,
+				cfq_bc->on_dispatch ? 'D' : ' ',
+				cfqd->active_cfq_bc == cfq_bc ? 'A' : ' ',
+				bc_wait_time(cfq_bc, now));
+	}
+	read_unlock_irq(&bc->iopriv.cfq_bc_list_lock);
+
+	return 0;
+}
+
+static struct bc_proc_entry bc_ioprio_queues_entry = {
+	.name = "ioprio_queues",
+	.u.show = bc_ioprio_queue_show,
+};
+
+static int bc_iostat(struct seq_file *f, struct user_beancounter *bc)
+{
+	struct cfq_bc_data *cfq_bc;
+
+	read_lock_irq(&bc->iopriv.cfq_bc_list_lock);
+	list_for_each_entry(cfq_bc, &bc->iopriv.cfq_bc_head, cfq_bc_list) {
+		struct cfq_data *cfqd;
+		struct kobject *parent;
+		const char *disk_name;
+		unsigned wait_time, used_time, wait_start, slice_begin;
+		struct cfq_bc_data *active_cfq_bc;
+		char state;
+		unsigned seq;
+
+		cfqd = cfq_bc->cfqd;
+		parent = cfqd->queue->kobj.parent;
+		disk_name = parent ? kobject_name(parent) : "none";
+
+		do {
+			seq = read_seqcount_begin(&cfq_bc->stat_lock);
+			active_cfq_bc = cfqd->active_cfq_bc;
+			wait_time = cfq_bc->wait_time;
+			used_time = cfq_bc->used_time;
+			wait_start = cfq_bc->wait_start;
+			slice_begin = cfqd->slice_begin;
+		} while (read_seqcount_retry(&cfq_bc->stat_lock, seq));
+
+		if (active_cfq_bc == cfq_bc) {
+			state = 'A';
+			if (slice_begin)
+				used_time += jiffies - slice_begin;
+		} else if (cfq_bc->rqnum) {
+			state = 'W';
+			if (wait_start)
+				wait_time += jiffies - wait_start;
+		} else
+			state = 'I';
+
+		seq_printf(f, "%s %u %c %lu %lu %lu %u %u %lu %lu\n",
+				disk_name, (unsigned)bc->ub_uid, state,
+				cfq_bc->rqnum, cfq_bc->on_dispatch,
+				cfq_bc->activations_count,
+				jiffies_to_msecs(wait_time),
+				jiffies_to_msecs(used_time),
+				cfq_bc->requests_dispatched,
+				cfq_bc->sectors_dispatched);
+	}
+	read_unlock_irq(&bc->iopriv.cfq_bc_list_lock);
+
+	return 0;
+}
+
+static int bc_iostat_single(struct seq_file *f, void *v)
+{
+	return bc_iostat(f, seq_beancounter(f));
+}
+
+static struct bc_proc_entry bc_iostat_entry = {
+	.name = "iostat",
+	.u.show = bc_iostat_single,
+};
+
+static void *bc_iostat_start(struct seq_file *f, loff_t *ppos)
+{
+	struct user_beancounter *ub;
+	unsigned long pos = *ppos;
+
+	rcu_read_lock();
+	for_each_beancounter(ub) {
+		if (ub->parent != NULL)
+			continue;
+		if (!pos--)
+			return ub;
+	}
+	return NULL;
+}
+
+static void *bc_iostat_next(struct seq_file *f, void *v, loff_t *ppos)
+{
+	struct user_beancounter *ub = v;
+	struct list_head *entry;
+
+	entry = &ub->ub_list;
+	list_for_each_continue_rcu(entry, &ub_list_head) {
+		ub = list_entry(entry, struct user_beancounter, ub_list);
+		if (ub->parent != NULL)
+			continue;
+		(*ppos)++;
+		return ub;
+	}
+	return NULL;
+}
+
+static int bc_iostat_show(struct seq_file *f, void *v)
+{
+	return bc_iostat(f, v);
+}
+
+static void bc_iostat_stop(struct seq_file *f, void *v)
+{
+	rcu_read_unlock();
+}
+
+static struct seq_operations iostat_seq_ops = {
+	.start = bc_iostat_start,
+	.next  = bc_iostat_next,
+	.stop  = bc_iostat_stop,
+	.show  = bc_iostat_show,
+};
+
+static int bc_iostat_open(struct inode *inode, struct file *filp)
+{
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return -EACCES;
+
+	return seq_open(filp, &iostat_seq_ops);
+}
+
+static struct file_operations bc_iostat_ops = {
+	.open		= bc_iostat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct bc_proc_entry bc_root_iostat_entry = {
+	.name = "iostat",
+	.u.fops = &bc_iostat_ops,
+};
+
+static int __init bc_ioprio_init(void)
+{
+	bc_register_proc_entry(&bc_ioprio_entry);
+	bc_register_proc_entry(&bc_ioprio_queues_entry);
+	bc_register_proc_entry(&bc_iostat_entry);
+	bc_register_proc_root_entry(&bc_root_iostat_entry);
+	return 0;
+}
+
+late_initcall(bc_ioprio_init);
+#endif
+
+EXPORT_SYMBOL(__find_cfq_bc);
+EXPORT_SYMBOL(bc_fini_ioprio);
+EXPORT_SYMBOL(bc_init_ioprio);
+EXPORT_SYMBOL(bc_findcreate_cfq_bc);
+EXPORT_SYMBOL(bc_cfq_exit_queue);
+EXPORT_SYMBOL(bc_expired);
+EXPORT_SYMBOL(bc_set_active);
+EXPORT_SYMBOL(bc_schedule_active);
+EXPORT_SYMBOL(bc_inc_rqnum);
+EXPORT_SYMBOL(bc_dec_rqnum);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/Kconfig kernel-2.6.18-417.el5-028stab121/kernel/ub/Kconfig
--- kernel-2.6.18-417.el5.orig/kernel/ub/Kconfig	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/Kconfig	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,105 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "User resources"
+
+config USER_RESOURCE
+	bool "Enable user resource accounting"
+	default y
+	help 
+          This patch provides accounting and allows to configure
+          limits for user's consumption of exhaustible system resources.
+          The most important resource controlled by this patch is unswappable 
+          memory (either mlock'ed or used by internal kernel structures and 
+          buffers). The main goal of this patch is to protect processes
+          from running short of important resources because of an accidental
+          misbehavior of processes or malicious activity aiming to ``kill'' 
+          the system. It is worth mentioning that resource limits configured
+          by setrlimit(2) do not give an acceptable level of protection
+          because they cover only a small fraction of resources and work on
+          a per-process basis.  Per-process accounting doesn't prevent malicious
+          users from spawning a lot of resource-consuming processes.
+
+config USER_RSS_ACCOUNTING
+	bool "Account physical memory usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows estimating per-beancounter physical memory usage.
+          The implemented algorithm accounts for shared pages of memory as
+          well, dividing them by the number of beancounters that use the page.
+
+config UBC_IO_ACCT
+	bool "Account disk I/O"
+	default y
+	depends on USER_RESOURCE
+	help
+	  When on, this option allows seeing disk I/O activity caused by
+	  tasks from each UB.
+
+config UBC_IO_PRIO
+	bool "Disk I/O priority"
+	default y
+	depends on UBC_IO_ACCT && IOSCHED_CFQ
+	help
+	  This option controls whether to build CFQ I/O scheduler
+	  with support of beancounter I/O priority.
+
+config USER_SWAP_ACCOUNTING
+	bool "Account swap usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows accounting of swap usage.
+
+config USER_RESOURCE_PROC
+	bool "Report resource usage in /proc"
+	default y
+	depends on USER_RESOURCE
+	help
+          Allows a system administrator to inspect resource accounts and limits.
+
+config UBC_DEBUG
+	bool "User resources debug features"
+	default n
+	depends on USER_RESOURCE
+	help
+	  Enables to setup debug features for user resource accounting
+
+config UBC_DEBUG_KMEM
+	bool "Debug kmemsize with cache counters"
+	default n
+	depends on UBC_DEBUG
+	help
+	  Adds /proc/user_beancounters_debug entry to get statistics
+	  about cache usage of each beancounter
+
+config UBC_KEEP_UNUSED
+	bool "Keep unused beancounter alive"
+	default y
+	depends on UBC_DEBUG
+	help
+	  If on, unused beancounters are kept on the hash and maxheld value
+	  can be looked through.
+
+config UBC_DEBUG_ITEMS
+	bool "Account resources in items rather than in bytes"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When true some of the resources (e.g. kmemsize) are accounted
+	  in items instead of bytes.
+
+config UBC_UNLIMITED
+	bool "Use unlimited ubc settings"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When ON all limits and barriers are set to max values.
+
+endmenu
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/Makefile kernel-2.6.18-417.el5-028stab121/kernel/ub/Makefile
--- kernel-2.6.18-417.el5.orig/kernel/ub/Makefile	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/Makefile	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,16 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-y := ub_sys.o beancounter.o ub_dcache.o ub_mem.o ub_misc.o \
+	 ub_pages.o ub_stat.o ub_oom.o
+
+obj-$(CONFIG_NET) += ub_net.o
+obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
+obj-$(CONFIG_USER_RESOURCE_PROC)  += ub_proc.o
+obj-$(CONFIG_UBC_IO_ACCT) += io_acct.o
+obj-$(CONFIG_UBC_IO_PRIO) += io_prio.o
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_dcache.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_dcache.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_dcache.c	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_dcache.c	2017-01-13 08:40:18.000000000 -0500
@@ -0,0 +1,676 @@
+/*
+ *  kernel/ub/ub_dcache.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/dcache.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/fs.h>
+#include <linux/kmem_slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/stop_machine.h>
+#include <linux/cpumask.h>
+#include <linux/nmi.h>
+#include <linux/rwsem.h>
+#include <linux/rcupdate.h>
+#include <asm/bitops.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_dcache.h>
+#include <ub/ub_dcache_op.h>
+
+/*
+ * Locking
+ *                          traverse  dcache_lock  d_lock
+ *        ub_dentry_charge   +         -            +
+ *      ub_dentry_uncharge   +         +            -
+ * ub_dentry_charge_nofail   +         +            -
+ *
+ * d_inuse changes are atomic, with special handling of "not in use" <->
+ * "in use" (-1 <-> 0) transitions.  We have two sources of non-atomicity
+ * here: (1) in many operations we need to change d_inuse of both dentry and
+ * its parent, and (2) on state transitions we need to adjust the account.
+ *
+ * Regarding (1): we do not have (and do not want) a single lock covering all
+ * operations, so in general it's impossible to get a consistent view of
+ * a tree with respect to d_inuse counters (except by swsuspend).  It also
+ * means if a dentry with d_inuse of 0 gets one new in-use child and loses
+ * one, its d_inuse counter will go either the 0 -> 1 -> 0 path or 0 -> -1 -> 0,
+ * and we can't say which way.
+ * Note that path -1 -> 0 -> -1 can't turn into -1 -> -2 -> -1, since
+ * uncharge can be done only after return from charge (with d_genocide being
+ * the only apparent exception).
+ * Regarding (2): there is a similar uncertainty with the dcache account.
+ * If the account is equal to the limit, one more dentry is started to be
+ * used and one is put, the account will either hit the limit (and an error
+ * will be returned), or decrement will happen before increment.
+ *
+ * These races do not really matter.
+ * The only things we want are:
+ *  - if a system is suspended with no in-use dentries, all d_inuse counters
+ *    should be correct (-1);
+ *  - d_inuse counters should always be >= -1.
+ * This holds if ->parent references are accessed and maintained properly.
+ * In subtle moments (like d_move) dentries exchanging their parents should
+ * both be in-use.  At d_genocide time, lookups and charges are assumed to be
+ * impossible.
+ */
+
+/*
+ * Hierarchical accounting
+ * UB argument must NOT be NULL
+ */
+
+static int do_charge_dcache(struct user_beancounter *ub, unsigned long size, 
+		enum ub_severity sv)
+{	/* charge size into UB_KMEMSIZE and UB_DCACHESIZE of one ub atomically */
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);	/* both charges under one lock */
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv))
+		goto out_mem;
+	if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
+		goto out_dcache;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_dcache:	/* dcachesize failed: roll back the kmemsize charge */
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+out_mem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_uncharge_dcache(struct user_beancounter *ub, 
+		unsigned long size)
+{	/* inverse of do_charge_dcache(): release both resources */
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+	__uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static int charge_dcache(struct user_beancounter *ub, unsigned long size, 
+		enum ub_severity sv)
+{	/* hierarchical charge: walk up the parent chain; all-or-nothing */
+	struct user_beancounter *p, *q;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_charge_dcache(p, size, sv))
+			goto unroll;
+	}
+	return 0;
+
+unroll:	/* p failed; undo the charges taken on [ub, p) */
+	for (q = ub; q != p; q = q->parent)
+		do_uncharge_dcache(q, size);
+	return -ENOMEM;
+}
+
+void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
+{	/* hierarchical uncharge over the whole parent chain */
+	for (; ub != NULL; ub = ub->parent)
+		do_uncharge_dcache(ub, size);
+}
+
+/*
+ * Simple helpers to do maintain account and d_ub field.
+ */
+
+static inline int d_charge(struct dentry_beancounter *d_bc)
+{	/* charge d_ubsize to the current exec ub; returns -1 on failure */
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());	/* ref is kept in d_bc->d_ub on success */
+	if (charge_dcache(ub, d_bc->d_ubsize, UB_SOFT)) {
+		put_beancounter(ub);
+		return -1;
+	}
+	d_bc->d_ub = ub;
+	return 0;
+}
+
+static inline void d_forced_charge(struct dentry_beancounter *d_bc)
+{	/* same as d_charge() but UB_FORCE: cannot fail, may exceed limits */
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());
+	charge_dcache(ub, d_bc->d_ubsize, UB_FORCE);
+	d_bc->d_ub = ub;
+}
+
+/*
+ * Minor helpers
+ */
+
+extern kmem_cache_t *dentry_cache; 
+extern kmem_cache_t *inode_cachep;
+static struct rw_semaphore ub_dentry_alloc_sem;	/* tentative def; initialized by DECLARE_RWSEM() below */
+
+static inline unsigned int dentry_memusage(void)
+{	/* per-object memory footprint of the dentry cache */
+	return dentry_cache->objuse;
+}
+
+static inline unsigned int inode_memusage(void)
+{	/* per-object memory footprint of the inode cache */
+	return inode_cachep->objuse;
+}
+
+static inline unsigned long d_charge_size(struct dentry *dentry)
+{	/* bytes to account for one dentry: inode + dentry + external name, if any */
+	/* dentry's d_name is already set to appropriate value (see d_alloc) */
+	return inode_cachep->objuse + dentry_cache->objuse +
+		(dname_external(dentry) ?
+		 kmem_obj_memusage((void *)dentry->d_name.name) : 0);
+}
+
+/*
+ * Entry points from dcache.c
+ */
+
+/* 
+ * Set initial d_inuse on d_alloc.
+ * Called with no locks, preemption disabled.
+ */
+int __ub_dentry_alloc(struct dentry *dentry)
+{	/* charge a freshly allocated dentry to the exec ub; 0 or -ENOMEM */
+	struct dentry_beancounter *d_bc;
+
+	d_bc = &dentry->dentry_bc;
+	d_bc->d_ub = get_beancounter(get_exec_ub());
+	atomic_set(&d_bc->d_inuse, INUSE_INIT); /* see comment in ub_dcache.h */
+	d_bc->d_ubsize = d_charge_size(dentry);
+
+	if (charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD))
+		goto failure;
+	return 0;
+
+failure:	/* drop the ub reference taken above; caller frees the dentry */
+	put_beancounter(d_bc->d_ub);
+	d_bc->d_ub = NULL;
+	return -ENOMEM;
+}
+void __ub_dentry_alloc_start(void)
+{	/* enter allocation section; blocks the accounting mode switch */
+	down_read(&ub_dentry_alloc_sem);
+	current->task_bc.dentry_alloc = 1;
+}
+
+void __ub_dentry_alloc_end(void)
+{	/* leave allocation section started by __ub_dentry_alloc_start() */
+	current->task_bc.dentry_alloc = 0;
+	up_read(&ub_dentry_alloc_sem);
+}
+
+/*
+ * It is assumed that parent is already in use, so traverse upwards is
+ * limited to one ancestor only.
+ * Called under d_lock and rcu_read_lock.
+ */
+int __ub_dentry_charge(struct dentry *dentry)
+{	/* charge on "not in use" -> "in use" transition; 0 = ok, -1 = dentry dropped */
+	struct dentry_beancounter *d_bc;
+	struct dentry *parent;
+	int ret;
+
+	if (ub_dget_testone(dentry)) {	/* true only on the -1 => 0 transition */
+		d_bc = &dentry->dentry_bc;
+		/* state transition -1 => 0 */
+		if (d_charge(d_bc))
+			goto failure;
+
+		if (dentry != dentry->d_parent) {	/* non-root: parent must already be in use */
+			parent = dentry->d_parent;
+			if (ub_dget_testone(parent))
+				BUG();
+		}
+	}
+	return 0;
+
+failure:
+	/*
+	 * Here we would like to fail the lookup.
+	 * It is not easy: if d_lookup fails, callers expect that a dentry
+	 * with the given name doesn't exist, and create a new one.
+	 * So, first we forcedly charge for this dentry.
+	 * Then try to remove it from cache safely.  If it turns out to be
+	 * possible, we can return error.
+	 */
+	d_forced_charge(d_bc);
+
+	if (dentry != dentry->d_parent) {
+		parent = dentry->d_parent;
+		if (ub_dget_testone(parent))
+			BUG();
+	}
+
+	ret = 0;
+	if (spin_trylock(&dcache_lock)) {	/* best effort only; give up if contended */
+		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&dentry->d_lock);	/* caller holds d_lock and rcu_read_lock */
+			spin_unlock(&dcache_lock);
+			rcu_read_unlock();
+			shrink_dcache_parent(dentry);
+			rcu_read_lock();
+			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
+		}
+		if (atomic_read(&dentry->d_count) == 1) {	/* we hold the only ref: safe to unhash */
+			__d_drop(dentry);
+			ret = -1;
+		}
+		spin_unlock(&dcache_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Go up in the tree decreasing d_inuse.
+ * Called under dcache_lock.
+ */
+void __ub_dentry_uncharge(struct dentry *dentry)
+{	/* uncharge on "in use" -> "not in use" transition, propagating upwards */
+	struct dentry *parent;
+	struct user_beancounter *ub;
+	unsigned long size;
+
+	/* go up until the state doesn't change or the root is reached */
+	size = dentry->dentry_bc.d_ubsize;
+	ub = dentry->dentry_bc.d_ub;
+	while (ub_dput_testzero(dentry)) {	/* true only on the 0 => -1 transition */
+		/* state transition 0 => -1 */
+		uncharge_dcache(ub, size);
+		put_beancounter(ub);	/* drop the ref taken at charge time */
+
+		parent = dentry->d_parent;
+		if (dentry == parent)	/* reached the root */
+			break;
+
+		dentry = parent;
+		size = dentry->dentry_bc.d_ubsize;
+		ub = dentry->dentry_bc.d_ub;
+	}
+}
+
+/* 
+ * Forced charge for __dget_locked, where API doesn't allow to return error.
+ * Called under dcache_lock.
+ */
+void __ub_dentry_charge_nofail(struct dentry *dentry)
+{	/* like __ub_dentry_charge() but never fails (UB_FORCE) */
+	struct dentry *parent;
+
+	while (ub_dget_testone(dentry)) {
+		/* state transition -1 => 0 */
+		d_forced_charge(&dentry->dentry_bc);
+
+		parent = dentry->d_parent;
+		if (dentry == parent)	/* reached the root */
+			break;
+		dentry = parent;
+	}
+}
+
+/*
+ * Adaptive accounting
+ */
+
+int ub_dentry_on;
+int ub_dentry_alloc_barrier;
+EXPORT_SYMBOL(ub_dentry_on);
+
+static DEFINE_PER_CPU(int, checkcnt);
+static unsigned long checklowat = 0;
+static unsigned long checkhiwat = ULONG_MAX;
+
+static int sysctl_ub_dentry_chk = 10;
+#define sysctl_ub_lowat	sysctl_ub_watermark[0]
+#define sysctl_ub_hiwat sysctl_ub_watermark[1]
+static DECLARE_RWSEM(ub_dentry_alloc_sem);
+/* 1024th of lowmem size */
+static unsigned int sysctl_ub_watermark[2] = {0, 100};
+
+
+static int ub_dentry_acctinit(struct dentry *dentry)
+{	/* walk callback: reset one dentry's bc state to "not in use" */
+	struct dentry_beancounter *d_bc;
+
+	d_bc = &dentry->dentry_bc;
+	d_bc->d_ub = NULL;
+	atomic_set(&d_bc->d_inuse, -1);	/* -1 == not in use */
+	if (dname_external(dentry)) {
+		struct page *page;
+		page = virt_to_page(dentry->d_name.name);
+		if (!PageSlab(page) || page_get_cache(page) == NULL) {	/* sanity check on external name storage */
+			printk("Problem with name, dentry %p, parent %p, "
+					"name %p len %d\n",
+					dentry, dentry->d_parent,
+					dentry->d_name.name,
+					dentry->d_name.len);
+			printk("   de %p name %.10s\n",
+					dentry, dentry->d_name.name);
+			d_bc->d_ubsize = 0;	/* don't account a name we can't size */
+			return 0;
+		}
+	}
+	d_bc->d_ubsize = d_charge_size(dentry);
+	return 0;
+}
+
+static int ub_dentry_acctcount(struct dentry *dentry)
+{	/* walk callback: recompute d_inuse from external d_count references */
+	struct dentry_beancounter *d_bc;
+	struct dentry *child;
+	int count;
+
+	count = 0;
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
+		count++;	/* children each hold one d_count ref; exclude them */
+
+	d_bc = &dentry->dentry_bc;
+	count = atomic_read(&dentry->d_count) - count;
+	if (count) {	/* externally referenced: charge and set d_inuse to count-1 */
+		__ub_dentry_charge_nofail(dentry);
+		if (count > 1)
+			atomic_add(count - 1, &d_bc->d_inuse);
+	}
+
+	return 0;
+}
+
+static int ub_dentry_acctdrop(struct dentry *dentry)
+{	/* walk callback: drop the account of an in-use dentry (accounting off) */
+	struct dentry_beancounter *d_bc;
+
+	d_bc = &dentry->dentry_bc;
+	if (atomic_read(&d_bc->d_inuse) < 0)	/* not in use: nothing charged */
+		return 0;
+	atomic_set(&d_bc->d_inuse, -1);
+	uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
+	put_beancounter(d_bc->d_ub);
+	return 0;
+}
+
+extern void kmem_cache_free_block(kmem_cache_t *cachep,
+		struct kmem_list3 *l3, void **objpp,
+		int nr_objects, int node);
+
+static int ub_dentry_walk_node(int (*fun)(struct dentry *), int node)
+{	/* apply fun to every allocated dentry on one NUMA node; runs under stop_machine */
+	kmem_cache_t *cachep;
+	struct array_cache *ac;
+	struct slab *slabp;
+	char *objp;
+	int cpu, i, sz, r, n;
+	struct kmem_list3 *l3;
+	unsigned long map[PAGE_SIZE / sizeof(struct dentry)
+					/ BITS_PER_LONG + 1];	/* free-object bitmap, one bit per slab object */
+
+	cachep = dentry_cache;
+	if (cachep->num >= sizeof(map) * 8)	/* bitmap too small for objects-per-slab */
+		return -E2BIG;
+
+	l3 = cachep->nodelists[node];
+	/* drain all CPU caches to have up-to-date free map */
+
+#ifdef CONFIG_NUMA
+	/* walk through all nodes and drain alien caches */
+	for_each_online_node (n) {
+		if (!cachep->nodelists[n]->alien)
+			continue;
+		ac = cachep->nodelists[n]->alien[node];
+		if (!ac)
+			continue;
+		kmem_cache_free_block(cachep, cachep->nodelists[node],
+				ac->entry, ac->avail, node);
+		ac->avail = 0;
+	}
+#endif
+
+	ac = l3->shared;	/* drain the node-shared cache */
+	kmem_cache_free_block(cachep, l3, ac->entry, ac->avail, node);
+	ac->avail = 0;
+	for_each_online_cpu(cpu) {	/* drain per-CPU caches back to their home nodes */
+		ac = cachep->array[cpu];
+		n = cpu_to_node(cpu);
+		kmem_cache_free_block(cachep, cachep->nodelists[n],
+				ac->entry, ac->avail, n);
+		ac->avail = 0;
+	}
+
+	list_for_each_entry(slabp, &l3->slabs_full, list) {	/* full slabs: every object is live */
+		touch_nmi_watchdog();
+		for (i = 0, objp = slabp->s_mem;
+		     i < cachep->num;
+		     i++, objp += cachep->buffer_size) {
+#if SLAB_DEBUG
+			r = (*fun)((struct dentry *)
+					(objp + cachep->obj_offset));
+#else
+			r = (*fun)((struct dentry *)objp);
+#endif
+			if (r)
+				return r;
+		}
+	}
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list) {	/* partial slabs: skip free objects */
+		touch_nmi_watchdog();
+		memset(map, 0xff, sizeof(map));	/* start with all bits set ("allocated") */
+		for (i = slabp->free, r = 0;
+		     i != BUFCTL_END;
+		     i = slab_bufctl(slabp)[i], r++) {
+			if (r > cachep->num)	/* corrupted free list guard */
+				return -1;
+			__clear_bit(i, map);	/* mark object as free */
+		}
+		sz = sizeof(map) * BITS_PER_LONG;	/* bit-count upper bound; overshoot is harmless, loop is capped by cachep->num */
+		for (i = find_first_bit(map, sz);
+		     i < cachep->num;
+		     i = find_next_bit(map, sz, i + 1)) {
+			objp = slabp->s_mem + i * cachep->buffer_size;
+#if SLAB_DEBUG
+			r = (*fun)((struct dentry *)
+					(objp + cachep->obj_offset));
+#else
+			r = (*fun)((struct dentry *)objp);
+#endif
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+static int ub_dentry_walk(int (*fun)(struct dentry *))
+{	/* apply fun to every allocated dentry on every online node */
+	int node;
+	int err;
+
+	for_each_online_node (node) {
+		if ((err = ub_dentry_walk_node(fun, node)) != 0)
+			return err;
+	}
+	return 0;
+}
+
+static int ub_dentry_accton(void *data)
+{	/* stop_machine callback: initialize and enable dcache accounting */
+	struct user_beancounter *ub;
+	int err;
+
+	ub = get_exec_ub();
+	set_exec_ub(get_ub0());	/* charge existing dentries to ub0, not to us */
+	err = ub_dentry_walk(&ub_dentry_acctinit);
+	if (!err)
+		err = ub_dentry_walk(&ub_dentry_acctcount);
+	set_exec_ub(ub);	/* restore the caller's exec ub */
+	if (err == 0)
+		ub_dentry_on = 1;
+	return err;
+}
+
+static int ub_dentry_acctoff(void *data)
+{	/* stop_machine callback: drop all charges and disable dcache accounting */
+	int ret;
+	ret = ub_dentry_walk(&ub_dentry_acctdrop);
+	if (ret == 0)
+		ub_dentry_on = 0;
+	return ret;
+}
+
+/*
+ * Main function turning dcache accounting on and off.
+ * Called with preemption disabled (for caller's convenience).
+ */
+static void ub_dentry_switch(int onoff, unsigned long pages, int (*fun)(void *))
+{	/* run fun under stop_machine to flip accounting; caller has preemption off */
+	static char *s[] = { "off", "on" };
+	unsigned long start_jiffies;
+	int err, tm;
+
+	start_jiffies = jiffies;
+	preempt_enable();	/* the switch sleeps; undo caller's preempt_disable */
+	ub_dentry_alloc_barrier = 1;
+	/* ensure ub_dentry_alloc_barrier is visible on all CPUs */
+	mb();
+	synchronize_rcu();	/* wait for in-flight lookups to finish */
+	down_write(&ub_dentry_alloc_sem);	/* exclude __ub_dentry_alloc_start sections */
+	if (ub_dentry_on == onoff)	/* someone else already switched */
+		goto done;
+
+	printk("UBC: preparing to turn dcache accounting %s, "
+			"size %lu pages, watermarks %lu %lu\n",
+			s[onoff], pages, checklowat, checkhiwat);
+	err = stop_machine_run(fun, NULL, NR_CPUS);
+	if (err) {
+		printk(KERN_ERR "UBC: ERROR: dcache accounting switch %d\n",
+				err);
+		preempt_disable();	/* disable further switch attempts */
+		checklowat = 0;
+		checkhiwat = ULONG_MAX;
+		sysctl_ub_dentry_chk = INT_MAX;
+		preempt_enable();
+	} else {
+		tm = jiffies_to_msecs(jiffies - start_jiffies);
+		printk("UBC: turning dcache accounting %s succeeded, "
+				"usage %lu, time %u.%03u\n",
+				s[onoff],
+				get_ub0()->ub_parms[UB_DCACHESIZE].held,
+				tm / 1000, tm % 1000);
+	}
+
+done:
+	ub_dentry_alloc_barrier = 0;
+	up_write(&ub_dentry_alloc_sem);
+	preempt_disable();	/* restore caller's preemption state */
+}
+
+void ub_dentry_checkup(void)
+{	/* periodic check (every sysctl_ub_dentry_chk calls) of dcache size vs watermarks */
+	int *p;
+	unsigned long pages;
+
+	preempt_disable();
+	p = &__get_cpu_var(checkcnt);	/* per-CPU call counter */
+	if (++*p > sysctl_ub_dentry_chk) {
+		*p = 0;
+		pages = dentry_cache->grown
+			- dentry_cache->reaped
+			- dentry_cache->shrunk;	/* pages currently held by the dentry cache */
+		pages <<= dentry_cache->gfporder;
+		if (ub_dentry_on) {
+			if (pages < checklowat)	/* shrank below low watermark: turn off */
+				ub_dentry_switch(0, pages, &ub_dentry_acctoff);
+		} else {
+			if (pages >= checkhiwat)	/* grew past high watermark: turn on */
+				ub_dentry_switch(1, pages, &ub_dentry_accton);
+		}
+	}
+	preempt_enable();
+}
+
+static void ub_dentry_set_limits(unsigned long pages, unsigned long cap)
+{	/* recompute watermarks as 1024ths of pages, clamped to cap */
+	down_write(&ub_dentry_alloc_sem);
+	preempt_disable();
+	checklowat = (pages >> 10) * sysctl_ub_lowat;
+	checkhiwat = (pages >> 10) * sysctl_ub_hiwat;
+	if (checkhiwat > cap) {	/* keep lowat/hiwat ratio when capping */
+		checkhiwat = cap;
+		checklowat = cap / sysctl_ub_hiwat * sysctl_ub_lowat;
+	}
+	preempt_enable();
+	up_write(&ub_dentry_alloc_sem);
+}
+
+static int ub_dentry_proc_handler(ctl_table *ctl, int write, struct file *filp,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{	/* sysctl handler for dentry_watermark: re-derive limits after a write */
+	int r;
+
+	r = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!r && write)
+		ub_dentry_set_limits(totalram_pages - totalhigh_pages,
+				ULONG_MAX);	/* base on lowmem size; no cap */
+	return r;
+}
+
+static ctl_table ub_dentry_sysctl_table[] = {
+	{
+		.ctl_name	= 1000,
+		.procname	= "dentry_check",	/* check period, in calls to ub_dentry_checkup */
+		.data		= &sysctl_ub_dentry_chk,
+		.maxlen		= sizeof(sysctl_ub_dentry_chk),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 1001,
+		.procname	= "dentry_watermark",	/* pair: {lowat, hiwat} in 1024ths of lowmem */
+		.data		= &sysctl_ub_lowat,
+		.maxlen		= sizeof(sysctl_ub_lowat) * 2,
+		.mode		= 0644,
+		.proc_handler	= &ub_dentry_proc_handler,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table ub_dentry_sysctl_root[] = {
+	{
+		.ctl_name	= 23681,
+		.procname	= "ubc",	/* /proc/sys/ubc/ */
+		.mode		= 0555,
+		.child		= ub_dentry_sysctl_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static int __init ub_dentry_init(void)
+{	/* boot-time setup: initial watermarks plus the ubc sysctl subtree */
+	/*
+	 * Initial watermarks are limited, to limit walk time.
+	 * 384MB translates into 0.8 sec on PIII 866MHz.
+	 */
+	ub_dentry_set_limits(totalram_pages - totalhigh_pages,
+			384 * 1024 * 1024 / PAGE_SIZE);
+	if (register_sysctl_table(ub_dentry_sysctl_root, 0) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+__initcall(ub_dentry_init);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_mem.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_mem.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_mem.c	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_mem.c	2017-01-13 08:40:31.000000000 -0500
@@ -0,0 +1,466 @@
+/*
+ *  kernel/ub/ub_mem.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/kmem_slab.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_page.h>
+#include <ub/ub_hash.h>
+#include <ub/proc.h>
+
+/*
+ * Initialization
+ */
+
+/*
+ * Slab accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+
+#define CC_HASH_SIZE	1024
+static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];
+spinlock_t cc_lock;
+
+static void __free_cache_counters(struct user_beancounter *ub,
+		kmem_cache_t *cachep)
+{	/* remove counters matching ub or cachep; callers pass NULL for exactly one */
+	struct ub_cache_counter *cc, **pprev, *del;
+	int i;
+	unsigned long flags;
+
+	del = NULL;	/* collect victims here; kfree outside the spinlock */
+	spin_lock_irqsave(&cc_lock, flags);
+	for (i = 0; i < CC_HASH_SIZE; i++) {
+		pprev = &cc_hash[i];
+		cc = cc_hash[i];
+		while (cc != NULL) {
+			if (cc->ub != ub && cc->cachep != cachep) {	/* matches neither filter: keep */
+				pprev = &cc->next;
+				cc = cc->next;
+				continue;
+			}
+
+			list_del(&cc->ulist);	/* unlink from the ub's counter list */
+			*pprev = cc->next;	/* unlink from the hash chain */
+			cc->next = del;
+			del = cc;
+			cc = *pprev;
+		}
+	}
+	spin_unlock_irqrestore(&cc_lock, flags);
+
+	while (del != NULL) {	/* now free the collected counters */
+		cc = del->next;
+		kfree(del);
+		del = cc;
+	}
+}
+
+void ub_free_counters(struct user_beancounter *ub)
+{	/* drop all cache counters belonging to a dying beancounter */
+	__free_cache_counters(ub, NULL);
+}
+
+void ub_kmemcache_free(kmem_cache_t *cachep)
+{	/* drop all cache counters for a cache being destroyed */
+	__free_cache_counters(NULL, cachep);
+}
+
+void __init init_cache_counters(void)
+{	/* boot-time init of the (ub, cachep) counter hash */
+	memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0]));
+	spin_lock_init(&cc_lock);
+}
+
+#define cc_hash_fun(ub, cachep)	(				\
+	(((unsigned long)(ub) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^		\
+	 ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(cachep) >> (BITS_PER_LONG / 2))	\
+	) & (CC_HASH_SIZE - 1))
+
+static int change_slab_charged(struct user_beancounter *ub,
+		kmem_cache_t *cachep, long val)
+{	/* add val to the (ub, cachep) counter, creating it on first use */
+	struct ub_cache_counter *cc, *new_cnt, **pprev;
+	unsigned long flags;
+
+	new_cnt = NULL;
+again:
+	spin_lock_irqsave(&cc_lock, flags);
+	cc = cc_hash[cc_hash_fun(ub, cachep)];
+	while (cc) {
+		if (cc->ub == ub && cc->cachep == cachep)
+			goto found;
+		cc = cc->next;
+	}
+
+	if (new_cnt != NULL)	/* allocated on a previous pass: insert it now */
+		goto insert;
+
+	spin_unlock_irqrestore(&cc_lock, flags);	/* drop lock to allocate */
+
+	new_cnt = kmalloc(sizeof(*new_cnt), GFP_ATOMIC);
+	if (new_cnt == NULL)
+		return -ENOMEM;
+
+	new_cnt->counter = 0;
+	new_cnt->ub = ub;
+	new_cnt->cachep = cachep;
+	goto again;	/* retry: someone may have inserted meanwhile */
+
+insert:
+	pprev = &cc_hash[cc_hash_fun(ub, cachep)];
+	new_cnt->next = *pprev;
+	*pprev = new_cnt;
+	list_add(&new_cnt->ulist, &ub->ub_cclist);
+	cc = new_cnt;
+	new_cnt = NULL;
+
+found:
+	cc->counter += val;
+	spin_unlock_irqrestore(&cc_lock, flags);
+	if (new_cnt)	/* lost the race: our preallocated node is unused */
+		kfree(new_cnt);
+	return 0;
+}
+
+static inline int inc_slab_charged(struct user_beancounter *ub,
+	kmem_cache_t *cachep)
+{	/* +1 object charged to (ub, cachep); may fail with -ENOMEM */
+	return change_slab_charged(ub, cachep, 1);
+}
+
+static inline void dec_slab_charged(struct user_beancounter *ub,
+	kmem_cache_t *cachep)
+{	/* -1 object; the counter must already exist, so failure is a bug */
+	if (change_slab_charged(ub, cachep, -1) < 0)
+		BUG();
+}
+
+#include <linux/vmalloc.h>
+
+#define inc_pages_charged(ub, order)	ub_percpu_add(ub, \
+					pages_charged, 1 << order)
+#define dec_pages_charged(ub, order)	ub_percpu_sub(ub, \
+					pages_charged, 1 << order)
+
+#ifdef CONFIG_PROC_FS
+static int bc_kmem_debug_show(struct seq_file *f, void *v)
+{	/* /proc seq_file: per-beancounter kmem debug statistics */
+	struct user_beancounter *ub;
+	struct ub_cache_counter *cc;
+	long pages, vmpages, pbcs;
+	int i;
+
+	ub = seq_beancounter(f);
+
+	pbcs = __ub_stat_get(ub, pbcs);
+	pages = vmpages = 0;
+	for_each_online_cpu(i) {	/* sum the per-CPU deltas */
+		pages += per_cpu_ptr(ub->ub_percpu, i)->pages_charged;
+		vmpages += per_cpu_ptr(ub->ub_percpu, i)->vmalloc_charged;
+		pbcs += per_cpu_ptr(ub->ub_percpu, i)->pbcs;
+	}
+	if (pages < 0)	/* per-CPU sums can transiently go negative */
+		pages = 0;
+	if (vmpages < 0)
+		vmpages = 0;
+
+	seq_printf(f, bc_proc_lu_lu_fmt, "pages", pages, PAGE_SIZE);
+	seq_printf(f, bc_proc_lu_lu_fmt, "vmalloced", vmpages, PAGE_SIZE);
+	seq_printf(f, bc_proc_lu_lu_fmt, "pbcs", max(0l, pbcs),
+			sizeof(struct page_beancounter));
+
+	spin_lock_irq(&cc_lock);	/* walk this ub's per-cache counters */
+	list_for_each_entry (cc, &ub->ub_cclist, ulist) {
+		kmem_cache_t *cachep;
+
+		cachep = cc->cachep;
+		seq_printf(f, bc_proc_lu_lu_fmt,
+				cachep->name, cc->counter,
+				(unsigned long)cachep->objuse);
+	}
+	spin_unlock_irq(&cc_lock);
+	return 0;
+}
+
+static struct bc_proc_entry bc_kmem_debug_entry = {
+	.name = "kmem_debug",
+	.u.show = bc_kmem_debug_show,
+};
+
+static int __init bc_kmem_debug_init(void)
+{	/* register the kmem_debug entry with the bc proc framework */
+	bc_register_proc_entry(&bc_kmem_debug_entry);
+	return 0;
+}
+
+late_initcall(bc_kmem_debug_init);
+#endif
+
+#else
+#define inc_slab_charged(ub, cache)		(0)
+#define dec_slab_charged(ub, cache)		do { } while (0)
+#define inc_pages_charged(ub, cache) 		do { } while (0)
+#define dec_pages_charged(ub, cache)		do { } while (0)
+#endif
+
+static inline struct user_beancounter **slab_ub_ref(kmem_cache_t *cachep,
+		void *objp)
+{	/* locate the ub pointer slot stored alongside a SLAB_UBC object */
+	struct slab *slabp;
+	int objnr;
+
+	BUG_ON(!(cachep->flags & SLAB_UBC));	/* only UBC-enabled caches have ub slots */
+	slabp = virt_to_slab(objp);
+	objnr = (objp - slabp->s_mem) / cachep->buffer_size;	/* object index within the slab */
+	return slab_ubcs(cachep, slabp) + objnr;
+}
+
+struct user_beancounter *slab_ub(void *objp)
+{	/* beancounter a slab object was charged to (NULL if uncharged) */
+	struct user_beancounter **ub_ref;
+
+	ub_ref = slab_ub_ref(virt_to_cache(objp), objp);
+	return *ub_ref;
+}
+
+EXPORT_SYMBOL(slab_ub);
+
+#define UB_KMEM_QUANT	(PAGE_SIZE * 4)
+
+/* called with IRQ disabled */
+int ub_kmemsize_charge(struct user_beancounter *ub,
+		unsigned long size,
+		enum ub_severity strict)
+{	/* charge kmemsize, serving small requests from a per-task precharge pool */
+	struct task_beancounter *tbc;
+
+	tbc = &current->task_bc;
+	if (ub != tbc->task_ub || size > UB_KMEM_QUANT)	/* foreign ub or big request: charge directly */
+		goto just_charge;
+	if (tbc->kmem_precharged >= size) {	/* fast path: consume precharge */
+		tbc->kmem_precharged -= size;
+		return 0;
+	}
+
+	if (charge_beancounter(ub, UB_KMEMSIZE, UB_KMEM_QUANT, UB_HARD) == 0) {
+		tbc->kmem_precharged += UB_KMEM_QUANT - size;	/* refill pool, keep the remainder */
+		return 0;
+	}
+
+just_charge:
+	return charge_beancounter(ub, UB_KMEMSIZE, size, strict);
+}
+
+/* called with IRQ disabled */
+void ub_kmemsize_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{	/* return kmemsize, draining the precharge pool when it grows too big */
+	struct task_beancounter *tbc;
+
+	if (size > UB_MAXVALUE) {	/* debug check for implausible sizes */
+		printk("ub_kmemsize_uncharge: size %lu\n", size);
+		dump_stack();
+	}
+
+	tbc = &current->task_bc;
+	if (ub != tbc->task_ub)
+		goto just_uncharge;
+
+	tbc->kmem_precharged += size;
+	if (tbc->kmem_precharged < UB_KMEM_QUANT * 2)	/* pool still small: keep it all */
+		return;
+	size = tbc->kmem_precharged - UB_KMEM_QUANT;	/* trim pool back to one quantum */
+	tbc->kmem_precharged -= size;
+	/* falls through to uncharge the trimmed excess */
+just_uncharge:
+	uncharge_beancounter(ub, UB_KMEMSIZE, size);
+}
+
+static inline int should_charge(kmem_cache_t *cachep, gfp_t flags)
+{	/* charge only SLAB_UBC caches; SLAB_NO_CHARGE ones only with __GFP_UBC */
+	if (!(cachep->flags & SLAB_UBC))
+		return 0;
+	if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC))
+		return 0;
+	return 1;
+}
+
+#define should_uncharge(cachep)	should_charge(cachep, __GFP_UBC)
+
+/* called with IRQ disabled */
+int ub_slab_charge(kmem_cache_t *cachep, void *objp, gfp_t flags)
+{	/* charge a slab object to the exec ub; 0 on success or skip, -ENOMEM on failure */
+	unsigned int size;
+	struct user_beancounter *ub;
+
+	if (!should_charge(cachep, flags))
+		return 0;
+
+	ub = get_beancounter_fast(get_exec_ub());
+	if (ub == NULL)	/* no ub to charge (presumably early boot) -- TODO confirm */
+		return 0;
+
+	size = CHARGE_SIZE(cachep->objuse);
+	if (ub_kmemsize_charge(ub, size,
+				(flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto out_err;
+
+	if (inc_slab_charged(ub, cachep) < 0) {	/* debug counter failed: roll back */
+		ub_kmemsize_uncharge(ub, size);
+		goto out_err;
+	}
+	*slab_ub_ref(cachep, objp) = ub;	/* record owner; ref dropped at uncharge */
+	return 0;
+
+out_err:
+	put_beancounter_fast(ub);
+	return -ENOMEM;
+}
+
+/* called with IRQ disabled */
+void ub_slab_uncharge(kmem_cache_t *cachep, void *objp)
+{	/* release the charge taken by ub_slab_charge() for this object */
+	unsigned int size;
+	struct user_beancounter **ub_ref;
+
+	if (!should_uncharge(cachep))
+		return;
+
+	ub_ref = slab_ub_ref(cachep, objp);
+	if (*ub_ref == NULL)	/* object was never charged */
+		return;
+
+	dec_slab_charged(*ub_ref, cachep);
+	size = CHARGE_SIZE(cachep->objuse);
+	ub_kmemsize_uncharge(*ub_ref, size);
+	put_beancounter_fast(*ub_ref);
+	*ub_ref = NULL;	/* prevent double uncharge */
+}
+
+/*
+ * Pages accounting
+ */
+
+int ub_page_charge(struct page *page, int order, gfp_t mask)
+{	/* charge a 2^order page allocation to the exec ub when __GFP_UBC is set */
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = NULL;
+	if (!(mask & __GFP_UBC))	/* unaccounted allocation: record NULL owner */
+		goto out;
+
+	ub = get_beancounter_fast(get_exec_ub());
+	if (ub == NULL)
+		goto out;
+
+	local_irq_save(flags);	/* ub_kmemsize_charge requires IRQs disabled */
+	if (ub_kmemsize_charge(ub, CHARGE_ORDER(order),
+				(mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto err;
+
+	inc_pages_charged(ub, order);
+	local_irq_restore(flags);
+out:
+	BUG_ON(page_ub(page) != NULL);	/* page must not already have an owner */
+	page_ub(page) = ub;
+	return 0;
+
+err:
+	local_irq_restore(flags);
+	BUG_ON(page_ub(page) != NULL);
+	put_beancounter_fast(ub);
+	return -ENOMEM;
+}
+
+void ub_page_uncharge(struct page *page, int order)
+{	/* release the charge taken by ub_page_charge() when the pages are freed */
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = page_ub(page);
+	if (ub == NULL)	/* page was not accounted */
+		return;
+
+	if (unlikely(ub->ub_magic != UB_MAGIC)) {	/* corruption: dump diagnostics, then BUG */
+		struct page_beancounter *pb;
+
+		printk(KERN_EMERG "%s page: %p ub: %p magic: %lx\n",
+				__func__, page, ub, ub->ub_magic);
+		print_hex_dump(KERN_EMERG, "page: ", DUMP_PREFIX_ADDRESS, 32, 1,
+				page, sizeof(*page), 0);
+
+		if (ub->ub_magic != PB_MAGIC || ((long)ub & 1))
+			BUG();
+
+		pb = page->bc.page_pb;	/* looks like a page_beancounter list was stored here; dump it */
+		do {
+			print_hex_dump(KERN_EMERG, "pb: ", DUMP_PREFIX_ADDRESS, 32, 1,
+				pb, sizeof(*pb), 0);
+			pb = list_entry(pb->page_list.next, struct page_beancounter, page_list);
+		} while (pb != page->bc.page_pb);
+		BUG();
+	}
+
+	dec_pages_charged(ub, order);
+	local_irq_save(flags);	/* ub_kmemsize_uncharge requires IRQs disabled */
+	ub_kmemsize_uncharge(ub, CHARGE_ORDER(order));
+	local_irq_restore(flags);
+	put_beancounter_fast(ub);
+	page_ub(page) = NULL;
+}
+
+/* 
+ * takes init_mm.page_table_lock 
+ * some outer lock to protect pages from vmalloced area must be held
+ */
+struct user_beancounter *vmalloc_ub(void *obj)
+{	/* owner of a vmalloc'ed object, via its first underlying page; NULL if unmapped */
+	struct page *pg;
+
+	pg = vmalloc_to_page(obj);
+	if (pg == NULL)
+		return NULL;
+
+	return page_ub(pg);	/* first page carries the charge for the whole area */
+}
+
+EXPORT_SYMBOL(vmalloc_ub);
+
+struct user_beancounter *mem_ub(void *obj)
+{	/* owner of any kernel object: dispatch on vmalloc vs slab address range */
+	struct user_beancounter *ub;
+
+	if ((unsigned long)obj >= VMALLOC_START &&
+	    (unsigned long)obj  < VMALLOC_END)
+		ub = vmalloc_ub(obj);
+	else
+		ub = slab_ub(obj);
+
+	return ub;
+}
+
+EXPORT_SYMBOL(mem_ub);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_misc.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_misc.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_misc.c	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_misc.c	2017-01-13 08:40:24.000000000 -0500
@@ -0,0 +1,461 @@
+/*
+ *  kernel/ub/ub_misc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/kmem_cache.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/proc.h>
+
+#define UB_FILE_MINQUANT	3
+#define UB_FILE_MAXQUANT	10
+#define UB_FILE_INIQUANT	4
+
+static unsigned long ub_file_precharge(struct task_beancounter *task_bc,
+		struct user_beancounter *ub, unsigned long *kmemsize);
+
+static inline unsigned long ub_file_kmemsize(unsigned long nr)
+{
+	return CHARGE_SIZE(filp_cachep->objuse) * nr;
+}
+
+/*
+ * Task staff
+ */
+
+static void init_task_sub(struct task_struct *parent,
+		struct task_struct *tsk,
+  		struct task_beancounter *old_bc)
+{
+	struct task_beancounter *new_bc;
+	struct user_beancounter *sub;
+
+	new_bc = &tsk->task_bc;
+	sub = old_bc->fork_sub;
+	new_bc->fork_sub = get_beancounter(sub);
+	new_bc->task_fnode = NULL;
+	new_bc->task_freserv = old_bc->task_freserv;
+	old_bc->task_freserv = NULL;
+	memset(&new_bc->task_data, 0, sizeof(new_bc->task_data));
+	new_bc->pgfault_handle = 0;
+	new_bc->pgfault_allot = 0;
+}
+
+void ub_init_task_bc(struct task_beancounter *tbc)
+{
+	tbc->file_precharged = 0;
+	tbc->file_quant = UB_FILE_INIQUANT;
+	tbc->file_count = 0;
+
+	tbc->kmem_precharged = 0;
+	tbc->dentry_alloc = 0;
+}
+
+int ub_task_charge(struct task_struct *parent, struct task_struct *task)
+{
+	struct task_beancounter *old_bc;
+	struct task_beancounter *new_bc;
+	struct user_beancounter *ub, *pub;
+	unsigned long file_nr, kmemsize;
+	unsigned long flags;
+
+	old_bc = &parent->task_bc;
+	ub = old_bc->fork_sub;
+	new_bc = &task->task_bc;
+	new_bc->task_ub = get_beancounter(ub);
+	new_bc->exec_ub = get_beancounter(ub);
+
+	pub = top_beancounter(ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	if (unlikely(__charge_beancounter_locked(pub, UB_NUMPROC,
+					1, UB_HARD) < 0))
+		goto out_numproc;
+
+	ub_init_task_bc(new_bc);
+	file_nr = ub_file_precharge(new_bc, pub, &kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+
+	charge_beancounter_notop(ub, UB_NUMPROC, 1);
+	if (likely(file_nr)) {
+		charge_beancounter_notop(ub, UB_NUMFILE, file_nr);
+		charge_beancounter_notop(ub, UB_KMEMSIZE, kmemsize);
+	}
+
+	init_task_sub(parent, task, old_bc);
+	return 0;
+
+out_numproc:
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	__put_beancounter_batch(ub, 2);
+	return -ENOMEM;
+}
+
+extern atomic_t dbgpre;
+
+void ub_task_uncharge(struct task_struct *task)
+{
+	struct task_beancounter *task_bc;
+	struct user_beancounter *pub;
+	unsigned long file_nr, file_kmemsize;
+	unsigned long flags;
+
+	task_bc = &task->task_bc;
+	pub = top_beancounter(task_bc->task_ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	__uncharge_beancounter_locked(pub, UB_NUMPROC, 1);
+	file_nr = task_bc->file_precharged;
+	if (likely(file_nr))
+		__uncharge_beancounter_locked(pub,
+				UB_NUMFILE, file_nr);
+
+	/* see comment in ub_file_charge */
+	task_bc->file_precharged = 0;
+	file_kmemsize = ub_file_kmemsize(file_nr);
+	if (likely(file_kmemsize))
+		__uncharge_beancounter_locked(pub,
+				UB_KMEMSIZE, file_kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+
+	uncharge_beancounter_notop(task_bc->task_ub, UB_NUMPROC, 1);
+	if (likely(file_nr)) {
+		uncharge_beancounter_notop(task_bc->task_ub,
+				UB_NUMFILE, file_nr);
+		__put_beancounter_batch(task_bc->task_ub, file_nr);
+	}
+	if (likely(file_kmemsize))
+		uncharge_beancounter_notop(task_bc->task_ub,
+				UB_KMEMSIZE, file_kmemsize);
+}
+
+void ub_task_put(struct task_struct *task)
+{
+	struct task_beancounter *task_bc;
+	struct user_beancounter *pub;
+	unsigned long kmemsize, flags;
+
+	task_bc = &task->task_bc;
+
+	pub = top_beancounter(task_bc->task_ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	kmemsize = task_bc->kmem_precharged;
+	task_bc->kmem_precharged = 0;
+	if (likely(kmemsize))
+		__uncharge_beancounter_locked(pub, UB_KMEMSIZE, kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	if (likely(kmemsize))
+		uncharge_beancounter_notop(task_bc->task_ub, UB_KMEMSIZE, kmemsize);
+
+	put_beancounter(task_bc->exec_ub);
+	put_beancounter(task_bc->task_ub);
+	put_beancounter(task_bc->fork_sub);
+	/* can't be freed elsewhere, failures possible in the middle of fork */
+	if (task_bc->task_freserv != NULL)
+		kfree(task_bc->task_freserv);
+
+	task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;
+	task_bc->task_ub = (struct user_beancounter *)0xdead100c;
+	BUG_ON(task_bc->kmem_precharged != 0);
+}
+
+/*
+ * Files and file locks.
+ */
+/*
+ * For NUMFILE, we do not take a lock and call charge function
+ * for every file.  We try to charge in batches, keeping local reserve on
+ * task.  For experimental purposes, batch size is adaptive and depends
+ * on numfile barrier, number of processes, and the history of successes and
+ * failures of batch charges.
+ *
+ * Per-task fields have the following meaning
+ *   file_precharged    number of files charged to beancounter in advance,
+ *   file_quant         logarithm of batch size
+ *   file_count         counter of charge successes, to reduce batch size
+ *                      fluctuations.
+ */
+static unsigned long ub_file_precharge(struct task_beancounter *task_bc,
+		struct user_beancounter *ub, unsigned long *kmemsize)
+{
+	unsigned long n, kmem;
+
+	n = 1UL << task_bc->file_quant;
+	if (ub->ub_parms[UB_NUMPROC].held >
+			(ub->ub_parms[UB_NUMFILE].barrier >>
+						task_bc->file_quant))
+		goto nopre;
+	if (unlikely(__charge_beancounter_locked(ub, UB_NUMFILE, n, UB_HARD)))
+		goto nopre;
+	kmem = ub_file_kmemsize(n);
+	if (unlikely(__charge_beancounter_locked(ub, UB_KMEMSIZE,
+					kmem, UB_HARD)))
+		goto nopre_kmem;
+
+	task_bc->file_precharged += n;
+	get_beancounter_batch(task_bc->task_ub, n);
+	task_bc->file_count++;
+	if (task_bc->file_quant < UB_FILE_MAXQUANT &&
+	    task_bc->file_count >= task_bc->file_quant) {
+		task_bc->file_quant++;
+		task_bc->file_count = 0;
+	}
+	*kmemsize = kmem;
+	return n;
+
+nopre_kmem:
+	__uncharge_beancounter_locked(ub, UB_NUMFILE, n);
+nopre:
+	if (task_bc->file_quant > UB_FILE_MINQUANT)
+		task_bc->file_quant--;
+	task_bc->file_count = 0;
+	return 0;
+}
+
+int ub_file_charge(struct file *f)
+{
+	struct user_beancounter *ub, *pub;
+	struct task_beancounter *task_bc;
+	unsigned long file_nr, kmem;
+	unsigned long flags;
+	int err;
+
+	task_bc = &current->task_bc;
+	ub = get_exec_ub();
+	if (unlikely(ub != task_bc->task_ub))
+		goto just_charge;
+
+	if (likely(task_bc->file_precharged > 0)) {
+		/*
+		 * files are put via RCU in 2.6.16 so during
+		 * this decrement an IRQ can happen and called
+		 * ub_files_uncharge() will mess file_precharged
+		 *
+		 * ub_task_uncharge() is called via RCU also so no
+		 * protection is needed there
+		 *
+		 * Xemul
+		 */
+
+		local_irq_save(flags);
+		task_bc->file_precharged--;
+		local_irq_restore(flags);
+
+		f->f_ub = ub;
+		return 0;
+	}
+
+	pub = top_beancounter(ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	file_nr = ub_file_precharge(task_bc, pub, &kmem);
+	if (unlikely(!file_nr))
+		goto last_try;
+	spin_unlock(&pub->ub_lock);
+	task_bc->file_precharged--;
+	local_irq_restore(flags);
+
+	charge_beancounter_notop(ub, UB_NUMFILE, file_nr);
+	charge_beancounter_notop(ub, UB_KMEMSIZE, kmem);
+	f->f_ub = ub;
+	return 0;
+
+just_charge:
+	pub = top_beancounter(ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+last_try:
+	kmem = ub_file_kmemsize(1);
+	err = __charge_beancounter_locked(pub, UB_NUMFILE, 1, UB_HARD);
+	if (likely(!err)) {
+		err = __charge_beancounter_locked(pub, UB_KMEMSIZE,
+				kmem, UB_HARD);
+		if (unlikely(err))
+			__uncharge_beancounter_locked(pub, UB_NUMFILE, 1);
+	}
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	if (likely(!err)) {
+		charge_beancounter_notop(ub, UB_NUMFILE, 1);
+		charge_beancounter_notop(ub, UB_KMEMSIZE, kmem);
+		f->f_ub = get_beancounter(ub);
+	}
+	return err;
+}
+
+static inline int task_precharge_farnr(struct task_beancounter *task_bc)
+{
+	return (task_bc->file_precharged < (1UL << task_bc->file_quant));
+}
+
+void ub_file_uncharge(struct file *f)
+{
+	struct user_beancounter *ub, *pub;
+	struct task_beancounter *task_bc;
+	long nr;
+
+	ub = f->f_ub;
+	task_bc = &current->task_bc;
+	if (likely(ub == task_bc->task_ub)) {
+		task_bc->file_precharged++;
+		pub = top_beancounter(ub);
+		if (task_precharge_farnr(task_bc) &&
+			ub_barrier_farnr(pub, UB_NUMFILE) &&
+				ub_barrier_farsz(pub, UB_KMEMSIZE))
+			return;
+		nr = task_bc->file_precharged
+			- (1UL << (task_bc->file_quant - 1));
+		if (nr > 0) {
+			task_bc->file_precharged -= nr;
+			__put_beancounter_batch(ub, nr);
+			uncharge_beancounter(ub, UB_NUMFILE, nr);
+			uncharge_beancounter(ub, UB_KMEMSIZE,
+						ub_file_kmemsize(nr));
+		}
+	} else {
+		uncharge_beancounter(ub, UB_NUMFILE, 1);
+		uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(1));
+		put_beancounter(ub);
+	}
+}
+
+int ub_flock_charge(struct file_lock *fl, int hard)
+{
+	struct user_beancounter *ub;
+	int err;
+
+	/* No need to get_beancounter here since it's already got in slab */
+	ub = slab_ub(fl);
+	if (ub == NULL)
+		return 0;
+
+	err = charge_beancounter(ub, UB_NUMFLOCK, 1, hard ? UB_HARD : UB_SOFT);
+	if (!err)
+		fl->fl_charged = 1;
+	return err;
+}
+
+void ub_flock_uncharge(struct file_lock *fl)
+{
+	struct user_beancounter *ub;
+
+	/* Ub will be put in slab */
+	ub = slab_ub(fl);
+	if (ub == NULL || !fl->fl_charged)
+		return;
+
+	uncharge_beancounter(ub, UB_NUMFLOCK, 1);
+	fl->fl_charged = 0;
+}
+
+/*
+ * Signal handling
+ */
+
+static int do_ub_siginfo_charge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD))
+		goto out_kmem;
+
+	if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
+		goto out_num;
+
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_num:
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+out_kmem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+	__uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_siginfo_charge(struct sigqueue *sq, struct user_beancounter *ub)
+{
+	unsigned long size;
+	struct user_beancounter *p, *q;
+
+	size = CHARGE_SIZE(kmem_obj_memusage(sq));
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_ub_siginfo_charge(p, size))
+			goto unroll;
+	}
+
+	sq->sig_ub = get_beancounter(ub);
+	return 0;
+
+unroll:
+	for (q = ub; q != p; q = q->parent)
+		do_ub_siginfo_uncharge(q, size);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(ub_siginfo_charge);
+
+void ub_siginfo_uncharge(struct sigqueue *sq)
+{
+	unsigned long size;
+	struct user_beancounter *ub, *p;
+
+	p = ub = sq->sig_ub;
+	sq->sig_ub = NULL;
+	size = CHARGE_SIZE(kmem_obj_memusage(sq));
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_siginfo_uncharge(ub, size);
+	put_beancounter(p);
+}
+
+/*
+ * PTYs
+ */
+
+int ub_pty_charge(struct tty_struct *tty)
+{
+	struct user_beancounter *ub;
+	int retval;
+
+	ub = slab_ub(tty);
+	retval = 0;
+	if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
+			!test_bit(TTY_CHARGED, &tty->flags)) {
+		retval = charge_beancounter(ub, UB_NUMPTY, 1, UB_HARD);
+		if (!retval)
+			set_bit(TTY_CHARGED, &tty->flags);
+	}
+	return retval;
+}
+
+void ub_pty_uncharge(struct tty_struct *tty)
+{
+	struct user_beancounter *ub;
+
+	ub = slab_ub(tty);
+	if (ub && tty->driver->subtype == PTY_TYPE_MASTER &&
+			test_bit(TTY_CHARGED, &tty->flags)) {
+		uncharge_beancounter(ub, UB_NUMPTY, 1);
+		clear_bit(TTY_CHARGED, &tty->flags);
+	}
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_net.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_net.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_net.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_net.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,1147 @@
+/*
+ *  linux/kernel/ub/ub_net.c
+ *
+ *  Copyright (C) 1998-2004  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2005 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - sizeof(struct inode) charge
+ *   = tcp_mem_schedule() feedback based on ub limits
+ *   + measures so that one socket won't exhaust all send buffers,
+ *     see bug in bugzilla
+ *   = sk->socket check for NULL in snd_wakeups
+ *     (tcp_write_space checks for NULL itself)
+ *   + in tcp_close(), orphaned socket abortion should be based on ubc
+ *     resources (same in tcp_out_of_resources)
+ *     Beancounter should also have separate orphaned socket counter...
+ *   + for rcv, in-order segment should be accepted
+ *     if only barrier is exceeded
+ *   = tcp_rmem_schedule() feedback based on ub limits
+ *   - repair forward_alloc mechanism for receive buffers
+ *     Its idea is that some buffer space is pre-charged so that receive fast
+ *     path doesn't need to take spinlocks and do other heavy stuff
+ *   + tcp_prune_queue actions based on ub limits
+ *   + window adjustments depending on available buffers for receive
+ *   - window adjustments depending on available buffers for send
+ *   + race around usewreserv
+ *   + avoid allocating new page for each tiny-gram, see letter from ANK
+ *   + rename ub_sock_lock
+ *   + sk->sleep wait queue probably can be used for all wakeups, and
+ *     sk->ub_wait is unnecessary
+ *   + for UNIX sockets, the current algorithm will lead to
+ *     UB_UNIX_MINBUF-sized messages only for non-blocking case
+ *   - charge for af_packet sockets
+ *   + all datagram sockets should be charged to NUMUNIXSOCK
+ *   - we do not charge for skb copies and clones staying in device queues
+ *   + live-lock if number of sockets is big and buffer limits are small
+ *     [diff-ubc-dbllim3]
+ *   - check that multiple readers/writers on the same socket won't cause fatal
+ *     consequences
+ *   - check allocation/charge orders
+ *   + There is a potential problem with callback_lock.  In *snd_wakeup we take
+ *     the beancounter lock first; in sock_def_error_report, callback_lock first,
+ *     then the beancounter.  This is not a problem if callback_lock is taken
+ *     readonly, but anyway...
+ *   - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
+ * General kernel problems:
+ *   - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
+ *     notification won't get signals
+ *   - datagram_poll looks racy
+ *
+ */
+
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
+#include <ub/ub_debug.h>
+
+/* for some reason it is not used currently */
+#define UB_SOCK_MAINTAIN_WMEMPRESSURE	0
+
+
+/* Skb truesize definition. Bad place. Den */
+
+static inline int skb_chargesize_head(struct sk_buff *skb)
+{
+	return skb_charge_size(skb->end - skb->head +
+				sizeof(struct skb_shared_info));
+}
+
+int skb_charge_fullsize(struct sk_buff *skb)
+{
+	int chargesize;
+	struct sk_buff *skbfrag;
+
+	chargesize = skb_chargesize_head(skb) +
+		PAGE_SIZE * skb_shinfo(skb)->nr_frags;
+	if (likely(skb_shinfo(skb)->frag_list == NULL))
+		return chargesize;
+	for (skbfrag = skb_shinfo(skb)->frag_list;
+	     skbfrag != NULL;
+	     skbfrag = skbfrag->next) {
+		chargesize += skb_charge_fullsize(skbfrag);
+	}
+	return chargesize;
+}
+EXPORT_SYMBOL(skb_charge_fullsize);
+
+static int ub_sock_makewreserv_locked(struct sock *sk,
+		int bufid, unsigned long size);
+
+int __ub_too_many_orphans(struct sock *sk, int count)
+{
+	struct user_beancounter *ub;
+
+	if (sock_has_ubc(sk)) {
+		ub = top_beancounter(sock_bc(sk)->ub);
+		if (count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Queueing
+ */
+
+static void ub_sock_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	unsigned long added;
+
+	while (!list_empty(&ub->ub_other_sk_list)) {
+		p = ub->ub_other_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL) {
+			/* sk being destroyed */
+			list_del_init(&skbc->ub_sock_list);
+			continue;
+		}
+
+		ub_debug(UBD_NET_SLEEP,
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;
+		if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
+					skbc->ub_waitspc))
+			break;
+		added += skbc->poll_reserv;
+
+		list_del_init(&skbc->ub_sock_list);
+
+		/*
+		 * See comments in ub_tcp_snd_wakeup.
+		 * Locking note: both unix_write_space and
+		 * sock_def_write_space take callback_lock themselves.
+		 * We take it here just to be on the safe side and to
+		 * act the same way as ub_tcp_snd_wakeup does.
+		 */
+		sock_hold(sk);
+		read_lock(&sk->sk_callback_lock);
+		spin_unlock(&ub->ub_lock);
+
+		sk->sk_write_space(sk);
+		read_unlock(&sk->sk_callback_lock);
+
+		if (skbc->ub != ub && added)
+			charge_beancounter_notop(skbc->ub,
+				       	UB_OTHERSOCKBUF, added);
+		sock_put(sk);
+
+		spin_lock(&ub->ub_lock);
+	}
+}
+
+static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	unsigned long added;
+
+	while (!list_empty(&ub->ub_tcp_sk_list)) {
+		p = ub->ub_tcp_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL) {
+			/* sk being destroyed */
+			list_del_init(&skbc->ub_sock_list);
+			continue;
+		}
+
+		ub_debug(UBD_NET_SLEEP,
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;
+		if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
+					skbc->ub_waitspc))
+			break;
+		added += skbc->poll_reserv;
+
+		list_del_init(&skbc->ub_sock_list);
+
+		/*
+		 * Send async notifications and wake up.
+		 * Locking note: we get callback_lock here because
+		 * tcp_write_space is over-optimistic about calling context
+		 * (socket lock is presumed).  So we get the lock here although
+		 * it belongs to the callback.
+		 */
+		sock_hold(sk);
+		read_lock(&sk->sk_callback_lock);
+		spin_unlock(&ub->ub_lock);
+
+		sk->sk_write_space(sk);
+		read_unlock(&sk->sk_callback_lock);
+
+		if (skbc->ub != ub && added)
+			charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
+		sock_put(sk);
+
+		spin_lock(&ub->ub_lock);
+	}
+}
+
+int ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
+{
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long added_reserv;
+
+	if (!sock_has_ubc(sk))
+		return 0;
+
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
+	added_reserv = -skbc->poll_reserv;
+	if (!ub_sock_makewreserv_locked(sk, res, size)) {
+		/*
+		 * It looks a bit hackish, but it is compatible with both
+		 * wait_for_xx_ubspace and poll.
+		 * This __set_current_state is equivalent to a wakeup event
+		 * right after spin_unlock_irqrestore.
+		 */
+		__set_current_state(TASK_RUNNING);
+		added_reserv += skbc->poll_reserv;
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+		if (added_reserv)
+			charge_beancounter_notop(skbc->ub, res, added_reserv);
+		return 0;
+	}
+
+	ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
+	skbc->ub_waitspc = size;
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET,
+				"re-adding socket to beancounter %p.\n", ub);
+		goto out;
+	}
+
+	switch (res) {
+		case UB_TCPSNDBUF:
+			list_add_tail(&skbc->ub_sock_list,
+					&ub->ub_tcp_sk_list);
+			break;
+		case UB_OTHERSOCKBUF:
+			list_add_tail(&skbc->ub_sock_list,
+					&ub->ub_other_sk_list);
+			break;
+		default:
+			BUG();
+	}
+out:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+EXPORT_SYMBOL(ub_sock_snd_queue_add);
+
+long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
+			break;
+
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			break;
+		if (sk->sk_err)
+			break;
+		ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
+		timeo = schedule_timeout(timeo);
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+void ub_sock_sndqueuedel(struct sock *sk)
+{
+	struct user_beancounter *ub;
+	struct sock_beancounter *skbc;
+	unsigned long flags;
+
+	if (!sock_has_ubc(sk))
+		return;
+	skbc = sock_bc(sk);
+
+	/* race with write_space callback of other socket */
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	list_del_init(&skbc->ub_sock_list);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+/*
+ * Helpers
+ */
+
+static inline void __ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	WARN_ON_ONCE(skb_bc(skb)->ub != NULL);
+
+	skb_bc(skb)->ub = sock_bc(sk)->ub;
+	skb_bc(skb)->charged = size;
+	skb_bc(skb)->resource = resource;
+}
+
+void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	if (!sock_has_ubc(sk))
+		return;
+
+	if (sock_bc(sk)->ub == NULL)
+		BUG();
+
+	__ub_skb_set_charge(skb, sk, size, resource);
+
+	/* Ugly. Ugly. Skb in sk writequeue can live without ref to sk */
+	if (skb->sk == NULL)
+		skb->sk = sk;
+}
+
+EXPORT_SYMBOL(ub_skb_set_charge);
+
+static inline void ub_skb_set_uncharge(struct sk_buff *skb)
+{
+	skb_bc(skb)->ub = NULL;
+	skb_bc(skb)->charged = 0;
+	skb_bc(skb)->resource = 0;
+}
+
+static void ub_update_rmem_thres(struct sock_beancounter *skub)
+{
+	struct user_beancounter *ub;
+
+	if (skub && skub->ub) {
+		ub = top_beancounter(skub->ub);
+		ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier /
+			(ub->ub_parms[UB_NUMTCPSOCK].held + 1);
+	}
+}
+
+static inline void ub_sock_wcharge_dec(struct sock *sk,
+		unsigned long chargesize)
+{
+	/* The check sk->sk_family != PF_NETLINK is made as the skb is
+	 * queued to the kernel end of socket while changed to the user one.
+	 * Den */
+	if (unlikely(sock_bc(sk)->ub_wcharged) && sk->sk_family != PF_NETLINK) {
+		if (sock_bc(sk)->ub_wcharged > chargesize)
+			sock_bc(sk)->ub_wcharged -= chargesize;
+		else
+			sock_bc(sk)->ub_wcharged = 0;
+	}
+}
+
+/*
+ * Charge socket number
+ */
+
+static inline void sk_alloc_beancounter(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+
+	skbc = sock_bc(sk);
+	memset(skbc, 0, sizeof(struct sock_beancounter));
+}
+
+static inline void sk_free_beancounter(struct sock *sk)
+{
+}
+
+static int __sock_charge(struct sock *sk, int res)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *cub, *ub;
+	unsigned long added_reserv, added_forw;
+	unsigned long flags;
+
+	cub = get_exec_ub();
+	if (unlikely(cub == NULL))
+		return 0;
+
+	sk_alloc_beancounter(sk);
+	skbc = sock_bc(sk);
+	INIT_LIST_HEAD(&skbc->ub_sock_list);
+
+	ub = top_beancounter(cub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (unlikely(__charge_beancounter_locked(ub, res, 1, UB_HARD) < 0))
+		goto out_limit;
+
+	added_reserv = 0;
+	added_forw = 0;
+	if (res == UB_NUMTCPSOCK) {
+		added_reserv = skb_charge_size(MAX_TCP_HEADER +
+				1500 - sizeof(struct iphdr) -
+					sizeof(struct tcphdr));
+		added_reserv *= 4;
+		ub->ub_parms[UB_TCPSNDBUF].held += added_reserv;
+		if (!ub_barrier_farsz(ub, UB_TCPSNDBUF)) {
+			ub->ub_parms[UB_TCPSNDBUF].held -= added_reserv;
+			added_reserv = 0;
+		}
+		skbc->poll_reserv = added_reserv;
+		ub_adjust_maxheld(ub, UB_TCPSNDBUF);
+
+		added_forw = SK_STREAM_MEM_QUANTUM * 4;
+		ub->ub_parms[UB_TCPRCVBUF].held += added_forw;
+		if (!ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
+			ub->ub_parms[UB_TCPRCVBUF].held -= added_forw;
+			added_forw = 0;
+		}
+		skbc->forw_space = added_forw;
+		ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	charge_beancounter_notop(cub, res, 1);
+	if (added_reserv)
+		charge_beancounter_notop(cub, UB_TCPSNDBUF, added_reserv);
+	if (added_forw)
+		charge_beancounter_notop(cub, UB_TCPRCVBUF, added_forw);
+
+	skbc->ub = get_beancounter(cub);
+	return 0;
+
+out_limit:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	sk_free_beancounter(sk);
+	return -ENOMEM;
+}
+
+int ub_tcp_sock_charge(struct sock *sk)
+{
+	int ret;
+
+	ret = __sock_charge(sk, UB_NUMTCPSOCK);
+	ub_update_rmem_thres(sock_bc(sk));
+
+	return ret;
+}
+
+int ub_other_sock_charge(struct sock *sk)
+{
+	return __sock_charge(sk, UB_NUMOTHERSOCK);
+}
+
+EXPORT_SYMBOL(ub_other_sock_charge);
+
+int ub_sock_charge(struct sock *sk, int family, int type)
+{
+	return (IS_TCP_SOCK(family, type) ?
+			ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
+}
+EXPORT_SYMBOL(ub_sock_charge);
+
+/*
+ * Uncharge socket number
+ */
+
+void ub_sock_uncharge(struct sock *sk)
+{
+	int is_tcp_sock;
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long reserv, forw;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
+	skbc = sock_bc(sk);
+	ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
+
+	ub = top_beancounter(skbc->ub);
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET,
+			 "ub_sock_uncharge: removing from ub(%p) queue.\n",
+			 skbc);
+		list_del_init(&skbc->ub_sock_list);
+	}
+
+	reserv = skbc->poll_reserv;
+	forw = skbc->forw_space;
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	if (forw)
+		__uncharge_beancounter_locked(ub,
+				(is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
+				forw);
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+
+	ub_sock_wcharge_dec(sk, reserv);
+	if (unlikely(skbc->ub_wcharged))
+		printk(KERN_WARNING
+		       "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
+		       skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
+	skbc->poll_reserv = 0;
+	skbc->forw_space = 0;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	if (forw)
+		uncharge_beancounter_notop(skbc->ub,
+				(is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
+				forw);
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+
+	put_beancounter(skbc->ub);
+	sk_free_beancounter(sk);
+}
+
+/*
+ * Special case for netlink_dump - (un)charges precalculated size
+ */
+
+int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
+{
+	int ret;
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	chargesize = skb_charge_fullsize(skb);
+	ret = charge_beancounter(sock_bc(sk)->ub,
+			UB_DGRAMRCVBUF, chargesize, UB_HARD);
+	if (ret < 0)
+		return ret;
+	ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return ret;
+}
+
+/*
+ * Poll reserve accounting
+ *
+ * This is the core of socket buffer management (along with queueing/wakeup
+ * functions.  The rest of buffer accounting either call these functions, or
+ * repeat parts of their logic for some simpler cases.
+ */
+
+static int ub_sock_makewreserv_locked(struct sock *sk,
+		int bufid, unsigned long size)
+{
+	unsigned long wcharge_added;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	skbc = sock_bc(sk);
+	if (skbc->poll_reserv >= size) /* no work to be done */
+		goto out;
+
+	ub = top_beancounter(skbc->ub);
+	ub->ub_parms[bufid].held += size - skbc->poll_reserv;
+
+	wcharge_added = 0;
+	/*
+	 * Logic:
+	 *  1) when used memory hits barrier, we set wmem_pressure;
+	 *     wmem_pressure is reset under barrier/2;
+	 *     between barrier/2 and barrier we limit per-socket buffer growth;
+	 *  2) each socket is guaranteed to get (limit-barrier)/maxsockets
+	 *     calculated on the base of memory eaten after the barrier is hit
+	 */
+	skbc = sock_bc(sk);
+#if UB_SOCK_MAINTAIN_WMEMPRESSURE
+	if (!ub_hfbarrier_hit(ub, bufid)) {
+		if (ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 0;
+	}
+#endif
+	if (ub_barrier_hit(ub, bufid)) {
+#if UB_SOCK_MAINTAIN_WMEMPRESSURE
+		if (!ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 1;
+#endif
+		if (sk->sk_family == PF_NETLINK)
+			goto unroll;
+		wcharge_added = size - skbc->poll_reserv;
+		skbc->ub_wcharged += wcharge_added;
+		if (skbc->ub_wcharged * ub->ub_parms[bid2sid(bufid)].limit +
+				ub->ub_parms[bufid].barrier >
+					ub->ub_parms[bufid].limit)
+			goto unroll_wch;
+	}
+	if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
+		goto unroll;
+
+	ub_adjust_maxheld(ub, bufid);
+	skbc->poll_reserv = size;
+out:
+	return 0;
+
+unroll_wch:
+	skbc->ub_wcharged -= wcharge_added;
+unroll:
+	ub_debug(UBD_NET_SEND,
+			"makewres: deny "
+			"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+			sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
+			skbc->ub_wcharged, sk->sk_sndbuf);
+	ub->ub_parms[bufid].failcnt++;
+	ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
+
+	if (sk->sk_socket != NULL) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+	}
+	return -ENOMEM;
+}
+
+int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+	unsigned long added_reserv;
+	int err;
+
+	skbc = sock_bc(sk);
+
+	/*
+	 * This function provides that there is sufficient reserve upon return
+	 * only if sk has only one user.  We can check poll_reserv without
+	 * serialization and avoid locking if the reserve already exists.
+	 */
+	if (unlikely(!sock_has_ubc(sk)) || likely(skbc->poll_reserv >= size))
+		return 0;
+
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	added_reserv = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, bufid, size);
+	added_reserv += skbc->poll_reserv;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (added_reserv)
+		charge_beancounter_notop(skbc->ub, bufid, added_reserv);
+
+	return err;
+}
+
+EXPORT_SYMBOL(ub_sock_make_wreserv);
+
+int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	/* optimize for the case if socket has sufficient reserve */
+	ub_sock_make_wreserv(sk, bufid, size);
+	skbc = sock_bc(sk);
+	if (likely(skbc->poll_reserv >= size)) {
+		skbc->poll_reserv -= size;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+EXPORT_SYMBOL(ub_sock_get_wreserv);
+
+static void ub_sock_do_ret_wreserv(struct sock *sk, int bufid,
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long extra;
+	unsigned long flags;
+
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+
+	extra = 0;
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	skbc->poll_reserv += size;
+	if (skbc->poll_reserv > ressize) {
+		extra = skbc->poll_reserv - ressize;
+		ub_sock_wcharge_dec(sk, extra);
+		skbc->poll_reserv = ressize;
+
+		__uncharge_beancounter_locked(ub, bufid, extra);
+		if (bufid == UB_TCPSNDBUF)
+			ub_tcp_snd_wakeup(ub);
+		else
+			ub_sock_snd_wakeup(ub);
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (extra)
+		uncharge_beancounter_notop(skbc->ub, bufid, extra);
+}
+
+void ub_sock_ret_wreserv(struct sock *sk, int bufid,
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+	/* check if the reserve can be kept */
+	if (ub_barrier_farsz(ub, bufid)) {
+		skbc->poll_reserv += size;
+		return;
+	}
+	ub_sock_do_ret_wreserv(sk, bufid, size, ressize);
+}
+
+/*
+ * UB_DGRAMRCVBUF
+ */
+
+int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	chargesize = skb_charge_fullsize(skb);
+	if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
+				 chargesize, UB_HARD))
+		return -ENOMEM;
+
+	ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return 0;
+}
+
+EXPORT_SYMBOL(ub_sockrcvbuf_charge);
+
+static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
+{
+	uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
+			     skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+/*
+ * UB_TCPRCVBUF
+ */
+
+int ub_sock_tcp_chargerecv(struct sock *sk, struct sk_buff *skb,
+			    enum ub_severity strict)
+{
+	int retval;
+	unsigned long flags;
+	struct user_beancounter *ub;
+	struct sock_beancounter *skbc;
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+	skbc = sock_bc(sk);
+
+	chargesize = skb_charge_fullsize(skb);
+	if (likely(skbc->forw_space >= chargesize)) {
+		skbc->forw_space -= chargesize;
+		__ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+		return 0;
+	}
+
+	/*
+	 * Memory pressure reactions:
+	 *  1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
+	 *  2) set UB_RMEM_SHRINK and tcp_clamp_window()
+	 *     tcp_collapse_queues() if rmem_alloc > rcvbuf
+	 *  3) drop OFO, tcp_purge_ofo()
+	 *  4) drop all.
+	 * Currently, we do #2 and #3 at once (which means that current
+	 * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
+	 * for example...)
+	 * On memory pressure we jump from #0 to #3, and when the pressure
+	 * subsides, to #1.
+	 */
+	retval = 0;
+	ub = top_beancounter(sock_bc(sk)->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
+	if (ub->ub_parms[UB_TCPRCVBUF].held >
+			ub->ub_parms[UB_TCPRCVBUF].barrier &&
+			strict != UB_FORCE)
+		goto excess;
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+out:
+	if (retval == 0) {
+		charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
+				chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+	}
+	return retval;
+
+excess:
+	ub->ub_rmem_pressure = UB_RMEM_SHRINK;
+	if (strict == UB_HARD)
+		retval = -ENOMEM;
+	if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
+		retval = -ENOMEM;
+	/*
+	 * We try to leave numsock*maxadvmss as a reserve for sockets not
+	 * queueing any data yet (if the difference between the barrier and the
+	 * limit is enough for this reserve).
+	 */
+	if (ub->ub_parms[UB_TCPRCVBUF].held +
+			ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
+			> ub->ub_parms[UB_TCPRCVBUF].limit &&
+			atomic_read(&sk->sk_rmem_alloc))
+		retval = -ENOMEM;
+	if (retval) {
+		ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
+		ub->ub_parms[UB_TCPRCVBUF].failcnt++;
+	}
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	goto out;
+}
+EXPORT_SYMBOL(ub_sock_tcp_chargerecv);
+
+static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	unsigned long held, bar;
+	int prev_pres;
+	struct user_beancounter *ub;
+
+	ub = top_beancounter(skb_bc(skb)->ub);
+	if (ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
+		sock_bc(skb->sk)->forw_space += skb_bc(skb)->charged;
+		ub_skb_set_uncharge(skb);
+		return;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
+		printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
+				skb_bc(skb)->charged,
+				ub, ub->ub_parms[UB_TCPRCVBUF].held);
+		/* ass-saving bung */
+		skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
+	}
+	ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
+	held = ub->ub_parms[UB_TCPRCVBUF].held;
+	bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
+	prev_pres = ub->ub_rmem_pressure;
+	if (held <= bar - (bar >> 2))
+		ub->ub_rmem_pressure = UB_RMEM_EXPAND;
+	else if (held <= bar)
+		ub->ub_rmem_pressure = UB_RMEM_KEEP;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
+			skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+
+/*
+ * UB_OTHERSOCKBUF and UB_TCPSNDBUF
+ */
+
+static void ub_socksndbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct user_beancounter *ub, *cub;
+	unsigned long chargesize;
+
+	cub = skb_bc(skb)->ub;
+	ub = top_beancounter(cub);
+	chargesize = skb_bc(skb)->charged;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_OTHERSOCKBUF, chargesize);
+	if (skb->sk != NULL && sock_has_ubc(skb->sk))
+		ub_sock_wcharge_dec(skb->sk, chargesize);
+	ub_sock_snd_wakeup(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, chargesize);
+	ub_skb_set_uncharge(skb);
+}
+
+/* expected to be called under socket lock */
+static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
+{
+	/*
+	 * ub_sock_ret_wreserv call is abused here, we just want to uncharge
+	 * skb size.  However, to reduce duplication of the code doing
+	 * ub_hfbarrier_hit check, ub_wcharged reduction, and wakeup we call
+	 * a function that already does all of this.  2006/04/27  SAW
+	 */
+	ub_sock_ret_wreserv(skb->sk, UB_TCPSNDBUF, skb_bc(skb)->charged,
+			sock_bc(skb->sk)->poll_reserv);
+	ub_skb_set_uncharge(skb);
+}
+
+void ub_skb_uncharge(struct sk_buff *skb)
+{
+	switch (skb_bc(skb)->resource) {
+		case UB_TCPSNDBUF:
+			ub_tcpsndbuf_uncharge(skb);
+			break;
+		case UB_TCPRCVBUF:
+			ub_tcprcvbuf_uncharge(skb);
+			break;
+		case UB_DGRAMRCVBUF:
+			ub_sockrcvbuf_uncharge(skb);
+			break;
+		case UB_OTHERSOCKBUF:
+			ub_socksndbuf_uncharge(skb);
+			break;
+	}
+}
+
+EXPORT_SYMBOL(ub_skb_uncharge);	/* due to skb_orphan()/conntracks */
+
+/*
+ * Other sock reserve management
+ */
+
+int ub_sock_getwres_other(struct sock *sk, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+	unsigned long added_reserv;
+	int err;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	/*
+	 * Nothing except beancounter lock protects skbc->poll_reserv.
+	 * So, take the lock and do the job.
+	 * Dances with added_reserv repeat ub_sock_make_wreserv.
+	 */
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	added_reserv = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF, size);
+	added_reserv += skbc->poll_reserv;
+	if (!err)
+		skbc->poll_reserv -= size;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (added_reserv)
+		charge_beancounter_notop(skbc->ub, UB_OTHERSOCKBUF, added_reserv);
+
+	return err;
+}
+EXPORT_SYMBOL(ub_sock_getwres_other);
+
+void ub_sock_retwres_other(struct sock *sk,
+		unsigned long size, unsigned long ressize)
+{
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	ub_sock_do_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
+}
+
+/*
+ * TCP send buffers accounting. Paged part
+ */
+
+int ub_sock_tcp_chargepage(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+	unsigned long extra;
+	int err;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	skbc = sock_bc(sk);
+	ub_sock_make_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE);
+	if (likely(skbc->poll_reserv >= PAGE_SIZE)) {
+		skbc->poll_reserv -= PAGE_SIZE;
+		return 0;
+	}
+
+	/*
+	 * Ok, full page is not available.
+	 * However, this function must succeed if poll previously indicated
+	 * that write is possible.  We better make a forced charge here
+	 * than reserve a whole page in poll.
+	 */
+	err = ub_sock_make_wreserv(sk, UB_TCPSNDBUF, SOCK_MIN_UBCSPACE);
+	if (unlikely(err < 0))
+		goto out;
+	if (skbc->poll_reserv < PAGE_SIZE) {
+		extra = PAGE_SIZE - skbc->poll_reserv;
+		err = charge_beancounter(skbc->ub, UB_TCPSNDBUF, extra,
+				UB_FORCE);
+		if (err < 0)
+			goto out;
+		skbc->poll_reserv += extra;
+	}
+	skbc->poll_reserv -= PAGE_SIZE;
+	return 0;
+
+out:
+	return err;
+}
+
+void ub_sock_tcp_detachpage(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	/* The page is just detached from socket. The last skb in queue
+	   with paged part holds reference to it */
+	skb = skb_peek_tail(&sk->sk_write_queue);
+	if (skb == NULL) {
+	   	/* If the queue is empty - all data is sent and page is about
+		   to be freed */
+		ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE,
+				sock_bc(sk)->poll_reserv);
+	} else {
+		/* Last skb is a good approximation for a last skb with
+		   paged part */
+		skb_bc(skb)->charged += PAGE_SIZE;
+	}
+}
+
+/*
+ * TCPSNDBUF charge functions below are called in the following cases:
+ *  - sending of SYN, SYN-ACK, FIN, the latter charge is forced by
+ *    some technical reasons in TCP code;
+ *  - fragmentation of TCP packets.
+ * These functions are allowed but not required to use poll_reserv.
+ * Originally, these functions didn't do that, since it didn't make
+ * any sense.  Now, since poll_reserv now has a function of general reserve,
+ * they use it.
+ */
+int ub_sock_tcp_chargesend(struct sock *sk, struct sk_buff *skb,
+			    enum ub_severity strict)
+{
+	int ret;
+	unsigned long chargesize;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	skbc = sock_bc(sk);
+	chargesize = skb_charge_fullsize(skb);
+	if (likely(skbc->poll_reserv >= chargesize)) {
+		skbc->poll_reserv -= chargesize;
+		__ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+		/* XXX hack, see ub_skb_set_charge */
+		skb->sk = sk;
+		return 0;
+	}
+
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_TCPSNDBUF,
+			chargesize, strict);
+	/*
+	 * Note: this check is not equivalent of the corresponding check
+	 * in makewreserv.  It's similar in spirit, but an equivalent check
+	 * would be too long and complicated here.
+	 */
+	if (!ret && ub_barrier_hit(ub, UB_TCPSNDBUF))
+		skbc->ub_wcharged += chargesize;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	if (likely(!ret)) {
+		charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ub_sock_tcp_chargesend);
+
+/*
+ * Initialization
+ */
+
+int __init skbc_cache_init(void)
+{
+	return 0;
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_oom.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_oom.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_oom.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_oom.c	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,200 @@
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/cpuset.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_oom.h>
+#include <ub/ub_hash.h>
+
+#define UB_OOM_TIMEOUT	(5 * HZ)
+
+int oom_generation;
+int oom_kill_counter;
+static DEFINE_SPINLOCK(oom_lock);
+static DECLARE_WAIT_QUEUE_HEAD(oom_wq);
+
+static inline int ub_oom_completed(struct task_struct *tsk)
+{
+	if (test_tsk_thread_flag(tsk, TIF_MEMDIE))
+		/* we were oom killed - just die */
+		return 1;
+	if (tsk->task_bc.oom_generation != oom_generation)
+		/* some task was successfully killed */
+		return 1;
+	return 0;
+}
+
+static void ub_clear_oom(void)
+{
+	struct user_beancounter *ub;
+
+	rcu_read_lock();
+	for_each_beancounter(ub)
+		ub->ub_oom_noproc = 0;
+	rcu_read_unlock();
+}
+
+/* Called with cpuset_lock held */
+int ub_oom_lock(void)
+{
+	int timeout;
+	DEFINE_WAIT(oom_w);
+	struct task_struct *tsk;
+
+	tsk = current;
+
+	spin_lock(&oom_lock);
+	if (!oom_kill_counter)
+		goto out_do_oom;
+
+	timeout = UB_OOM_TIMEOUT;
+	while (1) {
+		if (ub_oom_completed(tsk)) {
+			spin_unlock(&oom_lock);
+			return -EINVAL;
+		}
+
+		if (timeout == 0)
+			break;
+
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&oom_wq, &oom_w);
+		spin_unlock(&oom_lock);
+		cpuset_unlock();
+
+		timeout = schedule_timeout(timeout);
+
+		cpuset_lock();
+		spin_lock(&oom_lock);
+		remove_wait_queue(&oom_wq, &oom_w);
+	}
+
+out_do_oom:
+	ub_clear_oom();
+	return 0;
+}
+
+static inline long ub_current_overdraft(struct user_beancounter *ub)
+{
+	return ub->ub_parms[UB_OOMGUARPAGES].held +
+		((ub->ub_parms[UB_KMEMSIZE].held
+		  + ub->ub_parms[UB_TCPSNDBUF].held
+		  + ub->ub_parms[UB_TCPRCVBUF].held
+		  + ub->ub_parms[UB_OTHERSOCKBUF].held
+		  + ub->ub_parms[UB_DGRAMRCVBUF].held)
+		 >> PAGE_SHIFT) - ub->ub_parms[UB_OOMGUARPAGES].barrier;
+}
+
+int ub_oom_task_skip(struct user_beancounter *ub, struct task_struct *tsk)
+{
+	struct user_beancounter *mm_ub;
+
+	if (ub == NULL)
+		return 0;
+
+	task_lock(tsk);
+	if (tsk->mm == NULL)
+		mm_ub = NULL;
+	else
+		mm_ub = tsk->mm->mm_ub;
+
+	while (mm_ub != NULL && mm_ub != ub)
+		mm_ub = mm_ub->parent;
+	task_unlock(tsk);
+
+	return mm_ub != ub;
+}
+
+struct user_beancounter *ub_oom_select_worst(void)
+{
+	struct user_beancounter *ub, *walkp;
+	long ub_maxover;
+
+	ub_maxover = 0;
+	ub = NULL;
+
+	rcu_read_lock();
+	for_each_beancounter (walkp) {
+		long ub_overdraft;
+
+		if (walkp->parent != NULL)
+			continue;
+		if (walkp->ub_oom_noproc)
+			continue;
+
+		ub_overdraft = ub_current_overdraft(walkp);
+		if (ub_overdraft > ub_maxover && get_beancounter_rcu(walkp)) {
+			put_beancounter(ub);
+			ub = walkp;
+			ub_maxover = ub_overdraft;
+		}
+	}
+
+	if (ub)
+		ub->ub_oom_noproc = 1;
+	rcu_read_unlock();
+
+	return ub;
+}
+
+void ub_oom_mm_killed(struct user_beancounter *ub)
+{
+	static struct ub_rate_info ri = { 5, 60*HZ };
+
+	/* increment is serialized with oom_lock */
+	ub->ub_parms[UB_OOMGUARPAGES].failcnt++;
+
+	if (ub_ratelimit(&ri))
+		show_mem();
+}
+
+void ub_oom_unlock(void)
+{
+	spin_unlock(&oom_lock);
+}
+
+void ub_oom_task_dead(struct task_struct *tsk)
+{
+	spin_lock(&oom_lock);
+	oom_kill_counter = 0;
+	oom_generation++;
+
+	printk("OOM killed process %s (pid=%d, ve=%d) exited, "
+			"free=%u gen=%d.\n",
+			tsk->comm, tsk->pid, VEID(tsk->ve_task_info.owner_env),
+			nr_free_pages(), oom_generation);
+	/* if there is time to sleep in ub_oom_lock -> sleep will continue */
+	wake_up_all(&oom_wq);
+	spin_unlock(&oom_lock);
+}
+
+void ub_out_of_memory(struct user_beancounter *scope)
+{
+	struct user_beancounter *ub;
+	struct task_struct *p;
+
+	cpuset_lock();
+	spin_lock(&oom_lock);
+	ub_clear_oom();
+	ub = get_beancounter(scope);
+
+	read_lock(&tasklist_lock);
+retry:
+	p = oom_select_bad_process(ub);
+	if (p == NULL || PTR_ERR(p) == -1UL)
+		goto unlock;
+
+	if (oom_kill_process(p, "UB Out of memory"))
+		goto retry;
+
+	put_beancounter(ub);
+
+unlock:
+	read_unlock(&tasklist_lock);
+	spin_unlock(&oom_lock);
+	cpuset_unlock();
+}
+EXPORT_SYMBOL(ub_out_of_memory);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_page_bc.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_page_bc.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_page_bc.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_page_bc.c	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,777 @@
+/*
+ *  kernel/ub/ub_page_bc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/kmem_cache.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_page.h>
+#include <ub/ub_mem.h>
+#include <ub/io_acct.h>
+
+#define page_pblist(page)      (&page_pbc(page))
+
+static kmem_cache_t *pb_cachep;
+
+struct pb_hash_chain {
+	spinlock_t lock;
+	struct page_beancounter *first;
+};
+
+static struct pb_hash_chain *pb_hash_table;
+
+static unsigned int page_locks_mask;
+static spinlock_t *page_locks;
+
+static inline spinlock_t *page_lock(struct page *p)
+{
+	unsigned long pfn;
+
+	pfn = page_to_pfn(p);
+	return page_locks + (pfn & page_locks_mask);
+}
+
+static unsigned int pb_hash_mask;
+
+/*
+ * Auxiliary stuff
+ */
+
+static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.next, struct page_beancounter,
+			page_list);
+}
+
+static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.prev, struct page_beancounter,
+			page_list);
+}
+
+/*
+ * Held pages manipulation
+ */
+static inline void set_held_pages(struct user_beancounter *bc)
+{
+	/* all three depend on ub_held_pages */
+	__ub_update_physpages(bc);
+	__ub_update_oomguarpages(bc);
+	__ub_update_privvm(bc);
+}
+
+#define UB_HELD_BATCH	(64 * UB_PAGE_WEIGHT)
+
+static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
+{
+	struct ub_percpu_struct *pcpu;
+	unsigned long flags;
+
+	pcpu = per_cpu_ptr(ub->ub_percpu, smp_processor_id());
+	if (pcpu->held_pages - value >= -UB_HELD_BATCH) {
+		pcpu->held_pages -= value;
+		return;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages += pcpu->held_pages - value;
+	pcpu->held_pages = 0;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static void dec_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_dec_held_pages(ub, value);
+}
+
+static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
+{
+	struct ub_percpu_struct *pcpu;
+	unsigned long flags;
+
+	pcpu = per_cpu_ptr(ub->ub_percpu, smp_processor_id());
+	if (pcpu->held_pages + value <= UB_HELD_BATCH) {
+		pcpu->held_pages += value;
+		return;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages += pcpu->held_pages + value;
+	pcpu->held_pages = 0;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static void inc_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_inc_held_pages(ub, value);
+}
+
+void ub_flush_held_pages(struct user_beancounter *ub)
+{
+	int cpu;
+	struct ub_percpu_struct *pcpu;
+
+	for_each_possible_cpu(cpu) {
+		pcpu = per_cpu_ptr(ub->ub_percpu, cpu);
+		ub->ub_held_pages += pcpu->held_pages;
+		pcpu->held_pages = 0;
+	}
+	set_held_pages(ub);
+}
+
+void ub_held_snapshot(struct user_beancounter *ub, unsigned long *held)
+{
+	unsigned long flags, pages, tmpfs, unused;
+	long long held_pages;
+	int cpu, res;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	tmpfs = ub->ub_tmpfs_respages;
+	unused = ub->ub_unused_privvmpages;
+	held_pages = ub->ub_held_pages;
+	for( res = 0 ; res < UB_RESOURCES ; res++ )
+		held[res] = ub->ub_parms[res].held;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	for_each_possible_cpu(cpu)
+		held_pages += per_cpu_ptr(ub->ub_percpu, cpu)->held_pages;
+	pages = max(0ll, held_pages) >> UB_PAGE_WEIGHT_SHIFT;
+
+	/* see set_held_pages() */
+	held[UB_PHYSPAGES] = pages + tmpfs;
+	held[UB_PRIVVMPAGES] = pages + unused + held[UB_SHMPAGES];
+	held[UB_OOMGUARPAGES] = held[UB_PHYSPAGES] + held[UB_SWAPPAGES];
+}
+
+/*
+ * ++ and -- beyond are protected with pb_lock
+ */
+
+static inline void inc_pbc_count(struct user_beancounter *ub)
+{
+	for (; ub != NULL; ub = ub->parent)
+		ub_stat_inc(ub, pbcs);
+}
+
+static inline void dec_pbc_count(struct user_beancounter *ub)
+{
+	for (; ub != NULL; ub = ub->parent)
+		ub_stat_dec(ub, pbcs);
+}
+
+/*
+ * Alloc - free
+ */
+
+inline int pb_alloc(struct page_beancounter **pbc)
+{
+	*pbc = kmem_cache_alloc(pb_cachep, GFP_KERNEL);
+	if (*pbc != NULL) {
+		(*pbc)->next_hash = NULL;
+		(*pbc)->pb_magic = PB_MAGIC;
+	}
+	return (*pbc == NULL);
+}
+
+static void ___pb_free(struct page_beancounter *pb)
+{
+	kmem_cache_free(pb_cachep, pb);
+}
+
+static void __pb_free(struct rcu_head *rcu)
+{
+	___pb_free(container_of(rcu, struct page_beancounter, rcu));
+}
+
+inline void pb_free(struct page_beancounter **pb)
+{
+	if (*pb != NULL) {
+		___pb_free(*pb);
+		*pb = NULL;
+	}
+}
+
+static inline void pb_free_rcu(struct page_beancounter *pb)
+{
+	call_rcu(&pb->rcu, __pb_free);
+}
+
+void pb_free_list(struct page_beancounter **p_pb)
+{
+	struct page_beancounter *list, *pb;
+	
+	list = *p_pb;
+	if (list == PBC_COPY_SAME)
+		return;
+
+	while (list) {
+		pb = list;
+		list = list->next_hash;
+		pb_free(&pb);
+	}
+	*p_pb = NULL;
+}
+
+/*
+ * head -> <new objs> -> <old objs> -> ...
+ */
+static int __alloc_list(struct page_beancounter **head, int num)
+{
+	struct page_beancounter *pb;
+
+	while (num > 0) {
+		if (pb_alloc(&pb))
+			return -1;
+		pb->next_hash = *head;
+		*head = pb;
+		num--;
+	}
+
+	return num;
+}
+
+/* 
+ * Ensure that the list contains at least num elements.
+ * p_pb points to an initialized list, may be of zero length.
+ *
+ * mm->page_table_lock should be held
+ */
+int pb_alloc_list(struct page_beancounter **p_pb, int num)
+{
+	struct page_beancounter *list;
+
+	for (list = *p_pb; list != NULL && num; list = list->next_hash, num--);
+	if (!num)
+		return 0;
+
+	/*
+	 *  *p_pb(after)       *p_pb (before)
+	 *     \                  \
+	 *     <new objs> -...-> <old objs> -> ...
+	 */
+	if (__alloc_list(p_pb, num) < 0)
+		goto nomem;
+	return 0;
+
+nomem:
+	pb_free_list(p_pb);
+	return -ENOMEM;
+}
+
+/*
+ * Allocates a page_beancounter for each
+ * user_beancounter in a hash
+ */
+int pb_alloc_all(struct page_beancounter **pbs)
+{
+	int need_alloc;
+	struct user_beancounter *ub;
+
+	need_alloc = 0;
+	rcu_read_lock();
+	for_each_beancounter(ub)
+		need_alloc++;
+	rcu_read_unlock();
+
+	if (!__alloc_list(pbs, need_alloc))
+		return 0;
+
+	pb_free_list(pbs);
+	return -ENOMEM;
+}
+
+/*
+ * Hash routines
+ */
+
+static inline int pb_hash(struct user_beancounter *ub, struct page *page)
+{
+	return (page_to_pfn(page) ^ ub->ub_cookie) & pb_hash_mask;
+}
+
+/* pb_lock should be held */
+static inline void insert_pb(struct page_beancounter *p, struct page *page,
+		struct user_beancounter *ub, int hash)
+{
+	p->page = page;
+	p->ub = get_beancounter_fast(ub);
+	p->next_hash = pb_hash_table[hash].first;
+	rcu_assign_pointer(pb_hash_table[hash].first, p);
+	inc_pbc_count(ub);
+}
+
+/*
+ * Heart
+ */
+
+static int __pb_dup_ref(struct page *page, struct user_beancounter *bc,
+		int hash, int locked)
+{
+	struct page_beancounter *p;
+
+	for (p = rcu_dereference(pb_hash_table[hash].first);
+			p != NULL && (p->page != page || p->ub != bc);
+			p = rcu_dereference(p->next_hash));
+
+#if 0
+	if (p == NULL && !locked) {
+		printk("%s %p %p\n", __func__, page, bc);
+		spin_lock(&pb_lock);
+		for (p = rcu_dereference(pb_hash_table[hash]);
+				p ; p = rcu_dereference(p->next_hash)) {
+			printk("h %p %p %p %d %d\n", p, p->page, p->ub,
+					atomic_read(&p->refcnt),
+					p->page == page && p->ub == bc);
+		}
+		p = page->bc.page_pb;
+		if (p) do {
+			printk("p %p %p %p %d %d\n", p, p->page, p->ub,
+					atomic_read(&p->refcnt),
+					p->page == page && p->ub == bc);
+			p = list_entry(p->page_list.next,
+					struct page_beancounter, page_list);
+		} while (p != page->bc.page_pb);
+		spin_unlock(&pb_lock);
+		return -1;
+	}
+#endif
+
+	if (p == NULL)
+		return -1;
+
+	if (!locked && atomic_read(&p->refcnt) == 0)
+		return -1;
+
+	atomic_inc(&p->refcnt);
+	return 0;
+}
+
+static void __pb_add_ref(struct page *page, struct user_beancounter *bc,
+		struct page_beancounter **ppb, int hash)
+{
+	struct page_beancounter *head, *p, **hp;
+	spinlock_t *pl;
+	int shift;
+
+	p = *ppb;
+	*ppb = p->next_hash;
+
+	atomic_set(&p->refcnt, 1);
+	insert_pb(p, page, bc, hash);
+
+	pl = page_lock(page);
+	spin_lock(pl);
+	hp = page_pblist(page);
+	head = *hp;
+
+	if (head != NULL) {
+		/* 
+		 * Move the first element to the end of the list.
+		 * List head (pb_head) is set to the next entry.
+		 * Note that this code works even if head is the only element
+		 * on the list (because it's cyclic). 
+		 */
+		BUG_ON(head->pb_magic != PB_MAGIC);
+		*hp = next_page_pb(head);
+		head->shift++;
+		shift = head->shift;
+		/* 
+		 * Update user beancounter, the share of head has been changed.
+		 * Note that the shift counter is taken after increment. 
+		 */
+		dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
+		/* add the new page beancounter to the end of the list */
+		head = *hp;
+		list_add_tail(&p->page_list, &head->page_list);
+	} else {
+		*hp = p;
+		shift = 0;
+		INIT_LIST_HEAD(&p->page_list);
+	}
+
+	p->shift = shift;
+	spin_unlock(pl);
+	/* update user beancounter for the new page beancounter */
+	inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
+}
+
+static void pb_add_ref_ub(struct page *page, struct user_beancounter *bc,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+
+	if (bc == NULL)
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_hash_table[hash].lock);
+	if (__pb_dup_ref(page, bc, hash, 1))
+		__pb_add_ref(page, bc, p_pb, hash);
+	spin_unlock(&pb_hash_table[hash].lock);
+}
+
+void pb_add_ref(struct page *page, struct mm_struct *mm,
+		struct page_beancounter **p_pb)
+{
+	struct user_beancounter *bc;
+	bc = mm->mm_ub;
+	pb_add_ref_ub(page, bc, p_pb);	
+}
+
+void pb_dup_ref(struct page *page, struct mm_struct *mm,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+	struct user_beancounter *bc;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+
+	hash = pb_hash(bc, page);
+
+	if (*page_pblist(page) == NULL)
+		/*
+		 * pages like ZERO_PAGE must not be accounted in pbc
+		 * so on fork we just skip them
+		 */
+		return;
+
+	if (unlikely(*p_pb != PBC_COPY_SAME)) {
+		spin_lock(&pb_hash_table[hash].lock);
+		if (__pb_dup_ref(page, bc, hash, 1))
+			__pb_add_ref(page, bc, p_pb, hash);
+		spin_unlock(&pb_hash_table[hash].lock);
+	} else {
+		rcu_read_lock();
+		if (unlikely(__pb_dup_ref(page, bc, hash, 0)))
+			WARN_ON(1);
+		rcu_read_unlock();
+	}
+}
+
+static void pb_remove_ref_ub(struct page *page, struct user_beancounter *bc)
+{
+	int hash;
+	struct page_beancounter *p, **q, *f;
+	spinlock_t *pl;
+	int shift, shiftt;
+
+	if (bc == NULL)
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;
+
+	hash = pb_hash(bc, page);
+
+	rcu_read_lock();
+	for (q = &pb_hash_table[hash].first, p = rcu_dereference(*q);
+			p != NULL && (p->page != page || p->ub != bc);
+			q = &p->next_hash, p = rcu_dereference(*q));
+	if (p == NULL)
+		goto out_rcu;
+
+	if (!atomic_dec_and_lock(&p->refcnt, &pb_hash_table[hash].lock))
+		/* 
+		 * More references from the same user beancounter exist.
+		 * Nothing needs to be done. 
+		 */
+		goto out_rcu;
+
+	/*
+	 * q may not be a valid obj now
+	 */
+	f = container_of(q, struct page_beancounter, next_hash);
+	if (*q != p || ((q != &pb_hash_table[hash].first) &&
+				atomic_read(&f->refcnt) == 0)) {
+		/*
+		 * no luck - pq was removed from hash while we
+		 * waited for lock and thus we should re-lookup
+		 * one
+		 */
+
+		q = &pb_hash_table[hash].first;
+		while (*q != NULL && *q != p)
+			q = &(*q)->next_hash;
+
+		if (*q == NULL) {
+			WARN_ON(1);
+			rcu_read_unlock();
+			spin_unlock(&pb_hash_table[hash].lock);
+			return;
+		}
+	}
+
+	rcu_read_unlock(); /* not required any longer */
+
+	/* remove from the hash list */
+	f = p;
+	rcu_assign_pointer(*q, p->next_hash);
+
+	pl = page_lock(page);
+	spin_lock(pl);
+	shift = p->shift;
+
+	dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
+
+	q = page_pblist(page);
+	if (*q == p) {
+		if (list_empty(&p->page_list)) {
+			*q = NULL;
+			goto out_free;
+		}
+
+		*q = next_page_pb(p);
+	}
+	list_del(&p->page_list);
+
+	/* Now balance the list.  Move the tail and adjust its shift counter. */
+	p = prev_page_pb(*q);
+	shiftt = p->shift;
+	*q = p;
+	p->shift--;
+
+	inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+
+	/* 
+	 * If the shift counter of the moved beancounter is different from the
+	 * removed one's, repeat the procedure for one more tail beancounter 
+	 */
+	if (shiftt > shift) {
+		p = prev_page_pb(*q);
+		*q = p;
+		p->shift--;
+		inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+	}
+out_free:
+	dec_pbc_count(f->ub);
+	spin_unlock(pl);
+	spin_unlock(&pb_hash_table[hash].lock);
+
+	put_beancounter_fast(f->ub);
+	pb_free_rcu(f);
+	return;
+
+out_rcu:
+	rcu_read_unlock();
+}
+
+void pb_remove_ref(struct page *page, struct mm_struct *mm)
+{
+	struct user_beancounter *bc;
+	bc = mm->mm_ub;
+	pb_remove_ref_ub(page, bc);
+}
+
+struct user_beancounter *pb_grab_page_ub(struct page *page)
+{
+	struct page_beancounter *pb;
+	struct user_beancounter *ub;
+
+	rcu_read_lock();
+	pb = *page_pblist(page);
+	ub = (pb == NULL ? ERR_PTR(-EINVAL) :
+			get_beancounter_rcu(pb->ub));
+	rcu_read_unlock();
+	return ub == NULL ? ERR_PTR(-EINVAL) : ub;
+}
+
+void __init ub_init_pbc(void)
+{
+	unsigned long hash_size, i;
+
+	pb_cachep = kmem_cache_create("page_beancounter", 
+			sizeof(struct page_beancounter), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+	hash_size = num_physpages >> 2;
+	for (pb_hash_mask = 1;
+		(hash_size & pb_hash_mask) != hash_size;
+		pb_hash_mask = (pb_hash_mask << 1) + 1);
+	hash_size = pb_hash_mask + 1;
+	printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
+	pb_hash_table = vmalloc(hash_size * sizeof(struct pb_hash_chain));
+	for (i = 0; i < hash_size; i++) {
+		pb_hash_table[i].first = NULL;
+		spin_lock_init(&pb_hash_table[i].lock);
+	}
+
+	hash_size >>= 2;
+	printk(KERN_INFO "Page locks hash is %lu entries.\n", hash_size);
+	page_locks_mask = hash_size - 1;
+	page_locks = vmalloc(hash_size * sizeof(spinlock_t));
+	for (i = 0; i < hash_size; i++)
+		spin_lock_init(&page_locks[i]);
+}
+
+static inline void ub_migrate_pte_range(struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long addr, unsigned long end,
+		struct user_beancounter *old_ub,
+		struct user_beancounter *new_ub,
+		struct page_beancounter **p_pb)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+	struct page *page;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	do {
+		if (pte_none(*pte) || !pte_present(*pte))
+			continue;
+		page = vm_normal_page(vma, addr, *pte);
+		if (!page)
+			continue;
+		/* implemented only for single-mmaped pages */
+		BUG_ON(page_mapcount(page) != 1);
+
+		pb_remove_ref_ub(page, old_ub);
+		pb_add_ref_ub(page, new_ub, p_pb);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
+}
+
+static inline void ub_migrate_pmd_range(struct vm_area_struct *vma,
+		pud_t *pud, unsigned long addr, unsigned long end,
+		struct user_beancounter *old_ub,
+		struct user_beancounter *new_ub,
+		struct page_beancounter **p_pb)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		ub_migrate_pte_range(vma, pmd, addr, next,
+				old_ub, new_ub, p_pb);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static inline void ub_migrate_pud_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		struct user_beancounter *old_ub,
+		struct user_beancounter *new_ub,
+		struct page_beancounter **p_pb)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		ub_migrate_pmd_range(vma, pud, addr, next,
+				old_ub, new_ub, p_pb);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void ub_migrate_vma_range(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end,
+		struct user_beancounter *old_ub,
+		struct user_beancounter *new_ub,
+		struct page_beancounter **p_pb)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		ub_migrate_pud_range(vma, pgd, addr, next,
+				old_ub, new_ub, p_pb);
+	} while (pgd++, addr = next, addr != end);
+}
+
+int ub_migrate_mm(struct mm_struct *mm, struct user_beancounter *new_ub)
+{
+	struct user_beancounter *old_ub = mm->mm_ub;
+	struct vm_area_struct *vma;
+	unsigned long size, nr_vmas = 0;
+	struct page_beancounter *p_pb = NULL;
+	unsigned long rss = get_mm_rss(mm);
+	int ret;
+
+	ret = pb_alloc_list(&p_pb, rss);
+	if (ret)
+		return ret;
+
+	/* implemented only migration into sub-beancounter */
+	BUG_ON(new_ub->parent != top_beancounter(old_ub));
+
+	down_write(&mm->mmap_sem);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		nr_vmas++;
+		/* UB_PRIVVMPAGES charged only on top_beancounter */
+		if (vma->vm_flags & VM_LOCKED) {
+			unsigned long size;
+
+			size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+			local_irq_disable();
+
+			if (new_ub->parent != old_ub) {
+				spin_lock(&old_ub->ub_lock);
+				__uncharge_beancounter_locked(old_ub,
+						UB_LOCKEDPAGES, size);
+				spin_unlock(&old_ub->ub_lock);
+			}
+
+			spin_lock(&new_ub->ub_lock);
+			__charge_beancounter_locked(new_ub, UB_LOCKEDPAGES,
+					size, UB_FORCE);
+			spin_unlock(&new_ub->ub_lock);
+
+			local_irq_enable();
+		}
+
+		ub_migrate_vma_range(vma, vma->vm_start, vma->vm_end,
+				old_ub, new_ub, &p_pb);
+	}
+
+	size = mm->page_table_charged * CHARGE_ORDER(0);
+	size += mm->page_table_precharge * CHARGE_ORDER(0);
+	size += nr_vmas * CHARGE_SIZE(__vm_area_cachep->objuse);
+
+	uncharge_beancounter_notop(old_ub, UB_KMEMSIZE, size);
+	charge_beancounter_notop(new_ub, UB_KMEMSIZE, size);
+
+	mm->mm_ub = get_beancounter(new_ub);
+	put_beancounter(old_ub);
+
+	up_write(&mm->mmap_sem);
+	pb_free_list(&p_pb);
+	return 0;
+}
+EXPORT_SYMBOL(ub_migrate_mm);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_pages.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_pages.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_pages.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_pages.c	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,556 @@
+/*
+ *  kernel/ub/ub_pages.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/virtinfo.h>
+#include <linux/module.h>
+#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#include <ub/proc.h>
+
+static inline unsigned long pages_in_pte_range(struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	do {
+		if (!pte_none(*pte) && pte_present(*pte))
+			(*ret)++;
+	} while (pte++, addr += PAGE_SIZE, (addr != end));
+	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
+}
+
+static inline unsigned long pages_in_pmd_range(struct vm_area_struct *vma,
+		pud_t *pud, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		next = pages_in_pte_range(vma, pmd, addr, next, ret);
+	} while (pmd++, addr = next, (addr != end));
+
+	return addr;
+}
+
+static inline unsigned long pages_in_pud_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		next = pages_in_pmd_range(vma, pud, addr, next, ret);
+	} while (pud++, addr = next, (addr != end));
+
+	return addr;
+}
+
+unsigned long pages_in_vma_range(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long ret;
+
+	ret = 0;
+	BUG_ON(addr >= end);
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		next = pages_in_pud_range(vma, pgd, addr, next, &ret);
+	} while (pgd++, addr = next, (addr != end));
+	return ret;
+}
+
+void fastcall __ub_update_physpages(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages
+		+ (max(0ll, ub->ub_held_pages) >> UB_PAGE_WEIGHT_SHIFT);
+	ub_adjust_maxheld(ub, UB_PHYSPAGES);
+}
+
+void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_OOMGUARPAGES].held =
+		ub->ub_parms[UB_PHYSPAGES].held +
+		ub->ub_parms[UB_SWAPPAGES].held;
+	ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
+}
+
+void fastcall __ub_update_privvm(struct user_beancounter *ub)
+{
+	ub->ub_parms[UB_PRIVVMPAGES].held =
+		(max(0ll, ub->ub_held_pages) >> UB_PAGE_WEIGHT_SHIFT)
+		+ ub->ub_unused_privvmpages
+		+ ub->ub_parms[UB_SHMPAGES].held;
+	ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
+}
+
+static inline int __charge_privvm_locked(struct user_beancounter *ub, 
+		unsigned long s, enum ub_severity strict)
+{
+	if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0)
+		return -ENOMEM;
+
+	ub->ub_unused_privvmpages += s;
+	return 0;
+}
+
+static void __unused_privvm_dec_locked(struct user_beancounter *ub, 
+		long size)
+{
+	/* catch possible overflow */
+	if (ub->ub_unused_privvmpages < size) {
+		uncharge_warn(ub, UB_UNUSEDPRIVVM,
+				size, ub->ub_unused_privvmpages);
+		size = ub->ub_unused_privvmpages;
+	}
+	ub->ub_unused_privvmpages -= size;
+	__ub_update_privvm(ub);
+}
+
+void __ub_unused_privvm_dec(struct mm_struct *mm, long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__unused_privvm_dec_locked(ub, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_unused_privvm_sub(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long count)
+{
+	if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		__ub_unused_privvm_dec(mm, count);
+}
+
+void __ub_unused_privvm_inc(struct mm_struct *mm, unsigned long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_unused_privvmpages += size;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_unused_privvm_add(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long size)
+{
+	if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		__ub_unused_privvm_inc(mm, size);
+}
+
+int ub_protected_charge(struct mm_struct *mm, unsigned long size,
+		unsigned long newflags, struct vm_area_struct *vma)
+{
+	unsigned long flags;
+	struct file *file;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return PRIVVM_NO_CHARGE;
+
+	flags = vma->vm_flags;
+	if (!((newflags ^ flags) & VM_WRITE))
+		return PRIVVM_NO_CHARGE;
+
+	file = vma->vm_file;
+	if (!VM_UB_PRIVATE(newflags | VM_WRITE, file))
+		return PRIVVM_NO_CHARGE;
+
+	if (flags & VM_WRITE)
+		return PRIVVM_TO_SHARED;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_privvm_locked(ub, size, UB_SOFT) < 0)
+		goto err;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return PRIVVM_TO_PRIVATE;
+
+err:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return PRIVVM_ERROR;
+}
+
+int ub_memory_charge(struct mm_struct *mm, unsigned long size,
+		unsigned vm_flags, struct file *vm_file, int sv)
+{
+	struct user_beancounter *ub, *ubl;
+	unsigned long flags;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return 0;
+
+	size >>= PAGE_SHIFT;
+	if (size > UB_MAXVALUE)
+		return -EINVAL;
+
+	BUG_ON(sv != UB_SOFT && sv != UB_HARD);
+
+	if (vm_flags & VM_LOCKED) {
+		if (charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
+			goto out_err;
+	}
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		ubl = top_beancounter(ub);
+		spin_lock_irqsave(&ubl->ub_lock, flags);
+		if (__charge_privvm_locked(ubl, size, sv))
+			goto out_private;
+		spin_unlock_irqrestore(&ubl->ub_lock, flags);
+	}
+	return 0;
+
+out_private:
+	spin_unlock_irqrestore(&ubl->ub_lock, flags);
+	if (vm_flags & VM_LOCKED)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+out_err:
+	return -ENOMEM;
+}
+
+void ub_memory_uncharge(struct mm_struct *mm, unsigned long size,
+		unsigned vm_flags, struct file *vm_file)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	size >>= PAGE_SHIFT;
+
+	if (vm_flags & VM_LOCKED)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		ub = top_beancounter(ub);
+		spin_lock_irqsave(&ub->ub_lock, flags);
+		__unused_privvm_dec_locked(ub, size);
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+	}
+}
+
+int ub_locked_charge(struct mm_struct *mm, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES,
+			size >> PAGE_SHIFT, UB_HARD);
+}
+
+void ub_locked_uncharge(struct mm_struct *mm, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+int ub_lockedshm_charge(struct shmem_inode_info *shi, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES,
+			size >> PAGE_SHIFT, UB_HARD);
+}
+
+void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+
+static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_tmpfs_respages++;
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_inc(struct shmem_inode_info *shi)
+{
+	struct user_beancounter *ub;
+
+	for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent)
+		do_ub_tmpfs_respages_inc(ub);
+}
+
+static inline void do_ub_tmpfs_respages_sub(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	/* catch possible overflow */
+	if (ub->ub_tmpfs_respages < size) {
+		uncharge_warn(ub, UB_TMPFSPAGES,
+				size, ub->ub_tmpfs_respages);
+		size = ub->ub_tmpfs_respages;
+	}
+	ub->ub_tmpfs_respages -= size;
+	/* update values what is the most interesting */
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
+		unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	for (ub = shi->shmi_ub; ub != NULL; ub = ub->parent)
+		do_ub_tmpfs_respages_sub(ub, size);
+}
+
+int ub_shmpages_charge(struct shmem_inode_info *shi, unsigned long size)
+{
+	int ret;
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return 0;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD);
+	if (ret == 0)
+		__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return ret;
+}
+
+void ub_shmpages_uncharge(struct shmem_inode_info *shi, unsigned long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)
+		return;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_SHMPAGES, size);
+	__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__charge_beancounter_locked(ub, UB_SWAPPAGES, 1, UB_FORCE);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_inc(struct swap_info_struct *si, pgoff_t num,
+		struct user_beancounter *ub)
+{
+	si->swap_ubs[num] = get_beancounter(ub);
+	for (; ub != NULL; ub = ub->parent)
+		do_ub_swapentry_inc(ub);
+}
+EXPORT_SYMBOL(ub_swapentry_inc);
+
+static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_SWAPPAGES, 1);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_dec(struct swap_info_struct *si, pgoff_t num)
+{
+	struct user_beancounter *ub, *ubp;
+
+	ub = si->swap_ubs[num];
+	si->swap_ubs[num] = NULL;
+	for (ubp = ub; ubp != NULL; ubp = ubp->parent)
+		do_ub_swapentry_dec(ubp);
+	put_beancounter(ub);
+}
+EXPORT_SYMBOL(ub_swapentry_dec);
+
+int ub_swap_init(struct swap_info_struct *si, pgoff_t num)
+{
+	struct user_beancounter **ubs;
+
+	ubs = vmalloc(num * sizeof(struct user_beancounter *));
+	if (ubs == NULL)
+		return -ENOMEM;
+
+	memset(ubs, 0, num * sizeof(struct user_beancounter *));
+	si->swap_ubs = ubs;
+	return 0;
+}
+
+void ub_swap_fini(struct swap_info_struct *si)
+{
+	if (si->swap_ubs) {
+		vfree(si->swap_ubs);
+		si->swap_ubs = NULL;
+	}
+}
+#endif
+
+static int vmguar_enough_memory(struct vnotifier_block *self,
+		unsigned long event, void *arg, int old_ret)
+{
+	struct user_beancounter *ub;
+
+	if (event != VIRTINFO_ENOUGHMEM)
+		return old_ret;
+	/*
+	 * If it's a kernel thread, don't care about it.
+	 * Added in order aufsd to run smoothly over ramfs.
+	 */
+	if (!current->mm)
+		return NOTIFY_DONE;
+
+	ub = top_beancounter(current->mm->mm_ub);
+	if (ub->ub_parms[UB_PRIVVMPAGES].held >
+			ub->ub_parms[UB_VMGUARPAGES].barrier)
+		return old_ret;
+
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block vmguar_notifier_block = {
+	.notifier_call = vmguar_enough_memory
+};
+
+static int __init init_vmguar_notifier(void)
+{
+	virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
+	return 0;
+}
+
+static void __exit fini_vmguar_notifier(void)
+{
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
+}
+
+module_init(init_vmguar_notifier);
+module_exit(fini_vmguar_notifier);
+
+#ifdef CONFIG_PROC_FS
+static int bc_vmaux_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub;
+	unsigned long swap, unmap;
+	long pbcs;
+	int i;
+
+	ub = seq_beancounter(f);
+
+	pbcs = __ub_stat_get(ub, pbcs);
+	swap = unmap = 0;
+	for_each_online_cpu(i) {
+		swap += per_cpu_ptr(ub->ub_percpu, i)->swapin;
+		unmap += per_cpu_ptr(ub->ub_percpu, i)->unmap;
+		pbcs += per_cpu_ptr(ub->ub_percpu, i)->pbcs;
+	}
+
+	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_UNUSEDPRIVVM],
+			ub->ub_unused_privvmpages);
+	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_TMPFSPAGES],
+			ub->ub_tmpfs_respages);
+	seq_printf(f, bc_proc_lu_fmt, "rss", max(0l, pbcs));
+
+	seq_printf(f, bc_proc_lu_fmt, "swapin", swap);
+	seq_printf(f, bc_proc_lu_fmt, "unmap", unmap);
+	seq_printf(f, bc_proc_lu_fmt, "dmsize", ub->ub_mem_size);
+	return 0;
+}
+static struct bc_proc_entry bc_vmaux_entry = {
+	.name = "vmaux",
+	.u.show = bc_vmaux_show,
+};
+
+static int __init bc_vmaux_init(void)
+{
+	bc_register_proc_entry(&bc_vmaux_entry);
+	return 0;
+}
+
+late_initcall(bc_vmaux_init);
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_proc.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_proc.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_proc.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_proc.c	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,780 @@
+/*
+ *  kernel/ub/proc.c 
+ *
+ *  Copyright (C) 2006 OpenVZ. SWsoft Inc.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_page.h>
+#include <ub/proc.h>
+
+#include <linux/ve_proto.h>
+
+/* Generic output formats */
+#if BITS_PER_LONG == 32
+const char *bc_proc_lu_fmt = "\t%-20s %10lu\n";
+const char *bc_proc_lu_lfmt = "\t%-20s %21lu\n";
+const char *bc_proc_llu_fmt = "\t%-20s %21llu\n";
+const char *bc_proc_lu_lu_fmt = "\t%-20s %10lu %10lu\n";
+#else
+const char *bc_proc_lu_fmt = "\t%-20s %21lu\n";
+const char *bc_proc_lu_lfmt = "\t%-20s %21lu\n";
+const char *bc_proc_llu_fmt = "\t%-20s %21llu\n";
+const char *bc_proc_lu_lu_fmt = "\t%-20s %21lu %21lu\n";
+#endif
+
+#if BITS_PER_LONG == 32
+static const char *head_fmt = "%10s  %-12s %10s %10s %10s %10s %10s\n";
+static const char *res_fmt = "%10s  %-12s %10lu %10lu %10lu %10lu %10lu\n";
+#else
+static const char *head_fmt = "%10s  %-12s %20s %20s %20s %20s %20s\n";
+static const char *res_fmt = "%10s  %-12s %20lu %20lu %20lu %20lu %20lu\n";
+#endif
+
+static void ub_show_res(struct seq_file *f, struct user_beancounter *ub,
+		int r, unsigned long held, int show_uid)
+{
+	int len;
+	char ub_uid[64];
+
+	if (show_uid && r == 0) {
+		len = print_ub_uid(ub, ub_uid, sizeof(ub_uid) - 2);
+		ub_uid[len] = ':';
+		ub_uid[len + 1] = '\0';
+	} else
+		strcpy(ub_uid, "");
+
+	seq_printf(f, res_fmt, ub_uid, ub_rnames[r],
+			held,
+			ub->ub_parms[r].maxheld,
+			ub->ub_parms[r].barrier,
+			ub->ub_parms[r].limit,
+			ub->ub_parms[r].failcnt);
+}
+
+static void __show_resources(struct seq_file *f, struct user_beancounter *ub,
+		int show_uid)
+{
+	unsigned long held[UB_RESOURCES];
+	int i;
+
+	ub_held_snapshot(ub, held);
+
+	for (i = 0; i < UB_RESOURCES_COMPAT; i++)
+		if (strcmp(ub_rnames[i], "dummy") != 0)
+			ub_show_res(f, ub, i, held[i], show_uid);
+
+	for (i = UB_RESOURCES_COMPAT; i < UB_RESOURCES; i++)
+		ub_show_res(f, ub, i, held[i], show_uid);
+}
+
+static int bc_resources_show(struct seq_file *f, void *v)
+{
+	__show_resources(f, seq_beancounter(f), 0);
+	return 0;
+}
+
+static struct bc_proc_entry bc_resources_entry = {
+	.name = "resources",
+	.u.show = bc_resources_show,
+};
+
+static int bc_debug_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub;
+	char buf[64];
+	int cpu, cnt;
+
+	ub = seq_beancounter(f);
+	print_ub_uid(ub, buf, sizeof(buf));
+	seq_printf(f, "uid: %s\n", buf);
+	seq_printf(f, "ref: %d\n", atomic_read(&ub->ub_refcount));
+
+	cnt = 0;
+	for_each_possible_cpu(cpu)
+		cnt += per_cpu_ptr(ub->ub_percpu, cpu)->fast_refcount;
+	seq_printf(f, "fast_ref: %d\n", atomic_read(&ub->ub_fastcount) + cnt);
+
+	seq_printf(f, "bc: %p\n", ub);
+	seq_printf(f, "par: %p\n", ub->parent);
+	seq_printf(f, "priv: %p\n", ub->private_data);
+	return 0;
+}
+
+static struct bc_proc_entry bc_debug_entry = {
+	.name = "debug",
+	.u.show = bc_debug_show,
+};
+
+extern int meminfo_read_proc_ub(struct user_beancounter *ub, char *page);
+
+static int bc_meminfo_show(struct seq_file *f, void *v)
+{
+	int err;
+	struct user_beancounter *ub;
+	struct ve_struct *ve, *old;
+	char *page;
+
+	err = 0; /* empty file for stopped CT */
+	ub = seq_beancounter(f);
+	ve = get_ve_by_id(top_beancounter(ub)->ub_uid);
+	if (ve == NULL)
+		goto out;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out_put;
+
+	old = set_exec_env(ve);
+	err = meminfo_read_proc_ub(ub, page);
+	set_exec_env(old);
+	if (err < 0)
+		goto out_free;
+
+	seq_printf(f, page);
+	err = 0;
+
+out_free:
+	free_page((unsigned long)page);
+out_put:
+	put_ve(ve);
+out:
+	return err;
+}
+
+static struct bc_proc_entry bc_meminfo_entry = {
+	.name = "meminfo",
+	.u.show = bc_meminfo_show,
+};
+
+static void bc_count_slab_show_one(const char *name, int count, void *v)
+{
+	seq_printf((struct seq_file *)v, "%s: %u\n", name, count);
+}
+
+static int bc_count_slab_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub;
+
+	ub = seq_beancounter(f);
+	slab_walk_ub(ub, bc_count_slab_show_one, f);
+	return 0;
+}
+
+static struct bc_proc_entry bc_count_slab_entry = {
+	.name = "slabinfo",
+	.u.show = bc_count_slab_show
+};
+
+static int ub_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub = v;
+	unsigned long held[UB_RESOURCES];
+	int i;
+
+	ub_held_snapshot(ub, held);
+
+	for (i = 0; i < UB_RESOURCES_COMPAT; i++)
+		ub_show_res(f, ub, i, held[i], 1);
+	return 0;
+}
+
+static int res_show(struct seq_file *f, void *v)
+{
+	__show_resources(f, (struct user_beancounter *)v, 1);
+	return 0;
+}
+
+static int ub_accessible(struct user_beancounter *exec,
+		struct user_beancounter *target)
+{
+	struct user_beancounter *p, *q;
+
+	p = top_beancounter(exec);
+	q = top_beancounter(target);
+
+	return (p == get_ub0() || p == q);
+}
+
+static void ub_show_header(struct seq_file *f)
+{
+	seq_printf(f, "Version: 2.5\n");
+	seq_printf(f, head_fmt, "uid", "resource",
+			"held", "maxheld", "barrier", "limit", "failcnt");
+}
+
+static void *ub_start(struct seq_file *f, loff_t *ppos)
+{
+	struct user_beancounter *ub;
+	struct user_beancounter *exec_ub; 
+	unsigned long pos;
+
+	pos = *ppos;
+	if (pos == 0)
+		ub_show_header(f);
+
+	exec_ub = get_exec_ub();
+
+	rcu_read_lock();
+	for_each_beancounter(ub) {
+		if (ub->parent != NULL)
+			continue;
+		if (!ub_accessible(exec_ub, ub))
+			continue;
+		if (pos-- == 0)
+			return ub;
+	}
+	return NULL;
+}
+
+static void *ub_next(struct seq_file *f, void *v, loff_t *ppos)
+{
+	struct user_beancounter *ub;
+	struct list_head *entry;
+	struct user_beancounter *exec_ub;
+
+	exec_ub = get_exec_ub();
+	ub = (struct user_beancounter *)v;
+
+	entry = &ub->ub_list;
+
+	list_for_each_continue_rcu(entry, &ub_list_head) {
+		ub = list_entry(entry, struct user_beancounter, ub_list);
+		if (ub->parent != NULL)
+			continue;
+		if (!ub_accessible(exec_ub, ub))
+			continue;
+
+		(*ppos)++;
+		return ub;
+	}
+	return NULL;
+}
+
+static void ub_stop(struct seq_file *f, void *v)
+{
+	rcu_read_unlock();
+}
+
+static struct seq_operations ub_seq_ops = {
+	.start = ub_start,
+	.next  = ub_next,
+	.stop  = ub_stop,
+	.show  = ub_show,
+};
+
+static int ub_open(struct inode *inode, struct file *filp)
+{
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return -EACCES;
+
+	return seq_open(filp, &ub_seq_ops);
+}
+
+static struct file_operations ub_file_operations = {
+	.open		= ub_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct seq_operations res_seq_ops = {
+	.start = ub_start,
+	.next  = ub_next,
+	.stop  = ub_stop,
+	.show  = res_show,
+};
+
+static int res_open(struct inode *inode, struct file *filp)
+{
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return -EACCES;
+
+	return seq_open(filp, &res_seq_ops);
+}
+
+static struct file_operations resources_operations = {
+	.open		= res_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct bc_proc_entry bc_all_resources_entry = {
+	.name = "resources",
+	.u.fops = &resources_operations,
+};
+
+/*
+ * Generic showing stuff
+ */
+
+static int cookies, num_entries;
+static struct bc_proc_entry *bc_entries __read_mostly;
+static struct bc_proc_entry *bc_root_entries __read_mostly;
+static DEFINE_SPINLOCK(bc_entries_lock);
+static struct proc_dir_entry *bc_proc_root;
+
+void bc_register_proc_entry(struct bc_proc_entry *e)
+{
+	spin_lock(&bc_entries_lock);
+	e->cookie = ++cookies;
+	e->next = bc_entries;
+	bc_entries = e;
+	num_entries++;
+	spin_unlock(&bc_entries_lock);
+}
+
+EXPORT_SYMBOL(bc_register_proc_entry);
+
+void bc_register_proc_root_entry(struct bc_proc_entry *e)
+{
+	spin_lock(&bc_entries_lock);
+	e->cookie = ++cookies;
+	e->next = bc_root_entries;
+	bc_root_entries = e;
+	bc_proc_root->nlink++;
+	spin_unlock(&bc_entries_lock);
+}
+
+EXPORT_SYMBOL(bc_register_proc_root_entry);
+
+/*
+ * small helpers
+ */
+
+static inline unsigned long bc_make_ino(struct user_beancounter *ub)
+{
+	unsigned long ret;
+
+	ret = 0xbc000000;
+	if (ub->parent)
+		ret |= ((ub->parent->ub_uid + 1) << 4);
+	ret |= (ub->ub_uid + 1);
+	return ret;
+}
+
+static inline unsigned long bc_make_file_ino(struct bc_proc_entry *de)
+{
+	return 0xbe000000 + de->cookie;
+}
+
+static int bc_d_delete(struct dentry *d)
+{
+	return 1;
+}
+
+static void bc_d_release(struct dentry *d)
+{
+	put_beancounter((struct user_beancounter *)d->d_fsdata);
+}
+
+static struct inode_operations bc_entry_iops;
+static struct file_operations bc_entry_fops;
+static struct dentry_operations bc_dentry_ops = {
+	.d_delete = bc_d_delete,
+	.d_release = bc_d_release,
+};
+
+/*
+ * common directory operations' helpers
+ */
+
+static int bc_readdir(struct file *file, filldir_t filler, void *data,
+		struct user_beancounter *parent)
+{
+	int err = 0;
+	loff_t pos, filled;
+	struct user_beancounter *ub, *prev;
+	struct bc_proc_entry *pde;
+
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return -EPERM;
+
+	pos = file->f_pos;
+	if (pos == 0) {
+		err = (*filler)(data, ".", 1, pos,
+				file->f_dentry->d_inode->i_ino, DT_DIR);
+		if (err < 0) {
+			err = 0;
+			goto out;
+		}
+		pos++;
+	}
+
+	if (pos == 1) {
+		err = (*filler)(data, "..", 2, pos,
+				parent_ino(file->f_dentry), DT_DIR);
+		if (err < 0) {
+			err = 0;
+			goto out;
+		}
+		pos++;
+	}
+
+	filled = 2;
+	for (pde = (parent == NULL ? bc_root_entries : bc_entries);
+			pde != NULL; pde = pde->next) {
+		if (filled++ < pos)
+			continue;
+
+		err = (*filler)(data, pde->name, strlen(pde->name), pos,
+				bc_make_file_ino(pde), DT_REG);
+		if (err < 0) {
+			err = 0;
+			goto out;
+		}
+		pos++;
+	}
+
+	rcu_read_lock();
+	prev = NULL;
+	ub = list_entry(&ub_list_head, struct user_beancounter, ub_list);
+	while (1) {
+		int len;
+		unsigned long ino;
+		char buf[64];
+
+		ub = list_entry(rcu_dereference(ub->ub_list.next),
+				struct user_beancounter, ub_list);
+		if (&ub->ub_list == &ub_list_head)
+			break;
+
+		if (ub->parent != parent)
+			continue;
+
+		if (filled++ < pos)
+			continue;
+
+		if (!get_beancounter_rcu(ub))
+			continue;
+
+		rcu_read_unlock();
+		put_beancounter(prev);
+
+		len = print_ub_uid(ub, buf, sizeof(buf));
+		ino = bc_make_ino(ub);
+
+		err = (*filler)(data, buf, len, pos, ino, DT_DIR);
+		if (err < 0) {
+			err = 0;
+			put_beancounter(ub);
+			goto out;
+		}
+
+		rcu_read_lock();
+		prev = ub;
+		pos++;
+	}
+	rcu_read_unlock();
+	put_beancounter(prev);
+out:
+	file->f_pos = pos;
+	return err;
+}
+
+static int bc_looktest(struct inode *ino, void *data)
+{
+	return ino->i_op == &bc_entry_iops && ino->i_private == data;
+}
+
+static int bc_lookset(struct inode *ino, void *data)
+{
+	struct user_beancounter *ub;
+
+	ub = (struct user_beancounter *)data;
+	ino->i_private = data;
+	ino->i_ino = bc_make_ino(ub);
+	ino->i_fop = &bc_entry_fops;
+	ino->i_op = &bc_entry_iops;
+	ino->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
+	/* subbeancounters are not included, but who cares? */
+	ino->i_nlink = num_entries + 2;
+	ino->i_gid = 0;
+	ino->i_uid = 0;
+	return 0;
+}
+
+static struct dentry *bc_lookup(struct user_beancounter *ub, struct inode *dir,
+		struct dentry *dentry)
+{
+	struct inode *ino;
+
+	ino = iget5_locked(dir->i_sb, ub->ub_uid, bc_looktest, bc_lookset, ub);
+	if (ino == NULL)
+		goto out_put;
+
+	unlock_new_inode(ino);
+	dentry->d_op = &bc_dentry_ops;
+	dentry->d_fsdata = ub;
+	d_add(dentry, ino);
+	return NULL;
+
+out_put:
+	put_beancounter(ub);
+	return ERR_PTR(-ENOENT);
+}
+
+/*
+ * files (bc_proc_entry) manipulations
+ */
+
+static struct dentry *bc_lookup_file(struct inode *dir,
+		struct dentry *dentry, struct bc_proc_entry *root,
+		int (*test)(struct inode *, void *),
+		int (*set)(struct inode *, void *))
+{
+	struct bc_proc_entry *pde;
+	struct inode *ino;
+
+	for (pde = root; pde != NULL; pde = pde->next)
+		if (strcmp(pde->name, dentry->d_name.name) == 0)
+			break;
+
+	if (pde == NULL)
+		return ERR_PTR(-ESRCH);
+
+	ino = iget5_locked(dir->i_sb, pde->cookie, test, set, pde);
+	if (ino == NULL)
+		return ERR_PTR(-ENOENT);
+
+	unlock_new_inode(ino);
+	dentry->d_op = &bc_dentry_ops;
+	d_add(dentry, ino);
+	return NULL;
+}
+
+static int bc_file_open(struct inode *ino, struct file *filp)
+{
+	struct bc_proc_entry *de;
+	struct user_beancounter *ub;
+
+	de = (struct bc_proc_entry *)ino->i_private;
+	ub = (struct user_beancounter *)filp->f_dentry->d_parent->d_fsdata;
+	BUG_ON(ub->ub_magic != UB_MAGIC);
+
+	/*
+	 * ub can't disappear: we hold d_parent, he holds the beancounter
+	 */
+	return single_open(filp, de->u.show, ub);
+}
+
+static struct file_operations bc_file_ops = {
+	.open		= bc_file_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int bc_looktest_entry(struct inode *ino, void *data)
+{
+	return ino->i_fop == &bc_file_ops && ino->i_private == data;
+}
+
+static int bc_lookset_entry(struct inode *ino, void *data)
+{
+	struct bc_proc_entry *de;
+
+	de = (struct bc_proc_entry *)data;
+	ino->i_private = data;
+	ino->i_ino = bc_make_file_ino(de);
+	ino->i_fop = &bc_file_ops,
+	ino->i_mode = S_IFREG | S_IRUSR;
+	ino->i_nlink = 1;
+	ino->i_gid = 0;
+	ino->i_uid = 0;
+	return 0;
+}
+
+static inline struct dentry *bc_lookup_files(struct inode *dir,
+		struct dentry *de)
+{
+	return bc_lookup_file(dir, de, bc_entries,
+			bc_looktest_entry, bc_lookset_entry);
+}
+
+static int bc_looktest_root_entry(struct inode *ino, void *data)
+{
+	struct bc_proc_entry *de;
+
+	de = (struct bc_proc_entry *)data;
+	return ino->i_fop == de->u.fops && ino->i_private == data;
+}
+
+static int bc_lookset_root_entry(struct inode *ino, void *data)
+{
+	struct bc_proc_entry *de;
+
+	de = (struct bc_proc_entry *)data;
+	ino->i_private = data;
+	ino->i_ino = bc_make_file_ino(de);
+	ino->i_fop = de->u.fops;
+	ino->i_mode = S_IFREG | S_IRUSR;
+	ino->i_nlink = 1;
+	ino->i_gid = 0;
+	ino->i_uid = 0;
+	return 0;
+}
+
+static inline struct dentry *bc_lookup_root_files(struct inode *dir,
+		struct dentry *de)
+{
+	return bc_lookup_file(dir, de, bc_root_entries,
+			bc_looktest_root_entry, bc_lookset_root_entry);
+}
+
+/*
+ * /proc/bc/.../<id> directory operations
+ */
+
+static int bc_entry_readdir(struct file *file, void *data, filldir_t filler)
+{
+	return bc_readdir(file, filler, data,
+			(struct user_beancounter *)file->f_dentry->d_fsdata);
+}
+
+static struct dentry *bc_entry_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	int id;
+	char *end;
+	struct user_beancounter *par, *ub;
+	struct dentry *de;
+
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return ERR_PTR(-EPERM);
+
+	de = bc_lookup_files(dir, dentry);
+	if (de != ERR_PTR(-ESRCH))
+		return de;
+
+	id = simple_strtol(dentry->d_name.name, &end, 10);
+	if (*end != '.')
+		return ERR_PTR(-ENOENT);
+
+	par = (struct user_beancounter *)dir->i_private;
+	if (par->ub_uid != id)
+		return ERR_PTR(-ENOENT);
+
+	id = simple_strtol(end + 1, &end, 10);
+	if (*end != '\0')
+		return ERR_PTR(-ENOENT);
+
+	ub = get_subbeancounter_byid(par, id, 0);
+	if (ub == NULL)
+		return ERR_PTR(-ENOENT);
+
+	return bc_lookup(ub, dir, dentry);
+}
+
+static int bc_entry_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct user_beancounter *ub;
+
+	generic_fillattr(dentry->d_inode, stat);
+	ub = (struct user_beancounter *)dentry->d_fsdata;
+	stat->nlink = ub->ub_childs + 2;
+	return 0;
+}
+
+static struct file_operations bc_entry_fops = {
+	.read = generic_read_dir,
+	.readdir = bc_entry_readdir,
+};
+
+static struct inode_operations bc_entry_iops = {
+	.lookup = bc_entry_lookup,
+	.getattr = bc_entry_getattr,
+};
+
+/*
+ * /proc/bc directory operations
+ */
+
+static int bc_root_readdir(struct file *file, void *data, filldir_t filler)
+{
+	return bc_readdir(file, filler, data, NULL);
+}
+
+static struct dentry *bc_root_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	int id;
+	char *end;
+	struct user_beancounter *ub;
+	struct dentry *de;
+
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return ERR_PTR(-EPERM);
+
+	de = bc_lookup_root_files(dir, dentry);
+	if (de != ERR_PTR(-ESRCH))
+		return de;
+
+	id = simple_strtol(dentry->d_name.name, &end, 10);
+	if (*end != '\0')
+		return ERR_PTR(-ENOENT);
+
+	ub = get_beancounter_byuid(id, 0);
+	if (ub == NULL)
+		return ERR_PTR(-ENOENT);
+
+	return bc_lookup(ub, dir, dentry);
+}
+
+static int bc_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
+	struct kstat *stat)
+{
+	generic_fillattr(dentry->d_inode, stat);
+	stat->nlink = ub_count + 2;
+	return 0;
+}
+
+static struct file_operations bc_root_fops = {
+	.read = generic_read_dir,
+	.readdir = bc_root_readdir,
+};
+
+static struct inode_operations bc_root_iops = {
+	.lookup = bc_root_lookup,
+	.getattr = bc_root_getattr,
+};
+
+static int __init ub_init_proc(void)
+{
+	struct proc_dir_entry *entry;
+
+	bc_proc_root = create_proc_entry("bc",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (bc_proc_root == NULL)
+		panic("Can't create /proc/bc entry");
+
+	bc_proc_root->proc_fops = &bc_root_fops;
+	bc_proc_root->proc_iops = &bc_root_iops;
+
+	bc_register_proc_entry(&bc_resources_entry);
+#ifdef CONFIG_UBC_DEBUG
+	bc_register_proc_entry(&bc_debug_entry);
+#endif
+	bc_register_proc_entry(&bc_count_slab_entry);
+	bc_register_proc_root_entry(&bc_all_resources_entry);
+
+	bc_register_proc_entry(&bc_meminfo_entry);
+
+	entry = create_proc_glob_entry("user_beancounters", S_IRUSR, NULL);
+	entry->proc_fops = &ub_file_operations;
+	return 0;
+}
+
+core_initcall(ub_init_proc);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_stat.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_stat.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_stat.c	2017-01-13 08:40:17.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_stat.c	2017-01-13 08:40:17.000000000 -0500
@@ -0,0 +1,453 @@
+/*
+ *  kernel/ub/ub_stat.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/param.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_stat.h>
+
+static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(ubs_notify_list);
+static long ubs_min_interval;
+static ubstattime_t ubs_start_time, ubs_end_time;
+static struct timer_list ubs_timer;
+
+static int ubstat_get_list(void __user *buf, long size)
+{
+	int retval;
+	struct user_beancounter *ub, *ubp;
+	long *page, *ptr, *end;
+	int len;
+
+	page = (long *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	retval = 0;
+	ubp = NULL;
+	ptr = page;
+	end = page + PAGE_SIZE / sizeof(*ptr);
+
+	spin_lock_irq(&ub_hash_lock);
+	for_each_beancounter(ub) {
+		if (ub->parent != NULL)
+			continue;
+		*ptr++ = ub->ub_uid;
+		if (ptr != end)
+			continue;
+
+		get_beancounter(ub);
+		spin_unlock_irq(&ub_hash_lock);
+
+		put_beancounter(ubp);
+		ubp = ub;
+
+		len = min_t(long, (ptr - page) * sizeof(*ptr), size);
+		if (copy_to_user(buf, page, len)) {
+			retval = -EFAULT;
+			goto out_put;
+		}
+		retval += len;
+		if (len < PAGE_SIZE)
+			goto out_put;
+		buf += len;
+		size -= len;
+
+		ptr = page;
+		end = page + PAGE_SIZE / sizeof(*ptr);
+
+		spin_lock_irq(&ub_hash_lock);
+	}
+	spin_unlock_irq(&ub_hash_lock);
+
+	/* ubp (if pinned) is released exactly once at out_put below */
+	size = min_t(long, (ptr - page) * sizeof(*ptr), size);
+	if (size > 0 && copy_to_user(buf, page, size)) {
+		retval = -EFAULT;
+		goto out_put;
+	}
+	retval += size;
+
+out_put:
+	put_beancounter(ubp);
+	free_page((unsigned long)page);
+	return retval;
+}
+
+static int ubstat_gettime(void __user *buf, long size)
+{
+	ubgettime_t data;
+	int retval;
+
+	spin_lock(&ubs_notify_lock);
+	data.start_time = ubs_start_time;
+	data.end_time = ubs_end_time;
+	data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
+	spin_unlock(&ubs_notify_lock);
+
+	retval = min_t(long, sizeof(data), size);
+	if (copy_to_user(buf, &data, retval))
+		retval = -EFAULT;
+	return retval;
+}
+
+static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
+{
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[1];
+	} *data;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+
+	data->param[0].maxheld = ub->ub_store[res].maxheld;
+	data->param[0].failcnt = ub->ub_store[res].failcnt;
+
+	return sizeof(*data);
+}
+
+static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
+{
+	int wrote;
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;
+		data->param[resource].maxheld = ub->ub_store[resource].maxheld;
+		data->param[resource].failcnt = ub->ub_store[resource].failcnt;
+		wrote += sizeof(data->param[resource]); 
+	}
+
+	return wrote;
+}
+
+static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
+		int size)
+{
+	int wrote;
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparmf_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;
+		/* The beginning of ubstatparmf_t matches struct ubparm. */
+		memcpy(&data->param[resource], &ub->ub_store[resource],
+				sizeof(ub->ub_store[resource]));
+		data->param[resource].__unused1 = 0;
+		data->param[resource].__unused2 = 0;
+		wrote += sizeof(data->param[resource]);
+	}
+	return wrote;
+}
+
+static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
+		void __user *buf, long size)
+{
+	void *kbuf;
+	int retval;
+
+	kbuf = (void *)__get_free_page(GFP_KERNEL);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	spin_lock(&ubs_notify_lock);
+	switch (UBSTAT_CMD(cmd)) {
+		case UBSTAT_READ_ONE:
+			retval = -EINVAL;
+			if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
+				break;
+			retval = ubstat_do_read_one(ub,
+					UBSTAT_PARMID(cmd), kbuf);
+			break;
+		case UBSTAT_READ_ALL:
+			retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
+			break;
+		case UBSTAT_READ_FULL:
+			retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
+			break;
+		default:
+			retval = -EINVAL;
+	}
+	spin_unlock(&ubs_notify_lock);
+
+	if (retval > 0) {
+		retval = min_t(long, retval, size);
+		if (copy_to_user(buf, kbuf, retval))
+			retval = -EFAULT;
+	}
+
+	free_page((unsigned long)kbuf);
+	return retval;
+}
+
+static int ubstat_handle_notifrq(ubnotifrq_t *req)
+{
+	int retval;
+	struct ub_stat_notify *new_notify;
+	struct list_head *entry;
+	struct task_struct *tsk_to_free;
+
+	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL); /* struct, not pointer size */
+	if (new_notify == NULL)
+		return -ENOMEM;
+
+	tsk_to_free = NULL;
+	INIT_LIST_HEAD(&new_notify->list);
+
+	spin_lock(&ubs_notify_lock);
+	list_for_each(entry, &ubs_notify_list) {
+		struct ub_stat_notify *notify;
+
+		notify = list_entry(entry, struct ub_stat_notify, list);
+		if (notify->task == current) {
+			kfree(new_notify);
+			new_notify = notify;
+			break;
+		}
+	}
+
+	retval = -EINVAL;
+	if (req->maxinterval < 1)
+		goto out_unlock;
+	if (req->maxinterval > TIME_MAX_SEC)
+		req->maxinterval = TIME_MAX_SEC;
+	if (req->maxinterval < ubs_min_interval) {
+		unsigned long dif;
+
+		ubs_min_interval = req->maxinterval;
+		dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
+		if (dif > req->maxinterval)
+			mod_timer(&ubs_timer,
+					ubs_timer.expires -
+					(dif - req->maxinterval) * HZ);
+	}
+
+	if (entry != &ubs_notify_list) {
+		list_del(&new_notify->list);
+		tsk_to_free = new_notify->task;
+	}
+	if (req->signum) {
+		new_notify->task = current;
+		get_task_struct(new_notify->task);
+		new_notify->signum = req->signum;
+		list_add(&new_notify->list, &ubs_notify_list);
+	} else
+		kfree(new_notify);
+	retval = 0;
+out_unlock:
+	spin_unlock(&ubs_notify_lock);
+	if (tsk_to_free != NULL)
+		put_task_struct(tsk_to_free);
+	return retval;
+}
+
+/*
+ * former sys_ubstat
+ */
+long do_ubstat(int func, unsigned long arg1, unsigned long arg2,
+		void __user *buf, long size)
+{
+	int retval;
+	struct user_beancounter *ub;
+
+	if (func == UBSTAT_UBPARMNUM)
+		return UB_RESOURCES;
+	if (func == UBSTAT_UBLIST)
+		return ubstat_get_list(buf, size);
+	if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
+		return -EPERM;
+
+	if (func == UBSTAT_GETTIME) {
+		retval = ubstat_gettime(buf, size);
+		goto notify;
+	}
+
+	ub = get_exec_ub();
+	if (ub != NULL && ub->ub_uid == arg1)
+		get_beancounter(ub);
+	else /* FIXME must be if (ve_is_super) */
+		ub = get_beancounter_byuid(arg1, 0);
+
+	if (ub == NULL)
+		return -ESRCH;
+
+	retval = ubstat_get_stat(ub, func, buf, size);
+	put_beancounter(ub);
+notify:
+	/* Handle request for notification */
+	if (retval >= 0) {
+		ubnotifrq_t notifrq;
+		int err;
+
+		err = -EFAULT;
+		if (!copy_from_user(&notifrq, (void __user *)arg2,
+					sizeof(notifrq)))
+			err = ubstat_handle_notifrq(&notifrq);
+		if (err)
+			retval = err;
+	}
+
+	return retval;
+}
+
+static void ubstat_save_onestat(struct user_beancounter *ub)
+{
+	int resource;
+
+	/* called with local irq disabled */
+	spin_lock(&ub->ub_lock);
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		memcpy(&ub->ub_store[resource], &ub->ub_parms[resource],
+			sizeof(struct ubparm));
+		ub->ub_parms[resource].minheld = 
+			ub->ub_parms[resource].maxheld =
+			ub->ub_parms[resource].held;
+	}
+	spin_unlock(&ub->ub_lock);
+}
+
+static void ubstat_save_statistics(void)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	local_irq_save(flags);
+	for_each_beancounter (ub)
+		ubstat_save_onestat(ub);
+	local_irq_restore(flags);
+}
+
+static void ubstatd_timeout(unsigned long __data)
+{
+	struct task_struct *p;
+
+	p = (struct task_struct *) __data;
+	wake_up_process(p);
+}
+
+/*
+ * Safe wrapper for send_sig. It prevents a race with release_task
+ * for sighand.
+ * Should be called under tasklist_lock.
+ */
+static void task_send_sig(struct ub_stat_notify *notify)
+{
+	if (likely(notify->task->sighand != NULL))
+		send_sig(notify->signum, notify->task, 1);
+}
+
+static inline void do_notifies(void)
+{
+	LIST_HEAD(notif_free_list);
+	struct ub_stat_notify *notify;
+	struct ub_stat_notify *tmp;
+
+	spin_lock(&ubs_notify_lock);
+	ubs_start_time = ubs_end_time;
+	/*
+	 * the expression below relies on time being unsigned long and
+	 * arithmetic promotion rules
+	 */
+	ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
+	mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
+	ubs_min_interval = TIME_MAX_SEC;
+	/* save statistics accumulated for the interval */
+	ubstat_save_statistics();
+	/* send signals */
+	read_lock(&tasklist_lock);
+	while (!list_empty(&ubs_notify_list)) {
+		notify = list_entry(ubs_notify_list.next,
+				struct ub_stat_notify, list);
+		task_send_sig(notify);
+		list_del(&notify->list);
+		list_add(&notify->list, &notif_free_list);
+	}
+	read_unlock(&tasklist_lock);
+	spin_unlock(&ubs_notify_lock);
+
+	list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
+		put_task_struct(notify->task);
+		kfree(notify);
+	}
+}
+
+/*
+ * Kernel thread
+ */
+static int ubstatd(void *unused)
+{
+	/* daemonize call will take care of signals */
+	daemonize("ubstatd");
+
+	ubs_timer.data = (unsigned long)current;
+	ubs_timer.function = ubstatd_timeout;
+	add_timer(&ubs_timer);
+
+	while (1) {
+		set_task_state(current, TASK_INTERRUPTIBLE);
+		if (time_after(ubs_timer.expires, jiffies)) {
+			schedule();
+			try_to_freeze();
+			continue;
+		}
+
+		__set_task_state(current, TASK_RUNNING);
+		do_notifies();
+	}
+	return 0;
+}
+
+static int __init ubstatd_init(void)
+{
+	init_timer(&ubs_timer);
+	ubs_timer.expires = TIME_MAX_JIF;
+	ubs_min_interval = TIME_MAX_SEC;
+	ubs_start_time = ubs_end_time = 0;
+
+	kernel_thread(ubstatd, NULL, 0);
+	return 0;
+}
+
+module_init(ubstatd_init);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ub/ub_sys.c kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_sys.c
--- kernel-2.6.18-417.el5.orig/kernel/ub/ub_sys.c	2017-01-13 08:40:16.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ub/ub_sys.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,181 @@
+/*
+ *  kernel/ub/ub_sys.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+#include <linux/compat.h>
+#include <asm/uaccess.h>
+
+#include <ub/beancounter.h>
+
+/*
+ *	The (rather boring) getluid syscall
+ */
+asmlinkage long sys_getluid(void)
+{
+	struct user_beancounter *ub;
+
+	ub = get_exec_ub();
+	if (ub == NULL)
+		return -EINVAL;
+
+	return ub->ub_uid;
+}
+
+/*
+ *	The setluid syscall
+ */
+asmlinkage long sys_setluid(uid_t uid)
+{
+	struct user_beancounter *ub;
+	struct task_beancounter *task_bc;
+	int error;
+
+	task_bc = &current->task_bc;
+
+	/* You may not disown a setluid */
+	error = -EINVAL;
+	if (uid == (uid_t)-1)
+		goto out;
+
+	/* You may only set an ub as root */
+	error = -EPERM;
+	if (!capable(CAP_SETUID))
+		goto out;
+	/*
+	 * The ub once set is irrevocable to all
+	 * unless it's set from ve0.
+	 */
+	if (!ve_is_super(get_exec_env()))
+		goto out;
+
+	/* Ok - set up a beancounter entry for this user */
+	error = -ENOBUFS;
+	ub = get_beancounter_byuid(uid, 1);
+	if (ub == NULL)
+		goto out;
+
+	ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
+			"for %.20s pid %d\n",
+			ub, atomic_read(&ub->ub_refcount),
+			current->comm, current->pid);
+	/* install bc */
+	error = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_NEWUBC, ub);
+	if (!(error & NOTIFY_FAIL)) {
+		put_beancounter(task_bc->exec_ub);
+		task_bc->exec_ub = ub;
+		if (!(error & NOTIFY_OK)) {
+			put_beancounter(task_bc->fork_sub);
+			task_bc->fork_sub = get_beancounter(ub);
+		}
+		error = 0;
+	} else {
+		put_beancounter(ub);
+		error = -ENOBUFS;
+	}
+out:
+	return error;
+}
+
+long do_setublimit(uid_t uid, unsigned long resource,
+		unsigned long *new_limits)
+{
+	int error;
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	error = -EPERM;
+	if(!capable(CAP_SYS_RESOURCE))
+		goto out;
+
+	if (!ve_is_super(get_exec_env()))
+		goto out;
+
+	error = -EINVAL;
+	if (resource >= UB_RESOURCES)
+		goto out;
+
+	error = -EINVAL;
+	if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
+		goto out;
+
+	error = -ENOENT;
+	ub = get_beancounter_byuid(uid, 0);
+	if (ub == NULL) {
+		ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
+		goto out;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[resource].barrier = new_limits[0];
+	ub->ub_parms[resource].limit = new_limits[1];
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	put_beancounter(ub);
+
+	error = 0;
+out:
+	return error;
+}
+
+/*
+ *	The setbeanlimit syscall
+ */
+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
+		unsigned long __user *limits)
+{
+	unsigned long new_limits[2];
+
+	if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
+		return -EFAULT;
+
+	return do_setublimit(uid, resource, new_limits);
+}
+
+extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void __user *buf, long size);
+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, 
+		void __user *buf, long size)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	return do_ubstat(func, arg1, arg2, buf, size);
+}
+
+#ifdef CONFIG_COMPAT
+#define UB_MAXVALUE_COMPAT ((1UL << (sizeof(compat_long_t) * 8 - 1)) - 1)
+
+asmlinkage long compat_sys_setublimit(uid_t uid, compat_long_t resource,
+		compat_long_t __user *limits)
+{
+	compat_long_t u_new_limits[2];
+	unsigned long new_limits[2];
+
+	if (copy_from_user(&u_new_limits, limits, sizeof(u_new_limits)))
+		return -EFAULT;
+
+	new_limits[0] = u_new_limits[0];
+	new_limits[1] = u_new_limits[1];
+
+	if (u_new_limits[0] == UB_MAXVALUE_COMPAT)
+		new_limits[0] = UB_MAXVALUE;
+	if (u_new_limits[1] == UB_MAXVALUE_COMPAT)
+		new_limits[1] = UB_MAXVALUE;
+
+	return do_setublimit(uid, resource, new_limits);
+}
+
+asmlinkage long compat_sys_ubstat(int func, unsigned int arg1,
+		unsigned int arg2, compat_uptr_t *buf, long size)
+{
+	return sys_ubstat(func, arg1, arg2, buf, size);
+}
+#endif
diff -upr kernel-2.6.18-417.el5.orig/kernel/user.c kernel-2.6.18-417.el5-028stab121/kernel/user.c
--- kernel-2.6.18-417.el5.orig/kernel/user.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/user.c	2017-01-13 08:40:24.000000000 -0500
@@ -14,6 +14,7 @@
 #include <linux/bitops.h>
 #include <linux/key.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -24,7 +25,20 @@
 #define UIDHASH_SZ		(1 << UIDHASH_BITS)
 #define UIDHASH_MASK		(UIDHASH_SZ - 1)
 #define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+#define __uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+
+#ifdef CONFIG_VE
+#define UIDHASH_MASK_VE			(UIDHASH_SZ_VE - 1)
+#define __uidhashfn_ve(uid)		(((uid >> UIDHASH_BITS_VE) ^ uid) & \
+						UIDHASH_MASK_VE)
+#define __uidhashentry_ve(uid, envid)	((envid)->uidhash_table + \
+						__uidhashfn_ve(uid))
+#define uidhashentry_ve(uid)		(ve_is_super(get_exec_env()) ?	\
+						__uidhashentry(uid) :	\
+						__uidhashentry_ve(uid, get_exec_env()))
+#else
+#define uidhashentry_ve(uid)		__uidhashentry(uid)
+#endif
 
 static kmem_cache_t *uid_cachep;
 static struct list_head uidhash_table[UIDHASH_SZ];
@@ -96,7 +110,7 @@ struct user_struct *find_user(uid_t uid)
 	unsigned long flags;
 
 	spin_lock_irqsave(&uidhash_lock, flags);
-	ret = uid_hash_find(uid, uidhashentry(uid));
+	ret = uid_hash_find(uid, uidhashentry_ve(uid));
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 	return ret;
 }
@@ -119,10 +133,11 @@ void free_uid(struct user_struct *up)
 		local_irq_restore(flags);
 	}
 }
+EXPORT_SYMBOL_GPL(free_uid);
 
 struct user_struct * alloc_uid(uid_t uid)
 {
-	struct list_head *hashent = uidhashentry(uid);
+	struct list_head *hashent = uidhashentry_ve(uid);
 	struct user_struct *up;
 
 	spin_lock_irq(&uidhash_lock);
@@ -172,6 +187,7 @@ struct user_struct * alloc_uid(uid_t uid
 	}
 	return up;
 }
+EXPORT_SYMBOL_GPL(alloc_uid);
 
 void switch_uid(struct user_struct *new_user)
 {
@@ -201,21 +217,21 @@ void switch_uid(struct user_struct *new_
 	free_uid(old_user);
 	suid_keys(current);
 }
-
+EXPORT_SYMBOL_GPL(switch_uid);
 
 static int __init uid_cache_init(void)
 {
 	int n;
 
 	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 
 	for(n = 0; n < UIDHASH_SZ; ++n)
 		INIT_LIST_HEAD(uidhash_table + n);
 
 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
-	uid_hash_insert(&root_user, uidhashentry(0));
+	uid_hash_insert(&root_user, __uidhashentry(0));
 	spin_unlock_irq(&uidhash_lock);
 
 	return 0;
diff -upr kernel-2.6.18-417.el5.orig/kernel/utrace.c kernel-2.6.18-417.el5-028stab121/kernel/utrace.c
--- kernel-2.6.18-417.el5.orig/kernel/utrace.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/utrace.c	2017-01-13 08:40:24.000000000 -0500
@@ -20,7 +20,6 @@
 #include <asm/tracehook.h>
 
 
-#define UTRACE_DEBUG 1
 #ifdef UTRACE_DEBUG
 #define CHECK_INIT(p)	atomic_set(&(p)->check_dead, 1)
 #define CHECK_DEAD(p)	BUG_ON(!atomic_dec_and_test(&(p)->check_dead))
@@ -29,41 +28,6 @@
 #define CHECK_DEAD(p)	do { } while (0)
 #endif
 
-/*
- * Per-thread structure task_struct.utrace points to.
- *
- * The task itself never has to worry about this going away after
- * some event is found set in task_struct.utrace_flags.
- * Once created, this pointer is changed only when the task is quiescent
- * (TASK_TRACED or TASK_STOPPED with the siglock held, or dead).
- *
- * For other parties, the pointer to this is protected by RCU and
- * task_lock.  Since call_rcu is never used while the thread is alive and
- * using this struct utrace, we can overlay the RCU data structure used
- * only for a dead struct with some local state used only for a live utrace
- * on an active thread.
- */
-struct utrace
-{
-	union {
-		struct rcu_head dead;
-		struct {
-			struct task_struct *cloning;
-			struct utrace_signal *signal;
-		} live;
-		struct {
-			unsigned long flags;
-		} exit;
-	} u;
-
-	struct list_head engines;
-	spinlock_t lock;
-	bool freeze_stop;
-#ifdef UTRACE_DEBUG
-	atomic_t check_dead;
-#endif
-};
-
 static struct kmem_cache *utrace_cachep;
 static struct kmem_cache *utrace_engine_cachep;
 
@@ -1240,10 +1204,10 @@ check_detach(struct task_struct *tsk, u3
 }
 
 static inline int
-check_quiescent(struct task_struct *tsk, u32 action)
+check_quiescent(struct task_struct *tsk, u32 action, int loc)
 {
 	if (action & UTRACE_ACTION_STATE_MASK)
-		return utrace_quiescent(tsk, NULL);
+		return utrace_quiescent(tsk, NULL, loc);
 	return 0;
 }
 
@@ -1442,7 +1406,7 @@ void utrace_finish_stop(void)
  * will be entered before user mode.
  */
 int
-utrace_quiescent(struct task_struct *tsk, struct utrace_signal *signal)
+utrace_quiescent(struct task_struct *tsk, struct utrace_signal *signal, int loc)
 {
 	struct utrace *utrace = tsk->utrace;
 	unsigned long action;
@@ -1476,6 +1440,7 @@ restart:
 		killed = sigkill_pending(tsk);
 		stop = !killed && (tsk->utrace_flags & UTRACE_ACTION_QUIESCE);
 		if (likely(stop)) {
+			set_pn_state(tsk, loc);
 			set_current_state(TASK_TRACED);
 			/*
 			 * If there is a group stop in progress,
@@ -1486,6 +1451,7 @@ restart:
 			spin_unlock_irq(&tsk->sighand->siglock);
 			schedule();
 			utrace_finish_stop();
+			clear_pn_state(tsk);
 		}
 		else
 			spin_unlock_irq(&tsk->sighand->siglock);
@@ -1603,7 +1569,7 @@ utrace_report_exit(long *exit_code)
 			REPORT(report_exit, orig_code, exit_code);
 	}
 	action = check_detach(tsk, action);
-	check_quiescent(tsk, action);
+	check_quiescent(tsk, action, PN_STOP_EXIT);
 }
 
 /*
@@ -1779,7 +1745,7 @@ utrace_report_vfork_done(pid_t child_pid
 			break;
 	}
 	action = check_detach(tsk, action);
-	check_quiescent(tsk, action);
+	check_quiescent(tsk, action, PN_STOP_VFORK);
 }
 
 /*
@@ -1804,7 +1770,7 @@ utrace_report_exec(struct linux_binprm *
 			break;
 	}
 	action = check_detach(tsk, action);
-	check_quiescent(tsk, action);
+	check_quiescent(tsk, action, PN_STOP_EXEC);
 }
 
 /*
@@ -1842,7 +1808,7 @@ utrace_report_syscall(struct pt_regs *re
 			break;
 	}
 	action = check_detach(tsk, action);
-	killed = check_quiescent(tsk, action);
+	killed = check_quiescent(tsk, action, is_exit ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 
 	if (!is_exit) {
 		if (unlikely(killed))
@@ -1872,18 +1838,6 @@ utrace_report_syscall(struct pt_regs *re
 
 
 /*
- * This is pointed to by the utrace struct, but it's really a private
- * structure between utrace_get_signal and utrace_inject_signal.
- */
-struct utrace_signal
-{
-	siginfo_t *const info;
-	struct k_sigaction *return_ka;
-	int signr;
-};
-
-
-/*
  * Call each interested tracing engine's report_signal callback.
  */
 static u32
@@ -1921,7 +1875,7 @@ utrace_signal_handler_singlestep(struct 
 			       UTRACE_ACTION_SINGLESTEP|UTRACE_ACTION_BLOCKSTEP,
 			       NULL, NULL, NULL);
 	action = check_detach(tsk, action);
-	check_quiescent(tsk, action);
+	check_quiescent(tsk, action, PN_STOP_TF);
 }
 
 
@@ -2007,7 +1961,7 @@ utrace_get_signal(struct task_struct *ts
 		if (!killed) {
 			spin_unlock_irq(&tsk->sighand->siglock);
 
-			killed = utrace_quiescent(tsk, &signal);
+			killed = utrace_quiescent(tsk, &signal, PN_STOP_SIGNAL);
 
 			/*
 			 * Noone wants us quiescent any more, we can take
diff -upr kernel-2.6.18-417.el5.orig/kernel/utsname.c kernel-2.6.18-417.el5-028stab121/kernel/utsname.c
--- kernel-2.6.18-417.el5.orig/kernel/utsname.c	2017-01-13 08:40:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/utsname.c	2017-01-13 08:40:19.000000000 -0500
@@ -0,0 +1,97 @@
+/*
+ *  Copyright (C) 2004 IBM Corporation
+ *
+ *  Author: Serge Hallyn <serue@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ */
+
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+/*
+ * Clone a new ns copying an original utsname, setting refcount to 1
+ * @old_ns: namespace to clone
+ * Return NULL on error (failure to kmalloc), new ns otherwise
+ */
+static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
+{
+	struct uts_namespace *ns;
+
+	ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+	if (ns) {
+		memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+		kref_init(&ns->kref);
+	}
+	return ns;
+}
+
+/*
+ * unshare the current process' utsname namespace.
+ * called only in sys_unshare()
+ */
+int unshare_utsname(unsigned long unshare_flags, struct uts_namespace **new_uts)
+{
+	if (unshare_flags & CLONE_NEWUTS) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		*new_uts = clone_uts_ns(current->nsproxy->uts_ns);
+		if (!*new_uts)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy task tsk's utsname namespace, or clone it if flags
+ * specifies CLONE_NEWUTS.  In latter case, changes to the
+ * utsname of this process won't be seen by parent, and vice
+ * versa.
+ */
+int copy_utsname(int flags, struct task_struct *tsk)
+{
+	struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
+	struct uts_namespace *new_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+
+	get_uts_ns(old_ns);
+
+	if (!(flags & CLONE_NEWUTS))
+		return 0;
+
+#ifndef CONFIG_VE
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+#endif
+
+	new_ns = clone_uts_ns(old_ns);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+	tsk->nsproxy->uts_ns = new_ns;
+
+out:
+	put_uts_ns(old_ns);
+	return err;
+}
+
+void free_uts_ns(struct kref *kref)
+{
+	struct uts_namespace *ns;
+
+	ns = container_of(kref, struct uts_namespace, kref);
+	kfree(ns);
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/devperms.c kernel-2.6.18-417.el5-028stab121/kernel/ve/devperms.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/devperms.c	2017-01-13 08:40:20.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/devperms.c	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,418 @@
+/*
+ *  linux/kernel/ve/devperms.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * Devices permissions routines,
+ * character and block devices separately
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/ve.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/vzcalluser.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+
+/*
+ * Rules applied in the following order:
+ *  MAJOR!=0, MINOR!=0
+ *  MAJOR!=0, MINOR==0
+ *  MAJOR==0, MINOR==0
+ */
+
+struct devperms_struct {
+	dev_t   	dev;	/* device id */
+	unsigned char	mask;
+	unsigned 	type;
+	envid_t	 	veid;
+
+	struct hlist_node	hash;
+	struct rcu_head		rcu;
+};
+
+static struct devperms_struct default_major_perms[] = {
+	{
+		MKDEV(UNIX98_PTY_MASTER_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(PTY_MASTER_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(PTY_SLAVE_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+};
+
+static struct devperms_struct default_minor_perms[] = {
+	{
+		MKDEV(MEM_MAJOR, 3),	/* null */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 5),	/* zero */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 7),	/* full */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(TTYAUX_MAJOR, 0),	/* tty */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(TTYAUX_MAJOR, 2),	/* ptmx */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 8),	/* random */
+		S_IROTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 9),	/* urandom */
+		S_IROTH,
+		S_IFCHR
+	},
+};
+
+static struct devperms_struct default_deny_perms = {
+	MKDEV(0, 0),
+	0,
+	S_IFCHR,
+};
+
+static inline struct devperms_struct *find_default_devperms(int type, dev_t dev)
+{
+	int i;
+
+	/* XXX all defaults perms are S_IFCHR */
+	if (type != S_IFCHR)
+		return &default_deny_perms;
+
+	for (i = 0; i < ARRAY_SIZE(default_minor_perms); i++)
+		if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) &&
+				MINOR(dev) == MINOR(default_minor_perms[i].dev))
+			return &default_minor_perms[i];
+
+	for (i = 0; i < ARRAY_SIZE(default_major_perms); i++)
+		if (MAJOR(dev) == MAJOR(default_major_perms[i].dev))
+			return &default_major_perms[i];
+
+	return &default_deny_perms;
+}
+
+#define DEVPERMS_HASH_SZ 512
+#define devperms_hashfn(id, dev) \
+	( (id << 5) ^ (id >> 5) ^ (MAJOR(dev)) ^ MINOR(dev) ) & \
+						(DEVPERMS_HASH_SZ - 1)
+
+static DEFINE_SPINLOCK(devperms_hash_lock);
+static struct hlist_head devperms_hash[DEVPERMS_HASH_SZ];
+
+static inline struct devperms_struct *find_devperms(envid_t veid,
+						    int type,
+						    dev_t dev)
+{
+	struct hlist_head *table;
+	struct devperms_struct *perms;
+	struct hlist_node *h;
+
+	table = &devperms_hash[devperms_hashfn(veid, dev)];
+	hlist_for_each_entry_rcu (perms, h, table, hash)
+		if (perms->type == type && perms->veid == veid &&
+				MAJOR(perms->dev) == MAJOR(dev) &&
+				MINOR(perms->dev) == MINOR(dev))
+			return perms;
+
+	return NULL;
+}
+
+static void free_devperms(struct rcu_head *rcu)
+{
+	struct devperms_struct *perms;
+
+	perms = container_of(rcu, struct devperms_struct, rcu);
+	kfree(perms);
+}
+
+/* API calls */
+
+void clean_device_perms_ve(envid_t veid)
+{
+	int i;
+	struct devperms_struct *p;
+	struct hlist_node *n, *tmp;
+
+	spin_lock(&devperms_hash_lock);
+	for (i = 0; i < DEVPERMS_HASH_SZ; i++)
+		hlist_for_each_entry_safe (p, n, tmp, &devperms_hash[i], hash)
+			if (p->veid == veid) {
+				hlist_del_rcu(&p->hash);
+				call_rcu(&p->rcu, free_devperms);
+			}
+	spin_unlock(&devperms_hash_lock);
+}
+
+EXPORT_SYMBOL(clean_device_perms_ve);
+
+/*
+ * Mode is a mask of
+ *	FMODE_READ	for read access (configurable by S_IROTH)
+ *	FMODE_WRITE	for write access (configurable by S_IWOTH)
+ *	FMODE_QUOTACTL	for quotactl access (configurable by S_IXGRP)
+ */
+
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
+{
+	struct devperms_struct *p;
+	struct ve_struct *ve;
+	envid_t veid;
+	char mask;
+
+	ve = get_exec_env();
+	veid = ve->veid;
+	rcu_read_lock();
+
+	p = find_devperms(veid, dev_type | VE_USE_MINOR, dev); /* exact major:minor rule first */
+	if (p != NULL)
+		goto end;
+
+	p = find_devperms(veid, dev_type | VE_USE_MAJOR, MKDEV(MAJOR(dev),0)); /* whole-major rule */
+	if (p != NULL)
+		goto end;
+
+	p = find_devperms(veid, dev_type, MKDEV(0,0)); /* catch-all rule for this VE */
+	if (p != NULL)
+		goto end;
+
+	if (ve->features & VE_FEATURE_DEF_PERMS) {
+		p = find_default_devperms(dev_type, dev); /* built-in defaults (never NULL here) */
+		if (p != NULL)
+			goto end;
+	}
+
+	rcu_read_unlock();
+	return -ENODEV;
+
+end:
+	mask = p->mask; /* copy out under RCU before unlocking */
+	rcu_read_unlock();
+
+	access_mode = "\000\004\002\006\010\014\012\016"[access_mode]; /* FMODE_READ/WRITE/QUOTACTL bits -> S_IROTH/S_IWOTH/S_IXGRP mask (see header comment) */
+	return ((mask & access_mode) == access_mode) ? 0 : -EACCES;
+}
+
+EXPORT_SYMBOL(get_device_perms_ve);
+
+int set_device_perms_ve(envid_t veid, unsigned type, dev_t dev, unsigned mask)
+{
+	struct devperms_struct *perms, *new_perms;
+	struct hlist_head *htable;
+
+	new_perms = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
+	if (new_perms == NULL) /* was dereferenced unchecked below */
+		return -ENOMEM;
+	spin_lock(&devperms_hash_lock);
+	perms = find_devperms(veid, type, dev);
+	if (perms != NULL) {
+		kfree(new_perms);
+		perms->mask = mask & S_IALLUGO;
+	} else {
+		switch (type & VE_USE_MASK) {
+		case 0:
+			dev = 0;
+			break;
+		case VE_USE_MAJOR:
+			dev = MKDEV(MAJOR(dev),0);
+			break;
+		}
+		new_perms->veid = veid;
+		new_perms->dev = dev;
+		new_perms->type = type;
+		new_perms->mask = mask & S_IALLUGO;
+
+		htable = &devperms_hash[devperms_hashfn(new_perms->veid,
+				new_perms->dev)];
+		hlist_add_head_rcu(&new_perms->hash, htable);
+	}
+	spin_unlock(&devperms_hash_lock);
+	return 0;
+}
+
+EXPORT_SYMBOL(set_device_perms_ve);
+
+#ifdef CONFIG_PROC_FS
+static int devperms_seq_show(struct seq_file *m, void *v)
+{
+	struct devperms_struct *dp;
+	char dev_s[32], type_c;
+	unsigned use, type;
+	dev_t dev;
+
+	dp = (struct devperms_struct *)v;
+	if (dp == (struct devperms_struct *)1L) {
+		seq_printf(m, "Version: 2.7\n");
+		return 0;
+	}
+
+	use = dp->type & VE_USE_MASK;
+	type = dp->type & S_IFMT;
+	dev = dp->dev;
+
+	if ((use | VE_USE_MINOR) == use)
+		snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev));
+	else if ((use | VE_USE_MAJOR) == use)
+		snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev));
+	else
+		snprintf(dev_s, sizeof(dev_s), "*:*");
+
+	if (type == S_IFCHR)
+		type_c = 'c';
+	else if (type == S_IFBLK)
+		type_c = 'b';
+	else
+		type_c = '?';
+
+	seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s);
+	return 0;
+}
+
+static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t cpos;
+	long slot;
+	struct devperms_struct *dp;
+	struct hlist_node *h;
+
+	cpos = *pos;
+	rcu_read_lock();
+
+	if (cpos-- == 0)
+		return (void *)1L;
+
+	for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
+		hlist_for_each_entry_rcu (dp, h, &devperms_hash[slot], hash)
+			if (cpos-- == 0) {
+				m->private = (void *)slot;
+				return dp;
+			}
+	return NULL;
+}
+
+static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	long slot;
+	struct hlist_node *next;
+	struct devperms_struct *dp;
+
+	dp = (struct devperms_struct *)v;
+
+	if (unlikely(dp == (struct devperms_struct *)1L))
+		slot = 0;
+	else {
+		next = rcu_dereference(dp->hash.next);
+		if (next != NULL)
+			goto out;
+
+		slot = (long)m->private + 1;
+	}
+
+	for (; slot < DEVPERMS_HASH_SZ; slot++) {
+		next = rcu_dereference(devperms_hash[slot].first);
+		if (next == NULL)
+			continue;
+
+		m->private = (void *)slot;
+		goto out;
+	}
+	return NULL;
+
+out:
+	(*pos)++;
+	return hlist_entry(next, struct devperms_struct, hash);
+}
+
+static void devperms_seq_stop(struct seq_file *m, void *v)
+{
+	rcu_read_unlock();
+}
+
+static struct seq_operations devperms_seq_op = {
+	.start	= devperms_seq_start,
+	.next	= devperms_seq_next,
+	.stop	= devperms_seq_stop,
+	.show	= devperms_seq_show,
+};
+
+static int devperms_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &devperms_seq_op);
+}
+
+struct file_operations proc_devperms_ops = {
+	.open		= devperms_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+EXPORT_SYMBOL(proc_devperms_ops);
+#endif
+
+/* Initialisation */
+
+static struct devperms_struct original_perms[] =
+{
+	{
+		MKDEV(0,0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+		0,
+	},
+	{
+		MKDEV(0,0),
+		S_IXGRP | S_IROTH | S_IWOTH,
+		S_IFBLK,
+		0,
+	},
+};
+
+static int __init init_devperms_hash(void)
+{
+	hlist_add_head(&original_perms[0].hash,
+			&devperms_hash[devperms_hashfn(0,
+				original_perms[0].dev)]);
+	hlist_add_head(&original_perms[1].hash,
+			&devperms_hash[devperms_hashfn(0,
+				original_perms[1].dev)]);
+	return 0;
+}
+
+core_initcall(init_devperms_hash);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/hooks.c kernel-2.6.18-417.el5-028stab121/kernel/ve/hooks.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/hooks.c	2017-01-13 08:40:20.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/hooks.c	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,114 @@
+/*
+ *  linux/kernel/ve/hooks.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/ve.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ve_proto.h>
+#include <linux/module.h>
+
+static struct list_head ve_hooks[VE_MAX_CHAINS];
+static DECLARE_RWSEM(ve_hook_sem);
+
+/*
+ * Insert @vh into hook chain @chain, keeping the chain sorted by
+ * ascending priority (new hooks of equal priority go after existing
+ * ones).  Serialized against iteration by ve_hook_sem.
+ */
+void ve_hook_register(int chain, struct ve_hook *vh)
+{
+	struct list_head *lh;
+	struct ve_hook *tmp;
+
+	/*
+	 * ve_hooks[] has VE_MAX_CHAINS entries, so valid indices are
+	 * 0..VE_MAX_CHAINS-1: the check must be '>=' (the original '>'
+	 * let chain == VE_MAX_CHAINS index one past the end).
+	 */
+	BUG_ON(chain >= VE_MAX_CHAINS);
+
+	down_write(&ve_hook_sem);
+	list_for_each(lh, &ve_hooks[chain]) {
+		tmp = list_entry(lh, struct ve_hook, list);
+		if (vh->priority < tmp->priority)
+			break;
+	}
+
+	list_add_tail(&vh->list, lh);
+	up_write(&ve_hook_sem);
+}
+
+EXPORT_SYMBOL(ve_hook_register);
+
+/* remove @vh from whichever chain it is on, excluding concurrent iteration */
+void ve_hook_unregister(struct ve_hook *vh)
+{
+	down_write(&ve_hook_sem);
+	list_del(&vh->list);
+	up_write(&ve_hook_sem);
+}
+
+EXPORT_SYMBOL(ve_hook_unregister);
+
+/*
+ * Run one hook's init callback with its owner module pinned.
+ * NOTE(review): if try_module_get() fails (owner unloading) the hook is
+ * silently skipped and 0 is returned — presumably intentional, confirm.
+ */
+static inline int ve_hook_init(struct ve_hook *vh, struct ve_struct *ve)
+{
+	int err;
+
+	err = 0;
+	if (try_module_get(vh->owner)) {
+		err = vh->init(ve);
+		module_put(vh->owner);
+	}
+	return err;
+}
+
+/* run one hook's (optional) fini callback with its owner module pinned */
+static inline void ve_hook_fini(struct ve_hook *vh, struct ve_struct *ve)
+{
+	if (vh->fini != NULL && try_module_get(vh->owner)) {
+		vh->fini(ve);
+		module_put(vh->owner);
+	}
+}
+
+/*
+ * Run all init hooks on chain @chain in priority order.  On the first
+ * failure, unwind by running fini on the hooks already initialized (in
+ * reverse order) and return the error.
+ */
+int ve_hook_iterate_init(int chain, void *ve)
+{
+	struct ve_hook *vh;
+	int err;
+
+	err = 0;
+
+	down_read(&ve_hook_sem);
+	list_for_each_entry(vh, &ve_hooks[chain], list)
+		if ((err = ve_hook_init(vh, ve)) < 0)
+			break;
+
+	/* roll back the hooks that did succeed */
+	if (err)
+		list_for_each_entry_continue_reverse(vh, &ve_hooks[chain], list)
+			ve_hook_fini(vh, ve);
+
+	up_read(&ve_hook_sem);
+	return err;
+}
+
+EXPORT_SYMBOL(ve_hook_iterate_init);
+
+/* run all fini hooks on chain @chain in reverse (descending priority) order */
+void ve_hook_iterate_fini(int chain, void *ve)
+{
+	struct ve_hook *vh;
+
+	down_read(&ve_hook_sem);
+	list_for_each_entry_reverse(vh, &ve_hooks[chain], list)
+		ve_hook_fini(vh, ve);
+	up_read(&ve_hook_sem);
+}
+
+EXPORT_SYMBOL(ve_hook_iterate_fini);
+
+/* initialize every hook chain head; must run before any registration */
+static int __init ve_hooks_init(void)
+{
+	int i;
+
+	for (i = 0; i < VE_MAX_CHAINS; i++)
+		INIT_LIST_HEAD(&ve_hooks[i]);
+	return 0;
+}
+
+core_initcall(ve_hooks_init);
+
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/Makefile kernel-2.6.18-417.el5-028stab121/kernel/ve/Makefile
--- kernel-2.6.18-417.el5.orig/kernel/ve/Makefile	2017-01-13 08:40:20.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/Makefile	2017-01-13 08:40:22.000000000 -0500
@@ -0,0 +1,17 @@
+#
+#
+#  kernel/ve/Makefile
+#
+#  Copyright (C) 2000-2005  SWsoft
+#  All rights reserved.
+#
+#  Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-y = ve.o veowner.o hooks.o devperms.o
+
+obj-$(CONFIG_VZ_DEV) += vzdev.o
+obj-$(CONFIG_VZ_WDOG) += vzwdog.o
+obj-$(CONFIG_VE_CALLS) += vzmon.o
+obj-$(CONFIG_VZ_EVENT) += vzevent.o
+
+vzmon-objs = vecalls.o
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/ve.c kernel-2.6.18-417.el5-028stab121/kernel/ve/ve.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/ve.c	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/ve.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,200 @@
+/*
+ *  linux/kernel/ve/ve.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * 've.c' helper file performing VE sub-system initialization
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/sys.h>
+#include <linux/kdev_t.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/utsname.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/ve_proto.h>
+#include <linux/devpts_fs.h>
+
+#include <linux/nfcalls.h>
+
+unsigned long vz_rstamp = 0x37e0f59d;
+
+EXPORT_SYMBOL(vz_rstamp);
+
+#ifdef CONFIG_MODULES
+/* placeholder module in state GOING, so try_module_get() on it always
+ * fails — presumably used by the nfcalls/KSYM machinery below; confirm */
+struct module no_module = { .state = MODULE_STATE_GOING };
+EXPORT_SYMBOL(no_module);
+#endif
+
+/*
+ * Late-bound ("ksym") references to optional netfilter/iptables modules
+ * and their per-VE init/fini entry points, resolved at runtime through
+ * the nfcalls machinery so VE start/stop can drive modules that may or
+ * may not be loaded.
+ */
+INIT_KSYM_MODULE(ip_tables);
+INIT_KSYM_MODULE(ip6_tables);
+INIT_KSYM_MODULE(iptable_filter);
+INIT_KSYM_MODULE(ip6table_filter);
+INIT_KSYM_MODULE(iptable_mangle);
+INIT_KSYM_MODULE(ip6table_mangle);
+INIT_KSYM_MODULE(ip_conntrack);
+INIT_KSYM_MODULE(ip_conntrack_ftp);
+INIT_KSYM_MODULE(ip_conntrack_irc);
+INIT_KSYM_MODULE(ip_nat);
+INIT_KSYM_MODULE(iptable_nat);
+INIT_KSYM_MODULE(ip_nat_ftp);
+INIT_KSYM_MODULE(ip_nat_irc);
+
+INIT_KSYM_CALL(int, init_netfilter, (void));
+INIT_KSYM_CALL(int, init_iptables, (void));
+INIT_KSYM_CALL(int, init_ip6tables, (void));
+INIT_KSYM_CALL(int, init_iptable_filter, (void));
+INIT_KSYM_CALL(int, init_ip6table_filter, (void));
+INIT_KSYM_CALL(int, init_iptable_mangle, (void));
+INIT_KSYM_CALL(int, init_ip6table_mangle, (void));
+INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
+INIT_KSYM_CALL(int, init_ip_ct_ftp, (void));
+INIT_KSYM_CALL(int, init_ip_ct_irc, (void));
+INIT_KSYM_CALL(int, ip_nat_init, (void));
+INIT_KSYM_CALL(int, init_iptable_nat, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat, (void));
+INIT_KSYM_CALL(void, ip_nat_cleanup, (void));
+INIT_KSYM_CALL(void, fini_ip_ct_irc, (void));
+INIT_KSYM_CALL(void, fini_ip_ct_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
+INIT_KSYM_CALL(void, fini_ip6table_filter, (void));
+INIT_KSYM_CALL(void, fini_iptable_filter, (void));
+INIT_KSYM_CALL(void, fini_ip6table_mangle, (void));
+INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
+INIT_KSYM_CALL(void, fini_ip6tables, (void));
+INIT_KSYM_CALL(void, fini_iptables, (void));
+INIT_KSYM_CALL(void, fini_netfilter, (void));
+
+#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
+INIT_KSYM_MODULE(vzmon);
+INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+
+/* forward VE destruction to real_do_env_free() in vzmon, if it is loaded;
+ * KSYMSAFECALL_VOID is a no-op when the module is absent */
+void do_env_free(struct ve_struct *env)
+{
+	KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
+}
+EXPORT_SYMBOL(do_env_free);
+#endif
+
+/*
+ * The host environment (VE0).  Statically initialized and always
+ * running; it shares the initial namespaces via init_nsproxy and has
+ * all features enabled (.features = -1).
+ */
+struct ve_struct ve0 = {
+	.ve_list		= LIST_HEAD_INIT(ve0.ve_list),
+	.vetask_lh		= LIST_HEAD_INIT(ve0.vetask_lh),
+	.vetask_auxlist		= LIST_HEAD_INIT(ve0.vetask_auxlist),
+	.start_jiffies		= INITIAL_JIFFIES,
+#ifdef CONFIG_NET
+	._net_dev_tail		= &ve0._net_dev_base,
+	.ifindex		= -1,
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	.devpts_config		= &devpts_config,
+#endif
+	.ve_ns			= &init_nsproxy,
+	.is_running		= 1,
+	.op_sem			= __RWSEM_INITIALIZER(ve0.op_sem),
+#ifdef CONFIG_FIB_RULES
+	._rules_ops		= LIST_HEAD_INIT(ve0._rules_ops),
+#endif
+	._randomize_va_space	= 1,
+	.features		= -1,
+	.mnt_nr			= ATOMIC_INIT(0),
+	.ifa_nr			= ATOMIC_INIT(0),
+};
+
+EXPORT_SYMBOL(ve0);
+
+#ifdef CONFIG_SMP
+/* percpu descriptors backing ve0's statically allocated per-cpu stats */
+static struct percpu_data ve0_cpu_stats;
+static struct percpu_data ve0_lat_pcpu_stats;
+#endif
+static struct ve_cpu_stats ve0_cpu_stats_data[NR_CPUS];
+static struct kstat_lat_pcpu_snap_struct ve0_lat_pcpu_stats_data[NR_CPUS];
+
+/* global list of all VEs, protected by ve_list_lock */
+LIST_HEAD(ve_list_head);
+rwlock_t ve_list_lock = RW_LOCK_UNLOCKED;
+
+/* queue of VEs pending deferred destruction, drained by ve_cleanup_thread */
+LIST_HEAD(ve_cleanup_list);
+DEFINE_SPINLOCK(ve_cleanup_lock);
+struct task_struct *ve_cleanup_thread;
+
+EXPORT_SYMBOL(ve_list_lock);
+EXPORT_SYMBOL(ve_list_head);
+EXPORT_SYMBOL(ve_cleanup_lock);
+EXPORT_SYMBOL(ve_cleanup_list);
+EXPORT_SYMBOL(ve_cleanup_thread);
+
+/* linear search of the VE list by id; caller must hold ve_list_lock */
+struct ve_struct *__find_ve_by_id(envid_t veid)
+{
+	struct ve_struct *ve;
+
+	for_each_ve(ve) {
+		if (ve->veid == veid)
+			return ve;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(__find_ve_by_id);
+
+/* look up a VE by id and take a reference; returns NULL if not found */
+struct ve_struct *get_ve_by_id(envid_t veid)
+{
+	struct ve_struct *ve;
+	read_lock(&ve_list_lock);
+	ve = __find_ve_by_id(veid);
+	get_ve(ve);
+	read_unlock(&ve_list_lock);
+	return ve;
+}
+EXPORT_SYMBOL(get_ve_by_id);
+
+struct ve_ipv6_ops *ve_ipv6_ops;
+EXPORT_SYMBOL(ve_ipv6_ops);
+
+/*
+ * Finish VE0 setup at boot: pin it with an extra reference and a
+ * process count of 1, wire up its static per-cpu stats, and put it on
+ * the global VE list.
+ */
+void init_ve0(void)
+{
+	struct ve_struct *ve;
+
+	ve = get_ve0();
+	(void)get_ve(ve);
+	atomic_set(&ve->pcounter, 1);
+
+	ve->cpu_stats = static_percpu_ptr(&ve0_cpu_stats,
+			ve0_cpu_stats_data);
+	ve->sched_lat_ve.cur = static_percpu_ptr(&ve0_lat_pcpu_stats,
+			ve0_lat_pcpu_stats_data);
+
+	list_add(&ve->ve_list, &ve_list_head);
+}
+
+/* queue @ve for deferred destruction and kick the cleanup thread */
+void ve_cleanup_schedule(struct ve_struct *ve)
+{
+	BUG_ON(ve_cleanup_thread == NULL);
+
+	spin_lock(&ve_cleanup_lock);
+	list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
+	spin_unlock(&ve_cleanup_lock);
+
+	wake_up_process(ve_cleanup_thread);
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/vecalls.c kernel-2.6.18-417.el5-028stab121/kernel/ve/vecalls.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/vecalls.c	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/vecalls.c	2017-01-13 08:40:41.000000000 -0500
@@ -0,0 +1,3368 @@
+/*
+ *  linux/kernel/ve/vecalls.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ */
+
+/*
+ * 'vecalls.c' is a file with basic VE support. It provides basic primitives
+ * along with initialization script
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sys.h>
+#include <linux/fs.h>
+#include <linux/namespace.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/wait.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/devpts_fs.h>
+#include <linux/shmem_fs.h>
+#include <linux/sysfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/rcupdate.h>
+#include <linux/in.h>
+#include <linux/idr.h>
+#include <linux/inetdevice.h>
+#include <linux/pid.h>
+#include <net/pkt_sched.h>
+#include <linux/divert.h>
+#include <ub/beancounter.h>
+#include <linux/nsproxy.h>
+#include <linux/kobject.h>
+#include <linux/acct.h>
+
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/ip6_route.h>
+#include <net/arp.h>
+#include <net/ipv6.h>
+
+#include <linux/ve_proto.h>
+#include <linux/venet.h>
+#include <linux/vzctl.h>
+#include <linux/vzcalluser.h>
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
+#include <ub/io_acct.h>
+#include <linux/nfcalls.h>
+#include <linux/virtinfo.h>
+#include <linux/utsrelease.h>
+#include <linux/major.h>
+
+#include <../fs/sysfs/sysfs.h>
+
+int nr_ve = 1;	/* One VE always exists. Compatibility with vestat */
+EXPORT_SYMBOL(nr_ve);
+
+static int	do_env_enter(struct ve_struct *ve, unsigned int flags);
+static int	alloc_ve_tty_drivers(struct ve_struct* ve);
+static void	free_ve_tty_drivers(struct ve_struct* ve);
+static int	register_ve_tty_drivers(struct ve_struct* ve);
+static void	unregister_ve_tty_drivers(struct ve_struct* ve);
+static int	init_ve_tty_drivers(struct ve_struct *);
+static void	fini_ve_tty_drivers(struct ve_struct *);
+static void	clear_termios(struct tty_driver* driver );
+#ifdef CONFIG_INET
+static void	ve_mapped_devs_cleanup(struct ve_struct *ve);
+#endif
+static void	ve_del_ipv6_addrs(struct ve_struct *, struct net_device *dev);
+static int alone_in_pgrp(struct task_struct *tsk);
+
+static void vecalls_exit(void);
+extern void grsecurity_setup(void);
+
+/*
+ * real_put_ve() MUST be used instead of put_ve() inside vecalls:
+ * dropping the last reference frees the VE via real_do_env_free().
+ */
+void real_do_env_free(struct ve_struct *ve);
+static inline void real_put_ve(struct ve_struct *ve)
+{
+	if (ve && atomic_dec_and_test(&ve->counter)) {
+		/* by now the VE must have no tasks and must be stopped;
+		 * BUG_ON() is the idiomatic form of 'if (x) BUG();' */
+		BUG_ON(atomic_read(&ve->pcounter) > 0);
+		BUG_ON(ve->is_running);
+		real_do_env_free(ve);
+	}
+}
+
+/*
+ * Fill a vz_cpu_stat snapshot (per-cpu jiffies, idle/uptime clocks,
+ * load averages) for VE @veid and copy it to userspace.
+ * Non-super callers may only query their own VE; veid 0 is rejected.
+ * Returns 0, -EPERM, -ESRCH, -ENOMEM or -EFAULT.
+ */
+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat __user *buf)
+{
+	struct ve_struct *ve;
+	struct vz_cpu_stat *vstat;
+	int retval;
+	int i, cpu;
+	unsigned long tmp;
+
+	if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
+		return -EPERM;
+	if (veid == 0)
+		return -ESRCH;
+
+	vstat = kzalloc(sizeof(*vstat), GFP_KERNEL);
+	if (!vstat)
+		return -ENOMEM;
+	
+	retval = -ESRCH;
+	read_lock(&ve_list_lock);
+	ve = __find_ve_by_id(veid);
+	if (ve == NULL)
+		goto out_unlock;
+	for_each_online_cpu(cpu) {
+		struct ve_cpu_stats *st;
+
+		st = VE_CPU_STATS(ve, cpu);
+		vstat->user_jif += (unsigned long)cputime64_to_clock_t(st->user);
+		vstat->nice_jif += (unsigned long)cputime64_to_clock_t(st->nice);
+		vstat->system_jif += (unsigned long)cputime64_to_clock_t(st->system);
+	}
+	vstat->idle_clk = ve_sched_get_idle_time_total(ve);
+	vstat->uptime_clk = get_cycles() - ve->start_cycles;
+	vstat->uptime_jif = (unsigned long)cputime64_to_clock_t(
+				get_jiffies_64() - ve->start_jiffies);
+	for (i = 0; i < 3; i++) {
+		tmp = ve->avenrun[i] + (FIXED_1/200);
+		vstat->avenrun[i].val_int = LOAD_INT(tmp);
+		vstat->avenrun[i].val_frac = LOAD_FRAC(tmp);
+	}
+	read_unlock(&ve_list_lock);
+
+	/* copy_to_user may sleep, so it happens after the lock is dropped */
+	retval = 0;
+	if (copy_to_user(buf, vstat, sizeof(*vstat)))
+		retval = -EFAULT;
+out_free:
+	kfree(vstat);
+	return retval;
+
+out_unlock:
+	/* error path: drop the lock, then share the kfree via out_free */
+	read_unlock(&ve_list_lock);
+	goto out_free;
+}
+
+/*
+ * Set device access permissions for VE @veid.  Requires setveid
+ * capability; the target VE must exist and be running (checked under
+ * its op_sem to serialize against start/stop).
+ */
+static int real_setdevperms(envid_t veid, unsigned type,
+		dev_t dev, unsigned mask)
+{
+	struct ve_struct *ve;
+	int err;
+
+	if (!capable_setveid() || veid == 0)
+		return -EPERM;
+
+	if ((ve = get_ve_by_id(veid)) == NULL)
+		return -ESRCH;
+
+	down_read(&ve->op_sem);
+	err = -ESRCH;
+	if (ve->is_running)
+		err = set_device_perms_ve(veid, type, dev, mask);
+	up_read(&ve->op_sem);
+	real_put_ve(ve);
+	return err;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * FS-related helpers to VE start/stop
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_SYSCTL
+/* kept as no-op stubs so the init/fini call sites stay symmetric */
+static inline int register_ve_sysctltables(struct ve_struct *ve)
+{
+	/*
+	 * The code that registered kernel sysctl tables used to live
+	 * here.  It registered only the utsname and ipc tables, but
+	 * since we now have namespaces for both of them, nothing needs
+	 * to be done here.
+	 */
+	return 0;
+}
+
+static inline void unregister_ve_sysctltables(struct ve_struct *ve)
+{
+}
+
+static inline void free_ve_sysctltables(struct ve_struct *ve)
+{
+}
+#endif
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start: subsystems
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_INET
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/*
+ * Allocate (fini == 0) or free (fini != 0) the per-VE IPv6 SNMP MIB
+ * per-cpu counters.  Alloc failures unwind through the labels below;
+ * the fini path enters at 'fini:' and falls through every free.
+ * Returns 0 or -ENOMEM.
+ */
+static int init_fini_ve_mibs6(struct ve_struct *ve, int fini)
+{
+	if (fini)
+		goto fini;
+
+	if (!(ve->_ipv6_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out1;
+	if (!(ve->_ipv6_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out2;
+	if (!(ve->_icmpv6_statistics[0] = alloc_percpu(struct icmpv6_mib)))
+		goto out3;
+	if (!(ve->_icmpv6_statistics[1] = alloc_percpu(struct icmpv6_mib)))
+		goto out4;
+	if (!(ve->_udp_stats_in6[0] = alloc_percpu(struct udp_mib)))
+		goto out5;
+	if (!(ve->_udp_stats_in6[1] = alloc_percpu(struct udp_mib)))
+		goto out6;
+	return 0;
+
+fini:
+	free_percpu(ve->_udp_stats_in6[1]);
+out6:
+	free_percpu(ve->_udp_stats_in6[0]);
+out5:
+	free_percpu(ve->_icmpv6_statistics[1]);
+out4:
+	free_percpu(ve->_icmpv6_statistics[0]);
+out3:
+	free_percpu(ve->_ipv6_statistics[1]);
+out2:
+	free_percpu(ve->_ipv6_statistics[0]);
+out1:
+	return -ENOMEM;
+}
+#else
+static int init_fini_ve_mibs6(struct ve_struct *ve, int fini) { return 0; }
+#endif
+
+/*
+ * Allocate (fini == 0) or free (fini != 0) all per-VE IPv4/SNMP MIB
+ * per-cpu counters, delegating the IPv6 set to init_fini_ve_mibs6().
+ * Same unwind-through-labels scheme as above: the fini path enters at
+ * 'fini:' and falls through all frees.  Returns 0 or -ENOMEM.
+ */
+static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
+{
+	if (fini)
+		goto fini;
+
+	if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
+		goto out1;
+	if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
+		goto out2;
+	if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out3;
+	if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out4;
+	if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
+		goto out5;
+	if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
+		goto out6;
+	if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
+		goto out7;
+	if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
+		goto out8;
+	if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
+		goto out9;
+	if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
+		goto out10;
+	if (!(ve->_icmpmsg_statistics[0] = alloc_percpu(struct icmpmsg_mib)))
+		goto out11;
+	if (!(ve->_icmpmsg_statistics[1] = alloc_percpu(struct icmpmsg_mib)))
+		goto out12;
+	if (init_fini_ve_mibs6(ve, fini))
+		goto out13;
+	return 0;
+
+fini:
+	/* on fini this frees the v6 MIBs, then falls through the rest */
+	init_fini_ve_mibs6(ve, fini);
+out13:
+	free_percpu(ve->_icmpmsg_statistics[1]);
+out12:
+	free_percpu(ve->_icmpmsg_statistics[0]);
+out11:
+	free_percpu(ve->_udp_statistics[1]);
+out10:
+	free_percpu(ve->_udp_statistics[0]);
+out9:
+	free_percpu(ve->_tcp_statistics[1]);
+out8:
+	free_percpu(ve->_tcp_statistics[0]);
+out7:
+	free_percpu(ve->_icmp_statistics[1]);
+out6:
+	free_percpu(ve->_icmp_statistics[0]);
+out5:
+	free_percpu(ve->_ip_statistics[1]);
+out4:
+	free_percpu(ve->_ip_statistics[0]);
+out3:
+	free_percpu(ve->_net_statistics[1]);
+out2:
+	free_percpu(ve->_net_statistics[0]);
+out1:
+	return -ENOMEM;
+}
+
+/* thin wrappers selecting the alloc/free mode of init_fini_ve_mibs() */
+static inline int init_ve_mibs(struct ve_struct *ve)
+{
+	return init_fini_ve_mibs(ve, 0);
+}
+
+static inline void fini_ve_mibs(struct ve_struct *ve)
+{
+	(void)init_fini_ve_mibs(ve, 1);
+}
+
+/* checkpointing hooks for the per-VE loopback device */
+
+/* nothing to dump for loopback */
+static void veloop_dump(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+}
+
+/* stats live in dev->priv, allocated in init_ve_netdev() */
+static struct net_device_stats *veloop_stats(struct net_device *dev)
+{
+	return (struct net_device_stats*) dev->priv;
+}
+/*
+ * After restore, re-seed the current cpu's lstats from the restored
+ * device counters.  NOTE(review): only this cpu's slot is written —
+ * presumably sufficient for restore; confirm against the cpt code.
+ */
+static void veloop_rst_lstats(struct net_device *dev)
+{
+	struct pcpu_lstats *lb_stats;
+	struct net_device_stats *stats;
+
+	lb_stats = per_cpu_ptr(get_exec_env()->_pcpu_lstats, smp_processor_id());
+	stats = dev->cpt_ops->stats(dev);
+
+	lb_stats->bytes = stats->rx_bytes;
+	lb_stats->packets = stats->rx_packets;
+}
+
+static struct dev_cpt_ops veloop_cpt_ops = {
+	.dump = veloop_dump,
+	.stats = veloop_stats,
+	.post_restore_netstats = veloop_rst_lstats,
+};
+
+/* clone the template loopback device, preserving the allocator's
+ * padding bookkeeping, and attach the cpt ops above */
+static void veloop_setup(struct net_device *dev)
+{
+	int padded;
+	padded = dev->padded;
+	memcpy(dev, &templ_loopback_dev, sizeof(struct net_device));
+	dev->padded = padded;
+	dev->cpt_ops = &veloop_cpt_ops;
+}
+
+/*
+ * Set up the VE's private network-device bookkeeping and register its
+ * loopback device.  Returns 0 or -errno, releasing everything
+ * allocated here on failure.
+ */
+static int init_ve_netdev(void)
+{
+	struct ve_struct *ve;
+	struct net_device_stats *stats;
+	int err;
+
+	ve = get_exec_env();
+	INIT_HLIST_HEAD(&ve->_net_dev_head);
+	ve->_net_dev_base = NULL;
+	ve->_net_dev_tail = &ve->_net_dev_base;
+
+	err = -ENOMEM;
+	ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name,
+					 veloop_setup);
+	if (ve->_loopback_dev == NULL)
+		goto out;
+
+	ve->_pcpu_lstats = alloc_percpu(struct pcpu_lstats);
+	if (ve->_pcpu_lstats == NULL)
+		goto out_free_netdev;
+	/*
+	 * A kzalloc() failure was previously ignored, leaving dev->priv
+	 * NULL for veloop_stats()/veloop_rst_lstats() to dereference
+	 * later.  Treat it as a hard error instead (err is still
+	 * -ENOMEM here).
+	 */
+	stats = kzalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+	if (stats == NULL)
+		goto out_free_stats;
+	ve->_loopback_dev->priv = stats;
+	err = register_netdev(ve->_loopback_dev);
+	if (err)
+		goto out_free_stats;
+	return 0;
+
+out_free_stats:
+	if (ve->_loopback_dev->priv != NULL)
+		kfree(ve->_loopback_dev->priv);
+	free_percpu(ve->_pcpu_lstats);
+out_free_netdev:
+	free_netdev(ve->_loopback_dev);
+out:
+	return err;
+}
+
+/*
+ * Tear down all network devices of the current VE.  Ordinary devices
+ * are unregistered one at a time (dropping rtnl between iterations so
+ * unregister notifications can complete); the loopback device is
+ * removed last because FIB entries may still reference it.  Note the
+ * loop's 'break' exits with rtnl still held — the loopback teardown
+ * below runs under that lock and then releases it.
+ */
+static void fini_ve_netdev(void)
+{
+	struct ve_struct *ve;
+	struct net_device *dev;
+
+	ve = get_exec_env();
+	while (1) {
+		rtnl_lock();
+		/* 
+		 * loopback is special, it can be referenced in  fib's, 
+		 * so it must be freed the last. Doing so is 
+		 * sufficient to guarantee absence of such references.
+		 */
+		if (dev_base == ve->_loopback_dev)
+			dev = dev_base->next;
+		else
+			dev = dev_base;
+		if (dev == NULL)
+			break;
+		unregister_netdevice(dev);
+		rtnl_unlock();
+		free_netdev(dev);
+	}
+	ve_del_ipv6_addrs(ve, ve->_loopback_dev);
+	unregister_netdevice(ve->_loopback_dev);
+	rtnl_unlock();
+	free_netdev(ve->_loopback_dev);
+	ve->_loopback_dev = NULL;
+
+	free_percpu(ve->_pcpu_lstats);
+	ve->_pcpu_lstats = NULL;
+}
+#else
+#define init_ve_mibs(ve)	(0)
+#define fini_ve_mibs(ve)	do { } while (0)
+#define init_ve_netdev()	(0)
+#define fini_ve_netdev()	do { } while (0)
+#endif
+
+/*
+ * Allocate the VE-private /proc root directory entry.  The name string
+ * is stored inline right after the proc_dir_entry ("/proc" plus NUL,
+ * hence the +6/memcpy of 6 bytes; namelen covers only the 5 visible
+ * characters).  Freed later via free_ve_proc().
+ */
+static int prepare_proc_root(struct ve_struct *ve)
+{
+	struct proc_dir_entry *de;
+
+	de = kzalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL);
+	if (de == NULL)
+		return -ENOMEM;
+
+	memcpy(de + 1, "/proc", 6);
+	de->name = (char *)(de + 1);
+	de->namelen = 5;
+	de->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	de->nlink = 2;
+	atomic_set(&de->count, 1);
+
+	ve->proc_root = de;
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Build the VE's private /proc: its root, a VE-local procfs mount, the
+ * kmsg entry, and the vz/net/fs subdirectories (plus IPv6 snmp entries
+ * when the ipv6 ops are present).  Error paths unwind in reverse
+ * creation order.  Returns 0 or -errno.
+ */
+static int init_ve_proc(struct ve_struct *ve)
+{
+	int err;
+	struct proc_dir_entry *de;
+
+	err = prepare_proc_root(ve);
+	if (err)
+		goto out_root;
+
+	err = register_ve_fs_type(ve, &proc_fs_type,
+			&ve->proc_fstype, &ve->proc_mnt);
+	if (err)
+		goto out_reg;
+
+	err = -ENOMEM;
+	de = create_proc_entry("kmsg", S_IRUSR, NULL);
+	if (!de)
+		goto out_kmsg;
+	de->proc_fops = &proc_kmsg_operations;
+
+	/* create necessary /proc subdirs in VE local proc tree */
+	err = -ENOMEM;
+	de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	if (!de)
+		goto out_vz;
+
+	proc_net = proc_mkdir("net", NULL);
+	if (!proc_net)
+		goto out_net;
+
+	if (!proc_mkdir("fs", NULL))
+		goto out_fs;
+
+	if (ve->ipv6_ops) {
+		if (ve->ipv6_ops->snmp_proc_init(ve))
+			goto out_snmp;
+	}
+
+	return 0;
+
+out_snmp:
+	remove_proc_entry("fs", NULL);
+out_fs:
+	remove_proc_entry("net", NULL);
+out_net:
+	remove_proc_entry("vz", NULL);
+out_vz:
+	remove_proc_entry("kmsg", NULL);
+out_kmsg:
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;
+out_reg:
+	/* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
+	;
+out_root:
+	return err;
+}
+
+/* undo init_ve_proc() in reverse order; fstype/root freed later in
+ * real_put_ve -> free_ve_proc() */
+static void fini_ve_proc(struct ve_struct *ve)
+{
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->snmp_proc_fini(ve);
+	remove_proc_entry("fs", NULL);
+	remove_proc_entry("net", NULL);
+	proc_net =  NULL;
+	remove_proc_entry("vz", NULL);
+	remove_proc_entry("kmsg", NULL);
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;
+}
+
+/* final release of the VE's proc root and fstype; warns (with the full
+ * path) if any entry was leaked under the root */
+static void free_ve_proc(struct ve_struct *ve)
+{
+	/* proc filesystem frees proc_dir_entries on remove_proc_entry() only,
+	   so we check that everything was removed and not lost */
+	if (ve->proc_root && ve->proc_root->subdir) {
+		struct proc_dir_entry *p = ve->proc_root;
+		printk(KERN_WARNING "CT: %d: proc entry /proc", ve->veid);
+		while ((p = p->subdir) != NULL)
+			printk("/%s", p->name);
+		printk(" is not removed!\n");
+	}
+
+	kfree(ve->proc_root);
+	kfree(ve->proc_fstype);
+
+	ve->proc_fstype = NULL;
+	ve->proc_root = NULL;
+}
+#else
+#define init_ve_proc(ve)	(0)
+#define fini_ve_proc(ve)	do { } while (0)
+#define free_ve_proc(ve)	do { } while (0)
+#endif
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Set up the VE's sysctl infrastructure: the /proc/sys directory, the
+ * (now stubbed) sysctl tables, and the devinet / IPv6-addrconf sysctl
+ * trees.  Unwinds in reverse order on error; returns 0 or -errno.
+ */
+static int init_ve_sysctl(struct ve_struct *ve)
+{
+	int err;
+
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	ve->proc_sys_root = proc_mkdir("sys", NULL);
+	if (ve->proc_sys_root == NULL)
+		goto out_proc;
+#endif
+	INIT_LIST_HEAD(&ve->sysctl_lh);
+	err = register_ve_sysctltables(ve);
+	if (err)
+		goto out_reg;
+
+	err = devinet_sysctl_init(ve);
+	if (err)
+		goto out_dev;
+
+	if (ve->ipv6_ops) {
+		err = ve->ipv6_ops->addrconf_sysctl_init(ve);
+		if (err)
+			goto out_dev6;
+	}
+
+	return 0;
+
+out_dev6:
+	devinet_sysctl_fini(ve);
+out_dev:
+	unregister_ve_sysctltables(ve);
+	free_ve_sysctltables(ve);
+out_reg:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("sys", NULL);
+out_proc:
+#endif
+	return err;
+}
+
+/* undo init_ve_sysctl() in reverse order (frees happen in free_ve_sysctl) */
+static void fini_ve_sysctl(struct ve_struct *ve)
+{
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->addrconf_sysctl_fini(ve);
+	devinet_sysctl_fini(ve);
+	unregister_ve_sysctltables(ve);
+	remove_proc_entry("sys", NULL);
+}
+
+/* release the memory behind the sysctl trees torn down above */
+static void free_ve_sysctl(struct ve_struct *ve)
+{
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->addrconf_sysctl_free(ve);
+	devinet_sysctl_free(ve);
+	free_ve_sysctltables(ve);
+}
+#else
+#define init_ve_sysctl(ve)	(0)
+#define fini_ve_sysctl(ve)	do { } while (0)
+#define free_ve_sysctl(ve)	do { } while (0)
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+#include <linux/devpts_fs.h>
+
+/*
+ * DEVPTS needs a virtualization: each environment should see each own list of
+ * pseudo-terminals.
+ * To implement it we need to have separate devpts superblocks for each
+ * VE, and each VE should mount its own one.
+ * Thus, separate vfsmount structures are required.
+ * To minimize intrusion into vfsmount lookup code, separate file_system_type
+ * structures are created.
+ *
+ * In addition to this, patch fo character device itself is required, as file
+ * system itself is used only for MINOR/MAJOR lookup.
+ */
+
+/*
+ * Give the VE its own devpts superblock/mount (see the rationale in the
+ * comment block above).  Default pty mode is 0600.  Returns 0 or -errno.
+ */
+static int init_ve_devpts(struct ve_struct *ve)
+{
+	int err;
+
+	err = -ENOMEM;
+	ve->devpts_config = kzalloc(sizeof(struct devpts_config), GFP_KERNEL);
+	if (ve->devpts_config == NULL)
+		goto out;
+
+	ve->devpts_config->mode = 0600;
+	err = register_ve_fs_type(ve, &devpts_fs_type,
+			&ve->devpts_fstype, &ve->devpts_mnt);
+	if (err) {
+		kfree(ve->devpts_config);
+		ve->devpts_config = NULL;
+	}
+out:
+	return err;
+}
+
+/* tear down the VE-private devpts mount and its config */
+static void fini_ve_devpts(struct ve_struct *ve)
+{
+	unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
+	/* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->devpts_mnt = NULL;
+	kfree(ve->devpts_config);
+	ve->devpts_config = NULL;
+}
+#else
+#define init_ve_devpts(ve)	(0)
+#define fini_ve_devpts(ve)	do { } while (0)
+#endif
+
+/* give the VE its own tmpfs (shmem) fs type and internal mount */
+static int init_ve_shmem(struct ve_struct *ve)
+{
+	return register_ve_fs_type(ve,
+				   &tmpfs_fs_type,
+				   &ve->shmem_fstype,
+				   &ve->shmem_mnt);
+}
+
+static void fini_ve_shmem(struct ve_struct *ve)
+{
+	unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);
+	/* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->shmem_mnt = NULL;
+}
+
+/* allocate and hand-initialize the VE's private sysfs root dirent
+ * (mirrors what sysfs does for the global root) */
+static inline int init_ve_sysfs_root(struct ve_struct *ve)
+{
+	struct sysfs_dirent *sysfs_root;
+
+	sysfs_root = kmem_cache_alloc(sysfs_dir_cachep, GFP_KERNEL);
+	if (sysfs_root == NULL)
+		return -ENOMEM;
+
+	memset(sysfs_root, 0, sizeof(*sysfs_root));
+	atomic_set(&sysfs_root->s_count, 1);
+	INIT_LIST_HEAD(&sysfs_root->s_sibling);
+	INIT_LIST_HEAD(&sysfs_root->s_children);
+	sysfs_root->s_element = NULL;
+	sysfs_root->s_type = SYSFS_ROOT;
+	sysfs_root->s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	ve->sysfs_root = sysfs_root;
+	return 0;
+}
+
+/* minor numbers and names of the "mem" class character devices
+ * recreated inside each VE; NULL name terminates the table */
+static const struct {
+	unsigned	minor;
+	char		*name;
+} mem_class_devices [] = {
+	{3, "null"},
+	{5, "zero"},
+	{7, "full"},
+	{8, "random"},
+	{9, "urandom"},
+	{0, NULL},
+};
+
+/* create the VE-local "mem" class and its standard char devices;
+ * returns the class or an ERR_PTR from class_create() */
+static struct class *init_ve_mem_class(void)
+{
+	int i;
+	struct class *ve_mem_class;
+
+	ve_mem_class = class_create(THIS_MODULE, "mem");
+	if (IS_ERR(ve_mem_class))
+		return ve_mem_class;
+	for (i = 0; mem_class_devices[i].name; i++)
+		class_device_create(ve_mem_class, NULL,
+				MKDEV(MEM_MAJOR, mem_class_devices[i].minor),
+				NULL, mem_class_devices[i].name);
+	return ve_mem_class;
+}
+
+
+/*
+ * Destroy the devices and class created by init_ve_mem_class().
+ * NOTE(review): unlike its init counterpart this is not 'static' —
+ * looks unintentional since the only visible caller is fini_ve_sysfs()
+ * in this file; confirm no external users before adding static.
+ */
+void fini_ve_mem_class(struct class *ve_mem_class)
+{
+	int i;
+
+	for (i = 0; mem_class_devices[i].name; i++)
+		class_device_destroy(ve_mem_class,
+				MKDEV(MEM_MAJOR, mem_class_devices[i].minor));
+	class_destroy(ve_mem_class);
+}
+
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+extern struct class_device_attribute ve_net_class_attributes[];
+/* per-VE clone of the global "net" class, sharing its callbacks but
+ * using the VE-specific attribute set */
+static inline int init_ve_netclass(struct ve_struct *ve)
+{
+	struct class *nc;
+	int err;
+
+	nc = kzalloc(sizeof(*nc), GFP_KERNEL);
+	if (!nc)
+		return -ENOMEM;
+
+	nc->name = net_class.name;
+	nc->release = net_class.release;
+	nc->uevent = net_class.uevent;
+	nc->class_dev_attrs = ve_net_class_attributes;
+
+	err = class_register(nc);
+	if (!err) {
+		ve->net_class = nc;
+		return 0;
+	}
+	kfree(nc);	
+	return err;
+}
+
+static inline void fini_ve_netclass(struct ve_struct *ve)
+{
+	class_unregister(ve->net_class);
+	kfree(ve->net_class);
+	ve->net_class = NULL;
+}
+#else
+static inline int init_ve_netclass(struct ve_struct *ve) { return 0; }
+static inline void fini_ve_netclass(struct ve_struct *ve) { ; }
+#endif
+
+/*
+ * Build the VE's sysfs world: private sysfs mount (when the feature is
+ * enabled), clones of the class_obj/class subsystems, and the net/tty/
+ * mem device classes.  Error paths unwind in reverse creation order.
+ * Returns 0 or -errno.
+ */
+static int init_ve_sysfs(struct ve_struct *ve)
+{
+	struct subsystem *subsys;
+	int err;
+
+#ifdef CONFIG_SYSFS
+	err = 0;
+	if (ve->features & VE_FEATURE_SYSFS) {
+		err = init_ve_sysfs_root(ve);
+		if (err != 0)
+			goto out;
+		err = register_ve_fs_type(ve,
+				   &sysfs_fs_type,
+				   &ve->sysfs_fstype,
+				   &ve->sysfs_mnt);
+	}
+	if (err != 0)
+		goto out_fs_type;
+#endif
+	err = -ENOMEM;
+	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_obj;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_obj_subsys.kset.ktype;
+	subsys->kset.uevent_ops = class_obj_subsys.kset.uevent_ops;
+	subsystem_init(subsys);
+	if (!subsys->kset.subsys)
+			subsys->kset.subsys = subsys;
+	ve->class_obj_subsys = subsys;
+
+	err = -ENOMEM;
+	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_subsys;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_subsys.kset.ktype;
+	subsys->kset.uevent_ops = class_subsys.kset.uevent_ops;
+	ve->class_subsys = subsys;
+	err = subsystem_register(subsys);
+	if (err != 0)
+		goto out_register;
+
+	err = init_ve_netclass(ve);
+	if (err)
+		goto out_nc;
+
+	ve->tty_class = init_ve_tty_class();
+	if (IS_ERR(ve->tty_class)) {
+		err = PTR_ERR(ve->tty_class);
+		ve->tty_class = NULL;
+		goto out_tty_class_register;
+	}
+
+	ve->mem_class = init_ve_mem_class();
+	if (IS_ERR(ve->mem_class)) {
+		err = PTR_ERR(ve->mem_class);
+		ve->mem_class = NULL;
+		goto out_mem_class_register;
+	}
+
+	return err;
+
+out_mem_class_register:
+	fini_ve_tty_class(ve->tty_class);
+out_tty_class_register:
+	fini_ve_netclass(ve);
+out_nc:
+	subsystem_unregister(subsys);
+out_register:
+	kfree(ve->class_subsys);
+out_class_subsys:
+	kfree(ve->class_obj_subsys);
+out_class_obj:
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+out_fs_type:
+	sysfs_put(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+#endif
+	ve->class_subsys = NULL;
+	ve->class_obj_subsys = NULL;
+out:
+	return err;
+}
+
+/* undo init_ve_sysfs() in reverse creation order */
+static void fini_ve_sysfs(struct ve_struct *ve)
+{
+	fini_ve_mem_class(ve->mem_class);
+	fini_ve_tty_class(ve->tty_class);
+	fini_ve_netclass(ve);
+	subsystem_unregister(ve->class_subsys);
+	kfree(ve->class_subsys);
+	kfree(ve->class_obj_subsys);
+
+	ve->class_subsys = NULL;
+	ve->class_obj_subsys = NULL;
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	ve->sysfs_mnt = NULL;
+	if (ve->sysfs_root) {
+		sysfs_put(ve->sysfs_root);
+		ve->sysfs_root = NULL;
+	}
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+#endif
+}
+
+/* final release of all per-VE file_system_type clones (called from the
+ * last-put path; the corresponding mounts are already gone) */
+static void free_ve_filesystems(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSFS
+	kfree(ve->sysfs_fstype);
+	ve->sysfs_fstype = NULL;
+#endif
+	kfree(ve->shmem_fstype);
+	ve->shmem_fstype = NULL;
+
+	kfree(ve->devpts_fstype);
+	ve->devpts_fstype = NULL;
+
+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
+	kfree(ve->fuse_fs_type);
+	ve->fuse_fs_type = NULL;
+
+	kfree(ve->fuse_ctl_fs_type);
+	ve->fuse_ctl_fs_type = NULL;
+#endif
+
+#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+	kfree(ve->bm_fs_type);
+	ve->bm_fs_type = NULL;
+#endif
+
+	free_ve_proc(ve);
+}
+
+/*
+ * Set up the per-VE printk state (wait queue and log-buffer indices).
+ * All four fields are carved out of a single kzalloc'ed chunk; since
+ * log_wait is the first member, ve->_log_wait also serves as the
+ * pointer to the whole allocation and is what fini_printk() kfree()s.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int init_printk(struct ve_struct *ve)
+{
+	struct ve_prep_printk {
+		wait_queue_head_t       log_wait;
+		unsigned long           log_start;
+		unsigned long           log_end;
+		unsigned long           logged_chars;
+	} *tmp;
+
+	tmp = kzalloc(sizeof(struct ve_prep_printk), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	init_waitqueue_head(&tmp->log_wait);
+	ve->_log_wait = &tmp->log_wait;
+	ve->_log_start = &tmp->log_start;
+	ve->_log_end = &tmp->log_end;
+	ve->_logged_chars = &tmp->logged_chars;
+	/* ve->log_buf will be initialized later by ve_log_init() */
+	return 0;
+}
+
+/*
+ * Release the per-VE printk resources allocated by init_printk() and
+ * ve_log_init().  ve->_log_wait points at the start of the combined
+ * ve_prep_printk allocation, so freeing it frees all four fields.
+ */
+static void fini_printk(struct ve_struct *ve)
+{
+	/*
+	 * No spinlock protection is needed here because nobody can use
+	 * log_buf at the moments when this code is called.
+	 */
+	kfree(ve->log_buf);
+	kfree(ve->_log_wait);
+}
+
+/*
+ * Shut down VE networking: kill the VE's TCP sockets, clean up mapped
+ * devices, then wait for in-flight packets to drain (synchronize_net).
+ */
+static void fini_venet(struct ve_struct *ve)
+{
+#ifdef CONFIG_INET
+	tcp_v4_kill_ve_sockets(ve);
+	ve_mapped_devs_cleanup(ve);
+	synchronize_net();
+#endif
+}
+
+/*
+ * Create a fairsched node for the VE (with the requested number of
+ * vcpus) and attach the current task to it on success.
+ * Returns 0 or a negative fairsched error.
+ */
+static int init_ve_sched(struct ve_struct *ve, unsigned int vcpus)
+{
+	int err;
+
+	err = fairsched_new_node(ve->veid, vcpus);
+	if (err == 0)
+		ve_sched_attach(ve);
+
+	return err;
+}
+
+/* Drop the VE's fairsched node; pairs with init_ve_sched(). */
+static void fini_ve_sched(struct ve_struct *ve)
+{
+	fairsched_drop_node(ve->veid);
+}
+
+/*
+ * Pidmap
+ */
+
+/*
+ * Initialize the VE's virtual-pid bitmap bookkeeping.  Pages are
+ * allocated lazily elsewhere; here only the free counters are primed.
+ * Always returns 0 (kept int for symmetry with the other init hooks).
+ */
+static int init_ve_pidmap(struct ve_struct *ve)
+{
+	int i;
+
+	ve->vpid_max = VPID_MAX_DEFAULT;
+
+	for (i = 0; i < PIDMAP_ENTRIES; i++)
+		atomic_set(&ve->ve_pidmap[i].nr_free, PIDMAP_NRFREE);
+
+	return 0;
+}
+
+/* Free any lazily allocated pidmap pages; pairs with init_ve_pidmap(). */
+static void fini_ve_pidmap(struct ve_struct *ve)
+{
+	int i;
+
+	for (i = 0; i < PIDMAP_ENTRIES; i++) {
+		if (ve->ve_pidmap[i].page == NULL)
+			continue;
+
+		free_page((unsigned long)ve->ve_pidmap[i].page);
+		ve->ve_pidmap[i].page = NULL;
+	}
+}
+
+/*
+ * Namespaces
+ */
+
+/*
+ * Give the current task a fresh set of namespaces for the new VE and
+ * record them in ve->ve_ns.  On success *old holds a reference to the
+ * task's previous nsproxy so the caller can restore or release it;
+ * that reference is dropped here only on failure.
+ */
+static inline int init_ve_namespaces(struct ve_struct *ve,
+		struct nsproxy **old)
+{
+	int err;
+	struct task_struct *tsk;
+	struct nsproxy *cur;
+
+	tsk = current;
+	cur = get_nsproxy(tsk->nsproxy);
+
+	err = copy_namespaces(CLONE_NAMESPACES_MASK, tsk);
+	if (err < 0) {
+		put_nsproxy(cur);
+		return err;
+	}
+
+	/* take a VE-owned reference on the freshly copied namespaces */
+	ve->ve_ns = get_nsproxy(tsk->nsproxy);
+	/* present the virtualized kernel release inside the VE */
+	memcpy(ve->ve_ns->uts_ns->name.release, virt_utsname.release,
+			sizeof(virt_utsname.release));
+	*old = cur;
+	return 0;
+}
+
+/*
+ * Drop the VE's namespaces.  If @old is non-NULL, the current task is
+ * switched back to it first (consuming the reference returned by
+ * init_ve_namespaces); SysV shm segments of the VE's ipc namespace are
+ * cleaned before the final reference on ve->ve_ns is released.
+ */
+static inline void fini_ve_namespaces(struct ve_struct *ve,
+		struct nsproxy *old)
+{
+	struct task_struct *tsk;
+	struct nsproxy *cur;
+
+	if (old) {
+		tsk = current;
+		cur = tsk->nsproxy;
+		tsk->nsproxy = old;
+		put_nsproxy(cur);
+	}
+
+	if (ve->ve_ns->ipc_ns)
+		shm_clean_ns(ve->ve_ns->ipc_ns);
+
+	put_nsproxy(ve->ve_ns);
+	ve->ve_ns = NULL;
+}
+
+/*
+ * Point @tsk (must be current) at the VE's namespaces, taking a new
+ * reference on them and dropping the reference on the previous nsproxy.
+ * No-op if the task is already using the VE's namespaces.
+ */
+static inline void switch_ve_namespaces(struct ve_struct *ve,
+		struct task_struct *tsk)
+{
+	struct nsproxy *old_ns;
+	struct nsproxy *new_ns;
+
+	BUG_ON(tsk != current);
+	old_ns = tsk->nsproxy;
+	new_ns = ve->ve_ns;
+
+	if (old_ns != new_ns) {
+		tsk->nsproxy = get_nsproxy(new_ns);
+		put_nsproxy(old_ns);
+	}
+}
+
+/*
+ * Compute the feature bits for a new VE from the create parameters.
+ *
+ * Feature bits the calling vzctl is aware of are taken from its request
+ * verbatim; bits it does not know about fall back to the kernel-side
+ * defaults (VE_FEATURES_DEF).
+ */
+static __u64 get_ve_features(env_create_param_t *data, int datalen)
+{
+	__u64 known;
+
+	/* a short create-param means an old vzctl that only knows
+	 * about VE_FEATURES_OLD */
+	known = (datalen < sizeof(struct env_create_param3)) ?
+			VE_FEATURES_OLD : data->known_features;
+
+	return (data->feature_mask & known) | (VE_FEATURES_DEF & ~known);
+}
+
+/*
+ * First-stage initialization of a freshly kzalloc'ed ve_struct: take a
+ * reference, record identity (veid/class/init task), compute feature
+ * bits and prime the various lists, hashes and counters.  Always
+ * returns 0; heavier subsystem setup happens later in do_env_create().
+ */
+static int init_ve_struct(struct ve_struct *ve, envid_t veid,
+		u32 class_id, env_create_param_t *data, int datalen,
+		struct task_struct *init_tsk)
+{
+	int n;
+
+	(void)get_ve(ve);
+	ve->veid = veid;
+	ve->class_id = class_id;
+	ve->init_entry = init_tsk;
+	ve->features = get_ve_features(data, datalen);
+	INIT_LIST_HEAD(&ve->vetask_lh);
+	init_rwsem(&ve->op_sem);
+#ifdef CONFIG_NET
+	ve->ifindex = -1;
+#endif
+
+	for(n = 0; n < UIDHASH_SZ_VE; ++n)
+		INIT_LIST_HEAD(&ve->uidhash_table[n]);
+
+	ve->start_timespec = ve->init_entry->start_time;
+	/* The value is wrong, but it is never compared to process
+	 * start times */
+	ve->start_jiffies = get_jiffies_64();
+	ve->start_cycles = get_cycles();
+	ve->virt_pids = glob_virt_pids;
+	INIT_LIST_HEAD(&ve->vetask_auxlist);
+
+	ve->_randomize_va_space = ve0._randomize_va_space;
+	ve->ipv6_ops = ve_ipv6_ops_get();
+
+	atomic_set(&ve->mnt_nr, 0);
+	atomic_set(&ve->ifa_nr, 0);
+
+	return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * /proc/meminfo virtualization
+ *
+ **********************************************************************
+ **********************************************************************/
+/*
+ * Set the /proc/meminfo virtualization mode for VE @veid.
+ * val == 0 selects pass-through of system meminfo, val == 1 selects the
+ * default (beancounter-based) mode, any other value is used as a fixed
+ * memory size.  Returns 0, -EINVAL for an unknown veid, or -ENOTTY when
+ * compiled without CONFIG_USER_RESOURCE.
+ */
+static int ve_set_meminfo(envid_t veid, unsigned long val)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct ve_struct *ve;
+
+	ve = get_ve_by_id(veid);
+	if (!ve)
+		return -EINVAL;
+
+	if (val == 0)
+		val = VE_MEMINFO_SYSTEM;
+	else if (val == 1)
+		val = VE_MEMINFO_DEFAULT;
+	ve->meminfo_val = val;
+	real_put_ve(ve);
+	return 0;
+#else
+	return -ENOTTY;
+#endif
+}
+
+/* Start every VE in the default meminfo virtualization mode. */
+static int init_ve_meminfo(struct ve_struct *ve)
+{
+	ve->meminfo_val = VE_MEMINFO_DEFAULT;
+	return 0;
+}
+
+/* Nothing to undo for meminfo virtualization; kept for symmetry. */
+static inline void fini_ve_meminfo(struct ve_struct *ve)
+{
+}
+
+/*
+ * Record @tsk's current root (vfsmount + dentry) as the VE's filesystem
+ * root and mark the subtree as belonging to the VE.
+ */
+static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
+{
+	read_lock(&tsk->fs->lock);
+	ve->fs_rootmnt = tsk->fs->rootmnt;
+	ve->fs_root = tsk->fs->root;
+	read_unlock(&tsk->fs->lock);
+	mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
+}
+
+/* Snapshot @tsk's effective capabilities as the VE capability bound. */
+static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
+{
+	/* required for real_setdevperms from register_ve_<fs> above */
+	memcpy(&ve->ve_cap_bset, &tsk->cap_effective, sizeof(kernel_cap_t));
+}
+
+/*
+ * Insert the VE into the global VE list under ve_list_lock.
+ * Returns 0 on success or -EEXIST if a VE with the same id is already
+ * registered.
+ */
+static int ve_list_add(struct ve_struct *ve)
+{
+	int err = 0;
+
+	write_lock_irq(&ve_list_lock);
+	if (__find_ve_by_id(ve->veid) != NULL) {
+		err = -EEXIST;
+	} else {
+		list_add(&ve->ve_list, &ve_list_head);
+		nr_ve++;
+	}
+	write_unlock_irq(&ve_list_lock);
+	return err;
+}
+
+/* Remove the VE from the global VE list; pairs with ve_list_add(). */
+static void ve_list_del(struct ve_struct *ve)
+{
+	write_lock_irq(&ve_list_lock);
+	list_del(&ve->ve_list);
+	nr_ve--;
+	write_unlock_irq(&ve_list_lock);
+}
+
+/*
+ * Clamp all three capability sets of @tsk to the VE's capability bound
+ * so the task cannot exceed what the container is allowed.
+ */
+static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
+{
+	spin_lock(&task_capability_lock);
+	cap_mask(tsk->cap_effective, ve->ve_cap_bset);
+	cap_mask(tsk->cap_inheritable, ve->ve_cap_bset);
+	cap_mask(tsk->cap_permitted, ve->ve_cap_bset);
+	spin_unlock(&task_capability_lock);
+}
+
+/*
+ * Move @tsk (must be current, a single-threaded group leader) into VE
+ * @new: clamp capabilities, drop OOM protection, and relink the task on
+ * the new VE's task lists.  The task is removed from the old list with
+ * list_del_rcu() and only re-added after synchronize_rcu(), so lockless
+ * RCU walkers never observe it on both lists at once.  Reference and
+ * process counters are transferred from the old VE to the new one.
+ */
+static void __ve_move_task(struct task_struct *tsk, struct ve_struct *new)
+{
+	struct ve_struct *old;
+
+	might_sleep();
+	BUG_ON(tsk != current);
+	BUG_ON(!(thread_group_leader(tsk) && thread_group_empty(tsk)));
+
+	/* this probihibts ptracing of task entered to VE from host system */
+	tsk->mm->vps_dumpable = 0;
+	/* setup capabilities before enter */
+	set_task_ve_caps(tsk, new);
+
+	/* Drop OOM protection. */
+	if (tsk->oomkilladj == OOM_DISABLE)
+		tsk->oomkilladj = 0;
+
+	old = tsk->ve_task_info.owner_env;
+	tsk->ve_task_info.owner_env = new;
+	tsk->ve_task_info.exec_env = new;
+
+	write_lock_irq(&tasklist_lock);
+	list_del_rcu(&tsk->ve_task_info.vetask_list);
+	write_unlock_irq(&tasklist_lock);
+
+	/* wait for concurrent RCU readers before reusing the list node */
+	synchronize_rcu();
+
+	write_lock_irq(&tasklist_lock);
+	list_add_tail_rcu(&tsk->ve_task_info.vetask_list,
+			&new->vetask_lh);
+	list_del(&tsk->ve_task_info.aux_list);
+	list_add_tail(&tsk->ve_task_info.aux_list,
+			&new->vetask_auxlist);
+	write_unlock_irq(&tasklist_lock);
+
+	atomic_dec(&old->pcounter);
+	real_put_ve(old);
+
+	atomic_inc(&new->pcounter);
+	get_ve(new);
+}
+
+/*
+ * Public entry point for moving a task into a VE: perform the move and
+ * then, if possible, assign the task a virtual pid (and matching vtgid,
+ * and vpgid/vsid when it leads its own group/session).
+ */
+void ve_move_task(struct task_struct *tsk, struct ve_struct *new)
+{
+	__ve_move_task(tsk, new);
+
+	/* Check that the process is not a leader of non-empty group/session.
+	 * If it is, we cannot virtualize its PID. Do not fail, just leave
+	 * it non-virtual.
+	 */
+	if (!is_virtual_pid(virt_pid(tsk)) && alone_in_pgrp(tsk)) {
+		pid_t vpid = alloc_vpid(tsk->pids[PIDTYPE_PID].pid, -1);
+		if (vpid > 0) {
+			set_virt_pid(tsk, vpid);
+			set_virt_tgid(tsk, vpid);
+			if (tsk->signal->pgrp == tsk->pid)
+				set_virt_pgid(tsk, vpid);
+			if (tsk->signal->session == tsk->pid)
+				set_virt_sid(tsk, vpid);
+		}
+	}
+}
+
+EXPORT_SYMBOL(ve_move_task);
+
+#ifdef CONFIG_VE_IPTABLES
+extern int init_netfilter(void);
+extern void fini_netfilter(void);
+#define init_ve_netfilter()	init_netfilter()
+#define fini_ve_netfilter()	fini_netfilter()
+
+/*
+ * Conditionally run an iptables module's per-VE init hook: fire only
+ * when the requested mask asks for full_mask and the VE has not yet set
+ * the corresponding _MOD bit.  On success the _MOD bit is recorded in
+ * ve->_iptables_modules.  KSYMERRCALL returning 1 means "module symbol
+ * not available" and is mapped to success (0).
+ */
+#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args)	\
+({								\
+	int ret = 0;						\
+	if (VE_IPT_CMP(mask, full_mask) &&			\
+		VE_IPT_CMP((ve)->_iptables_modules, 		\
+			full_mask & ~(full_mask##_MOD))) {	\
+		ret = KSYMERRCALL(1, mod, name, args);		\
+		if (ret == 0)					\
+			(ve)->_iptables_modules |=		\
+					full_mask##_MOD;	\
+		if (ret == 1)					\
+			ret = 0;				\
+	}							\
+	ret;							\
+})
+
+/*
+ * Counterpart for cleanup: call the module's per-VE fini hook iff the
+ * _MOD bit was recorded at init time.
+ */
+#define KSYMIPTFINI(mask, full_mask, mod, name, args)		\
+({								\
+ 	if (VE_IPT_CMP(mask, full_mask##_MOD))			\
+		KSYMSAFECALL_VOID(mod, name, args);		\
+})
+
+
+/*
+ * Initialize (init_or_cleanup != 0) or tear down (== 0) the per-VE
+ * iptables modules selected by @init_mask.
+ *
+ * The function is written as a single ladder: the init half runs the
+ * module init hooks in order and, on failure, jumps into the cleanup
+ * half at the label matching the last successful step; a plain cleanup
+ * call enters the ladder at the top ("cleanup:") and falls through all
+ * fini hooks.  Each hook is guarded by KSYMIPTFINI against modules
+ * that were never initialized for this VE.
+ */
+static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
+		int init_or_cleanup)
+{
+	int err;
+
+	/* Remove when userspace will start supplying IPv6-related bits. */
+	init_mask &= ~VE_IP_IPTABLES6;
+	init_mask &= ~VE_IP_FILTER6;
+	init_mask &= ~VE_IP_MANGLE6;
+	init_mask &= ~VE_IP_IPTABLE_NAT_MOD;
+	/* mirror the IPv4 selections onto their IPv6/NAT counterparts */
+	if ((init_mask & VE_IP_IPTABLES) == VE_IP_IPTABLES)
+		init_mask |= VE_IP_IPTABLES6;
+	if ((init_mask & VE_IP_FILTER) == VE_IP_FILTER)
+		init_mask |= VE_IP_FILTER6;
+	if ((init_mask & VE_IP_MANGLE) == VE_IP_MANGLE)
+		init_mask |= VE_IP_MANGLE6;
+	if ((init_mask & VE_IP_NAT) == VE_IP_NAT)
+		init_mask |= VE_IP_IPTABLE_NAT;
+
+	err = 0;
+	if (!init_or_cleanup)
+		goto cleanup;
+
+	/* init part */
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			ip_tables, init_iptables, ());
+	if (err < 0)
+		goto err_iptables;
+#endif
+#if defined(CONFIG_IP6_NF_IPTABLES) || \
+    defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES6,
+			ip6_tables, init_ip6tables, ());
+	if (err < 0)
+		goto err_ip6tables;
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
+			ip_conntrack, init_iptable_conntrack, ());
+	if (err < 0)
+		goto err_iptable_conntrack;
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, init_ip_ct_ftp, ());
+	if (err < 0)
+		goto err_iptable_ftp;
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, init_ip_ct_irc, ());
+	if (err < 0)
+		goto err_iptable_irc;
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
+			ip_nat, ip_nat_init, ());
+	if (err < 0)
+		goto err_iptable_nat;
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLE_NAT,
+			iptable_nat, init_iptable_nat, ());
+	if (err < 0)
+		goto err_iptable_nat2;
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
+			ip_nat_ftp, init_iptable_nat_ftp, ());
+	if (err < 0)
+		goto err_iptable_nat_ftp;
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
+			ip_nat_irc, init_iptable_nat_irc, ());
+	if (err < 0)
+		goto err_iptable_nat_irc;
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
+			iptable_filter,	init_iptable_filter, ());
+	if (err < 0)
+		goto err_iptable_filter;
+#endif
+#if defined(CONFIG_IP6_NF_FILTER) || \
+    defined(CONFIG_IP6_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER6,
+			ip6table_filter, init_ip6table_filter, ());
+	if (err < 0)
+		goto err_ip6table_filter;
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
+			iptable_mangle,	init_iptable_mangle, ());
+	if (err < 0)
+		goto err_iptable_mangle;
+#endif
+#if defined(CONFIG_IP6_NF_MANGLE) || \
+    defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE6,
+			ip6table_mangle, init_ip6table_mangle, ());
+	if (err < 0)
+		goto err_ip6table_mangle;
+#endif
+	return 0;
+
+/* ------------------------------------------------------------------------- */
+
+/* cleanup ladder: entered at the top for a full teardown, or at one of
+ * the err_* labels to roll back a partially completed init */
+cleanup:
+#if defined(CONFIG_IP6_NF_MANGLE) || \
+    defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE6,
+			ip6table_mangle, fini_ip6table_mangle, ());
+err_ip6table_mangle:
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
+			iptable_mangle,	fini_iptable_mangle, ());
+err_iptable_mangle:
+#endif
+#if defined(CONFIG_IP6_NF_FILTER) || \
+    defined(CONFIG_IP6_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER6,
+			ip6table_filter, fini_ip6table_filter, ());
+err_ip6table_filter:
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
+			iptable_filter,	fini_iptable_filter, ());
+err_iptable_filter:
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
+			ip_nat_irc, fini_iptable_nat_irc, ());
+err_iptable_nat_irc:
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
+			ip_nat_ftp, fini_iptable_nat_ftp, ());
+err_iptable_nat_ftp:
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLE_NAT,
+			iptable_nat, fini_iptable_nat, ());
+err_iptable_nat2:
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
+			ip_nat, ip_nat_cleanup, ());
+err_iptable_nat:
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, fini_ip_ct_irc, ());
+err_iptable_irc:
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, fini_ip_ct_ftp, ());
+err_iptable_ftp:
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
+			ip_conntrack, fini_iptable_conntrack, ());
+err_iptable_conntrack:
+#endif
+#if defined(CONFIG_IP6_NF_IPTABLES) || \
+    defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES6,
+			ip6_tables, fini_ip6tables, ());
+err_ip6tables:
+#endif
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			ip_tables, fini_iptables, ());
+err_iptables:
+#endif
+	ve->_iptables_modules = 0;
+
+	return err;
+}
+
+/* Thin wrapper: run the do_ve_iptables() ladder in init mode. */
+static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	return do_ve_iptables(ve, init_mask, 1);
+}
+
+/* Thin wrapper: run the do_ve_iptables() ladder in cleanup mode. */
+static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	(void)do_ve_iptables(ve, init_mask, 0);
+}
+
+#else
+#define init_ve_iptables(x, y)	(0)
+#define fini_ve_iptables(x, y)	do { } while (0)
+#define init_ve_netfilter()	(0)
+#define fini_ve_netfilter()	do { } while (0)
+#endif
+
+/*
+ * Allocate the per-cpu CPU usage statistics and scheduling-latency
+ * snapshots for a VE.  Returns 0 on success or -ENOMEM; on failure
+ * nothing is left allocated.
+ */
+static inline int init_ve_cpustats(struct ve_struct *ve)
+{
+	ve->cpu_stats = alloc_percpu(struct ve_cpu_stats);
+	if (ve->cpu_stats == NULL)
+		return -ENOMEM;
+	ve->sched_lat_ve.cur = alloc_percpu(struct kstat_lat_pcpu_snap_struct);
+	/*
+	 * BUG FIX: the original tested "ve == NULL" here, which can never
+	 * be true at this point, so a failed allocation went unnoticed and
+	 * a NULL per-cpu pointer escaped.  Test the allocation itself.
+	 */
+	if (ve->sched_lat_ve.cur == NULL)
+		goto fail;
+	return 0;
+
+fail:
+	free_percpu(ve->cpu_stats);
+	ve->cpu_stats = NULL;	/* guard against a later double free */
+	return -ENOMEM;
+}
+
+/* Release the per-cpu statistics allocated by init_ve_cpustats(). */
+static inline void free_ve_cpustats(struct ve_struct *ve)
+{
+	free_percpu(ve->cpu_stats);
+	ve->cpu_stats = NULL;
+	free_percpu(ve->sched_lat_ve.cur);
+	ve->sched_lat_ve.cur = NULL;
+}
+
+/*
+ * Return 1 iff @tsk is the only member of both its process group and
+ * its session (i.e. its pid can safely be virtualized), 0 otherwise.
+ */
+static int alone_in_pgrp(struct task_struct *tsk)
+{
+	struct task_struct *p;
+	int alone = 0;
+
+	read_lock(&tasklist_lock);
+	do_each_task_pid_all(tsk->pid, PIDTYPE_PGID, p) {
+		if (p != tsk)
+			goto out;
+	} while_each_task_pid_all(tsk->pid, PIDTYPE_PGID, p);
+	do_each_task_pid_all(tsk->pid, PIDTYPE_SID, p) {
+		if (p != tsk)
+			goto out;
+	} while_each_task_pid_all(tsk->pid, PIDTYPE_SID, p);
+	alone = 1;
+out:
+	read_unlock(&tasklist_lock);
+	return alone;
+}
+
+/*
+ * Create a new VE with id @veid and turn the current task into its init
+ * process.  The caller must be a single-threaded group leader with no
+ * controlling terminal and must be alone in its process group/session.
+ *
+ * The function is one long, strictly ordered sequence of subsystem
+ * initializations; any failure unwinds through the err_* labels in
+ * exactly the reverse order.  Returns the new veid on success or a
+ * negative errno on failure.
+ */
+static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
+			 env_create_param_t *data, int datalen)
+{
+	struct task_struct *tsk;
+	struct ve_struct *old;
+	struct ve_struct *old_exec;
+	struct ve_struct *ve;
+ 	__u64 init_mask;
+	int err;
+	struct nsproxy *old_ns;
+
+	tsk = current;
+	old = VE_TASK_INFO(tsk)->owner_env;
+
+	if (!thread_group_leader(tsk) || !thread_group_empty(tsk))
+		return -EINVAL;
+
+	if (tsk->signal->tty) {
+		printk("ERR: CT init has controlling terminal\n");
+		return -EINVAL;
+	}
+	if (tsk->signal->pgrp != tsk->pid ||
+	    tsk->signal->session != tsk->pid) {
+		int may_setsid;
+
+		read_lock(&tasklist_lock);
+		may_setsid = !tsk->signal->leader &&
+			!find_task_by_pid_type_all(PIDTYPE_PGID, tsk->pid);
+		read_unlock(&tasklist_lock);
+
+		if (!may_setsid) {
+			printk("ERR: CT init is process group leader\n");
+			return -EINVAL;
+		}
+	}
+	/* Check that the process is not a leader of non-empty group/session.
+	 * If it is, we cannot virtualize its PID and must fail. */
+	if (!alone_in_pgrp(tsk)) {
+		printk("ERR: CT init is not alone in process group\n");
+		return -EINVAL;
+	}
+
+
+	VZTRACE("%s: veid=%d classid=%d pid=%d\n",
+		__FUNCTION__, veid, class_id, current->pid);
+
+	err = -ENOMEM;
+	ve = kzalloc(sizeof(struct ve_struct), GFP_KERNEL);
+	if (ve == NULL)
+		goto err_struct;
+
+	ve->uidhash_table =
+		kcalloc(UIDHASH_SZ_VE, sizeof(*ve->uidhash_table), GFP_KERNEL);
+	if (ve->uidhash_table == NULL)
+		goto err_uidhash_table;
+
+	init_ve_struct(ve, veid, class_id, data, datalen, tsk);
+	__module_get(THIS_MODULE);
+	down_write(&ve->op_sem);
+	if (flags & VE_LOCK)
+		ve->is_locked = 1;
+
+	/*
+	 * this should be done before adding to list
+	 * because if calc_load_ve finds this ve in
+	 * list it will be very surprised
+	 */
+	if ((err = init_ve_cpustats(ve)) < 0)
+		goto err_cpu_stats;
+
+	if ((err = ve_list_add(ve)) < 0)
+		goto err_exist;
+
+	/* this should be done before context switching */
+	if ((err = init_printk(ve)) < 0)
+		goto err_log_wait;
+
+	/* from here on we execute in the new VE's context */
+	old_exec = set_exec_env(ve);
+
+	if ((err = init_ve_pidmap(ve)) < 0)
+		goto err_pidmap;
+
+	if ((err = init_ve_sched(ve, data->total_vcpus)) < 0)
+		goto err_sched;
+
+	/* move user to VE */
+	if ((err = set_user(0, 0)) < 0)
+		goto err_set_user;
+
+	set_ve_root(ve, tsk);
+
+	if ((err = init_ve_namespaces(ve, &old_ns)))
+		goto err_ns;
+
+	if ((err = init_ve_mibs(ve)))
+		goto err_mibs;
+
+	if ((err = init_ve_proc(ve)))
+		goto err_proc;
+
+	if ((err = init_ve_sysctl(ve)))
+		goto err_sysctl;
+
+	if ((err = init_ve_sysfs(ve)))
+		goto err_sysfs;
+
+	if ((err = ve_arp_init(ve)) < 0)
+		goto err_route;
+
+	if (ve->ipv6_ops) {
+		if ((err = ve->ipv6_ops->ndisc_init(ve)) < 0)
+			goto err_route;
+	}
+
+#ifdef CONFIG_FIB_RULES
+	INIT_LIST_HEAD(&ve->_rules_ops);
+#endif
+
+	if ((err = init_ve_route(ve)) < 0)
+		goto err_route;
+
+	if (ve->ipv6_ops) {
+		if ((err = ve->ipv6_ops->route_init(ve)) < 0)
+			goto err_route6;
+	}
+
+	if ((err = init_ve_netdev()))
+		goto err_dev;
+
+	if ((err = init_ve_tty_drivers(ve)) < 0)
+		goto err_tty;
+
+	if ((err = init_ve_shmem(ve)))
+		goto err_shmem;
+
+	if ((err = init_ve_devpts(ve)))
+		goto err_devpts;
+
+	if((err = init_ve_meminfo(ve)))
+		goto err_meminf;
+
+	set_ve_caps(ve, tsk);
+
+	/* It is safe to initialize netfilter here as routing initialization and
+	   interface setup will be done below. This means that NO skb can be
+	   passed inside. Den */
+	/* iptables ve initialization for non ve0;
+	   ve0 init is in module_init */
+	if ((err = init_ve_netfilter()) < 0)
+		goto err_netfilter;
+
+	init_mask = data ? data->iptables_mask : VE_IP_DEFAULT;
+	if ((err = init_ve_iptables(ve, init_mask)) < 0)
+		goto err_iptables;
+
+	if ((err = alloc_vpid(tsk->pids[PIDTYPE_PID].pid, 1)) < 0)
+		goto err_vpid;
+
+	if ((err = ve_hook_iterate_init(VE_SS_CHAIN, ve)) < 0)
+		goto err_ve_hook;
+
+	/* drop the reference taken in init_ve_namespaces() */
+	put_nsproxy(old_ns);
+
+	/* finally: set vpids and move inside */
+	__ve_move_task(tsk, ve);
+
+	set_virt_pid(tsk, 1);
+	set_virt_tgid(tsk, 1);
+
+	set_special_pids(tsk->pid, tsk->pid);
+	current->signal->tty_old_pgrp = 0;
+	set_virt_pgid(tsk, 1);
+	set_virt_sid(tsk, 1);
+	grsecurity_setup();
+
+	ve->is_running = 1;
+	up_write(&ve->op_sem);
+
+	printk(KERN_INFO "CT: %d: started\n", veid);
+	return veid;
+
+	/* error unwinding: strictly the reverse of the init sequence */
+err_ve_hook:
+	free_vpid(tsk->pids[PIDTYPE_PID].pid);
+err_vpid:
+	fini_venet(ve);
+	fini_ve_iptables(ve, init_mask);
+err_iptables:
+	fini_ve_netfilter();
+err_netfilter:
+	fini_ve_meminfo(ve);
+err_meminf:
+	fini_ve_devpts(ve);
+err_devpts:
+	fini_ve_shmem(ve);
+err_shmem:
+	fini_ve_tty_drivers(ve);
+err_tty:
+	fini_ve_netdev();
+err_dev:
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->route_fini(ve);
+err_route6:
+	fini_ve_route(ve);
+err_route:
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->ndisc_fini(ve);
+	ve_arp_fini(ve);
+	fini_ve_sysfs(ve);
+err_sysfs:
+	fini_ve_sysctl(ve);
+err_sysctl:
+	fini_ve_proc(ve);
+err_proc:
+	clean_device_perms_ve(ve->veid);
+	fini_ve_mibs(ve);
+err_mibs:
+	/* free_ve_utsname() is called inside real_put_ve() */ ;
+	fini_ve_namespaces(ve, old_ns);
+err_ns:
+	/* It is safe to restore current->envid here because
+	 * ve_fairsched_detach does not use current->envid. */
+	/* Really fairsched code uses current->envid in sys_fairsched_mknod
+	 * only.  It is correct if sys_fairsched_mknod is called from
+	 * userspace.  If sys_fairsched_mknod is called from
+	 * ve_fairsched_attach, then node->envid and node->parent_node->envid
+	 * are explicitly set to valid value after the call. */
+	/* FIXME */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	VE_TASK_INFO(tsk)->exec_env = old_exec;
+	/* move user back */
+	if (set_user(0, 0) < 0)
+		printk(KERN_WARNING"Can't restore UID\n");
+
+err_set_user:
+	fini_ve_sched(ve);
+err_sched:
+	fini_ve_pidmap(ve);
+err_pidmap:
+	(void)set_exec_env(old_exec);
+
+	/* we can jump here having incorrect envid */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	fini_printk(ve);
+err_log_wait:
+	/* cpustats will be freed in do_env_free */
+	ve_list_del(ve);
+	up_write(&ve->op_sem);
+
+	real_put_ve(ve);
+err_struct:
+	printk(KERN_INFO "CT: %d: failed to start with err=%d\n", veid, err);
+	return err;
+
+err_exist:
+	free_ve_cpustats(ve);
+err_cpu_stats:
+	kfree(ve->uidhash_table);
+err_uidhash_table:
+	kfree(ve);
+	goto err_struct;
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start/stop callbacks
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * Syscall-level entry point for VE creation/entering.
+ * flags == 0 just reports the caller's current veid.  VE_TEST checks
+ * existence, VE_CREATE creates (or degrades to VE_ENTER when the VE
+ * already exists, unless VE_EXCLUSIVE is set), VE_ENTER moves the
+ * caller into an existing, running VE.
+ * Returns the veid on success or a negative errno.
+ */
+int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			env_create_param_t *data, int datalen)
+{
+	int status;
+	struct ve_struct *ve;
+
+	if (!flags) {
+		status = get_exec_env()->veid;
+		goto out;
+	}
+
+	status = -EPERM;
+	if (!capable_setveid())
+		goto out;
+
+	status = -EINVAL;
+	if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
+		goto out;
+
+	status = -EINVAL;
+	ve = get_ve_by_id(veid);
+	if (ve) {
+		if (flags & VE_TEST) {
+			status = 0;
+			goto out_put;
+		}
+		if (flags & VE_EXCLUSIVE) {
+			status = -EACCES;
+			goto out_put;
+		}
+		if (flags & VE_CREATE) {
+			/* VE already exists: degrade create to enter */
+			flags &= ~VE_CREATE;
+			flags |= VE_ENTER;
+		}
+	} else {
+		if (flags & (VE_TEST|VE_ENTER)) {
+			status = -ESRCH;
+			goto out;
+		}
+	}
+
+	if (flags & VE_CREATE) {
+		status = do_env_create(veid, flags, class_id, data, datalen);
+		goto out;
+	} else if (flags & VE_ENTER)
+		status = do_env_enter(ve, flags);
+
+	/* else: returning EINVAL */
+
+	/* NOTE(review): with flag bits other than TEST/ENTER/CREATE and a
+	 * nonexistent veid we reach here with ve == NULL — presumably
+	 * real_put_ve() tolerates NULL; verify. */
+out_put:
+	real_put_ve(ve);
+out:
+	return status;
+}
+EXPORT_SYMBOL(real_env_create);
+
+/*
+ * Move the current task (single-threaded group leader) into a running,
+ * unlocked VE: reattach it to the VE's fairsched node, switch to the
+ * VE's namespaces and relink it onto the VE's task lists.
+ * Returns the veid on success or a negative errno.
+ */
+static int do_env_enter(struct ve_struct *ve, unsigned int flags)
+{
+	struct task_struct *tsk = current;
+	int err;
+
+	VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
+
+	err = -EBUSY;
+	down_read(&ve->op_sem);
+	if (!ve->is_running)
+		goto out_up;
+	if (ve->is_locked && !(flags & VE_SKIPLOCK))
+		goto out_up;
+	err = -EINVAL;
+	if (!thread_group_leader(tsk) || !thread_group_empty(tsk))
+		goto out_up;
+
+#ifdef CONFIG_FAIRSCHED
+	err = sys_fairsched_mvpr(current->pid, ve->veid);
+	if (err)
+		goto out_up;
+#endif
+	ve_sched_attach(ve);
+	switch_ve_namespaces(ve, tsk);
+	ve_move_task(current, ve);
+
+	/* Unlike VE_CREATE, we do not setsid() in VE_ENTER.
+	 * Process is allowed to be in an external group/session.
+	 * If user space callers wants, it will do setsid() after
+	 * VE_ENTER.
+	 */
+	err = VE_TASK_INFO(tsk)->owner_env->veid;
+
+out_up:
+	up_read(&ve->op_sem);
+	return err;
+}
+
+/*
+ * Full teardown of a dying VE, run from the vzmond thread (or inline if
+ * the helper thread could not be spawned).  Subsystems are shut down
+ * roughly in reverse order of do_env_create(); networking goes first so
+ * no packets are in flight while the rest is dismantled.  Drops the VE
+ * reference at the end; final memory release happens in
+ * real_do_env_free() when the last reference is gone.
+ */
+static void env_cleanup(struct ve_struct *ve)
+{
+	struct ve_struct *old_ve;
+
+	VZTRACE("real_do_env_cleanup\n");
+
+	acct_exit_ve(ve->bacct);
+
+	down_read(&ve->op_sem);
+	/* run the teardown in the dying VE's context */
+	old_ve = set_exec_env(ve);
+
+	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
+
+	fini_venet(ve);
+
+	/* no new packets in flight beyond this point */
+	/* skb hold dst_entry, and in turn lies in the ip fragment queue */
+	ip_fragment_cleanup(ve);
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->frag_cleanup(ve);
+
+	fini_ve_netdev();
+	fini_ve_route(ve);
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->route_fini(ve);
+
+	ve_arp_fini(ve);
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->ndisc_fini(ve);
+
+	fini_ve_namespaces(ve, NULL);
+
+	/* kill iptables */
+	/* No skb belonging to VE can exist at this point as unregister_netdev
+	   is an operation awaiting until ALL skb's gone */
+	fini_ve_iptables(ve, ve->_iptables_modules);
+	fini_ve_netfilter();
+
+	fini_ve_sched(ve);
+	clean_device_perms_ve(ve->veid);
+
+	fini_ve_devpts(ve);
+	fini_ve_shmem(ve);
+	fini_ve_sysfs(ve);
+	unregister_ve_tty_drivers(ve);
+	fini_ve_sysctl(ve);
+	fini_ve_proc(ve);
+	fini_ve_meminfo(ve);
+
+	fini_ve_mibs(ve);
+	fini_ve_pidmap(ve);
+
+	(void)set_exec_env(old_ve);
+	fini_printk(ve);	/* no printk can happen in ve context anymore */
+
+	ve_list_del(ve);
+	up_read(&ve->op_sem);
+
+	real_put_ve(ve);
+}
+
+static DECLARE_COMPLETION(vzmond_complete);
+static volatile int stop_vzmond;
+
+/*
+ * Kernel-thread body that cleans up a single VE and then exits.
+ * Declared int for kernel_thread(); it never actually returns because
+ * module_put_and_exit() terminates the thread.
+ */
+static int vzmond_helper(void *arg)
+{
+	char name[18];
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)arg;
+	snprintf(name, sizeof(name), "vzmond/%d", ve->veid);
+	daemonize(name);
+	env_cleanup(ve);
+	module_put_and_exit(0);
+}
+
+/*
+ * Drain ve_cleanup_list: pop VEs one at a time (dropping the lock while
+ * working) and spawn a helper thread per VE; if spawning fails, clean
+ * up inline.  Stops early when resched is needed so vzmond stays
+ * responsive.
+ */
+static void do_pending_env_cleanups(void)
+{
+	int err;
+	struct ve_struct *ve;
+
+	spin_lock(&ve_cleanup_lock);
+	while (1) {
+		if (list_empty(&ve_cleanup_list) || need_resched())
+			break;
+
+		ve = list_first_entry(&ve_cleanup_list,
+				struct ve_struct, cleanup_list);
+		list_del(&ve->cleanup_list);
+		spin_unlock(&ve_cleanup_lock);
+
+		/* module reference is released by the helper thread */
+		__module_get(THIS_MODULE);
+		err = kernel_thread(vzmond_helper, (void *)ve, 0);
+		if (err < 0) {
+			env_cleanup(ve);
+			module_put(THIS_MODULE);
+		}
+
+		spin_lock(&ve_cleanup_lock);
+	}
+	spin_unlock(&ve_cleanup_lock);
+}
+
+/* Non-locking peek: is there VE cleanup work queued? */
+static inline int have_pending_cleanups(void)
+{
+	return !list_empty(&ve_cleanup_list);
+}
+
+/*
+ * Main loop of the vzmond kernel thread: sleep until woken, process
+ * queued VE cleanups, and keep running until stop_vzmond is set AND the
+ * queue is empty.  Exits through complete_and_exit() so fini_vzmond()
+ * can wait for it.
+ */
+static int vzmond(void *arg)
+{
+	daemonize("vzmond");
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!stop_vzmond || have_pending_cleanups()) {
+		schedule();
+		try_to_freeze();
+		if (signal_pending(current))
+			flush_signals(current);
+
+		do_pending_env_cleanups();
+		set_current_state(TASK_INTERRUPTIBLE);
+		/* stay runnable if more work arrived while we processed */
+		if (have_pending_cleanups())
+			__set_current_state(TASK_RUNNING);
+	}
+
+	__set_task_state(current, TASK_RUNNING);
+	complete_and_exit(&vzmond_complete, 0);
+}
+
+/*
+ * Spawn the vzmond thread and remember its task_struct for later
+ * wake-up/shutdown.  Returns the thread pid or a negative errno.
+ */
+static int __init init_vzmond(void)
+{
+	int pid;
+	struct task_struct *tsk;
+
+	pid = kernel_thread(vzmond, NULL, 0);
+	if (pid > 0) {
+		tsk = find_task_by_pid_all(pid);
+		BUG_ON(tsk == NULL);
+		ve_cleanup_thread = tsk;
+	}
+	return pid;
+}
+
+/*
+ * Ask vzmond to stop, wake it, and wait for it to finish.  The thread
+ * drains its queue before exiting, hence the empty-list warning.
+ */
+static void fini_vzmond(void)
+{
+	stop_vzmond = 1;
+	wake_up_process(ve_cleanup_thread);
+	wait_for_completion(&vzmond_complete);
+	ve_cleanup_thread = NULL;
+	WARN_ON(!list_empty(&ve_cleanup_list));
+}
+
+/*
+ * Final VE memory release, invoked when the last reference is dropped:
+ * free the tty-driver copies, per-VE sysctl data, filesystem types,
+ * per-cpu stats and the ve_struct itself, then release the module ref
+ * taken in do_env_create().
+ */
+void real_do_env_free(struct ve_struct *ve)
+{
+	VZTRACE("real_do_env_free\n");
+
+	free_ve_tty_drivers(ve);
+	free_ve_sysctl(ve); /* free per ve sysctl data */
+	free_ve_filesystems(ve);
+	free_ve_cpustats(ve);
+	printk(KERN_INFO "CT: %d: stopped\n", VEID(ve));
+	kfree(ve->uidhash_table);
+	kfree(ve);
+
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL(real_do_env_free);
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE TTY handling
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * Clone a host tty_driver for use inside a VE.  The struct is copied
+ * wholesale; unless the driver keeps its per-tty state in devpts
+ * (TTY_DRIVER_DEVPTS_MEM) a fresh ttys/termios/termios_locked triple is
+ * allocated as one zeroed array of 3*num pointers.  Returns the clone
+ * or NULL on allocation failure.
+ */
+static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
+					   struct ve_struct *ve)
+{
+	size_t size;
+	struct tty_driver *driver;
+
+	driver = ub_kmalloc(sizeof(struct tty_driver), GFP_KERNEL);
+	if (!driver)
+		goto out;
+
+	memcpy(driver, base, sizeof(struct tty_driver));
+
+	driver->driver_state = NULL;
+
+	size = base->num * 3 * sizeof(void *);
+	if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
+		void **p;
+		p = ub_kzalloc(size, GFP_KERNEL);
+		if (!p)
+			goto out_free;
+
+		/* carve the three tables out of the single allocation */
+		driver->ttys = (struct tty_struct **)p;
+		driver->termios = (struct termios **)(p + driver->num);
+		driver->termios_locked = (struct termios **)
+			(p + driver->num * 2);
+	} else {
+		driver->ttys = NULL;
+		driver->termios = NULL;
+		driver->termios_locked = NULL;
+	}
+
+	driver->owner_env = ve;
+	driver->flags |= TTY_DRIVER_INSTALLED;
+	driver->refcount = 0;
+
+	return driver;
+
+out_free:
+	kfree(driver);
+out:
+	return NULL;
+}
+
+/*
+ * Free a per-VE tty_driver clone.  driver->ttys is the base of the
+ * combined ttys/termios/termios_locked allocation, so one kfree covers
+ * all three tables.  NULL-safe.
+ */
+static void free_ve_tty_driver(struct tty_driver *driver)
+{
+	if (!driver)
+		return;
+
+	clear_termios(driver);
+	kfree(driver->ttys);
+	kfree(driver);
+}
+
+/*
+ * Allocate the VE's private copies of the pty drivers (legacy BSD pairs
+ * and/or Unix98 ptm/pts, per config) plus the Unix98 pty-index idr.
+ * On any failure everything allocated so far is released.
+ * Returns 0 or -ENOMEM.
+ */
+static int alloc_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	/* Traditional BSD devices */
+	ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve);
+	if (!ve->pty_driver)
+		goto out_mem;
+
+	ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve);
+	if (!ve->pty_slave_driver)
+		goto out_mem;
+
+	ve->pty_driver->other       = ve->pty_slave_driver;
+	ve->pty_slave_driver->other = ve->pty_driver;
+#endif	
+
+#ifdef CONFIG_UNIX98_PTYS
+	ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
+	if (!ve->ptm_driver)
+		goto out_mem;
+
+	ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
+	if (!ve->pts_driver)
+		goto out_mem;
+
+	ve->ptm_driver->other = ve->pts_driver;
+	ve->pts_driver->other = ve->ptm_driver;
+
+	ve->allocated_ptys = ub_kmalloc(sizeof(*ve->allocated_ptys),
+			GFP_KERNEL);
+	if (!ve->allocated_ptys)
+		goto out_mem;
+	idr_init(ve->allocated_ptys);
+#endif
+	return 0;
+
+out_mem:
+	/* frees whatever subset was successfully allocated */
+	free_ve_tty_drivers(ve);
+	return -ENOMEM;
+}
+
+/*
+ * Release all per-VE pty driver clones and the Unix98 pty idr;
+ * each helper is NULL-safe so a partial allocation is handled too.
+ */
+static void free_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	free_ve_tty_driver(ve->pty_driver);
+	free_ve_tty_driver(ve->pty_slave_driver);
+	ve->pty_driver = ve->pty_slave_driver = NULL;
+#endif	
+#ifdef CONFIG_UNIX98_PTYS
+	free_ve_tty_driver(ve->ptm_driver);
+	free_ve_tty_driver(ve->pts_driver);
+	if (ve->allocated_ptys)
+		idr_destroy(ve->allocated_ptys);
+	kfree(ve->allocated_ptys);
+	ve->ptm_driver = ve->pts_driver = NULL;
+	ve->allocated_ptys = NULL;
+#endif
+}
+
+static inline void __register_tty_driver(struct tty_driver *driver)
+{
+	list_add(&driver->tty_drivers, &tty_drivers);
+}
+
+static inline void __unregister_tty_driver(struct tty_driver *driver)
+{
+	if (!driver)
+		return;
+	list_del(&driver->tty_drivers);
+}
+
+static int register_ve_tty_drivers(struct ve_struct* ve)
+{
+	write_lock_irq(&tty_driver_guard);
+#ifdef CONFIG_UNIX98_PTYS
+	__register_tty_driver(ve->ptm_driver);
+	__register_tty_driver(ve->pts_driver);
+#endif
+#ifdef CONFIG_LEGACY_PTYS
+	__register_tty_driver(ve->pty_driver);
+	__register_tty_driver(ve->pty_slave_driver);
+#endif	
+	write_unlock_irq(&tty_driver_guard);
+
+	return 0;
+}
+
+static void unregister_ve_tty_drivers(struct ve_struct* ve)
+{
+	VZTRACE("unregister_ve_tty_drivers\n");
+
+	write_lock_irq(&tty_driver_guard);
+#ifdef CONFIG_LEGACY_PTYS
+	__unregister_tty_driver(ve->pty_driver);
+	__unregister_tty_driver(ve->pty_slave_driver);
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	__unregister_tty_driver(ve->ptm_driver);
+	__unregister_tty_driver(ve->pts_driver);
+#endif
+	write_unlock_irq(&tty_driver_guard);
+}
+
+static int init_ve_tty_drivers(struct ve_struct *ve)
+{
+	int err;
+
+	if ((err = alloc_ve_tty_drivers(ve)))
+		goto err_ttyalloc;
+	if ((err = register_ve_tty_drivers(ve)))
+		goto err_ttyreg;
+	return 0;
+
+err_ttyreg:
+	free_ve_tty_drivers(ve);
+err_ttyalloc:
+	return err;
+}
+
+static void fini_ve_tty_drivers(struct ve_struct *ve)
+{
+	unregister_ve_tty_drivers(ve);
+	free_ve_tty_drivers(ve);
+}
+
+/*
+ * Free the termios and termios_locked structures because
+ * we don't want to get memory leaks when modular tty
+ * drivers are removed from the kernel.
+ */
+static void clear_termios(struct tty_driver *driver)
+{
+	int i;
+	struct termios *tp;
+
+	if (driver->termios == NULL)
+		return;
+	for (i = 0; i < driver->num; i++) {
+		tp = driver->termios[i];
+		if (tp) {
+			driver->termios[i] = NULL;
+			kfree(tp);
+		}
+		tp = driver->termios_locked[i];
+		if (tp) {
+			driver->termios_locked[i] = NULL;
+			kfree(tp);
+		}
+	}
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Pieces of VE network
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_NET
+#include <asm/uaccess.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#endif
+
+#ifdef CONFIG_INET
+/* Remove every IPv4 address configured on @dev (if it has an in_device). */
+static void ve_del_ip_addrs(struct net_device *dev)
+{
+	struct in_device *in_dev;
+
+	in_dev = in_dev_get(dev);
+	if (in_dev == NULL)
+		return;
+
+	while (in_dev->ifa_list != NULL) {
+		inet_del_ifa(in_dev, &in_dev->ifa_list, 1);
+	}
+	in_dev_put(in_dev);
+}
+
+/* Remove IPv6 addresses through the VE's ipv6 ops hook, when present. */
+static void ve_del_ipv6_addrs(struct ve_struct *ve, struct net_device *dev)
+{
+	if (ve->ipv6_ops)
+		ve->ipv6_ops->ifdown(dev, 2);
+}
+
+/*
+ * Quiesce @dev before it changes hands between VEs: strip addresses,
+ * bring it down, flush qdiscs, multicast and divert state.
+ * NOTE(review): the @to_ve argument is not used in this body.
+ * Returns the dev_close() result (0 when the device was already down).
+ */
+static int ve_netdev_cleanup(struct ve_struct *ve, struct net_device *dev, int to_ve)
+{
+	int err;
+
+	err = 0;
+	ve_del_ip_addrs(dev);
+	ve_del_ipv6_addrs(ve, dev);
+	if ((dev->flags & IFF_UP) != 0)
+		err = dev_close(dev);
+	synchronize_net();
+	dev_shutdown(dev);
+	dev_mc_discard(dev);
+	free_divert_blk(dev);
+	synchronize_net();
+	return err;
+}
+
+/*
+ * Relink @dev from @ve_src's device list and name/index hashes into
+ * @ve_dst's, and switch its exec beancounter to @exec_ub.  Called with
+ * dev_base_lock held for writing (and under rtnl); the lock is dropped
+ * temporarily so UNREGISTER/REGISTER notifiers and sysfs re-registration
+ * run in the proper VE execution context, then retaken before return.
+ */
+static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
+	struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
+{
+	struct net_device **dp, *d;
+	struct user_beancounter *ub;
+	struct ve_struct *exec_ve;
+
+	/* unlink from the source VE's singly-linked device list,
+	 * fixing up the tail pointer if @dev was last */
+	for (d = ve_src->_net_dev_base, dp = NULL; d != NULL; 
+	     dp = &d->next, d = d->next) {
+		if (d == dev) {
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			if (ve_src->_net_dev_tail == &dev->next)
+				ve_src->_net_dev_tail = dp;
+			if (dp)
+				*dp = dev->next;
+			dev->next = NULL;
+			break;
+		}
+	}
+	/* append to the destination VE's list and rehash there */
+	*ve_dst->_net_dev_tail = dev;
+	ve_dst->_net_dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
+	dev->owner_env = ve_dst;
+
+	ub = netdev_bc(dev)->exec_ub;
+	netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
+	put_beancounter(ub);
+
+	write_unlock_bh(&dev_base_lock);
+
+	exec_ve = set_exec_env(ve_src);
+	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+	netdev_unregister_sysfs(dev, 1);
+	(void)set_exec_env(exec_ve);
+
+	exec_ve = set_exec_env(ve_dst);
+	call_netdevice_notifiers(NETDEV_REGISTER, dev);
+	if (netdev_register_sysfs(dev, 1))
+		printk(KERN_ERR "Can't register %s dev in CT%d sysfs\n",
+				dev->name, ve_dst->veid);
+	(void)set_exec_env(exec_ve);
+
+	write_lock_bh(&dev_base_lock);
+}
+
+/*
+ * Move a host (VE0) network device named @dev_name into VE @veid.
+ * Fails with -ESRCH if the VE or device does not exist, -EPERM if the
+ * device type is not movable, -EINVAL for bonding slaves/masters and
+ * -EEXIST if the target VE already has a device with that name.
+ */
+static int ve_dev_add(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve;
+	struct hlist_node *p;
+	struct hlist_head *head;
+
+	dev = NULL;
+	err = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+
+	/* look the device up in VE0's name hash */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
+		struct net_device *d = hlist_entry(p, struct net_device, 
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+
+	err = -EINVAL;
+	if (dev->flags & (IFF_SLAVE|IFF_MASTER))
+		goto out_unlock;
+
+	/* Check for existence of name */
+	head = dev_name_hash(dev->name, ve);
+	hlist_for_each(p, head) {
+		struct net_device *d
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+			err = -EEXIST;
+ 			goto out_unlock;
+		}
+ 	}
+
+	ve_netdev_cleanup(ve, dev, 1);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+
+/*
+ * Move device @dev_name out of VE @veid back to the host (VE0).
+ * The cleanup runs with the VE as execution context; ownership of the
+ * exec beancounter reverts to the device's original owner_ub.
+ */
+static int ve_dev_del(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve, *old_exec;
+	struct hlist_node *p;
+
+	dev = NULL;
+	err = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+
+	/* look the device up in the VE's own name hash */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, ve)) {
+		struct net_device *d = hlist_entry(p, struct net_device, 
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+
+	old_exec = set_exec_env(ve);
+	ve_netdev_cleanup(ve, dev, 0);
+	(void)set_exec_env(old_exec);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+
+/*
+ * Entry point for VZCTL_VE_NETDEV: dispatch add/del after a
+ * capability check.  Returns -EPERM without setveid capability,
+ * -EINVAL for unknown ops.
+ */
+int real_ve_dev_map(envid_t veid, int op, char *dev_name)
+{
+	int err;
+	err = -EPERM;
+	if (!capable_setveid())
+		goto out;
+	switch (op)
+	{
+		case VE_NETDEV_ADD:
+			err = ve_dev_add(veid, dev_name);
+			break;
+		case VE_NETDEV_DEL:
+			err = ve_dev_del(veid, dev_name);
+			break;
+		default:
+			err = -EINVAL;
+			break;
+	}
+out:
+	return err;
+}
+
+/*
+ * On VE shutdown, hand every mapped physical device back to VE0.
+ * venet devices and the VE's loopback are skipped.  The list walk
+ * restarts after each move because dev_base_lock is dropped for the
+ * cleanup and the VE's device list changes under us.
+ */
+static void ve_mapped_devs_cleanup(struct ve_struct *ve)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	write_lock_bh(&dev_base_lock);
+restart:
+	for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
+	{
+		if ((dev->features & NETIF_F_VENET) ||
+		    (dev == ve->_loopback_dev)) /* Skip loopback dev */
+			continue;
+		write_unlock_bh(&dev_base_lock);
+		ve_netdev_cleanup(ve, dev, 0);
+		write_lock_bh(&dev_base_lock);
+		__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+		goto restart;
+	}
+	write_unlock_bh(&dev_base_lock);
+	rtnl_unlock();
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE information via /proc
+ *
+ **********************************************************************
+ **********************************************************************/
+#ifdef CONFIG_PROC_FS
+/* Column layout of /proc/vz/vestat differs between 32- and 64-bit. */
+#if BITS_PER_LONG == 32
+#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
+#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
+#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
+#else
+#define VESTAT_LINE_WIDTH (12 * 21)
+#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
+#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
+#endif
+
+/*
+ * Emit one /proc/vz/vestat row: per-VE CPU time (summed over online
+ * CPUs), uptime in jiffies and cycles, idle time and scheduling latency.
+ * The header is printed before the first list entry, or for the VE
+ * itself when read from inside a container.  VE0 itself produces no row.
+ * NOTE(review): strv_time is always printed as 0 here (placeholder).
+ */
+static int vestat_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *entry;
+	struct ve_struct *ve;
+	struct ve_struct *curve;
+	int cpu;
+	unsigned long user_ve, nice_ve, system_ve;
+	unsigned long long uptime;
+	cycles_t uptime_cycles, idle_time, strv_time, used;
+
+	entry = (struct list_head *)v;
+	ve = list_entry(entry, struct ve_struct, ve_list);
+
+	curve = get_exec_env();
+	if (entry == ve_list_head.next ||
+	    (!ve_is_super(curve) && ve == curve)) {
+		/* print header */
+		seq_printf(m, "%-*s\n",
+			VESTAT_LINE_WIDTH - 1,
+			"Version: 2.2");
+		seq_printf(m, VESTAT_HEAD_FMT, "VEID",
+					"user", "nice", "system",
+					"uptime", "idle",
+					"strv", "uptime", "used",
+					"maxlat", "totlat", "numsched");
+	}
+
+	if (ve == get_ve0())
+		return 0;
+
+	user_ve = nice_ve = system_ve = 0;
+	idle_time = strv_time = used = 0;
+
+	for_each_online_cpu(cpu) {
+		struct ve_cpu_stats *st;
+
+		st = VE_CPU_STATS(ve, cpu);
+		user_ve += st->user;
+		nice_ve += st->nice;
+		system_ve += st->system;
+		used += st->used_time;
+	}
+	idle_time = ve_sched_get_idle_time_total(ve);
+	uptime_cycles = get_cycles() - ve->start_cycles;
+	uptime = get_jiffies_64() - ve->start_jiffies;
+
+	seq_printf(m, VESTAT_LINE_FMT, ve->veid,
+				user_ve, nice_ve, system_ve,
+				(unsigned long long)uptime,
+				(unsigned long long)idle_time, 
+				(unsigned long long)strv_time,
+				(unsigned long long)uptime_cycles,
+				(unsigned long long)used,
+				(unsigned long long)ve->sched_lat_ve.last.maxlat,
+				(unsigned long long)ve->sched_lat_ve.last.totlat,
+				ve->sched_lat_ve.last.count);
+	return 0;
+}
+
+/*
+ * seq_file start: takes ve_list_lock for reading (released in stop).
+ * Inside a container only the caller's own VE is iterated (one entry);
+ * in VE0 the whole ve_list is walked, skipping *pos entries.
+ */
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *curve;
+	struct list_head *entry;
+	loff_t l;
+
+	curve = get_exec_env();
+	read_lock(&ve_list_lock);
+	if (!ve_is_super(curve)) {
+		if (*pos != 0)
+			return NULL;
+		return curve;
+	}
+
+	l = *pos;
+	list_for_each(entry, &ve_list_head) {
+		if (l == 0)
+			return entry;
+		l--;
+	}
+	return NULL;
+}
+
+/* seq_file next: advance along ve_list; single-shot inside a container. */
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *entry;
+
+	entry = (struct list_head *)v;
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
+	(*pos)++;
+	return entry->next == &ve_list_head ? NULL : entry->next;
+}
+
+/* seq_file stop: drop the list lock taken in ve_seq_start(). */
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_lock);
+}
+
+static struct seq_operations vestat_seq_op = {
+        .start	= ve_seq_start,
+        .next	= ve_seq_next,
+        .stop	= ve_seq_stop,
+        .show	= vestat_seq_show
+};
+
+static int vestat_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &vestat_seq_op);
+}
+
+/* file_operations for /proc/vz/vestat */
+static struct file_operations proc_vestat_operations = {
+        .open	 = vestat_open,
+        .read	 = seq_read,
+        .llseek	 = seq_lseek,
+        .release = seq_release
+};
+
+/* /proc/vz/version: print the VZVERSION string, one line. */
+static int vz_version_show(struct seq_file *file, void* v)
+{
+	static const char ver[] = VZVERSION "\n";
+
+	return seq_puts(file, ver);
+}
+
+static int vz_version_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vz_version_show, NULL);
+}
+
+static struct file_operations proc_vz_version_oparations = {
+	.open    = vz_version_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+/*
+ * Memory charged to the VE, picked from the beancounter: either
+ * oomguarpages or privvmpages held, per the glob_ve_meminfo switch.
+ */
+static inline unsigned long ve_used_mem(struct user_beancounter *ub)
+{
+	extern int glob_ve_meminfo;
+	return glob_ve_meminfo ? ub->ub_parms[UB_OOMGUARPAGES].held :
+				 ub->ub_parms[UB_PRIVVMPAGES].held ;
+}
+
+/*
+ * Fill swap totals from the beancounter's swappages limit/held.
+ * An unlimited (UB_MAXVALUE) limit is reported as no swap at all.
+ */
+static void ve_swapinfo(struct sysinfo *val, struct user_beancounter *ub)
+{
+	unsigned long size, used;
+
+	size = ub->ub_parms[UB_SWAPPAGES].limit;
+	used = ub->ub_parms[UB_SWAPPAGES].held;
+
+	if (size == UB_MAXVALUE)
+		size = 0;
+
+	val->totalswap = size;
+	val->freeswap = size > used ? size - used : 0;
+}
+
+/*
+ * Virtualize meminfo for the calling VE according to its meminfo_val
+ * setting: VE_MEMINFO_SYSTEM exposes host numbers, VE_MEMINFO_DEFAULT
+ * keeps default handling, any other value caps totalram at that number
+ * of pages and derives freeram/swap from beancounter usage.
+ */
+static inline int ve_mi_replace(struct meminfo *mi)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+	unsigned long meminfo_val;
+	unsigned long nodettram;
+	unsigned long usedmem;
+
+	meminfo_val = get_exec_env()->meminfo_val;
+
+	if (meminfo_val == VE_MEMINFO_SYSTEM)
+		return NOTIFY_OK; /* No virtualization */
+
+	if (meminfo_val == VE_MEMINFO_DEFAULT)
+		return NOTIFY_DONE; /* Default behaviour */
+
+	nodettram = mi->si.totalram;
+	if (mi->ub != NULL)
+		ub = top_beancounter(mi->ub);
+	else
+		ub = top_beancounter(current->mm->mm_ub);
+
+	usedmem = ve_used_mem(ub);
+
+	memset(mi, 0, sizeof(*mi));
+
+	mi->si.totalram = (meminfo_val > nodettram) ?
+			nodettram : meminfo_val;
+	mi->si.freeram = (mi->si.totalram > usedmem) ?
+			(mi->si.totalram - usedmem) : 0;
+
+	mi->pi.nr_file_dirty = ub_dirty_pages(ub);
+
+	ve_swapinfo(&mi->si, ub);
+
+	return NOTIFY_OK; /* No more virtualization */
+#else
+	return NOTIFY_DONE;
+#endif
+}
+
+/* virtinfo notifier: intercept only VIRTINFO_MEMINFO events. */
+static int meminfo_call(struct vnotifier_block *self,
+                unsigned long event, void *arg, int old_ret)
+{
+	if (event != VIRTINFO_MEMINFO)
+		return old_ret;
+
+	return ve_mi_replace((struct meminfo *)arg);
+}
+
+
+static struct vnotifier_block meminfo_notifier_block = {
+	.notifier_call = meminfo_call,
+	.priority = INT_MAX,
+};
+
+/* /proc/vz/veinfo */
+
+/*
+ * Optional callback (registered by vzmon users, e.g. the venet module)
+ * that appends per-VE address info to each veinfo line.  Protected by
+ * RCU: readers snapshot it in veinfo_seq_start() under rcu_read_lock.
+ */
+static ve_seq_print_t veaddr_seq_print_cb;
+
+void vzmon_register_veaddr_print_cb(ve_seq_print_t cb)
+{
+	rcu_assign_pointer(veaddr_seq_print_cb, cb);
+}
+EXPORT_SYMBOL(vzmon_register_veaddr_print_cb);
+
+void vzmon_unregister_veaddr_print_cb(ve_seq_print_t cb)
+{
+	rcu_assign_pointer(veaddr_seq_print_cb, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(vzmon_unregister_veaddr_print_cb);
+
+/*
+ * One veinfo row: veid, class and process count, plus whatever the
+ * registered address callback (snapshotted into m->private) appends.
+ */
+static int veinfo_seq_show(struct seq_file *m, void *v)
+{
+	struct ve_struct *ve;
+	ve_seq_print_t veaddr_seq_print;
+
+	ve = list_entry((struct list_head *)v, struct ve_struct, ve_list);
+
+	seq_printf(m, "%10u %5u %5u", ve->veid,
+			ve->class_id, atomic_read(&ve->pcounter));
+
+	veaddr_seq_print = m->private;
+	if (veaddr_seq_print)
+		veaddr_seq_print(m, ve);
+
+	seq_putc(m, '\n');
+	return 0;
+}
+
+/*
+ * Same iteration scheme as /proc/vz/vestat, but additionally holds
+ * rcu_read_lock across the walk so the veaddr callback stays valid.
+ */
+static void *veinfo_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *curve;
+	struct list_head *entry;
+	loff_t l;
+
+	rcu_read_lock();
+	m->private = rcu_dereference(veaddr_seq_print_cb);
+	curve = get_exec_env();
+	read_lock(&ve_list_lock);
+	if (!ve_is_super(curve)) {
+		if (*pos != 0)
+			return NULL;
+		return curve;
+	}
+
+	l = *pos;
+	list_for_each(entry, &ve_list_head) {
+		if (l == 0)
+			return entry;
+		l--;
+	}
+	return NULL;
+}
+
+static void *veinfo_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *entry;
+
+	entry = (struct list_head *)v;
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
+	(*pos)++;
+	return entry->next == &ve_list_head ? NULL : entry->next;
+}
+
+/* Unlock in reverse order of veinfo_seq_start(). */
+static void veinfo_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_lock);
+	rcu_read_unlock();
+}
+
+
+static struct seq_operations veinfo_seq_op = {
+	.start	= veinfo_seq_start,
+	.next	=  veinfo_seq_next,
+	.stop	=  veinfo_seq_stop,
+	.show	=  veinfo_seq_show,
+};
+
+static int veinfo_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &veinfo_seq_op);
+}
+
+static struct file_operations proc_veinfo_operations = {
+	.open		= veinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * Create the /proc/vz entries (vestat, devperms, veinfo, version) and
+ * hook the meminfo virtualization notifier.  Creation failures are only
+ * logged; initialization still reports success.  The "vz" directory is
+ * created lazily if the first vestat creation attempt fails.
+ */
+static int __init init_vecalls_proc(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry_mod("vz/vestat",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de == NULL) {
+		/* create "vz" subdirectory, if not exist */
+		(void) create_proc_glob_entry("vz",
+					      S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		de = create_proc_glob_entry_mod("vz/vestat",
+				S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	}
+	if (de)
+		de->proc_fops = &proc_vestat_operations;
+	else
+		printk(KERN_WARNING 
+				"VZMON: can't make vestat proc entry\n");
+
+	de = create_proc_entry_mod("vz/devperms", S_IFREG | S_IRUSR, NULL,
+				THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_devperms_ops;
+	else
+		printk(KERN_WARNING
+				"VZMON: can't make devperms proc entry\n");
+
+	de = create_proc_glob_entry_mod("vz/veinfo", S_IFREG | S_IRUSR, NULL,
+				THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_veinfo_operations;
+	else
+		printk(KERN_WARNING "VZMON: can't make veinfo proc entry\n");
+
+	
+	de = create_proc_entry_mod("vz/version", S_IFREG | 0444, NULL,
+				THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_vz_version_oparations;
+	else
+		printk(KERN_WARNING
+				"VZMON: can't make version proc entry\n");
+
+	virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block);
+
+	return 0;
+}
+
+/* Remove the /proc/vz entries and the meminfo notifier. */
+static void fini_vecalls_proc(void)
+{
+	remove_proc_entry("vz/version", NULL);
+	remove_proc_entry("vz/devperms", NULL);
+	remove_proc_entry("vz/vestat", NULL);
+	remove_proc_entry("vz/veinfo", NULL);
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block);
+}
+#else
+#define init_vecalls_proc()	(0)
+#define fini_vecalls_proc()	do { } while (0)
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Overwrite the VE's uts release string (what uname -r reports inside
+ * the container).  Length is validated against the uts buffer before
+ * the copy; the write happens under uts_sem.
+ */
+static int init_ve_osrelease(struct ve_struct *ve, char *release)
+{
+	if (!release)
+		return -ENODATA;
+
+	if (strlen(release) >= sizeof(ve->ve_ns->uts_ns->name.release))
+		return -EMSGSIZE;
+
+	down_write(&uts_sem);
+	strcpy(ve->ve_ns->uts_ns->name.release, release);
+	up_write(&uts_sem);
+
+	return 0;
+}
+
+/*
+ * Apply one VE_CONFIGURE_* key to VE @veid.  Unknown keys return
+ * -ENOKEY; a nonexistent VE returns -EINVAL.  @val and @size are
+ * available for future keys (only OS_RELEASE uses @data today).
+ */
+static int ve_configure(envid_t veid, unsigned int key,
+			unsigned int val, unsigned int size, char *data)
+{
+	struct ve_struct *ve;
+	int err = -ENOKEY;
+
+	ve = get_ve_by_id(veid);
+	if (!ve)
+		return -EINVAL;
+
+	switch(key) {
+	case VE_CONFIGURE_OS_RELEASE:
+		err = init_ve_osrelease(ve, data); 
+		break;
+ 	}
+
+	real_put_ve(ve);
+ 	return err;
+}
+
+/*
+ * VZCTL_VE_CONFIGURE handler: copy the request header and its optional
+ * payload (bounded by PAGE_SIZE, NUL-terminated via kzalloc of size+1)
+ * from userspace, then dispatch to ve_configure().
+ */
+static int ve_configure_ioctl(struct vzctl_ve_configure *arg)
+{
+	int err;
+	struct vzctl_ve_configure s;
+	char *data = NULL;
+
+	err = -EFAULT;
+	if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+		goto out;
+	if (s.size) {
+		if (s.size > PAGE_SIZE)
+			return -EMSGSIZE;
+
+		data = kzalloc(s.size + 1, GFP_KERNEL);
+		if (unlikely(!data))
+			return -ENOMEM;
+
+		if (copy_from_user(data, (void __user *) &arg->data, s.size))
+			goto out;
+	}
+	err = ve_configure(s.veid, s.key, s.val, s.size, data);
+out:
+	kfree(data);
+	return err;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * User ctl
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/*
+ * Dispatcher for the VZCTLTYPE ioctl family (/dev/vzctl): container
+ * create, device permissions, netdev mapping, CPU stats, meminfo and
+ * configuration.  @arg is a userspace pointer; every case copies its
+ * own argument struct in.  Unknown commands return -ENOTTY.
+ */
+int vzcalls_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VZCTL_MARK_ENV_TO_DOWN: {
+		        /* Compatibility issue */
+		        err = 0;
+		}
+		break;
+	    case VZCTL_SETDEVPERMS: {
+			/* Device type was mistakenly declared as dev_t
+			 * in the old user-kernel interface.
+			 * That's wrong, dev_t is a kernel internal type.
+			 * I use `unsigned' not having anything better in mind.
+			 * 2001/08/11  SAW  */
+			struct vzctl_setdevperms s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = real_setdevperms(s.veid, s.type,
+					new_decode_dev(s.dev), s.mask);
+		}
+		break;
+#ifdef CONFIG_INET
+	    case VZCTL_VE_NETDEV: {
+			struct vzctl_ve_netdev d;
+			char *s;
+			err = -EFAULT;
+			if (copy_from_user(&d, (void __user *)arg, sizeof(d)))
+				break;
+			err = -ENOMEM;
+			s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);
+			if (s == NULL)
+				break;
+			err = -EFAULT;
+			/* copy the interface name and force termination */
+			if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) > 0) {
+				s[IFNAMSIZ] = 0;
+				err = real_ve_dev_map(d.veid, d.op, s);
+			}
+			kfree(s);
+		}
+		break;
+#endif
+	    case VZCTL_ENV_CREATE: {
+			struct vzctl_env_create s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				NULL, 0);
+		}
+		break;
+	    case VZCTL_ENV_CREATE_DATA: {
+			struct vzctl_env_create_data s;
+			env_create_param_t *data;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err=-EINVAL;
+			/* datalen must fit the env_create_param ABI window */
+			if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
+			    s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
+			    s.data == 0)
+				break;
+			err = -ENOMEM;
+			data = kzalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				break;
+
+			err = -EFAULT;
+			if (copy_from_user(data, (void __user *)s.data,
+						s.datalen))
+				goto free_data;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				data, s.datalen);
+free_data:
+			kfree(data);
+		}
+		break;
+	    case VZCTL_GET_CPU_STAT: {
+			struct vzctl_cpustatctl s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = ve_get_cpu_stat(s.veid, s.cpustat);
+		}
+		break;
+	    case VZCTL_VE_MEMINFO: {
+			struct vzctl_ve_meminfo s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = ve_set_meminfo(s.veid, s.val);
+		}
+		break;
+	    case VZCTL_VE_CONFIGURE:
+		err = ve_configure_ioctl((struct vzctl_ve_configure *)arg);
+		break;
+	}
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * 32-bit compat entry: rebuild the native argument struct on the
+ * compat user stack (compat_alloc_user_space) and re-enter
+ * vzcalls_ioctl() with the native command.  Commands with identical
+ * 32/64-bit layouts fall through to vzcalls_ioctl() unchanged.
+ * NOTE(review): VZCTL_GET_CPU_STAT deliberately(?) falls through into
+ * the ENV_CREATE_DATA case — marked FIXME below; confirm before use.
+ */
+int compat_vzcalls_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+
+	switch(cmd) {
+	case VZCTL_GET_CPU_STAT: {
+		/* FIXME */
+	}
+	case VZCTL_COMPAT_ENV_CREATE_DATA: {
+		struct compat_vzctl_env_create_data cs;
+		struct vzctl_env_create_data __user *s;
+
+		s = compat_alloc_user_space(sizeof(*s));
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+
+		if (put_user(cs.veid, &s->veid) ||
+		    put_user(cs.flags, &s->flags) ||
+		    put_user(cs.class_id, &s->class_id) ||
+		    put_user(compat_ptr(cs.data), &s->data) ||
+		    put_user(cs.datalen, &s->datalen))
+			break;
+		err = vzcalls_ioctl(file, VZCTL_ENV_CREATE_DATA,
+						(unsigned long)s);
+		break;
+	}
+#ifdef CONFIG_NET
+	case VZCTL_COMPAT_VE_NETDEV: {
+		struct compat_vzctl_ve_netdev cs;
+		struct vzctl_ve_netdev __user *s;
+
+		s = compat_alloc_user_space(sizeof(*s));
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+
+		if (put_user(cs.veid, &s->veid) ||
+		    put_user(cs.op, &s->op) ||
+		    put_user(compat_ptr(cs.dev_name), &s->dev_name))
+			break;
+		err = vzcalls_ioctl(file, VZCTL_VE_NETDEV, (unsigned long)s);
+		break;
+	}
+#endif
+	case VZCTL_COMPAT_VE_MEMINFO: {
+		struct compat_vzctl_ve_meminfo cs;
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+		err = ve_set_meminfo(cs.veid, cs.val);
+		break;
+	}
+	default:
+		/* layout-compatible commands go straight through */
+		err = vzcalls_ioctl(file, cmd, arg);
+		break;
+	}
+	return err;
+}
+#endif
+
+/* Registration record hooking this module into the vzdev multiplexer. */
+static struct vzioctlinfo vzcalls = {
+	.type		= VZCTLTYPE,
+	.ioctl		= vzcalls_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_vzcalls_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+};
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Init/exit stuff
+ *
+ **********************************************************************
+ **********************************************************************/
+
+/* Export vzmon's resolvable symbols via the KSYM indirection layer. */
+static int __init init_vecalls_symbols(void)
+{
+	KSYMRESOLVE(real_do_env_free);
+	KSYMMODRESOLVE(vzmon);
+	return 0;
+}
+
+static void fini_vecalls_symbols(void)
+{
+	KSYMMODUNRESOLVE(vzmon);
+	KSYMUNRESOLVE(real_do_env_free);
+}
+
+/* Hook/unhook the VZCTLTYPE handler into the /dev/vzctl multiplexer. */
+static inline __init int init_vecalls_ioctls(void)
+{
+	vzioctl_register(&vzcalls);
+	return 0;
+}
+
+static inline void fini_vecalls_ioctls(void)
+{
+	vzioctl_unregister(&vzcalls);
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *table_header;
+
+/* kernel.ve_allow_kthreads: whether kernel threads may run inside VEs. */
+static ctl_table kernel_table[] = {
+	{
+		.ctl_name	= KERN_VE_ALLOW_KTHREADS,
+		.procname	= "ve_allow_kthreads",
+		.data		= &ve_allow_kthreads,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+
+static ctl_table root_table[] =  {
+	{CTL_KERN, "kernel",  NULL, 0, 0555, kernel_table},
+	{ 0 }
+};
+
+/* Register the module's sysctl subtree; header kept for teardown. */
+static int init_vecalls_sysctl(void)
+{
+	table_header = register_sysctl_table(root_table, 0);
+	if (!table_header)
+		return -ENOMEM ;
+	return 0;
+}
+
+static void fini_vecalls_sysctl(void)
+{
+	unregister_sysctl_table(table_header);
+} 
+#else
+static int init_vecalls_sysctl(void) { return 0; }
+static void fini_vecalls_sysctl(void) { ; }
+#endif
+
+/*
+ * Stub devtmpfs: registered only so the fs type is visible (e.g. in
+ * /proc/filesystems) inside containers; mounting it always fails.
+ */
+static int devtmpfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	/* This fs is only required to be visible in there */
+	return -EOPNOTSUPP;
+}
+
+static struct file_system_type devtmpfs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "devtmpfs",
+	.get_sb = devtmpfs_get_sb,
+	.fs_flags = FS_VIRTUALIZED,
+};
+
+/*
+ * Module init: bring up sysctl, vzmond, symbol exports, /proc entries,
+ * ioctl registration and the devtmpfs stub, unwinding in reverse order
+ * on any failure.  (The label names describe the step being undone
+ * next, not the failing step — e.g. out_vzmond is reached when sysctl
+ * init itself fails and simply returns err.)
+ */
+static int __init vecalls_init(void)
+{
+	int err;
+
+	err = init_vecalls_sysctl();
+	if (err)
+		goto out_vzmond;
+
+	err = init_vzmond();
+	if (err < 0)
+		goto out_sysctl;
+
+	err = init_vecalls_symbols();
+	if (err < 0)
+		goto out_sym;
+
+	err = init_vecalls_proc();
+	if (err < 0)
+		goto out_proc;
+
+	err = init_vecalls_ioctls();
+	if (err < 0)
+		goto out_ioctls;
+
+	err = register_filesystem(&devtmpfs_fs_type);
+	if (err < 0)
+		goto out_devtmpfs;
+
+	return 0;
+
+out_devtmpfs:
+	fini_vecalls_ioctls();
+out_ioctls:
+	fini_vecalls_proc();
+out_proc:
+	fini_vecalls_symbols();
+out_sym:
+	fini_vzmond();
+out_sysctl:
+	fini_vecalls_sysctl();
+out_vzmond:
+	return err;
+}
+
+/* Module exit: tear everything down in exact reverse of vecalls_init(). */
+static void vecalls_exit(void)
+{
+	unregister_filesystem(&devtmpfs_fs_type);
+	fini_vecalls_ioctls();
+	fini_vecalls_proc();
+	fini_vecalls_symbols();
+	fini_vzmond();
+	fini_vecalls_sysctl();
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Control");
+MODULE_LICENSE("GPL v2");
+
+module_init(vecalls_init)
+module_exit(vecalls_exit)
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/veowner.c kernel-2.6.18-417.el5-028stab121/kernel/ve/veowner.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/veowner.c	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/veowner.c	2017-01-13 08:40:40.000000000 -0500
@@ -0,0 +1,302 @@
+/*
+ *  kernel/ve/veowner.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/ipc.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/inetdevice.h>
+#include <linux/xattr.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <net/tcp.h>
+
+/*
+ * Attach an early boot task to VE0: mirror its pids into the virtual
+ * pid fields, point its exec/owner env at VE0 and zero its VE
+ * accounting.  Tasks with a nonzero pid (i.e. not the idle thread) are
+ * also linked onto VE0's task lists and counted in pcounter.
+ */
+void prepare_ve0_process(struct task_struct *tsk)
+{
+	set_virt_pid(tsk, tsk->pid);
+	set_virt_tgid(tsk, tsk->tgid);
+	if (tsk->signal) {
+		set_virt_pgid(tsk, tsk->signal->pgrp);
+		set_virt_sid(tsk, tsk->signal->session);
+	}
+	VE_TASK_INFO(tsk)->exec_env = get_ve0();
+	VE_TASK_INFO(tsk)->owner_env = get_ve0();
+	VE_TASK_INFO(tsk)->sleep_time = 0;
+	VE_TASK_INFO(tsk)->wakeup_stamp = 0;
+	VE_TASK_INFO(tsk)->sched_time = 0;
+	seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
+
+	if (tsk->pid) {
+		list_add_rcu(&tsk->ve_task_info.vetask_list,
+				&get_ve0()->vetask_lh);
+		list_add(&tsk->ve_task_info.aux_list,
+			 &get_ve0()->vetask_auxlist);
+		atomic_inc(&get_ve0()->pcounter);
+	}
+}
+
+#ifdef CONFIG_NET
+/* VE0 uses the host's global loopback device as its loopback. */
+void prepare_ve0_loopback(void)
+{
+	get_ve0()->_loopback_dev = &loopback_dev;
+}
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * proc entries
+ * ------------------------------------------------------------------------
+ */
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Move the proc entry @name from directory @sdir to @ddir, reparenting
+ * it and fixing up the directories' nlink counts.  Silently does
+ * nothing when the entry is absent.  Runs during early boot before
+ * concurrent proc access, hence no locking here.
+ */
+static void proc_move(struct proc_dir_entry *ddir,
+		struct proc_dir_entry *sdir,
+		const char *name)
+{
+	struct proc_dir_entry **p, *q;
+	int len;
+
+	len = strlen(name);
+	for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p)
+		if (proc_match(len, name, q))
+			break;
+	if (q == NULL)
+		return;
+	*p = q->next;
+	q->parent = ddir;
+	q->next = ddir->subdir;
+	ddir->subdir = q;
+	if (S_ISDIR(q->mode)) {
+		sdir->nlink--;
+		ddir->nlink++;
+	}
+}
+/*
+ * Move the entries in @table back from VE0's private proc root into
+ * the global proc tree, so they remain visible (and virtualizable)
+ * inside every container.
+ */
+static void prepare_proc_misc(void)
+{
+	static char *table[] = {
+		"loadavg",
+		"uptime",
+		"meminfo",
+		"version",
+		"stat",
+		"filesystems",
+		"locks",
+		"swaps",
+		"mounts",
+		"net",
+		"cpuinfo",
+		"sysvipc",
+		"sys",
+		"fs",
+		"vz",
+		"cmdline",
+		"vmstat",
+		"modules",
+		"devices",
+		"sysrq-trigger",
+		NULL,
+	};
+	char **p;
+
+	for (p = table; *p != NULL; p++)
+		proc_move(&proc_root, ve0.proc_root, *p);
+}
+/*
+ * Split the boot-time proc tree: everything registered so far becomes
+ * private to VE0, then prepare_proc_misc() returns the globally-shared
+ * entries, and the per-VE skeleton dirs (net, vz, sysvipc, fs) are
+ * recreated.  Runs with VE0 as the execution environment.
+ */
+int prepare_proc(void)
+{
+	struct ve_struct *envid;
+	struct proc_dir_entry *de;
+	struct proc_dir_entry *ve_root;
+
+	envid = set_exec_env(&ve0);
+	ve_root = ve0.proc_root->subdir;
+	/* move the whole tree to be visible in VE0 only */
+	ve0.proc_root->subdir = proc_root.subdir;
+	ve0.proc_root->nlink += proc_root.nlink - 2;
+	for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)
+		de->parent = ve0.proc_root;
+	de->parent = ve0.proc_root;
+	de->next = ve_root;
+
+	/* move back into the global scope some specific entries */
+	proc_root.subdir = NULL;
+	proc_root.nlink = 2;
+	prepare_proc_misc();
+	proc_net = proc_mkdir("net", ve0.proc_root);
+	proc_net_stat = proc_mkdir("stat", proc_net);
+	proc_mkdir("vz", NULL);
+#ifdef CONFIG_SYSVIPC
+	proc_mkdir("sysvipc", NULL);
+#endif
+	proc_root_fs = proc_mkdir("fs", NULL);
+	/* XXX proc_tty_init(); */
+
+	/* XXX process inodes */
+
+	(void)set_exec_env(envid);
+
+	(void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	return 0;
+}
+
+/* Static root directory entry for VE0's private proc tree. */
+static struct proc_dir_entry ve0_proc_root = {
+	.name = "/proc",
+	.namelen = 5,
+	.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	.nlink = 2
+};
+
+void prepare_ve0_proc_root(void)
+{
+	ve0.proc_root = &ve0_proc_root;
+}
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * Virtualized sysctl
+ * ------------------------------------------------------------------------
+ */
+/* VE extended-attribute policy knob, exposed as fs.ve-xattr-policy. */
+int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
+extern int ve_area_access_check;
+
+/* fs.fsync-enable: fsync behaviour switch inside containers. */
+int sysctl_fsync_enable = 2;
+
+#ifdef CONFIG_INET
+static ctl_table vz_ipv4_route_table[] = {
+	{
+		.ctl_name	= NET_IPV4_ROUTE_SRC_CHECK,
+		.procname	= "src_check",
+		.data		= &ip_rt_src_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table vz_ipv4_table[] = {
+	{NET_IPV4_ROUTE, "route", NULL, 0, 0555, vz_ipv4_route_table},
+	{ 0 }
+};
+static ctl_table vz_net_table[] = {
+	{NET_IPV4,   "ipv4",      NULL, 0, 0555, vz_ipv4_table},
+	{ 0 }
+};
+#endif
+/* fs.* knobs added by the VZ patch (hard-coded ctl_name numbers). */
+static ctl_table vz_fs_table[] = {
+	{
+		.ctl_name	= 226,
+		.procname	= "ve-area-access-check",
+		.data		= &ve_area_access_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 228,
+		.procname	= "ve-xattr-policy",
+		.data		= &ve_xattr_policy,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 227,
+		.procname	= "fsync-enable",
+		.data		= &sysctl_fsync_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+
+/* Hidden placeholder so the "kernel" sysctl dir exists in every VE. */
+static int dummy_pde_data = 0;
+static ctl_table dummy_kern_table[] = {
+	{
+		.ctl_name	= 23571113,
+		.procname	= ".dummy-pde",
+		.data		= &dummy_pde_data,
+		.maxlen		= sizeof(int),
+		.mode		= 0400,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+static ctl_table root_table2[] = {
+#ifdef CONFIG_INET
+	{CTL_NET, "net", NULL, 0, 0555, vz_net_table},
+#endif
+	{CTL_FS, "fs", NULL, 0, 0555, vz_fs_table},
+	{CTL_KERN, "kernel", NULL, 0, 0555, dummy_kern_table},
+	{ 0 }
+};
+/* Register the VZ sysctl additions, with VE0 as execution context. */
+int prepare_sysctl(void)
+{
+	struct ve_struct *envid;
+
+	envid = set_exec_env(&ve0);
+	register_sysctl_table(root_table2, 0);
+	(void)set_exec_env(envid);
+	return 0;
+}
+
+/* Initialize VE0's sysctl list and its /proc/sys root directory. */
+void prepare_ve0_sysctl(void)
+{
+	INIT_LIST_HEAD(&ve0.sysctl_lh);
+#ifdef CONFIG_SYSCTL
+	ve0.proc_sys_root = proc_mkdir("sys", NULL);
+#endif
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * XXX init_ve_system
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Late boot setup of VE0: record init (child_reaper) as its init task,
+ * grant the full initial capability bounding set, snapshot init's fs
+ * root as the VE root, take the global IPv4 devconf pointers, and run
+ * the common proc/sysctl preparation.
+ */
+void init_ve_system(void)
+{
+	struct task_struct *init_entry;
+	struct ve_struct *ve;
+
+	ve = get_ve0();
+
+	init_entry = child_reaper;
+	ve->init_entry = init_entry;
+	/* if ve_move_task to VE0 (e.g. in cpt code)	*
+	 * occurs, ve_cap_bset on VE0 is required	*/
+	ve->ve_cap_bset = CAP_INIT_EFF_SET;
+
+#ifdef CONFIG_INET
+	ve->_ipv4_devconf = &ipv4_devconf;
+	ve->_ipv4_devconf_dflt = &ipv4_devconf_dflt;
+#endif
+
+	/* snapshot init's root under its fs lock */
+	read_lock(&init_entry->fs->lock);
+	ve->fs_rootmnt = init_entry->fs->rootmnt;
+	ve->fs_root = init_entry->fs->root;
+	read_unlock(&init_entry->fs->lock);
+
+	/* common prepares */
+#ifdef CONFIG_PROC_FS
+	prepare_proc();
+#endif
+	prepare_sysctl();
+}
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/vzdev.c kernel-2.6.18-417.el5-028stab121/kernel/ve/vzdev.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/vzdev.c	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/vzdev.c	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,154 @@
+/*
+ *  kernel/ve/vzdev.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/vzctl.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/vzcalluser.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <linux/device.h>
+#include <linux/smp_lock.h>
+
+#define VZCTL_MAJOR 126
+#define VZCTL_NAME "vzctl"
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Interface");
+MODULE_LICENSE("GPL v2");
+
+static LIST_HEAD(ioctls);
+static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED;
+
+static struct vzioctlinfo *vzctl_get_handler(unsigned int cmd)
+{
+	struct vzioctlinfo *h;
+
+	spin_lock(&ioctl_lock);
+	list_for_each_entry(h, &ioctls, list) {
+		if (h->type == _IOC_TYPE(cmd))
+			goto found;
+	}
+	h = NULL;
+found:
+	if (h && !try_module_get(h->owner))
+		h = NULL;
+	spin_unlock(&ioctl_lock);
+	return h;
+}
+
+static void vzctl_put_handler(struct vzioctlinfo *h)
+{
+	if (!h)
+		return;
+
+	module_put(h->owner);
+}
+
+long vzctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct vzioctlinfo *h;
+	int err;
+
+	err = -ENOTTY;
+	h = vzctl_get_handler(cmd);
+	if (h && h->ioctl)
+		err = (*h->ioctl)(file, cmd, arg);
+	vzctl_put_handler(h);
+
+	return err;
+}
+
+long compat_vzctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct vzioctlinfo *h;
+	int err;
+
+	err = -ENOIOCTLCMD;
+	h = vzctl_get_handler(cmd);
+	if (h && h->compat_ioctl)
+		err = (*h->compat_ioctl)(file, cmd, arg);
+	vzctl_put_handler(h);
+
+	return err;
+}
+
+void vzioctl_register(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_add(&inf->list, &ioctls);
+	spin_unlock(&ioctl_lock);
+}
+EXPORT_SYMBOL(vzioctl_register);
+
+void vzioctl_unregister(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_del_init(&inf->list);
+	spin_unlock(&ioctl_lock);
+}
+EXPORT_SYMBOL(vzioctl_unregister);
+
+/*
+ * Init/exit stuff.
+ */
+static struct file_operations vzctl_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= vzctl_ioctl,
+	.compat_ioctl	= compat_vzctl_ioctl,
+};
+
+static struct class *vzctl_class;
+
+static void __exit vzctl_exit(void)
+{
+	class_device_destroy(vzctl_class, MKDEV(VZCTL_MAJOR, 0));
+	class_destroy(vzctl_class);
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+}
+
+static int __init vzctl_init(void)
+{
+	int ret;
+	struct class_device *class_err;
+
+	ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
+	if (ret < 0)
+		goto out;
+
+	vzctl_class = class_create(THIS_MODULE, "vzctl");
+	if (IS_ERR(vzctl_class)) {
+		ret = PTR_ERR(vzctl_class);
+		goto out_cleandev;
+	}
+
+	class_err = class_device_create(vzctl_class, NULL, MKDEV(VZCTL_MAJOR, 0),
+				NULL, VZCTL_NAME);
+	if (IS_ERR(class_err)) {
+		ret = PTR_ERR(class_err);
+		goto out_rmclass;
+	}
+
+	goto out;
+
+out_rmclass:
+	class_destroy(vzctl_class);
+out_cleandev:
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+out:
+	return ret;
+}
+
+module_init(vzctl_init)
+module_exit(vzctl_exit);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/vzevent.c kernel-2.6.18-417.el5-028stab121/kernel/ve/vzevent.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/vzevent.c	2017-01-13 08:40:20.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/vzevent.c	2017-01-13 08:40:22.000000000 -0500
@@ -0,0 +1,140 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/errno.h>
+#include <linux/ve_proto.h>
+#include <linux/vzevent.h>
+
+#define NETLINK_UEVENT	31
+#define VZ_EVGRP_ALL	0x01
+
+static int reboot_event;
+module_param(reboot_event, int, 0644);
+MODULE_PARM_DESC(reboot_event, "Enable reboot events");
+
+/*
+ * NOTE: the original idea was to send events via kobject_uevent();
+ * however, it turned out to have negative consequences, such as
+ * starting /sbin/hotplug, which tries to react to our events in an inadequate manner.
+ */
+
+static struct sock *vzev_sock;
+
+static char *action_to_string(int action)
+{
+	switch (action) {
+	case KOBJ_MOUNT:
+		return "ve-mount";
+	case KOBJ_UMOUNT:
+		return "ve-umount";
+	case KOBJ_START:
+		return "ve-start";
+	case KOBJ_STOP:
+		return "ve-stop";
+	case KOBJ_REBOOT:
+		return "ve-reboot";
+	default:
+		return NULL;
+	}
+}
+
+static int do_vzevent_send(int event, char *msg, int len)
+{
+	struct sk_buff *skb;
+	char *buf, *action;
+	int alen;
+
+	action = action_to_string(event);
+	if (!action)
+		return -EINVAL;
+
+	alen = strlen(action);
+
+	skb = alloc_skb(len + 1 + alen, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	buf = skb_put(skb, len + 1 + alen);
+	memcpy(buf, action, alen);
+	buf[alen] = '@';
+	memcpy(buf + alen + 1, msg, len);
+	(void)netlink_broadcast(vzev_sock, skb, 0, VZ_EVGRP_ALL, GFP_KERNEL);
+	return 0;
+}
+
+int vzevent_send(int event, const char *attrs_fmt, ...)
+{
+	va_list args;
+	int len, err;
+	struct ve_struct *ve;
+	char *page;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		goto out;
+
+	va_start(args, attrs_fmt);
+	len = vscnprintf(page, PAGE_SIZE, attrs_fmt, args);
+	va_end(args);
+
+	ve = set_exec_env(get_ve0());
+	err = do_vzevent_send(event, page, len);
+	(void)set_exec_env(ve);
+	free_page((unsigned long)page);
+out:
+	return err;
+}
+EXPORT_SYMBOL(vzevent_send);
+
+static int ve_start(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	vzevent_send(KOBJ_START, "%d", ve->veid);
+	return 0;
+}
+
+static void ve_stop(void *data)
+{
+	struct ve_struct *ve;
+	int event = KOBJ_STOP;
+
+	if (test_and_clear_bit(VE_REBOOT, &get_exec_env()->flags) &&
+		reboot_event)
+		event = KOBJ_REBOOT;
+
+	ve = (struct ve_struct *)data;
+	vzevent_send(event, "%d", ve->veid);
+}
+
+static struct ve_hook ve_start_stop_hook = {
+	.init		= ve_start,
+	.fini		= ve_stop,
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_AFTERALL,
+};
+
+static int __init init_vzevent(void)
+{
+	vzev_sock = netlink_kernel_create(NETLINK_UEVENT, 0, NULL, THIS_MODULE);
+	if (vzev_sock == NULL)
+		return -ENOMEM;
+	ve_hook_register(VE_SS_CHAIN, &ve_start_stop_hook);
+	return 0;
+}
+
+static void __exit exit_vzevent(void)
+{
+	ve_hook_unregister(&ve_start_stop_hook);
+	sock_release(vzev_sock->sk_socket);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_vzevent);
+module_exit(exit_vzevent);
diff -upr kernel-2.6.18-417.el5.orig/kernel/ve/vzwdog.c kernel-2.6.18-417.el5-028stab121/kernel/ve/vzwdog.c
--- kernel-2.6.18-417.el5.orig/kernel/ve/vzwdog.c	2017-01-13 08:40:19.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/kernel/ve/vzwdog.c	2017-01-13 08:40:20.000000000 -0500
@@ -0,0 +1,282 @@
+/*
+ *  kernel/ve/vzwdog.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/kobject.h>
+#include <linux/genhd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+#include <linux/ve.h>
+#include <linux/vzstat.h>
+#include <asm/uaccess.h>
+#include <linux/kthread.h>
+
+/* Stuff regarding the kernel thread that polls VE validity */
+static int sleep_timeout = 60;
+static struct task_struct *wdog_thread_tsk;
+
+extern void show_mem(void);
+
+static struct file *intr_file;
+static char page[PAGE_SIZE];
+
+static void parse_irq_list(int len)
+{
+	int i, k, skip;
+	for (i = 0; i < len; ) {
+		k = i;
+		while (i < len && page[i] != '\n' && page[i] != ':')
+			i++;
+		skip = 0;
+		if (i < len && page[i] != '\n') {
+			i++; /* skip ':' */
+			while (i < len && (page[i] == ' ' || page[i] == '0'))
+				i++;
+			skip = (i < len && (page[i] < '0' || page[i] > '9'));
+			while (i < len && page[i] != '\n')
+				i++;
+		}
+		if (!skip)
+			printk("%.*s\n", i - k, page + k);
+		if (i < len)
+			i++; /* skip '\n' */
+	}
+}
+
+extern loff_t vfs_llseek(struct file *file, loff_t, int);
+extern ssize_t vfs_read(struct file *file, char __user *, size_t, loff_t *);
+extern struct file *filp_open(const char *filename, int flags, int mode);
+extern int filp_close(struct file *filp, fl_owner_t id);
+static void show_irq_list(void)
+{
+	mm_segment_t fs;
+	int r;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	vfs_llseek(intr_file, 0, 0);
+	r = vfs_read(intr_file, (void __user *)page, sizeof(page),
+			&intr_file->f_pos);
+	set_fs(fs);
+
+	if (r > 0)
+		parse_irq_list(r);
+}
+
+static void show_alloc_latency(void)
+{
+	static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
+		"A0",
+		"L0",
+		"H0",
+		"L1",
+		"H1"
+	};
+	int i;
+
+	printk("lat: ");
+	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+		struct kstat_lat_struct *p;
+		cycles_t maxlat, avg0, avg1, avg2;
+
+		p = &kstat_glob.alloc_lat[i];
+		spin_lock_irq(&kstat_glb_lock);
+		maxlat = p->last.maxlat;
+		avg0 = p->avg[0];
+		avg1 = p->avg[1];
+		avg2 = p->avg[2];
+		spin_unlock_irq(&kstat_glb_lock);
+
+		printk("%s %Lu (%Lu %Lu %Lu)",
+				alloc_descr[i],
+				(unsigned long long)maxlat,
+				(unsigned long long)avg0,
+				(unsigned long long)avg1,
+				(unsigned long long)avg2);
+	}
+	printk("\n");
+}
+
+static void show_schedule_latency(void)
+{
+	struct kstat_lat_pcpu_struct *p;
+	cycles_t maxlat, totlat, avg0, avg1, avg2;
+	unsigned long count;
+
+	p = &kstat_glob.sched_lat;
+	spin_lock_irq(&kstat_glb_lock);
+	maxlat = p->last.maxlat;
+	totlat = p->last.totlat;
+	count = p->last.count;
+	avg0 = p->avg[0];
+	avg1 = p->avg[1];
+	avg2 = p->avg[2];
+	spin_unlock_irq(&kstat_glb_lock);
+
+	printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
+			(unsigned long long)maxlat,
+			(unsigned long long)totlat,
+			count,
+			(unsigned long long)avg0,
+			(unsigned long long)avg1,
+			(unsigned long long)avg2);
+}
+
+static void show_header(void)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	preempt_disable();
+	printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
+			tv.tv_sec, (long)tv.tv_usec,
+			(unsigned long long)get_jiffies_64(),
+			smp_processor_id());
+#ifdef CONFIG_FAIRSCHED
+	printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
+			cycles_per_jiffy, HZ);
+#else
+	printk("*** jiffies_per_second %u ***\n", HZ);
+#endif
+	preempt_enable();
+}
+
+static void show_pgdatinfo(void)
+{
+	pg_data_t *pgdat;
+
+	printk("pgdat:");
+	for_each_online_pgdat(pgdat) {
+		printk(" %d: %lu,%lu,%lu",
+				pgdat->node_id,
+				pgdat->node_start_pfn,
+				pgdat->node_present_pages,
+				pgdat->node_spanned_pages);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+		printk(",%p", pgdat->node_mem_map);
+#endif
+	}
+	printk("\n");
+}
+
+static void show_diskio(void)
+{
+	struct gendisk *gd;
+	char buf[BDEVNAME_SIZE];
+
+	printk("disk_io: ");
+
+	down_read(&block_subsys.rwsem);
+	list_for_each_entry(gd, &block_subsys.kset.list, kobj.entry) {
+		char *name;
+		name = disk_name(gd, 0, buf);
+		if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) &&
+		    isdigit(name[4]))
+			continue;
+		if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) &&
+		    isdigit(name[3]))
+			continue;
+		printk("(%u,%u) %s r(%lu %lu %lu) w(%lu %lu %lu)\n",
+			gd->major, gd->first_minor,
+			name,
+			disk_stat_read(gd, ios[READ]),
+			disk_stat_read(gd, sectors[READ]),
+			disk_stat_read(gd, merges[READ]),
+			disk_stat_read(gd, ios[WRITE]),
+			disk_stat_read(gd, sectors[WRITE]),
+			disk_stat_read(gd, merges[WRITE]));
+	}
+	up_read(&block_subsys.rwsem);
+
+	printk("\n");
+}
+
+static void show_nrprocs(void)
+{
+	unsigned long _nr_running, _nr_sleeping,
+			_nr_unint, _nr_zombie, _nr_dead, _nr_stopped;
+
+	_nr_running = nr_running();
+	_nr_unint = nr_uninterruptible();
+	_nr_sleeping = nr_sleeping();
+	_nr_zombie = nr_zombie;
+	_nr_dead = atomic_read(&nr_dead);
+	_nr_stopped = nr_stopped();
+
+	printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
+		"Z %lu, X %lu, T %lu (tot %d)\n",
+		nr_ve,	_nr_running, _nr_sleeping, _nr_unint,
+		_nr_zombie, _nr_dead, _nr_stopped, nr_threads);
+}
+
+static void wdog_print(void)
+{
+	show_header();
+	show_irq_list();
+	show_pgdatinfo();
+	show_mem();
+	show_diskio();
+	show_schedule_latency();
+	show_alloc_latency();
+	show_nrprocs();
+}
+
+static int wdog_loop(void* data)
+{
+	while (1) {
+		wdog_print();
+		try_to_freeze();
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		schedule_timeout(sleep_timeout*HZ);
+	}
+	return 0;
+}
+
+static int __init wdog_init(void)
+{
+	struct file *file;
+
+	file = filp_open("/proc/interrupts", 0, 0);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	intr_file = file;
+
+	wdog_thread_tsk = kthread_run(wdog_loop, NULL, "vzwdog");
+	if (IS_ERR(wdog_thread_tsk)) {
+		filp_close(intr_file, NULL);
+		return -EBUSY;
+	}
+	return 0;
+}
+
+static void __exit wdog_exit(void)
+{
+	kthread_stop(wdog_thread_tsk);
+	filp_close(intr_file, NULL);
+}
+
+module_param(sleep_timeout, int, 0666);
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo WDOG");
+MODULE_LICENSE("GPL v2");
+
+module_init(wdog_init)
+module_exit(wdog_exit)
diff -upr kernel-2.6.18-417.el5.orig/lib/bust_spinlocks.c kernel-2.6.18-417.el5-028stab121/lib/bust_spinlocks.c
--- kernel-2.6.18-417.el5.orig/lib/bust_spinlocks.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/lib/bust_spinlocks.c	2017-01-13 08:40:16.000000000 -0500
@@ -12,26 +12,21 @@
 #include <linux/tty.h>
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
-
+#include <linux/console.h>
 
 void bust_spinlocks(int yes)
 {
+	if (printk_no_wake)
+		return;
+
 	if (yes) {
-		oops_in_progress = 1;
+		++oops_in_progress;
 	} else {
-		int loglevel_save = console_loglevel;
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
-		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk() will give klogd
-		 * and the blanked console a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
+		if (--oops_in_progress == 0)
+			wake_up_klogd();
 	}
 }
 
diff -upr kernel-2.6.18-417.el5.orig/lib/Kconfig.debug kernel-2.6.18-417.el5-028stab121/lib/Kconfig.debug
--- kernel-2.6.18-417.el5.orig/lib/Kconfig.debug	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/lib/Kconfig.debug	2017-01-13 08:40:28.000000000 -0500
@@ -39,6 +39,14 @@ config UNUSED_SYMBOLS
 	  you really need it, and what the merge plan to the mainline kernel for
 	  your module is.
 
+config SYSRQ_DEBUG
+	bool "Debugging via sysrq keys"
+	depends on MAGIC_SYSRQ
+	help
+	  Say Y if you want to extend functionality of magic key. It will
+	  provide you with some debugging facilities such as dumping and
+	  writing memory, resolving symbols, and some others.
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
@@ -64,7 +72,7 @@ config LOG_BUF_SHIFT
 
 config DETECT_SOFTLOCKUP
 	bool "Detect Soft Lockups"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && !SCHED_VCPU
 	default y
 	help
 	  Say Y here to enable the kernel to detect "soft lockups",
@@ -307,6 +315,15 @@ config DEBUG_LOCKING_API_SELFTESTS
 	  The following locking APIs are covered: spinlocks, rwlocks,
 	  mutexes and rwsems.
 
+config STACKTRACE_PROC
+	bool "Report stack trace in proc"
+	default y
+	depends on DEBUG_KERNEL && STACKTRACE_SUPPORT && PROC_FS
+	select STACKTRACE
+	help
+	  Report full kernel stack trace in /proc/<pid>/stack
+	  Note: without CONFIG_FRAME_POINTER trace may contain garbage
+
 config STACKTRACE
 	bool
 	depends on DEBUG_KERNEL
diff -upr kernel-2.6.18-417.el5.orig/lib/kobject.c kernel-2.6.18-417.el5-028stab121/lib/kobject.c
--- kernel-2.6.18-417.el5.orig/lib/kobject.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/lib/kobject.c	2017-01-13 08:40:19.000000000 -0500
@@ -516,6 +516,8 @@ void subsystem_init(struct subsystem * s
 	kset_init(&s->kset);
 }
 
+EXPORT_SYMBOL(subsystem_init);
+
 /**
  *	subsystem_register - register a subsystem.
  *	@s:	the subsystem we're registering.
diff -upr kernel-2.6.18-417.el5.orig/lib/kobject_uevent.c kernel-2.6.18-417.el5-028stab121/lib/kobject_uevent.c
--- kernel-2.6.18-417.el5.orig/lib/kobject_uevent.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/lib/kobject_uevent.c	2017-01-13 08:40:22.000000000 -0500
@@ -26,7 +26,10 @@
 #define NUM_ENVP	32	/* number of env pointers */
 
 #if defined(CONFIG_HOTPLUG)
+#ifndef CONFIG_VE
+/* Virtualized for all VEs, but is shown only in VE0 */
 u64 uevent_seqnum;
+#endif
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug";
 static DEFINE_SPINLOCK(sequence_lock);
 #if defined(CONFIG_NET)
@@ -50,6 +53,10 @@ static char *action_to_string(enum kobje
 		return "offline";
 	case KOBJ_ONLINE:
 		return "online";
+	case KOBJ_START:
+		return "start";
+	case KOBJ_STOP:
+		return "stop";
 	default:
 		return NULL;
 	}
@@ -159,7 +166,7 @@ int kobject_uevent_env(struct kobject *k
 
 	/* we will send an event, request a new sequence number */
 	spin_lock(&sequence_lock);
-	seq = ++uevent_seqnum;
+	seq = ++ve_uevent_seqnum;
 	spin_unlock(&sequence_lock);
 	sprintf(seq_buff, "SEQNUM=%llu", (unsigned long long)seq);
 
diff -upr kernel-2.6.18-417.el5.orig/lib/radix-tree.c kernel-2.6.18-417.el5-028stab121/lib/radix-tree.c
--- kernel-2.6.18-417.el5.orig/lib/radix-tree.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/lib/radix-tree.c	2017-01-13 08:40:18.000000000 -0500
@@ -44,6 +44,8 @@
 #define RADIX_TREE_TAG_LONGS	\
 	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
+#define RADIX_ROOT_TAG_MASK	(((1<<RADIX_TREE_MAX_TAGS)-1) << __GFP_BITS_SHIFT)
+
 struct radix_tree_node {
 	unsigned int	count;
 	void		*slots[RADIX_TREE_MAP_SIZE];
@@ -170,9 +172,15 @@ static inline void root_tag_clear(struct
 	root->gfp_mask &= ~(1 << (tag + __GFP_BITS_SHIFT));
 }
 
+static inline void root_tag_move_all_to_prev(struct radix_tree_root *root)
+{
+	root->gfp_mask = (root->gfp_mask & __GFP_BITS_MASK) |
+		(root->gfp_mask & RADIX_ROOT_TAG_MASK) << RADIX_TREE_MAX_TAGS;
+}
+
 static inline void root_tag_clear_all(struct radix_tree_root *root)
 {
-	root->gfp_mask &= __GFP_BITS_MASK;
+	root->gfp_mask &= (__force gfp_t)~RADIX_ROOT_TAG_MASK;
 }
 
 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
@@ -180,6 +188,26 @@ static inline int root_tag_get(struct ra
 	return root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
 }
 
+static inline void prev_tag_set(struct radix_tree_root *root, unsigned int tag)
+{
+	root->gfp_mask |= (1 << (tag + RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT));
+}
+
+static inline void prev_tag_clear(struct radix_tree_root *root, unsigned int tag)
+{
+	root->gfp_mask &= ~(1 << (tag + RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT));
+}
+
+static inline void prev_tag_clear_all(struct radix_tree_root *root)
+{
+	root->gfp_mask &= __GFP_BITS_MASK | RADIX_ROOT_TAG_MASK;
+}
+
+static inline int prev_tag_get(struct radix_tree_root *root, unsigned int tag)
+{
+	return root->gfp_mask & (1 << (tag + RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT));
+}
+
 /*
  * Returns 1 if any slot in the node has this tag set.
  * Otherwise returns 0.
@@ -388,6 +416,8 @@ void *radix_tree_tag_set(struct radix_tr
 {
 	unsigned int height, shift;
 	struct radix_tree_node *slot;
+	int prev = 0; /* suppress warning */
+	int right_prev = radix_tree_tag_get(root, index, tag);
 
 	height = root->height;
 	BUG_ON(index > radix_tree_maxindex(height));
@@ -395,11 +425,15 @@ void *radix_tree_tag_set(struct radix_tr
 	slot = root->rnode;
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 
+	if (!height)
+		prev = root_tag_get(root, tag);
+
 	while (height > 0) {
 		int offset;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		if (!tag_get(slot, tag, offset))
+		prev = tag_get(slot, tag, offset);
+		if (!prev)
 			tag_set(slot, tag, offset);
 		slot = slot->slots[offset];
 		BUG_ON(slot == NULL);
@@ -407,6 +441,13 @@ void *radix_tree_tag_set(struct radix_tr
 		height--;
 	}
 
+	if (prev)
+		prev_tag_set(root, tag);
+	else
+		prev_tag_clear(root, tag);
+
+	BUG_ON(!prev != !right_prev);
+
 	/* set the root's tag bit */
 	if (slot && !root_tag_get(root, tag))
 		root_tag_set(root, tag);
@@ -435,6 +476,8 @@ void *radix_tree_tag_clear(struct radix_
 	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
 	struct radix_tree_node *slot = NULL;
 	unsigned int height, shift;
+	int prev = 0; /* suppress warning */
+	int right_prev = radix_tree_tag_get(root, index, tag);
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
@@ -444,6 +487,13 @@ void *radix_tree_tag_clear(struct radix_
 	pathp->node = NULL;
 	slot = root->rnode;
 
+	if (!height) {
+		prev = root_tag_get(root, tag);
+		if (prev)
+			root_tag_clear(root, tag);
+		goto out;
+	}
+
 	while (height > 0) {
 		int offset;
 
@@ -463,7 +513,8 @@ void *radix_tree_tag_clear(struct radix_
 		goto out;
 
 	while (pathp->node) {
-		if (!tag_get(pathp->node, tag, pathp->offset))
+		prev = tag_get(pathp->node, tag, pathp->offset);
+		if (!prev)
 			goto out;
 		tag_clear(pathp->node, tag, pathp->offset);
 		if (any_tag_set(pathp->node, tag))
@@ -476,11 +527,17 @@ void *radix_tree_tag_clear(struct radix_
 		root_tag_clear(root, tag);
 
 out:
+	if (prev)
+		prev_tag_set(root, tag);
+	else
+		prev_tag_clear(root, tag);
+
+	BUG_ON(!prev != !right_prev);
+
 	return slot;
 }
 EXPORT_SYMBOL(radix_tree_tag_clear);
 
-#ifndef __KERNEL__	/* Only the test harness uses this at present */
 /**
  * radix_tree_tag_get - get a tag on a radix tree node
  * @root:		radix tree root
@@ -539,7 +596,6 @@ int radix_tree_tag_get(struct radix_tree
 	}
 }
 EXPORT_SYMBOL(radix_tree_tag_get);
-#endif
 
 static unsigned int
 __lookup(struct radix_tree_root *root, void **results, unsigned long index,
@@ -771,14 +827,18 @@ void *radix_tree_delete(struct radix_tre
 	unsigned int height, shift;
 	int tag;
 	int offset;
+	int right_prev[RADIX_TREE_MAX_TAGS] = {0,};
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
-		goto out;
+		goto out_none;
+
+	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+		right_prev[tag] = radix_tree_tag_get(root, index, tag);
 
 	slot = root->rnode;
 	if (height == 0 && root->rnode) {
-		root_tag_clear_all(root);
+		root_tag_move_all_to_prev(root);
 		root->rnode = NULL;
 		goto out;
 	}
@@ -788,7 +848,7 @@ void *radix_tree_delete(struct radix_tre
 
 	do {
 		if (slot == NULL)
-			goto out;
+			goto out_none;
 
 		pathp++;
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
@@ -800,7 +860,7 @@ void *radix_tree_delete(struct radix_tre
 	} while (height > 0);
 
 	if (slot == NULL)
-		goto out;
+		goto out_none;
 
 	/*
 	 * Clear all tags associated with the just-deleted item
@@ -808,6 +868,8 @@ void *radix_tree_delete(struct radix_tre
 	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
 		if (tag_get(pathp->node, tag, pathp->offset))
 			radix_tree_tag_clear(root, index, tag);
+		else
+			prev_tag_clear(root, tag);
 	}
 
 	/* Now free the nodes we do not need anymore */
@@ -831,7 +893,12 @@ void *radix_tree_delete(struct radix_tre
 	root->rnode = NULL;
 
 out:
+	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+		BUG_ON(!right_prev[tag] != !prev_tag_get(root, tag));
 	return slot;
+out_none:
+	prev_tag_clear_all(root);
+	goto out;
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
@@ -846,6 +913,19 @@ int radix_tree_tagged(struct radix_tree_
 }
 EXPORT_SYMBOL(radix_tree_tagged);
 
+/**
+ *	radix_tree_prev_tag_get - get previous tag status for last changed item
+ *			call is valid right after radix_tree_tag_set/clear for
+ *			changed tag and after radix_tree_delete for all tags
+ *	@root:		radix tree root
+ *	@tag:		tag to test
+ */
+int radix_tree_prev_tag_get(struct radix_tree_root *root, unsigned int tag)
+{
+	return prev_tag_get(root, tag);
+}
+EXPORT_SYMBOL(radix_tree_prev_tag_get);
+
 static void
 radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
 {
diff -upr kernel-2.6.18-417.el5.orig/lib/smp_processor_id.c kernel-2.6.18-417.el5-028stab121/lib/smp_processor_id.c
--- kernel-2.6.18-417.el5.orig/lib/smp_processor_id.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/lib/smp_processor_id.c	2017-01-13 08:40:28.000000000 -0500
@@ -7,11 +7,26 @@
 #include <linux/kallsyms.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_VCPU
+/* We cannot guarantee pcpu affinity if the VCPU extension is used */
+static inline int run_on_single_cpu(int cpu) { return 0; }
+#else
+static inline int run_on_single_cpu(int cpu)
+{
+	cpumask_t this_mask;
+
+	this_mask = cpumask_of_cpu(cpu);
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		return 1;
+
+	return 0;
+}
+#endif
+
 unsigned int debug_smp_processor_id(void)
 {
 	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
-	cpumask_t this_mask;
 
 	if (likely(preempt_count))
 		goto out;
@@ -23,9 +38,7 @@ unsigned int debug_smp_processor_id(void
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	this_mask = cpumask_of_cpu(this_cpu);
-
-	if (cpus_equal(current->cpus_allowed, this_mask))
+	if (run_on_single_cpu(this_cpu))
 		goto out;
 
 	/*
diff -upr kernel-2.6.18-417.el5.orig/Makefile kernel-2.6.18-417.el5-028stab121/Makefile
--- kernel-2.6.18-417.el5.orig/Makefile	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/Makefile	2017-01-13 08:40:41.000000000 -0500
@@ -5,6 +5,7 @@ EXTRAVERSION = -prep
 RHEL_MAJOR = 5
 RHEL_MINOR = 11
 NAME=Avast! A bilge rat!
+VZVERSION = 028stab121
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
@@ -320,7 +321,7 @@ AFLAGS          := -D__ASSEMBLY__
 KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
 KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
-export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
+export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION VZVERSION
 export ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
 export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
@@ -557,7 +558,7 @@ export mod_strip_cmd
 
 
 ifeq ($(KBUILD_EXTMOD),)
-core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
@@ -874,7 +875,8 @@ define filechk_utsrelease.h
 	  echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2;    \
 	  exit 1;                                                         \
 	fi;                                                               \
-	(echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";)
+	(echo \#define UTS_RELEASE \"$(KERNELRELEASE)\"; 		  \
+		echo \#define VZVERSION \"$(VZVERSION)\";)
 endef
 
 define filechk_version.h
diff -upr kernel-2.6.18-417.el5.orig/mm/filemap.c kernel-2.6.18-417.el5-028stab121/mm/filemap.c
--- kernel-2.6.18-417.el5.orig/mm/filemap.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/filemap.c	2017-01-13 08:40:40.000000000 -0500
@@ -34,6 +34,7 @@
 #include <trace/mm.h>
 #include "internal.h"
 #include <trace/filemap.h>
+#include <linux/virtinfo.h>
 
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
@@ -42,6 +43,8 @@
 
 #include <asm/mman.h>
 
+#include <ub/io_acct.h>
+
 static ssize_t
 generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	loff_t offset, unsigned long nr_segs);
@@ -119,6 +122,10 @@ void __remove_from_page_cache(struct pag
 	struct address_space *mapping = page->mapping;
 
 	radix_tree_delete(&mapping->page_tree, page->index);
+	if (mapping_cap_account_dirty(mapping) &&
+			radix_tree_prev_tag_get(&mapping->page_tree,
+				PAGECACHE_TAG_DIRTY))
+		ub_io_account_clean(mapping, 1, 1);
 	page->mapping = NULL;
 	mapping->nrpages--;
 	trace_remove_from_page_cache(mapping, page->index);
@@ -1019,6 +1026,8 @@ page_ok:
 		goto out;
 
 page_not_up_to_date:
+		virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 		/* Get exclusive access to the page ... */
 		lock_page(page);
 
@@ -1081,6 +1090,8 @@ readpage_error:
 		goto out;
 
 no_cached_page:
+		virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 		/*
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
@@ -1120,6 +1131,8 @@ int file_read_actor(read_descriptor_t *d
 	if (size > count)
 		size = count;
 
+	left = size;
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 	/*
 	 * Faults on the destination of a read are common, so do it before
 	 * taking the kmap.
@@ -1129,20 +1142,21 @@ int file_read_actor(read_descriptor_t *d
 		left = __copy_to_user_inatomic(desc->arg.buf,
 						kaddr + offset, size);
 		kunmap_atomic(kaddr, KM_USER0);
-		if (left == 0)
-			goto success;
 	}
+#endif
 
-	/* Do it the slow way */
-	kaddr = kmap(page);
-	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-	kunmap(page);
-
-	if (left) {
-		size -= left;
-		desc->error = -EFAULT;
+	if (left != 0) {
+		/* Do it the slow way */
+		kaddr = kmap(page);
+		left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
+		kunmap(page);
+
+		if (left) {
+			size -= left;
+			desc->error = -EFAULT;
+		}
 	}
-success:
+
 	desc->count = count - size;
 	desc->written += size;
 	desc->arg.buf += size;
@@ -1278,12 +1292,11 @@ out:
 EXPORT_SYMBOL(__generic_file_aio_read);
 
 ssize_t
-generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
-	struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
 	BUG_ON(iocb->ki_pos != pos);
-	return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+	return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
 }
 EXPORT_SYMBOL(generic_file_aio_read);
 
@@ -1390,6 +1403,8 @@ static int fastcall page_cache_read(stru
 	struct page *page; 
 	int ret;
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	do {
 		page = page_cache_alloc_cold(mapping);
 		if (!page)
@@ -1556,6 +1571,9 @@ page_not_uptodate:
 		majmin = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	}
+
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	lock_page(page);
 
 	/* Did it get unhashed while we waited for it? */
@@ -1587,6 +1605,8 @@ page_not_uptodate:
 	 * because there really aren't any performance issues here
 	 * and we need to check for errors.
 	 */
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	lock_page(page);
 
 	/* Somebody truncated the page on us? */
@@ -1678,6 +1698,8 @@ no_cached_page:
 	return NULL;
 
 page_not_uptodate:
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	lock_page(page);
 
 	/* Did it get unhashed while we waited for it? */
@@ -1708,6 +1730,8 @@ page_not_uptodate:
 	 * because there really aren't any performance issues here
 	 * and we need to check for errors.
 	 */
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	lock_page(page);
 
 	/* Somebody truncated the page on us? */
@@ -2623,6 +2647,8 @@ generic_file_buffered_write(struct kiocb
 	ssize_t status;
 	struct iov_iter i;
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	iov_iter_init(&i, iov, nr_segs, count, written);
 	if (IS_NEWAOPS(inode))
 		status = generic_perform_write(file, &i, pos);
@@ -2847,22 +2873,22 @@ out:
 	current->backing_dev_info = NULL;
 	return written ? written : err;
 }
-EXPORT_SYMBOL(generic_file_aio_write_nolock);
 
-ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t *ppos)
+ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
-	loff_t pos = *ppos;
 
-	ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+	BUG_ON(iocb->ki_pos != pos);
+
+	ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
+			&iocb->ki_pos);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
-		int err;
+		ssize_t err;
 
 		err = sync_page_range_nolock(inode, mapping, pos, ret);
 		if (err < 0)
@@ -2870,6 +2896,7 @@ generic_file_aio_write_nolock(struct kio
 	}
 	return ret;
 }
+EXPORT_SYMBOL(generic_file_aio_write_nolock);
 
 static ssize_t
 __generic_file_write_nolock(struct file *file, const struct iovec *iov,
@@ -2879,8 +2906,9 @@ __generic_file_write_nolock(struct file 
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, file);
+	kiocb.ki_pos = *ppos;
 	ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
-	if (ret == -EIOCBQUEUED)
+	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	return ret;
 }
@@ -2893,28 +2921,28 @@ generic_file_write_nolock(struct file *f
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, file);
-	ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+	kiocb.ki_pos = *ppos;
+	ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
 	return ret;
 }
 EXPORT_SYMBOL(generic_file_write_nolock);
 
-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
-			       size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
-	struct iovec local_iov = { .iov_base = (void __user *)buf,
-					.iov_len = count };
 
 	BUG_ON(iocb->ki_pos != pos);
 
 	mutex_lock(&inode->i_mutex);
-	ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
-						&iocb->ki_pos);
+	ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
+			&iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
diff -upr kernel-2.6.18-417.el5.orig/mm/filemap_xip.c kernel-2.6.18-417.el5-028stab121/mm/filemap_xip.c
--- kernel-2.6.18-417.el5.orig/mm/filemap_xip.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/filemap_xip.c	2017-01-13 08:40:17.000000000 -0500
@@ -16,6 +16,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
 #include <asm/tlbflush.h>
+#include <ub/ub_vmpages.h>
 
 /*
  * This is a file read routine for execute in place files, and uses
@@ -191,6 +192,8 @@ __xip_unmap (struct address_space * mapp
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush_notify(vma, address, pte);
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
+			ub_unused_privvm_inc(mm, vma);
 			dec_mm_counter(mm, file_rss);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
diff -upr kernel-2.6.18-417.el5.orig/mm/fremap.c kernel-2.6.18-417.el5-028stab121/mm/fremap.c
--- kernel-2.6.18-417.el5.orig/mm/fremap.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/fremap.c	2017-01-13 08:40:24.000000000 -0500
@@ -21,6 +21,9 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+#include <ub/ub_mem.h>
+
 static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
@@ -33,8 +36,9 @@ static int zap_pte(struct mm_struct *mm,
 		page = vm_normal_page(vma, addr, pte);
 		if (page) {
 			if (pte_dirty(pte))
-				set_page_dirty(page);
+				set_page_dirty_mm(page, mm);
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
 			page_cache_release(page);
 		}
 	} else {
@@ -58,6 +62,10 @@ int install_page(struct mm_struct *mm, s
 	pte_t *pte;
 	pte_t pte_val;
 	spinlock_t *ptl;
+	struct page_beancounter *pbc;
+
+	if (unlikely(pb_alloc(&pbc)))
+		goto out_nopb;
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
@@ -78,12 +86,15 @@ int install_page(struct mm_struct *mm, s
 			goto unlock;
 	}
 
-	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
+	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) {
+		ub_unused_privvm_dec(mm, vma);
 		inc_mm_counter(mm, file_rss);
+	}
 
 	flush_icache_page(vma, page);
 	pte_val = mk_pte(page, prot);
 	set_pte_at(mm, addr, pte, pte_val);
+	pb_add_ref(page, mm, &pbc);
 	page_add_file_rmap(page);
 	update_mmu_cache(vma, addr, pte_val);
 	lazy_mmu_prot_update(pte_val);
@@ -91,6 +102,8 @@ int install_page(struct mm_struct *mm, s
 unlock:
 	pte_unmap_unlock(pte, ptl);
 out:
+	pb_free(&pbc);
+out_nopb:
 	return err;
 }
 EXPORT_SYMBOL(install_page);
@@ -113,6 +126,7 @@ int install_file_pte(struct mm_struct *m
 
 	if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
 		update_hiwater_rss(mm);
+		ub_unused_privvm_inc(mm, vma);
 		dec_mm_counter(mm, file_rss);
 	}
 
@@ -235,4 +249,5 @@ asmlinkage long sys_remap_file_pages(uns
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(sys_remap_file_pages);
 
diff -upr kernel-2.6.18-417.el5.orig/mm/madvise.c kernel-2.6.18-417.el5-028stab121/mm/madvise.c
--- kernel-2.6.18-417.el5.orig/mm/madvise.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/madvise.c	2017-01-13 08:40:16.000000000 -0500
@@ -11,6 +11,7 @@
 #include <linux/mempolicy.h>
 #include <linux/hugetlb.h>
 #include <linux/file.h>
+#include <linux/swap.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -23,6 +24,7 @@ static int madvise_need_mmap_write(int b
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_DEACTIVATE:
 		return 0;
 	default:
 		/* be safe, default to 1. list exceptions explicitly */
@@ -216,6 +218,26 @@ static long madvise_remove(struct vm_are
 	return error;
 }
 
+static long madvise_deactivate(struct vm_area_struct * vma,
+			     struct vm_area_struct ** prev,
+			     unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	struct page *page;
+
+	*prev = vma;
+	for (addr = start ; addr < end ; addr += PAGE_SIZE) {
+		page = follow_page(vma, addr, FOLL_GET);
+		if (!page)
+			continue;
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		deactivate_page(page);
+		put_page(page);
+	}
+	return 0;
+}
+
 static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		unsigned long start, unsigned long end, int behavior)
@@ -246,6 +268,10 @@ madvise_vma(struct vm_area_struct *vma, 
 		error = madvise_dontneed(vma, prev, start, end);
 		break;
 
+	case MADV_DEACTIVATE:
+		error = madvise_deactivate(vma, prev, start, end);
+		break;
+
 	default:
 		error = -EINVAL;
 		break;
diff -upr kernel-2.6.18-417.el5.orig/mm/Makefile kernel-2.6.18-417.el5-028stab121/mm/Makefile
--- kernel-2.6.18-417.el5.orig/mm/Makefile	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/Makefile	2017-01-13 08:40:14.000000000 -0500
@@ -13,6 +13,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_X86_4G)	+= usercopy.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
diff -upr kernel-2.6.18-417.el5.orig/mm/memory.c kernel-2.6.18-417.el5-028stab121/mm/memory.c
--- kernel-2.6.18-417.el5.orig/mm/memory.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/memory.c	2017-01-13 08:40:40.000000000 -0500
@@ -42,6 +42,7 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/swap.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
@@ -62,6 +63,11 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_mem.h>
+#include <ub/io_acct.h>
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -86,7 +92,7 @@ EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(high_memory);
 EXPORT_SYMBOL(vmalloc_earlyreserve);
 
-int randomize_va_space __read_mostly = 1;
+int _randomize_va_space __read_mostly = 1;
 
 static int __init disable_randmaps(char *s)
 {
@@ -108,18 +114,21 @@ void pgd_clear_bad(pgd_t *pgd)
 	pgd_ERROR(*pgd);
 	pgd_clear(pgd);
 }
+EXPORT_SYMBOL_GPL(pgd_clear_bad);
 
 void pud_clear_bad(pud_t *pud)
 {
 	pud_ERROR(*pud);
 	pud_clear(pud);
 }
+EXPORT_SYMBOL_GPL(pud_clear_bad);
 
 void pmd_clear_bad(pmd_t *pmd)
 {
 	pmd_ERROR(*pmd);
 	pmd_clear(pmd);
 }
+EXPORT_SYMBOL_GPL(pmd_clear_bad);
 
 /*
  * Note: this doesn't free the actual pages themselves. That
@@ -131,6 +140,7 @@ static void free_pte_range(struct mmu_ga
 	pmd_clear(pmd);
 	pte_lock_deinit(page);
 	pte_free_tlb(tlb, page);
+	ub_page_table_uncharge(tlb->mm);
 	dec_zone_page_state(page, NR_PAGETABLE);
 	tlb->mm->nr_ptes--;
 }
@@ -166,6 +176,11 @@ static inline void free_pmd_range(struct
 	pmd = pmd_offset(pud, start);
 	pud_clear(pud);
 	pmd_free_tlb(tlb, pmd);
+#ifndef __PAGETABLE_PMD_FOLDED
+# ifndef CONFIG_X86_PAE
+	ub_page_table_uncharge(tlb->mm);
+# endif
+#endif
 }
 
 static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -199,6 +214,9 @@ static inline void free_pud_range(struct
 	pud = pud_offset(pgd, start);
 	pgd_clear(pgd);
 	pud_free_tlb(tlb, pud);
+#ifndef __PAGETABLE_PUD_FOLDED
+	ub_page_table_uncharge(tlb->mm);
+#endif
 }
 
 /*
@@ -301,6 +319,7 @@ void free_pgtables(struct mmu_gather **t
 		}
 		vma = next;
 	}
+	ub_page_table_commit((*tlb)->mm);
 }
 
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
@@ -314,6 +333,11 @@ int __pte_alloc(struct mm_struct *mm, pm
 	if (pmd_present(*pmd)) {	/* Another has populated it */
 		pte_lock_deinit(new);
 		pte_free(new);
+	} else if (ub_page_table_charge(mm)) {
+		spin_unlock(&mm->page_table_lock);
+		pte_lock_deinit(new);
+		pte_free(new);
+		return -ENOMEM;
 	} else {
 		mm->nr_ptes++;
 		inc_zone_page_state(new, NR_PAGETABLE);
@@ -322,6 +346,7 @@ int __pte_alloc(struct mm_struct *mm, pm
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__pte_alloc);
 
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 {
@@ -468,6 +493,7 @@ struct page *vm_normal_page(struct vm_ar
 out:
 	return pfn_to_page(pfn);
 }
+EXPORT_SYMBOL_GPL(vm_normal_page);
 
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
@@ -478,7 +504,7 @@ out:
 static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+		unsigned long addr, int *rss, struct page_beancounter **pbc)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -533,6 +559,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
+		pb_dup_ref(page, dst_mm, pbc);
 		rss[!!PageAnon(page)]++;
 	}
 
@@ -540,20 +567,37 @@ out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
 }
 
+#define pte_ptrs(a)	(PTRS_PER_PTE - ((a >> PAGE_SHIFT)&(PTRS_PER_PTE - 1)))
+#ifdef CONFIG_USER_RESOURCE
+#define same_ub(mm1, mm2)	((mm1)->mm_ub == (mm2)->mm_ub)
+#else
+#define same_ub(mm1, mm2)	(1)
+#endif
+
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
-		unsigned long addr, unsigned long end)
+		pmd_t *dst_pmd, pmd_t *src_pmd,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end,
+		unsigned long *ub_unused_fix)
 {
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[2];
+	int rss[2], rss_tot;
+	struct page_beancounter *pbc;
+	int err;
 
+	err = -ENOMEM;
+	pbc = same_ub(src_mm, dst_mm) ? PBC_COPY_SAME : NULL;
 again:
+	if (pbc != PBC_COPY_SAME && pb_alloc_list(&pbc, pte_ptrs(addr)))
+		goto out;
 	rss[1] = rss[0] = 0;
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
-		return -ENOMEM;
+		goto out;
+
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -574,23 +618,34 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+				vma, addr, rss, &pbc);
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
+	rss_tot = rss[0] + rss[1];
+	if (VM_UB_PRIVATE(dst_vma->vm_flags, dst_vma->vm_file))
+		*ub_unused_fix += rss_tot;
 	add_mm_rss(dst_mm, rss[0], rss[1]);
 	pte_unmap_unlock(dst_pte - 1, dst_ptl);
 	cond_resched();
 	if (addr != end)
 		goto again;
-	return 0;
+
+	err = 0;
+out:
+	pb_free_list(&pbc);
+	return err;
 }
 
 static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
-		unsigned long addr, unsigned long end)
+		pud_t *dst_pud, pud_t *src_pud,
+		struct vm_area_struct *dst_vma, 
+		struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end,
+		unsigned long *ub_unused_fix)
 {
 	pmd_t *src_pmd, *dst_pmd;
 	unsigned long next;
@@ -604,15 +659,18 @@ static inline int copy_pmd_range(struct 
 		if (pmd_none_or_clear_bad(src_pmd))
 			continue;
 		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-						vma, addr, next))
+						dst_vma, vma, addr, next, ub_unused_fix))
 			return -ENOMEM;
 	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
 	return 0;
 }
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
-		unsigned long addr, unsigned long end)
+		pgd_t *dst_pgd, pgd_t *src_pgd,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end,
+		unsigned long *ub_unused_fix)
 {
 	pud_t *src_pud, *dst_pud;
 	unsigned long next;
@@ -626,20 +684,22 @@ static inline int copy_pud_range(struct 
 		if (pud_none_or_clear_bad(src_pud))
 			continue;
 		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-						vma, addr, next))
+						dst_vma, vma, addr, next, ub_unused_fix))
 			return -ENOMEM;
 	} while (dst_pud++, src_pud++, addr = next, addr != end);
 	return 0;
 }
 
-int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		struct vm_area_struct *vma)
+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
+		      unsigned long addr, size_t size)
 {
+	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	struct mm_struct *src_mm = vma->vm_mm;
 	pgd_t *src_pgd, *dst_pgd;
 	unsigned long next;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
+	unsigned long end = addr + size;
 	int ret;
+	unsigned long ub_unused_fix = 0;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -672,28 +732,43 @@ int copy_page_range(struct mm_struct *ds
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
 		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-				   vma, addr, next))) {
+				   dst_vma, vma, addr, next, &ub_unused_fix))) {
 			ret = -ENOMEM;
 			break;
 		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
 
+	__ub_unused_privvm_dec(dst_mm, ub_unused_fix);
+
 	if (is_cow_mapping(vma->vm_flags))
 		mmu_notifier_invalidate_range_end(src_mm,
 						vma->vm_start, end);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(__copy_page_range);
+
+int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+		    struct vm_area_struct *dst_vma, struct vm_area_struct *vma)
+{
+	if (dst_vma->vm_mm != dst)
+		BUG();
+	if (vma->vm_mm != src)
+		BUG();
+	return __copy_page_range(dst_vma, vma, vma->vm_start, vma->vm_end-vma->vm_start);
+}
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				long *zap_work, struct zap_details *details,
+				unsigned long *ub_unused_fix)
 {
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	spinlock_t *ptl;
 	int file_rss = 0;
 	int anon_rss = 0;
+	int rss;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
@@ -742,13 +817,14 @@ static unsigned long zap_pte_range(struc
 				trace_mm_anon_userfree(mm, addr, page);
 			} else {
 				if (pte_dirty(ptent))
-					set_page_dirty(page);
+					set_page_dirty_mm(page, mm);
 				if (pte_young(ptent))
 					SetPageReferenced(page);
 				file_rss--;
 				trace_mm_filemap_userunmap(mm, addr, page);
 			}
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -763,6 +839,9 @@ static unsigned long zap_pte_range(struc
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
+	rss = -(file_rss + anon_rss);
+	if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		(*ub_unused_fix) += rss;
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -772,7 +851,8 @@ static unsigned long zap_pte_range(struc
 static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				long *zap_work, struct zap_details *details,
+				unsigned long *ub_unused_fix)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -785,7 +865,7 @@ static inline unsigned long zap_pmd_rang
 			continue;
 		}
 		next = zap_pte_range(tlb, vma, pmd, addr, next,
-						zap_work, details);
+						zap_work, details, ub_unused_fix);
 	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
 
 	return addr;
@@ -794,7 +874,8 @@ static inline unsigned long zap_pmd_rang
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				long *zap_work, struct zap_details *details,
+				unsigned long *ub_unused_fix)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -807,7 +888,7 @@ static inline unsigned long zap_pud_rang
 			continue;
 		}
 		next = zap_pmd_range(tlb, vma, pud, addr, next,
-						zap_work, details);
+						zap_work, details, ub_unused_fix);
 	} while (pud++, addr = next, (addr != end && *zap_work > 0));
 
 	return addr;
@@ -821,7 +902,8 @@ static void mmap_flush(struct address_sp
 static unsigned long unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				long *zap_work, struct zap_details *details,
+				unsigned long *ub_unused_fix)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -839,7 +921,7 @@ static unsigned long unmap_page_range(st
 			continue;
 		}
 		next = zap_pud_range(tlb, vma, pgd, addr, next,
-						zap_work, details);
+						zap_work, details, ub_unused_fix);
 	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
 	tlb_end_vma(tlb, vma);
 
@@ -894,6 +976,7 @@ unsigned long unmap_vmas(struct mmu_gath
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	int fullmm = (*tlbp)->fullmm;
 	struct mm_struct *mm = vma ? vma->vm_mm : NULL;
+	unsigned long ub_unused_fix = 0;
 
 	if (mm)
 		mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -923,7 +1006,8 @@ unsigned long unmap_vmas(struct mmu_gath
 				start = end;
 			} else
 				start = unmap_page_range(*tlbp, vma,
-						start, end, &zap_work, details);
+						start, end, &zap_work, details,
+						&ub_unused_fix);
 
 			if (zap_work > 0) {
 				BUG_ON(start != end);
@@ -935,6 +1019,8 @@ unsigned long unmap_vmas(struct mmu_gath
 			if (need_resched() ||
 				(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
 				if (i_mmap_lock) {
+					__ub_unused_privvm_inc((*tlbp)->mm,
+							ub_unused_fix);
 					*tlbp = NULL;
 					goto out;
 				}
@@ -946,6 +1032,7 @@ unsigned long unmap_vmas(struct mmu_gath
 			zap_work = ZAP_BLOCK_SIZE;
 		}
 	}
+	__ub_unused_privvm_inc((*tlbp)->mm, ub_unused_fix);
 out:
 	if (mm)
 		mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
@@ -995,6 +1082,74 @@ static inline bool can_follow_write_pte(
 	return false;
 }
 
+struct page *
+follow_page_pte(struct mm_struct *mm, unsigned long address, int write,
+		pte_t *page_pte, spinlock_t **ptlp)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	unsigned long pfn;
+	struct page *page;
+
+	memset(page_pte, 0, sizeof(*page_pte));
+	page = follow_huge_addr(mm, address, write);
+	if (!IS_ERR(page))
+		return page;
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, address);
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		goto out;
+	if (pmd_huge(*pmd))
+		return follow_huge_pmd(mm, address, pmd, write);
+	if (pmd_bad(*pmd))
+		goto out;
+
+	/*
+	 * Unlike get_user_pages() we do not hold mmap sem,
+	 * so we need ptl lock to guard us against the race
+	 * when another thread doing unmap/remap the page and reuse it,
+	 * while we are still doing some operation on it.
+	 * i.e. the lock *MUST* be held until we are finished
+	 * accessing the page
+	 */
+	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
+	BUG_ON(!ptep);
+
+	pte = *ptep;
+	pte_unmap(ptep);
+	if (pte_present(pte) && pte_read(pte)) { /* handle PROT_NONE too */
+		if (write && !pte_write(pte))
+			goto out2;
+		if (write && !pte_dirty(pte)) {
+			struct page *page = pte_page(pte);
+			if (!PageDirty(page))
+				set_page_dirty(page);
+		}
+		pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+
+			mark_page_accessed(page);
+			return page;
+		} else {
+			*page_pte = pte;
+			return NULL;
+		}
+	}
+out2:
+	spin_unlock(*ptlp);
+out:
+	return NULL;
+}
+
+
 /*
  * Do a quick page-table lookup for a single page.
  */
@@ -1072,6 +1227,17 @@ no_page:
 		return page;
 	/* Fall through to ZERO_PAGE handling */
 no_page_table:
+#ifndef CONFIG_X86_4G
+	/*
+	 *
+	 * disable this for 4:4 - it prevents
+	 * follow_page() from ever seeing these pages.
+	 *
+	 * (The 'fix' is dubious anyway, there's
+	 * nothing that this code avoids which couldnt
+	 * be triggered from userspace anyway.)
+	 */
+
 	/*
 	 * When core dumping an enormous anonymous area that nobody
 	 * has touched so far, we don't want to allocate page tables.
@@ -1082,6 +1248,7 @@ no_page_table:
 			get_page(page);
 		BUG_ON(flags & FOLL_WRITE);
 	}
+#endif
 	return page;
 }
 
@@ -1194,7 +1361,10 @@ int get_user_pages(struct task_struct *t
 			continue;
 		}
 
-		foll_flags = FOLL_TOUCH;
+		if (write < 0)
+			foll_flags = 0;
+		else
+			foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
 		if (!write && use_zero_page(vma))
@@ -1333,6 +1503,8 @@ int zeromap_page_range(struct vm_area_st
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+
+	ub_unused_privvm_sub(mm, vma, size >> PAGE_SHIFT);
 	return err;
 }
 
@@ -1462,7 +1634,6 @@ int vm_insert_pfn(struct vm_area_struct 
 	retval = 0;
 out_unlock:
 	pte_unmap_unlock(pte, ptl);
-
 out:
 	return retval;
 }
@@ -1773,6 +1944,7 @@ static int do_wp_page(struct mm_struct *
 	int reuse = 0, ret = VM_FAULT_MINOR;
 	struct page *dirty_page = NULL;
 	int dirty_pte = 0;
+	struct page_beancounter *pbc;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
@@ -1797,6 +1969,11 @@ static int do_wp_page(struct mm_struct *
 			page_cache_release(old_page);
 		}
 		reuse = can_share_swap_page(old_page);
+		if (reuse && PageSwapCache(old_page) &&
+				!PageWriteback(old_page)) {
+			delete_from_swap_cache(old_page);
+			SetPageDirty(old_page);
+		}
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
@@ -1849,6 +2026,7 @@ static int do_wp_page(struct mm_struct *
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		ClearPageCheckpointed(old_page);
 		dirty_pte++;
 		ptep_set_access_flags(vma, address, page_table, entry, 1);
 		update_mmu_cache(vma, address, entry);
@@ -1864,6 +2042,9 @@ static int do_wp_page(struct mm_struct *
 gotten:
 	pte_unmap_unlock(page_table, ptl);
 
+	if (unlikely(pb_alloc(&pbc)))
+		goto oom_nopb;
+
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 	if (old_page == ZERO_PAGE(address)) {
@@ -1889,6 +2070,7 @@ gotten:
 				trace_mm_filemap_cow(mm, address, new_page);
 			}
 		} else {
+			ub_unused_privvm_dec(mm, vma);
 			inc_mm_counter(mm, anon_rss);
 			trace_mm_anon_cow(mm, address, new_page);
 		}
@@ -1908,6 +2090,7 @@ gotten:
 		update_mmu_cache(vma, address, entry);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
+		pb_add_ref(new_page, mm, &pbc);
 		if (old_page) {
 			/*
 			 * Only after switching the pte to the new page may
@@ -1932,6 +2115,7 @@ gotten:
 			 * old page will be flushed before it can be reused.
 			 */
 			page_remove_rmap(old_page);
+			pb_remove_ref(old_page, mm);
 		}
 
 		/* Free the old page.. */
@@ -1942,9 +2126,12 @@ gotten:
 		page_cache_release(new_page);
 	if (old_page)
 		page_cache_release(old_page);
+	pb_free(&pbc);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
+		if (vma->vm_file)
+			inode_update_time(vma->vm_file->f_mapping->host);
 		if (flush_mmap_pages || !dirty_pte)
 			set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
@@ -1954,6 +2141,8 @@ unlock:
 	}
 	return ret;
 oom:
+	pb_free(&pbc);
+oom_nopb:
 	if (old_page)
 		page_cache_release(old_page);
 	return VM_FAULT_OOM;
@@ -2312,10 +2501,17 @@ static int do_swap_page(struct mm_struct
 	swp_entry_t entry;
 	pte_t pte;
 	int ret = VM_FAULT_MINOR;
+	struct page_beancounter *pbc;
+	cycles_t start;
+	int drop_swap = 0;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
-		goto out;
+		goto out_nostat;
 
+	if (unlikely(pb_alloc(&pbc)))
+		return VM_FAULT_OOM;
+
+	start = get_cycles();
 	entry = pte_to_swp_entry(orig_pte);
 	if (is_migration_entry(entry)) {
 		migration_entry_wait(mm, pmd, address);
@@ -2363,19 +2559,26 @@ static int do_swap_page(struct mm_struct
 	/* The page isn't present yet, go ahead with the fault. */
 
 	inc_mm_counter(mm, anon_rss);
+	ub_percpu_inc(mm->mm_ub, swapin);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
+		drop_swap = 1;
 	}
 
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+	pb_add_ref(page, mm, &pbc);
+	ub_unused_privvm_dec(mm, vma);
 
 	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
+	if (drop_swap && !PageWriteback(page)) {
+		delete_from_swap_cache(page);
+		SetPageDirty(page);
+	} else
+		try_to_remove_exclusive_swap_page(page);
 	unlock_page(page);
 
 	if (write_access) {
@@ -2391,10 +2594,16 @@ static int do_swap_page(struct mm_struct
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
+	pb_free(&pbc);
+	spin_lock_irq(&kstat_glb_lock);
+	KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
+	spin_unlock_irq(&kstat_glb_lock);
+out_nostat:
 	trace_mm_anon_pgin(mm, address, page);
 	return ret;
 out_nomap:
 	pte_unmap_unlock(page_table, ptl);
+	pb_free(&pbc);
 	unlock_page(page);
 	page_cache_release(page);
 	return ret;
@@ -2495,6 +2704,7 @@ static int do_anonymous_page(struct mm_s
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
+	struct page_beancounter *pbc;
 
 	pte_unmap(page_table);
 
@@ -2505,6 +2715,9 @@ static int do_anonymous_page(struct mm_s
 	/* Use the zero-page for reads */
 	if (write_access) {
 		/* Allocate our own private page. */
+		if (unlikely(pb_alloc(&pbc)))
+			goto oom_nopb;
+
 		if (unlikely(anon_vma_prepare(vma)))
 			goto oom;
 		page = alloc_zeroed_user_highpage(vma, address);
@@ -2520,7 +2733,10 @@ static int do_anonymous_page(struct mm_s
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
 		page_add_new_anon_rmap(page, vma, address);
+		pb_add_ref(page, mm, &pbc);
 	} else {
+		pbc = NULL;
+
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
 		page_cache_get(page);
@@ -2533,6 +2749,7 @@ static int do_anonymous_page(struct mm_s
 		page_add_file_rmap(page);
 	}
 
+	ub_unused_privvm_dec(mm, vma);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
@@ -2540,12 +2757,15 @@ static int do_anonymous_page(struct mm_s
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	pb_free(&pbc);
 	trace_mm_anon_fault(mm, address, page);
 	return VM_FAULT_MINOR;
 release:
 	page_cache_release(page);
 	goto unlock;
 oom:
+	pb_free(&pbc);
+oom_nopb:
 	return VM_FAULT_OOM;
 }
 
@@ -2575,6 +2795,7 @@ static int do_no_page(struct mm_struct *
 	int anon = 0;
 	struct page *dirty_page = NULL;
 	int dirty_pte = 0;
+	struct page_beancounter *pbc;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2584,6 +2805,9 @@ static int do_no_page(struct mm_struct *
 		sequence = mapping->truncate_count;
 		smp_rmb(); /* serializes i_size against truncate_count */
 	}
+
+	if (unlikely(pb_alloc(&pbc)))
+		goto oom_nopb;
 retry:
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 	/*
@@ -2596,9 +2820,11 @@ retry:
 
 	/* no page was available -- either SIGBUS or OOM */
 	if (new_page == NOPAGE_SIGBUS)
-		return VM_FAULT_SIGBUS;
+		goto bus_nopg;
 	if (new_page == NOPAGE_OOM)
-		return VM_FAULT_OOM;
+		goto oom_nopg;
+	if (new_page == NOPAGE_RESTART)
+		goto restart_nopg;
 
 	/*
 	 * Should we do an early C-O-W break?
@@ -2662,10 +2888,12 @@ retry:
 	 */
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
+		struct user_beancounter *ub;
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access) {
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+			ClearPageCheckpointed(new_page);
 			dirty_pte++;
 		}
 		lazy_mmu_prot_update(entry);
@@ -2684,6 +2912,18 @@ retry:
 				get_page(dirty_page);
 			}
 		}
+		ub = page_ub(new_page);
+		if (ub != NULL && ub->ub_magic == UB_MAGIC) {
+		/* WOW: Page was already charged as page_ub. This may happens
+		 * for example then some driver export its low memory pages
+		 * to user space. We can't account page as page_ub and page_bp
+		 * at the same time. So uncharge page from UB counter.
+		 */
+			WARN_ON_ONCE(1);
+			ub_page_uncharge(new_page, 0);
+		}
+		pb_add_ref(new_page, mm, &pbc);
+		ub_unused_privvm_dec(mm, vma);
 	} else {
 		/* One of our sibling threads was faster, back out. */
 		unlock_page(new_page);
@@ -2696,6 +2936,8 @@ retry:
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
+		if (vma->vm_file)
+			inode_update_time(vma->vm_file->f_mapping->host);
 		if (flush_mmap_pages || !dirty_pte)
 			set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
@@ -2703,10 +2945,22 @@ unlock:
 		if (vma->vm_file)
 			file_update_time(vma->vm_file);
 	}
+	pb_free(&pbc);
 	return ret;
 oom:
 	page_cache_release(new_page);
+oom_nopg:
+	pb_free(&pbc);
+oom_nopb:
 	return VM_FAULT_OOM;
+
+bus_nopg:
+	pb_free(&pbc);
+	return VM_FAULT_SIGBUS;
+
+restart_nopg:
+	pb_free(&pbc);
+	return VM_FAULT_MINOR;
 }
 
 /*
@@ -2884,6 +3137,27 @@ int __handle_mm_fault(struct mm_struct *
 	pmd_t *pmd;
 	pte_t *pte;
 
+#ifdef CONFIG_VZ_GENCALLS
+	do {
+		int ret;
+#ifdef CONFIG_USER_RESOURCE
+		struct task_beancounter *tbc;
+
+		tbc = &current->task_bc;
+		if (!test_bit(UB_AFLAG_NOTIF_PAGEIN, &mm->mm_ub->ub_aflags) &&
+				tbc->pgfault_allot) {
+			tbc->pgfault_allot--;
+			break; /* skip notifier */
+		}
+#endif
+		ret = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_PAGEIN,
+				(void *)1);
+		if (ret & NOTIFY_FAIL)
+			return VM_FAULT_SIGBUS;
+		if (ret & NOTIFY_OK)
+			return VM_FAULT_MINOR; /* retry */
+	} while (0);
+#endif
 	__set_current_state(TASK_RUNNING);
 
 	count_vm_event(PGFAULT);
@@ -2921,7 +3195,11 @@ int __pud_alloc(struct mm_struct *mm, pg
 	spin_lock(&mm->page_table_lock);
 	if (pgd_present(*pgd))		/* Another has populated it */
 		pud_free(new);
-	else
+	else if (ub_page_table_charge(mm)) {
+		spin_unlock(&mm->page_table_lock);
+		pud_free(new);
+		return -ENOMEM;
+	} else
 		pgd_populate(mm, pgd, new);
 	spin_unlock(&mm->page_table_lock);
 	return 0;
@@ -2934,6 +3212,8 @@ int __pud_alloc(struct mm_struct *mm, pg
 }
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+EXPORT_SYMBOL_GPL(__pud_alloc);
+
 #ifndef __PAGETABLE_PMD_FOLDED
 /*
  * Allocate page middle directory.
@@ -2949,12 +3229,20 @@ int __pmd_alloc(struct mm_struct *mm, pu
 #ifndef __ARCH_HAS_4LEVEL_HACK
 	if (pud_present(*pud))		/* Another has populated it */
 		pmd_free(new);
-	else
+	else if (ub_page_table_charge(mm)) {
+		spin_unlock(&mm->page_table_lock);
+		pmd_free(new);
+		return -ENOMEM;
+	} else
 		pud_populate(mm, pud, new);
 #else
 	if (pgd_present(*pud))		/* Another has populated it */
 		pmd_free(new);
-	else
+	else if (ub_page_table_charge(mm)) {
+		spin_unlock(&mm->page_table_lock);
+		pmd_free(new);
+		return -ENOMEM;
+	} else
 		pgd_populate(mm, pud, new);
 #endif /* __ARCH_HAS_4LEVEL_HACK */
 	spin_unlock(&mm->page_table_lock);
@@ -2968,6 +3256,8 @@ int __pmd_alloc(struct mm_struct *mm, pu
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
+EXPORT_SYMBOL_GPL(__pmd_alloc);
+
 int make_pages_present(unsigned long addr, unsigned long end)
 {
 	int ret, len, write;
@@ -2987,6 +3277,8 @@ int make_pages_present(unsigned long add
 	return ret == len ? 0 : -1;
 }
 
+EXPORT_SYMBOL(make_pages_present);
+
 /* 
  * Map a vmalloc()-space virtual address to the physical page.
  */
diff -upr kernel-2.6.18-417.el5.orig/mm/mempolicy.c kernel-2.6.18-417.el5-028stab121/mm/mempolicy.c
--- kernel-2.6.18-417.el5.orig/mm/mempolicy.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mempolicy.c	2017-01-13 08:40:19.000000000 -0500
@@ -921,7 +921,7 @@ asmlinkage long sys_migrate_pages(pid_t 
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_pid_ve(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
diff -upr kernel-2.6.18-417.el5.orig/mm/mempool.c kernel-2.6.18-417.el5-028stab121/mm/mempool.c
--- kernel-2.6.18-417.el5.orig/mm/mempool.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mempool.c	2017-01-13 08:40:16.000000000 -0500
@@ -14,6 +14,7 @@
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/kmem_cache.h>
 
 static void add_element(mempool_t *pool, void *element)
 {
@@ -78,6 +79,8 @@ mempool_t *mempool_create_node(int min_n
 	init_waitqueue_head(&pool->wait);
 	pool->alloc = alloc_fn;
 	pool->free = free_fn;
+	if (alloc_fn == mempool_alloc_slab)
+		kmem_mark_nocharge((kmem_cache_t *)pool_data);
 
 	/*
 	 * First pre-allocate the guaranteed number of buffers.
@@ -119,6 +122,7 @@ int mempool_resize(mempool_t *pool, int 
 	unsigned long flags;
 
 	BUG_ON(new_min_nr <= 0);
+	gfp_mask &= ~__GFP_UBC;
 
 	spin_lock_irqsave(&pool->lock, flags);
 	if (new_min_nr <= pool->min_nr) {
@@ -212,6 +216,7 @@ void * mempool_alloc(mempool_t *pool, gf
 	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
 	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
 	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
+	gfp_mask &= ~__GFP_UBC;
 
 	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
 
diff -upr kernel-2.6.18-417.el5.orig/mm/migrate.c kernel-2.6.18-417.el5-028stab121/mm/migrate.c
--- kernel-2.6.18-417.el5.orig/mm/migrate.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/migrate.c	2017-01-13 08:40:20.000000000 -0500
@@ -890,7 +890,7 @@ asmlinkage long sys_move_pages(pid_t pid
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_pid_ve(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
diff -upr kernel-2.6.18-417.el5.orig/mm/mlock.c kernel-2.6.18-417.el5-028stab121/mm/mlock.c
--- kernel-2.6.18-417.el5.orig/mm/mlock.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mlock.c	2017-01-13 08:40:24.000000000 -0500
@@ -8,9 +8,11 @@
 #include <linux/capability.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 
+#include <ub/ub_vmpages.h>
 
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	unsigned long start, unsigned long end, unsigned int newflags)
@@ -25,6 +27,14 @@ static int mlock_fixup(struct vm_area_st
 		goto out;
 	}
 
+	if (newflags & VM_LOCKED) {
+		ret = ub_locked_charge(mm, end - start);
+		if (ret < 0) {
+			*prev = vma;
+			goto out;
+		}
+	}
+
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma));
@@ -38,13 +48,13 @@ static int mlock_fixup(struct vm_area_st
 	if (start != vma->vm_start) {
 		ret = split_vma(mm, vma, start, 1);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 	if (end != vma->vm_end) {
 		ret = split_vma(mm, vma, end, 0);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 success:
@@ -63,13 +73,19 @@ success:
 		pages = -pages;
 		if (!(newflags & VM_IO))
 			ret = make_pages_present(start, end);
-	}
+	} else
+		ub_locked_uncharge(mm, end - start);
 
 	vma->vm_mm->locked_vm -= pages;
 out:
 	if (ret == -ENOMEM)
 		ret = -EAGAIN;
 	return ret;
+
+out_uncharge:
+	if (newflags & VM_LOCKED)
+		ub_locked_uncharge(mm, end - start);
+	goto out;
 }
 
 static int do_mlock(unsigned long start, size_t len, int on)
@@ -146,6 +162,7 @@ asmlinkage long sys_mlock(unsigned long 
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_mlock);
 
 asmlinkage long sys_munlock(unsigned long start, size_t len)
 {
@@ -158,6 +175,7 @@ asmlinkage long sys_munlock(unsigned lon
 	up_write(&current->mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sys_munlock);
 
 static int do_mlockall(int flags)
 {
diff -upr kernel-2.6.18-417.el5.orig/mm/mmap.c kernel-2.6.18-417.el5-028stab121/mm/mmap.c
--- kernel-2.6.18-417.el5.orig/mm/mmap.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mmap.c	2017-01-13 08:40:41.000000000 -0500
@@ -27,6 +27,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/random.h>
+#include <linux/virtinfo.h>
+#include <linux/grsecurity.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -36,9 +38,12 @@
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
 
+#include <ub/ub_vmpages.h>
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft);
 
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
@@ -100,6 +105,18 @@ int __vm_enough_memory(struct mm_struct 
 
 	vm_acct_memory(pages);
 
+#ifdef CONFIG_USER_RESOURCE
+	switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
+				(void *)pages)
+			& (NOTIFY_OK | NOTIFY_FAIL)) {
+		case NOTIFY_OK:
+			return 0;
+		case NOTIFY_FAIL:
+			vm_unacct_memory(pages);
+			return -ENOMEM;
+	}
+#endif
+
 	/*
 	 * Sometimes we want to use more memory than we have
 	 */
@@ -218,6 +235,7 @@ void unlink_file_vma(struct vm_area_stru
 		spin_unlock(&mapping->i_mmap_lock);
 	}
 }
+EXPORT_SYMBOL_GPL(unlink_file_vma);
 
 /*
  * Close a vm structure and free it, returning the next.
@@ -227,12 +245,15 @@ static struct vm_area_struct *remove_vma
 	struct vm_area_struct *next = vma->vm_next;
 
 	might_sleep();
+
+	ub_memory_uncharge(vma->vm_mm, vma->vm_end - vma->vm_start,
+			vma->vm_flags, vma->vm_file);
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_free(vma_policy(vma));
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(vma->vm_mm, vma);
 	return next;
 }
 
@@ -275,7 +296,7 @@ asmlinkage unsigned long sys_brk(unsigne
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+	if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
 		goto out;
 set_brk:
 	mm->brk = brk;
@@ -396,7 +417,7 @@ void __vma_link_rb(struct mm_struct *mm,
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
 }
 
-static inline void __vma_link_file(struct vm_area_struct *vma)
+void __vma_link_file(struct vm_area_struct *vma)
 {
 	struct file * file;
 
@@ -417,6 +438,7 @@ static inline void __vma_link_file(struc
 		flush_dcache_mmap_unlock(mapping);
 	}
 }
+EXPORT_SYMBOL_GPL(__vma_link_file);
 
 static void
 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -628,7 +650,7 @@ again:			remove_next = 1 + (end > next->
 			fput(file);
 		mm->map_count--;
 		mpol_free(vma_policy(next));
-		kmem_cache_free(vm_area_cachep, next);
+		free_vma(mm, next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
 		 * we must remove another next too. It would clutter
@@ -909,6 +931,7 @@ unsigned long do_mmap_pgoff(struct file 
 	struct rb_node ** rb_link, * rb_parent;
 	int accountable = 1;
 	unsigned long charged = 0, reqprot = prot;
+	unsigned long ub_charged = 0;
 
 	if (file) {
 		if (is_file_hugepages(file))
@@ -932,7 +955,7 @@ unsigned long do_mmap_pgoff(struct file 
 			prot |= PROT_EXEC;
 
 	if (!len)
-		return -EINVAL;
+		return strncmp(current->comm, "rpm", 3) ? -EINVAL : addr;
 
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
@@ -950,6 +973,12 @@ unsigned long do_mmap_pgoff(struct file 
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	if (file && (prot & PROT_EXEC)) {
+		error = check_area_execute_ve(file->f_dentry, file->f_vfsmnt);
+		if (error)
+			return error;
+	}
+
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -1034,6 +1063,9 @@ unsigned long do_mmap_pgoff(struct file 
 	if (error)
 		return error;
 
+	if (!gr_acl_handle_mmap(file, prot))
+		return -EACCES;
+
 	/* Clear old maps */
 	error = -ENOMEM;
 munmap_back:
@@ -1064,6 +1096,11 @@ munmap_back:
 		}
 	}
 
+	if (ub_memory_charge(mm, len, vm_flags, file,
+				(flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
+		goto charge_error;
+	ub_charged = 1;
+
 	/*
 	 * Can we just expand an old private anonymous mapping?
 	 * The VM_SHARED test is necessary because shmem_zero_setup
@@ -1079,11 +1116,13 @@ munmap_back:
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = allocate_vma(mm, GFP_KERNEL |
+			(flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
 	}
+	memset(vma, 0, sizeof(*vma));
 
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
@@ -1108,6 +1147,19 @@ munmap_back:
 		error = file->f_op->mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
+		if (vm_flags != vma->vm_flags) {
+		/*
+		 * ->vm_flags was changed by the f_op->mmap method,
+		 * so we have to recharge the UB memory accounting.
+		 */
+			ub_memory_uncharge(mm, len, vm_flags, file);
+			if (ub_memory_charge(mm, len, vma->vm_flags, file,
+				(flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD))) {
+				ub_charged = 0;
+				error = -ENOMEM;
+				goto unmap_and_free_vma;
+			}
+		}
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
@@ -1148,7 +1200,7 @@ munmap_back:
 			fput(file);
 		}
 		mpol_free(vma_policy(vma));
-		kmem_cache_free(vm_area_cachep, vma);
+		free_vma(mm, vma);
 	}
 out:	
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -1175,8 +1227,11 @@ unmap_and_free_vma:
 	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
 	charged = 0;
 free_vma:
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(mm, vma);
 unacct_error:
+	if (ub_charged)
+		ub_memory_uncharge(mm, len, vm_flags, file);
+charge_error:
 	if (charged)
 		vm_unacct_memory(charged);
 	return error;
@@ -1625,12 +1680,16 @@ static int acct_stack_growth(struct vm_a
 			vma->vm_end - size;
 	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
 		return -EFAULT;
+	if (ub_memory_charge(mm, grow << PAGE_SHIFT, vma->vm_flags,
+				vma->vm_file, UB_SOFT))
+		goto fail_charge;
+
 	/*
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
 	if (security_vm_enough_memory_mm(mm, grow))
-		return -ENOMEM;
+		goto fail_sec;
 
 	/* Ok, everything looks good - let it rip */
 	mm->total_vm += grow;
@@ -1638,6 +1697,11 @@ static int acct_stack_growth(struct vm_a
 		mm->locked_vm += grow;
 	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
 	return 0;
+
+fail_sec:
+	ub_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_flags, vma->vm_file);
+fail_charge:
+	return -ENOMEM;
 }
 
 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -1878,7 +1942,7 @@ static int __split_vma(struct mm_struct 
 	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
 		return -EINVAL;
 
-	new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	new = allocate_vma(mm, SLAB_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
@@ -1894,7 +1958,7 @@ static int __split_vma(struct mm_struct 
 
 	pol = mpol_copy(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		kmem_cache_free(vm_area_cachep, new);
+		free_vma(mm, new);
 		return PTR_ERR(pol);
 	}
 	vma_set_policy(new, pol);
@@ -1930,6 +1994,7 @@ int split_vma(struct mm_struct *mm, stru
 
 	return __split_vma(mm, vma, addr, new_below);
 }
+EXPORT_SYMBOL_GPL(split_vma);
 
 /* Munmap is split into 2 main parts -- this part which finds
  * what needs doing, and the areas themselves, which do the
@@ -2033,7 +2098,7 @@ static inline void verify_mm_writelocked
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-unsigned long do_brk(unsigned long addr, unsigned long len)
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft)
 {
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
@@ -2093,8 +2158,11 @@ unsigned long do_brk(unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	if (ub_memory_charge(mm, len, flags, NULL, soft))
+		goto fail_charge;
+
 	if (security_vm_enough_memory(len >> PAGE_SHIFT))
-		return -ENOMEM;
+		goto fail_sec;
 
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
@@ -2104,11 +2172,11 @@ unsigned long do_brk(unsigned long addr,
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (!vma) {
-		vm_unacct_memory(len >> PAGE_SHIFT);
-		return -ENOMEM;
-	}
+	vma = allocate_vma(mm, GFP_KERNEL |
+			(soft == UB_SOFT ? __GFP_SOFT_UBC : 0));
+	if (!vma)
+		goto fail_alloc;
+	memset(vma, 0, sizeof(*vma));
 
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
@@ -2125,8 +2193,19 @@ out:
 		make_pages_present(addr, addr + len);
 	}
 	return addr;
+
+fail_alloc:
+	vm_unacct_memory(len >> PAGE_SHIFT);
+fail_sec:
+	ub_memory_uncharge(mm, len, flags, NULL);
+fail_charge:
+	return -ENOMEM;
 }
 
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	return __do_brk(addr, len, UB_SOFT);
+}
 EXPORT_SYMBOL(do_brk);
 
 /* Release all mmaps. */
@@ -2230,12 +2309,12 @@ struct vm_area_struct *copy_vma(struct v
 		    vma_start < new_vma->vm_end)
 			*vmap = new_vma;
 	} else {
-		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		new_vma = allocate_vma(mm, SLAB_KERNEL);
 		if (new_vma) {
 			*new_vma = *vma;
 			pol = mpol_copy(vma_policy(vma));
 			if (IS_ERR(pol)) {
-				kmem_cache_free(vm_area_cachep, new_vma);
+				free_vma(mm, new_vma);
 				return NULL;
 			}
 			vma_set_policy(new_vma, pol);
@@ -2293,10 +2372,11 @@ static void special_mapping_close(struct
 {
 }
 
-static struct vm_operations_struct special_mapping_vmops = {
+struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.nopage	= special_mapping_nopage,
 };
+EXPORT_SYMBOL(special_mapping_vmops);
 
 unsigned int vdso_populate = 0;
 
@@ -2314,7 +2394,7 @@ int install_special_mapping(struct mm_st
 	struct vm_area_struct *vma;
 	int err;
 
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = allocate_vma(mm, SLAB_KERNEL);
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 	memset(vma, 0, sizeof(*vma));
@@ -2357,7 +2437,7 @@ int install_special_mapping(struct mm_st
 	return err;
 
 out:
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(mm, vma);
 	return err;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/mm/mmzone.c kernel-2.6.18-417.el5-028stab121/mm/mmzone.c
--- kernel-2.6.18-417.el5.orig/mm/mmzone.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mmzone.c	2017-01-13 08:40:20.000000000 -0500
@@ -14,7 +14,7 @@ struct pglist_data *first_online_pgdat(v
 	return NODE_DATA(first_online_node);
 }
 
-EXPORT_UNUSED_SYMBOL(first_online_pgdat);  /*  June 2006  */
+EXPORT_SYMBOL(first_online_pgdat);  /*  June 2006  */
 
 struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 {
@@ -24,7 +24,7 @@ struct pglist_data *next_online_pgdat(st
 		return NULL;
 	return NODE_DATA(nid);
 }
-EXPORT_UNUSED_SYMBOL(next_online_pgdat);  /*  June 2006  */
+EXPORT_SYMBOL(next_online_pgdat);  /*  June 2006  */
 
 
 /*
diff -upr kernel-2.6.18-417.el5.orig/mm/mprotect.c kernel-2.6.18-417.el5-028stab121/mm/mprotect.c
--- kernel-2.6.18-417.el5.orig/mm/mprotect.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mprotect.c	2017-01-13 08:40:41.000000000 -0500
@@ -9,6 +9,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/shm.h>
@@ -21,6 +22,7 @@
 #include <linux/syscalls.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/grsecurity.h>
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -28,6 +30,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
@@ -140,6 +144,8 @@ mprotect_fixup(struct vm_area_struct *vm
 	unsigned long charged = 0, old_end = vma->vm_end;
 	pgoff_t pgoff;
 	int error;
+	unsigned long ch_size;
+	int ch_dir;
 	int dirty_accountable = 0;
 
 	if (newflags == oldflags) {
@@ -147,6 +153,12 @@ mprotect_fixup(struct vm_area_struct *vm
 		return 0;
 	}
 
+	error = -ENOMEM;
+	ch_size = nrpages - pages_in_vma_range(vma, start, end);
+	ch_dir = ub_protected_charge(mm, ch_size, newflags, vma);
+	if (ch_dir == PRIVVM_ERROR)
+		goto fail_ch;
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
@@ -159,7 +171,7 @@ mprotect_fixup(struct vm_area_struct *vm
 		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
+				goto fail_sec;
 			newflags |= VM_ACCOUNT;
 		}
 	}
@@ -202,7 +214,9 @@ success:
 	if (vma_wants_writenotify(vma)) {
 		vma->vm_page_prot = protection_map[newflags &
 			(VM_READ|VM_WRITE|VM_EXEC)];
-		dirty_accountable = 1;
+		if (!vma->vm_file ||
+		    !test_bit(AS_CHECKPOINT, &vma->vm_file->f_mapping->flags))
+			dirty_accountable = 1;
 	}
 
 	mmu_notifier_invalidate_range_start(mm, start, end);
@@ -213,10 +227,16 @@ success:
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
+	if (ch_dir == PRIVVM_TO_SHARED)
+		__ub_unused_privvm_dec(mm, ch_size);
 	return 0;
 
 fail:
 	vm_unacct_memory(charged);
+fail_sec:
+	if (ch_dir == PRIVVM_TO_PRIVATE)
+		__ub_unused_privvm_dec(mm, ch_size);
+fail_ch:
 	return error;
 }
 
@@ -278,6 +298,11 @@ sys_mprotect(unsigned long start, size_t
 	if (start > vma->vm_start)
 		prev = vma;
 
+	if (!gr_acl_handle_mprotect(vma->vm_file, prot)) {
+		error = -EACCES;
+		goto out;
+	}
+
 	for (nstart = start ; ; ) {
 		unsigned long newflags;
 
@@ -318,3 +343,4 @@ out:
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_mprotect);
diff -upr kernel-2.6.18-417.el5.orig/mm/mremap.c kernel-2.6.18-417.el5-028stab121/mm/mremap.c
--- kernel-2.6.18-417.el5.orig/mm/mremap.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/mremap.c	2017-01-13 08:40:17.000000000 -0500
@@ -24,6 +24,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -172,17 +174,21 @@ static unsigned long move_vma(struct vm_
 	unsigned long hiwater_vm;
 	int split = 0;
 
+	if (ub_memory_charge(mm, new_len, vm_flags,
+				vma->vm_file, UB_HARD))
+		goto err;
+
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
 	 * which may split one vma into three before unmapping.
 	 */
 	if (mm->map_count >= sysctl_max_map_count - 3)
-		return -ENOMEM;
+		goto err_nomem;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
-		return -ENOMEM;
+		goto err_nomem;
 
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
@@ -241,7 +247,13 @@ static unsigned long move_vma(struct vm_
 					   new_addr + new_len);
 	}
 
-	return new_addr;
+	if (new_addr != -ENOMEM)
+		return new_addr;
+
+err_nomem:
+	ub_memory_uncharge(mm, new_len, vm_flags, vma->vm_file);
+err:
+	return -ENOMEM;
 }
 
 static struct vm_area_struct *vma_to_resize(unsigned long addr,
@@ -448,7 +460,15 @@ unsigned long do_mremap(unsigned long ad
 	if (old_len == vma->vm_end - addr) {
 		/* can we just expand the current mapping? */
 		if (vma_expandable(vma, new_len - old_len)) {
-			int pages = (new_len - old_len) >> PAGE_SHIFT;
+			unsigned long len;
+			int pages;
+
+			len = new_len - old_len;
+			pages = len >> PAGE_SHIFT;
+			ret = -ENOMEM;
+			if (ub_memory_charge(mm, len, vma->vm_flags,
+						vma->vm_file, UB_HARD))
+				goto out;
 
 			vma_adjust(vma, vma->vm_start,
 				addr + new_len, vma->vm_pgoff, NULL);
diff -upr kernel-2.6.18-417.el5.orig/mm/oom_kill.c kernel-2.6.18-417.el5-028stab121/mm/oom_kill.c
--- kernel-2.6.18-417.el5.orig/mm/oom_kill.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/oom_kill.c	2017-01-13 08:40:20.000000000 -0500
@@ -17,6 +17,8 @@
 
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/virtinfo.h>
 #include <linux/swap.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
@@ -24,6 +26,9 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_oom.h>
+
 int sysctl_panic_on_oom;
 /* #define DEBUG */
 
@@ -62,8 +67,10 @@ unsigned long badness(struct task_struct
 	/*
 	 * swapoff can easily use up all memory, so kill those first.
 	 */
-	if (p->flags & PF_SWAPOFF)
+	if (p->flags & PF_SWAPOFF) {
+		task_unlock(p);
 		return ULONG_MAX;
+	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
@@ -202,21 +209,23 @@ static inline int constrained_alloc(stru
  *
  * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned long *ppoints)
+struct task_struct *oom_select_bad_process(struct user_beancounter *ub)
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
 	struct timespec uptime;
-	*ppoints = 0;
+	unsigned long chosen_points = 0;
 
 	do_posix_clock_monotonic_gettime(&uptime);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		unsigned long points;
 		int releasing;
 
 		/* skip kernel threads */
 		if (!p->mm)
 			continue;
+		if (ub_oom_task_skip(ub, p))
+			continue;
 
 		/* skip the init task with pid == 1 */
 		if (p->pid == 1)
@@ -239,7 +248,7 @@ static struct task_struct *select_bad_pr
 				continue;
 			if (p->flags & PF_EXITING && p == current) {
 				chosen = p;
-				*ppoints = ULONG_MAX;
+				chosen_points = ULONG_MAX;
 				break;
 			}
 			return ERR_PTR(-1UL);
@@ -248,11 +257,11 @@ static struct task_struct *select_bad_pr
 			continue;
 
 		points = badness(p, uptime.tv_sec);
-		if (points > *ppoints || !chosen) {
+		if (points > chosen_points || !chosen) {
 			chosen = p;
-			*ppoints = points;
+			chosen_points = points;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	return chosen;
 }
 
@@ -269,6 +278,10 @@ static void __oom_kill_task(struct task_
 		return;
 	}
 
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OOMKILL, p)
+			& NOTIFY_FAIL)
+		return 1;
+
 	task_lock(p);
 	if (!p->mm || p->mm == &init_mm) {
 		WARN_ON(1);
@@ -290,13 +303,16 @@ static void __oom_kill_task(struct task_
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 
 	force_sig(SIGKILL, p);
+	ub_oom_task_killed(p);
 }
 
 static int oom_kill_task(struct task_struct *p, const char *message)
 {
 	struct mm_struct *mm;
+	struct user_beancounter *ub;
 	struct task_struct *g, *q;
 
+	task_lock(p);
 	mm = p->mm;
 
 	/* WARNING: mm may not be dereferenced since we did not obtain its
@@ -308,32 +324,38 @@ static int oom_kill_task(struct task_str
 	 * However, this is of no concern to us.
 	 */
 
-	if (mm == NULL || mm == &init_mm)
+	if (mm == NULL || mm == &init_mm) {
+		task_unlock(p);
 		return 1;
+	}
+
+	ub = get_beancounter(mm_ub(mm));
+	task_unlock(p);
 
 	/*
 	 * Don't kill the process if any threads are set to OOM_DISABLE
 	 */
-	do_each_thread(g, q) {
+	do_each_thread_all(g, q) {
 		if (q->mm == mm && p->oomkilladj == OOM_DISABLE)
 			return 1;
-	} while_each_thread(g, q);
+	} while_each_thread_all(g, q);
 
 	__oom_kill_task(p, message);
 	/*
 	 * kill all processes that share the ->mm (i.e. all threads),
 	 * but are in a different thread group
 	 */
-	do_each_thread(g, q) {
+	do_each_thread_all(g, q) {
 		if (q->mm == mm && q->tgid != p->tgid)
 			__oom_kill_task(q, message);
-	} while_each_thread(g, q);
+	} while_each_thread_all(g, q);
 
+	ub_oom_mm_killed(ub);
+	put_beancounter(ub);
 	return 0;
 }
 
-static int oom_kill_process(struct task_struct *p, unsigned long points,
-		const char *message)
+int oom_kill_process(struct task_struct *p, const char *message)
 {
 	struct task_struct *c;
 	struct list_head *tsk;
@@ -358,6 +380,7 @@ static int oom_kill_process(struct task_
 	return oom_kill_task(p, message);
 }
 
+#ifndef CONFIG_USER_RESOURCE
 int should_oom_kill(void)
 {
 	static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED;
@@ -420,6 +443,7 @@ out_unlock:
 	spin_unlock(&oom_lock);
 	return ret;
 }
+#endif
 
 static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
 
@@ -446,16 +470,28 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifie
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, int force)
 {
 	struct task_struct *p;
-	unsigned long points = 0;
 	unsigned long freed = 0;
+	struct user_beancounter *ub;
 
 	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
 	if (freed > 0)
 		/* Got some memory back in the last second. */
 		return;
 
+#ifndef CONFIG_USER_RESOURCE
 	if (!should_oom_kill() && !force)
 		return;
+#endif
+
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OUTOFMEM, NULL)
+			& (NOTIFY_OK | NOTIFY_FAIL))
+		return;
+
+	ub = NULL;
+
+	cpuset_lock();
+	if (ub_oom_lock())
+		goto out_cpuset;
 
 	if (printk_ratelimit()) {
 		printk(KERN_WARNING "%s invoked oom-killer: "
@@ -463,9 +499,9 @@ void out_of_memory(struct zonelist *zone
 		current->comm, gfp_mask, order, current->oomkilladj);
 		dump_stack();
 		show_mem();
+		show_slab_info();
 	}
 
-	cpuset_lock();
 	read_lock(&tasklist_lock);
 
 	/*
@@ -474,36 +510,41 @@ void out_of_memory(struct zonelist *zone
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, points,
-				"No available memory (MPOL_BIND)");
+		oom_kill_process(current, "No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		oom_kill_process(current, points,
-				"No available memory in cpuset");
+		oom_kill_process(current, "No available memory in cpuset");
 		break;
 
 	case CONSTRAINT_NONE:
 		if (sysctl_panic_on_oom)
 			panic("out of memory. panic_on_oom is selected\n");
 retry:
+		put_beancounter(ub);
+
 		/*
 		 * Rambo mode: Shoot down a process and hope it solves whatever
 		 * issues we may have.
 		 */
-		p = select_bad_process(&points);
+		ub = ub_oom_select_worst();
+		p = oom_select_bad_process(ub);
 
 		if (PTR_ERR(p) == -1UL)
 			goto out;
 
 		/* Found nothing?!?! Either we hang forever, or we panic. */
 		if (!p) {
+			if (ub != NULL)
+				goto retry;
+
 			read_unlock(&tasklist_lock);
+			ub_oom_unlock();
 			cpuset_unlock();
 			panic("Out of memory and no killable processes...\n");
 		}
 
-		if (oom_kill_process(p, points, "Out of memory"))
+		if (oom_kill_process(p, "Out of memory"))
 			goto retry;
 
 		break;
@@ -511,6 +552,10 @@ retry:
 
 out:
 	read_unlock(&tasklist_lock);
+	ub_oom_unlock();
+	put_beancounter(ub);
+
+out_cpuset:
 	cpuset_unlock();
 
 	/*
diff -upr kernel-2.6.18-417.el5.orig/mm/page_alloc.c kernel-2.6.18-417.el5-028stab121/mm/page_alloc.c
--- kernel-2.6.18-417.el5.orig/mm/page_alloc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/page_alloc.c	2017-01-13 08:40:30.000000000 -0500
@@ -43,6 +43,9 @@
 #include <asm/div64.h>
 #include "internal.h"
 
+#include <ub/ub_mem.h>
+#include <ub/io_acct.h>
+
 /*
  * MCD - HACK: Find somewhere to initialize this EARLY, or make this
  * initializer cleaner
@@ -72,6 +75,7 @@ static void __free_pages_ok(struct page 
  */
 int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
 
+EXPORT_SYMBOL(nr_swap_pages);
 EXPORT_SYMBOL(totalram_pages);
 
 /*
@@ -463,6 +467,7 @@ static void __free_pages_ok(struct page 
 		return;
 
 	kernel_map_pages(page, 1 << order, 0);
+	ub_page_uncharge(page, order);
 	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, order);
@@ -559,7 +564,8 @@ static int prep_new_page(struct page *pa
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_fs_misc | 1 << PG_mappedtodisk);
+			1 << PG_fs_misc | 1 << PG_mappedtodisk |
+			1 << PG_checkpointed);
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 	kernel_map_pages(page, 1 << order, 1);
@@ -752,6 +758,7 @@ static void fastcall free_hot_cold_page(
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	ub_page_uncharge(page, 0);
 	local_irq_save(flags);
 	__count_vm_event(PGFREE);
 	list_add(&page->lru, &pcp->list);
@@ -930,6 +937,28 @@ get_page_from_freelist(gfp_t gfp_mask, u
 	return page;
 }
 
+static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
+		struct page *page, cycles_t time)
+{
+	int ind, cpu;
+
+	time = (jiffies - time) * cycles_per_jiffy;
+	if (!(gfp_mask & __GFP_WAIT))
+		ind = 0;
+	else if (!(gfp_mask & __GFP_HIGHMEM))
+		ind = (order > 0 ? 2 : 1);
+	else
+		ind = (order > 0 ? 4 : 3);
+
+	cpu = get_cpu();
+	KSTAT_LAT_PCPU_ADD(&kstat_glob.alloc_lat[ind], cpu, time);
+	if (!page)
+		kstat_glob.alloc_fails[cpu][ind]++;
+	put_cpu();
+}
+
+int alloc_fail_warn;
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -946,6 +975,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
 	int alloc_flags;
 	int did_some_progress;
 	int would_oom_kill = 0;
+	cycles_t start;
 
 	might_sleep_if(wait);
 
@@ -957,6 +987,7 @@ restart:
 		return NULL;
 	}
 
+	start = jiffies;
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
@@ -996,6 +1027,7 @@ restart:
 	if (page)
 		goto got_pg;
 
+rebalance:
 	/* This allocation should allow future memory freeing. */
 
 	if (((p->flags & PF_MEMALLOC) ||
@@ -1020,7 +1052,6 @@ nofail_alloc:
 	if (!wait)
 		goto nopage;
 
-rebalance:
 	if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL) || would_oom_kill)
 		goto nopage;
 	cond_resched();
@@ -1077,19 +1108,35 @@ rebalance:
 			do_retry = 1;
 	}
 	if (do_retry) {
+		if (nr_swap_pages <= 0 && !did_some_progress) {
+			if (!(gfp_mask & __GFP_NO_OOM))
+				out_of_memory(zonelist, gfp_mask, order, 0);
+			else
+				would_oom_kill = 1;
+			goto restart;
+		}
 		blk_congestion_wait(WRITE, HZ/50);
 		goto rebalance;
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
+	__alloc_collect_stats(gfp_mask, order, NULL, start);
+	if (alloc_fail_warn && !(gfp_mask & __GFP_NOWARN) && 
+			printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
 		dump_stack();
 		show_mem();
 	}
+	return NULL;
+
 got_pg:
+	__alloc_collect_stats(gfp_mask, order, page, start);
+	if (ub_page_charge(page, order, gfp_mask)) {
+		__free_pages(page, order);
+		page = NULL;
+	}
 	return page;
 }
 
@@ -1173,6 +1220,19 @@ unsigned int nr_free_pages(void)
 
 EXPORT_SYMBOL(nr_free_pages);
 
+unsigned int nr_free_lowpages (void)
+{
+	pg_data_t *pgdat;
+	unsigned int pages = 0;
+
+	for_each_online_pgdat(pgdat)
+		pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
+
+	return pages;
+}
+EXPORT_SYMBOL(nr_free_lowpages);
+
+
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
@@ -1353,8 +1413,8 @@ void show_free_areas(void)
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
-			K(zone->nr_active),
-			K(zone->nr_inactive),
+			K(zone_page_state(zone, NR_ACTIVE)),
+			K(zone_page_state(zone, NR_INACTIVE)),
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone->all_unreclaimable ? "yes" : "no")
@@ -1592,13 +1652,6 @@ static void __meminit build_zonelists(pg
 		int distance = node_distance(local_node, node);
 
 		/*
-		 * If another node is sufficiently far away then it is better
-		 * to reclaim pages in a zone before going off node.
-		 */
-		if (distance > RECLAIM_DISTANCE)
-			zone_reclaim_mode = 1;
-
-		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
 		 * distance group to make it round-robin.
@@ -2151,8 +2204,6 @@ static void __meminit free_area_init_cor
 		INIT_LIST_HEAD(&zone->inactive_list);
 		zone->nr_scan_active = 0;
 		zone->nr_scan_inactive = 0;
-		zone->nr_active = 0;
-		zone->nr_inactive = 0;
 		zap_zone_vm_stats(zone);
 		atomic_set(&zone->reclaim_in_progress, 0);
 		if (!size)
diff -upr kernel-2.6.18-417.el5.orig/mm/page_io.c kernel-2.6.18-417.el5-028stab121/mm/page_io.c
--- kernel-2.6.18-417.el5.orig/mm/page_io.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/page_io.c	2017-01-13 08:40:18.000000000 -0500
@@ -132,6 +132,7 @@ int swap_readpage(struct file *file, str
 {
 	struct bio *bio;
 	int ret = 0;
+	struct user_beancounter *old_ub;
 
 	BUG_ON(!PageLocked(page));
 	ClearPageUptodate(page);
@@ -143,7 +144,9 @@ int swap_readpage(struct file *file, str
 		goto out;
 	}
 	count_vm_event(PSWPIN);
+	old_ub = set_exec_ub(get_ub0());
 	submit_bio(READ, bio);
+	(void)set_exec_ub(old_ub);
 out:
 	return ret;
 }
diff -upr kernel-2.6.18-417.el5.orig/mm/page-writeback.c kernel-2.6.18-417.el5-028stab121/mm/page-writeback.c
--- kernel-2.6.18-417.el5.orig/mm/page-writeback.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/page-writeback.c	2017-01-13 08:40:19.000000000 -0500
@@ -33,6 +33,9 @@
 #include <linux/rmap.h>
 #include <trace/mm.h>
 
+#include <ub/io_acct.h>
+#include <ub/io_prio.h>
+
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
  * operation.  We do this so we don't hold I_LOCK against an inode for
@@ -198,6 +201,7 @@ static void balance_dirty_pages(struct a
 	long dirty_thresh;
 	unsigned long pages_written = 0;
 	unsigned long write_chunk = sync_writeback_pages();
+	struct user_beancounter *ub = get_io_ub();
 
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 
@@ -210,6 +214,30 @@ static void balance_dirty_pages(struct a
 			.range_cyclic	= 1,
 		};
 
+		if (ub_dirty_limits(&dirty_thresh, ub)) {
+			nr_reclaimable = ub_dirty_pages(ub);
+			if (nr_reclaimable > dirty_thresh) {
+				nr_reclaimable = ub_stat_get_exact(ub, dirty_pages);
+				if (nr_reclaimable <= dirty_thresh) {
+					ub_stat_flush_pcpu(ub, dirty_pages);
+					goto no_ub_balance;
+				}
+				if (!ub->dirty_exceeded)
+					ub->dirty_exceeded = 1;
+				wbc.only_this_ub = ub;
+				writeback_inodes(&wbc);
+				pages_written += write_chunk - wbc.nr_to_write;
+				if (pages_written >= write_chunk)
+					break;
+				blk_congestion_wait(WRITE, HZ/10);
+				continue;
+			}
+		}
+no_ub_balance:
+
+		if (ub->dirty_exceeded)
+			ub->dirty_exceeded = 0;
+
 		get_dirty_limits(&background_thresh, &dirty_thresh, mapping);
 		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
 					global_page_state(NR_UNSTABLE_NFS);
@@ -311,7 +339,7 @@ void balance_dirty_pages_ratelimited_nr(
 	unsigned long *p;
 
 	ratelimit = ratelimit_pages;
-	if (dirty_exceeded)
+	if (dirty_exceeded || get_io_ub()->dirty_exceeded)
 		ratelimit = 8;
 
 	/*
@@ -650,6 +678,7 @@ int write_one_page(struct page *page, in
 	} else {
 		unlock_page(page);
 	}
+
 	return ret;
 }
 EXPORT_SYMBOL(write_one_page);
@@ -679,14 +708,21 @@ int __set_page_dirty_nobuffers(struct pa
 			write_lock_irq(&mapping->tree_lock);
 			mapping2 = page_mapping(page);
 			if (mapping2) { /* Race with truncate? */
+				int acct = 0;
+
 				BUG_ON(mapping2 != mapping);
 				if (mapping_cap_account_dirty(mapping)) {
 					__inc_zone_page_state(page,
 								NR_FILE_DIRTY);
-					task_io_account_write(PAGE_CACHE_SIZE);
+					task_io_account_dirty(PAGE_CACHE_SIZE);
+					acct = 1;
 				}
 				radix_tree_tag_set(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
+				if (acct && !radix_tree_prev_tag_get(
+							&mapping->page_tree,
+							PAGECACHE_TAG_DIRTY))
+					ub_io_account_dirty(mapping, 1);
 			}
 			write_unlock_irq(&mapping->tree_lock);
 			if (mapping->host) {
@@ -735,6 +771,18 @@ int fastcall set_page_dirty(struct page 
 }
 EXPORT_SYMBOL(set_page_dirty);
 
+int fastcall set_page_dirty_mm(struct page *page, struct mm_struct *mm)
+{
+	struct user_beancounter *old_ub;
+	int ret;
+
+	old_ub = set_exec_ub(mm_ub(mm));
+	ret = set_page_dirty(page);
+	(void)set_exec_ub(old_ub);
+	return ret;
+}
+EXPORT_SYMBOL(set_page_dirty_mm);
+
 /*
  * set_page_dirty() is racy if the caller has no reference against
  * page->mapping->host, and if the page is unlocked.  This is because another
@@ -772,6 +820,11 @@ int test_clear_page_dirty(struct page *p
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+			if (mapping_cap_account_dirty(mapping) &&
+					radix_tree_prev_tag_get(
+						&mapping->page_tree,
+						PAGECACHE_TAG_DIRTY))
+				ub_io_account_clean(mapping, 1, 1);
 			write_unlock_irqrestore(&mapping->tree_lock, flags);
 			/*
 			 * We can continue to use `mapping' here because the
@@ -887,10 +940,16 @@ int test_set_page_writeback(struct page 
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-		if (!PageDirty(page))
+		if (!PageDirty(page)) {
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+			if (mapping_cap_account_dirty(mapping) &&
+					radix_tree_prev_tag_get(
+						&mapping->page_tree,
+						PAGECACHE_TAG_DIRTY))
+				ub_io_account_clean(mapping, 1, 0);
+		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
diff -upr kernel-2.6.18-417.el5.orig/mm/prio_tree.c kernel-2.6.18-417.el5-028stab121/mm/prio_tree.c
--- kernel-2.6.18-417.el5.orig/mm/prio_tree.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/prio_tree.c	2017-01-13 08:40:24.000000000 -0500
@@ -205,3 +205,6 @@ struct vm_area_struct *vma_prio_tree_nex
 	} else
 		return NULL;
 }
+
+#include <linux/module.h>
+EXPORT_SYMBOL(vma_prio_tree_next);
diff -upr kernel-2.6.18-417.el5.orig/mm/readahead.c kernel-2.6.18-417.el5-028stab121/mm/readahead.c
--- kernel-2.6.18-417.el5.orig/mm/readahead.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/readahead.c	2017-01-13 08:40:18.000000000 -0500
@@ -190,6 +190,8 @@ static int read_pages(struct address_spa
 	unsigned page_idx;
 	int ret;
 
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_PREPARE, NULL);
+
 	if (mapping->a_ops->readpages) {
 		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
 		/* Clean up the remaining pages */
@@ -401,6 +403,10 @@ int do_page_cache_readahead(struct addre
 	if (bdi_read_congested(mapping->backing_dev_info))
 		return -1;
 
+	if (virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_READAHEAD,
+				NULL) & NOTIFY_FAIL)
+		return -1;
+
 	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
 }
 
@@ -421,6 +427,10 @@ blockable_page_cache_readahead(struct ad
 	if (!block && bdi_read_congested(mapping->backing_dev_info))
 		return 0;
 
+	if (virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_READAHEAD,
+				NULL) & NOTIFY_FAIL)
+		return 0;
+
 	actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
 
 	return check_ra_success(ra, nr_to_read, actual);
diff -upr kernel-2.6.18-417.el5.orig/mm/rmap.c kernel-2.6.18-417.el5-028stab121/mm/rmap.c
--- kernel-2.6.18-417.el5.orig/mm/rmap.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/rmap.c	2017-01-13 08:40:24.000000000 -0500
@@ -56,9 +56,49 @@
 #include <linux/mmu_notifier.h>
 #include <trace/mm.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#include <linux/kmem_cache.h>
+#include <ub/ub_mem.h>
+
 #include <asm/tlbflush.h>
 
-struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_cachep;
+
+static struct anon_vma *anon_vma_alloc(struct mm_struct *mm)
+{
+	struct user_beancounter *ub = mm->mm_ub;
+	struct anon_vma *anon_vma;
+
+	anon_vma = kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL);
+	if (unlikely(!anon_vma))
+		goto out;
+
+	local_irq_disable();
+	if (ub_kmemsize_charge(ub, CHARGE_SIZE(anon_vma_cachep->objuse), UB_HARD))
+		goto out_free;
+	local_irq_enable();
+
+	anon_vma->anon_vma_ub = get_beancounter_fast(ub);
+
+	return anon_vma;
+out_free:
+	local_irq_enable();
+	kmem_cache_free(anon_vma_cachep, anon_vma);
+out:
+	return NULL;
+}
+
+static void anon_vma_free(struct anon_vma *anon_vma)
+{
+       struct user_beancounter *ub = anon_vma->anon_vma_ub;
+
+       local_irq_disable();
+       ub_kmemsize_uncharge(ub, CHARGE_SIZE(anon_vma_cachep->objuse));
+       local_irq_enable();
+       put_beancounter_fast(ub);
+       kmem_cache_free(anon_vma_cachep, anon_vma);
+}
 
 static inline void validate_anon_vma(struct vm_area_struct *find_vma)
 {
@@ -94,7 +134,7 @@ int anon_vma_prepare(struct vm_area_stru
 			locked = anon_vma;
 			spin_lock(&locked->lock);
 		} else {
-			anon_vma = anon_vma_alloc();
+			anon_vma = anon_vma_alloc(mm);
 			if (unlikely(!anon_vma))
 				return -ENOMEM;
 			allocated = anon_vma;
@@ -117,6 +157,7 @@ int anon_vma_prepare(struct vm_area_stru
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(anon_vma_prepare);
 
 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
@@ -145,6 +186,7 @@ void anon_vma_link(struct vm_area_struct
 		spin_unlock(&anon_vma->lock);
 	}
 }
+EXPORT_SYMBOL_GPL(anon_vma_link);
 
 void anon_vma_unlink(struct vm_area_struct *vma)
 {
@@ -181,14 +223,15 @@ static void anon_vma_ctor(void *data, st
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
+			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC,
+			anon_vma_ctor, NULL);
 }
 
 /*
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
 	unsigned long anon_mapping;
@@ -206,6 +249,7 @@ out:
 	rcu_read_unlock();
 	return anon_vma;
 }
+EXPORT_SYMBOL_GPL(page_lock_anon_vma);
 
 /*
  * At what user virtual address is page expected in vma?
@@ -601,6 +645,13 @@ void page_remove_rmap(struct page *page)
 		if ((!PageAnon(page) || PageSwapCache(page)) &&
 		    page_test_and_clear_dirty(page))
 			set_page_dirty(page);
+
+		/*
+		 * Well, when a page is unmapped, we cannot keep PG_checkpointed
+		 * flag, it is not accessible via process VM and we have no way
+		 * to reset its state
+		 */
+		ClearPageCheckpointed(page);
 		__dec_zone_page_state(page,
 				PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 		/*
@@ -651,7 +702,7 @@ static int try_to_unmap_one(struct page 
 
 	/* Move the dirty bit to the physical page now the pte is gone. */
 	if (pte_dirty(pteval))
-		set_page_dirty(page);
+		set_page_dirty_mm(page, mm);
 
 	/* Update high watermark before we lower rss */
 	update_hiwater_rss(mm);
@@ -698,6 +749,9 @@ static int try_to_unmap_one(struct page 
 
 
 	page_remove_rmap(page);
+	ub_unused_privvm_inc(mm, vma);
+	ub_percpu_inc(mm->mm_ub, unmap);
+	pb_remove_ref(page, mm);
 	page_cache_release(page);
 
 out_unmap:
@@ -785,9 +839,12 @@ static void try_to_unmap_cluster(unsigne
 
 		/* Move the dirty bit to the physical page now the pte is gone. */
 		if (pte_dirty(pteval))
-			set_page_dirty(page);
+			set_page_dirty_mm(page, mm);
 
 		page_remove_rmap(page);
+		ub_percpu_inc(mm->mm_ub, unmap);
+		pb_remove_ref(page, mm);
+		ub_unused_privvm_inc(mm, vma);
 		page_cache_release(page);
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
diff -upr kernel-2.6.18-417.el5.orig/mm/shmem.c kernel-2.6.18-417.el5-028stab121/mm/shmem.c
--- kernel-2.6.18-417.el5.orig/mm/shmem.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/shmem.c	2017-01-13 08:40:24.000000000 -0500
@@ -50,6 +50,8 @@
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+#include <ub/ub_vmpages.h>
+
 /* This magic number is used in glibc for posix shared memory */
 #define TMPFS_MAGIC	0x01021994
 
@@ -211,7 +213,7 @@ static void shmem_free_blocks(struct ino
  *
  * It has to be called with the spinlock held.
  */
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long swp_freed)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	long freed;
@@ -221,6 +223,8 @@ static void shmem_recalc_inode(struct in
 		info->alloced -= freed;
 		shmem_unacct_blocks(info->flags, freed);
 		shmem_free_blocks(inode, freed);
+		if (freed > swp_freed)
+			ub_tmpfs_respages_sub(info, freed - swp_freed);
 	}
 }
 
@@ -326,6 +330,11 @@ static void shmem_swp_set(struct shmem_i
 		struct page *page = kmap_atomic_to_page(entry);
 		set_page_private(page, page_private(page) + incdec);
 	}
+
+	if (incdec == 1)
+		ub_tmpfs_respages_dec(info);
+	else
+		ub_tmpfs_respages_inc(info);
 }
 
 /*
@@ -342,14 +351,24 @@ static swp_entry_t *shmem_swp_alloc(stru
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct page *page = NULL;
 	swp_entry_t *entry;
+	unsigned long ub_val;
 
 	if (sgp != SGP_WRITE &&
 	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return ERR_PTR(-EINVAL);
 
+	ub_val = 0;
+	if (info->next_index <= index) {
+		ub_val = index + 1 - info->next_index;
+		if (ub_shmpages_charge(info, ub_val))
+			return ERR_PTR(-ENOSPC);
+	}
+
 	while (!(entry = shmem_swp_entry(info, index, &page))) {
-		if (sgp == SGP_READ)
-			return shmem_swp_map(ZERO_PAGE(0));
+		if (sgp == SGP_READ) {
+			entry = shmem_swp_map(ZERO_PAGE(0));
+			goto out;
+		}
 		/*
 		 * Test free_blocks against 1 not 0, since we have 1 data
 		 * page (and perhaps indirect index pages) yet to allocate:
@@ -359,7 +378,8 @@ static swp_entry_t *shmem_swp_alloc(stru
 			spin_lock(&sbinfo->stat_lock);
 			if (sbinfo->free_blocks <= 1) {
 				spin_unlock(&sbinfo->stat_lock);
-				return ERR_PTR(-ENOSPC);
+				entry = ERR_PTR(-ENOSPC);
+				goto out;
 			}
 			sbinfo->free_blocks--;
 			inode->i_blocks += BLOCKS_PER_PAGE;
@@ -367,31 +387,43 @@ static swp_entry_t *shmem_swp_alloc(stru
 		}
 
 		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) |
+				__GFP_ZERO | __GFP_UBC);
 		if (page)
 			set_page_private(page, 0);
 		spin_lock(&info->lock);
 
 		if (!page) {
-			shmem_free_blocks(inode, 1);
-			return ERR_PTR(-ENOMEM);
+			entry = ERR_PTR(-ENOMEM);
+			goto out_block;
 		}
 		if (sgp != SGP_WRITE &&
 		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 			entry = ERR_PTR(-EINVAL);
-			break;
+			goto out_dir;
 		}
-		if (info->next_index <= index)
+		if (info->next_index <= index) {
+			ub_val = 0;
 			info->next_index = index + 1;
+		}
 	}
 	if (page) {
 		/* another task gave its page, or truncated the file */
 		shmem_free_blocks(inode, 1);
 		shmem_dir_free(page);
 	}
-	if (info->next_index <= index && !IS_ERR(entry))
+	if (info->next_index <= index)
 		info->next_index = index + 1;
 	return entry;
+
+out_dir:
+	shmem_dir_free(page);
+out_block:
+	shmem_free_blocks(inode, 1);
+out:
+	if (ub_val)
+		ub_shmpages_uncharge(info, ub_val);
+	return entry;
 }
 
 /*
@@ -503,6 +535,7 @@ static void shmem_truncate_range(struct 
 	info->flags |= SHMEM_TRUNCATE;
 	if (likely(end == (loff_t) -1)) {
 		limit = info->next_index;
+		ub_shmpages_uncharge(info, limit - idx);
 		upper_limit = SHMEM_MAX_INDEX;
 		info->next_index = idx;
 		needs_lock = NULL;
@@ -686,7 +719,7 @@ done2:
 	info->swapped -= nr_swaps_freed;
 	if (nr_pages_to_free)
 		shmem_free_blocks(inode, nr_pages_to_free);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, nr_swaps_freed);
 	spin_unlock(&info->lock);
 
 	/*
@@ -769,6 +802,7 @@ static void shmem_delete_inode(struct in
 		sbinfo->free_inodes++;
 		spin_unlock(&sbinfo->stat_lock);
 	}
+	shmi_ub_put(info);
 	clear_inode(inode);
 }
 
@@ -890,6 +924,12 @@ int shmem_unuse(swp_entry_t entry, struc
 	return found;
 }
 
+#ifdef CONFIG_USER_RESOURCE
+#define shm_get_swap_page(info)	(get_swap_page((info)->shmi_ub))
+#else
+#define shm_get_swap_page(info)	(get_swap_page(NULL))
+#endif
+
 /*
  * Move the page from the page cache to the swap cache.
  */
@@ -910,12 +950,12 @@ static int shmem_writepage(struct page *
 	info = SHMEM_I(inode);
 	if (info->flags & VM_LOCKED)
 		goto redirty;
-	swap = get_swap_page();
+	swap = shm_get_swap_page(info);
 	if (!swap.val)
 		goto redirty;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, 0);
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -947,6 +987,54 @@ redirty:
 	return AOP_WRITEPAGE_ACTIVATE;	/* Return with the page locked */
 }
 
+/* Insert a swap entry to shmem inode address space. */
+int shmem_insertpage(struct inode * inode, unsigned long index,
+		     swp_entry_t swap)
+{
+	struct shmem_inode_info *info;
+	swp_entry_t *entry;
+	int err;
+
+	info = SHMEM_I(inode);
+
+	spin_lock(&info->lock);
+	shmem_recalc_inode(inode, 0);
+	entry = shmem_swp_alloc(info, index, SGP_WRITE);
+	err = PTR_ERR(entry);
+	if (IS_ERR(entry))
+		goto unlock;
+
+	err = -EBUSY;
+	if (entry->val)
+		goto unlock_unmap;
+
+	err = -EINVAL;
+	if (!swap_duplicate(swap))
+		goto unlock_unmap;
+
+	info->alloced++;
+	ub_tmpfs_respages_inc(info);
+	inode->i_blocks += BLOCKS_PER_PAGE;
+	shmem_swp_set(info, entry, swap.val);
+	shmem_swp_unmap(entry);
+	spin_unlock(&info->lock);
+	if (list_empty(&info->swaplist)) {
+		spin_lock(&shmem_swaplist_lock);
+		/* move instead of add in case we're racing */
+		list_move_tail(&info->swaplist, &shmem_swaplist);
+		spin_unlock(&shmem_swaplist_lock);
+	}
+	return 0;
+
+unlock_unmap:
+	shmem_swp_unmap(entry);
+unlock:
+	spin_unlock(&info->lock);
+	return err;
+}
+EXPORT_SYMBOL(shmem_insertpage);
+
+
 #ifdef CONFIG_NUMA
 static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
 {
@@ -1103,7 +1191,7 @@ repeat:
 		goto failed;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, 0);
 	entry = shmem_swp_alloc(info, idx, sgp);
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
@@ -1272,6 +1360,7 @@ repeat:
 		clear_highpage(filepage);
 		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
+		ub_tmpfs_respages_inc(info);
 	}
 done:
 	if (*pagep != filepage) {
@@ -1373,28 +1462,6 @@ shmem_get_policy(struct vm_area_struct *
 }
 #endif
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct shmem_inode_info *info = SHMEM_I(inode);
-	int retval = -ENOMEM;
-
-	spin_lock(&info->lock);
-	if (lock && !(info->flags & VM_LOCKED)) {
-		if (!user_shm_lock(inode->i_size, user))
-			goto out_nomem;
-		info->flags |= VM_LOCKED;
-	}
-	if (!lock && (info->flags & VM_LOCKED) && user) {
-		user_shm_unlock(inode->i_size, user);
-		info->flags &= ~VM_LOCKED;
-	}
-	retval = 0;
-out_nomem:
-	spin_unlock(&info->lock);
-	return retval;
-}
-
 int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
@@ -1430,6 +1497,7 @@ shmem_get_inode(struct super_block *sb, 
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
+		shmi_ub_set(info, get_exec_ub());
 		spin_lock_init(&info->lock);
 		INIT_LIST_HEAD(&info->swaplist);
 
@@ -1534,6 +1602,7 @@ shmem_file_write(struct file *file, cons
 			break;
 
 		left = bytes;
+#ifndef CONFIG_X86_UACCESS_INDIRECT
 		if (PageHighMem(page)) {
 			volatile unsigned char dummy;
 			__get_user(dummy, buf);
@@ -1544,6 +1613,7 @@ shmem_file_write(struct file *file, cons
 							buf, bytes);
 			kunmap_atomic(kaddr, KM_USER0);
 		}
+#endif
 		if (left) {
 			kaddr = kmap(page);
 			left = __copy_from_user(kaddr + offset, buf, bytes);
@@ -1970,6 +2040,22 @@ static struct inode_operations shmem_sym
 	.put_link	= shmem_put_link,
 };
 
+#include <linux/virtinfo.h>
+
+static unsigned long tmpfs_ram_pages(void)
+{
+	struct meminfo mi;
+
+	if (ve_is_super(get_exec_env()))
+		return totalram_pages;
+
+	memset(&mi, 0, sizeof(mi));
+	si_meminfo(&mi.si);
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi) & NOTIFY_FAIL)
+		return 0;
+	return mi.si.totalram;
+}
+
 static int shmem_parse_options(char *options, int *mode, uid_t *uid,
 	gid_t *gid, unsigned long *blocks, unsigned long *inodes,
 	int *policy, nodemask_t *policy_nodes)
@@ -1995,6 +2081,8 @@ static int shmem_parse_options(char *opt
 		}
 		if (!*this_char)
 			continue;
+		if (!strcmp(this_char, "relatime"))
+			continue;
 		if ((value = strchr(this_char,'=')) != NULL) {
 			*value++ = 0;
 		} else {
@@ -2009,7 +2097,7 @@ static int shmem_parse_options(char *opt
 			size = memparse(value,&rest);
 			if (*rest == '%') {
 				size <<= PAGE_SHIFT;
-				size *= totalram_pages;
+				size *= tmpfs_ram_pages();
 				do_div(size, 100);
 				rest++;
 			}
@@ -2134,9 +2222,11 @@ static int shmem_fill_super(struct super
 	 * but the internal instance is left unlimited.
 	 */
 	if (!(sb->s_flags & MS_NOUSER)) {
+		unsigned long totalram_pages = tmpfs_ram_pages();
+
 		blocks = totalram_pages / 2;
 		inodes = totalram_pages - totalhigh_pages;
-		if (inodes > blocks)
+		if (inodes > blocks || !ve_is_super(get_exec_env()))
 			inodes = blocks;
 		if (shmem_parse_options(data, &mode, &uid, &gid, &blocks,
 					&inodes, &policy, &policy_nodes))
@@ -2295,6 +2385,10 @@ static struct vm_operations_struct shmem
 #endif
 };
 
+int is_shmem_mapping(struct address_space *map)
+{
+	return (map != NULL && map->a_ops == &shmem_aops);
+}
 
 static int shmem_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
@@ -2302,13 +2396,19 @@ static int shmem_get_sb(struct file_syst
 	return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
 }
 
-static struct file_system_type tmpfs_fs_type = {
+struct file_system_type tmpfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "tmpfs",
 	.get_sb		= shmem_get_sb,
 	.kill_sb	= kill_litter_super,
 };
+EXPORT_SYMBOL(tmpfs_fs_type);
+
+#ifdef CONFIG_VE
+#define shm_mnt	(get_exec_env()->shmem_mnt)
+#else
 static struct vfsmount *shm_mnt;
+#endif
 
 static int __init init_tmpfs(void)
 {
@@ -2343,6 +2443,36 @@ out3:
 }
 module_init(init_tmpfs)
 
+static inline int shm_charge_ahead(struct inode *inode)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long idx;
+	swp_entry_t *entry;
+
+	if (!inode->i_size)
+		return 0;
+	idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	/* 
+	 * Just touch info to allocate space for entry and
+	 * make all UBC checks 
+	 */
+	spin_lock(&info->lock);
+	entry = shmem_swp_alloc(info, idx, SGP_CACHE);
+	if (IS_ERR(entry))
+		goto err;
+	shmem_swp_unmap(entry);
+	spin_unlock(&info->lock);
+	return 0;
+
+err:
+	spin_unlock(&info->lock);
+	return PTR_ERR(entry);
+#else
+	return 0;
+#endif
+}
+
 /*
  * shmem_file_setup - get an unlinked file living in tmpfs
  *
@@ -2390,6 +2520,10 @@ struct file *shmem_file_setup(char *name
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
+	error = shm_charge_ahead(inode);
+	if (error)
+		goto close_file;
+
 	file->f_vfsmnt = mntget(shm_mnt);
 	file->f_dentry = dentry;
 	file->f_mapping = inode->i_mapping;
@@ -2405,6 +2539,7 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /*
  * shmem_zero_setup - setup a shared anonymous mapping
@@ -2422,6 +2557,8 @@ int shmem_zero_setup(struct vm_area_stru
 
 	if (vma->vm_file)
 		fput(vma->vm_file);
+	else if (vma->vm_flags & VM_WRITE)
+		__ub_unused_privvm_dec(vma->vm_mm, size >> PAGE_SHIFT);
 	vma->vm_file = file;
 	vma->vm_ops = &shmem_vm_ops;
 	return 0;
diff -upr kernel-2.6.18-417.el5.orig/mm/slab.c kernel-2.6.18-417.el5-028stab121/mm/slab.c
--- kernel-2.6.18-417.el5.orig/mm/slab.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/slab.c	2017-01-13 08:40:28.000000000 -0500
@@ -107,32 +107,19 @@
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
 #include	<linux/rtmutex.h>
+#include	<linux/kmem_slab.h>
+#include	<linux/kmem_cache.h>
 
 #include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
-/*
- * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- *		  SLAB_RED_ZONE & SLAB_POISON.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * STATS	- 1 to collect stats for /proc/slabinfo.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
- */
+#include	<ub/ub_mem.h>
 
-#ifdef CONFIG_DEBUG_SLAB
-#define	DEBUG		1
-#define	STATS		1
-#define	FORCED_DEBUG	1
-#else
-#define	DEBUG		0
-#define	STATS		0
-#define	FORCED_DEBUG	0
-#endif
+#define DEBUG		SLAB_DEBUG
+#define STATS		SLAB_STATS
+#define FORCED_DEBUG	SLAB_FORCED_DEBUG
 
 /* Shouldn't this be in a header file somewhere? */
 #define	BYTES_PER_WORD		sizeof(void *)
@@ -175,131 +162,17 @@
 			 SLAB_CACHE_DMA | \
 			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
+			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
+			 SLAB_UBC | SLAB_NO_CHARGE)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
+			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
+			 SLAB_UBC | SLAB_NO_CHARGE)
 #endif
 
 /*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
-#define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
-#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)
-
-/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-	struct list_head list;
-	unsigned long colouroff;
-	void *s_mem;		/* including colour offset */
-	unsigned int inuse;	/* num of objs active in slab */
-	kmem_bufctl_t free;
-	unsigned short nodeid;
-};
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
- */
-struct slab_rcu {
-	struct rcu_head head;
-	struct kmem_cache *cachep;
-	void *addr;
-};
-
-/*
- * struct array_cache
- *
- * Purpose:
- * - LIFO ordering, to hand out cache-warm objects from _alloc
- * - reduce the number of linked list operations
- * - reduce spinlock operations
- *
- * The limit is stored in the per-cpu structure to reduce the data cache
- * footprint.
- *
- */
-struct array_cache {
-	unsigned int avail;
-	unsigned int limit;
-	unsigned int batchcount;
-	unsigned int touched;
-	spinlock_t lock;
-	void *entry[0];	/*
-			 * Must have this definition in here for the proper
-			 * alignment of array_cache. Also simplifies accessing
-			 * the entries.
-			 * [0] is for gcc 2.95. It should really be [].
-			 */
-};
-
-/*
- * bootstrap: The caches do not work without cpuarrays anymore, but the
- * cpuarrays are allocated from the generic caches...
- */
-#define BOOT_CPUCACHE_ENTRIES	1
-struct arraycache_init {
-	struct array_cache cache;
-	void *entries[BOOT_CPUCACHE_ENTRIES];
-};
-
-/*
- * The slab lists for all objects.
- */
-struct kmem_list3 {
-	struct list_head slabs_partial;	/* partial list first, better asm code */
-	struct list_head slabs_full;
-	struct list_head slabs_free;
-	unsigned long free_objects;
-	unsigned int free_limit;
-	unsigned int colour_next;	/* Per-node cache coloring */
-	spinlock_t list_lock;
-	struct array_cache *shared;	/* shared per node */
-	struct array_cache **alien;	/* on other nodes */
-	unsigned long next_reap;	/* updated without locking */
-	int free_touched;		/* updated without locking */
-};
-
-/*
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
@@ -370,82 +243,6 @@ static void kmem_list3_init(struct kmem_
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
 	} while (0)
 
-/*
- * struct kmem_cache
- *
- * manages a cache.
- */
-
-struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
-	unsigned int batchcount;
-	unsigned int limit;
-	unsigned int shared;
-
-	unsigned int buffer_size;
-/* 3) touched by every alloc & free from the backend */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
-
-	unsigned int flags;		/* constant flags */
-	unsigned int num;		/* # of objs per slab */
-
-/* 4) cache_grow/shrink */
-	/* order of pgs per slab (2^n) */
-	unsigned int gfporder;
-
-	/* force GFP flags, e.g. GFP_DMA */
-	gfp_t gfpflags;
-
-	size_t colour;			/* cache colouring range */
-	unsigned int colour_off;	/* colour offset */
-	struct kmem_cache *slabp_cache;
-	unsigned int slab_size;
-	unsigned int dflags;		/* dynamic flags */
-
-	/* constructor func */
-	void (*ctor) (void *, struct kmem_cache *, unsigned long);
-
-	/* de-constructor func */
-	void (*dtor) (void *, struct kmem_cache *, unsigned long);
-
-/* 5) cache creation/removal */
-	const char *name;
-	struct list_head next;
-
-/* 6) statistics */
-#if STATS
-	unsigned long num_active;
-	unsigned long num_allocations;
-	unsigned long high_mark;
-	unsigned long grown;
-	unsigned long reaped;
-	unsigned long errors;
-	unsigned long max_freeable;
-	unsigned long node_allocs;
-	unsigned long node_frees;
-	unsigned long node_overflow;
-	atomic_t allochit;
-	atomic_t allocmiss;
-	atomic_t freehit;
-	atomic_t freemiss;
-#endif
-#if DEBUG
-	/*
-	 * If debugging is enabled, then the allocator can add additional
-	 * fields and/or padding to every object. buffer_size contains the total
-	 * object size including these internal fields, the following two
-	 * variables contain the offset to the user object and its size.
-	 */
-	int obj_offset;
-	int obj_size;
-#endif
-};
-
-#define CFLGS_OFF_SLAB		(0x80000000UL)
-#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
-
 #define BATCHREFILL_LIMIT	16
 /*
  * Optimization question: fewer reaps means less probability for unnessary
@@ -457,12 +254,14 @@ struct kmem_cache {
 #define REAPTIMEOUT_CPUC	(2*HZ)
 #define REAPTIMEOUT_LIST3	(4*HZ)
 
+#define	STATS_INC_GROWN(x)	((x)->grown++)
+#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
+#define	STATS_INC_SHRUNK(x)	((x)->shrunk++)
+
 #if STATS
 #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
 #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
 #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
-#define	STATS_INC_GROWN(x)	((x)->grown++)
-#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
 #define	STATS_SET_HIGH(x)						\
 	do {								\
 		if ((x)->num_active > (x)->high_mark)			\
@@ -485,8 +284,6 @@ struct kmem_cache {
 #define	STATS_INC_ACTIVE(x)	do { } while (0)
 #define	STATS_DEC_ACTIVE(x)	do { } while (0)
 #define	STATS_INC_ALLOCED(x)	do { } while (0)
-#define	STATS_INC_GROWN(x)	do { } while (0)
-#define	STATS_ADD_REAPED(x,y)	do { } while (0)
 #define	STATS_SET_HIGH(x)	do { } while (0)
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
@@ -578,65 +375,14 @@ static void **dbg_userword(struct kmem_c
 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 
 /*
- * Functions for storing/retrieving the cachep and or slab from the page
- * allocator.  These are used to find the slab an obj belongs to.  With kfree(),
- * these are used to find the cache which an obj belongs to.
- */
-static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
-{
-	page->lru.next = (struct list_head *)cache;
-}
-
-static inline struct kmem_cache *page_get_cache(struct page *page)
-{
-	page = compound_head(page);
-	BUG_ON(!PageSlab(page));
-	return (struct kmem_cache *)page->lru.next;
-}
-
-static inline void page_set_slab(struct page *page, struct slab *slab)
-{
-	page->lru.prev = (struct list_head *)slab;
-}
-
-static inline struct slab *page_get_slab(struct page *page)
-{
-	page = compound_head(page);
-	BUG_ON(!PageSlab(page));
-	return (struct slab *)page->lru.prev;
-}
-
-static inline struct kmem_cache *virt_to_cache(const void *obj)
-{
-	struct page *page = virt_to_page(obj);
-	return page_get_cache(page);
-}
-
-static inline struct slab *virt_to_slab(const void *obj)
-{
-	struct page *page = virt_to_page(obj);
-	return page_get_slab(page);
-}
-
-static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
-				 unsigned int idx)
-{
-	return slab->s_mem + cache->buffer_size * idx;
-}
-
-static inline unsigned int obj_to_index(struct kmem_cache *cache,
-					struct slab *slab, void *obj)
-{
-	return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
-}
-
-/*
  * These are the default caches for kmalloc. Custom caches can have other sizes.
  */
 struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
 	CACHE(ULONG_MAX)
+#include <linux/kmalloc_sizes.h>
+	CACHE(ULONG_MAX)
 #undef CACHE
 };
 EXPORT_SYMBOL(malloc_sizes);
@@ -650,10 +396,17 @@ struct cache_names {
 static struct cache_names __initdata cache_names[] = {
 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
 #include <linux/kmalloc_sizes.h>
+	{NULL,},
+#undef CACHE
+#define CACHE(x) { .name = "size-" #x "(UBC)", .name_dma = "size-" #x "(DMA,UBC)" },
+#include <linux/kmalloc_sizes.h>
 	{NULL,}
 #undef CACHE
 };
 
+int malloc_cache_num;
+EXPORT_SYMBOL(malloc_cache_num);
+
 static struct arraycache_init initarray_cache __initdata =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
@@ -734,6 +487,7 @@ static inline void init_lock_keys(void)
  */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
+static spinlock_t cache_chain_lock;
 
 /*
  * vm_enough_memory() looks at this to determine how many slab-allocated pages
@@ -774,6 +528,8 @@ static inline struct kmem_cache *__find_
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
+	if (gfpflags & __GFP_UBC)
+		csizep += malloc_cache_num;
 #if DEBUG
 	/* This happens if someone tries to call
 	 * kmem_cache_create(), or __kmalloc(), before
@@ -800,9 +556,17 @@ struct kmem_cache *kmem_find_general_cac
 }
 EXPORT_SYMBOL(kmem_find_general_cachep);
 
-static size_t slab_mgmt_size(size_t nr_objs, size_t align)
+static size_t slab_mgmt_size_noalign(size_t nr_objs, int flags)
+{
+	size_t size_noub;
+
+	size_noub = sizeof(struct slab) + nr_objs * sizeof(kmem_bufctl_t);
+	return ALIGN(size_noub, UB_ALIGN(flags)) + nr_objs * UB_EXTRA(flags);
+}
+
+static size_t slab_mgmt_size(size_t nr_objs, size_t align, int flags)
 {
-	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+	return ALIGN(slab_mgmt_size_noalign(nr_objs, flags), align);
 }
 
 /*
@@ -847,20 +611,23 @@ static void cache_estimate(unsigned long
 		 * into account.
 		 */
 		nr_objs = (slab_size - sizeof(struct slab)) /
-			  (buffer_size + sizeof(kmem_bufctl_t));
+			  (buffer_size + sizeof(kmem_bufctl_t) +
+			   	UB_EXTRA(flags));
 
 		/*
 		 * This calculated number will be either the right
 		 * amount, or one greater than what we want.
 		 */
-		if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
-		       > slab_size)
+		if (slab_mgmt_size(nr_objs, align, flags) +
+				nr_objs * buffer_size > slab_size)
 			nr_objs--;
+		BUG_ON(slab_mgmt_size(nr_objs, align, flags) +
+				nr_objs * buffer_size > slab_size);
 
 		if (nr_objs > SLAB_LIMIT)
 			nr_objs = SLAB_LIMIT;
 
-		mgmt_size = slab_mgmt_size(nr_objs, align);
+		mgmt_size = slab_mgmt_size(nr_objs, align, flags);
 	}
 	*num = nr_objs;
 	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
@@ -1342,6 +1109,7 @@ static void init_list(struct kmem_cache 
 	cachep->nodelists[nodeid] = ptr;
 	local_irq_enable();
 }
+static int offslab_limit;
 
 /*
  * Initialisation.  Called after the page allocator have been initialised and
@@ -1390,6 +1158,7 @@ void __init kmem_cache_init(void)
 
 	/* 1) create the cache_cache */
 	INIT_LIST_HEAD(&cache_chain);
+	spin_lock_init(&cache_chain_lock);
 	list_add(&cache_cache.next, &cache_chain);
 	cache_cache.colour_off = cache_line_size();
 	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
@@ -1423,7 +1192,7 @@ void __init kmem_cache_init(void)
 	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
 					sizes[INDEX_AC].cs_size,
 					ARCH_KMALLOC_MINALIGN,
-					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+					ARCH_KMALLOC_FLAGS | SLAB_PANIC,
 					NULL, NULL);
 
 	if (INDEX_AC != INDEX_L3) {
@@ -1431,12 +1200,13 @@ void __init kmem_cache_init(void)
 			kmem_cache_create(names[INDEX_L3].name,
 				sizes[INDEX_L3].cs_size,
 				ARCH_KMALLOC_MINALIGN,
-				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+				ARCH_KMALLOC_FLAGS | SLAB_PANIC,
 				NULL, NULL);
 	}
 
 	slab_early_init = 0;
 
+	for (i = 0; i < 2; i++) {
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
 		 * For performance, all the general caches are L1 aligned.
@@ -1449,19 +1219,28 @@ void __init kmem_cache_init(void)
 			sizes->cs_cachep = kmem_cache_create(names->name,
 					sizes->cs_size,
 					ARCH_KMALLOC_MINALIGN,
-					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+					ARCH_KMALLOC_FLAGS | SLAB_PANIC |
+					(i ? SLAB_UBC : 0) | SLAB_NO_CHARGE,
 					NULL, NULL);
 		}
+		if (!(OFF_SLAB(sizes->cs_cachep)))
+			offslab_limit = sizes->cs_size;
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 					sizes->cs_size,
 					ARCH_KMALLOC_MINALIGN,
 					ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
-						SLAB_PANIC,
-					NULL, NULL);
+					(i ? SLAB_UBC : 0) | SLAB_NO_CHARGE|
+					SLAB_PANIC, NULL, NULL);
 		sizes++;
 		names++;
 	}
+
+	sizes++;
+	names++;
+	if (!i)
+		malloc_cache_num = sizes - malloc_sizes;
+	}
 	/* 4) Replace the bootstrap head arrays */
 	{
 		struct array_cache *ptr;
@@ -1902,7 +1681,6 @@ static void set_up_list3s(struct kmem_ca
 static size_t calculate_slab_order(struct kmem_cache *cachep,
 			size_t size, size_t align, unsigned long flags)
 {
-	unsigned long offslab_limit;
 	size_t left_over = 0;
 	int gfporder;
 
@@ -1915,15 +1693,10 @@ static size_t calculate_slab_order(struc
 			continue;
 
 		if (flags & CFLGS_OFF_SLAB) {
-			/*
-			 * Max number of objs-per-slab for caches which
-			 * use off-slab slabs. Needed to avoid a possible
-			 * looping condition in cache_grow().
-			 */
-			offslab_limit = size - sizeof(struct slab);
-			offslab_limit /= sizeof(kmem_bufctl_t);
+			int slab_size;
 
- 			if (num > offslab_limit)
+			slab_size = slab_mgmt_size_noalign(num, flags);
+			if (slab_size > offslab_limit)
 				break;
 		}
 
@@ -2075,7 +1848,11 @@ kmem_cache_create (const char *name, siz
 		 * area of the module.  Print a warning.
 		 */
 		set_fs(KERNEL_DS);
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+		res = __direct_get_user(tmp,pc->name);
+#else
 		res = __get_user(tmp, pc->name);
+#endif
 		set_fs(old_fs);
 		if (res) {
 			printk("SLAB: cache with size %d has lost its name\n",
@@ -2220,8 +1997,7 @@ kmem_cache_create (const char *name, siz
 		cachep = NULL;
 		goto oops;
 	}
-	slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
-			  + sizeof(struct slab), align);
+	slab_size = slab_mgmt_size(cachep->num, align, flags);
 
 	/*
 	 * If the slab has been placed off-slab, and we have enough space then
@@ -2234,8 +2010,7 @@ kmem_cache_create (const char *name, siz
 
 	if (flags & CFLGS_OFF_SLAB) {
 		/* really off slab. No need for manual alignment */
-		slab_size =
-		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+		slab_size = slab_mgmt_size_noalign(cachep->num, flags);
 	}
 
 	cachep->colour_off = cache_line_size();
@@ -2260,7 +2035,10 @@ kmem_cache_create (const char *name, siz
 	setup_cpu_cache(cachep);
 
 	/* cache setup completed, link it into the list */
+	spin_lock(&cache_chain_lock);
 	list_add(&cachep->next, &cache_chain);
+	spin_unlock(&cache_chain_lock);
+	set_cache_objuse(cachep);
 oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
@@ -2370,6 +2148,7 @@ static int drain_freelist(struct kmem_ca
 		BUG_ON(slabp->inuse);
 #endif
 		list_del(&slabp->list);
+		STATS_INC_SHRUNK(cache);
 		/*
 		 * Safe to drop the lock. The slab is no longer linked
 		 * to the cache.
@@ -2453,11 +2232,15 @@ int kmem_cache_destroy(struct kmem_cache
 	/*
 	 * the chain is never empty, cache_cache is never destroyed
 	 */
+	spin_lock(&cache_chain_lock);
 	list_del(&cachep->next);
+	spin_unlock(&cache_chain_lock);
 
 	if (__cache_shrink(cachep)) {
 		slab_error(cachep, "Can't free all objects");
+		spin_lock(&cache_chain_lock);
 		list_add(&cachep->next, &cache_chain);
+		spin_unlock(&cache_chain_lock);
 		mutex_unlock(&cache_chain_mutex);
 		return 1;
 	}
@@ -2477,6 +2260,8 @@ int kmem_cache_destroy(struct kmem_cache
 			kfree(l3);
 		}
 	}
+
+	ub_kmemcache_free(cachep);
 	kmem_cache_free(&cache_cache, cachep);
 	mutex_unlock(&cache_chain_mutex);
 	return 0;
@@ -2493,7 +2278,7 @@ static struct slab *alloc_slabmgmt(struc
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
 		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
-					      local_flags, nodeid);
+				local_flags & (~__GFP_UBC), nodeid);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2504,14 +2289,10 @@ static struct slab *alloc_slabmgmt(struc
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
 	slabp->nodeid = nodeid;
+	init_slab_ubps(cachep, slabp);
 	return slabp;
 }
 
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
-{
-	return (kmem_bufctl_t *) (slabp + 1);
-}
-
 static void cache_init_objs(struct kmem_cache *cachep,
 			    struct slab *slabp, unsigned long ctor_flags)
 {
@@ -2689,7 +2470,7 @@ static int cache_grow(struct kmem_cache 
 	 * Get mem for the objs.  Attempt to allocate a physical page from
 	 * 'nodeid'.
 	 */
-	objp = kmem_getpages(cachep, flags, nodeid);
+	objp = kmem_getpages(cachep, flags & (~__GFP_UBC), nodeid);
 	if (!objp)
 		goto failed;
 
@@ -3047,10 +2828,15 @@ static __always_inline void *__cache_all
 
 	local_irq_save(save_flags);
 	objp = ____cache_alloc(cachep, flags);
-	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
 	prefetchw(objp);
+
+	if (objp && ub_slab_charge(cachep, objp, flags)) {
+		kmem_cache_free(cachep, objp);
+		objp = NULL;
+	}
+	local_irq_restore(save_flags);
 	return objp;
 }
 
@@ -3166,6 +2952,7 @@ static void free_block(struct kmem_cache
 		/* fixup slab chains */
 		if (slabp->inuse == 0) {
 			if (l3->free_objects > l3->free_limit) {
+				STATS_INC_SHRUNK(cachep);
 				l3->free_objects -= cachep->num;
 				slab_destroy(cachep, slabp);
 			} else {
@@ -3181,6 +2968,19 @@ static void free_block(struct kmem_cache
 	}
 }
 
+void kmem_cache_free_block(kmem_cache_t *cachep, struct kmem_list3 *l3,
+		void **objpp, int nr_objects, int node)
+{
+	unsigned long flags;
+
+	if (!nr_objects)
+		return;
+
+	spin_lock_irqsave(&l3->list_lock, flags);
+	free_block(cachep, objpp, nr_objects, node);
+	spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
 {
 	int batchcount;
@@ -3243,6 +3043,8 @@ static inline void __cache_free(struct k
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	ub_slab_uncharge(cachep, objp);
+
 	if (cache_free_alien(cachep, objp))
 		return;
 
@@ -3356,11 +3158,15 @@ void *kmem_cache_alloc_node(struct kmem_
 		ptr = ____cache_alloc(cachep, flags);
 	else
 		ptr = __cache_alloc_node(cachep, flags, nodeid);
-	local_irq_restore(save_flags);
 
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
 					   __builtin_return_address(0));
 
+	if (ptr && ub_slab_charge(cachep, ptr, flags)) {
+		kmem_cache_free(cachep, ptr);
+		ptr = NULL;
+	}
+	local_irq_restore(save_flags);
 	return ptr;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
@@ -3426,10 +3232,10 @@ EXPORT_SYMBOL(__kmalloc_track_caller);
  *
  * @size: how many bytes of memory are required.
  */
-void *__alloc_percpu(size_t size)
+void *__alloc_percpu_mask(size_t size, gfp_t gfp)
 {
 	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+	struct percpu_data *pdata = kmalloc(sizeof(*pdata), gfp);
 
 	if (!pdata)
 		return NULL;
@@ -3443,9 +3249,9 @@ void *__alloc_percpu(size_t size)
 		int node = cpu_to_node(i);
 
 		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
+			pdata->ptrs[i] = kmalloc_node(size, gfp, node);
 		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+			pdata->ptrs[i] = kmalloc(size, gfp);
 
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
@@ -3464,7 +3270,7 @@ unwind_oom:
 	kfree(pdata);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_percpu);
+EXPORT_SYMBOL(__alloc_percpu_mask);
 #endif
 
 /**
@@ -3792,7 +3598,7 @@ static void cache_reap(void *unused)
 {
 	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
-	int node = numa_node_id();
+	int node;
 
 	if (!mutex_trylock(&cache_chain_mutex)) {
 		/* Give up. Setup the next iteration. */
@@ -3801,10 +3607,18 @@ static void cache_reap(void *unused)
 		return;
 	}
 
+	{KSTAT_PERF_ENTER(cache_reap)
+	preempt_disable();
 	list_for_each_entry(searchp, &cache_chain, next) {
 		check_irq_on();
 
 		/*
+		 * In the original kernel this thread is bound to a CPU,
+		 * but in VZ it can migrate from one physical
+		 * CPU to another due to cond_resched() below.
+		 */
+		node = numa_node_id();
+		/*
 		 * We only take the l3 lock if absolutely necessary and we
 		 * have established with reasonable certainty that
 		 * we can do some work if the lock was obtained.
@@ -3836,14 +3650,18 @@ static void cache_reap(void *unused)
 			STATS_ADD_REAPED(searchp, freed);
 		}
 next:
+		preempt_enable_no_resched();
 		cond_resched();
+		preempt_disable();
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
 	next_reap_node();
 	refresh_cpu_vm_stats(smp_processor_id());
+	KSTAT_PERF_LEAVE(cache_reap)}
 	/* Set up the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
+	preempt_enable();
 }
 
 #ifdef CONFIG_PROC_FS
@@ -3865,12 +3683,80 @@ static void print_slabinfo_header(struct
 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
+		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow> <shrunk>");
 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
 	seq_putc(m, '\n');
 }
 
+#define SHOW_TOP_SLABS	10
+
+static unsigned long get_cache_size(struct kmem_cache *cachep)
+{
+	unsigned long flags;
+	unsigned long slabs;
+	struct kmem_list3 *l3;
+	struct list_head *lh;
+	int node;
+
+	slabs = 0;
+
+	for_each_online_node (node) {
+		l3 = cachep->nodelists[node];
+		if (l3 == NULL)
+			continue;
+
+		spin_lock_irqsave(&l3->list_lock, flags);
+		list_for_each (lh, &l3->slabs_full)
+			slabs++;
+		list_for_each (lh, &l3->slabs_partial)
+			slabs++;
+		list_for_each (lh, &l3->slabs_free)
+			slabs++;
+		spin_unlock_irqrestore(&l3->list_lock, flags);
+	}
+
+	return slabs * (PAGE_SIZE << cachep->gfporder) +
+		(OFF_SLAB(cachep) ?
+		 cachep->slabp_cache->buffer_size * slabs : 0);
+}
+
+void show_slab_info(void)
+{
+	int i, j;
+	unsigned long size;
+	struct kmem_cache *ptr;
+	unsigned long sizes[SHOW_TOP_SLABS];
+	struct kmem_cache *top[SHOW_TOP_SLABS];
+
+	memset(top, 0, sizeof(top));
+	memset(sizes, 0, sizeof(sizes));
+
+	printk("Top %d caches:\n", SHOW_TOP_SLABS);
+
+	spin_lock(&cache_chain_lock);
+	list_for_each_entry (ptr, &cache_chain, next) {
+		size = get_cache_size(ptr);
+
+		j = 0;
+		for (i = 1; i < SHOW_TOP_SLABS; i++)
+			if (sizes[i] < sizes[j])
+				j = i;
+
+		if (size > sizes[j]) {
+			sizes[j] = size;
+			top[j] = ptr;
+		}
+	}
+
+	for (i = 0; i < SHOW_TOP_SLABS; i++)
+		if (top[i])
+			printk("%-21s: size %10lu objsize %10u\n",
+					top[i]->name, sizes[i],
+					top[i]->buffer_size);
+	spin_unlock(&cache_chain_lock);
+}
+
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
 	loff_t n = *pos;
@@ -3958,7 +3844,7 @@ static int s_show(struct seq_file *m, vo
 	if (error)
 		printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
 
-	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+	seq_printf(m, "%-21s %6lu %6lu %6u %4u %4d",
 		   name, active_objs, num_objs, cachep->buffer_size,
 		   cachep->num, (1 << cachep->gfporder));
 	seq_printf(m, " : tunables %4u %4u %4u",
@@ -3971,6 +3857,7 @@ static int s_show(struct seq_file *m, vo
 		unsigned long allocs = cachep->num_allocations;
 		unsigned long grown = cachep->grown;
 		unsigned long reaped = cachep->reaped;
+		unsigned long shrunk = cachep->shrunk;
 		unsigned long errors = cachep->errors;
 		unsigned long max_freeable = cachep->max_freeable;
 		unsigned long node_allocs = cachep->node_allocs;
@@ -3978,9 +3865,10 @@ static int s_show(struct seq_file *m, vo
 		unsigned long overflows = cachep->node_overflow;
 
 		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
+				%4lu %4lu %4lu %4lu %4lu %4lu",
+				allocs, high, grown,
 				reaped, errors, max_freeable, node_allocs,
-				node_frees, overflows);
+				node_frees, overflows, shrunk);
 	}
 	/* cpu stats */
 	{
@@ -4018,6 +3906,61 @@ struct seq_operations slabinfo_op = {
 	.show = s_show,
 };
 
+/* Show object, belonging to each beancounter */
+static int check_ubcs_on_slab(struct kmem_cache *c, struct slab *s,
+		struct user_beancounter *ub)
+{
+	int i, sum = 0;
+	struct user_beancounter **ubcs;
+
+	ubcs = slab_ubcs(c, s);
+	for (i = 0; i < c->num; i++)
+		if (ubcs[i] == ub)
+			sum++;
+
+	return sum;
+}
+
+static int check_ubcs_on_cache(struct kmem_cache *c,
+		struct user_beancounter *ub)
+{
+	int node, sum = 0;
+	struct kmem_list3 *l3;
+	unsigned long flags;
+	struct slab *slab;
+
+	for_each_online_node(node) {
+		l3 = c->nodelists[node];
+		if (l3 == NULL)
+			continue;
+
+		spin_lock_irqsave(&l3->list_lock, flags);
+		list_for_each_entry(slab, &l3->slabs_full, list)
+			sum += check_ubcs_on_slab(c, slab, ub);
+		list_for_each_entry(slab, &l3->slabs_partial, list)
+			sum += check_ubcs_on_slab(c, slab, ub);
+		spin_unlock_irqrestore(&l3->list_lock, flags);
+	}
+
+	return sum;
+}
+
+void slab_walk_ub(struct user_beancounter *ub,
+		void (*show)(const char *name, int count, void *v), void *v)
+{
+	struct kmem_cache *c;
+	int cnt;
+
+	mutex_lock(&cache_chain_mutex);
+	list_for_each_entry(c, &cache_chain, next) {
+		if (c->flags & SLAB_UBC) {
+			cnt = check_ubcs_on_cache(c, ub);
+			show(c->name, cnt, v);
+		}
+	}
+	mutex_unlock(&cache_chain_mutex);
+}
+
 #define MAX_SLABINFO_WRITE 128
 /**
  * slabinfo_write - Tuning for the slab allocator
diff -upr kernel-2.6.18-417.el5.orig/mm/slob.c kernel-2.6.18-417.el5-028stab121/mm/slob.c
--- kernel-2.6.18-417.el5.orig/mm/slob.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/slob.c	2017-01-13 08:40:16.000000000 -0500
@@ -345,16 +345,16 @@ EXPORT_SYMBOL(slab_reclaim_pages);
 
 #ifdef CONFIG_SMP
 
-void *__alloc_percpu(size_t size)
+void *__alloc_percpu_mask(size_t size, gfp_t gfp)
 {
 	int i;
-	struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
+	struct percpu_data *pdata = kmalloc(sizeof (*pdata), gfp);
 
 	if (!pdata)
 		return NULL;
 
 	for_each_possible_cpu(i) {
-		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+		pdata->ptrs[i] = kmalloc(size, gfp);
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
 		memset(pdata->ptrs[i], 0, size);
@@ -372,7 +372,7 @@ unwind_oom:
 	kfree(pdata);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_percpu);
+EXPORT_SYMBOL(__alloc_percpu_mask);
 
 void
 free_percpu(const void *objp)
diff -upr kernel-2.6.18-417.el5.orig/mm/swap.c kernel-2.6.18-417.el5-028stab121/mm/swap.c
--- kernel-2.6.18-417.el5.orig/mm/swap.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/swap.c	2017-01-13 08:40:24.000000000 -0500
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
+#include <linux/rmap.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -123,6 +124,23 @@ void fastcall activate_page(struct page 
 	spin_unlock_irq(&zone->lru_lock);
 }
 
+void deactivate_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	if (PageLRU(page) && PageActive(page)) {
+		del_page_from_active_list(zone, page);
+		ClearPageActive(page);
+		add_page_to_inactive_list(zone, page);
+		__count_vm_event(PGDEACTIVATE);
+	}
+	spin_unlock_irq(&zone->lru_lock);
+	/* clear referenced bits */
+	page_referenced(page, 0);
+}
+EXPORT_SYMBOL(deactivate_page);
+
 static DEFINE_PER_CPU(struct pagevec, deactivate_pvecs) = { 0, };
 
 static void __pagevec_deactivate(struct pagevec *pvec)
@@ -263,6 +281,8 @@ void fastcall lru_cache_add_active(struc
 	put_cpu_var(lru_add_active_pvecs);
 }
 
+EXPORT_SYMBOL(lru_cache_add_active);
+
 static void __lru_add_drain(int cpu)
 {
 	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
@@ -287,6 +307,8 @@ void lru_add_drain(void)
 	put_cpu();
 }
 
+EXPORT_SYMBOL(lru_add_drain);
+
 #ifdef CONFIG_NUMA
 static void lru_add_drain_per_cpu(void *dummy)
 {
diff -upr kernel-2.6.18-417.el5.orig/mm/swapfile.c kernel-2.6.18-417.el5-028stab121/mm/swapfile.c
--- kernel-2.6.18-417.el5.orig/mm/swapfile.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/swapfile.c	2017-01-13 08:40:24.000000000 -0500
@@ -32,6 +32,8 @@
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_SPINLOCK(swap_lock);
 unsigned int nr_swapfiles;
 long total_swap_pages;
@@ -43,8 +45,12 @@ static const char Bad_offset[] = "Bad sw
 static const char Unused_offset[] = "Unused swap offset entry ";
 
 struct swap_list_t swap_list = {-1, -1};
+struct swap_info_struct swap_info[MAX_SWAPFILES];
 
-static struct swap_info_struct swap_info[MAX_SWAPFILES];
+EXPORT_SYMBOL(total_swap_pages);
+EXPORT_SYMBOL(swap_lock);
+EXPORT_SYMBOL(swap_list);
+EXPORT_SYMBOL(swap_info);
 
 static DEFINE_MUTEX(swapon_mutex);
 
@@ -171,7 +177,7 @@ no_page:
 	return 0;
 }
 
-swp_entry_t get_swap_page(void)
+swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	struct swap_info_struct *si;
 	pgoff_t offset;
@@ -192,6 +198,8 @@ swp_entry_t get_swap_page(void)
 			wrapped++;
 		}
 
+		if (si->flags & SWP_READONLY)
+			continue;
 		if (!si->highest_bit)
 			continue;
 		if (!(si->flags & SWP_WRITEOK))
@@ -201,6 +209,7 @@ swp_entry_t get_swap_page(void)
 		offset = scan_swap_map(si);
 		if (offset) {
 			spin_unlock(&swap_lock);
+			ub_swapentry_inc(si, offset, ub);
 			return swp_entry(type, offset);
 		}
 		next = swap_list.next;
@@ -212,6 +221,8 @@ noswap:
 	return (swp_entry_t) {0};
 }
 
+EXPORT_SYMBOL(get_swap_page);
+
 swp_entry_t get_swap_page_of_type(int type)
 {
 	struct swap_info_struct *si;
@@ -219,7 +230,7 @@ swp_entry_t get_swap_page_of_type(int ty
 
 	spin_lock(&swap_lock);
 	si = swap_info + type;
-	if (si->flags & SWP_WRITEOK) {
+	if (si->flags & SWP_WRITEOK && !(si->flags & SWP_READONLY)) {
 		nr_swap_pages--;
 		offset = scan_swap_map(si);
 		if (offset) {
@@ -276,6 +287,7 @@ static int swap_entry_free(struct swap_i
 		count--;
 		p->swap_map[offset] = count;
 		if (!count) {
+			ub_swapentry_dec(p, offset);
 			if (offset < p->lowest_bit)
 				p->lowest_bit = offset;
 			if (offset > p->highest_bit)
@@ -304,6 +316,8 @@ void swap_free(swp_entry_t entry)
 	}
 }
 
+EXPORT_SYMBOL(swap_free);
+
 /*
  * How many references to page are currently swapped out?
  */
@@ -385,6 +399,55 @@ int remove_exclusive_swap_page(struct pa
 	return retval;
 }
 
+int try_to_remove_exclusive_swap_page(struct page *page)
+{
+	int retval;
+	struct swap_info_struct * p;
+	swp_entry_t entry;
+
+	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageLocked(page));
+
+	if (!PageSwapCache(page))
+		return 0;
+	if (PageWriteback(page))
+		return 0;
+	if (page_count(page) != 2) /* 2: us + cache */
+		return 0;
+
+	entry.val = page->private;
+	p = swap_info_get(entry);
+	if (!p)
+		return 0;
+
+	if (!vm_swap_full() &&
+			(p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) {
+		spin_unlock(&swap_lock);
+		return 0;
+	}
+
+	/* Is the only swap cache user the cache itself? */
+	retval = 0;
+	if (p->swap_map[swp_offset(entry)] == 1) {
+		/* Recheck the page count with the swapcache lock held.. */
+		write_lock_irq(&swapper_space.tree_lock);
+		if ((page_count(page) == 2) && !PageWriteback(page)) {
+			__delete_from_swap_cache(page);
+			SetPageDirty(page);
+			retval = 1;
+		}
+		write_unlock_irq(&swapper_space.tree_lock);
+	}
+	spin_unlock(&swap_lock);
+
+	if (retval) {
+		swap_free(entry);
+		page_cache_release(page);
+	}
+
+	return retval;
+}
+
 /*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
@@ -425,6 +488,8 @@ void free_swap_and_cache(swp_entry_t ent
 	}
 }
 
+EXPORT_SYMBOL(free_swap_and_cache);
+
 #ifdef CONFIG_SOFTWARE_SUSPEND
 /*
  * Find the swap type that corresponds to given device (if any)
@@ -487,11 +552,17 @@ unsigned int count_swap_pages(int type, 
  * force COW, vm_page_prot omits write permission from any private vma.
  */
 static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
-		unsigned long addr, swp_entry_t entry, struct page *page)
+		unsigned long addr, swp_entry_t entry, struct page *page,
+		struct page_beancounter **pb)
 {
-	inc_mm_counter(vma->vm_mm, anon_rss);
+	struct mm_struct *mm;
+
+	mm = vma->vm_mm;
+	inc_mm_counter(mm, anon_rss);
+	ub_unused_privvm_dec(mm, vma);
+	pb_add_ref(page, mm, pb);
 	get_page(page);
-	set_pte_at(vma->vm_mm, addr, pte,
+	set_pte_at(mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
 	swap_free(entry);
@@ -504,7 +575,8 @@ static void unuse_pte(struct vm_area_str
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
@@ -518,7 +590,7 @@ static int unuse_pte_range(struct vm_are
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, pte++, addr, entry, page);
+			unuse_pte(vma, pte++, addr, entry, page, pb);
 			found = 1;
 			break;
 		}
@@ -529,7 +601,8 @@ static int unuse_pte_range(struct vm_are
 
 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -539,7 +612,7 @@ static inline int unuse_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (unuse_pte_range(vma, pmd, addr, next, entry, page))
+		if (unuse_pte_range(vma, pmd, addr, next, entry, page, pb))
 			return 1;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
@@ -547,7 +620,8 @@ static inline int unuse_pmd_range(struct
 
 static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -557,14 +631,15 @@ static inline int unuse_pud_range(struct
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (unuse_pmd_range(vma, pud, addr, next, entry, page))
+		if (unuse_pmd_range(vma, pud, addr, next, entry, page, pb))
 			return 1;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
 static int unuse_vma(struct vm_area_struct *vma,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pgd_t *pgd;
 	unsigned long addr, end, next;
@@ -585,14 +660,15 @@ static int unuse_vma(struct vm_area_stru
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (unuse_pud_range(vma, pgd, addr, next, entry, page))
+		if (unuse_pud_range(vma, pgd, addr, next, entry, page, pb))
 			return 1;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
 
 static int unuse_mm(struct mm_struct *mm,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	struct vm_area_struct *vma;
 
@@ -607,7 +683,7 @@ static int unuse_mm(struct mm_struct *mm
 		lock_page(page);
 	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && unuse_vma(vma, entry, page))
+		if (vma->anon_vma && unuse_vma(vma, entry, page, pb))
 			break;
 	}
 	up_read(&mm->mmap_sem);
@@ -673,6 +749,7 @@ static int try_to_unuse(unsigned int typ
 	int retval = 0;
 	int reset_overflow = 0;
 	int shmem;
+	struct page_beancounter *pb;
 
 	/*
 	 * When searching mms for an entry, a good strategy is to
@@ -724,6 +801,13 @@ static int try_to_unuse(unsigned int typ
 			break;
 		}
 
+		pb = NULL;
+		if (pb_alloc_all(&pb)) {
+			page_cache_release(page);
+			retval = -ENOMEM;
+			break;
+		}
+
 		/*
 		 * Don't hold on to start_mm if it looks like exiting.
 		 */
@@ -746,6 +830,20 @@ static int try_to_unuse(unsigned int typ
 		lock_page(page);
 		wait_on_page_writeback(page);
 
+		/* If the read failed we cannot map a not-uptodate page
+		 * to user space. Actually, we are in serious trouble:
+		 * we do not even know which process to kill. So, the only
+		 * option that remains is to stop swapoff() and allow
+		 * someone to kill processes to zap the invalid pages.
+		 */
+		if (unlikely(!PageUptodate(page))) {
+			pb_free_list(&pb);
+			unlock_page(page);
+			page_cache_release(page);
+			retval = -EIO;
+			break;
+		}
+
 		/*
 		 * Remove all references to entry.
 		 * Whenever we reach init_mm, there's no address space
@@ -757,7 +855,7 @@ static int try_to_unuse(unsigned int typ
 			if (start_mm == &init_mm)
 				shmem = shmem_unuse(entry, page);
 			else
-				retval = unuse_mm(start_mm, entry, page);
+				retval = unuse_mm(start_mm, entry, page, &pb);
 		}
 		if (*swap_map > 1) {
 			int set_start_mm = (*swap_map >= swcount);
@@ -787,7 +885,7 @@ static int try_to_unuse(unsigned int typ
 					set_start_mm = 1;
 					shmem = shmem_unuse(entry, page);
 				} else
-					retval = unuse_mm(mm, entry, page);
+					retval = unuse_mm(mm, entry, page, &pb);
 				if (set_start_mm && *swap_map < swcount) {
 					mmput(new_start_mm);
 					atomic_inc(&mm->mm_users);
@@ -801,6 +899,8 @@ static int try_to_unuse(unsigned int typ
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
+
+		pb_free_list(&pb);
 		if (retval) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -1146,6 +1246,10 @@ asmlinkage long sys_swapoff(const char _
 	int i, type, prev;
 	int err;
 	
+	/* The VE admin check is just to be on the safe side: the admin may
+	 * affect swaps only if he has access to the special file, i.e. if he
+	 * has been granted access to the block device or if the swap file is
+	 * in an area visible to him. */
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -1245,6 +1349,7 @@ asmlinkage long sys_swapoff(const char _
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
+	ub_swap_fini(p);
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
@@ -1264,6 +1369,8 @@ out:
 	return err;
 }
 
+EXPORT_SYMBOL(sys_swapoff);
+
 #ifdef CONFIG_PROC_FS
 /* iterator */
 static void *swap_start(struct seq_file *swap, loff_t *pos)
@@ -1332,23 +1439,57 @@ static struct seq_operations swaps_op = 
 	.show =		swap_show
 };
 
+#include <linux/virtinfo.h>
+
+static int swap_show_ve(struct seq_file *swap, void *v)
+{
+	struct meminfo mi;
+
+	memset(&mi, 0, sizeof(mi));
+	si_swapinfo(&mi.si);
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
+			& NOTIFY_FAIL)
+		goto out;
+
+	seq_printf(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
+	if (!mi.si.totalswap)
+		goto out;
+	seq_printf(swap, "%-40s%s\t%lu\t%lu\t%d\n",
+			"/dev/null",
+			"partition",
+			mi.si.totalswap  << (PAGE_SHIFT - 10),
+			(mi.si.totalswap - mi.si.freeswap) << (PAGE_SHIFT - 10),
+			-1);
+out:
+	return 0;
+}
+
 static int swaps_open(struct inode *inode, struct file *file)
 {
+	if (!ve_is_super(get_exec_env()))
+		return single_open(file, &swap_show_ve, NULL);
 	return seq_open(file, &swaps_op);
 }
 
+static int swaps_release(struct inode *inode, struct file *file)
+{
+	if (!ve_is_super(file->owner_env))
+		return single_release(inode, file);
+	return seq_release(inode, file);
+}
+
 static struct file_operations proc_swaps_operations = {
 	.open		= swaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= swaps_release,
 };
 
 static int __init procswaps_init(void)
 {
 	struct proc_dir_entry *entry;
 
-	entry = create_proc_entry("swaps", 0, NULL);
+	entry = create_proc_glob_entry("swaps", 0, NULL);
 	if (entry)
 		entry->proc_fops = &proc_swaps_operations;
 	return 0;
@@ -1589,9 +1730,16 @@ asmlinkage long sys_swapon(const char __
 		goto bad_swap;
 	}
 
+	if (ub_swap_init(p, maxpages)) {
+		error = -ENOMEM;
+		goto bad_swap;
+	}
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	p->flags = SWP_ACTIVE;
+	if (swap_flags & SWAP_FLAG_READONLY)
+		p->flags |= SWP_READONLY;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 
@@ -1651,6 +1799,8 @@ out:
 	return error;
 }
 
+EXPORT_SYMBOL(sys_swapon);
+
 void si_swapinfo(struct sysinfo *val)
 {
 	unsigned int i;
@@ -1710,6 +1860,8 @@ bad_file:
 	goto out;
 }
 
+EXPORT_SYMBOL(swap_duplicate);
+
 struct swap_info_struct *
 get_swap_info_struct(unsigned type)
 {
diff -upr kernel-2.6.18-417.el5.orig/mm/swap_state.c kernel-2.6.18-417.el5-028stab121/mm/swap_state.c
--- kernel-2.6.18-417.el5.orig/mm/swap_state.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/swap_state.c	2017-01-13 08:40:24.000000000 -0500
@@ -19,6 +19,9 @@
 
 #include <asm/pgtable.h>
 
+#include <ub/ub_vmpages.h>
+#include <ub/io_acct.h>
+
 /*
  * swapper_space is a fiction, retained to simplify the path through
  * vmscan's shrink_list, to make sync_page look nicer, and to allow
@@ -43,6 +46,7 @@ struct address_space swapper_space = {
 	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
 	.backing_dev_info = &swap_backing_dev_info,
 };
+EXPORT_SYMBOL(swapper_space);
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
 
@@ -53,14 +57,18 @@ static struct {
 	unsigned long find_total;
 	unsigned long noent_race;
 	unsigned long exist_race;
+	unsigned long remove_race;
 } swap_cache_info;
+EXPORT_SYMBOL(swap_cache_info);
 
 void show_swap_cache_info(void)
 {
-	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
+	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, "
+		"race %lu+%lu+%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total,
-		swap_cache_info.noent_race, swap_cache_info.exist_race);
+		swap_cache_info.noent_race, swap_cache_info.exist_race,
+		swap_cache_info.remove_race);
 	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
@@ -69,8 +77,7 @@ void show_swap_cache_info(void)
  * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
-			       gfp_t gfp_mask)
+int __add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
 	int error;
 
@@ -95,7 +102,9 @@ static int __add_to_swap_cache(struct pa
 	return error;
 }
 
-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
+EXPORT_SYMBOL(__add_to_swap_cache);
+
+int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -117,6 +126,8 @@ static int add_to_swap_cache(struct page
 	return 0;
 }
 
+EXPORT_SYMBOL(add_to_swap_cache);
+
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
@@ -151,7 +162,14 @@ int add_to_swap(struct page * page, gfp_
 	BUG_ON(!PageLocked(page));
 
 	for (;;) {
-		entry = get_swap_page();
+		struct user_beancounter *ub;
+
+		ub = pb_grab_page_ub(page);
+		if (IS_ERR(ub))
+			return 0;
+
+		entry = get_swap_page(ub);
+		put_beancounter(ub);
 		if (!entry.val)
 			return 0;
 
@@ -252,10 +270,13 @@ int move_from_swap_cache(struct page *pa
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
+	if (!PageSwapCache(page))
+		return;
+	if (!TestSetPageLocked(page)) {
 		remove_exclusive_swap_page(page);
 		unlock_page(page);
-	}
+	} else
+		INC_CACHE_INFO(remove_race);
 }
 
 /* 
@@ -364,3 +385,5 @@ struct page *read_swap_cache_async(swp_e
 		page_cache_release(new_page);
 	return found_page;
 }
+
+EXPORT_SYMBOL(read_swap_cache_async);
diff -upr kernel-2.6.18-417.el5.orig/mm/usercopy.c kernel-2.6.18-417.el5-028stab121/mm/usercopy.c
--- kernel-2.6.18-417.el5.orig/mm/usercopy.c	2017-01-13 08:40:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/usercopy.c	2017-01-13 08:40:14.000000000 -0500
@@ -0,0 +1,307 @@
+/*
+ * linux/mm/usercopy.c
+ *
+ * (C) Copyright 2003 Ingo Molnar
+ *
+ * Generic implementation of all the user-VM access functions, without
+ * relying on being able to access the VM directly.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/atomic_kmap.h>
+
+/*
+ * Get kernel address of the user page and pin it.
+ */
+static inline struct page *pin_page(unsigned long addr, int write,
+				    pte_t *pte, spinlock_t **ptlp)
+{
+	struct mm_struct *mm = current->mm ? : &init_mm;
+	struct page *page = NULL;
+	int ret;
+
+	/*
+	 * Do a quick atomic lookup first - this is the fastpath.
+	 */
+retry:
+	page = follow_page_pte(mm, addr, write, pte, ptlp);
+	if (likely(page != NULL)) {
+		get_page(page);
+		return page;
+	}
+	if (pte_present(*pte))
+		return NULL;
+	/*
+	 * No luck - bad address or need to fault in the page:
+	 */
+
+	/*
+	 * In the context of filemap_copy_from_user(), we are not allowed
+	 * to sleep.  We must fail this usercopy attempt and allow
+	 * filemap_copy_from_user() to recover: drop its atomic kmap and use
+	 * a sleeping kmap instead.
+	 */
+	if (in_atomic())
+		return NULL;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(current, mm, addr, 1, write, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+
+	if (ret <= 0)
+		return NULL;
+
+	/*
+	 * Go try the follow_page again.
+	 */
+	goto retry;
+}
+
+static inline void unpin_page(struct page *page)
+{
+	put_page(page);
+}
+
+/*
+ * map user space page to kernel space
+ * return NULL in case of failure
+ */
+void* __kmap_atomic_user_page(unsigned long addr,
+		struct page** page, spinlock_t **ptlp, int write)
+{
+	pte_t pte;
+	void* maddr;
+
+	*page = pin_page(addr, write, &pte, ptlp);
+	if (!(*page) && !pte_present(pte))
+		return NULL;
+
+	if (*page)
+		maddr = kmap_atomic(*page, KM_USER_COPY);
+	else
+		/* we will map with user pte */
+		maddr = kmap_atomic_pte(&pte, KM_USER_COPY);
+
+	return maddr;
+}
+
+void __kunmap_atomic_user_page(void* maddr, struct page* page,
+		spinlock_t *ptlp)
+{
+	kunmap_atomic(maddr, KM_USER_COPY);
+	spin_unlock(ptlp);
+	if (page)
+		unpin_page(page);
+}
+
+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+static int rw_vm(unsigned long addr, void *buf, int len, int write)
+{
+	spinlock_t *ptlp;
+
+	if (!len)
+		return 0;
+
+	/* ignore errors, just check how much was successfully transferred */
+	while (len) {
+		struct page *page = NULL;
+		int bytes, offset;
+		void *maddr;
+
+		maddr = __kmap_atomic_user_page(addr, &page, &ptlp, write);
+		if (!maddr)
+			break;
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+#define HANDLE_TYPE(type) \
+	case sizeof(type): *(type *)(maddr+offset) = *(type *)(buf); break;
+
+		if (write) {
+			switch (bytes) {
+			HANDLE_TYPE(char);
+			HANDLE_TYPE(int);
+			HANDLE_TYPE(long long);
+			default:
+				memcpy(maddr + offset, buf, bytes);
+			}
+		} else {
+#undef HANDLE_TYPE
+#define HANDLE_TYPE(type) \
+	case sizeof(type): *(type *)(buf) = *(type *)(maddr+offset); break;
+			switch (bytes) {
+			HANDLE_TYPE(char);
+			HANDLE_TYPE(int);
+			HANDLE_TYPE(long long);
+			default:
+				memcpy(buf, maddr + offset, bytes);
+			}
+#undef HANDLE_TYPE
+		}
+		__kunmap_atomic_user_page(maddr, page, ptlp);
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+	}
+
+	return len;
+}
+
+static int str_vm(unsigned long addr, void *buf0, int len, int copy)
+{
+	struct page *page = NULL;
+	spinlock_t *ptlp;
+	void *buf = buf0;
+
+	if (!len)
+		return len;
+
+	/* ignore errors, just check how much was successfully transferred */
+	while (len) {
+		int bytes, offset, left, copied;
+		char *maddr;
+
+		maddr = __kmap_atomic_user_page(addr, &page, &ptlp, copy == 2);
+		if (!maddr)
+			return -EFAULT;
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+		if (copy == 2) {
+			memset(maddr + offset, 0, bytes);
+			copied = bytes;
+			left = 0;
+		} else if (copy == 1) {
+			left = strncpy_count(buf, maddr + offset, bytes);
+			copied = bytes - left;
+		} else {
+			copied = strnlen(maddr + offset, bytes);
+			left = bytes - copied;
+		}
+		BUG_ON(bytes < 0 || copied < 0);
+
+		__kunmap_atomic_user_page(maddr, page, ptlp);
+
+		len -= copied;
+		buf += copied;
+		addr += copied;
+		if (left)
+			break;
+	}
+
+	return len;
+}
+
+/*
+ * Copies memory from userspace (ptr) into kernelspace (val).
+ *
+ * returns # of bytes not copied.
+ */
+int get_user_size(unsigned int size, void *val, const void *ptr)
+{
+	int ret;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
+		ret = __direct_copy_from_user_inatomic(val, ptr, size);
+	else
+		ret = rw_vm((unsigned long)ptr, val, size, 0);
+	if (ret)
+		/*
+		 * Zero the rest:
+		 */
+		memset(val + size - ret, 0, ret);
+	return ret;
+}
+
+/*
+ * Copies memory from kernelspace (val) into userspace (ptr).
+ *
+ * returns # of bytes not copied.
+ */
+int put_user_size(unsigned int size, const void *val, void *ptr)
+{
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
+		return __direct_copy_to_user_inatomic(ptr, val, size);
+	else
+		return rw_vm((unsigned long)ptr, (void *)val, size, 1);
+}
+
+int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr)
+{
+	int copied, left;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		left = strncpy_count(val, ptr, size);
+		copied = size - left;
+		BUG_ON(copied < 0);
+
+		return copied;
+	}
+	left = str_vm((unsigned long)ptr, val, size, 1);
+	if (left < 0)
+		return left;
+	copied = size - left;
+	BUG_ON(copied < 0);
+
+	return copied;
+}
+
+int strlen_fromuser_size(unsigned int size, const void *ptr)
+{
+	int copied, left;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		copied = strnlen(ptr, size) + 1;
+		BUG_ON(copied < 0);
+
+		return copied;
+	}
+	left = str_vm((unsigned long)ptr, NULL, size, 0);
+	if (left < 0)
+		return 0;
+	copied = size - left + 1;
+	BUG_ON(copied < 0);
+
+	return copied;
+}
+
+int zero_user_size(unsigned int size, void *ptr)
+{
+	int left;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		memset(ptr, 0, size);
+		return 0;
+	}
+	left = str_vm((unsigned long)ptr, NULL, size, 2);
+	if (left < 0)
+		return size;
+	return left;
+}
+
+EXPORT_SYMBOL(get_user_size);
+EXPORT_SYMBOL(put_user_size);
+EXPORT_SYMBOL(zero_user_size);
+EXPORT_SYMBOL(copy_str_fromuser_size);
+EXPORT_SYMBOL(strlen_fromuser_size);
diff -upr kernel-2.6.18-417.el5.orig/mm/vmalloc.c kernel-2.6.18-417.el5-028stab121/mm/vmalloc.c
--- kernel-2.6.18-417.el5.orig/mm/vmalloc.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/vmalloc.c	2017-01-13 08:40:20.000000000 -0500
@@ -20,6 +20,9 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_mem.h>
+#include <ub/ub_debug.h>
+
 
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
@@ -269,6 +272,70 @@ static struct vm_struct *__find_vm_area(
 	return tmp;
 }
 
+struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
+{
+	unsigned long addr, best_addr, delta, best_delta;
+	struct vm_struct **p, **best_p, *tmp, *area;
+
+	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+
+	size += PAGE_SIZE; /* one-page gap at the end */
+	addr = VMALLOC_START;
+	best_addr = 0UL;
+	best_p = NULL;
+	best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
+
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) &&
+			(tmp->addr <= (void *)PAGE_ALIGN(VMALLOC_END));
+			p = &tmp->next) {
+		if ((unsigned long)tmp->addr < addr)
+			continue;
+		if ((size + addr) < addr)
+			break;
+		delta = (unsigned long) tmp->addr - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+		addr = tmp->size + (unsigned long) tmp->addr;
+		if (addr > VMALLOC_END-size)
+			break;
+	}
+
+	if (!tmp || (tmp->addr > (void *)PAGE_ALIGN(VMALLOC_END))) {
+		/* check free area after list end */
+		delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+	}
+	if (best_addr) {
+		area->flags = flags;
+		/* allocate at the end of this area */
+		area->addr = (void *)(best_addr + best_delta);
+		area->size = size;
+		area->next = *best_p;
+		area->pages = NULL;
+		area->nr_pages = 0;
+		area->phys_addr = 0;
+		*best_p = area;
+		/* check like in __vunmap */
+		WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
+	} else {
+		kfree(area);
+		area = NULL;
+	}
+	write_unlock(&vmlist_lock);
+
+	return area;
+}
+
 /* Caller must hold vmlist_lock */
 struct vm_struct *__remove_vm_area(void *addr)
 {
@@ -309,7 +376,7 @@ struct vm_struct *remove_vm_area(void *a
 	return v;
 }
 
-void __vunmap(void *addr, int deallocate_pages)
+void __vunmap(void *addr, int deallocate_pages, int uncharge)
 {
 	struct vm_struct *area;
 
@@ -335,6 +402,8 @@ void __vunmap(void *addr, int deallocate
 	if (deallocate_pages) {
 		int i;
 
+		if (uncharge)
+			dec_vmalloc_charged(area);
 		for (i = 0; i < area->nr_pages; i++) {
 			BUG_ON(!area->pages[i]);
 			__free_page(area->pages[i]);
@@ -364,7 +433,7 @@ void __vunmap(void *addr, int deallocate
 void vfree(void *addr)
 {
 	BUG_ON(in_interrupt());
-	__vunmap(addr, 1);
+	__vunmap(addr, 1, 1);
 }
 EXPORT_SYMBOL(vfree);
 
@@ -381,7 +450,7 @@ EXPORT_SYMBOL(vfree);
 void vunmap(void *addr)
 {
 	BUG_ON(in_interrupt());
-	__vunmap(addr, 0);
+	__vunmap(addr, 0, 0);
 }
 EXPORT_SYMBOL(vunmap);
 
@@ -457,10 +526,12 @@ void *__vmalloc_area_node(struct vm_stru
 
 	if (map_vm_area(area, prot, &pages))
 		goto fail;
+
+	inc_vmalloc_charged(area, gfp_mask);
 	return area->addr;
 
 fail:
-	vfree(area->addr);
+	__vunmap(area->addr, 1, 0);
 	return NULL;
 }
 
@@ -510,6 +581,21 @@ static inline void *__vmalloc_node_flags
 	return __vmalloc_node(size, flags, PAGE_KERNEL, node);
 }
 
+static void *____vmalloc(unsigned long size, gfp_t mask, pgprot_t prot)
+{
+	struct vm_struct *area;
+
+	size = PAGE_ALIGN(size);
+	if (!size || (size >> PAGE_SHIFT) > num_physpages)
+		return NULL;
+
+	area = get_vm_area_best(size, VM_ALLOC);
+	if (!area)
+		return NULL;
+
+	return __vmalloc_area_node(area, mask, prot, -1);
+}
+
 /**
  *	vmalloc  -  allocate virtually contiguous memory
  *
@@ -527,6 +613,26 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+void *ub_vmalloc(unsigned long size)
+{
+	return __vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(ub_vmalloc);
+
+/* Best-fit vmalloc: picks the free gap with the least slack (see
+ * get_vm_area_best) instead of first-fit; indent fixed to a tab to
+ * match kernel CodingStyle and the sibling ub_vmalloc() above. */
+void *vmalloc_best(unsigned long size)
+{
+	return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+
+/* UBC-charged variant of vmalloc_best(): same best-fit placement but the
+ * pages are charged to the current user beancounter via GFP_KERNEL_UBC;
+ * indent fixed to a tab to match kernel CodingStyle. */
+void *ub_vmalloc_best(unsigned long size)
+{
+	return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+
 /**
  *	vzalloc - allocate virtually contiguous memory with zero fill
  *	@size:	allocation size
@@ -602,6 +708,12 @@ void *vzalloc_node(unsigned long size, i
 }
 EXPORT_SYMBOL(vzalloc_node);
 
+void *ub_vmalloc_node(unsigned long size, int node)
+{
+	return __vmalloc_node(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, node);
+}
+EXPORT_SYMBOL(ub_vmalloc_node);
+
 #ifndef PAGE_KERNEL_EXEC
 # define PAGE_KERNEL_EXEC PAGE_KERNEL
 #endif
@@ -795,3 +907,36 @@ out_einval_locked:
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
+void vprintstat(void)
+{
+	struct vm_struct *p, *last_p = NULL;
+	unsigned long addr, size, free_size, max_free_size;
+	int num;
+
+	addr = VMALLOC_START;
+	size = max_free_size = 0;
+	num = 0;
+
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		free_size = (unsigned long)p->addr - addr;
+		if (free_size > max_free_size)
+			max_free_size = free_size;
+		addr = (unsigned long)p->addr + p->size;
+		size += p->size;
+		++num;
+		last_p = p;		
+	}
+	if (last_p) {
+		free_size = VMALLOC_END -
+			((unsigned long)last_p->addr + last_p->size);
+		if (free_size > max_free_size)
+			max_free_size = free_size;
+	}
+	read_unlock(&vmlist_lock);
+
+	printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
+			"    Max_Free: %luKB Start: %lx End: %lx\n",
+			size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
+			max_free_size/1024, VMALLOC_START, VMALLOC_END);
+}
diff -upr kernel-2.6.18-417.el5.orig/mm/vmscan.c kernel-2.6.18-417.el5-028stab121/mm/vmscan.c
--- kernel-2.6.18-417.el5.orig/mm/vmscan.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/vmscan.c	2017-01-13 08:40:19.000000000 -0500
@@ -36,6 +36,9 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 
+#include <ub/ub_oom.h>
+#include <ub/io_acct.h>
+
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 
@@ -183,6 +186,9 @@ unsigned long shrink_slab(unsigned long 
 	if (scanned == 0)
 		scanned = SWAP_CLUSTER_MAX;
 
+	if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE)))
+		return 1;
+
 	if (!down_read_trylock(&shrinker_rwsem))
 		return 1;	/* Assume we'll be able to shrink next time */
 
@@ -217,6 +223,9 @@ unsigned long shrink_slab(unsigned long 
 			int shrink_ret;
 			int nr_before;
 
+			if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE)))
+				goto done;
+
 			nr_before = (*shrinker->shrinker)(0, gfp_mask);
 			shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask);
 			if (shrink_ret == -1)
@@ -231,6 +240,7 @@ unsigned long shrink_slab(unsigned long 
 
 		shrinker->nr += total_scan;
 	}
+done:
 	up_read(&shrinker_rwsem);
 	return ret;
 }
@@ -363,9 +373,12 @@ static pageout_t pageout(struct page *pa
 			.nonblocking = 1,
 			.for_reclaim = 1,
 		};
+		struct user_beancounter *old_ub;
 
 		SetPageReclaim(page);
+		old_ub = set_exec_ub(get_ub0());
 		res = mapping->a_ops->writepage(page, &wbc);
+		(void)set_exec_ub(old_ub);
 		if (res < 0)
 			handle_write_error(mapping, page, res);
 		if (res == AOP_WRITEPAGE_ACTIVATE) {
@@ -662,7 +675,7 @@ static unsigned long shrink_inactive_lis
 		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
 					     &zone->inactive_list,
 					     &page_list, &nr_scan);
-		zone->nr_inactive -= nr_taken;
+		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
@@ -724,7 +737,8 @@ static inline void note_zone_scanning_pr
 
 static inline int zone_is_near_oom(struct zone *zone)
 {
-	return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
+	return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE))*3;
 }
 
 /*
@@ -804,12 +818,13 @@ force_reclaim_mapped:
 			reclaim_mapped = 1;
 	}
 
+	{KSTAT_PERF_ENTER(refill_inact)
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
 				    &l_hold, &pgscanned);
 	zone->pages_scanned += pgscanned;
-	zone->nr_active -= pgmoved;
+	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
 	while (!list_empty(&l_hold)) {
@@ -818,7 +833,8 @@ force_reclaim_mapped:
 		list_del(&page->lru);
 		if (page_mapped(page)) {
 			if (!reclaim_mapped ||
-			    (total_swap_pages == 0 && PageAnon(page)) ||
+			    (PageAnon(page) && !PageSwapCache(page) &&
+			     nr_swap_pages <= 0) ||
 			    page_referenced(page, 0)) {
 				list_add(&page->lru, &l_active);
 				trace_mm_pagereclaim_shrinkactive_a2a(page);
@@ -843,7 +859,7 @@ force_reclaim_mapped:
 		pgmoved++;
 		trace_mm_pagereclaim_shrinkactive_a2i(page);
 		if (!pagevec_add(&pvec, page)) {
-			zone->nr_inactive += pgmoved;
+			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
 			spin_unlock_irq(&zone->lru_lock);
 			pgdeactivate += pgmoved;
 			pgmoved = 0;
@@ -853,7 +869,7 @@ force_reclaim_mapped:
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	zone->nr_inactive += pgmoved;
+	__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
 	pgdeactivate += pgmoved;
 	if (buffer_heads_over_limit) {
 		spin_unlock_irq(&zone->lru_lock);
@@ -871,14 +887,14 @@ force_reclaim_mapped:
 		list_move(&page->lru, &zone->active_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-			zone->nr_active += pgmoved;
+			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
 			pgmoved = 0;
 			spin_unlock_irq(&zone->lru_lock);
 			__pagevec_release(&pvec);
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	zone->nr_active += pgmoved;
+	__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -886,6 +902,7 @@ force_reclaim_mapped:
 
 	trace_mm_pagereclaim_shrinkactive(pgscanned);
 	pagevec_release(&pvec);
+	KSTAT_PERF_LEAVE(refill_inact)}
 }
 
 /*
@@ -912,14 +929,16 @@ static void shrink_zone(int priority, st
 	 * Add one to `nr_to_scan' just to make sure that the kernel will
 	 * slowly sift through the active list.
 	 */
-	zone->nr_scan_active += (zone->nr_active >> priority) + 1;
+	zone->nr_scan_active +=
+		(zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
 	nr_active = zone->nr_scan_active;
 	if (nr_active >= swap_cluster_max)
 		zone->nr_scan_active = 0;
 	else
 		nr_active = 0;
 
-	zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
+	zone->nr_scan_inactive +=
+		(zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
 	nr_inactive = zone->nr_scan_inactive;
 	if (nr_inactive >= swap_cluster_max)
 		zone->nr_scan_inactive = 0;
@@ -935,6 +954,8 @@ static void shrink_zone(int priority, st
 			nr_to_scan = min(nr_active,
 					(unsigned long)swap_cluster_max);
 			nr_active -= nr_to_scan;
+			if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE)))
+				goto done;
 			shrink_active_list(nr_to_scan, zone, sc, priority);
 		}
 
@@ -942,6 +963,8 @@ static void shrink_zone(int priority, st
 			nr_to_scan = min(nr_inactive,
 					(unsigned long)swap_cluster_max);
 			nr_inactive -= nr_to_scan;
+			if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE)))
+				goto done;
 			nr_reclaimed += shrink_inactive_list(nr_to_scan,
 						zone, sc);
 		}
@@ -964,6 +987,7 @@ out:
 
 	throttle_vm_writeout(sc->gfp_mask);
 
+done:
 	trace_mm_pagereclaim_shrinkzone(nr_reclaimed);
 }
 
@@ -1006,6 +1030,9 @@ static void shrink_zones(int priority, s
 		sc->all_unreclaimable = 0;
 
 		shrink_zone(priority, zone, sc);
+
+		if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE)))
+			break;
 	}
 	trace_mm_directreclaim_reclaimall(priority);
 }
@@ -1039,15 +1066,18 @@ unsigned long try_to_free_pages(struct z
 		.swappiness = vm_swappiness,
 	};
 
+	KSTAT_PERF_ENTER(ttfp);
 	count_vm_event(ALLOCSTALL);
 
+	ub_oom_start();
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
-		lru_pages += zone->nr_active + zone->nr_inactive;
+		lru_pages += zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE);
 	}
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1081,6 +1111,11 @@ unsigned long try_to_free_pages(struct z
 			sc.may_writepage = 1;
 		}
 
+		if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE))) {
+			ret = 1;
+			goto out;
+		}
+
 		/* Take a nap, wait for some writeback to complete */
 		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
 			blk_congestion_wait(WRITE, HZ/10);
@@ -1107,6 +1142,7 @@ out:
 
 		zone->prev_priority = priority;
 	}
+	KSTAT_PERF_LEAVE(ttfp);
 	return ret;
 }
 
@@ -1193,7 +1229,8 @@ scan:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone->nr_active + zone->nr_inactive;
+			lru_pages += zone_page_state(zone, NR_ACTIVE)
+					+ zone_page_state(zone, NR_INACTIVE);
 		}
 
 		/*
@@ -1236,8 +1273,9 @@ scan:
 			if (zone->all_unreclaimable)
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-				    (zone->nr_active + zone->nr_inactive) * 6)
-				zone->all_unreclaimable = 1;
+				(zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE)) * 6)
+					zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
 			 * the reclaim ratio is low, start doing writepage
@@ -1418,18 +1456,22 @@ static unsigned long shrink_all_zones(un
 
 		/* For pass = 0 we don't shrink the active list */
 		if (pass > 0) {
-			zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+			zone->nr_scan_active +=
+				(zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
 			if (zone->nr_scan_active >= nr_pages || pass > 3) {
 				zone->nr_scan_active = 0;
-				nr_to_scan = min(nr_pages, zone->nr_active);
+				nr_to_scan = min(nr_pages,
+					zone_page_state(zone, NR_ACTIVE));
 				shrink_active_list(nr_to_scan, zone, sc, prio);
 			}
 		}
 
-		zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+		zone->nr_scan_inactive +=
+			(zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
 		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
 			zone->nr_scan_inactive = 0;
-			nr_to_scan = min(nr_pages, zone->nr_inactive);
+			nr_to_scan = min(nr_pages,
+				zone_page_state(zone, NR_INACTIVE));
 			ret += shrink_inactive_list(nr_to_scan, zone, sc);
 			if (ret >= nr_pages)
 				return ret;
@@ -1439,6 +1481,12 @@ static unsigned long shrink_all_zones(un
 	return ret;
 }
 
+
+static unsigned long count_lru_pages(void)
+{
+	return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
+}
+
 /*
  * Try to free `nr_pages' of memory, system-wide, and return the number of
  * freed pages.
@@ -1453,7 +1501,6 @@ unsigned long shrink_all_memory(unsigned
 	unsigned long ret = 0;
 	int pass;
 	struct reclaim_state reclaim_state;
-	struct zone *zone;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_swap = 0,
@@ -1464,9 +1511,7 @@ unsigned long shrink_all_memory(unsigned
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = 0;
-	for_each_zone(zone)
-		lru_pages += zone->nr_active + zone->nr_inactive;
+	lru_pages = count_lru_pages();
 
 	nr_slab = global_page_state(NR_SLAB);
 	/* If slab caches are huge, it's better to hit them first */
@@ -1496,10 +1541,7 @@ unsigned long shrink_all_memory(unsigned
 
 		/* Needed for shrinking slab caches later on */
 		if (!lru_pages)
-			for_each_zone(zone) {
-				lru_pages += zone->nr_active;
-				lru_pages += zone->nr_inactive;
-			}
+			lru_pages = count_lru_pages();
 
 		/* Force reclaiming mapped pages in the passes #3 and #4 */
 		if (pass > 2) {
diff -upr kernel-2.6.18-417.el5.orig/mm/vmstat.c kernel-2.6.18-417.el5-028stab121/mm/vmstat.c
--- kernel-2.6.18-417.el5.orig/mm/vmstat.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/mm/vmstat.c	2017-01-13 08:40:40.000000000 -0500
@@ -13,18 +13,18 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 
+#include <linux/virtinfo.h>
+
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
 {
 	struct zone *zones = pgdat->node_zones;
 	int i;
 
-	*active = 0;
-	*inactive = 0;
+	*active = node_page_state(pgdat->node_id, NR_ACTIVE);
+	*inactive = node_page_state(pgdat->node_id, NR_INACTIVE);
 	*free = 0;
 	for (i = 0; i < MAX_NR_ZONES; i++) {
-		*active += zones[i].nr_active;
-		*inactive += zones[i].nr_inactive;
 		*free += zones[i].free_pages;
 	}
 }
@@ -34,14 +34,12 @@ void get_zone_counts(unsigned long *acti
 {
 	struct pglist_data *pgdat;
 
-	*active = 0;
-	*inactive = 0;
+	*active = global_page_state(NR_ACTIVE);
+	*inactive = global_page_state(NR_INACTIVE);
 	*free = 0;
 	for_each_online_pgdat(pgdat) {
 		unsigned long l, m, n;
 		__get_zone_counts(&l, &m, &n, pgdat);
-		*active += l;
-		*inactive += m;
 		*free += n;
 	}
 }
@@ -72,6 +70,20 @@ static void sum_vm_events(unsigned long 
 	}
 }
 
+unsigned long vm_events(enum vm_event_item i)
+{
+	int cpu;
+	unsigned long sum;
+	struct vm_event_state *st;
+
+	sum = 0;
+	for_each_online_cpu(cpu) {
+		st = &per_cpu(vm_event_states, cpu);
+		sum += st->event[i];
+	}
+
+	return (sum < 0 ? 0 : sum);
+}
 /*
  * Accumulate the vm event counters across all CPUs.
  * The result is unavoidably approximate - it can change
@@ -239,7 +251,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
  * in between and therefore the atomicity vs. interrupt cannot be exploited
  * in a useful way here.
  */
-static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
+void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 	s8 *p = pcp->vm_stat_diff + item;
@@ -260,9 +272,8 @@ void __inc_zone_page_state(struct page *
 }
 EXPORT_SYMBOL(__inc_zone_page_state);
 
-void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct zone *zone = page_zone(page);
 	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 	s8 *p = pcp->vm_stat_diff + item;
 
@@ -275,6 +286,11 @@ void __dec_zone_page_state(struct page *
 		*p = overstep;
 	}
 }
+
+void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	__dec_zone_state(page_zone(page), item);
+}
 EXPORT_SYMBOL(__dec_zone_page_state);
 
 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
@@ -455,6 +471,8 @@ struct seq_operations fragmentation_op =
 
 static char *vmstat_text[] = {
 	/* Zoned VM counters */
+	"nr_inactive",
+	"nr_active",
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
@@ -546,8 +564,6 @@ static int zoneinfo_show(struct seq_file
 			   "\n        min      %lu"
 			   "\n        low      %lu"
 			   "\n        high     %lu"
-			   "\n        active   %lu"
-			   "\n        inactive %lu"
 			   "\n        scanned  %lu (a: %lu i: %lu)"
 			   "\n        spanned  %lu"
 			   "\n        present  %lu",
@@ -555,8 +571,6 @@ static int zoneinfo_show(struct seq_file
 			   zone->pages_min,
 			   zone->pages_low,
 			   zone->pages_high,
-			   zone->nr_active,
-			   zone->nr_inactive,
 			   zone->pages_scanned,
 			   zone->nr_scan_active, zone->nr_scan_inactive,
 			   zone->spanned_pages,
@@ -627,30 +641,40 @@ static void *vmstat_start(struct seq_fil
 	unsigned long *v;
 #ifdef CONFIG_VM_EVENT_COUNTERS
 	unsigned long *e;
+#define VMSTAT_BUFSIZE	(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + \
+				sizeof(struct vm_event_state))
+#else
+#define VMSTAT_BUFSIZE	(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long))
 #endif
 	int i;
 
 	if (*pos >= ARRAY_SIZE(vmstat_text))
 		return NULL;
 
-#ifdef CONFIG_VM_EVENT_COUNTERS
-	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
-			+ sizeof(struct vm_event_state), GFP_KERNEL);
-#else
-	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
-			GFP_KERNEL);
-#endif
+	v = kmalloc(VMSTAT_BUFSIZE, GFP_KERNEL);
 	m->private = v;
 	if (!v)
 		return ERR_PTR(-ENOMEM);
-	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		v[i] = global_page_state(i);
+
+	if (ve_is_super(get_exec_env())) {
+		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+			v[i] = global_page_state(i);
 #ifdef CONFIG_VM_EVENT_COUNTERS
-	e = v + NR_VM_ZONE_STAT_ITEMS;
-	all_vm_events(e);
-	e[PGPGIN] /= 2;		/* sectors -> kbytes */
-	e[PGPGOUT] /= 2;
+		e = v + NR_VM_ZONE_STAT_ITEMS;
+		all_vm_events(e);
+		e[PGPGIN] /= 2;		/* sectors -> kbytes */
+		e[PGPGOUT] /= 2;
 #endif
+	} else
+		memset(v, 0, VMSTAT_BUFSIZE);
+
+	if (virtinfo_notifier_call(VITYPE_GENERAL,
+				VIRTINFO_VMSTAT, v) & NOTIFY_FAIL) {
+		kfree(v);
+		m->private = NULL;
+		return ERR_PTR(-ENOMSG);
+	}
+
 	return v + *pos;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/8021q/vlan.c kernel-2.6.18-417.el5-028stab121/net/8021q/vlan.c
--- kernel-2.6.18-417.el5.orig/net/8021q/vlan.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/8021q/vlan.c	2017-01-13 08:40:21.000000000 -0500
@@ -32,6 +32,8 @@
 #include <net/arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/notifier.h>
+#include <linux/ve_proto.h>
+#include <linux/ve.h>
 
 #include <linux/if_vlan.h>
 #include "vlan.h"
@@ -68,6 +70,44 @@ static struct packet_type vlan_packet_ty
 	.func = vlan_skb_recv, /* VLAN receive method */
 };
 
+#ifdef CONFIG_VE
+static int vlan_start(void *data)
+{
+	int err;
+
+	err = vlan_proc_init();
+	if (err < 0)
+		goto out_proc;
+
+	__module_get(THIS_MODULE);
+	return 0;
+
+out_proc:
+	return err;
+}
+
+static void vlan_stop(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	if (ve->_proc_vlan_dir == NULL)
+		return;
+
+	vlan_proc_cleanup();
+	ve->_proc_vlan_conf = NULL;
+	ve->_proc_vlan_dir = NULL;
+	module_put(THIS_MODULE);
+}
+
+static struct ve_hook vlan_ve_hook = {
+	.init		= vlan_start,
+	.fini		= vlan_stop,
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_NET_POST,
+};
+#endif
+
 /* End of global variables definitions. */
 
 /*
@@ -105,6 +145,7 @@ static int __init vlan_proto_init(void)
 	}
 
 	vlan_ioctl_set(vlan_ioctl_handler);
+	ve_hook_register(VE_SS_CHAIN, &vlan_ve_hook);
 
 	return 0;
 }
@@ -117,6 +158,8 @@ static void __exit vlan_cleanup_devices(
 {
 	struct net_device *dev, *nxt;
 
+	ve_hook_unregister(&vlan_ve_hook);
+
 	rtnl_lock();
 	for (dev = dev_base; dev; dev = nxt) {
 		nxt = dev->next;
@@ -161,14 +204,16 @@ module_init(vlan_proto_init);
 module_exit(vlan_cleanup_module);
 
 /* Must be invoked with RCU read lock (no preempt) */
-static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
+static struct vlan_group *__vlan_find_group(int real_dev_ifindex,
+		struct ve_struct *ve)
 {
 	struct vlan_group *grp;
 	struct hlist_node *n;
 	int hash = vlan_grp_hashfn(real_dev_ifindex);
 
 	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
-		if (grp->real_dev_ifindex == real_dev_ifindex)
+		if (grp->real_dev_ifindex == real_dev_ifindex &&
+				ve_accessible_strict(ve, grp->owner))
 			return grp;
 	}
 
@@ -182,7 +227,8 @@ static struct vlan_group *__vlan_find_gr
 struct net_device *__find_vlan_dev(struct net_device *real_dev,
 				   unsigned short VID)
 {
-	struct vlan_group *grp = __vlan_find_group(real_dev->ifindex);
+	struct vlan_group *grp = __vlan_find_group(real_dev->ifindex,
+			real_dev->owner_env);
 
 	if (grp)
                 return grp->vlan_devices[VID];
@@ -219,7 +265,7 @@ static int unregister_vlan_dev(struct ne
 		return -EINVAL;
 
 	ASSERT_RTNL();
-	grp = __vlan_find_group(real_dev_ifindex);
+	grp = __vlan_find_group(real_dev_ifindex, real_dev->owner_env);
 
 	ret = 0;
 
@@ -261,6 +307,9 @@ static int unregister_vlan_dev(struct ne
 
 				hlist_del_rcu(&grp->hlist);
 
+				put_ve(grp->owner);
+				grp->owner = NULL;
+
 				/* Free the group, after all cpu's are done. */
 				call_rcu(&grp->rcu, vlan_rcu_free);
 
@@ -431,6 +480,8 @@ static void vlan_setup(struct net_device
 	new_dev->destructor = free_netdev;
 	new_dev->do_ioctl = vlan_dev_ioctl;
 	new_dev->ethtool_ops = &vlan_ethtool_ops;
+	if (!ve_is_super(get_exec_env()))
+		new_dev->features |= NETIF_F_VIRTUAL;
 }
 
 static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
@@ -635,18 +686,19 @@ static struct net_device *register_vlan_
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	grp = __vlan_find_group(real_dev->ifindex);
+	grp = __vlan_find_group(real_dev->ifindex, real_dev->owner_env);
 
 	/* Note, we are running under the RTNL semaphore
 	 * so it cannot "appear" on us.
 	 */
 	if (!grp) { /* need to add a new group */
-		grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
+		grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL_UBC);
 		if (!grp)
 			goto out_free_unregister;
 					
 		/* printk(KERN_ALERT "VLAN REGISTER:  Allocated new group.\n"); */
 		grp->real_dev_ifindex = real_dev->ifindex;
+		grp->owner = get_ve(real_dev->owner_env);
 
 		hlist_add_head_rcu(&grp->hlist, 
 				   &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
@@ -712,9 +764,10 @@ static void vlan_transfer_features(struc
 static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct vlan_group *grp = __vlan_find_group(dev->ifindex);
+	struct vlan_group *grp;
 	int i, flgs;
 	struct net_device *vlandev;
+	struct ve_struct *env;
 
 	if ((event == NETDEV_UP) &&
 	    (dev->features & NETIF_F_HW_VLAN_FILTER) &&
@@ -724,6 +777,7 @@ static int vlan_device_event(struct noti
 		dev->vlan_rx_add_vid(dev, 0);
 	}
 
+	grp = __vlan_find_group(dev->ifindex, dev->owner_env);
 	if (!grp)
 		goto out;
 
@@ -809,7 +863,9 @@ static int vlan_device_event(struct noti
 			ret = unregister_vlan_dev(dev,
 						  VLAN_DEV_INFO(vlandev)->vlan_id);
 
+			env = set_exec_env(vlandev->owner_env);
 			unregister_netdevice(vlandev);
+			set_exec_env(env);
 
 			/* Group was destroyed? */
 			if (ret == 1)
@@ -822,6 +878,15 @@ out:
 	return NOTIFY_DONE;
 }
 
+static inline int vlan_check_caps(void)
+{
+	return capable(CAP_NET_ADMIN)
+#ifdef CONFIG_VE
+		|| capable(CAP_VE_NET_ADMIN)
+#endif
+		;
+}
+
 /*
  *	VLAN IOCTL handler.
  *	o execute requested action or pass command to the device driver
@@ -846,7 +911,7 @@ static int vlan_ioctl_handler(void __use
 
 	switch (args.cmd) {
 	case SET_VLAN_INGRESS_PRIORITY_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		err = vlan_dev_set_ingress_priority(args.device1,
 						    args.u.skb_priority,
@@ -854,7 +919,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case SET_VLAN_EGRESS_PRIORITY_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		err = vlan_dev_set_egress_priority(args.device1,
 						   args.u.skb_priority,
@@ -862,7 +927,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case SET_VLAN_FLAG_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		err = vlan_dev_set_vlan_flag(args.device1,
 					     args.u.flag,
@@ -870,7 +935,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case SET_VLAN_NAME_TYPE_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		if ((args.u.name_type >= 0) &&
 		    (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
@@ -882,7 +947,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case ADD_VLAN_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		/* we have been given the name of the Ethernet Device we want to
 		 * talk to:  args.dev1	 We also have the
@@ -896,7 +961,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case DEL_VLAN_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		/* Here, the args.dev1 is the actual VLAN we want
 		 * to get rid of.
diff -upr kernel-2.6.18-417.el5.orig/net/8021q/vlan_dev.c kernel-2.6.18-417.el5-028stab121/net/8021q/vlan_dev.c
--- kernel-2.6.18-417.el5.orig/net/8021q/vlan_dev.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/8021q/vlan_dev.c	2017-01-13 08:40:21.000000000 -0500
@@ -478,6 +478,7 @@ int vlan_dev_hard_header(struct sk_buff 
 
 int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct ve_struct *env;
 	struct net_device_stats *stats = vlan_dev_get_stats(dev);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 
@@ -531,13 +532,17 @@ int vlan_dev_hard_start_xmit(struct sk_b
 	stats->tx_bytes += skb->len;
 
 	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	skb->owner_env = skb->dev->owner_env;
+	env = set_exec_env(skb->owner_env);
 	dev_queue_xmit(skb);
+	set_exec_env(env);
 
 	return 0;
 }
 
 int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct ve_struct *env;
 	struct net_device_stats *stats = vlan_dev_get_stats(dev);
 	unsigned short veth_TCI;
 
@@ -555,7 +560,10 @@ int vlan_dev_hwaccel_hard_start_xmit(str
 	stats->tx_bytes += skb->len;
 
 	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	skb->owner_env = skb->dev->owner_env;
+	env = set_exec_env(skb->owner_env);
 	dev_queue_xmit(skb);
+	set_exec_env(env);
 
 	return 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/8021q/vlanproc.c kernel-2.6.18-417.el5-028stab121/net/8021q/vlanproc.c
--- kernel-2.6.18-417.el5.orig/net/8021q/vlanproc.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/8021q/vlanproc.c	2017-01-13 08:40:21.000000000 -0500
@@ -114,13 +114,21 @@ static struct file_operations vlandev_fo
  *	/proc/net/vlan 
  */
 
+#ifdef CONFIG_VE
+#define proc_vlan_dir	(get_exec_env()->_proc_vlan_dir)
+#else
 static struct proc_dir_entry *proc_vlan_dir;
+#endif
 
 /*
  *	/proc/net/vlan/config 
  */
 
+#ifdef CONFIG_VE
+#define proc_vlan_conf	(get_exec_env()->_proc_vlan_conf)
+#else
 static struct proc_dir_entry *proc_vlan_conf;
+#endif
 
 /* Strings */
 static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = {
@@ -154,7 +162,7 @@ void vlan_proc_cleanup(void)
  *	Create /proc/net/vlan entries
  */
 
-int __init vlan_proc_init(void)
+int vlan_proc_init(void)
 {
 	proc_vlan_dir = proc_mkdir(name_root, proc_net);
 	if (proc_vlan_dir) {
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br.c kernel-2.6.18-417.el5-028stab121/net/bridge/br.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br.c	2017-01-13 08:40:23.000000000 -0500
@@ -34,6 +34,41 @@ static struct net_device *__br_get_br_de
 	return port ? port->br->dev : NULL;
 }
 
+static int ve_bridge_init(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_BRIDGE))
+		return 0;
+
+	ve->has_bridge_support = 1;
+	if (!ve_is_super(ve))
+		__module_get(THIS_MODULE);
+
+	return 0;
+}
+
+static void ve_bridge_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!(ve->has_bridge_support))
+		return;
+
+	br_cleanup_bridges(ve);
+	ve->has_bridge_support = 0;
+
+	if (!ve_is_super(ve))
+		module_put(THIS_MODULE);
+}
+
+static struct ve_hook bridge_hook = {
+	.owner	= THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+	.init	= ve_bridge_init,
+	.fini	= ve_bridge_fini,
+};
+
 static int __init br_init(void)
 {
 	int err;
@@ -62,6 +97,10 @@ static int __init br_init(void)
 	br_fdb_get_hook = br_fdb_get;
 	br_fdb_put_hook = br_fdb_put;
 
+	get_ve0()->features |= VE_FEATURE_BRIDGE;
+	ve_bridge_init(get_ve0());
+	ve_hook_register(VE_SS_CHAIN, &bridge_hook);
+
 	return 0;
 
 err_out2:
@@ -73,13 +112,15 @@ err_out1:
 
 static void __exit br_deinit(void)
 {
+	ve_hook_unregister(&bridge_hook);
+
 	rcu_assign_pointer(br_stp_sap->rcv_func, NULL);
 
 	br_netlink_fini();
 	unregister_netdevice_notifier(&br_device_notifier);
 	brioctl_set(NULL);
 
-	br_cleanup_bridges();
+	ve_bridge_fini(get_ve0());
 
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_device.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_device.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_device.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_device.c	2017-01-13 08:40:23.000000000 -0500
@@ -45,6 +45,8 @@ int br_dev_xmit(struct sk_buff *skb, str
 	skb->mac.raw = skb->data;
 	skb_pull(skb, ETH_HLEN);
 
+	skb->brmark = BR_ALREADY_SEEN;
+
 	if (is_multicast_ether_addr(dest)) {
 		if (br_multicast_rcv(br, NULL, skb)) {
 			kfree_skb(skb);
@@ -58,7 +60,7 @@ int br_dev_xmit(struct sk_buff *skb, str
 		else
 			br_flood_deliver(br, skb);
 	} else if ((dst = __br_fdb_get(br, dest)) != NULL)
-		br_deliver(dst->dst, skb);
+		br_deliver(dst->dst, skb, 1);
 	else
 		br_flood_deliver(br, skb);
 
@@ -252,6 +254,7 @@ void br_dev_setup(struct net_device *dev
 	dev->set_mac_address = br_set_mac_address;
 	dev->priv_flags = IFF_EBRIDGE;
 
- 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
- 			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | NETIF_F_GSO;
+	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
+			NETIF_F_GSO | NETIF_F_VIRTUAL;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_forward.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_forward.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_forward.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_forward.c	2017-01-13 08:40:23.000000000 -0500
@@ -42,7 +42,8 @@ static inline unsigned packet_length(con
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
 	/* drop mtu oversized packets except gso */
-	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
+	if (!(skb->dev->features & NETIF_F_VENET) &&
+	    packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 		kfree_skb(skb);
 	else {
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -128,14 +129,24 @@ static void __br_forward(const struct ne
 }
 
 /* called with rcu_read_lock */
-void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
+void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb, int free)
 {
 	if (should_deliver(to, skb)) {
+		if (!free) {
+			struct sk_buff *skb2;
+
+			if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+				to->br->statistics.tx_dropped++;
+				return;
+			}
+			skb = skb2;
+		}
 		__br_deliver(to, skb);
 		return;
 	}
 
-	kfree_skb(skb);
+	if (free)
+		kfree_skb(skb);
 }
 
 /* called with rcu_read_lock */
@@ -196,8 +207,8 @@ out:
 
 /* called under bridge lock */
 static void br_flood(struct net_bridge *br, struct sk_buff *skb,
-		     struct sk_buff *skb0,
-		     void (*__packet_hook)(const struct net_bridge_port *p, 
+	     struct sk_buff *skb0,
+	     int free, void (*__packet_hook)(const struct net_bridge_port *p,
 					   struct sk_buff *skb))
 {
 	struct net_bridge_port *p;
@@ -221,7 +232,7 @@ static void br_flood(struct net_bridge *
 	return;
 
 out:
-	if (!skb0)
+	if (!skb0 && free)
 		kfree_skb(skb);
 }
 
@@ -229,14 +240,36 @@ out:
 /* called with rcu_read_lock */
 void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb)
 {
-	br_flood(br, skb, NULL, __br_deliver);
+	br_flood(br, skb, NULL, 1, __br_deliver);
+}
+
+/* called with rcu_read_lock */
+void br_xmit_deliver(struct net_bridge *br, struct net_bridge_port *port,
+			struct sk_buff *skb)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry_rcu(p, &br->port_list, list) {
+		if (p == port)
+			continue;
+		if (should_deliver(p, skb)) {
+			struct sk_buff *skb2;
+
+			if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+				br->statistics.tx_dropped++;
+				return;
+			}
+			__br_deliver(p, skb2);
+		}
+	}
 }
 
 /* called under bridge lock */
 void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
 		      struct sk_buff *skb2)
 {
-	br_flood(br, skb, skb2, __br_forward);
+	skb->brmark = BR_ALREADY_SEEN;
+	br_flood(br, skb, skb2, 1, __br_forward);
 }
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_if.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_if.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_if.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_if.c	2017-01-13 08:40:30.000000000 -0500
@@ -179,6 +179,11 @@ static void del_br(struct net_bridge *br
 {
 	struct net_bridge_port *p, *n;
 
+	if (br->master_dev) {
+		dev_put(br->master_dev);
+		br->master_dev = NULL;
+	}
+
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
 		del_nbp(p);
 	}
@@ -439,6 +444,10 @@ int br_add_if(struct net_bridge *br, str
 	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
 	    (br->dev->flags & IFF_UP))
 		br_stp_enable_port(p);
+	if (!(dev->features & NETIF_F_VIRTUAL) && !br->master_dev) {
+		dev_hold(dev);
+		br->master_dev = dev;
+	}
 	spin_unlock_bh(&br->lock);
 
 	dev_set_mtu(br->dev, br_min_mtu(br));
@@ -483,17 +496,27 @@ int br_del_if(struct net_bridge *br, str
 	spin_lock_bh(&br->lock);
 	br_stp_recalculate_bridge_id(br);
 	br_features_recompute(br);
+	if (br->master_dev == dev) {
+		br->master_dev = NULL;
+		dev_put(dev);
+		list_for_each_entry(p, &br->port_list, list)
+			if (!(p->dev->features & NETIF_F_VIRTUAL)) {
+				dev_hold(p->dev);
+				br->master_dev = p->dev;
+				break;
+			}
+	}
 	spin_unlock_bh(&br->lock);
 
 	return 0;
 }
 
-void __exit br_cleanup_bridges(void)
+void br_cleanup_bridges(struct ve_struct *ve)
 {
 	struct net_device *dev, *nxt;
 
 	rtnl_lock();
-	for (dev = dev_base; dev; dev = nxt) {
+	for (dev = ve->_net_dev_base; dev; dev = nxt) {
 		nxt = dev->next;
 		if (dev->priv_flags & IFF_EBRIDGE)
 			del_br(dev->priv);
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_input.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_input.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_input.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_input.c	2017-01-13 08:40:23.000000000 -0500
@@ -24,14 +24,22 @@ const u8 br_group_address[ETH_ALEN] = { 
 
 static int br_pass_frame_up(struct sk_buff *skb)
 {
-	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+	struct net_device *indev, *outdev,
+			*brdev = BR_INPUT_SKB_CB(skb)->brdev;
 	struct net_bridge *br = netdev_priv(brdev);
 
 	br->statistics.rx_packets++;
 	br->statistics.rx_bytes += skb->len;
 
 	indev = skb->dev;
-	skb->dev = br->dev;
+	if (!br->via_phys_dev)
+		skb->dev = br->dev;
+	else {
+		skb->brmark = BR_ALREADY_SEEN;
+		outdev = br->master_dev;
+		if (outdev)
+			skb->dev = outdev;
+	}
 
 	return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
 		       netif_receive_skb);
@@ -66,7 +74,7 @@ int br_handle_frame_finish(struct sk_buf
 	/* The packet skb2 goes to the local host (NULL to skip). */
 	skb2 = NULL;
 
-	if (br->dev->flags & IFF_PROMISC)
+	if ((br->dev->flags & IFF_PROMISC) && !br->via_phys_dev)
 		skb2 = skb;
 
 	dst = NULL;
@@ -149,14 +157,20 @@ int br_handle_frame(struct net_bridge_po
 	}
 
 	if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
+		struct net_device *out;
+
 		if (br_should_route_hook) {
 			if (br_should_route_hook(pskb)) 
 				return 0;
 			skb = *pskb;
 			dest = eth_hdr(skb)->h_dest;
 		}
+		if ((*pskb)->brmark == BR_ALREADY_SEEN)
+			return 0;
+
+		out = p->br->via_phys_dev ? p->br->master_dev : p->br->dev;
 
-		if (!compare_ether_addr(p->br->dev->dev_addr, dest))
+		if (out && !compare_ether_addr(out->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
 
 		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_ioctl.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_ioctl.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_ioctl.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_ioctl.c	2017-01-13 08:40:23.000000000 -0500
@@ -22,12 +22,12 @@
 #include "br_private.h"
 
 /* called with RTNL */
-static int get_bridge_ifindices(int *indices, int num)
+static int get_bridge_ifindices(struct ve_struct *ve, int *indices, int num)
 {
 	struct net_device *dev;
 	int i = 0;
 
-	for (dev = dev_base; dev && i < num; dev = dev->next) {
+	for (dev = ve->_net_dev_base; dev && i < num; dev = dev->next) {
 		if (dev->priv_flags & IFF_EBRIDGE) 
 			indices[i++] = dev->ifindex;
 	}
@@ -138,6 +138,7 @@ static int old_dev_ioctl(struct net_devi
 		b.topology_change_detected = br->topology_change_detected;
 		b.root_port = br->root_port;
 		b.stp_enabled = br->stp_enabled;
+		b.via_phys_dev = br->via_phys_dev;
 		b.ageing_time = jiffies_to_clock_t(br->ageing_time);
 		b.hello_timer_value = br_timer_value(&br->hello_timer);
 		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -254,6 +255,13 @@ static int old_dev_ioctl(struct net_devi
 		br->stp_enabled = args[1]?1:0;
 		return 0;
 
+	case BRCTL_SET_VIA_ORIG_DEV:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		br->via_phys_dev = args[1] ? 1 : 0;
+		return 0;
+
 	case BRCTL_SET_BRIDGE_PRIORITY:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
@@ -308,7 +316,7 @@ static int old_dev_ioctl(struct net_devi
 	return -EOPNOTSUPP;
 }
 
-static int old_deviceless(void __user *uarg)
+static int old_deviceless(struct ve_struct *ve, void __user *uarg)
 {
 	unsigned long args[3];
 
@@ -330,7 +338,7 @@ static int old_deviceless(void __user *u
 		if (indices == NULL)
 			return -ENOMEM;
 
-		args[2] = get_bridge_ifindices(indices, args[2]);
+		args[2] = get_bridge_ifindices(ve, indices, args[2]);
 
 		ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
 			? -EFAULT : args[2];
@@ -364,10 +372,15 @@ static int old_deviceless(void __user *u
 
 int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
 {
+	struct ve_struct *ve = get_exec_env();
+
+	if (!ve->has_bridge_support)
+		return -ENOTTY;
+
 	switch (cmd) {
 	case SIOCGIFBR:
 	case SIOCSIFBR:
-		return old_deviceless(uarg);
+		return old_deviceless(ve, uarg);
 		
 	case SIOCBRADDBR:
 	case SIOCBRDELBR:
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_netfilter.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_netfilter.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_netfilter.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_netfilter.c	2017-01-13 08:40:41.000000000 -0500
@@ -34,6 +34,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
 #include <linux/in_route.h>
+#include <linux/inetdevice.h>
 
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -125,6 +126,70 @@ static inline struct nf_bridge_info *nf_
 	return skb->nf_bridge;
 }
 
+/* When handing a packet over to the IP layer
+ * check whether we have a skb that is in the
+ * expected format
+ */
+
+int br_parse_ip_options(struct sk_buff *skb)
+{
+	struct ip_options *opt;
+	struct iphdr *iph;
+	struct net_device *dev = skb->dev;
+	u32 len;
+
+	iph = ip_hdr(skb);
+	opt = &(IPCB(skb)->opt);
+
+	/* Basic sanity checks */
+	if (iph->ihl < 5 || iph->version != 4)
+		goto inhdr_error;
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto inhdr_error;
+
+	iph = ip_hdr(skb);
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+		goto inhdr_error;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len) {
+		IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	} else if (len < (iph->ihl*4))
+		goto inhdr_error;
+
+	if (pskb_trim_rcsum(skb, len)) {
+		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+		goto drop;
+	}
+
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	if (iph->ihl == 5)
+		return 0;
+
+	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
+	if (ip_options_compile(opt, skb))
+		goto inhdr_error;
+
+	/* Check correct handling of SRR option */
+	if (unlikely(opt->srr)) {
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
+		if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
+			goto drop;
+
+		if (ip_options_rcv_srr(skb))
+			goto drop;
+	}
+
+	return 0;
+
+inhdr_error:
+	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+drop:
+	return -1;
+}
+
 static inline void nf_bridge_save_header(struct sk_buff *skb)
 {
         int header_size = 16;
@@ -240,9 +305,11 @@ static int br_nf_pre_routing_finish_brid
 static int br_nf_pre_routing_finish(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
+	struct net_bridge *br = netdev_priv(dev);
 	struct iphdr *iph = skb->nh.iph;
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 
+	dev = br->via_phys_dev ? br->master_dev : dev;
 	if (nf_bridge->mask & BRNF_PKT_TYPE) {
 		skb->pkt_type = PACKET_OTHERHOST;
 		nf_bridge->mask ^= BRNF_PKT_TYPE;
@@ -442,8 +509,6 @@ static unsigned int br_nf_pre_routing(un
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	struct iphdr *iph;
-	__u32 len;
 	struct sk_buff *skb = *pskb;
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -478,28 +543,9 @@ static unsigned int br_nf_pre_routing(un
 		skb->nh.raw += VLAN_HLEN;
 	}
 
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto inhdr_error;
-
-	iph = skb->nh.iph;
-	if (iph->ihl < 5 || iph->version != 4)
-		goto inhdr_error;
-
-	if (!pskb_may_pull(skb, 4 * iph->ihl))
-		goto inhdr_error;
-
-	iph = skb->nh.iph;
-	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
-		goto inhdr_error;
-
-	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < 4 * iph->ihl)
-		goto inhdr_error;
-
-	pskb_trim_rcsum(skb, len);
-
-	/* BUG: Should really parse the IP options here. */
-	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	if (br_parse_ip_options(skb))
+		/* Drop invalid packet */
+		goto out;
 
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
@@ -513,8 +559,6 @@ static unsigned int br_nf_pre_routing(un
 
 	return NF_STOLEN;
 
-inhdr_error:
-//      IP_INC_STATS_BH(IpInHdrErrors);
 out:
 	return NF_DROP;
 }
@@ -604,8 +648,8 @@ static unsigned int br_nf_forward_ip(uns
 		nf_bridge->mask |= BRNF_PKT_TYPE;
 	}
 
-	/* BUG: Should really parse the IP options here. */
-	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	if (pf == PF_INET && br_parse_ip_options(skb))
+		return NF_DROP;
 
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
@@ -773,14 +817,20 @@ out:
 
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
+	int ret;
+
 	if (skb->protocol == htons(ETH_P_IP) &&
+	    !(skb->dev->features & NETIF_F_VENET) &&
 	    skb->len > skb->dev->mtu &&
 	    !skb_is_gso(skb)) {
-		/* BUG: Should really parse the IP options here. */
-		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-		return ip_fragment(skb, br_dev_queue_push_xmit);
+		if (br_parse_ip_options(skb))
+			/* Drop invalid packet */
+			return NF_DROP;
+		ret = ip_fragment(skb, br_dev_queue_push_xmit);
 	} else
-		return br_dev_queue_push_xmit(skb);
+		ret = br_dev_queue_push_xmit(skb);
+
+	return ret;
 }
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_private.h kernel-2.6.18-417.el5-028stab121/net/bridge/br_private.h
--- kernel-2.6.18-417.el5.orig/net/bridge/br_private.h	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_private.h	2017-01-13 08:40:23.000000000 -0500
@@ -19,6 +19,10 @@
 #include <linux/miscdevice.h>
 #include <linux/if_bridge.h>
 
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+
 #define BR_HASH_BITS 8
 #define BR_HASH_SIZE (1 << BR_HASH_BITS)
 
@@ -129,6 +133,8 @@ struct net_bridge
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
+	struct net_device		*master_dev;
+	unsigned char			via_phys_dev;
 	struct net_device_stats		statistics;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
@@ -214,6 +220,7 @@ static inline int br_is_root_bridge(cons
 /* br_device.c */
 extern void br_dev_setup(struct net_device *dev);
 extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
+extern int br_xmit(struct sk_buff *skb, struct net_bridge_port *port);
 extern bool br_devices_support_netpoll(struct net_bridge *br);
 extern void br_poll_controller(struct net_device *br_dev);
 extern void br_netpoll_cleanup(struct net_device *br_dev);
@@ -242,12 +249,15 @@ extern void br_fdb_update(struct net_bri
 
 /* br_forward.c */
 extern void br_deliver(const struct net_bridge_port *to,
-		struct sk_buff *skb);
+		struct sk_buff *skb, int free);
 extern int br_dev_queue_push_xmit(struct sk_buff *skb);
 extern void br_forward(const struct net_bridge_port *to,
 		struct sk_buff *skb, struct sk_buff *skb0);
 extern int br_forward_finish(struct sk_buff *skb);
 extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb);
+extern void br_xmit_deliver(struct net_bridge *br,
+			    struct net_bridge_port *port,
+			    struct sk_buff *skb);
 extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
 			     struct sk_buff *skb2);
 
@@ -255,7 +265,7 @@ extern void br_flood_forward(struct net_
 extern void br_port_carrier_check(struct net_bridge_port *p);
 extern int br_add_bridge(const char *name);
 extern int br_del_bridge(const char *name);
-extern void br_cleanup_bridges(void);
+extern void br_cleanup_bridges(struct ve_struct *ve);
 extern int br_add_if(struct net_bridge *br,
 	      struct net_device *dev);
 extern int br_del_if(struct net_bridge *br,
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_stp_if.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_stp_if.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_stp_if.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_stp_if.c	2017-01-13 08:40:15.000000000 -0500
@@ -126,7 +126,9 @@ void br_stp_disable_port(struct net_brid
 /* called under bridge lock */
 void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 {
-	unsigned char oldaddr[6];
+	/* should be aligned on 2 bytes for compare_ether_addr() */
+	unsigned short oldaddr_aligned[ETH_ALEN >> 1];
+	unsigned char *oldaddr = (unsigned char *)oldaddr_aligned;
 	struct net_bridge_port *p;
 	int wasroot;
 
@@ -151,11 +153,14 @@ void br_stp_change_bridge_id(struct net_
 		br_become_root_bridge(br);
 }
 
-static const unsigned char br_mac_zero[6];
+/* should be aligned on 2 bytes for compare_ether_addr() */
+static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
 
 /* called under bridge lock */
 void br_stp_recalculate_bridge_id(struct net_bridge *br)
 {
+	const unsigned char *br_mac_zero =
+			(const unsigned char *)br_mac_zero_aligned;
 	const unsigned char *addr = br_mac_zero;
 	struct net_bridge_port *p;
 
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/br_sysfs_br.c kernel-2.6.18-417.el5-028stab121/net/bridge/br_sysfs_br.c
--- kernel-2.6.18-417.el5.orig/net/bridge/br_sysfs_br.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/br_sysfs_br.c	2017-01-13 08:40:23.000000000 -0500
@@ -157,6 +157,26 @@ static ssize_t store_stp_state(struct cl
 static CLASS_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state,
 			 store_stp_state);
 
+static ssize_t show_via_phys_dev_state(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%d\n", br->via_phys_dev);
+}
+
+static void set_via_phys_dev_state(struct net_bridge *br, unsigned long val)
+{
+	br->via_phys_dev = val;
+}
+
+static ssize_t store_via_phys_dev_state(struct class_device *cd,
+			       const char *buf, size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_via_phys_dev_state);
+}
+
+static CLASS_DEVICE_ATTR(via_phys_dev, S_IRUGO | S_IWUSR, show_via_phys_dev_state,
+			 store_via_phys_dev_state);
+
 static ssize_t show_priority(struct class_device *cd, char *buf)
 {
 	struct net_bridge *br = to_bridge(cd);
@@ -551,6 +571,7 @@ static struct attribute *bridge_attrs[] 
 	&class_device_attr_max_age.attr,
 	&class_device_attr_ageing_time.attr,
 	&class_device_attr_stp_state.attr,
+	&class_device_attr_via_phys_dev.attr,
 	&class_device_attr_priority.attr,
 	&class_device_attr_bridge_id.attr,
 	&class_device_attr_root_id.attr,
diff -upr kernel-2.6.18-417.el5.orig/net/bridge/netfilter/ebt_among.c kernel-2.6.18-417.el5-028stab121/net/bridge/netfilter/ebt_among.c
--- kernel-2.6.18-417.el5.orig/net/bridge/netfilter/ebt_among.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/bridge/netfilter/ebt_among.c	2017-01-13 08:40:16.000000000 -0500
@@ -176,7 +176,7 @@ static int ebt_among_check(const char *t
 			   unsigned int datalen)
 {
 	struct ebt_among_info *info = (struct ebt_among_info *) data;
-	int expected_length = sizeof(struct ebt_among_info);
+	int expected_length = EBT_ALIGN(sizeof(struct ebt_among_info));
 	const struct ebt_mac_wormhash *wh_dst, *wh_src;
 	int err;
 
@@ -185,7 +185,7 @@ static int ebt_among_check(const char *t
 	expected_length += ebt_mac_wormhash_size(wh_dst);
 	expected_length += ebt_mac_wormhash_size(wh_src);
 
-	if (datalen != EBT_ALIGN(expected_length)) {
+	if (datalen != expected_length) {
 		printk(KERN_WARNING
 		       "ebtables: among: wrong size: %d"
 		       "against expected %d, rounded to %Zd\n",
diff -upr kernel-2.6.18-417.el5.orig/net/compat.c kernel-2.6.18-417.el5-028stab121/net/compat.c
--- kernel-2.6.18-417.el5.orig/net/compat.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/compat.c	2017-01-13 08:40:40.000000000 -0500
@@ -314,107 +314,6 @@ void scm_detach_fds_compat(struct msghdr
 }
 
 /*
- * For now, we assume that the compatibility and native version
- * of struct ipt_entry are the same - sfr.  FIXME
- */
-struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
-	u32			valid_hooks;
-	u32			num_entries;
-	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
-	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
-	struct ipt_entry	entries[0];
-};
-
-static int do_netfilter_replace(int fd, int level, int optname,
-				char __user *optval, int optlen)
-{
-	struct compat_ipt_replace __user *urepl;
-	struct ipt_replace __user *repl_nat;
-	char name[IPT_TABLE_MAXNAMELEN];
-	u32 origsize, tmp32, num_counters;
-	unsigned int repl_nat_size;
-	int ret;
-	int i;
-	compat_uptr_t ucntrs;
-
-	urepl = (struct compat_ipt_replace __user *)optval;
-	if (get_user(origsize, &urepl->size))
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (optlen != sizeof(*urepl) + origsize)
-		return -ENOPROTOOPT;
-
-	/* XXX Assumes that size of ipt_entry is the same both in
-	 *     native and compat environments.
-	 */
-	repl_nat_size = sizeof(*repl_nat) + origsize;
-	repl_nat = compat_alloc_user_space(repl_nat_size);
-
-	ret = -EFAULT;
-	if (put_user(origsize, &repl_nat->size))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, urepl, optlen) ||
-	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
-		goto out;
-
-	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->valid_hooks) ||
-	    __put_user(tmp32, &repl_nat->valid_hooks))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->num_entries) ||
-	    __put_user(tmp32, &repl_nat->num_entries))
-		goto out;
-
-	if (__get_user(num_counters, &urepl->num_counters) ||
-	    __put_user(num_counters, &repl_nat->num_counters))
-		goto out;
-
-	if (__get_user(ucntrs, &urepl->counters) ||
-	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-		goto out;
-
-	if (__copy_in_user(&repl_nat->entries[0],
-			   &urepl->entries[0],
-			   origsize))
-		goto out;
-
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-		    __get_user(tmp32, &urepl->underflow[i]) ||
-		    __put_user(tmp32, &repl_nat->underflow[i]))
-			goto out;
-	}
-
-	/*
-	 * Since struct ipt_counters just contains two u_int64_t members
-	 * we can just do the access_ok check here and pass the (converted)
-	 * pointer into the standard syscall.  We hope that the pointer is
-	 * not misaligned ...
-	 */
-	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-		       num_counters * sizeof(struct ipt_counters)))
-		goto out;
-
-
-	ret = sys_setsockopt(fd, level, optname,
-			     (char __user *)repl_nat, repl_nat_size);
-
-out:
-	return ret;
-}
-
-/*
  * A struct sock_filter is architecture independent.
  */
 struct compat_sock_fprog {
@@ -482,10 +381,6 @@ asmlinkage long compat_sys_setsockopt(in
 	int err;
 	struct socket *sock;
 
-	if (level == SOL_IPV6 && optname == IPT_SO_SET_REPLACE)
-		return do_netfilter_replace(fd, level, optname,
-					    optval, optlen);
-
 	if (optlen < 0)
 		return -EINVAL;
 
@@ -903,6 +798,9 @@ asmlinkage long compat_sys_socketcall(in
 		ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
 					  compat_ptr(a[4]));
 		break;
+	case SYS_ACCEPT4:
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff -upr kernel-2.6.18-417.el5.orig/net/core/datagram.c kernel-2.6.18-417.el5-028stab121/net/core/datagram.c
--- kernel-2.6.18-417.el5.orig/net/core/datagram.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/datagram.c	2017-01-13 08:40:17.000000000 -0500
@@ -56,6 +56,8 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 
+#include <ub/ub_net.h>
+
 /*
  *	Is a socket 'connection oriented' ?
  */
@@ -551,6 +553,9 @@ int skb_copy_and_csum_datagram_iovec(str
 	unsigned int csum;
 	int chunk = skb->len - hlen;
 
+	if (!chunk)
+		return 0;
+
 	/* Skip filled elements.
 	 * Pretty silly, look at memcpy_toiovec, though 8)
 	 */
@@ -600,6 +605,7 @@ unsigned int datagram_poll(struct file *
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ubc_space;
 
 	sock_poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -609,8 +615,14 @@ unsigned int datagram_poll(struct file *
 		mask |= POLLERR;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP;
-	if (sk->sk_shutdown == SHUTDOWN_MASK)
+	if (sk->sk_shutdown == SHUTDOWN_MASK) {
+		no_ubc_space = 0;
 		mask |= POLLHUP;
+	} else {
+		no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+		if (no_ubc_space)
+			ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+	}
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
@@ -627,7 +639,7 @@ unsigned int datagram_poll(struct file *
 	}
 
 	/* writable? */
-	if (sock_writeable(sk))
+	if (!no_ubc_space && sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff -upr kernel-2.6.18-417.el5.orig/net/core/dev.c kernel-2.6.18-417.el5-028stab121/net/core/dev.c
--- kernel-2.6.18-417.el5.orig/net/core/dev.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/dev.c	2017-01-13 08:40:26.000000000 -0500
@@ -126,8 +126,9 @@
 #include <trace/napi.h>
 #include <trace/net.h>
 
-#ifdef CONFIG_XEN
+#if defined(CONFIG_XEN) || defined(CONFIG_VE)
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #endif
@@ -138,6 +139,9 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -190,25 +194,40 @@ static struct list_head gro_ptype_base[1
  * unregister_netdevice(), which must be called with the rtnl
  * semaphore held.
  */
+#ifdef CONFIG_VE
+#define dev_tail	(get_exec_env()->_net_dev_tail)
+#else
 struct net_device *dev_base;
 static struct net_device **dev_tail = &dev_base;
+EXPORT_SYMBOL(dev_base);
+#endif
 DEFINE_RWLOCK(dev_base_lock);
 
-EXPORT_SYMBOL(dev_base);
 EXPORT_SYMBOL(dev_base_lock);
 
+#ifdef CONFIG_VE
+#define MAX_UNMOVABLE_NETDEVICES (8*4096)
+static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
+static LIST_HEAD(dev_global_list);
+#endif
+
 #define NETDEV_HASHBITS	8
 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 
-static inline struct hlist_head *dev_name_hash(const char *name)
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
 {
-	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+	unsigned hash;
+	if (!ve_is_super(env))
+		return visible_dev_head(env);
+	hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 }
 
-static inline struct hlist_head *dev_index_hash(int ifindex)
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
 {
+	if (!ve_is_super(env))
+		return visible_dev_index_head(env);
 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 }
 
@@ -224,17 +243,6 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  */
 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
 
-#ifdef CONFIG_SYSFS
-extern int netdev_sysfs_init(void);
-extern int netdev_register_sysfs(struct net_device *);
-extern void netdev_unregister_sysfs(struct net_device *);
-#else
-#define netdev_sysfs_init()	 	(0)
-#define netdev_register_sysfs(dev)	(0)
-#define	netdev_unregister_sysfs(dev)	do { } while(0)
-#endif
-
-
 /*******************************************************************************
 
 		Protocol management and registration routines
@@ -530,7 +538,7 @@ struct net_device *__dev_get_by_name(con
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_name_hash(name)) {
+	hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -563,6 +571,32 @@ struct net_device *dev_get_by_name(const
 }
 
 /**
+ *	__dev_global_get_by_name - find a device by its name in dev_global_list
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore
+ *	If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+#ifdef CONFIG_VE
+struct net_device *__dev_global_get_by_name(const char *name)
+{
+	struct net_device *dev;
+	/* It's called relatively rarely */
+	list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
+		if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+			return dev;
+	}
+	return NULL;
+}
+#else	/* CONFIG_VE */
+#define __dev_global_get_by_name(name)		__dev_get_by_name(name)
+#endif	/* CONFIG_VE */
+
+/**
  *	__dev_get_by_index - find a device by its ifindex
  *	@ifindex: index of device
  *
@@ -577,7 +611,7 @@ struct net_device *__dev_get_by_index(in
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_index_hash(ifindex)) {
+	hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, index_hlist);
 		if (dev->ifindex == ifindex)
@@ -704,6 +738,23 @@ int dev_valid_name(const char *name)
 	return 1;
 }
 
+static inline void __dev_check_name(const char *dev_name, const char *name, 
+		long *inuse, const int max_netdevices)
+{
+	int i = 0;
+	char buf[IFNAMSIZ];
+
+	if (!sscanf(dev_name, name, &i))
+		return;
+	if (i < 0 || i >= max_netdevices)
+		return;
+
+	/* avoid cases where sscanf is not exact inverse of printf */
+	snprintf(buf, sizeof(buf), name, i);
+	if (!strncmp(buf, dev_name, IFNAMSIZ))
+		set_bit(i, inuse);
+}
+
 /**
  *	dev_alloc_name - allocate a name for a device
  *	@dev: device
@@ -742,16 +793,20 @@ int dev_alloc_name(struct net_device *de
 		if (!inuse)
 			return -ENOMEM;
 
-		for (d = dev_base; d; d = d->next) {
-			if (!sscanf(d->name, name, &i))
-				continue;
-			if (i < 0 || i >= max_netdevices)
-				continue;
-
-			/*  avoid cases where sscanf is not exact inverse of printf */
-			snprintf(buf, sizeof(buf), name, i);
-			if (!strncmp(buf, d->name, IFNAMSIZ))
-				set_bit(i, inuse);
+#ifdef CONFIG_VE
+		if (ve_is_super(get_exec_env())) {
+			list_for_each_entry(d, &dev_global_list, 
+					dev_global_list_entry) {
+				__dev_check_name(d->name, name, inuse, 
+						max_netdevices);
+			}
+		} else
+#endif
+		{
+			for (d = dev_base; d; d = d->next) {
+				__dev_check_name(d->name, name, inuse, 
+						max_netdevices);
+			}
 		}
 
 		i = find_first_zero_bit(inuse, max_netdevices);
@@ -759,7 +814,11 @@ int dev_alloc_name(struct net_device *de
 	}
 
 	snprintf(buf, sizeof(buf), name, i);
-	if (!__dev_get_by_name(buf)) {
+	if (ve_is_super(get_exec_env()))
+		d = __dev_global_get_by_name(buf);
+	else
+		d = __dev_get_by_name(buf);
+	if (d == NULL) {
 		strlcpy(dev->name, buf, IFNAMSIZ);
 		return i;
 	}
@@ -792,13 +851,14 @@ int dev_change_name(struct net_device *d
 	if (!dev_valid_name(newname))
 		return -EINVAL;
 
+	/* Rename of devices in VE is prohibited by CAP_NET_ADMIN */
 	if (strchr(newname, '%')) {
 		err = dev_alloc_name(dev, newname);
 		if (err < 0)
 			return err;
 		strcpy(newname, dev->name);
 	}
-	else if (__dev_get_by_name(newname))
+	else if (__dev_global_get_by_name(newname))
 		return -EEXIST;
 	else
 		strlcpy(dev->name, newname, IFNAMSIZ);
@@ -806,7 +866,8 @@ int dev_change_name(struct net_device *d
 	err = class_device_rename(&dev->class_dev, dev->name);
 	if (!err) {
 		hlist_del(&dev->name_hlist);
-		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name,
+					get_exec_env()));
 		raw_notifier_call_chain(&netdev_chain,
 				NETDEV_CHANGENAME, dev);
 	}
@@ -837,9 +898,11 @@ EXPORT_SYMBOL(netdev_features_change);
 void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
+		struct ve_struct *vesave = set_exec_env(dev->owner_env);
 		raw_notifier_call_chain(&netdev_chain,
 				NETDEV_CHANGE, dev);
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		set_exec_env(vesave);
 	}
 }
 
@@ -1439,6 +1502,7 @@ gso:
 
 		skb->next = nskb->next;
 		nskb->next = NULL;
+
 		rc = dev->hard_start_xmit(nskb, dev);
 		trace_net_dev_xmit(skb, rc);
 		if (unlikely(rc)) {
@@ -1470,45 +1534,103 @@ out_kfree_skb:
 	}						\
 }
 
-#ifdef CONFIG_XEN
-inline int skb_checksum_setup(struct sk_buff *skb)
+#if defined(CONFIG_XEN) || defined(CONFIG_VE)
+static inline int skb_setup_csum_ptr(struct sk_buff *skb, int proto)
 {
-	if (skb->proto_csum_blank) {
-		if (skb->protocol != htons(ETH_P_IP))
-			goto out;
-		if (skb->data < skb->nh.raw + sizeof(*skb->nh.iph) &&
-		    !pskb_may_pull(skb, skb->nh.raw + sizeof(*skb->nh.iph) -
-					skb->data))
-			goto out;
-		skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
-		switch (skb->nh.iph->protocol) {
-		case IPPROTO_TCP:
-			skb->csum = offsetof(struct tcphdr, check);
-			break;
-		case IPPROTO_UDP:
-			skb->csum = offsetof(struct udphdr, check);
-			break;
-		default:
-			if (net_ratelimit())
-				printk(KERN_ERR "Attempting to checksum a non-"
-				       "TCP/UDP packet, dropping a protocol"
-				       " %d packet", skb->nh.iph->protocol);
-			goto out;
-		}
-		if (skb->data < skb->h.raw + skb->csum + 2 &&
-		    !pskb_may_pull(skb, skb->h.raw + skb->csum + 2 - skb->data))
-			goto out;
-		skb->ip_summed = CHECKSUM_HW;
-		skb->proto_csum_blank = 0;
+	switch (proto) {
+	case IPPROTO_TCP:
+		skb->csum = offsetof(struct tcphdr, check);
+		break;
+	case IPPROTO_UDP:
+		skb->csum = offsetof(struct udphdr, check);
+		break;
+	default:
+		if (net_ratelimit())
+			printk(KERN_ERR "Attempting to checksum a non-"
+			       "TCP/UDP packet, dropping a protocol"
+			       " %d packet", proto);
+		goto out;
 	}
+	if (unlikely(skb->data < skb->h.raw + skb->csum + 2 &&
+	    !pskb_may_pull(skb, skb->h.raw + skb->csum + 2 - skb->data)))
+		goto out;
+	skb->ip_summed = CHECKSUM_HW;
+	skb->proto_csum_blank = 0;
 	return 0;
 out:
 	return -EPROTO;
 }
+
+static inline int skb_checksum_setup_v4(struct sk_buff *skb)
+{
+	if (unlikely(skb->data < skb->nh.raw + sizeof(*skb->nh.iph) &&
+	    !pskb_may_pull(skb, skb->nh.raw + sizeof(*skb->nh.iph) -
+				skb->data)))
+		return -EPROTO;
+	skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+	return skb_setup_csum_ptr(skb, skb->nh.iph->protocol);
+}
+
+static inline int skb_checksum_setup_v6(struct sk_buff *skb)
+{
+	int ptr;
+	u8 nexthdr;
+	if (unlikely(skb->data < skb->nh.raw + sizeof(*skb->nh.ipv6h) &&
+	    !pskb_may_pull(skb, skb->nh.raw + sizeof(*skb->nh.ipv6h) -
+				skb->data)))
+		return -EPROTO;
+
+	ptr = (u8*)(skb->nh.ipv6h + 1) - skb->data;
+	nexthdr = skb->nh.ipv6h->nexthdr;
+
+	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
+	if (unlikely(ptr < 0))
+		return -EPROTO;
+
+	skb->h.raw = skb->data + ptr;
+	return skb_setup_csum_ptr(skb, nexthdr);
+}
+
+inline int skb_checksum_setup(struct sk_buff *skb)
+{
+	if (skb->proto_csum_blank) {
+		if (skb->protocol == htons(ETH_P_IP))
+			return skb_checksum_setup_v4(skb);
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			return skb_checksum_setup_v6(skb);
+		return -EPROTO;
+	}
+	return 0;
+}
 #else
 inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
 #endif
 
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+#include "../bridge/br_private.h"
+
+static __inline__ struct net_device *
+bridge_check(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_bridge_port *port;
+
+	port = rcu_dereference(dev->br_port);
+	if (port == NULL)
+		return dev;
+	if (skb->brmark == BR_ALREADY_SEEN ||
+	    !(port->br->via_phys_dev &&	dev == port->br->master_dev))
+		return dev;
+
+	dev = port->br->dev;
+	do {
+		skb->dev = dev;
+	} while ((skb = skb->next) != NULL);
+	return dev;
+}
+#else
+#define bridge_check(skb, dev)	(dev)
+#endif
+
 
 /**
  *	dev_queue_xmit - transmit a buffer
@@ -1577,6 +1699,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	      		goto out_kfree_skb;
 
 gso:
+	dev = bridge_check(skb, dev);
+
 	spin_lock_prefetch(&dev->queue_lock);
 
 	/* Disable soft irqs for various locks below. Also 
@@ -1602,6 +1726,34 @@ gso:
 #endif
 	trace_net_dev_queue(skb);
 	if (q->enqueue) {
+		/*
+		 * XXX this code is broken:
+		 *  1) it is activated for normal devices in VE0,
+		 *  2) it doesn't use API functions like ub_skb_set_charge,
+		 *  3) it isn't allowed to charge skb as UB_OTHERSOCKBUF
+		 *     if its socket is TCP.
+		 */
+#if 0
+		struct user_beancounter *ub;
+
+		ub = netdev_bc(dev)->exec_ub;
+		/* the skb CAN be already charged if it transmitted via
+		 * something like bonding device */
+		if (ub && (skb_bc(skb)->resource == 0)) {
+			unsigned long chargesize;
+			chargesize = skb_charge_fullsize(skb);
+			if (charge_beancounter(ub, UB_OTHERSOCKBUF,
+						chargesize, UB_SOFT)) {
+				rcu_read_unlock();
+				rc = -ENOMEM;
+				goto out_kfree_skb;
+			}
+			skb_bc(skb)->ub = ub;
+			skb_bc(skb)->charged = chargesize;
+			skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+		}
+#endif
+
 		/* Grab device queue */
 		spin_lock(&dev->queue_lock);
 		q = dev->qdisc;
@@ -1899,6 +2051,7 @@ int netif_receive_skb(struct sk_buff *sk
 	struct net_device *orig_or_bond;
 	int ret = NET_RX_DROP;
 	unsigned short type;
+	struct ve_struct *old_env;
 
 	trace_net_dev_receive(skb);
 
@@ -1927,6 +2080,17 @@ int netif_receive_skb(struct sk_buff *sk
 	skb->h.raw = skb->nh.raw = skb->data;
 	skb->mac_len = skb->nh.raw - skb->mac.raw;
 
+#ifdef CONFIG_VE
+	/*
+	 * Skb might be alloced in another VE context, than its device works.
+	 * So, set the correct owner_env.
+	 */
+	skb->owner_env = skb->dev->owner_env;
+	BUG_ON(skb->owner_env == NULL);
+#endif
+
+	old_env = set_exec_env(skb->owner_env);
+
 	pt_prev = NULL;
 
 	rcu_read_lock();
@@ -1938,7 +2102,7 @@ int netif_receive_skb(struct sk_buff *sk
 	}
 #endif
 
-#ifdef CONFIG_XEN
+#if defined(CONFIG_XEN) || defined(CONFIG_VE)
 	switch (skb->ip_summed) {
 	case CHECKSUM_UNNECESSARY:
 		skb->proto_data_valid = 1;
@@ -2030,6 +2194,7 @@ ncls:
 
 out:
 	rcu_read_unlock();
+	(void)set_exec_env(old_env);
 	return ret;
 }
 
@@ -2754,7 +2919,7 @@ static int __init dev_proc_init(void)
 {
 	int rc = -ENOMEM;
 
-	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+	if (!proc_glob_fops_create("net/dev", S_IRUGO, &dev_seq_fops))
 		goto out;
 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
 		goto out_dev;
@@ -2766,7 +2931,7 @@ out:
 out_softnet:
 	proc_net_remove("softnet_stat");
 out_dev:
-	proc_net_remove("dev");
+	remove_proc_glob_entry("net/dev", NULL);
 	goto out;
 }
 #else
@@ -2831,9 +2996,12 @@ void dev_set_promiscuity(struct net_devi
 		dev->flags &= ~IFF_PROMISC;
 	else
 		dev->flags |= IFF_PROMISC;
+	/* Promiscous mode on these devices does not mean anything */
+	if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+		return;
 	if (dev->flags != old_flags) {
 		dev_mc_upload(dev);
-		printk(KERN_INFO "device %s %s promiscuous mode\n",
+		ve_printk(VE_LOG, KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
 		       					       "left");
 		audit_log(current->audit_context, GFP_ATOMIC,
@@ -3262,16 +3430,25 @@ int dev_ioctl(unsigned int cmd, void __u
 		 *	- require strict serialization.
 		 *	- do not return a value
 		 */
+		case SIOCSIFMTU:
+		case SIOCSIFHWADDR:
 		case SIOCSIFFLAGS:
+		case SIOCSIFTXQLEN:
+			if (!capable(CAP_NET_ADMIN) &&
+			    !capable(CAP_VE_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			return ret;
+
 		case SIOCSIFMETRIC:
-		case SIOCSIFMTU:
 		case SIOCSIFMAP:
-		case SIOCSIFHWADDR:
 		case SIOCSIFSLAVE:
 		case SIOCADDMULTI:
 		case SIOCDELMULTI:
 		case SIOCSIFHWBROADCAST:
-		case SIOCSIFTXQLEN:
 		case SIOCSMIIREG:
 		case SIOCBONDENSLAVE:
 		case SIOCBONDRELEASE:
@@ -3347,20 +3524,72 @@ int dev_ioctl(unsigned int cmd, void __u
  *	dev_new_index	-	allocate an ifindex
  *
  *	Returns a suitable unique value for a new device interface
- *	number.  The caller must hold the rtnl semaphore or the
+ *	number. The caller must hold the rtnl semaphore or the
  *	dev_base_lock to be sure it remains unique.
+ *
+ *	Note: dev->name must be valid on entrance
  */
-static int dev_new_index(void)
+static int dev_ve_new_index(void)
 {
-	static int ifindex;
+#ifdef CONFIG_VE
+	int *ifindex = &get_exec_env()->ifindex;
+	int delta = 2;
+#else
+	static int s_ifindex;
+	int *ifindex = &s_ifindex;
+	int delta = 1;
+#endif
 	for (;;) {
-		if (++ifindex <= 0)
-			ifindex = 1;
-		if (!__dev_get_by_index(ifindex))
-			return ifindex;
+		*ifindex += delta;
+		if (*ifindex <= 0)
+			*ifindex = 1;
+		if (!__dev_get_by_index(*ifindex))
+			return *ifindex;
 	}
 }
 
+#ifdef CONFIG_VE
+static int dev_glb_new_index(void)
+{
+	int i;
+
+	i = find_first_zero_bit((long*)unmovable_ifindex_list, 
+			MAX_UNMOVABLE_NETDEVICES);
+
+	if (i == MAX_UNMOVABLE_NETDEVICES)
+		return -EMFILE;
+
+	__set_bit(i, (long*)unmovable_ifindex_list);
+	return (i + 1) * 2;
+}
+#endif
+
+static void dev_glb_free_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	int bit;
+
+	bit = dev->ifindex / 2 - 1;
+	BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES);
+	__clear_bit(bit, (long*)unmovable_ifindex_list);
+#endif
+}
+
+static int dev_new_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		return dev_glb_new_index();
+#endif
+
+	return dev_ve_new_index();
+}
+
+static void dev_free_index(struct net_device *dev)
+{
+	if ((dev->ifindex % 2) == 0)
+		dev_glb_free_index(dev);
+}
 
 struct netdev_lro_entry {
 	struct list_head list;
@@ -3558,6 +3787,10 @@ int register_netdevice(struct net_device
 	/* When net_device's are persistent, this will be fatal. */
 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
+	ret = -EPERM;
+	if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		goto out;
+
 	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->_xmit_lock);
 	dev->xmit_lock_owner = -1;
@@ -3577,27 +3810,32 @@ int register_netdevice(struct net_device
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
-			goto out_err;
+			goto out_free_div;
 		}
 	}
  
 	if (!dev_valid_name(dev->name)) {
 		ret = -EINVAL;
-		goto out_err;
+		goto out_free_div;
+	}
+
+	dev->ifindex = dev_new_index(dev);
+	if (dev->ifindex < 0) {
+		ret = dev->ifindex;
+		goto out_free_div;
 	}
 
-	dev->ifindex = dev_new_index();
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
 	/* Check for existence of name */
-	head = dev_name_hash(dev->name);
+	head = dev_name_hash(dev->name, get_exec_env());
 	hlist_for_each(p, head) {
 		struct net_device *d
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
 			ret = -EEXIST;
- 			goto out_err;
+ 			goto out_free_ind;
 		}
 	}
 
@@ -3611,9 +3849,9 @@ int register_netdevice(struct net_device
 	if (!dev->rebuild_header)
 		dev->rebuild_header = default_rebuild_header;
 
-	ret = netdev_register_sysfs(dev);
+	ret = netdev_register_sysfs(dev, 0);
 	if (ret)
-		goto out_err;
+		goto out_free_ind;
 	dev->reg_state = NETREG_REGISTERED;
 
 	/*
@@ -3624,12 +3862,21 @@ int register_netdevice(struct net_device
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 
 	dev->next = NULL;
+	dev->owner_env = get_exec_env();
+	netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
+	netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
 	dev_init_scheduler(dev);
+#ifdef CONFIG_VE
+	INIT_LIST_HEAD(&dev->dev_global_list_entry);
+	if (ve_is_super(get_exec_env()))
+		list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
+#endif
 	write_lock_bh(&dev_base_lock);
 	*dev_tail = dev;
 	dev_tail = &dev->next;
 	hlist_add_head(&dev->name_hlist, head);
-	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, 
+						get_exec_env()));
 	dev_hold(dev);
 	write_unlock_bh(&dev_base_lock);
 
@@ -3640,7 +3887,9 @@ int register_netdevice(struct net_device
 
 out:
 	return ret;
-out_err:
+out_free_ind:
+	dev_free_index(dev);
+out_free_div:
 	free_divert_blk(dev);
 	goto out;
 }
@@ -3690,6 +3939,39 @@ out:
 }
 EXPORT_SYMBOL(register_netdev);
 
+static LIST_HEAD(dev_cpt_operations);
+
+void register_dev_cpt_ops(struct dev_cpt_ops *ops)
+{
+	rtnl_lock();
+	list_add_tail(&ops->list, &dev_cpt_operations);
+	__rtnl_unlock();
+}
+EXPORT_SYMBOL(register_dev_cpt_ops);
+
+void unregister_dev_cpt_ops(struct dev_cpt_ops *ops)
+{
+	rtnl_lock();
+	list_del(&ops->list);
+	__rtnl_unlock();
+}
+EXPORT_SYMBOL(unregister_dev_cpt_ops);
+
+struct dev_cpt_ops *dev_cpt_ops_get(int cpt_object,
+					struct dev_cpt_ops *ops)
+{
+	ASSERT_RTNL();
+
+	ops = list_prepare_entry(ops, &dev_cpt_operations, list);
+
+	list_for_each_entry_continue(ops, &dev_cpt_operations, list)
+		if (ops->cpt_object == cpt_object)
+			return ops;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_cpt_ops_get);
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
@@ -3769,12 +4051,14 @@ static void netdev_wait_allrefs(struct n
 void netdev_run_todo(void)
 {
 	struct list_head list;
+	struct ve_struct *current_env;
 
 	/* Snapshot list, allow later requests */
 	list_replace_init(&net_todo_list, &list);
 
 	__rtnl_unlock();
 
+	current_env = get_exec_env();
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_entry(list.next, struct net_device, todo_list);
@@ -3787,6 +4071,7 @@ void netdev_run_todo(void)
 			continue;
 		}
 
+		(void)set_exec_env(dev->owner_env);
 		dev->reg_state = NETREG_UNREGISTERED;
 
 		netdev_wait_allrefs(dev);
@@ -3797,12 +4082,18 @@ void netdev_run_todo(void)
 		BUG_TRAP(!dev->ip6_ptr);
 		BUG_TRAP(!dev->dn_ptr);
 
+		put_beancounter(netdev_bc(dev)->exec_ub);
+		put_beancounter(netdev_bc(dev)->owner_ub);
+		netdev_bc(dev)->exec_ub = NULL;
+		netdev_bc(dev)->owner_ub = NULL;
+
 		if (dev->destructor)
 			dev->destructor(dev);
 
 		/* Free network device */
 		class_device_put(&dev->class_dev);
 	}
+	(void)set_exec_env(current_env);
 }
 
 /**
@@ -3831,7 +4122,7 @@ struct net_device *alloc_netdev(int size
 	alloc_size += (sizeof_priv + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof(struct net_device_extended) + NETDEV_ALIGN_CONST;
 
-	p = kzalloc(alloc_size, GFP_KERNEL);
+	p = ub_kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
 		return NULL;
@@ -3928,6 +4219,10 @@ int unregister_netdevice(struct net_devi
 				dev_tail = dp;
 			*dp = d->next;
 			write_unlock_bh(&dev_base_lock);
+#ifdef CONFIG_VE
+			if (!list_empty(&dev->dev_global_list_entry))
+				list_del(&dev->dev_global_list_entry);
+#endif
 			break;
 		}
 	}
@@ -3961,10 +4256,12 @@ int unregister_netdevice(struct net_devi
 	/* Notifier chain MUST detach us from master device. */
 	BUG_TRAP(!dev->master);
 
+	dev_free_index(dev);
+
 	free_divert_blk(dev);
 
 	/* Remove entries from sysfs */
-	netdev_unregister_sysfs(dev);
+	netdev_unregister_sysfs(dev, 0);
 
 	/* Finish processing unregister after unlock */
 	net_set_todo(dev);
@@ -4083,7 +4380,7 @@ unsigned long netdev_increment_features(
 	one |= NETIF_F_ALL_CSUM;
 
 	one |= all & NETIF_F_ONE_FOR_ALL;
-	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_VIRTUAL;
 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
 
 	return all;
@@ -4160,6 +4457,8 @@ EXPORT_SYMBOL(dev_close);
 EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(dev_name_hash);
+EXPORT_SYMBOL(dev_index_hash);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
 EXPORT_SYMBOL(dev_remove_pack);
diff -upr kernel-2.6.18-417.el5.orig/net/core/dev_mcast.c kernel-2.6.18-417.el5-028stab121/net/core/dev_mcast.c
--- kernel-2.6.18-417.el5.orig/net/core/dev_mcast.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/dev_mcast.c	2017-01-13 08:40:19.000000000 -0500
@@ -289,9 +289,10 @@ static struct file_operations dev_mc_seq
 
 void __init dev_mcast_init(void)
 {
-	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+	proc_glob_fops_create("net/dev_mcast", 0, &dev_mc_seq_fops);
 }
 
 EXPORT_SYMBOL(dev_mc_add);
 EXPORT_SYMBOL(dev_mc_delete);
 EXPORT_SYMBOL(dev_mc_upload);
+EXPORT_SYMBOL(dev_mc_discard);
diff -upr kernel-2.6.18-417.el5.orig/net/core/dst.c kernel-2.6.18-417.el5-028stab121/net/core/dst.c
--- kernel-2.6.18-417.el5.orig/net/core/dst.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/dst.c	2017-01-13 08:40:21.000000000 -0500
@@ -259,11 +259,14 @@ static int dst_dev_event(struct notifier
 	switch (event) {
 	case NETDEV_UNREGISTER:
 	case NETDEV_DOWN:
-		spin_lock_bh(&dst_lock);
+		local_bh_disable();
+		dst_run_gc(0);
+		spin_lock(&dst_lock);
 		for (dst = dst_garbage_list; dst; dst = dst->next) {
 			dst_ifdown(dst, dev, event != NETDEV_DOWN);
 		}
-		spin_unlock_bh(&dst_lock);
+		spin_unlock(&dst_lock);
+		local_bh_enable();
 		break;
 	}
 	return NOTIFY_DONE;
diff -upr kernel-2.6.18-417.el5.orig/net/core/dv.c kernel-2.6.18-417.el5-028stab121/net/core/dv.c
--- kernel-2.6.18-417.el5.orig/net/core/dv.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/dv.c	2017-01-13 08:40:16.000000000 -0500
@@ -544,3 +544,5 @@ void divert_frame(struct sk_buff *skb)
 		break;
 	}
 }
+
+EXPORT_SYMBOL(free_divert_blk);
diff -upr kernel-2.6.18-417.el5.orig/net/core/ethtool.c kernel-2.6.18-417.el5-028stab121/net/core/ethtool.c
--- kernel-2.6.18-417.el5.orig/net/core/ethtool.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/ethtool.c	2017-01-13 08:40:22.000000000 -0500
@@ -839,7 +839,7 @@ int dev_ethtool(struct ifreq *ifr)
 	 * XXX: This can be pushed down into the ethtool_* handlers that
 	 * need it.  Keep existing behaviour for the moment.
 	 */
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
 	if (!dev || !netif_device_present(dev))
diff -upr kernel-2.6.18-417.el5.orig/net/core/fib_rules.c kernel-2.6.18-417.el5-028stab121/net/core/fib_rules.c
--- kernel-2.6.18-417.el5.orig/net/core/fib_rules.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/fib_rules.c	2017-01-13 08:40:41.000000000 -0500
@@ -14,8 +14,13 @@
 #include <linux/list.h>
 #include <net/fib_rules.h>
 
-static LIST_HEAD(rules_ops);
+static LIST_HEAD(_rules_ops);
 static DEFINE_SPINLOCK(rules_mod_lock);
+#ifdef CONFIG_VE
+#define rules_ops  (get_exec_env()->_rules_ops)
+#else
+#define rules_ops  _rules_ops
+#endif
 
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops);
@@ -76,7 +81,7 @@ static void cleanup_ops(struct fib_rules
 {
 	struct fib_rule *rule, *tmp;
 
-	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
 		list_del_rcu(&rule->list);
 		fib_rule_put(rule);
 	}
@@ -115,7 +120,7 @@ int fib_rules_lookup(struct fib_rules_op
 
 	rcu_read_lock();
 
-	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
 		if (rule->ifindex && (rule->ifindex != fl->iif))
 			continue;
 
@@ -163,7 +168,7 @@ int fib_nl_newrule(struct sk_buff *skb, 
 	if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ)
 		goto errout;
 
-	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	rule = kzalloc(ops->rule_size, GFP_KERNEL_UBC);
 	if (rule == NULL) {
 		err = -ENOMEM;
 		goto errout;
@@ -196,7 +201,7 @@ int fib_nl_newrule(struct sk_buff *skb, 
 	if (err < 0)
 		goto errout_free;
 
-	list_for_each_entry(r, ops->rules_list, list) {
+	list_for_each_entry(r, &ops->rules_list, list) {
 		if (r->pref > rule->pref)
 			break;
 		last = r;
@@ -207,7 +212,7 @@ int fib_nl_newrule(struct sk_buff *skb, 
 	if (last)
 		list_add_rcu(&rule->list, &last->list);
 	else
-		list_add_rcu(&rule->list, ops->rules_list);
+		list_add_rcu(&rule->list, &ops->rules_list);
 
 	notify_rule_change(RTM_NEWRULE, rule, ops);
 	rules_ops_put(ops);
@@ -241,7 +246,7 @@ int fib_nl_delrule(struct sk_buff *skb, 
 	if (err < 0)
 		goto errout;
 
-	list_for_each_entry(rule, ops->rules_list, list) {
+	list_for_each_entry(rule, &ops->rules_list, list) {
 		if (frh->action && (frh->action != rule->action))
 			continue;
 
@@ -323,7 +328,7 @@ int fib_rules_dump(struct sk_buff *skb, 
 		return -EAFNOSUPPORT;
 
 	rcu_read_lock();
-	list_for_each_entry(rule, ops->rules_list, list) {
+	list_for_each_entry(rule, &ops->rules_list, list) {
 		if (idx < cb->args[0])
 			goto skip;
 
@@ -391,12 +396,12 @@ static int fib_rules_event(struct notifi
 	switch (event) {
 	case NETDEV_REGISTER:
 		list_for_each_entry(ops, &rules_ops, list)
-			attach_rules(ops->rules_list, dev);
+			attach_rules(&ops->rules_list, dev);
 		break;
 
 	case NETDEV_UNREGISTER:
 		list_for_each_entry(ops, &rules_ops, list)
-			detach_rules(ops->rules_list, dev);
+			detach_rules(&ops->rules_list, dev);
 		break;
 	}
 
diff -upr kernel-2.6.18-417.el5.orig/net/core/filter.c kernel-2.6.18-417.el5-028stab121/net/core/filter.c
--- kernel-2.6.18-417.el5.orig/net/core/filter.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/filter.c	2017-01-13 08:40:16.000000000 -0500
@@ -413,7 +413,7 @@ int sk_attach_filter(struct sock_fprog *
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
 	if (!fp)
 		return -ENOMEM;
 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
diff -upr kernel-2.6.18-417.el5.orig/net/core/neighbour.c kernel-2.6.18-417.el5-028stab121/net/core/neighbour.c
--- kernel-2.6.18-417.el5.orig/net/core/neighbour.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/neighbour.c	2017-01-13 08:40:22.000000000 -0500
@@ -33,6 +33,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
+#include <ub/beancounter.h>
 
 #define NEIGH_DEBUG 1
 
@@ -261,6 +262,7 @@ static struct neighbour *neigh_alloc(str
 	int entries;
 
 	entries = atomic_inc_return(&tbl->entries) - 1;
+	n = ERR_PTR(-ENOBUFS);
 	if (entries >= tbl->gc_thresh3 ||
 	    (entries >= tbl->gc_thresh2 &&
 	     time_after(now, tbl->last_flush + 5 * HZ))) {
@@ -271,7 +273,7 @@ static struct neighbour *neigh_alloc(str
 
 	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
 	if (!n)
-		goto out_entries;
+		goto out_nomem;
 
 	memset(n, 0, tbl->entry_size);
 
@@ -292,6 +294,8 @@ static struct neighbour *neigh_alloc(str
 out:
 	return n;
 
+out_nomem:
+	n = ERR_PTR(-ENOMEM);
 out_entries:
 	atomic_dec(&tbl->entries);
 	goto out;
@@ -404,12 +408,11 @@ struct neighbour *neigh_create(struct ne
 	u32 hash_val;
 	int key_len = tbl->key_len;
 	int error;
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+	struct neighbour *n1, *rc, *n;
 
-	if (!n) {
-		rc = ERR_PTR(-ENOBUFS);
+	rc = n = neigh_alloc(tbl);
+	if (IS_ERR(n))
 		goto out;
-	}
 
 	memcpy(n->primary_key, pkey, key_len);
 	n->dev = dev;
@@ -652,6 +655,8 @@ static void neigh_periodic_timer(unsigne
 	struct neigh_table *tbl = (struct neigh_table *)arg;
 	struct neighbour *n, **np;
 	unsigned long expire, now = jiffies;
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
 
 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
@@ -713,6 +718,8 @@ next_elt:
  	mod_timer(&tbl->gc_timer, now + expire);
 
 	write_unlock(&tbl->lock);
+	set_exec_ub(ub);
+	set_exec_env(env);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -753,6 +760,11 @@ static void neigh_timer_handler(unsigned
 	struct neighbour *neigh = (struct neighbour *)arg;
 	unsigned state;
 	int notify = 0;
+	struct ve_struct *env;
+	struct user_beancounter *ub;
+
+	env = set_exec_env(neigh->dev->owner_env);
+	ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
 
 	write_lock(&neigh->lock);
 
@@ -838,6 +850,8 @@ out:
 		neigh_update_notify(neigh);
 
 	neigh_release(neigh);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
@@ -1228,6 +1242,9 @@ static void neigh_proxy_process(unsigned
 	unsigned long now = jiffies;
 	struct sk_buff *skb;
 
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
+
 	spin_lock(&tbl->proxy_queue.lock);
 
 	skb = tbl->proxy_queue.next;
@@ -1239,6 +1256,7 @@ static void neigh_proxy_process(unsigned
 		skb = skb->next;
 		if (tdif <= 0) {
 			struct net_device *dev = back->dev;
+
 			__skb_unlink(back, &tbl->proxy_queue);
 			if (tbl->proxy_redo && netif_running(dev))
 				tbl->proxy_redo(back);
@@ -1246,6 +1264,7 @@ static void neigh_proxy_process(unsigned
 				kfree_skb(back);
 
 			dev_put(dev);
+
 		} else if (!sched_next || tdif < sched_next)
 			sched_next = tdif;
 	}
@@ -1253,6 +1272,8 @@ static void neigh_proxy_process(unsigned
 	if (sched_next)
 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
 	spin_unlock(&tbl->proxy_queue.lock);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -1348,12 +1369,17 @@ void neigh_parms_destroy(struct neigh_pa
 	kfree(parms);
 }
 
-void neigh_table_init_no_netlink(struct neigh_table *tbl)
+struct lock_class_key neigh_table_proxy_queue_class;
+
+int neigh_table_init_no_netlink(struct neigh_table *tbl)
 {
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
+	atomic_set(&tbl->entries, 0);
+	tbl->hash_chain_gc = 0;
 	atomic_set(&tbl->parms.refcnt, 1);
+	tbl->parms.next = NULL;
 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
 	tbl->parms.reachable_time =
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
@@ -1361,22 +1387,30 @@ void neigh_table_init_no_netlink(struct 
 	if (!tbl->kmem_cachep)
 		tbl->kmem_cachep = kmem_cache_create(tbl->id,
 						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
+						     0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						     NULL, NULL);
 
 	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
+		return -ENOMEM;
+
+	tbl->owner_env = get_ve(get_exec_env());
+	tbl->owner_ub = get_beancounter(get_exec_ub());
 
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
-		panic("cannot create neighbour cache statistics");
+		goto out;
 	
 #ifdef CONFIG_PROC_FS
-	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
-	if (!tbl->pde) 
-		panic("cannot create neighbour proc dir entry");
-	tbl->pde->proc_fops = &neigh_stat_seq_fops;
-	tbl->pde->data = tbl;
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		tbl->pde = create_proc_glob_entry(name, S_IRUGO, NULL);
+		if (tbl->pde) {
+			tbl->pde->proc_fops = &neigh_stat_seq_fops;
+			tbl->pde->data = tbl;
+		}
+	}
 #endif
 
 	tbl->hash_mask = 1;
@@ -1386,7 +1420,7 @@ void neigh_table_init_no_netlink(struct 
 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
 
 	if (!tbl->hash_buckets || !tbl->phash_buckets)
-		panic("cannot allocate neighbour cache hashes");
+		goto nomem;
 
 	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
 
@@ -1400,19 +1434,44 @@ void neigh_table_init_no_netlink(struct 
 	init_timer(&tbl->proxy_timer);
 	tbl->proxy_timer.data	  = (unsigned long)tbl;
 	tbl->proxy_timer.function = neigh_proxy_process;
-	skb_queue_head_init(&tbl->proxy_queue);
+	skb_queue_head_init_class(&tbl->proxy_queue,
+			&neigh_table_proxy_queue_class);
 
 	tbl->last_flush = now;
 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
+	return 0;
+
+nomem:
+	if (tbl->hash_buckets) {
+		neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+		tbl->hash_buckets = NULL;
+	}
+	if (tbl->phash_buckets) {
+		kfree(tbl->phash_buckets);
+		tbl->phash_buckets = NULL;
+	}
+	if (tbl->stats) {
+		free_percpu(tbl->stats);
+		tbl->stats = NULL;
+	}
+out:
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+	return -ENOMEM;
 }
 
-void neigh_table_init(struct neigh_table *tbl)
+int neigh_table_init(struct neigh_table *tbl)
 {
 	struct neigh_table *tmp;
+	int err;
 
-	neigh_table_init_no_netlink(tbl);
+	err = neigh_table_init_no_netlink(tbl);
+	if (err)
+		return err;
 	write_lock(&neigh_tbl_lock);
 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
+		if (!ve_accessible_strict(tmp->owner_env, get_exec_env()))
+			continue;
 		if (tmp->family == tbl->family)
 			break;
 	}
@@ -1425,6 +1484,7 @@ void neigh_table_init(struct neigh_table
 		       "family %d\n", tbl->family);
 		dump_stack();
 	}
+	return 0;
 }
 
 int neigh_table_clear(struct neigh_table *tbl)
@@ -1438,6 +1498,15 @@ int neigh_table_clear(struct neigh_table
 	neigh_ifdown(tbl, NULL);
 	if (atomic_read(&tbl->entries))
 		printk(KERN_CRIT "neighbour leakage\n");
+#ifdef CONFIG_PROC_FS
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		remove_proc_glob_entry(name, NULL);
+	}
+#endif
+
 	write_lock(&neigh_tbl_lock);
 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
 		if (*tp == tbl) {
@@ -1456,8 +1525,13 @@ int neigh_table_clear(struct neigh_table
 	free_percpu(tbl->stats);
 	tbl->stats = NULL;
 
-	kmem_cache_destroy(tbl->kmem_cachep);
-	tbl->kmem_cachep = NULL;
+	if (tbl->kmem_cachep) {
+		kmem_cache_destroy(tbl->kmem_cachep);
+		tbl->kmem_cachep = NULL;
+	}
+
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
 
 	return 0;
 }
@@ -1481,6 +1555,8 @@ int neigh_delete(struct sk_buff *skb, st
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1534,6 +1610,8 @@ int neigh_add(struct sk_buff *skb, struc
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1766,6 +1844,9 @@ int neightbl_set(struct sk_buff *skb, st
 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
 			continue;
 
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
+
 		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
 			break;
 	}
@@ -1992,6 +2073,8 @@ int neigh_dump_info(struct sk_buff *skb,
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		if (t < s_t || (family && tbl->family != family))
 			continue;
 		if (t > s_t)
@@ -2569,11 +2652,12 @@ int neigh_sysctl_register(struct net_dev
 			  int p_id, int pdev_id, char *p_name, 
 			  proc_handler *handler, ctl_handler *strategy)
 {
-	struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
 	char *dev_name = NULL;
 	int err = 0;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
 		return -ENOBUFS;
 	memcpy(t, &neigh_sysctl_template, sizeof(*t));
diff -upr kernel-2.6.18-417.el5.orig/net/core/net-sysfs.c kernel-2.6.18-417.el5-028stab121/net/core/net-sysfs.c
--- kernel-2.6.18-417.el5.orig/net/core/net-sysfs.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/net-sysfs.c	2017-01-13 08:40:22.000000000 -0500
@@ -293,6 +293,28 @@ static struct class_device_attribute net
 	{}
 };
 
+#ifdef CONFIG_VE
+struct class_device_attribute ve_net_class_attributes[] = {
+	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
+	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
+	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
+	__ATTR(features, S_IRUGO, show_features, NULL),
+	__ATTR(type, S_IRUGO, show_type, NULL),
+	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
+	__ATTR(address, S_IRUGO, show_address, NULL),
+	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
+	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
+	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
+	__ATTR(mtu, S_IRUGO, show_mtu, NULL),
+	__ATTR(flags, S_IRUGO, show_flags, NULL),
+	__ATTR(tx_queue_len, S_IRUGO, show_tx_queue_len, NULL),
+	__ATTR(weight, S_IRUGO, show_weight, NULL),
+	{}
+};
+EXPORT_SYMBOL(ve_net_class_attributes);
+#endif
+
 /* Show a given an attribute in the statistics group */
 static ssize_t netstat_show(const struct class_device *cd, char *buf, 
 			    unsigned long offset)
@@ -442,6 +464,13 @@ static struct attribute *wireless_attrs[
 	&class_device_attr_beacon.attr,
 	NULL
 };
+EXPORT_SYMBOL(net_class);
+
+#ifndef CONFIG_VE
+#define visible_net_class net_class
+#else
+#define visible_net_class (*get_exec_env()->net_class)
+#endif
 
 static struct attribute_group wireless_group = {
 	.name = "wireless",
@@ -485,7 +514,7 @@ static void netdev_release(struct class_
 	kfree((char *)dev - dev->padded);
 }
 
-static struct class net_class = {
+struct class net_class = {
 	.name = "net",
 	.release = netdev_release,
 	.class_dev_attrs = net_class_attributes,
@@ -511,22 +540,29 @@ void netdev_class_remove_file(struct cla
 EXPORT_SYMBOL(netdev_class_create_file);
 EXPORT_SYMBOL(netdev_class_remove_file);
 
-void netdev_unregister_sysfs(struct net_device * net)
+void netdev_unregister_sysfs(struct net_device * net, int move)
 {
 	struct class_device *dev = &(net->class_dev);
 
-	kobject_get(&dev->kobj);
+	if (!move)
+		kobject_get(&dev->kobj);
+
 	class_device_del(&(net->class_dev));
 }
+EXPORT_SYMBOL(netdev_unregister_sysfs);
 
-/* Create sysfs entries for network device. */
-int netdev_register_sysfs(struct net_device *net)
+/* This special implementation is used for moving net device to container */
+int netdev_register_sysfs(struct net_device *net, int move)
 {
 	struct class_device *class_dev = &(net->class_dev);
 	struct attribute_group **groups = net->sysfs_groups;
 
-	class_device_initialize(class_dev);
-	class_dev->class = &net_class;
+	if (move)
+		class_device_virtualize(class_dev);
+	else
+		class_device_initialize(class_dev);
+
+	class_dev->class = &visible_net_class;
 	class_dev->class_data = net;
 	class_dev->groups = groups;
 
@@ -545,8 +581,17 @@ int netdev_register_sysfs(struct net_dev
 
 	return class_device_add(class_dev);
 }
+EXPORT_SYMBOL(netdev_register_sysfs);
+
+void prepare_sysfs_netdev(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->net_class = &net_class;
+#endif
+}
 
 int netdev_sysfs_init(void)
 {
+	prepare_sysfs_netdev();
 	return class_register(&net_class);
 }
diff -upr kernel-2.6.18-417.el5.orig/net/core/pktgen.c kernel-2.6.18-417.el5-028stab121/net/core/pktgen.c
--- kernel-2.6.18-417.el5.orig/net/core/pktgen.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/pktgen.c	2017-01-13 08:40:28.000000000 -0500
@@ -3239,8 +3239,7 @@ static void pktgen_thread_worker(struct 
 
 	/* Migrate to the right CPU */
 	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu)
-		BUG();
+	BUG_ON(task_cpu(current) != cpu);
 
 	init_waitqueue_head(&t->queue);
 
diff -upr kernel-2.6.18-417.el5.orig/net/core/rtnetlink.c kernel-2.6.18-417.el5-028stab121/net/core/rtnetlink.c
--- kernel-2.6.18-417.el5.orig/net/core/rtnetlink.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/rtnetlink.c	2017-01-13 08:40:22.000000000 -0500
@@ -599,6 +599,8 @@ static int rtnetlink_dump_all(struct sk_
 		if (rtnetlink_links[idx] == NULL ||
 		    rtnetlink_links[idx][type].dumpit == NULL)
 			continue;
+		if (vz_security_family_check(idx))
+			continue;
 		if (idx > s_idx)
 			memset(&cb->args[0], 0, sizeof(cb->args));
 		if (rtnetlink_links[idx][type].dumpit(skb, cb))
@@ -666,7 +668,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 		return 0;
 
 	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO) {
+	if (family >= NPROTO || vz_security_family_check(family)) {
 		*errp = -EAFNOSUPPORT;
 		return -1;
 	}
@@ -679,7 +681,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
+	if (kind != 2 && security_netlink_recv(skb, CAP_VE_NET_ADMIN)) {
 		*errp = -EPERM;
 		return -1;
 	}
@@ -723,9 +725,12 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 
 	if (link->doit == NULL)
 		link = &(rtnetlink_links[PF_UNSPEC][type]);
-	if (link->doit == NULL)
-		goto err_inval;
-	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+	/* SUSE 11 compat kludge here */
+	err = -EINVAL;
+	if (nlh->nlmsg_type == RTM_NEWLINK)
+		err = -EOPNOTSUPP;
+	if (link->doit != NULL)
+		err = link->doit(skb, nlh, (void *)&rta_buf[0]);
 
 	*errp = err;
 	return err;
@@ -825,5 +830,6 @@ EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
+EXPORT_SYMBOL(__rtnl_unlock);
 EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
diff -upr kernel-2.6.18-417.el5.orig/net/core/scm.c kernel-2.6.18-417.el5-028stab121/net/core/scm.c
--- kernel-2.6.18-417.el5.orig/net/core/scm.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/scm.c	2017-01-13 08:40:21.000000000 -0500
@@ -34,6 +34,7 @@
 #include <net/compat.h>
 #include <net/scm.h>
 
+#include <ub/ub_mem.h>
 
 /*
  *	Only allow a user to send credentials, that they could set with 
@@ -42,7 +43,9 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == virt_tgid(current) ||
+	     creds->pid == current->tgid ||
+	     capable(CAP_VE_SYS_ADMIN)) &&
 	    ((creds->uid == current->uid || creds->uid == current->euid ||
 	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
 	    ((creds->gid == current->gid || creds->gid == current->egid ||
@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c
 
 	if (!fpl)
 	{
-		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
 		if (!fpl)
 			return -ENOMEM;
 		*fplp = fpl;
@@ -294,7 +297,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
 	if (new_fpl) {
 		INIT_LIST_HEAD(&new_fpl->list);
 		for (i=fpl->count-1; i>=0; i--)
diff -upr kernel-2.6.18-417.el5.orig/net/core/skbuff.c kernel-2.6.18-417.el5-028stab121/net/core/skbuff.c
--- kernel-2.6.18-417.el5.orig/net/core/skbuff.c	2017-01-13 07:39:14.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/skbuff.c	2017-01-13 08:40:41.000000000 -0500
@@ -47,6 +47,7 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -68,6 +69,8 @@
 #include <asm/system.h>
 #include <trace/skb.h>
 
+#include <ub/ub_net.h>
+
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
@@ -155,6 +158,9 @@ struct sk_buff *__alloc_skb(unsigned int
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
+		goto nobc;
+
 	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
 	data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
@@ -169,6 +175,7 @@ struct sk_buff *__alloc_skb(unsigned int
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	skb->owner_env = get_exec_env();
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -191,6 +198,8 @@ struct sk_buff *__alloc_skb(unsigned int
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+nobc:
 	kmem_cache_free(cache, skb);
 	skb = NULL;
 	goto out;
@@ -223,6 +232,9 @@ struct sk_buff *alloc_skb_from_cache(kme
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
+		goto nobc;
+
 	/* Get the DATA. */
 	size = SKB_DATA_ALIGN(size);
 	data = kmem_cache_alloc(cp, gfp_mask);
@@ -237,6 +249,7 @@ struct sk_buff *alloc_skb_from_cache(kme
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	skb->owner_env = get_exec_env();
 
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags  = 0;
@@ -247,6 +260,8 @@ struct sk_buff *alloc_skb_from_cache(kme
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+nobc:
 	kmem_cache_free(skbuff_head_cache, skb);
 	skb = NULL;
 	goto out;
@@ -278,6 +293,11 @@ struct sk_buff *build_skb(void *data)
 	if (!skb)
 		return NULL;
 
+	if (ub_skb_alloc_bc(skb, GFP_ATOMIC)) {
+		kmem_cache_free(skbuff_head_cache, skb);
+		return NULL;
+	}
+
 	size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	memset(skb, 0, offsetof(struct sk_buff, truesize));
@@ -288,6 +308,7 @@ struct sk_buff *build_skb(void *data)
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
+	skb->owner_env = get_exec_env();
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
@@ -381,6 +402,7 @@ void kfree_skbmem(struct sk_buff *skb)
 	struct sk_buff *other;
 	atomic_t *fclone_ref;
 
+	ub_skb_free_bc(skb);
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
 		kmem_cache_free(skbuff_head_cache, skb);
@@ -413,6 +435,7 @@ static void skb_release_head_state(struc
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
 #endif
+	ub_skb_uncharge(skb);
 	if (skb->destructor) {
 		WARN_ON(in_irq());
 		skb->destructor(skb);
@@ -510,7 +533,7 @@ static void __copy_skb_header(struct sk_
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->csum		= old->csum;
 	new->local_df		= old->local_df;
-#ifdef CONFIG_XEN
+#if defined(CONFIG_XEN) || defined(CONFIG_VE)
 	new->proto_data_valid	= old->proto_data_valid;
 	new->proto_csum_blank	= old->proto_csum_blank;
 #endif
@@ -531,8 +554,13 @@ static void __copy_skb_header(struct sk_
 	new->input_dev		= old->input_dev;
 #endif
 #endif
+	new->vlan_tci		= old->vlan_tci;
 
+#ifdef CONFIG_VE
+	new->redirected = old->redirected;
+#endif
 	skb_copy_secmark(new, old);
+	skb_copy_brmark(new, old);
 }
 
 /**
@@ -566,6 +594,11 @@ struct sk_buff *skb_clone(struct sk_buff
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
+	if (ub_skb_alloc_bc(n, gfp_mask)) {
+		kmem_cache_free(skbuff_head_cache, n);
+		return NULL;
+	}
+
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
@@ -577,6 +610,7 @@ struct sk_buff *skb_clone(struct sk_buff
 	C(csum);
 	n->cloned = 1;
 	n->nohdr = 0;
+	C(owner_env);
 	n->destructor = NULL;
 	C(truesize);
 	atomic_set(&n->users, 1);
@@ -644,6 +678,11 @@ struct sk_buff *skb_copy(const struct sk
 	n->csum	     = skb->csum;
 	n->ip_summed = skb->ip_summed;
 
+#if defined(CONFIG_XEN) || defined(CONFIG_VE)
+	n->proto_data_valid = skb->proto_data_valid;
+	n->proto_csum_blank = skb->proto_csum_blank;
+#endif
+
 	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
 		BUG();
 
diff -upr kernel-2.6.18-417.el5.orig/net/core/sock.c kernel-2.6.18-417.el5-028stab121/net/core/sock.c
--- kernel-2.6.18-417.el5.orig/net/core/sock.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/sock.c	2017-01-13 08:40:30.000000000 -0500
@@ -107,6 +107,7 @@
 #include <linux/net.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/tcp.h>
@@ -123,6 +124,9 @@
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 #include <linux/filter.h>
 
 #ifdef CONFIG_INET
@@ -216,7 +220,20 @@ static int sock_set_timeout(long *timeo_
 		return -EINVAL;
 	if (copy_from_user(&tv, optval, sizeof(tv)))
 		return -EFAULT;
+	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
+		return -EDOM;
+
+	if (tv.tv_sec < 0) {
+		static int warned;
 
+		*timeo_p = 0;
+		if (warned < 10 && net_ratelimit())
+			warned++;
+			ve_printk(VE_LOG, KERN_INFO "sock_set_timeout: "
+				"`%s' (pid %d) tries to set negative timeout\n",
+				 current->comm, current->pid);
+		return 0;
+	}
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
@@ -231,7 +248,7 @@ static void sock_warn_obsolete_bsdism(co
 	static char warncomm[TASK_COMM_LEN];
 	if (strcmp(warncomm, current->comm) && warned < 5) { 
 		strcpy(warncomm,  current->comm); 
-		printk(KERN_WARNING "process `%s' is using obsolete "
+		ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
 		       "%s SO_BSDCOMPAT\n", warncomm, name);
 		warned++;
 	}
@@ -259,6 +276,10 @@ static int __sock_queue_rcv_skb(struct s
 		goto out;
 	}
 
+	err = ub_sockrcvbuf_charge(sk, skb);
+	if (err < 0)
+		goto out;
+
 	err = sk_filter(sk, skb, needlock);
 	if (err)
 		goto out;
@@ -884,6 +905,7 @@ struct sock *sk_alloc(int family, gfp_t 
 			 */
 			sk->sk_prot = sk->sk_prot_creator = prot;
 			sock_lock_init(sk);
+			sk->owner_env = get_exec_env();
 		}
 		
 		if (security_sk_alloc(sk, family, priority))
@@ -923,6 +945,7 @@ void sk_free(struct sock *sk)
 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
 	security_sk_free(sk);
+	ub_sock_uncharge(sk);
 	if (sk->sk_prot_creator->slab != NULL)
 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
 	else
@@ -977,14 +1000,11 @@ struct sock *sk_clone(struct sock *sk, c
 		if (sk->sk_create_child)
 			sk->sk_create_child(sk, newsk);
 
-		if (unlikely(xfrm_sk_clone_policy(newsk))) {
-			/* It is still raw copy of parent, so invalidate
-			 * destructor and make plain sk_free() */
-			newsk->sk_destruct = NULL;
-			sk_free(newsk);
-			newsk = NULL;
-			goto out;
-		}
+		if (ub_sock_charge(newsk, newsk->sk_family, newsk->sk_type) < 0)
+			goto out_err;
+
+		if (unlikely(xfrm_sk_clone_policy(newsk)))
+			 goto out_err;
 
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
@@ -1008,8 +1028,15 @@ struct sock *sk_clone(struct sock *sk, c
 		if (newsk->sk_prot->sockets_allocated)
 			atomic_inc(newsk->sk_prot->sockets_allocated);
 	}
-out:
 	return newsk;
+
+out_err:
+	/* It is still raw copy of parent, so invalidate
+	 * destructor and make plain sk_free() */
+	sock_reset_flag(newsk, SOCK_TIMESTAMP);
+	newsk->sk_destruct = NULL;
+	sk_free(newsk);
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(sk_clone);
@@ -1170,9 +1197,8 @@ static long sock_wait_for_wmem(struct so
 /*
  *	Generic send/receive buffer handlers
  */
-
-struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
-				     unsigned long data_len, int noblock,
+struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
+				     unsigned long size2, int noblock,
 				     int *errcode)
 {
 	struct sk_buff *skb;
@@ -1186,7 +1212,6 @@ struct sk_buff *sock_alloc_send_pskb(str
 
 	timeo = sock_sndtimeo(sk, noblock);
 	while (1) {
-		int npages;
 		err = sock_error(sk);
 		if (err != 0)
 			goto failure;
@@ -1195,49 +1220,35 @@ struct sk_buff *sock_alloc_send_pskb(str
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;
 
-		err = -EMSGSIZE;
-		npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-		if (npages > MAX_SKB_FRAGS)
-			goto failure;
+		if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
+			if (size2 < size) {
+				size = size2;
+				continue;
+			}
+			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+			err = -EAGAIN;
+			if (!timeo)
+				goto failure;
+			if (signal_pending(current))
+				goto interrupted;
+			timeo = ub_sock_wait_for_space(sk, timeo,
+					skb_charge_size(size));
+			continue;
+		}
 
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-			skb = alloc_skb(header_len, gfp_mask);
-			if (skb) {
-				int i;
-
-				/* No pages, we're done... */
-				if (!data_len)
-					break;
-
-				skb->truesize += data_len;
-				skb_shinfo(skb)->nr_frags = npages;
-				for (i = 0; i < npages; i++) {
-					struct page *page;
-					skb_frag_t *frag;
-
-					page = alloc_pages(sk->sk_allocation, 0);
-					if (!page) {
-						err = -ENOBUFS;
-						skb_shinfo(skb)->nr_frags = i;
-						kfree_skb(skb);
-						goto failure;
-					}
-
-					frag = &skb_shinfo(skb)->frags[i];
-					frag->page = page;
-					frag->page_offset = 0;
-					frag->size = (data_len >= PAGE_SIZE ?
-						      PAGE_SIZE :
-						      data_len);
-					data_len -= PAGE_SIZE;
-				}
-
+			skb = alloc_skb(size, gfp_mask);
+			if (skb)
 				/* Full success... */
 				break;
-			}
+			ub_sock_retwres_other(sk, skb_charge_size(size),
+					SOCK_MIN_UBCSPACE_CH);
 			err = -ENOBUFS;
 			goto failure;
 		}
+		ub_sock_retwres_other(sk,
+				skb_charge_size(size),
+				SOCK_MIN_UBCSPACE_CH);
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		err = -EAGAIN;
@@ -1248,6 +1259,7 @@ struct sk_buff *sock_alloc_send_pskb(str
 		timeo = sock_wait_for_wmem(sk, timeo);
 	}
 
+	ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
 	skb_set_owner_w(skb, sk);
 	return skb;
 
@@ -1257,12 +1269,12 @@ failure:
 	*errcode = err;
 	return NULL;
 }
-EXPORT_SYMBOL(sock_alloc_send_pskb);
+EXPORT_SYMBOL(sock_alloc_send_skb2);
 
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
 				    int noblock, int *errcode)
 {
-	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+	return sock_alloc_send_skb2(sk, size, size, noblock, errcode);
 }
 
 static void __lock_sock(struct sock *sk)
@@ -1850,7 +1862,8 @@ int proto_register(struct proto *prot, i
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create(prot->name,
 					       sk_alloc_size(prot->obj_size), 0,
-					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+					       SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					       NULL, NULL);
 
 		if (prot->slab == NULL) {
 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
@@ -1866,9 +1879,11 @@ int proto_register(struct proto *prot, i
 				goto out_free_sock_slab;
 
 			sprintf(request_sock_slab_name, mask, prot->name);
-			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
-								 sk_alloc_size(prot->rsk_prot->obj_size), 0,
-								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+			prot->rsk_prot->slab =
+				kmem_cache_create(request_sock_slab_name,
+						 sk_alloc_size(prot->rsk_prot->obj_size), 0,
+						 SLAB_HWCACHE_ALIGN | SLAB_UBC,
+						 NULL, NULL);
 
 			if (prot->rsk_prot->slab == NULL) {
 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
@@ -1889,7 +1904,7 @@ int proto_register(struct proto *prot, i
 			prot->twsk_prot->twsk_slab =
 				kmem_cache_create(timewait_sock_slab_name,
 						  sk_alloc_size(prot->twsk_prot->twsk_obj_size),
-						  0, SLAB_HWCACHE_ALIGN,
+						  0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						  NULL, NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
 				goto out_free_timewait_sock_slab_name;
diff -upr kernel-2.6.18-417.el5.orig/net/core/stream.c kernel-2.6.18-417.el5-028stab121/net/core/stream.c
--- kernel-2.6.18-417.el5.orig/net/core/stream.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/core/stream.c	2017-01-13 08:40:18.000000000 -0500
@@ -111,8 +111,10 @@ EXPORT_SYMBOL(sk_stream_wait_close);
  * sk_stream_wait_memory - Wait for more memory for a socket
  * @sk: socket to wait for memory
  * @timeo_p: for how long
+ * @amount - amount of memory to wait for (in UB space!)
  */
-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+		unsigned long amount)
 {
 	int err = 0;
 	long vm_wait = 0;
@@ -134,7 +136,10 @@ int sk_stream_wait_memory(struct sock *s
 		if (signal_pending(current))
 			goto do_interrupted;
 		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-		if (sk_stream_memory_free(sk) && !vm_wait)
+		if (amount == 0) {
+			if (sk_stream_memory_free(sk) && !vm_wait)
+				break;
+		} else if (!ub_sock_sndqueueadd_tcp(sk, amount))
 			break;
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -144,6 +149,8 @@ int sk_stream_wait_memory(struct sock *s
 						  sk_stream_memory_free(sk) &&
 						  vm_wait);
 		sk->sk_write_pending--;
+		if (amount > 0)
+			ub_sock_sndqueuedel(sk);
 
 		if (vm_wait) {
 			vm_wait -= current_timeo;
@@ -170,6 +177,10 @@ do_interrupted:
 	goto out;
 }
 
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+{
+	return __sk_stream_wait_memory(sk, timeo_p, 0);
+}
 EXPORT_SYMBOL(sk_stream_wait_memory);
 
 void sk_stream_rfree(struct sk_buff *skb)
diff -upr kernel-2.6.18-417.el5.orig/net/dccp/ipv6.c kernel-2.6.18-417.el5-028stab121/net/dccp/ipv6.c
--- kernel-2.6.18-417.el5.orig/net/dccp/ipv6.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/dccp/ipv6.c	2017-01-13 08:40:16.000000000 -0500
@@ -880,6 +880,8 @@ static struct sock *dccp_v6_request_recv
 	__ip6_dst_store(newsk, dst, NULL, NULL);
 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
 						      NETIF_F_TSO);
+	if (!sysctl_tcp_use_sg)
+		newsk->sk_route_caps &= ~NETIF_F_SG;
 	newdp6 = (struct dccp6_sock *)newsk;
 	newinet = inet_sk(newsk);
 	newinet->pinet6 = &newdp6->inet6;
diff -upr kernel-2.6.18-417.el5.orig/net/dccp/minisocks.c kernel-2.6.18-417.el5-028stab121/net/dccp/minisocks.c
--- kernel-2.6.18-417.el5.orig/net/dccp/minisocks.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/dccp/minisocks.c	2017-01-13 08:40:17.000000000 -0500
@@ -18,6 +18,8 @@
 #include <net/xfrm.h>
 #include <net/inet_timewait_sock.h>
 
+#include <ub/ub_orphan.h>
+
 #include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
@@ -46,7 +48,8 @@ void dccp_time_wait(struct sock *sk, int
 {
 	struct inet_timewait_sock *tw = NULL;
 
-	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
+	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets &&
+			ub_timewait_check(sk, &dccp_death_row))
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
diff -upr kernel-2.6.18-417.el5.orig/net/decnet/dn_rules.c kernel-2.6.18-417.el5-028stab121/net/decnet/dn_rules.c
--- kernel-2.6.18-417.el5.orig/net/decnet/dn_rules.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/decnet/dn_rules.c	2017-01-13 08:40:15.000000000 -0500
@@ -60,8 +60,6 @@ static struct dn_fib_rule default_rule =
 	},
 };
 
-static LIST_HEAD(dn_fib_rules);
-
 
 int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 {
@@ -270,9 +268,9 @@ static u32 dn_fib_rule_default_pref(void
 	struct list_head *pos;
 	struct fib_rule *rule;
 
-	if (!list_empty(&dn_fib_rules)) {
-		pos = dn_fib_rules.next;
-		if (pos->next != &dn_fib_rules) {
+	if (!list_empty(&dn_fib_rules_ops.rules_list)) {
+		pos = dn_fib_rules_ops.rules_list.next;
+		if (pos->next != &dn_fib_rules_ops.rules_list) {
 			rule = list_entry(pos->next, struct fib_rule, list);
 			if (rule->pref)
 				return rule->pref - 1;
@@ -298,13 +296,13 @@ static struct fib_rules_ops dn_fib_rules
 	.default_pref	= dn_fib_rule_default_pref,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
 	.policy		= dn_fib_rule_policy,
-	.rules_list	= &dn_fib_rules,
+	.rules_list	= LIST_HEAD_INIT(dn_fib_rules_ops.rules_list),
 	.owner		= THIS_MODULE,
 };
 
 void __init dn_fib_rules_init(void)
 {
-	list_add_tail(&default_rule.common.list, &dn_fib_rules);
+	list_add_tail(&default_rule.common.list, &dn_fib_rules_ops.rules_list);
 	fib_rules_register(&dn_fib_rules_ops);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/decnet/netfilter/dn_rtmsg.c kernel-2.6.18-417.el5-028stab121/net/decnet/netfilter/dn_rtmsg.c
--- kernel-2.6.18-417.el5.orig/net/decnet/netfilter/dn_rtmsg.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/decnet/netfilter/dn_rtmsg.c	2017-01-13 08:40:19.000000000 -0500
@@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_sk
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (security_netlink_recv(skb, CAP_VE_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	/* Eventually we might send routing messages too */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/af_inet.c kernel-2.6.18-417.el5-028stab121/net/ipv4/af_inet.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/af_inet.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/af_inet.c	2017-01-13 08:40:22.000000000 -0500
@@ -114,6 +114,7 @@
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
 #endif
+#include <ub/ub_net.h>
 
 DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
 
@@ -283,6 +284,10 @@ lookup_protocol:
 			goto out_rcu_unlock;
 	}
 
+	err = vz_security_protocol_check(answer->protocol);
+	if (err < 0)
+		goto out_rcu_unlock;
+
 	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
@@ -300,6 +305,13 @@ lookup_protocol:
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET, sock->type))
+		goto out_sk_free;
+	/* if charge was successful, sock_init_data() MUST be called to
+	 * set sk->sk_type. otherwise sk will be uncharged to wrong resource
+	 */
+
 	err = 0;
 	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
@@ -359,6 +371,9 @@ out:
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
+out_sk_free:
+	sk_free(sk);
+	return err;
 }
 
 
@@ -373,6 +388,9 @@ int inet_release(struct socket *sock)
 
 	if (sk) {
 		long timeout;
+		struct ve_struct *saved_env;
+
+		saved_env = set_exec_env(sk->owner_env);
 
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
@@ -390,6 +408,8 @@ int inet_release(struct socket *sock)
 			timeout = sk->sk_lingertime;
 		sock->sk = NULL;
 		sk->sk_prot->close(sk, timeout);
+
+		(void)set_exec_env(saved_env);
 	}
 	return 0;
 }
@@ -1333,23 +1353,23 @@ static struct net_protocol icmp_protocol
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	icmpmsg_statistics[0] = alloc_percpu(struct icmpmsg_mib);
-	icmpmsg_statistics[1] = alloc_percpu(struct icmpmsg_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
+	ve_net_statistics[0] = alloc_percpu(struct linux_mib);
+	ve_net_statistics[1] = alloc_percpu(struct linux_mib);
+	ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+	ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+	ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+	ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+	ve_icmpmsg_statistics[0] = alloc_percpu(struct icmpmsg_mib);
+	ve_icmpmsg_statistics[1] = alloc_percpu(struct icmpmsg_mib);
+	ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+	ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+	ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
+	ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
 	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1] && icmpmsg_statistics[0]
-	     && icmpmsg_statistics[1]))
+	    (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
+	     && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1]
+	     && ve_udp_statistics[0] && ve_udp_statistics[1] && ve_icmpmsg_statistics[0]
+	     && ve_icmpmsg_statistics[1]))
 		return -ENOMEM;
 
 	(void) tcp_mib_init();
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/arp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/arp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/arp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/arp.c	2017-01-13 08:40:22.000000000 -0500
@@ -174,7 +174,7 @@ struct neigh_ops arp_broken_ops = {
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table arp_tbl = {
+struct neigh_table global_arp_tbl = {
 	.family =	AF_INET,
 	.entry_size =	sizeof(struct neighbour) + 4,
 	.key_len =	4,
@@ -183,7 +183,7 @@ struct neigh_table arp_tbl = {
 	.proxy_redo =	parp_redo,
 	.id =		"arp_cache",
 	.parms = {
-		.tbl =			&arp_tbl,
+		.tbl =			&global_arp_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -987,7 +987,7 @@ static int arp_req_set(struct arpreq *r,
 			return 0;
 		}
 		if (dev == NULL) {
-			ipv4_devconf.proxy_arp = 1;
+			ve_ipv4_devconf.proxy_arp = 1;
 			return 0;
 		}
 		if (__in_dev_get_rtnl(dev)) {
@@ -1093,7 +1093,7 @@ static int arp_req_delete(struct arpreq 
 			return pneigh_delete(&arp_tbl, &ip, dev);
 		if (mask == 0) {
 			if (dev == NULL) {
-				ipv4_devconf.proxy_arp = 0;
+				ve_ipv4_devconf.proxy_arp = 0;
 				return 0;
 			}
 			if (__in_dev_get_rtnl(dev)) {
@@ -1141,7 +1141,8 @@ int arp_ioctl(unsigned int cmd, void __u
 	switch (cmd) {
 		case SIOCDARP:
 		case SIOCSARP:
-			if (!capable(CAP_NET_ADMIN))
+			if (!capable(CAP_NET_ADMIN) &&
+					!capable(CAP_VE_NET_ADMIN))
 				return -EPERM;
 		case SIOCGARP:
 			err = copy_from_user(&r, arg, sizeof(struct arpreq));
@@ -1239,7 +1240,9 @@ static int arp_proc_init(void);
 
 void __init arp_init(void)
 {
-	neigh_table_init(&arp_tbl);
+	get_ve0()->ve_arp_tbl = &global_arp_tbl;
+	if (neigh_table_init(&arp_tbl))
+		panic("cannot initialize ARP tables\n");
 
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
@@ -1371,8 +1374,9 @@ static int arp_seq_open(struct inode *in
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
-       
+	struct neigh_seq_state *s;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		goto out;
 
@@ -1399,7 +1403,7 @@ static struct file_operations arp_seq_fo
 
 static int __init arp_proc_init(void)
 {
-	if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
+	if (!proc_glob_fops_create("net/arp", S_IRUGO, &arp_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
@@ -1418,8 +1422,56 @@ EXPORT_SYMBOL(arp_find);
 EXPORT_SYMBOL(arp_create);
 EXPORT_SYMBOL(arp_xmit);
 EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
+EXPORT_SYMBOL(global_arp_tbl);
 
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 EXPORT_SYMBOL(clip_tbl_hook);
 #endif
+
+#ifdef CONFIG_VE
+int ve_arp_init(struct ve_struct *ve)
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_arp_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_arp_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_arp_tbl) = global_arp_tbl;
+	ve->ve_arp_tbl->parms.tbl = ve->ve_arp_tbl;
+	old_env = set_exec_env(ve);
+	err = neigh_table_init(ve->ve_arp_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+#endif
+	err = 0;
+
+out:
+	set_exec_env(old_env);
+	return err;
+
+out_free:
+	kfree(ve->ve_arp_tbl);
+	ve->ve_arp_tbl = NULL;
+	goto out;
+}
+EXPORT_SYMBOL(ve_arp_init);
+
+void ve_arp_fini(struct ve_struct *ve)
+{
+	if (ve->ve_arp_tbl) {
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_unregister(&ve->ve_arp_tbl->parms);
+#endif
+		ve->ve_arp_tbl->kmem_cachep = NULL;
+		neigh_table_clear(ve->ve_arp_tbl);
+		kfree(ve->ve_arp_tbl);
+		ve->ve_arp_tbl = NULL;
+	}
+}
+EXPORT_SYMBOL(ve_arp_fini);
+#endif /* CONFIG_VE */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/devinet.c kernel-2.6.18-417.el5-028stab121/net/ipv4/devinet.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/devinet.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/devinet.c	2017-01-13 08:40:41.000000000 -0500
@@ -71,7 +71,7 @@ struct ipv4_devconf ipv4_devconf = {
 	.shared_media =	  1,
 };
 
-static struct ipv4_devconf ipv4_devconf_dflt = {
+struct ipv4_devconf ipv4_devconf_dflt = {
 	.accept_redirects =  1,
 	.send_redirects =    1,
 	.secure_redirects =  1,
@@ -87,10 +87,16 @@ static struct ipv4_devconf_extensions ip
 	.accept_local = 0,
 };
 
+#ifdef CONFIG_VE
+#define ve_ipv4_devconf_dflt	(*(get_exec_env()->_ipv4_devconf_dflt))
+#else
+#define ve_ipv4_devconf_dflt	ipv4_devconf_dflt
+#endif
+
 static void rtmsg_ifa(int event, struct in_ifaddr *);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
 static void devinet_sysctl_register(struct in_device *in_dev,
@@ -100,22 +106,44 @@ static void devinet_sysctl_unregister(st
 
 /* Locks all the inet devices. */
 
-static struct in_ifaddr *inet_alloc_ifa(void)
+extern unsigned int sysctl_ve_ifa_nr;
+
+struct in_ifaddr *inet_alloc_ifa(void)
 {
-	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
+	struct in_ifaddr *ifa = NULL;
+	struct ve_struct *ve = get_exec_env();
+
+	if (atomic_add_return(1, &ve->ifa_nr) > sysctl_ve_ifa_nr &&
+	    !ve_is_super(ve))
+		goto out;
+
+	ifa = kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_UBC);
 
 	if (ifa) {
 		INIT_RCU_HEAD(&ifa->rcu_head);
-	}
+	} else
+out:
+		atomic_dec(&ve->ifa_nr);
 
 	return ifa;
 }
+EXPORT_SYMBOL_GPL(inet_alloc_ifa);
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
 {
 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
-	if (ifa->ifa_dev)
-		in_dev_put(ifa->ifa_dev);
+	struct in_device *ifa_dev = ifa->ifa_dev;
+	struct ve_struct *ve = NULL;
+
+	if (ifa_dev) {
+		if (ifa_dev->dev)
+			ve = get_ve(ifa_dev->dev->owner_env);
+		if (ve) {
+			atomic_dec(&ve->ifa_nr);
+			put_ve(ve);
+		}
+		in_dev_put(ifa_dev);
+	}
 	kfree(ifa);
 }
 
@@ -153,7 +181,7 @@ struct in_device *inetdev_init(struct ne
 	if (!in_dev)
 		goto out;
 	INIT_RCU_HEAD(&in_dev->rcu_head);
-	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
+	memcpy(&in_dev->cnf, &ve_ipv4_devconf_dflt, sizeof(in_dev->cnf));
 	if (ext)
 		memcpy(&ext->ipv4_devconf_ext, &ipv4_devconf_dflt_ext,
 		       sizeof(ipv4_devconf_dflt_ext));
@@ -189,6 +217,7 @@ out_kfree:
 	in_dev = NULL;
 	goto out;
 }
+EXPORT_SYMBOL_GPL(inetdev_init);
 
 static void in_dev_rcu_put(struct rcu_head *head)
 {
@@ -196,7 +225,7 @@ static void in_dev_rcu_put(struct rcu_he
 	in_dev_put(idev);
 }
 
-static void inetdev_destroy(struct in_device *in_dev)
+static void inetdev_destroy(struct in_device *in_dev, int force)
 {
 	struct in_ifaddr *ifa;
 	struct net_device *dev;
@@ -204,7 +233,7 @@ static void inetdev_destroy(struct in_de
 	ASSERT_RTNL();
 
 	dev = in_dev->dev;
-	if (dev == &loopback_dev)
+	if (!force && dev == &loopback_dev)
 		return;
 
 	in_dev->dead = 1;
@@ -246,7 +275,7 @@ int inet_addr_onlink(struct in_device *i
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy)
 {
 	struct in_ifaddr *promote = NULL;
@@ -332,11 +361,11 @@ static void inet_del_ifa(struct in_devic
 		inet_free_ifa(ifa1);
 
 		if (!in_dev->ifa_list)
-			inetdev_destroy(in_dev);
+			inetdev_destroy(in_dev, 0);
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+int inet_insert_ifa(struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -386,6 +415,7 @@ static int inet_insert_ifa(struct in_ifa
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(inet_insert_ifa);
 
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
@@ -594,7 +624,7 @@ int devinet_ioctl(unsigned int cmd, void
 
 	case SIOCSIFFLAGS:
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto out;
 		break;
 	case SIOCSIFADDR:	/* Set interface address (and family) */
@@ -602,7 +632,7 @@ int devinet_ioctl(unsigned int cmd, void
 	case SIOCSIFDSTADDR:	/* Set the destination address */
 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto out;
 		ret = -EINVAL;
 		if (sin->sin_family != AF_INET)
@@ -1061,11 +1091,12 @@ static int inetdev_event(struct notifier
 		inetdev_send_gratuitous_arp(dev, in_dev);
 		break;
 	case NETDEV_CHANGEMTU:
-		if (dev->mtu >= 68)
-			break;
-		/* MTU falled under 68, disable IP */
+		if (dev->mtu < 68)
+			/* MTU falled under 68, disable IP */
+			inetdev_destroy(in_dev, 0);
+		break;
 	case NETDEV_UNREGISTER:
-		inetdev_destroy(in_dev);
+		inetdev_destroy(in_dev, 1);
 		break;
 	case NETDEV_CHANGENAME:
 		/* Do not notify about label change, this event is
@@ -1199,10 +1230,10 @@ static struct rtnetlink_link inet_rtnetl
 void inet_forward_change(void)
 {
 	struct net_device *dev;
-	int on = ipv4_devconf.forwarding;
+	int on = ve_ipv4_devconf.forwarding;
 
-	ipv4_devconf.accept_redirects = !on;
-	ipv4_devconf_dflt.forwarding = on;
+	ve_ipv4_devconf.accept_redirects = !on;
+	ve_ipv4_devconf_dflt.forwarding = on;
 
 	read_lock(&dev_base_lock);
 	for (dev = dev_base; dev; dev = dev->next) {
@@ -1227,9 +1258,9 @@ static int devinet_sysctl_forward(ctl_ta
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
-		if (valp != &ipv4_devconf_dflt.forwarding) {
+		if (valp != &ve_ipv4_devconf_dflt.forwarding) {
 			rtnl_lock();
-			if (valp == &ipv4_devconf.forwarding) {
+			if (valp == &ve_ipv4_devconf.forwarding) {
 				inet_forward_change();
 			} else if (*valp) {
 				struct ipv4_devconf *cnf = ctl->extra1;
@@ -1542,7 +1573,7 @@ static int check_ext_conf(ctl_table *t, 
 	if (ext)
 		t->data += (char*) &ext->ipv4_devconf_ext -
 			   (char*) &ipv4_devconf_ext;
-	else if (dflt)
+	else if (dflt && ve_is_super(get_exec_env()))
 		t->data = &ipv4_devconf_dflt_ext.accept_local;
 	else {
 		/*
@@ -1558,16 +1589,16 @@ static int check_ext_conf(ctl_table *t, 
 	return 1;
 }
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p)
+static struct devinet_sysctl_table *__devinet_sysctl_register(struct in_device *in_dev,
+		char *dev_name, int ifindex, struct ipv4_devconf *p)
 {
 	int i;
-	struct net_device *dev = in_dev ? in_dev->dev : NULL;
-	struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
-	char *dev_name = NULL;
+	struct devinet_sysctl_table *t;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return;
+		goto out;
+
 	memcpy(t, &devinet_sysctl, sizeof(*t));
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 
@@ -1578,14 +1609,8 @@ static void devinet_sysctl_register(stru
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].extra1 = p;
 		t->devinet_vars[i].de = NULL;
-	}
-
-	if (dev) {
-		dev_name = dev->name; 
-		t->devinet_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+		t->devinet_vars[i].extra1 = p;
+		t->devinet_vars[i].owner_env = get_exec_env();
 	}
 
 	/* 
@@ -1595,8 +1620,9 @@ static void devinet_sysctl_register(stru
 	 */	
 	dev_name = kstrdup(dev_name, GFP_KERNEL);
 	if (!dev_name)
-	    goto free;
+	    goto out_free_table;
 
+	t->devinet_dev[0].ctl_name    = ifindex;
 	t->devinet_dev[0].procname    = dev_name;
 	t->devinet_dev[0].child	      = t->devinet_vars;
 	t->devinet_dev[0].de	      = NULL;
@@ -1609,17 +1635,38 @@ static void devinet_sysctl_register(stru
 
 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
 	if (!t->sysctl_header)
-	    goto free_procname;
+	    goto out_free_procname;
 
-	p->sysctl = t;
-	return;
+	return t;
 
 	/* error path */
- free_procname:
+out_free_procname:
 	kfree(dev_name);
- free:
+out_free_table:
 	kfree(t);
-	return;
+out:
+	printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
+	return NULL;
+}
+
+static void devinet_sysctl_register(struct in_device *in_dev,
+				    struct ipv4_devconf *p)
+{
+	struct net_device *dev;
+	char *dev_name;
+	int ifindex;
+
+	dev = in_dev ? in_dev->dev : NULL;
+
+	if (dev) {
+		dev_name = dev->name; 
+		ifindex = dev->ifindex;
+	} else {
+		dev_name = "default";
+		ifindex = NET_PROTO_CONF_DEFAULT;
+	}
+
+	p->sysctl = __devinet_sysctl_register(in_dev, dev_name, ifindex, p);
 }
 
 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
@@ -1632,8 +1679,176 @@ static void devinet_sysctl_unregister(st
 		kfree(t);
 	}
 }
+
+#ifdef CONFIG_VE
+static ctl_table net_sysctl_tables[] = {
+	/* 0: net */
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[2],
+	},
+	{ .ctl_name = 0, },
+	/* 2: net/ipv4 */
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[4],
+	},
+	{ .ctl_name = 0, },
+	/* 4, 5: net/ipv4/[vars] */
+	{
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.forwarding,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_sysctl_forward,
+		.strategy	= &ipv4_sysctl_forward_strategy,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE,
+		.procname	= "route",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[7],
+	},
+	{ .ctl_name = 0 },
+	/* 7: net/ipv4/route/flush */
+	{
+		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.data		= NULL, /* setuped below */
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },
+};
+
+static int ip_forward_sysctl_register(struct ve_struct *ve,
+		struct ipv4_devconf *p)
+{
+	struct ctl_table_header *hdr;
+	ctl_table *root, *ipv4_table, *route_table;
+
+	root = clone_sysctl_template(net_sysctl_tables);
+	if (root == NULL)
+		goto out;
+
+	ipv4_table = root->child->child;
+	ipv4_table[0].data = &p->forwarding;
+
+	route_table = ipv4_table[1].child;
+	route_table[0].data = &ipv4_flush_delay;
+
+	hdr = register_sysctl_table(root, 1);
+	if (hdr == NULL)
+		goto out_free;
+
+	ve->forward_header = hdr;
+	ve->forward_table = root;
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;
+}
+
+static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
+{
+	unregister_sysctl_table(ve->forward_header);
+	ve->forward_header = NULL;
+}
+
+static inline void ip_forward_sysctl_free(struct ve_struct *ve)
+{
+	if (ve->forward_table == NULL)
+		return;
+
+	free_sysctl_clone(ve->forward_table);
+	ve->forward_table = NULL;
+}
+#endif
 #endif
 
+int devinet_sysctl_init(struct ve_struct *ve)
+{
+	int err = 0;
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_VE
+	struct ipv4_devconf *conf, *conf_def;
+
+	err = -ENOMEM;
+
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		goto err1;
+
+	memcpy(conf, &ipv4_devconf, sizeof(*conf));
+	conf->sysctl = __devinet_sysctl_register(NULL, "all",
+			NET_PROTO_CONF_ALL, conf);
+	if (!conf->sysctl)
+		goto err2;
+
+	conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
+	if (!conf_def)
+		goto err3;
+
+	memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));
+	conf_def->sysctl = __devinet_sysctl_register(NULL, "default",
+			NET_PROTO_CONF_DEFAULT, conf_def);
+	if (!conf_def->sysctl)
+		goto err4;
+
+	err = ip_forward_sysctl_register(ve, conf);
+	if (err)
+		goto err5;
+
+	ve->_ipv4_devconf = conf;
+	ve->_ipv4_devconf_dflt = conf_def;
+	return 0;
+
+err5:
+	devinet_sysctl_unregister(conf_def);
+err4:
+	kfree(conf_def);
+err3:
+	devinet_sysctl_unregister(conf);
+err2:
+	kfree(conf);
+err1:
+#endif
+#endif
+	return err;
+}
+
+void devinet_sysctl_fini(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_VE
+	ip_forward_sysctl_unregister(ve);
+	devinet_sysctl_unregister(ve->_ipv4_devconf);
+	devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
+
+void devinet_sysctl_free(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_VE
+	ip_forward_sysctl_free(ve);
+	kfree(ve->_ipv4_devconf);
+	kfree(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
+
 void __init devinet_init(void)
 {
 	register_gifconf(PF_INET, inet_gifconf);
@@ -1642,7 +1857,8 @@ void __init devinet_init(void)
 #ifdef CONFIG_SYSCTL
 	devinet_sysctl.sysctl_header =
 		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+	__devinet_sysctl_register(NULL, "default", NET_PROTO_CONF_DEFAULT,
+			&ipv4_devconf_dflt);
 #endif
 }
 
@@ -1652,3 +1868,7 @@ EXPORT_SYMBOL(inetdev_by_index);
 EXPORT_SYMBOL(register_inetaddr_notifier);
 EXPORT_SYMBOL(unregister_inetaddr_notifier);
 EXPORT_SYMBOL(inet_confirm_addr);
+EXPORT_SYMBOL(inet_del_ifa);
+EXPORT_SYMBOL(devinet_sysctl_init);
+EXPORT_SYMBOL(devinet_sysctl_fini);
+EXPORT_SYMBOL(devinet_sysctl_free);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/fib_frontend.c kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_frontend.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/fib_frontend.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_frontend.c	2017-01-13 08:40:21.000000000 -0500
@@ -52,17 +52,21 @@
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
-struct fib_table *ip_fib_local_table;
-struct fib_table *ip_fib_main_table;
+struct fib_table *__ip_fib_local_table;
+struct fib_table *__ip_fib_main_table;
 
 #define FIB_TABLE_HASHSZ 1
-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-
 #else
-
 #define FIB_TABLE_HASHSZ 256
+#endif
+
+#ifdef CONFIG_VE
+#define fib_table_hash	(get_exec_env()->_fib_table_hash)
+#else
 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+#endif
 
+#ifdef CONFIG_IP_MULTIPLE_TABLES
 struct fib_table *fib_new_table(u32 id)
 {
 	struct fib_table *tb;
@@ -200,6 +204,8 @@ int fib_validate_source(u32 src, u32 dst
 		no_addr = in_dev->ifa_list == NULL;
 		rpf = IN_DEV_RPFILTER(in_dev);
 		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
+		if (dev->features & NETIF_F_VENET)
+			accept_local = 1;
 	}
 	rcu_read_unlock();
 
@@ -273,7 +279,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
 			return -EFAULT;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/fib_hash.c kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_hash.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/fib_hash.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_hash.c	2017-01-13 08:40:41.000000000 -0500
@@ -35,6 +35,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -72,11 +73,6 @@ struct fn_zone {
  * can be cheaper than memory lookup, so that FZ_* macros are used.
  */
 
-struct fn_hash {
-	struct fn_zone	*fn_zones[33];
-	struct fn_zone	*fn_zone_list;
-};
-
 static inline u32 fn_hash(u32 key, struct fn_zone *fz)
 {
 	u32 h = ntohl(key)>>(32 - fz->fz_order);
@@ -623,7 +619,7 @@ fn_hash_delete(struct fib_table *tb, str
 	return -ESRCH;
 }
 
-static int fn_flush_list(struct fn_zone *fz, int idx)
+static int fn_flush_list(struct fn_zone *fz, int idx, int destroy)
 {
 	struct hlist_head *head = &fz->fz_hash[idx];
 	struct hlist_node *node, *n;
@@ -638,7 +634,9 @@ static int fn_flush_list(struct fn_zone 
 		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
+			if (fi == NULL)
+				continue;
+			if (destroy || (fi->fib_flags&RTNH_F_DEAD)) {
 				write_lock_bh(&fib_hash_lock);
 				list_del(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
@@ -660,7 +658,7 @@ static int fn_flush_list(struct fn_zone 
 	return found;
 }
 
-static int fn_hash_flush(struct fib_table *tb)
+static int __fn_hash_flush(struct fib_table *tb, int destroy)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fn_zone *fz;
@@ -670,11 +668,85 @@ static int fn_hash_flush(struct fib_tabl
 		int i;
 
 		for (i = fz->fz_divisor - 1; i >= 0; i--)
-			found += fn_flush_list(fz, i);
+			found += fn_flush_list(fz, i, destroy);
 	}
 	return found;
 }
 
+static int fn_hash_flush(struct fib_table *tb)
+{
+	return __fn_hash_flush(tb, 0);
+}
+
+#ifdef CONFIG_VE
+static void fn_free_zones(struct fib_table *tb)
+{
+	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+	struct fn_zone *fz;
+
+	while ((fz = table->fn_zone_list) != NULL) {
+		table->fn_zone_list = fz->fz_next;
+		fz_hash_free(fz->fz_hash, fz->fz_divisor);
+		kfree(fz);
+	}
+}
+
+void fib_hash_destroy(struct fib_table *tb)
+{
+	__fn_hash_flush(tb, 1);
+	fn_free_zones(tb);
+	kfree(tb);
+}
+
+/*
+ * Initialization of virtualized networking subsystem.
+ */
+int init_ve_route(struct ve_struct *ve)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ve->_fib_table_hash); i++)
+		INIT_HLIST_HEAD(&ve->_fib_table_hash[i]);
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	return fib_rules_create();
+#else
+	ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_local_table)
+		return -ENOMEM;
+	ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_main_table) {
+		fib_hash_destroy(ve->_local_table);
+		return -ENOMEM;
+	}
+
+	hlist_add_head_rcu(&ve->_local_table->tb_hlist,
+			&ve->_fib_table_hash[0]);
+	hlist_add_head_rcu(&ve->_main_table->tb_hlist,
+			&ve->_fib_table_hash[0]);
+	return 0;
+#endif
+}
+
+void fini_ve_route(struct ve_struct *ve)
+{
+	unsigned int bytes;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	fib_rules_destroy();
+#else
+	fib_hash_destroy(ve->_local_table);
+	fib_hash_destroy(ve->_main_table);
+#endif
+	bytes = ve->_fib_hash_size * sizeof(struct hlist_head *);
+	fib_hash_free(ve->_fib_info_hash, bytes);
+	fib_hash_free(ve->_fib_info_laddrhash, bytes);
+	ve->_fib_info_hash = ve->_fib_info_laddrhash = NULL;
+}
+
+EXPORT_SYMBOL(init_ve_route);
+EXPORT_SYMBOL(fini_ve_route);
+#endif
+
 
 static inline int
 fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -766,7 +838,7 @@ static int fn_hash_dump(struct fib_table
 	return skb->len;
 }
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
+#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE)
 struct fib_table * fib_hash_init(u32 id)
 #else
 struct fib_table * __init fib_hash_init(u32 id)
@@ -777,13 +849,13 @@ struct fib_table * __init fib_hash_init(
 	if (fn_hash_kmem == NULL)
 		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
 						 sizeof(struct fib_node),
-						 0, SLAB_HWCACHE_ALIGN,
+						 0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						 NULL, NULL);
 
 	if (fn_alias_kmem == NULL)
 		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
 						  sizeof(struct fib_alias),
-						  0, SLAB_HWCACHE_ALIGN,
+						  0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						  NULL, NULL);
 
 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
@@ -1075,13 +1147,13 @@ static struct file_operations fib_seq_fo
 
 int __init fib_proc_init(void)
 {
-	if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
+	if (!proc_glob_fops_create("net/route", S_IRUGO, &fib_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void __init fib_proc_exit(void)
 {
-	proc_net_remove("route");
+	remove_proc_glob_entry("net/route", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/fib_lookup.h kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_lookup.h
--- kernel-2.6.18-417.el5.orig/net/ipv4/fib_lookup.h	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_lookup.h	2017-01-13 08:40:21.000000000 -0500
@@ -42,5 +42,6 @@ extern struct fib_alias *fib_find_alias(
 extern int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort,
 			    int *last_idx, int *dflt);
+void fib_hash_free(struct hlist_head *hash, int bytes);
 
 #endif /* _FIB_LOOKUP_H */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/fib_rules.c kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_rules.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/fib_rules.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_rules.c	2017-01-13 08:40:22.000000000 -0500
@@ -32,7 +32,13 @@
 #include <net/ip_fib.h>
 #include <net/fib_rules.h>
 
-static struct fib_rules_ops fib4_rules_ops;
+static struct fib_rules_ops _fib4_rules_ops;
+#ifdef CONFIG_VE
+#define fib4_rules_ops (*get_exec_env()->_fib4_ops)
+#else
+#define fib4_rules_ops _fib4_rules_ops
+#endif
+
 
 struct fib4_rule
 {
@@ -71,7 +77,7 @@ static struct fib4_rule main_rule = {
 	},
 };
 
-static struct fib4_rule local_rule = {
+static struct fib4_rule loc_rule = {
 	.common = {
 		.refcnt =	ATOMIC_INIT(2),
 		.table =	RT_TABLE_LOCAL,
@@ -80,7 +86,91 @@ static struct fib4_rule local_rule = {
 	},
 };
 
-static LIST_HEAD(fib4_rules);
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+#ifdef CONFIG_VE
+static inline void init_rule_struct(struct fib4_rule *r,
+		u32 pref, unsigned char table, unsigned char action)
+{
+	memset(r, 0, sizeof(struct fib4_rule));
+	atomic_set(&r->common.refcnt, 1);
+	r->common.pref = pref;
+	r->common.table = table;
+	r->common.action = action;
+}
+#endif
+
+int fib_rules_create(void)
+{
+#ifdef CONFIG_VE
+	struct fib4_rule *default_rule, *main_rule, *loc_rule;
+	struct fib_rules_ops *ops;
+
+	ops = kmalloc(sizeof(struct fib_rules_ops), GFP_KERNEL_UBC);
+	if (ops == NULL)
+		goto out_ops;
+	memcpy(ops, &_fib4_rules_ops, sizeof(struct fib_rules_ops));
+	INIT_LIST_HEAD(&ops->rules_list);
+
+	default_rule = kmalloc(sizeof(struct fib4_rule), GFP_KERNEL_UBC);
+	if (default_rule == NULL)
+		goto out_def;
+
+	main_rule = kmalloc(sizeof(struct fib4_rule), GFP_KERNEL_UBC);
+	if (main_rule == NULL)
+		goto out_main;
+
+	loc_rule = kmalloc(sizeof(struct fib4_rule), GFP_KERNEL_UBC);
+	if (loc_rule == NULL)
+		goto out_loc;
+
+	init_rule_struct(default_rule, 0x7FFF, RT_TABLE_DEFAULT, FR_ACT_TO_TBL);
+	init_rule_struct(main_rule, 0x7FFE, RT_TABLE_MAIN, FR_ACT_TO_TBL);
+	init_rule_struct(loc_rule, 0, RT_TABLE_LOCAL, FR_ACT_TO_TBL);
+
+	list_add_tail(&loc_rule->common.list, &ops->rules_list);
+	list_add_tail(&main_rule->common.list, &ops->rules_list);
+	list_add_tail(&default_rule->common.list, &ops->rules_list);
+
+	get_exec_env()->_fib4_ops = ops;
+	fib_rules_register(ops);
+
+	return 0;
+
+out_loc:
+	kfree(main_rule);
+out_main:
+	kfree(default_rule);
+out_def:
+	kfree(ops);
+out_ops:
+	return -1;
+#else
+	return 0;
+#endif
+}
+
+void fib_rules_destroy(void)
+{
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	struct fib_rule *r;
+	struct list_head *pos, *tmp;
+
+	ve = get_exec_env();
+	rtnl_lock();
+	list_for_each_safe (pos, tmp, &ve->_fib4_ops->rules_list) {
+		r = list_entry(pos, struct fib_rule, list);
+
+		list_del_rcu(pos);
+		fib_rule_put(r);
+	}
+	rtnl_unlock();
+
+	fib_rules_unregister(ve->_fib4_ops);
+	kfree(ve->_fib4_ops);
+#endif
+}
+#endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
 u32 fib_rules_tclass(struct fib_result *res)
@@ -330,9 +420,9 @@ static u32 fib4_rule_default_pref(void)
 	struct list_head *pos;
 	struct fib_rule *rule;
 
-	if (!list_empty(&fib4_rules)) {
-		pos = fib4_rules.next;
-		if (pos->next != &fib4_rules) {
+	if (!list_empty(&fib4_rules_ops.rules_list)) {
+		pos = fib4_rules_ops.rules_list.next;
+		if (pos->next != &fib4_rules_ops.rules_list) {
 			rule = list_entry(pos->next, struct fib_rule, list);
 			if (rule->pref)
 				return rule->pref - 1;
@@ -342,7 +432,7 @@ static u32 fib4_rule_default_pref(void)
 	return 0;
 }
 
-static struct fib_rules_ops fib4_rules_ops = {
+static struct fib_rules_ops _fib4_rules_ops = {
 	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
 	.action		= fib4_rule_action,
@@ -353,15 +443,18 @@ static struct fib_rules_ops fib4_rules_o
 	.default_pref	= fib4_rule_default_pref,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.policy		= fib4_rule_policy,
-	.rules_list	= &fib4_rules,
+	.rules_list	= LIST_HEAD_INIT(_fib4_rules_ops.rules_list),
 	.owner		= THIS_MODULE,
 };
 
 void __init fib4_rules_init(void)
 {
-	list_add_tail(&local_rule.common.list, &fib4_rules);
-	list_add_tail(&main_rule.common.list, &fib4_rules);
-	list_add_tail(&default_rule.common.list, &fib4_rules);
+#ifdef CONFIG_VE
+	get_ve0()->_fib4_ops = &_fib4_rules_ops;
+#endif
+	list_add_tail(&loc_rule.common.list, &fib4_rules_ops.rules_list);
+	list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list);
+	list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list);
 
-	fib_rules_register(&fib4_rules_ops);
+	fib_rules_register(&_fib4_rules_ops);
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/fib_semantics.c kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_semantics.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/fib_semantics.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/fib_semantics.c	2017-01-13 08:40:21.000000000 -0500
@@ -32,6 +32,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
@@ -55,6 +56,24 @@ static struct hlist_head *fib_info_laddr
 static unsigned int fib_hash_size;
 static unsigned int fib_info_cnt;
 
+void prepare_fib_info(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->_fib_info_hash = fib_info_hash;
+	get_ve0()->_fib_info_laddrhash = fib_info_laddrhash;
+	get_ve0()->_fib_hash_size = fib_hash_size;
+	get_ve0()->_fib_info_cnt = fib_info_cnt;
+#endif
+}
+
+#ifdef CONFIG_VE
+#define fib_info_hash (get_exec_env()->_fib_info_hash)
+#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash)
+#define fib_hash_size (get_exec_env()->_fib_hash_size)
+#define fib_info_cnt (get_exec_env()->_fib_info_cnt)
+#endif
+
+
 #define DEVINDEX_HASHBITS 8
 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
@@ -234,13 +253,15 @@ static struct fib_info *fib_find_info(co
 	return NULL;
 }
 
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
+static inline unsigned int fib_devindex_hashfn(unsigned int val,
+		envid_t veid)
 {
 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
 
 	return (val ^
 		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+		(val >> (DEVINDEX_HASHBITS * 2)) ^
+		(veid ^ (veid >> 16))) & mask;
 }
 
 /* Check, that the gateway is already configured.
@@ -256,7 +277,7 @@ int ip_fib_check_default(u32 gw, struct 
 
 	read_lock(&fib_info_lock);
 
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	hlist_for_each_entry(nh, node, head, nh_hash) {
 		if (nh->nh_dev == dev &&
@@ -580,7 +601,7 @@ static struct hlist_head *fib_hash_alloc
 			__get_free_pages(GFP_KERNEL, get_order(bytes));
 }
 
-static void fib_hash_free(struct hlist_head *hash, int bytes)
+void fib_hash_free(struct hlist_head *hash, int bytes)
 {
 	if (!hash)
 		return;
@@ -836,7 +857,8 @@ link_it:
 
 		if (!nh->nh_dev)
 			continue;
-		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+		hash = fib_devindex_hashfn(nh->nh_dev->ifindex,
+				VEID(nh->nh_dev->owner_env));
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
@@ -1188,7 +1210,8 @@ int fib_sync_down(u32 local, struct net_
 
 	if (dev) {
 		struct fib_info *prev_fi = NULL;
-		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+		unsigned int hash = fib_devindex_hashfn(dev->ifindex,
+				VEID(dev->owner_env));
 		struct hlist_head *head = &fib_info_devhash[hash];
 		struct hlist_node *node;
 		struct fib_nh *nh;
@@ -1253,7 +1276,7 @@ int fib_sync_up(struct net_device *dev)
 		return 0;
 
 	prev_fi = NULL;
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	ret = 0;
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/icmp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/icmp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/icmp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/icmp.c	2017-01-13 08:40:16.000000000 -0500
@@ -93,6 +93,7 @@
 #include <asm/uaccess.h>
 #include <net/checksum.h>
 #include <linux/xfrm.h>
+#include <net/xfrm.h>
 
 /*
  *	Build xmit assembly blocks
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/igmp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/igmp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/igmp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/igmp.c	2017-01-13 08:40:22.000000000 -0500
@@ -696,22 +696,28 @@ static int igmp_send_report(struct in_de
 static void igmp_gq_timer_expire(unsigned long data)
 {
 	struct in_device *in_dev = (struct in_device *)data;
+	struct ve_struct *old_env;
 
+	old_env = set_exec_env(in_dev->dev->owner_env);
 	in_dev->mr_gq_running = 0;
 	igmpv3_send_report(in_dev, NULL);
 	__in_dev_put(in_dev);
+	(void)set_exec_env(old_env);
 }
 
 static void igmp_ifc_timer_expire(unsigned long data)
 {
 	struct in_device *in_dev = (struct in_device *)data;
+	struct ve_struct *old_env;
 
+	old_env = set_exec_env(in_dev->dev->owner_env);
 	igmpv3_send_cr(in_dev);
 	if (in_dev->mr_ifc_count) {
 		in_dev->mr_ifc_count--;
 		igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval);
 	}
 	__in_dev_put(in_dev);
+	(void)set_exec_env(old_env);
 }
 
 static void igmp_ifc_event(struct in_device *in_dev)
@@ -728,6 +734,7 @@ static void igmp_timer_expire(unsigned l
 {
 	struct ip_mc_list *im=(struct ip_mc_list *)data;
 	struct in_device *in_dev = im->interface;
+	struct ve_struct *old_env;
 
 	spin_lock(&im->lock);
 	im->tm_running=0;
@@ -739,6 +746,7 @@ static void igmp_timer_expire(unsigned l
 	im->reporter = 1;
 	spin_unlock(&im->lock);
 
+	old_env = set_exec_env(in_dev->dev->owner_env);
 	if (IGMP_V1_SEEN(in_dev))
 		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
 	else if (IGMP_V2_SEEN(in_dev))
@@ -747,6 +755,7 @@ static void igmp_timer_expire(unsigned l
 		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
 
 	ip_ma_put(im);
+	(void)set_exec_env(old_env);
 }
 
 /* mark EXCLUDE-mode sources */
@@ -2344,6 +2353,8 @@ static inline struct ip_mc_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *in_dev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
 			continue;
@@ -2373,6 +2384,8 @@ static struct ip_mc_list *igmp_mc_get_ne
 			state->in_dev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->in_dev = in_dev_get(state->dev);
 		if (!state->in_dev)
 			continue;
@@ -2506,6 +2519,8 @@ static inline struct ip_sf_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2545,6 +2560,8 @@ static struct ip_sf_list *igmp_mcf_get_n
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+				continue;
 			state->idev = in_dev_get(state->dev);
 			if (!state->idev)
 				continue;
@@ -2664,8 +2681,8 @@ static struct file_operations igmp_mcf_s
 
 int __init igmp_mc_proc_init(void)
 {
-	proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	proc_glob_fops_create("net/igmp", S_IRUGO, &igmp_mc_seq_fops);
+	proc_glob_fops_create("net/mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
 	return 0;
 }
 #endif
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/inet_connection_sock.c kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_connection_sock.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/inet_connection_sock.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_connection_sock.c	2017-01-13 08:40:22.000000000 -0500
@@ -24,6 +24,9 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
+
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
@@ -101,7 +104,9 @@ int inet_csk_get_port(struct inet_hashin
 	struct hlist_node *node;
 	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *env;
 
+	env = sk->owner_env;
 	local_bh_disable();
 	if (!snum) {
 		int remaining, rover, low, high;
@@ -110,12 +115,21 @@ int inet_csk_get_port(struct inet_hashin
 		remaining = (high - low) + 1;
 		rover = net_random() % remaining + low;
 
+		/* Below we treat low > high as high == low. So do here. Den */
+		if (remaining < 1) {
+			remaining = 1;
+			rover = low;
+		}
+
 		do {
 			if (inet_is_reserved_local_port(rover))
 				goto next_nolock;
-			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+			head = &hashinfo->bhash[inet_bhashfn(rover,
+					hashinfo->bhash_size, VEID(env))];
 			spin_lock(&head->lock);
-			inet_bind_bucket_for_each(tb, node, &head->chain)
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (!ve_accessible_strict(tb->owner_env, env))
+					continue;
 				if (tb->port == rover) {
 					if(!bind_conflict(sk, tb)) {
 						spin_unlock(&head->lock);
@@ -124,6 +138,7 @@ int inet_csk_get_port(struct inet_hashin
 					}
 					goto next;
 				}
+			}
 			break;
 		next:
 			spin_unlock(&head->lock);
@@ -148,11 +163,15 @@ int inet_csk_get_port(struct inet_hashin
 		snum = rover;
 	} else {
 have_snum:
-		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+		head = &hashinfo->bhash[inet_bhashfn(snum,
+				hashinfo->bhash_size, VEID(env))];
 		spin_lock(&head->lock);
-		inet_bind_bucket_for_each(tb, node, &head->chain)
+		inet_bind_bucket_for_each(tb, node, &head->chain) {
+			if (!ve_accessible_strict(tb->owner_env, env))
+				continue;
 			if (tb->port == snum)
 				goto tb_found;
+		}
 	}
 	tb = NULL;
 	goto tb_not_found;
@@ -173,7 +192,7 @@ tb_found:
 	}
 tb_not_found:
 	ret = 1;
-	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
+	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum, env)) == NULL)
 		goto fail_unlock;
 	if (hlist_empty(&tb->owners)) {
 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
@@ -581,7 +600,7 @@ void inet_csk_destroy_sock(struct sock *
 
 	sk_refcnt_debug_release(sk);
 
-	atomic_dec(sk->sk_prot->orphan_count);
+	ub_dec_orphan_count(sk);
 	sock_put(sk);
 }
 
@@ -661,7 +680,7 @@ void inet_csk_listen_stop(struct sock *s
 
 		sock_orphan(child);
 
-		atomic_inc(sk->sk_prot->orphan_count);
+		ub_inc_orphan_count(sk);
 
 		inet_csk_destroy_sock(child);
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/inet_diag.c kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_diag.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/inet_diag.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_diag.c	2017-01-13 08:40:21.000000000 -0500
@@ -676,7 +676,9 @@ static int inet_diag_dump(struct sk_buff
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 	const struct inet_diag_handler *handler;
 	struct inet_hashinfo *hashinfo;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	handler = inet_diag_table[cb->nlh->nlmsg_type];
 	BUG_ON(handler == NULL);
 	hashinfo = handler->idiag_hashinfo;
@@ -697,6 +699,8 @@ static int inet_diag_dump(struct sk_buff
 			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
 				struct inet_sock *inet = inet_sk(sk);
 
+				if (!ve_accessible(sk->owner_env, ve))
+					continue;
 				if (num < s_num) {
 					num++;
 					continue;
@@ -757,6 +761,8 @@ skip_listen_ht:
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
+			if (!ve_accessible(sk->owner_env, ve))
+				continue;
 			if (num < s_num)
 				goto next_normal;
 			if (!(r->idiag_states & (1 << sk->sk_state)))
@@ -781,6 +787,8 @@ next_normal:
 			inet_twsk_for_each(tw, node,
 				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
 
+				if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
+					continue;
 				if (num < s_num)
 					goto next_dying;
 				if (r->id.idiag_sport != tw->tw_sport &&
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/inet_hashtables.c kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_hashtables.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/inet_hashtables.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_hashtables.c	2017-01-13 08:40:23.000000000 -0500
@@ -30,7 +30,8 @@
  */
 struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
 						 struct inet_bind_hashbucket *head,
-						 const unsigned short snum)
+						 const unsigned short snum,
+						 struct ve_struct *ve)
 {
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
 
@@ -38,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucke
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		INIT_HLIST_HEAD(&tb->owners);
+		tb->owner_env = ve;
 		hlist_add_head(&tb->node, &head->chain);
 	}
 	return tb;
@@ -67,10 +69,13 @@ void inet_bind_hash(struct sock *sk, str
  */
 static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
 {
-	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
-	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+	int bhash;
+	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 
+	bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size,
+			VEID(sk->owner_env));
+	head = &hashinfo->bhash[bhash];
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	__sk_del_bind_node(sk);
@@ -126,7 +131,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
  * wildcarded during the search since they can never be otherwise.
  */
 struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+				    const unsigned short hnum, const int dif,
+				    struct ve_struct *env)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -135,6 +141,8 @@ struct sock *__inet_lookup_listener(cons
 	sk_for_each(sk, node, head) {
 		const struct inet_sock *inet = inet_sk(sk);
 
+		if (!ve_accessible_strict(sk->owner_env, env))
+			continue;
 		if (inet->num == hnum && !ipv6_only_sock(sk)) {
 			const __u32 rcv_saddr = inet->rcv_saddr;
 			int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -160,12 +168,13 @@ struct sock *__inet_lookup_listener(cons
 	return result;
 }
 
-EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+EXPORT_SYMBOL(__inet_lookup_listener);
 
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
-				    struct inet_timewait_sock **twp)
+				    struct inet_timewait_sock **twp,
+				    struct ve_struct *ve)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -174,13 +183,16 @@ static int __inet_check_established(stru
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	unsigned int hash;
+	struct inet_ehash_bucket *head;
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
 	int twrefcnt = 0;
 
+	hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(ve));
+	head = inet_ehash_bucket(hinfo, hash);
+
 	prefetch(head->chain.first);
 	write_lock(&head->lock);
 
@@ -188,7 +200,8 @@ static int __inet_check_established(stru
 	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
 		tw = inet_twsk(sk2);
 
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr,
+					ports, dif, ve)) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -199,7 +212,8 @@ static int __inet_check_established(stru
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr,
+					ports, dif, ve))
 			goto not_unique;
 	}
 
@@ -253,7 +267,9 @@ int inet_hash_connect(struct inet_timewa
  	struct inet_bind_hashbucket *head;
  	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *ve;
 
+	ve = sk->owner_env;
  	if (!snum) {
 		int i, remaining, low, high, port;
 		static u32 hint;
@@ -269,7 +285,8 @@ int inet_hash_connect(struct inet_timewa
 			port = low + (i + offset) % remaining;
 			if (inet_is_reserved_local_port(port))
 				continue;
- 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			head = &hinfo->bhash[inet_bhashfn(port,
+					 hinfo->bhash_size, VEID(ve))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -277,19 +294,21 @@ int inet_hash_connect(struct inet_timewa
  			 * unique enough.
  			 */
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
+ 				if (tb->port == port &&
+				    ve_accessible_strict(tb->owner_env, ve)) {
  					BUG_TRAP(!hlist_empty(&tb->owners));
  					if (tb->fastreuse >= 0)
  						goto next_port;
  					if (!__inet_check_established(death_row,
 								      sk, port,
-								      &tw))
+								      &tw, ve))
  						goto ok;
  					goto next_port;
  				}
  			}
 
- 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+					head, port, ve);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -324,7 +343,7 @@ ok:
 		goto out;
  	}
 
- 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
  	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -334,7 +353,7 @@ ok:
 	} else {
 		spin_unlock(&head->lock);
 		/* No definite answer... Walk to established hash table */
-		ret = __inet_check_established(death_row, sk, snum, NULL);
+		ret = __inet_check_established(death_row, sk, snum, NULL, ve);
 out:
 		local_bh_enable();
 		return ret;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/inet_timewait_sock.c kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_timewait_sock.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/inet_timewait_sock.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/inet_timewait_sock.c	2017-01-13 08:40:21.000000000 -0500
@@ -28,6 +28,8 @@ int inet_twsk_unhash(struct inet_timewai
 	return 1;
 }
 
+#include <ub/ub_orphan.h>
+
 /* Must be called with locally disabled BHs. */
 void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
 {
@@ -42,7 +44,8 @@ void __inet_twsk_kill(struct inet_timewa
 	write_unlock(&ehead->lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num,
+			hashinfo->bhash_size, tw->tw_owner_env)];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
 	if (tb) {
@@ -82,7 +85,8 @@ void __inet_twsk_hashdance(struct inet_t
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(inet->num,
+			hashinfo->bhash_size, tw->tw_owner_env)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	BUG_TRAP(icsk->icsk_bind_hash);
@@ -107,9 +111,14 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance)
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
 {
-	struct inet_timewait_sock *tw =
-		kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
-				 SLAB_ATOMIC);
+	struct user_beancounter *ub;
+	struct inet_timewait_sock *tw;
+
+	ub = set_exec_ub(sock_bc(sk)->ub);
+	tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+			SLAB_ATOMIC);
+	(void)set_exec_ub(ub);
+
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
@@ -157,6 +166,7 @@ static int inet_twdr_do_twkill_work(stru
 rescan:
 	inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
 		__inet_twsk_del_dead_node(tw);
+		ub_timewait_dec(tw, twdr);
 		spin_unlock(&twdr->death_lock);
 		__inet_twsk_kill(tw, twdr->hashinfo);
 		inet_twsk_put(tw);
@@ -255,6 +265,7 @@ void inet_twsk_deschedule(struct inet_ti
 {
 	spin_lock(&twdr->death_lock);
 	if (inet_twsk_del_dead_node(tw)) {
+		ub_timewait_dec(tw, twdr);
 		inet_twsk_put(tw);
 		if (--twdr->tw_count == 0)
 			del_timer(&twdr->tw_timer);
@@ -301,9 +312,10 @@ void inet_twsk_schedule(struct inet_time
 	spin_lock(&twdr->death_lock);
 
 	/* Unlink it, if it was scheduled */
-	if (inet_twsk_del_dead_node(tw))
+	if (inet_twsk_del_dead_node(tw)) {
+		ub_timewait_dec(tw, twdr);
 		twdr->tw_count--;
-	else
+	} else
 		atomic_inc(&tw->tw_refcnt);
 
 	if (slot >= INET_TWDR_RECYCLE_SLOTS) {
@@ -339,6 +351,7 @@ void inet_twsk_schedule(struct inet_time
 
 	hlist_add_head(&tw->tw_death_node, list);
 
+	ub_timewait_inc(tw, twdr);
 	if (twdr->tw_count++ == 0)
 		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
 	spin_unlock(&twdr->death_lock);
@@ -373,6 +386,7 @@ void inet_twdr_twcal_tick(unsigned long 
 						       &twdr->twcal_row[slot]) {
 				__inet_twsk_del_dead_node(tw);
 				__inet_twsk_kill(tw, twdr->hashinfo);
+				ub_timewait_dec(tw, twdr);
 				inet_twsk_put(tw);
 				killed++;
 			}
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ipconfig.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ipconfig.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ipconfig.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ipconfig.c	2017-01-13 08:40:15.000000000 -0500
@@ -366,7 +366,7 @@ static int __init ic_defaults(void)
 	 */
 	 
 	if (!ic_host_name_set)
-		sprintf(system_utsname.nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+		sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
 
 	if (root_server_addr == INADDR_NONE)
 		root_server_addr = ic_servaddr;
@@ -805,7 +805,7 @@ static void __init ic_do_bootp_ext(u8 *e
 			}
 			break;
 		case 12:	/* Host name */
-			ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+			ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN);
 			ic_host_name_set = 1;
 			break;
 		case 15:	/* Domain name (DNS) */
@@ -816,7 +816,7 @@ static void __init ic_do_bootp_ext(u8 *e
 				ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
 			break;
 		case 40:	/* NIS Domain name (_not_ DNS) */
-			ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+			ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN);
 			break;
 	}
 }
@@ -1368,7 +1368,7 @@ static int __init ip_auto_config(void)
 	printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
 	printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
 	printk(",\n     host=%s, domain=%s, nis-domain=%s",
-	       system_utsname.nodename, ic_domain, system_utsname.domainname);
+	       utsname()->nodename, ic_domain, utsname()->domainname);
 	printk(",\n     bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
 	printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
 	printk(", rootpath=%s", root_server_path);
@@ -1478,11 +1478,11 @@ static int __init ip_auto_config_setup(c
 			case 4:
 				if ((dp = strchr(ip, '.'))) {
 					*dp++ = '\0';
-					strlcpy(system_utsname.domainname, dp,
-						sizeof(system_utsname.domainname));
+					strlcpy(utsname()->domainname, dp,
+						sizeof(utsname()->domainname));
 				}
-				strlcpy(system_utsname.nodename, ip,
-					sizeof(system_utsname.nodename));
+				strlcpy(utsname()->nodename, ip,
+					sizeof(utsname()->nodename));
 				ic_host_name_set = 1;
 				break;
 			case 5:
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ip_forward.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_forward.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ip_forward.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_forward.c	2017-01-13 08:40:23.000000000 -0500
@@ -89,6 +89,24 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
+	/*
+	 * We try to optimize forwarding of VE packets:
+	 * do not decrement TTL (and so save skb_cow)
+	 * during forwarding of outgoing pkts from VE.
+	 * For incoming pkts we still do ttl decr,
+	 * since such skb is not cloned and does not require
+	 * actual cow. So, there is at least one place
+	 * in pkts path with mandatory ttl decr, that is
+	 * sufficient to prevent routing loops.
+	 */
+	iph = skb->nh.iph;
+	if (
+#ifdef CONFIG_IP_ROUTE_NAT			
+	    (rt->rt_flags & RTCF_NAT) == 0 &&	  /* no NAT mangling expected */
+#endif						  /* and */
+	    (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
+		goto no_ttl_decr;
+
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
@@ -97,6 +115,8 @@ int ip_forward(struct sk_buff *skb)
 	/* Decrease ttl after skb cow done */
 	ip_decrease_ttl(iph);
 
+no_ttl_decr:
+
 	/*
 	 *	We now generate an ICMP HOST REDIRECT giving the route
 	 *	we calculated.
@@ -124,3 +144,5 @@ drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
+
+EXPORT_SYMBOL(ip_forward);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ip_fragment.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_fragment.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ip_fragment.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_fragment.c	2017-01-13 08:40:21.000000000 -0500
@@ -96,6 +96,7 @@ struct ipq {
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
+	struct ve_struct *owner_env;
 };
 
 /* Hash table. */
@@ -181,7 +182,8 @@ static __inline__ void frag_free_queue(s
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
+				GFP_ATOMIC);
 
 	if(!qp)
 		return NULL;
@@ -277,6 +279,9 @@ static void ip_evictor(void)
 static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(qp->owner_env);
 
 	spin_lock(&qp->lock);
 
@@ -299,6 +304,8 @@ static void ip_expire(unsigned long arg)
 out:
 	spin_unlock(&qp->lock);
 	ipq_put(qp, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -324,7 +331,8 @@ static struct ipq *ip_frag_intern(struct
 		   qp->saddr == qp_in->saddr	&&
 		   qp->daddr == qp_in->daddr	&&
 		   qp->protocol == qp_in->protocol &&
-		   qp->user == qp_in->user) {
+		   qp->user == qp_in->user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -373,6 +381,7 @@ static struct ipq *ip_frag_create(struct
 	qp->timer.function = ip_expire;		/* expire function	*/
 	spin_lock_init(&qp->lock);
 	atomic_set(&qp->refcnt, 1);
+	qp->owner_env = get_exec_env();
 
 	return ip_frag_intern(qp);
 
@@ -401,7 +410,8 @@ static inline struct ipq *ip_find(struct
 		   qp->saddr == saddr	&&
 		   qp->daddr == daddr	&&
 		   qp->protocol == protocol &&
-		   qp->user == user) {
+		   qp->user == user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -723,6 +733,9 @@ struct sk_buff *ip_defrag(struct sk_buff
 		    qp->meat == qp->len)
 			ret = ip_frag_reasm(qp, dev);
 
+		if (ret)
+			ret->owner_env = skb->owner_env;
+
 		spin_unlock(&qp->lock);
 		ipq_put(qp, NULL);
 		return ret;
@@ -733,6 +746,49 @@ struct sk_buff *ip_defrag(struct sk_buff
 	return NULL;
 }
 
+#ifdef CONFIG_VE
+/* XXX */
+void ip_fragment_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	/* All operations with fragment queues are performed from NET_RX/TX
+	 * soft interrupts or from timer context.  --Den */
+	local_bh_disable();
+	do {
+		progress = 0;
+		for (i = 0; i < IPQ_HASHSZ; i++) {
+			struct ipq *qp;
+			struct hlist_node *p, *n;
+
+			if (hlist_empty(&ipq_hash[i]))
+				continue;
+inner_restart:
+			read_lock(&ipfrag_lock);
+			hlist_for_each_entry_safe(qp, p, n,
+					&ipq_hash[i], list) {
+				if (!ve_accessible_strict(qp->owner_env, envid))
+					continue;
+				atomic_inc(&qp->refcnt);
+				read_unlock(&ipfrag_lock);
+
+				spin_lock(&qp->lock);
+				if (!(qp->last_in&COMPLETE))
+					ipq_kill(qp);
+				spin_unlock(&qp->lock);
+
+				ipq_put(qp, NULL);
+				progress = 1;
+				goto inner_restart;
+			}
+			read_unlock(&ipfrag_lock);
+		}
+	} while(progress);
+	local_bh_enable();
+}
+EXPORT_SYMBOL(ip_fragment_cleanup);
+#endif
+
 void ipfrag_init(void)
 {
 	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ip_gre.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_gre.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ip_gre.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_gre.c	2017-01-13 08:40:26.000000000 -0500
@@ -30,6 +30,13 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/if_ether.h>
 
+#include <linux/vzcalluser.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
@@ -123,8 +130,6 @@ static void ipgre_tunnel_setup(struct ne
 
 static int ipgre_fb_tunnel_init(struct net_device *dev);
 
-static struct net_device *ipgre_fb_tunnel_dev;
-
 /* Tunnel hash table */
 
 /*
@@ -146,49 +151,52 @@ static struct net_device *ipgre_fb_tunne
 #define HASH_SIZE  16
 #define HASH(addr) ((addr^(addr>>4))&0xF)
 
-static struct ip_tunnel *tunnels[4][HASH_SIZE];
+struct ve_gre {
+	struct net_device *ipgre_fb_tunnel_dev;
+	struct ip_tunnel *tunnels[4][HASH_SIZE];
+};
 
-#define tunnels_r_l	(tunnels[3])
-#define tunnels_r	(tunnels[2])
-#define tunnels_l	(tunnels[1])
-#define tunnels_wc	(tunnels[0])
+#define tunnels_r_l	tunnels[3]
+#define tunnels_r	tunnels[2]
+#define tunnels_l	tunnels[1]
+#define tunnels_wc	tunnels[0]
 
 static DEFINE_RWLOCK(ipgre_lock);
 
 /* Given src, dst and key, find appropriate for input tunnel. */
 
-static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
+static struct ip_tunnel * ipgre_tunnel_lookup(struct ve_gre *vg, u32 remote, u32 local, u32 key)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(key);
 	struct ip_tunnel *t;
 
-	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+	for (t = vg->tunnels_r_l[h0^h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 				return t;
 		}
 	}
-	for (t = tunnels_r[h0^h1]; t; t = t->next) {
+	for (t = vg->tunnels_r[h0^h1]; t; t = t->next) {
 		if (remote == t->parms.iph.daddr) {
 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 				return t;
 		}
 	}
-	for (t = tunnels_l[h1]; t; t = t->next) {
+	for (t = vg->tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr ||
 		     (local == t->parms.iph.daddr && MULTICAST(local))) {
 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 				return t;
 		}
 	}
-	for (t = tunnels_wc[h1]; t; t = t->next) {
+	for (t = vg->tunnels_wc[h1]; t; t = t->next) {
 		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 			return t;
 	}
 
-	if (ipgre_fb_tunnel_dev->flags&IFF_UP)
-		return netdev_priv(ipgre_fb_tunnel_dev);
+	if (vg->ipgre_fb_tunnel_dev->flags&IFF_UP)
+		return netdev_priv(vg->ipgre_fb_tunnel_dev);
 	return NULL;
 }
 
@@ -199,6 +207,7 @@ static struct ip_tunnel **ipgre_bucket(s
 	u32 key = t->parms.i_key;
 	unsigned h = HASH(key);
 	int prio = 0;
+	struct ve_gre *vg = get_exec_env()->ve_gre;
 
 	if (local)
 		prio |= 1;
@@ -207,7 +216,7 @@ static struct ip_tunnel **ipgre_bucket(s
 		h ^= HASH(remote);
 	}
 
-	return &tunnels[prio][h];
+	return &vg->tunnels[prio][h];
 }
 
 static void ipgre_tunnel_link(struct ip_tunnel *t)
@@ -244,6 +253,7 @@ static struct ip_tunnel * ipgre_tunnel_l
 	unsigned h = HASH(key);
 	int prio = 0;
 	char name[IFNAMSIZ];
+	struct ve_gre *vg = get_exec_env()->ve_gre;
 
 	if (local)
 		prio |= 1;
@@ -251,7 +261,7 @@ static struct ip_tunnel * ipgre_tunnel_l
 		prio |= 2;
 		h ^= HASH(remote);
 	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = &vg->tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
 			if (key == t->parms.i_key)
 				return t;
@@ -325,6 +335,7 @@ static void ipgre_err(struct sk_buff *sk
 	int code = skb->h.icmph->code;
 	struct ip_tunnel *t;
 	u16 flags;
+	struct ve_struct *ve;
 
 	flags = p[0];
 	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
@@ -370,7 +381,10 @@ static void ipgre_err(struct sk_buff *sk
 	}
 
 	read_lock(&ipgre_lock);
-	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
+	ve = skb->owner_env;
+	if (ve->ve_gre == NULL)
+		goto out;
+	t = ipgre_tunnel_lookup(ve->ve_gre, iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
 	if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
 		goto out;
 
@@ -559,6 +573,7 @@ static int ipgre_rcv(struct sk_buff *skb
 	u32    seqno = 0;
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
+	struct ve_struct *ve;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
@@ -598,8 +613,12 @@ static int ipgre_rcv(struct sk_buff *skb
 		}
 	}
 
+	ve = skb->owner_env;
+	if (ve->ve_gre == NULL)
+		goto drop_nolock;
+
 	read_lock(&ipgre_lock);
-	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
+	if ((tunnel = ipgre_tunnel_lookup(ve->ve_gre, iph->saddr, iph->daddr, key)) != NULL) {
 		secpath_reset(skb);
 
 		skb->protocol = *(u16*)(h + 2);
@@ -953,11 +972,12 @@ ipgre_tunnel_ioctl (struct net_device *d
 	int err = 0;
 	struct ip_tunnel_parm p;
 	struct ip_tunnel *t;
+	struct ve_gre *vg = get_exec_env()->ve_gre;
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
 		t = NULL;
-		if (dev == ipgre_fb_tunnel_dev) {
+		if (dev == vg->ipgre_fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 				err = -EFAULT;
 				break;
@@ -974,7 +994,7 @@ ipgre_tunnel_ioctl (struct net_device *d
 	case SIOCADDTUNNEL:
 	case SIOCCHGTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto done;
 
 		err = -EFAULT;
@@ -996,7 +1016,7 @@ ipgre_tunnel_ioctl (struct net_device *d
 
 		t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 
-		if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+		if (dev != vg->ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
 					err = -EEXIST;
@@ -1048,10 +1068,10 @@ ipgre_tunnel_ioctl (struct net_device *d
 
 	case SIOCDELTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto done;
 
-		if (dev == ipgre_fb_tunnel_dev) {
+		if (dev == vg->ipgre_fb_tunnel_dev) {
 			err = -EFAULT;
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 				goto done;
@@ -1059,7 +1079,7 @@ ipgre_tunnel_ioctl (struct net_device *d
 			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t == netdev_priv(ipgre_fb_tunnel_dev))
+			if (t == netdev_priv(vg->ipgre_fb_tunnel_dev))
 				goto done;
 			dev = t->dev;
 		}
@@ -1185,6 +1205,112 @@ static int ipgre_close(struct net_device
 
 #endif
 
+static void cpt_dump_gre(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	struct cpt_tunnel_image v;
+	struct ip_tunnel *t;
+
+	t = netdev_priv(dev);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_VOID;
+
+	/* mark fb dev */
+	v.cpt_tnl_flags = CPT_TUNNEL_GRE;
+	if (dev == get_exec_env()->ve_gre->ipgre_fb_tunnel_dev)
+		v.cpt_tnl_flags |= CPT_TUNNEL_FBDEV;
+
+	v.cpt_i_flags = t->parms.i_flags;
+	v.cpt_o_flags = t->parms.o_flags;
+	v.cpt_i_key = t->parms.i_key;
+	v.cpt_o_key = t->parms.o_key;
+	v.cpt_i_seqno = t->i_seqno;
+	v.cpt_o_seqno = t->o_seqno;
+
+	BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
+	memcpy(&v.cpt_iphdr, &t->parms.iph, sizeof(t->parms.iph));
+
+	ops->write(&v, sizeof(v), ctx);
+}
+
+static int rst_restore_gre(loff_t start, struct cpt_netdev_image *di,
+			struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	int err = -ENODEV;
+	struct cpt_tunnel_image v;
+	struct net_device *dev;
+	struct ip_tunnel *t;
+	loff_t pos;
+	int fbdev;
+
+	pos = start + di->cpt_hdrlen;
+	err = ops->get_object(CPT_OBJ_NET_IPIP_TUNNEL,
+			pos, &v, sizeof(v), ctx);
+	if (err)
+		return err;
+
+	/* some sanity */
+	if (v.cpt_content != CPT_CONTENT_VOID)
+		return -EINVAL;
+
+	if (!(v.cpt_tnl_flags & CPT_TUNNEL_GRE))
+		return 1;
+
+	if (v.cpt_tnl_flags & CPT_TUNNEL_FBDEV) {
+		fbdev = 1;
+		err = 0;
+		dev = get_exec_env()->ve_gre->ipgre_fb_tunnel_dev;
+	} else {
+		fbdev = 0;
+		err = -ENOMEM;
+		dev = alloc_netdev(sizeof(struct ip_tunnel), di->cpt_name,
+				ipgre_tunnel_setup);
+		if (!dev)
+			goto out;
+	}
+
+	t = netdev_priv(dev);
+	t->parms.i_flags = v.cpt_i_flags;
+	t->parms.o_flags = v.cpt_o_flags;
+	t->parms.i_key = v.cpt_i_key;
+	t->parms.o_key = v.cpt_o_key;
+	t->i_seqno = v.cpt_i_seqno;
+	t->o_seqno = v.cpt_o_seqno;
+
+	BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
+	memcpy(&t->parms.iph, &v.cpt_iphdr, sizeof(t->parms.iph));
+
+	if (!fbdev) {
+		dev->init = ipgre_tunnel_init;
+		err = register_netdevice(dev);
+		if (err) {
+			free_netdev(dev);
+			goto out;
+		}
+
+		dev_hold(dev);
+		ipgre_tunnel_link(t);
+	}
+out:
+	return err;
+}
+
+static struct net_device_stats *cpt_gre_stats_ptr(struct net_device *dev)
+{
+	return &((struct ip_tunnel *)netdev_priv(dev))->stat;
+}
+
+static struct dev_cpt_ops ipgre_cpt_ops = {
+	.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL,
+	.name = "ipgre",
+	.dump = cpt_dump_gre,
+	.restore = rst_restore_gre,
+	.stats = cpt_gre_stats_ptr,
+};
+
 static void ipgre_tunnel_setup(struct net_device *dev)
 {
 	SET_MODULE_OWNER(dev);
@@ -1201,6 +1327,10 @@ static void ipgre_tunnel_setup(struct ne
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
+
+	dev->features		|= NETIF_F_VIRTUAL;
+
+	dev->cpt_ops		= &ipgre_cpt_ops;
 }
 
 static int ipgre_tunnel_init(struct net_device *dev)
@@ -1235,10 +1365,11 @@ static int ipgre_tunnel_init(struct net_
 	return 0;
 }
 
-static int __init ipgre_fb_tunnel_init(struct net_device *dev)
+static int ipgre_fb_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
+	struct ve_gre *vg = get_exec_env()->ve_gre;
 
 	tunnel->dev = dev;
 	strcpy(tunnel->parms.name, dev->name);
@@ -1249,7 +1380,7 @@ static int __init ipgre_fb_tunnel_init(s
 	tunnel->hlen		= sizeof(struct iphdr) + 4;
 
 	dev_hold(dev);
-	tunnels_wc[0]		= tunnel;
+	vg->tunnels_wc[0]		= tunnel;
 	return 0;
 }
 
@@ -1259,54 +1390,120 @@ static struct net_protocol ipgre_protoco
 	.err_handler	=	ipgre_err,
 };
 
+static int ipgre_create_tunnels(struct ve_struct *ve);
+static void ipgre_destroy_tunnels(struct ve_struct *ve);
+
+static int ve_gre_init(void *x)
+{
+	int err;
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_IPGRE))
+		return 0;
+
+	err = ipgre_create_tunnels(ve);
+	if (err == 0)
+		__module_get(THIS_MODULE);
+
+	return err;
+}
+
+static void ve_gre_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_IPGRE)) {
+		BUG_ON(ve->ve_gre != NULL);
+		return;
+	}
+
+	if (ve->ve_gre != NULL) {
+		ipgre_destroy_tunnels(ve);
+		module_put(THIS_MODULE);
+	}
+}
+
+static struct ve_hook ve_gre_hook = {
+	.owner = THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+	.init = ve_gre_init,
+	.fini = ve_gre_fini,
+};
 
 /*
  *	And now the modules code and kernel interface.
  */
 
+static int ipgre_create_tunnels(struct ve_struct *ve)
+{
+	int err = -ENOMEM;
+	struct ve_gre *vg;
+
+	vg = kzalloc(sizeof(struct ve_gre), GFP_KERNEL);
+	if (vg == NULL)
+		goto err0;
+
+	ve->ve_gre = vg;
+	vg->ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
+					   ipgre_tunnel_setup);
+	if (!vg->ipgre_fb_tunnel_dev)
+		goto err1;
+
+	vg->ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
+	if ((err = register_netdev(vg->ipgre_fb_tunnel_dev)))
+		goto err2;
+
+	return 0;
+
+err2:
+	free_netdev(vg->ipgre_fb_tunnel_dev);
+err1:
+	kfree(vg);
+err0:
+	printk("Cannot create GRE for %d (%d)\n", ve->veid, err);
+	return err;
+}
+
 static int __init ipgre_init(void)
 {
-	int err;
+	int err = -EAGAIN;
 
 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
 
 	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
 		printk(KERN_INFO "ipgre init: can't add protocol\n");
-		return -EAGAIN;
+		goto err0;
 	}
 
-	ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
-					   ipgre_tunnel_setup);
-	if (!ipgre_fb_tunnel_dev) {
-		err = -ENOMEM;
+	err = ipgre_create_tunnels(get_ve0());
+	if (err)
 		goto err1;
-	}
 
-	ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
+	ve_hook_register(VE_SS_CHAIN, &ve_gre_hook);
+	register_dev_cpt_ops(&ipgre_cpt_ops);
+	return 0;
 
-	if ((err = register_netdev(ipgre_fb_tunnel_dev)))
-		goto err2;
-out:
-	return err;
-err2:
-	free_netdev(ipgre_fb_tunnel_dev);
 err1:
 	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
-	goto out;
+err0:
+	return err;
 }
 
-static void __exit ipgre_destroy_tunnels(void)
+static void ipgre_destroy_tunnels(struct ve_struct *ve)
 {
 	int prio;
+	struct ve_gre *vg = ve->ve_gre;
 
+	rtnl_lock();
 	for (prio = 0; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
 			struct ip_tunnel *t;
-			while ((t = tunnels[prio][h]) != NULL)
+			while ((t = vg->tunnels[prio][h]) != NULL)
 				unregister_netdevice(t->dev);
 		}
 	}
+	rtnl_unlock();
 }
 
 static void __exit ipgre_fini(void)
@@ -1314,9 +1511,9 @@ static void __exit ipgre_fini(void)
 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
 
-	rtnl_lock();
-	ipgre_destroy_tunnels();
-	rtnl_unlock();
+	unregister_dev_cpt_ops(&ipgre_cpt_ops);
+	ve_hook_unregister(&ve_gre_hook);
+	ipgre_destroy_tunnels(get_ve0());
 }
 
 module_init(ipgre_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ip_input.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_input.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ip_input.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_input.c	2017-01-13 08:40:34.000000000 -0500
@@ -200,6 +200,9 @@ static inline int ip_local_deliver_finis
 {
 	int ihl = skb->nh.iph->ihl*4;
 
+	if (skb->destructor)
+		skb_orphan(skb);
+
 	__skb_pull(skb, ihl);
 
         /* Point into the IP datagram, just past the header. */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ipip.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ipip.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ipip.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ipip.c	2017-01-13 08:40:26.000000000 -0500
@@ -117,43 +117,52 @@
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
 
+#include <linux/vzcalluser.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
 #define HASH_SIZE  16
 #define HASH(addr) ((addr^(addr>>4))&0xF)
 
-static int ipip_fb_tunnel_init(struct net_device *dev);
-static int ipip_tunnel_init(struct net_device *dev);
-static void ipip_tunnel_setup(struct net_device *dev);
+struct ve_ipip {
+	struct net_device *ipip_fb_tunnel_dev;
 
-static struct net_device *ipip_fb_tunnel_dev;
+	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+	struct ip_tunnel *tunnels_r[HASH_SIZE];
+	struct ip_tunnel *tunnels_l[HASH_SIZE];
+	struct ip_tunnel *tunnels_wc[1];
+	struct ip_tunnel **tunnels[4];
+};
 
-static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_r[HASH_SIZE];
-static struct ip_tunnel *tunnels_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_wc[1];
-static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+static int ipip_fb_tunnel_init(struct net_device *dev);
+static void ipip_tunnel_setup(struct net_device *dev);
+static int ipip_tunnel_init(struct net_device *dev);
 
 static DEFINE_RWLOCK(ipip_lock);
 
-static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
+static struct ip_tunnel * ipip_tunnel_lookup(struct ve_ipip *vip, u32 remote, u32 local)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(local);
 	struct ip_tunnel *t;
 
-	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+	for (t = vip->tunnels_r_l[h0^h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 	}
-	for (t = tunnels_r[h0]; t; t = t->next) {
+	for (t = vip->tunnels_r[h0]; t; t = t->next) {
 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 	}
-	for (t = tunnels_l[h1]; t; t = t->next) {
+	for (t = vip->tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 			return t;
 	}
-	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+	if ((t = vip->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 		return t;
 	return NULL;
 }
@@ -164,6 +173,7 @@ static struct ip_tunnel **ipip_bucket(st
 	u32 local = t->parms.iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
+	struct ve_ipip *vip = get_exec_env()->ve_ipip;
 
 	if (remote) {
 		prio |= 2;
@@ -173,7 +183,7 @@ static struct ip_tunnel **ipip_bucket(st
 		prio |= 1;
 		h ^= HASH(local);
 	}
-	return &tunnels[prio][h];
+	return &vip->tunnels[prio][h];
 }
 
 
@@ -210,6 +220,7 @@ static struct ip_tunnel * ipip_tunnel_lo
 	unsigned h = 0;
 	int prio = 0;
 	char name[IFNAMSIZ];
+	struct ve_ipip *vip = get_exec_env()->ve_ipip;
 
 	if (remote) {
 		prio |= 2;
@@ -219,7 +230,7 @@ static struct ip_tunnel * ipip_tunnel_lo
 		prio |= 1;
 		h ^= HASH(local);
 	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = &vip->tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
@@ -263,9 +274,11 @@ failed:
 
 static void ipip_tunnel_uninit(struct net_device *dev)
 {
-	if (dev == ipip_fb_tunnel_dev) {
+	struct ve_ipip *vip = get_exec_env()->ve_ipip;
+
+	if (dev == vip->ipip_fb_tunnel_dev) {
 		write_lock_bh(&ipip_lock);
-		tunnels_wc[0] = NULL;
+		vip->tunnels_wc[0] = NULL;
 		write_unlock_bh(&ipip_lock);
 	} else
 		ipip_tunnel_unlink(netdev_priv(dev));
@@ -285,6 +298,7 @@ static int ipip_err(struct sk_buff *skb,
 	int code = skb->h.icmph->code;
 	struct ip_tunnel *t;
 	int err;
+	struct ve_struct *ve;
 
 	switch (type) {
 	default:
@@ -314,10 +328,15 @@ static int ipip_err(struct sk_buff *skb,
 		break;
 	}
 
-	err = -ENOENT;
+	err = 0;
 
 	read_lock(&ipip_lock);
-	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
+	ve = skb->owner_env;
+	if (ve->ve_ipip == NULL)
+		goto out;
+
+	err = -ENOENT;
+	t = ipip_tunnel_lookup(ve->ve_ipip, iph->daddr, iph->saddr);
 	if (t == NULL || t->parms.iph.daddr == 0)
 		goto out;
 
@@ -334,6 +353,7 @@ out:
 	read_unlock(&ipip_lock);
 	return err;
 #else
+#error "World is not perfect"
 	struct iphdr *iph = (struct iphdr*)dp;
 	int hlen = iph->ihl<<2;
 	struct iphdr *eiph;
@@ -472,14 +492,20 @@ static int ipip_rcv(struct sk_buff *skb)
 {
 	struct iphdr *iph;
 	struct ip_tunnel *tunnel;
+	struct ve_struct *ve;
+
+	ve = set_exec_env(skb->owner_env);
+	if (ve->ve_ipip == NULL)
+		goto out;
 
 	iph = skb->nh.iph;
 
 	read_lock(&ipip_lock);
-	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+	if ((tunnel = ipip_tunnel_lookup(ve->ve_ipip, iph->saddr, iph->daddr)) != NULL) {
 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 			read_unlock(&ipip_lock);
 			kfree_skb(skb);
+			(void)set_exec_env(ve);
 			return 0;
 		}
 
@@ -499,10 +525,12 @@ static int ipip_rcv(struct sk_buff *skb)
 		ipip_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
 		read_unlock(&ipip_lock);
+		(void)set_exec_env(ve);
 		return 0;
 	}
 	read_unlock(&ipip_lock);
-
+out:
+	(void)set_exec_env(ve);
 	return -1;
 }
 
@@ -525,6 +553,9 @@ static int ipip_tunnel_xmit(struct sk_bu
 	int    max_headroom;			/* The extra header space needed */
 	u32    dst = tiph->daddr;
 	int    mtu;
+	struct ve_struct *ve;
+
+	ve = set_exec_env(dev->owner_env);
 
 	if (tunnel->recursion++) {
 		tunnel->stat.collisions++;
@@ -608,6 +639,7 @@ static int ipip_tunnel_xmit(struct sk_bu
   			stats->tx_dropped++;
 			dev_kfree_skb(skb);
 			tunnel->recursion--;
+			(void)set_exec_env(ve);
 			return 0;
 		}
 		if (skb->sk)
@@ -645,6 +677,7 @@ static int ipip_tunnel_xmit(struct sk_bu
 
 	IPTUNNEL_XMIT();
 	tunnel->recursion--;
+	(void)set_exec_env(ve);
 	return 0;
 
 tx_error_icmp:
@@ -653,6 +686,7 @@ tx_error:
 	stats->tx_errors++;
 	dev_kfree_skb(skb);
 	tunnel->recursion--;
+	(void)set_exec_env(ve);
 	return 0;
 }
 
@@ -696,11 +730,12 @@ ipip_tunnel_ioctl (struct net_device *de
 	int err = 0;
 	struct ip_tunnel_parm p;
 	struct ip_tunnel *t;
+	struct ve_ipip *vip = get_exec_env()->ve_ipip;
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
 		t = NULL;
-		if (dev == ipip_fb_tunnel_dev) {
+		if (dev == vip->ipip_fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 				err = -EFAULT;
 				break;
@@ -717,7 +752,7 @@ ipip_tunnel_ioctl (struct net_device *de
 	case SIOCADDTUNNEL:
 	case SIOCCHGTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto done;
 
 		err = -EFAULT;
@@ -733,7 +768,7 @@ ipip_tunnel_ioctl (struct net_device *de
 
 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 
-		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+		if (dev != vip->ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
 					err = -EEXIST;
@@ -776,10 +811,10 @@ ipip_tunnel_ioctl (struct net_device *de
 
 	case SIOCDELTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto done;
 
-		if (dev == ipip_fb_tunnel_dev) {
+		if (dev == vip->ipip_fb_tunnel_dev) {
 			err = -EFAULT;
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 				goto done;
@@ -787,7 +822,7 @@ ipip_tunnel_ioctl (struct net_device *de
 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t->dev == ipip_fb_tunnel_dev)
+			if (t->dev == vip->ipip_fb_tunnel_dev)
 				goto done;
 			dev = t->dev;
 		}
@@ -815,6 +850,108 @@ static int ipip_tunnel_change_mtu(struct
 	return 0;
 }
 
+static void cpt_dump_ipip(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	struct cpt_tunnel_image v;
+	struct ip_tunnel *t;
+
+	t = netdev_priv(dev);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_VOID;
+
+	/* mark fb dev */
+	v.cpt_tnl_flags = 0;
+	if (dev == get_exec_env()->ve_ipip->ipip_fb_tunnel_dev)
+		v.cpt_tnl_flags |= CPT_TUNNEL_FBDEV;
+
+	v.cpt_i_flags = t->parms.i_flags;
+	v.cpt_o_flags = t->parms.o_flags;
+	v.cpt_i_key = t->parms.i_key;
+	v.cpt_o_key = t->parms.o_key;
+
+	BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
+	memcpy(&v.cpt_iphdr, &t->parms.iph, sizeof(t->parms.iph));
+
+	ops->write(&v, sizeof(v), ctx);
+}
+
+static int rst_restore_ipip(loff_t start, struct cpt_netdev_image *di,
+			struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	int err = -ENODEV;
+	struct cpt_tunnel_image v;
+	struct net_device *dev;
+	struct ip_tunnel *t;
+	loff_t pos;
+	int fbdev;
+
+	pos = start + di->cpt_hdrlen;
+	err = ops->get_object(CPT_OBJ_NET_IPIP_TUNNEL,
+			pos, &v, sizeof(v), ctx);
+	if (err)
+		return err;
+
+	/* some sanity */
+	if (v.cpt_content != CPT_CONTENT_VOID)
+		return -EINVAL;
+
+	if (v.cpt_tnl_flags & (~CPT_TUNNEL_FBDEV))
+		return 1;
+
+	if (v.cpt_tnl_flags & CPT_TUNNEL_FBDEV) {
+		fbdev = 1;
+		err = 0;
+		dev = get_exec_env()->ve_ipip->ipip_fb_tunnel_dev;
+	} else {
+		fbdev = 0;
+		err = -ENOMEM;
+		dev = alloc_netdev(sizeof(struct ip_tunnel), di->cpt_name,
+				ipip_tunnel_setup);
+		if (!dev)
+			goto out;
+	}
+
+	t = netdev_priv(dev);
+	t->parms.i_flags = v.cpt_i_flags;
+	t->parms.o_flags = v.cpt_o_flags;
+	t->parms.i_key = v.cpt_i_key;
+	t->parms.o_key = v.cpt_o_key;
+
+	BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
+	memcpy(&t->parms.iph, &v.cpt_iphdr, sizeof(t->parms.iph));
+
+	if (!fbdev) {
+		dev->init = ipip_tunnel_init;
+		err = register_netdevice(dev);
+		if (err) {
+			free_netdev(dev);
+			goto out;
+		}
+
+		dev_hold(dev);
+		ipip_tunnel_link(t);
+	}
+out:
+	return err;
+}
+
+static struct net_device_stats *cpt_ipip_stats_ptr(struct net_device *dev)
+{
+	return &((struct ip_tunnel *)netdev_priv(dev))->stat;
+}
+
+static struct dev_cpt_ops ipip_cpt_ops = {
+	.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL,
+	.name = "ipip",
+	.dump = cpt_dump_ipip,
+	.restore = rst_restore_ipip,
+	.stats = cpt_ipip_stats_ptr,
+};
+
 static void ipip_tunnel_setup(struct net_device *dev)
 {
 	SET_MODULE_OWNER(dev);
@@ -831,6 +968,10 @@ static void ipip_tunnel_setup(struct net
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
+
+	dev->features		|= NETIF_F_VIRTUAL;
+
+	dev->cpt_ops		= &ipip_cpt_ops;
 }
 
 static int ipip_tunnel_init(struct net_device *dev)
@@ -850,10 +991,11 @@ static int ipip_tunnel_init(struct net_d
 	return 0;
 }
 
-static int __init ipip_fb_tunnel_init(struct net_device *dev)
+static int ipip_fb_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
+	struct ve_ipip *vip = get_exec_env()->ve_ipip;
 
 	tunnel->dev = dev;
 	strcpy(tunnel->parms.name, dev->name);
@@ -863,7 +1005,7 @@ static int __init ipip_fb_tunnel_init(st
 	iph->ihl		= 5;
 
 	dev_hold(dev);
-	tunnels_wc[0]		= tunnel;
+	vip->tunnels_wc[0]		= tunnel;
 	return 0;
 }
 
@@ -876,50 +1018,122 @@ static struct xfrm_tunnel ipip_handler =
 static char banner[] __initdata =
 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 
-static int __init ipip_init(void)
+static int ipip_create_tunnels(struct ve_struct *ve)
 {
-	int err;
+	int err = -ENOMEM;
+	struct ve_ipip *vip;
 
-	printk(banner);
-
-	if (xfrm4_tunnel_register(&ipip_handler)) {
-		printk(KERN_INFO "ipip init: can't register tunnel\n");
-		return -EAGAIN;
-	}
-
-	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
-					   "tunl0",
-					   ipip_tunnel_setup);
-	if (!ipip_fb_tunnel_dev) {
-		err = -ENOMEM;
+	vip = kzalloc(sizeof(struct ve_ipip), GFP_KERNEL);
+	if (vip == NULL)
+		goto err0;
+
+	ve->ve_ipip = vip;
+	vip->tunnels[0] = vip->tunnels_wc;
+	vip->tunnels[1] = vip->tunnels_l;
+	vip->tunnels[2] = vip->tunnels_r;
+	vip->tunnels[3] = vip->tunnels_r_l;
+
+	vip->ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
+			"tunl0", ipip_tunnel_setup);
+	if (!vip->ipip_fb_tunnel_dev)
 		goto err1;
-	}
 
-	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
-
-	if ((err = register_netdev(ipip_fb_tunnel_dev)))
+	vip->ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
+	if ((err = register_netdev(vip->ipip_fb_tunnel_dev)))
 		goto err2;
- out:
+
+	return 0;
+
+err2:
+	free_netdev(vip->ipip_fb_tunnel_dev);
+err1:
+	kfree(vip);
+err0:
 	return err;
- err2:
-	free_netdev(ipip_fb_tunnel_dev);
- err1:
-	xfrm4_tunnel_deregister(&ipip_handler);
-	goto out;
 }
 
-static void __exit ipip_destroy_tunnels(void)
+static void ipip_destroy_tunnels(struct ve_struct *ve)
 {
 	int prio;
+	struct ve_ipip *vip = get_exec_env()->ve_ipip;
 
+	rtnl_lock();
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
 			struct ip_tunnel *t;
-			while ((t = tunnels[prio][h]) != NULL)
+			while ((t = vip->tunnels[prio][h]) != NULL)
 				unregister_netdevice(t->dev);
 		}
 	}
+
+	unregister_netdevice(vip->ipip_fb_tunnel_dev);
+	rtnl_unlock();
+
+	kfree(vip);
+	ve->ve_ipip = NULL;
+}
+
+static int ve_ipip_init(void *x)
+{
+	int err;
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_IPIP))
+		return 0;
+
+	err = ipip_create_tunnels(ve);
+	if (err == 0)
+		__module_get(THIS_MODULE);
+
+	return err;
+}
+
+static void ve_ipip_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_IPIP)) {
+		BUG_ON(ve->ve_ipip != NULL);
+		return;
+	}
+
+	if (ve->ve_ipip != NULL) {
+		/*
+		 * ipip is a module and can be loaded after ve's with
+		 * the ipip feature
+		 */
+
+		ipip_destroy_tunnels(ve);
+		module_put(THIS_MODULE);
+	}
+}
+
+static struct ve_hook ipip_hook = {
+	.owner = THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+	.init = ve_ipip_init,
+	.fini = ve_ipip_fini,
+};
+
+static int __init ipip_init(void)
+{
+	int err;
+
+	printk(banner);
+
+	if (xfrm4_tunnel_register(&ipip_handler)) {
+		printk(KERN_INFO "ipip init: can't register tunnel\n");
+		return -EAGAIN;
+	}
+
+	err = ipip_create_tunnels(get_ve0());
+	if (err)
+		xfrm4_tunnel_deregister(&ipip_handler);
+
+	ve_hook_register(VE_SS_CHAIN, &ipip_hook);
+	register_dev_cpt_ops(&ipip_cpt_ops);
+	return err;
 }
 
 static void __exit ipip_fini(void)
@@ -927,10 +1141,9 @@ static void __exit ipip_fini(void)
 	if (xfrm4_tunnel_deregister(&ipip_handler))
 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 
-	rtnl_lock();
-	ipip_destroy_tunnels();
-	unregister_netdevice(ipip_fb_tunnel_dev);
-	rtnl_unlock();
+	unregister_dev_cpt_ops(&ipip_cpt_ops);
+	ve_hook_unregister(&ipip_hook);
+	ipip_destroy_tunnels(get_ve0());
 }
 
 module_init(ipip_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ipmr.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ipmr.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ipmr.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ipmr.c	2017-01-13 08:40:21.000000000 -0500
@@ -836,7 +836,7 @@ static void mrtsock_destruct(struct sock
 {
 	rtnl_lock();
 	if (sk == mroute_socket) {
-		ipv4_devconf.mc_forwarding--;
+		ve_ipv4_devconf.mc_forwarding--;
 
 		write_lock_bh(&mrt_lock);
 		mroute_socket=NULL;
@@ -887,7 +887,7 @@ int ip_mroute_setsockopt(struct sock *sk
 				mroute_socket=sk;
 				write_unlock_bh(&mrt_lock);
 
-				ipv4_devconf.mc_forwarding++;
+				ve_ipv4_devconf.mc_forwarding++;
 			}
 			rtnl_unlock();
 			return ret;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ip_options.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_options.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ip_options.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_options.c	2017-01-13 08:40:41.000000000 -0500
@@ -337,7 +337,7 @@ int ip_options_compile(struct ip_options
 					pp_ptr = optptr + 2;
 					goto error;
 				}
-				if (skb) {
+				if (rt) {
 					memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
 					opt->is_changed = 1;
 				}
@@ -379,7 +379,7 @@ int ip_options_compile(struct ip_options
 						goto error;
 					}
 					opt->ts = optptr - iph;
-					if (skb) {
+					if (rt)  {
 						memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
 						timeptr = (__u32*)&optptr[optptr[2]+3];
 					}
@@ -474,7 +474,7 @@ error:
 	}
 	return -EINVAL;
 }
-
+EXPORT_SYMBOL(ip_options_compile);
 
 /*
  *	Undo all the changes done by ip_options_compile().
@@ -609,7 +609,7 @@ int ip_options_rcv_srr(struct sk_buff *s
 	struct rtable *rt2;
 	int err;
 
-	if (!opt->srr)
+	if (!opt->srr || !rt)
 		return 0;
 
 	if (skb->pkt_type != PACKET_HOST)
@@ -652,3 +652,4 @@ int ip_options_rcv_srr(struct sk_buff *s
 	}
 	return 0;
 }
+EXPORT_SYMBOL(ip_options_rcv_srr);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ip_output.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_output.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ip_output.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ip_output.c	2017-01-13 08:40:41.000000000 -0500
@@ -293,6 +293,7 @@ int ip_output(struct sk_buff *skb)
 		            ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
@@ -1357,12 +1358,13 @@ void ip_send_reply(struct sock *sk, stru
 		char			data[40];
 	} replyopts;
 	struct ipcm_cookie ipc;
-	u32 daddr;
+	u32 saddr, daddr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 
 	if (ip_options_echo(&replyopts.opt, skb))
 		return;
 
+	saddr = skb->nh.iph->daddr;
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
 
@@ -1376,7 +1378,7 @@ void ip_send_reply(struct sock *sk, stru
 	{
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
-						.saddr = rt->rt_spec_dst,
+						.saddr = saddr,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ipvs/ip_vs_conn.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ipvs/ip_vs_conn.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ipvs/ip_vs_conn.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ipvs/ip_vs_conn.c	2017-01-13 08:40:16.000000000 -0500
@@ -906,7 +906,8 @@ int ip_vs_conn_init(void)
 	/* Allocate ip_vs_conn slab cache */
 	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
 					      sizeof(struct ip_vs_conn), 0,
-					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+					      SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					      NULL, NULL);
 	if (!ip_vs_conn_cachep) {
 		vfree(ip_vs_conn_tab);
 		return -ENOMEM;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/ipvs/ip_vs_core.c kernel-2.6.18-417.el5-028stab121/net/ipv4/ipvs/ip_vs_core.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/ipvs/ip_vs_core.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/ipvs/ip_vs_core.c	2017-01-13 08:40:21.000000000 -0500
@@ -973,6 +973,10 @@ ip_vs_in(unsigned int hooknum, struct sk
 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
 	 *	... don't know why 1st test DOES NOT include 2nd (?)
 	 */
+	/*
+	 * VZ: the question above is right.
+	 * The second test is superfluous.
+	 */
 	if (unlikely(skb->pkt_type != PACKET_HOST
 		     || skb->dev == &loopback_dev || skb->sk)) {
 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/arp_tables.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/arp_tables.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/arp_tables.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/arp_tables.c	2017-01-13 08:40:15.000000000 -0500
@@ -1206,6 +1206,8 @@ err1:
 static void __exit arp_tables_fini(void)
 {
 	nf_unregister_sockopt(&arpt_sockopts);
+	xt_unregister_target(&arpt_error_target);
+	xt_unregister_target(&arpt_standard_target);
 	xt_proto_fini(NF_ARP);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_core.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_core.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_core.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_core.c	2017-01-13 08:40:40.000000000 -0500
@@ -48,6 +48,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
+#include <ub/ub_mem.h>
 
 #define IP_CONNTRACK_VERSION	"2.4"
 
@@ -59,22 +60,41 @@
 
 DEFINE_RWLOCK(ip_conntrack_lock);
 
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_helpers \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#define ve_ip_conntrack_unconfirmed \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_unconfirmed)
+#else
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
+struct list_head *ip_conntrack_hash;
+static LIST_HEAD(unconfirmed);
+#define ve_ip_conntrack_count 		ip_conntrack_count
+#define ve_ip_conntrack_helpers		helpers
+#define ve_ip_conntrack_max 		ip_conntrack_max
+#define ve_ip_conntrack_unconfirmed 	unconfirmed
+#endif
+
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+
 unsigned int ip_conntrack_htable_size = 0;
 int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid;
-static LIST_HEAD(unconfirmed);
+#ifndef CONFIG_VE_IPTABLES
 static int ip_conntrack_vmalloc;
+#endif
 
 static unsigned int ip_conntrack_next_id;
 static unsigned int ip_conntrack_expect_next_id;
@@ -104,6 +124,9 @@ void ip_ct_deliver_cached_events(const s
 {
 	struct ip_conntrack_ecache *ecache;
 	
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	local_bh_disable();
 	ecache = &__get_cpu_var(ip_conntrack_ecache);
 	if (ecache->ct == ct)
@@ -132,6 +155,9 @@ static void ip_ct_event_cache_flush(void
 	struct ip_conntrack_ecache *ecache;
 	int cpu;
 
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	for_each_possible_cpu(cpu) {
 		ecache = &per_cpu(ip_conntrack_ecache, cpu);
 		if (ecache->ct)
@@ -150,10 +176,12 @@ static unsigned int ip_conntrack_hash_rn
 static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
 			    unsigned int size, unsigned int rnd)
 {
-	return (jhash_3words(tuple->src.ip,
+	unsigned int hash;
+	hash = jhash_3words(tuple->src.ip,
 	                     (tuple->dst.ip ^ tuple->dst.protonum),
 	                     (tuple->src.u.all | (tuple->dst.u.all << 16)),
-	                     rnd) % size);
+	                     rnd);
+	return ((u64)hash * size) >> 32;
 }
 
 static u_int32_t
@@ -247,7 +275,7 @@ __ip_conntrack_expect_find(const struct 
 {
 	struct ip_conntrack_expect *i;
 	
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
 			atomic_inc(&i->use);
 			return i;
@@ -276,7 +304,7 @@ find_expectation(const struct ip_conntra
 {
 	struct ip_conntrack_expect *i;
 
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		/* If master is not in hash table yet (ie. packet hasn't left
 		   this machine yet), how can other end know about expected?
 		   Hence these are not the droids you are looking for (if
@@ -305,7 +333,7 @@ void ip_ct_remove_expectations(struct ip
 	if (ct->expecting == 0)
 		return;
 
-	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_safe(i, tmp, &ve_ip_conntrack_expect_list, list) {
 		if (i->master == ct && del_timer(&i->timeout)) {
 			ip_ct_unlink_expect(i);
 			ip_conntrack_expect_put(i);
@@ -323,8 +351,10 @@ clean_from_lists(struct ip_conntrack *ct
 
 	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	LIST_DELETE(&ct->ct_owner_env->_ip_conntrack->_ip_conntrack_hash[ho],
+ 		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+	LIST_DELETE(&ct->ct_owner_env->_ip_conntrack->_ip_conntrack_hash[hr],
+ 		    &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all pending expectations */
 	ip_ct_remove_expectations(ct);
@@ -335,7 +365,11 @@ destroy_conntrack(struct nf_conntrack *n
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
 	struct ip_conntrack_protocol *proto;
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *old;
 
+	old = set_exec_env(ct->ct_owner_env);
+#endif
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
 	IP_NF_ASSERT(!timer_pending(&ct->timeout));
@@ -350,8 +384,8 @@ destroy_conntrack(struct nf_conntrack *n
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
-	if (ip_conntrack_destroyed)
-		ip_conntrack_destroyed(ct);
+	if (ve_ip_conntrack_destroyed)
+		ve_ip_conntrack_destroyed(ct);
 
 	write_lock_bh(&ip_conntrack_lock);
 	/* Expectations will have been removed in clean_from_lists,
@@ -374,6 +408,9 @@ destroy_conntrack(struct nf_conntrack *n
 
 	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
 	ip_conntrack_free(ct);
+#ifdef CONFIG_VE_IPTABLES
+	(void)set_exec_env(old);
+#endif
 }
 
 static void death_by_timeout(unsigned long ul_conntrack)
@@ -407,7 +444,7 @@ __ip_conntrack_find(const struct ip_conn
 	unsigned int hash = hash_conntrack(tuple);
 
 	ASSERT_READ_LOCK(&ip_conntrack_lock);
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+	list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) {
 		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
 			CONNTRACK_STAT_INC(found);
 			return h;
@@ -439,9 +476,9 @@ static void __ip_conntrack_hash_insert(s
 					unsigned int repl_hash) 
 {
 	ct->id = ++ip_conntrack_next_id;
-	list_prepend(&ip_conntrack_hash[hash],
+	list_prepend(&ve_ip_conntrack_hash[hash],
 		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&ip_conntrack_hash[repl_hash],
+	list_prepend(&ve_ip_conntrack_hash[repl_hash],
 		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
 }
 
@@ -492,11 +529,11 @@ __ip_conntrack_confirm(struct sk_buff **
 	/* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
+	if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
 		       conntrack_tuple_cmp,
 		       struct ip_conntrack_tuple_hash *,
 		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+	    && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
 			  conntrack_tuple_cmp,
 			  struct ip_conntrack_tuple_hash *,
 			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
@@ -547,34 +584,26 @@ ip_conntrack_tuple_taken(const struct ip
 	return h != NULL;
 }
 
-#define IP_CT_EVICTION_RANGE   8
-
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static int early_drop(unsigned int hash)
+static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
+{
+	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
+}
+
+static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct = NULL, *tmp;
+	struct ip_conntrack *ct = NULL;
 	int dropped = 0;
-	unsigned int i, cnt = 0;
 
 	read_lock_bh(&ip_conntrack_lock);
-	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-			tmp = tuplehash_to_ctrack(h);
-			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
-				ct = tmp;
-			cnt++;
-		}
-
-		if (ct || cnt >= IP_CT_EVICTION_RANGE)
-			break;
-
-		hash = (hash + 1) % ip_conntrack_htable_size;
-	}
-	if (ct)
+	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
+	if (h) {
+		ct = tuplehash_to_ctrack(h);
 		atomic_inc(&ct->ct_general.use);
+	}
 	read_unlock_bh(&ip_conntrack_lock);
 
 	if (!ct)
@@ -598,7 +627,7 @@ static inline int helper_cmp(const struc
 static struct ip_conntrack_helper *
 __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
+	return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp,
 			 struct ip_conntrack_helper *,
 			 tuple);
 }
@@ -634,7 +663,7 @@ void ip_conntrack_helper_put(struct ip_c
 struct ip_conntrack_protocol *
 __ip_conntrack_proto_find(u_int8_t protocol)
 {
-	return ip_ct_protos[protocol];
+	return ve_ip_ct_protos[protocol];
 }
 
 /* this is guaranteed to always return a valid protocol helper, since
@@ -661,9 +690,10 @@ void ip_conntrack_proto_put(struct ip_co
 }
 
 struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
-					struct ip_conntrack_tuple *repl)
+		struct ip_conntrack_tuple *repl, struct user_beancounter *ub)
 {
 	struct ip_conntrack *conntrack;
+	struct user_beancounter *old_ub;
 
 	if (!ip_conntrack_hash_rnd_initted) {
 		get_random_bytes(&ip_conntrack_hash_rnd, 4);
@@ -671,25 +701,28 @@ struct ip_conntrack *ip_conntrack_alloc(
 	}
 
 	/* We don't want any race condition at early drop stage */
-	atomic_inc(&ip_conntrack_count);
+	atomic_inc(&ve_ip_conntrack_count);
 
-	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
+	if (ve_ip_conntrack_max
+	    && atomic_read(&ve_ip_conntrack_count) > ve_ip_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
-		if (!early_drop(hash)) {
-			atomic_dec(&ip_conntrack_count);
+  		/* Try dropping from this hash chain. */
+		if (!early_drop(&ve_ip_conntrack_hash[hash])) {
+			atomic_dec(&ve_ip_conntrack_count);
 			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "ip_conntrack: table full, dropping"
-				       " packet.\n");
+				ve_printk(VE_LOG_BOTH, KERN_WARNING
+				       "ip_conntrack: CT %d: table full, dropping"
+				       " packet.\n", VEID(get_exec_env()));
 			return ERR_PTR(-ENOMEM);
 		}
 	}
 
+	old_ub = set_exec_ub(ub);
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+	(void)set_exec_ub(old_ub);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
-		atomic_dec(&ip_conntrack_count);
+		atomic_dec(&ve_ip_conntrack_count);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -702,6 +735,9 @@ struct ip_conntrack *ip_conntrack_alloc(
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
+#ifdef CONFIG_VE_IPTABLES
+	conntrack->ct_owner_env = get_exec_env();
+#endif
 
 	return conntrack;
 }
@@ -709,7 +745,7 @@ struct ip_conntrack *ip_conntrack_alloc(
 void
 ip_conntrack_free(struct ip_conntrack *conntrack)
 {
-	atomic_dec(&ip_conntrack_count);
+	atomic_dec(&ve_ip_conntrack_count);
 	kmem_cache_free(ip_conntrack_cachep, conntrack);
 }
 
@@ -723,13 +759,22 @@ init_conntrack(struct ip_conntrack_tuple
 	struct ip_conntrack *conntrack;
 	struct ip_conntrack_tuple repl_tuple;
 	struct ip_conntrack_expect *exp;
+	struct user_beancounter *ub;
 
 	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
 		DEBUGP("Can't invert tuple.\n");
 		return NULL;
 	}
 
-	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
+#ifdef CONFIG_USER_RESOURCE
+	if (skb->dev != NULL)  /* received skb */
+		ub = netdev_bc(skb->dev)->exec_ub;
+	else if (skb->sk != NULL) /* sent skb */
+		ub = sock_bc(skb->sk)->ub;
+	else
+#endif
+		ub = NULL;
+	conntrack = ip_conntrack_alloc(tuple, &repl_tuple, ub);
 	if (conntrack == NULL || IS_ERR(conntrack))
 		return (struct ip_conntrack_tuple_hash *)conntrack;
 
@@ -767,7 +812,8 @@ init_conntrack(struct ip_conntrack_tuple
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
+			&ve_ip_conntrack_unconfirmed);
 
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -959,7 +1005,7 @@ void ip_conntrack_unexpect_related(struc
 
 	write_lock_bh(&ip_conntrack_lock);
 	/* choose the the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
 		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
 			ip_ct_unlink_expect(i);
 			write_unlock_bh(&ip_conntrack_lock);
@@ -993,11 +1039,11 @@ void ip_conntrack_expect_put(struct ip_c
 		kmem_cache_free(ip_conntrack_expect_cachep, exp);
 }
 
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
+void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
 {
 	atomic_inc(&exp->use);
 	exp->master->expecting++;
-	list_add(&exp->list, &ip_conntrack_expect_list);
+	list_add(&exp->list, &ve_ip_conntrack_expect_list);
 
 	init_timer(&exp->timeout);
 	exp->timeout.data = (unsigned long)exp;
@@ -1009,13 +1055,14 @@ static void ip_conntrack_expect_insert(s
 	atomic_inc(&exp->use);
 	CONNTRACK_STAT_INC(expect_create);
 }
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_insert);
 
 /* Race with expectations being used means we could have none to find; OK. */
 static void evict_oldest_expect(struct ip_conntrack *master)
 {
 	struct ip_conntrack_expect *i;
 
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
 		if (i->master == master) {
 			if (del_timer(&i->timeout)) {
 				ip_ct_unlink_expect(i);
@@ -1046,7 +1093,7 @@ int ip_conntrack_expect_related(struct i
 	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
 
 	write_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -1094,18 +1141,48 @@ int ip_conntrack_helper_register(struct 
 {
 	BUG_ON(me->timeout == 0);
 	write_lock_bh(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_prepend(&ve_ip_conntrack_helpers, me);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	return 0;
 }
 
+int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *me)
+{
+	int ret;
+	struct module *mod = me->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct ip_conntrack_helper *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
+		me = tmp;
+	}
+
+	ret = ip_conntrack_helper_register(me);
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())){
+		kfree(me);
+nomem:
+		module_put(mod);
+	}
+	return ret;
+}
+
 struct ip_conntrack_helper *
 __ip_conntrack_helper_find_byname(const char *name)
 {
 	struct ip_conntrack_helper *h;
 
-	list_for_each_entry(h, &helpers, list) {
+	list_for_each_entry(h, &ve_ip_conntrack_helpers, list) {
 		if (!strcmp(h->name, name))
 			return h;
 	}
@@ -1130,19 +1207,20 @@ void ip_conntrack_helper_unregister(stru
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	LIST_DELETE(&ve_ip_conntrack_helpers, me);
 
 	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, list) {
 		if (exp->master->helper == me && del_timer(&exp->timeout)) {
 			ip_ct_unlink_expect(exp);
 			ip_conntrack_expect_put(exp);
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
+	LIST_FIND_W(&ve_ip_conntrack_unconfirmed, unhelp,
+			struct ip_conntrack_tuple_hash*, me);
 	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
+		LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
 			    struct ip_conntrack_tuple_hash *, me);
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -1150,6 +1228,25 @@ void ip_conntrack_helper_unregister(stru
 	synchronize_net();
 }
 
+void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
+{
+
+	if (!ve_is_super(get_exec_env())) {
+		read_lock_bh(&ip_conntrack_lock);
+		me = list_named_find2(&ve_ip_conntrack_helpers, me->name);
+		read_unlock_bh(&ip_conntrack_lock);
+		if (!me)
+			return;
+	}
+
+	ip_conntrack_helper_unregister(me);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(me->me);
+		kfree(me);
+	}
+}
+
 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
 void __ip_ct_refresh_acct(struct ip_conntrack *ct, 
 		        enum ip_conntrack_info ctinfo,
@@ -1286,13 +1383,13 @@ get_next_corpse(int (*iter)(struct ip_co
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
+		h = LIST_FIND_W(&ve_ip_conntrack_hash[*bucket], do_iter,
 				struct ip_conntrack_tuple_hash *, iter, data);
 		if (h)
 			break;
 	}
 	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
+		h = LIST_FIND_W(&ve_ip_conntrack_unconfirmed, do_iter,
 				struct ip_conntrack_tuple_hash *, iter, data);
 	if (h)
 		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
@@ -1329,6 +1426,11 @@ getorigdst(struct sock *sk, int optval, 
 	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_tuple tuple;
 	
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_ip_conntrack)
+		return -ENOPROTOOPT;
+#endif
+
 	IP_CT_TUPLE_U_BLANK(&tuple);
 	tuple.src.ip = inet->rcv_saddr;
 	tuple.src.u.tcp.port = inet->sport;
@@ -1400,12 +1502,17 @@ static void free_conntrack_hash(struct l
 			   get_order(sizeof(struct list_head) * size));
 }
 
+static void ip_conntrack_cache_free(void)
+{
+	kmem_cache_destroy(ip_conntrack_expect_cachep);
+	kmem_cache_destroy(ip_conntrack_cachep);
+	nf_unregister_sockopt(&so_getorigdst);
+}
+
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
 void ip_conntrack_cleanup(void)
 {
-	ip_ct_attach = NULL;
-
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1414,19 +1521,32 @@ void ip_conntrack_cleanup(void)
 	ip_ct_event_cache_flush();
  i_see_dead_people:
 	ip_conntrack_flush();
-	if (atomic_read(&ip_conntrack_count) != 0) {
+	if (atomic_read(&ve_ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
-	/* wait until all references to ip_conntrack_untracked are dropped */
-	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-		schedule();
-
-	kmem_cache_destroy(ip_conntrack_cachep);
-	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+	if (ve_is_super(get_exec_env())) {
+		/* wait until all references to ip_conntrack_untracked are
+		 * dropped */
+		while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
+			schedule();
+		ip_ct_attach = NULL;
+		ip_conntrack_cache_free();
+	}
+	free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
 			    ip_conntrack_htable_size);
-	nf_unregister_sockopt(&so_getorigdst);
+	ve_ip_conntrack_hash = NULL;		    
+	INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+	atomic_set(&ve_ip_conntrack_count, 0);
+	ve_ip_conntrack_max = 0;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_ct_protos);
+	ve_ip_ct_protos = NULL;
+	kfree(get_exec_env()->_ip_conntrack);
+	get_exec_env()->_ip_conntrack = NULL;
+#endif
 }
 
 static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1435,13 +1555,13 @@ static struct list_head *alloc_hashtable
 	unsigned int i;
 
 	*vmalloced = 0; 
-	hash = (void*)__get_free_pages(GFP_KERNEL, 
+	hash = (void*)__get_free_pages(GFP_KERNEL_UBC,
 				       get_order(sizeof(struct list_head)
 						 * size));
 	if (!hash) { 
 		*vmalloced = 1;
 		printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct list_head) * size);
+		hash = ub_vmalloc(sizeof(struct list_head) * size);
 	}
 
 	if (hash)
@@ -1478,11 +1598,11 @@ static int set_hashsize(const char *val,
 	write_lock_bh(&ip_conntrack_lock);
 
 	/* Don't need to empty the hash table if its not allocated yet */
-	i = (!ip_conntrack_hash) ? ip_conntrack_htable_size : 0;
+	i = (!ve_ip_conntrack_hash) ? ip_conntrack_htable_size : 0;
 
 	for (; i < ip_conntrack_htable_size; i++) {
-		while (!list_empty(&ip_conntrack_hash[i])) {
-			h = list_entry(ip_conntrack_hash[i].next,
+		while (!list_empty(&ve_ip_conntrack_hash[i])) {
+			h = list_entry(ve_ip_conntrack_hash[i].next,
 				       struct ip_conntrack_tuple_hash, list);
 			list_del(&h->list);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
@@ -1490,12 +1610,12 @@ static int set_hashsize(const char *val,
 		}
 	}
 	old_size = ip_conntrack_htable_size;
-	old_vmalloced = ip_conntrack_vmalloc;
-	old_hash = ip_conntrack_hash;
+	old_vmalloced = ve_ip_conntrack_vmalloc;
+	old_hash = ve_ip_conntrack_hash;
 
 	ip_conntrack_htable_size = hashsize;
-	ip_conntrack_vmalloc = vmalloced;
-	ip_conntrack_hash = hash;
+	ve_ip_conntrack_vmalloc = vmalloced;
+	ve_ip_conntrack_hash = hash;
 	ip_conntrack_hash_rnd = rnd;
 	write_unlock_bh(&ip_conntrack_lock);
 	if (old_hash)
@@ -1511,9 +1631,8 @@ s16 (*ip_ct_nat_offset)(const struct ip_
 			u32 seq);
 EXPORT_SYMBOL_GPL(ip_ct_nat_offset);
 
-int __init ip_conntrack_init(void)
+static int ip_conntrack_cache_create(void)
 {
-	unsigned int i;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
@@ -1527,72 +1646,131 @@ int __init ip_conntrack_init(void)
 		if (ip_conntrack_htable_size < 16)
 			ip_conntrack_htable_size = 16;
 	}
-	ip_conntrack_max = 8 * ip_conntrack_htable_size;
+	ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
 
 	printk("ip_conntrack version %s (%u buckets, %d max)"
 	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-	       ip_conntrack_htable_size, ip_conntrack_max,
+	       ip_conntrack_htable_size, ve_ip_conntrack_max,
 	       sizeof(struct ip_conntrack));
 
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret != 0) {
 		printk(KERN_ERR "Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-					    &ip_conntrack_vmalloc);
-	if (!ip_conntrack_hash) {
-		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-		goto err_unreg_sockopt;
+		goto out_sockopt;
 	}
 
+	ret = -ENOMEM;
 	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
 	                                        sizeof(struct ip_conntrack), 0,
-	                                        0, NULL, NULL);
+	                                        SLAB_UBC, NULL, NULL);
 	if (!ip_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-		goto err_free_hash;
+		goto err_unreg_sockopt;
 	}
 
 	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
 					sizeof(struct ip_conntrack_expect),
-					0, 0, NULL, NULL);
+					0, SLAB_UBC, NULL, NULL);
 	if (!ip_conntrack_expect_cachep) {
 		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
 		goto err_free_conntrack_slab;
 	}
 
+	return 0;
+
+err_free_conntrack_slab:
+	kmem_cache_destroy(ip_conntrack_cachep);
+err_unreg_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+out_sockopt:
+	return ret;
+}
+
+int ip_conntrack_init(void)
+{
+	struct ve_struct *env;
+	unsigned int i;
+	int ret;
+
+	env = get_exec_env();
+#ifdef CONFIG_VE_IPTABLES
+	ret = -ENOMEM;
+	env->_ip_conntrack =
+		kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
+	if (!env->_ip_conntrack)
+		goto out;
+	memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack));
+	if (ve_is_super(env)) {
+		ret = ip_conntrack_cache_create();
+		if (ret)
+			goto cache_fail;
+	} else
+		ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
+#else /* CONFIG_VE_IPTABLES */
+	ret = ip_conntrack_cache_create();
+	if (ret)
+		goto out;
+#endif
+
+	ret = -ENOMEM;
+	ve_ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+					    &ve_ip_conntrack_vmalloc);
+	if (!ve_ip_conntrack_hash) {
+		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+		goto err_free_cache;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_ct_protos = (struct ip_conntrack_protocol **)
+		ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL);
+	if (!ve_ip_ct_protos)
+		goto err_free_hash;
+#endif
 	/* Don't NEED lock here, but good form anyway. */
 	write_lock_bh(&ip_conntrack_lock);
 	for (i = 0; i < MAX_IP_CT_PROTO; i++)
-		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+		ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol;
 	/* Sew in builtin protocols. */
-	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+	ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+	ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+	ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	write_unlock_bh(&ip_conntrack_lock);
 
-	/* For use by ipt_REJECT */
-	ip_ct_attach = ip_conntrack_attach;
 	/* Howto get NAT offsets */
-	ip_ct_nat_offset = NULL;
+	if (ve_is_super(env))
+		ip_ct_nat_offset = NULL;
 
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+
+	if (ve_is_super(env)) {
+		/* For use by ipt_REJECT */
+		ip_ct_attach = ip_conntrack_attach;
+
+		/* Set up fake conntrack:
+		    - to never be deleted, not in any hashes */
+		atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+		/*  - and look it like as a confirmed connection */
+		set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	}
 
-	return ret;
+	return 0;
 
-err_free_conntrack_slab:
-	kmem_cache_destroy(ip_conntrack_cachep);
+#ifdef CONFIG_VE_IPTABLES
 err_free_hash:
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+#endif
+	free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
 			    ip_conntrack_htable_size);
-err_unreg_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-
-	return -ENOMEM;
+	ve_ip_conntrack_hash = NULL;
+err_free_cache:
+	if (ve_is_super(env))
+		ip_conntrack_cache_free();
+#ifdef CONFIG_VE_IPTABLES
+cache_fail:
+	kfree(env->_ip_conntrack);
+	env->_ip_conntrack = NULL;
+#endif
+out:
+	return ret;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_ftp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_ftp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_ftp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_ftp.c	2017-01-13 08:40:23.000000000 -0500
@@ -14,6 +14,7 @@
 #include <linux/ctype.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
@@ -436,8 +437,8 @@ static int help(struct sk_buff **pskb,
 
 	/* Now, NAT might want to mangle the packet, and register the
 	 * (possibly changed) expectation itself. */
-	if (ip_nat_ftp_hook)
-		ret = ip_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype,
+	if (ve_ip_nat_ftp_hook)
+		ret = ve_ip_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype,
 				      matchoff, matchlen, exp, &seq);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
@@ -464,15 +465,40 @@ static struct ip_conntrack_helper ftp[MA
 static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
 
 /* Not __exit: called from init() */
-static void ip_conntrack_ftp_fini(void)
+void fini_ip_ct_ftp(void)
 {
 	int i;
 	for (i = 0; i < ports_c; i++) {
 		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
 				ports[i]);
-		ip_conntrack_helper_unregister(&ftp[i]);
+		virt_ip_conntrack_helper_unregister(&ftp[i]);
 	}
 
+}
+
+int init_ip_ct_ftp(void)
+{
+	int i, ret;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+				ports[i]);
+		ret = virt_ip_conntrack_helper_register(&ftp[i]);
+		if (ret) {
+			fini_ip_ct_ftp();
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/* Not __exit: called from init() */
+static void ip_conntrack_ftp_fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack_ftp);
+	KSYMUNRESOLVE(init_ip_ct_ftp);
+	KSYMUNRESOLVE(fini_ip_ct_ftp);
+	fini_ip_ct_ftp();
 	kfree(ftp_buffer);
 }
 
@@ -507,13 +533,17 @@ static int __init ip_conntrack_ftp_init(
 
 		DEBUGP("ip_ct_ftp: registering helper for port %d\n", 
 				ports[i]);
-		ret = ip_conntrack_helper_register(&ftp[i]);
+		ret = virt_ip_conntrack_helper_register(&ftp[i]);
 
 		if (ret) {
 			ip_conntrack_ftp_fini();
 			return ret;
 		}
 	}
+
+	KSYMRESOLVE(init_ip_ct_ftp);
+	KSYMRESOLVE(fini_ip_ct_ftp);
+	KSYMMODRESOLVE(ip_conntrack_ftp);
 	return 0;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c	2017-01-13 08:40:15.000000000 -0500
@@ -518,7 +518,7 @@ int decode_seq(bitstr_t * bs, field_t * 
 			CHECK_BOUND(bs, 2);
 			len = get_len(bs);
 			CHECK_BOUND(bs, len);
-			if (!base) {
+			if (!base || !(son->attr & DECODE)) {
 				PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
 				      " ", son->name);
 				bs->cur += len;
@@ -704,6 +704,8 @@ int decode_choice(bitstr_t * bs, field_t
 	} else {
 		ext = 0;
 		type = get_bits(bs, f->sz);
+		if (type >= f->lb)
+			return H323_ERROR_RANGE;
 	}
 
 	/* Write Type */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_irc.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_irc.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_irc.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_irc.c	2017-01-13 08:40:23.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
@@ -243,6 +244,33 @@ static char irc_names[MAX_PORTS][sizeof(
 
 static void ip_conntrack_irc_fini(void);
 
+void fini_ip_ct_irc(void)
+{
+	int i;
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("unregistering port %d\n",
+		       ports[i]);
+		virt_ip_conntrack_helper_unregister(&irc_helpers[i]);
+	}
+}
+
+int init_ip_ct_irc(void)
+{
+	int i, ret;
+
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("port #%d: %d\n", i, ports[i]);
+		ret = virt_ip_conntrack_helper_register(&irc_helpers[i]);
+		if (ret) {
+			printk("ip_conntrack_irc: ERROR registering port %d\n",
+				ports[i]);
+			fini_ip_ct_irc();
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
 static int __init ip_conntrack_irc_init(void)
 {
 	int i, ret;
@@ -282,7 +310,7 @@ static int __init ip_conntrack_irc_init(
 
 		DEBUGP("port #%d: %d\n", i, ports[i]);
 
-		ret = ip_conntrack_helper_register(hlpr);
+		ret = virt_ip_conntrack_helper_register(hlpr);
 
 		if (ret) {
 			printk("ip_conntrack_irc: ERROR registering port %d\n",
@@ -291,6 +319,10 @@ static int __init ip_conntrack_irc_init(
 			return -EBUSY;
 		}
 	}
+
+	KSYMRESOLVE(init_ip_ct_irc);
+	KSYMRESOLVE(fini_ip_ct_irc);
+	KSYMMODRESOLVE(ip_conntrack_irc);
 	return 0;
 }
 
@@ -298,12 +330,10 @@ static int __init ip_conntrack_irc_init(
  * it is needed by the init function */
 static void ip_conntrack_irc_fini(void)
 {
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering port %d\n",
-		       ports[i]);
-		ip_conntrack_helper_unregister(&irc_helpers[i]);
-	}
+	KSYMMODUNRESOLVE(ip_conntrack_irc);
+	KSYMUNRESOLVE(init_ip_ct_irc);
+	KSYMUNRESOLVE(fini_ip_ct_irc);
+	fini_ip_ct_irc();
 	kfree(irc_buffer);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_netlink.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_netlink.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_netlink.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_netlink.c	2017-01-13 08:40:23.000000000 -0500
@@ -29,6 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <net/sock.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -39,6 +40,8 @@
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
+#include <ub/beancounter.h>
+#include <ub/ub_sk.h>
 
 MODULE_LICENSE("GPL");
 
@@ -418,7 +421,7 @@ ctnetlink_dump_table(struct sk_buff *skb
 	last = (struct ip_conntrack *)cb->args[1];
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
 restart:
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+		list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -465,7 +468,7 @@ ctnetlink_dump_table_w(struct sk_buff *s
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+		list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -716,6 +719,9 @@ ctnetlink_del_conntrack(struct sock *ctn
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (!ve_ip_ct_initialized())
+		return -ENOPROTOOPT;
+
 	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
 		return -EINVAL;
 
@@ -768,6 +774,9 @@ ctnetlink_get_conntrack(struct sock *ctn
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (!ve_ip_ct_initialized())
+		return -ENOPROTOOPT;
+
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct nfgenmsg *msg = NLMSG_DATA(nlh);
 		u32 rlen;
@@ -873,6 +882,9 @@ ctnetlink_change_status(struct ip_conntr
 #else
 		struct ip_nat_range range;
 
+		if (!ve_ip_nat_initialized())
+			return -ENOPROTOOPT;
+
 		if (cda[CTA_NAT_DST-1]) {
 			if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
 						&range) < 0)
@@ -1019,14 +1031,15 @@ ctnetlink_change_conntrack(struct ip_con
 static int
 ctnetlink_create_conntrack(struct nfattr *cda[], 
 			   struct ip_conntrack_tuple *otuple,
-			   struct ip_conntrack_tuple *rtuple)
+			   struct ip_conntrack_tuple *rtuple,
+			   struct user_beancounter *ub)
 {
 	struct ip_conntrack *ct;
 	int err = -EINVAL;
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	ct = ip_conntrack_alloc(otuple, rtuple);
+	ct = ip_conntrack_alloc(otuple, rtuple, ub);
 	if (ct == NULL || IS_ERR(ct))
 		return -ENOMEM;	
 
@@ -1078,6 +1091,9 @@ ctnetlink_new_conntrack(struct sock *ctn
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (!ve_ip_ct_initialized())
+		return -ENOPROTOOPT;
+
 	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
 		return -EINVAL;
 
@@ -1103,8 +1119,16 @@ ctnetlink_new_conntrack(struct sock *ctn
 		write_unlock_bh(&ip_conntrack_lock);
 		DEBUGP("no such conntrack, create new\n");
 		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
+		if (nlh->nlmsg_flags & NLM_F_CREATE) {
+#ifdef CONFIG_USER_RESOURCE
+			if (skb->sk)
+				err = ctnetlink_create_conntrack(cda, &otuple,
+						&rtuple, sock_bc(skb->sk)->ub);
+			else
+#endif
+				err = ctnetlink_create_conntrack(cda,
+						&otuple, &rtuple, NULL);
+		}
 		return err;
 	}
 	/* implicit 'else' */
@@ -1292,7 +1316,7 @@ ctnetlink_exp_dump_table(struct sk_buff 
 	DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
 
 	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_prev(i, &ip_conntrack_expect_list) {
+	list_for_each_prev(i, &ve_ip_conntrack_expect_list) {
 		exp = (struct ip_conntrack_expect *) i;
 		if (exp->id <= *id)
 			continue;
@@ -1327,6 +1351,9 @@ ctnetlink_get_expect(struct sock *ctnl, 
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (!ve_ip_ct_initialized())
+		return -ENOPROTOOPT;
+
 	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
 		return -EINVAL;
 
@@ -1400,6 +1427,9 @@ ctnetlink_del_expect(struct sock *ctnl, 
 	struct ip_conntrack_helper *h;
 	int err;
 
+	if (!ve_ip_ct_initialized())
+		return -ENOPROTOOPT;
+
 	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
 		return -EINVAL;
 
@@ -1438,7 +1468,7 @@ ctnetlink_del_expect(struct sock *ctnl, 
 			write_unlock_bh(&ip_conntrack_lock);
 			return -EINVAL;
 		}
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+		list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
 					 list) {
 			if (exp->master->helper == h 
 			    && del_timer(&exp->timeout)) {
@@ -1450,7 +1480,7 @@ ctnetlink_del_expect(struct sock *ctnl, 
 	} else {
 		/* This basically means we have to flush everything*/
 		write_lock_bh(&ip_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+		list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
 					 list) {
 			if (del_timer(&exp->timeout)) {
 				ip_ct_unlink_expect(exp);
@@ -1532,6 +1562,9 @@ ctnetlink_new_expect(struct sock *ctnl, 
 
 	DEBUGP("entered %s\n", __FUNCTION__);	
 
+	if (!ve_ip_ct_initialized())
+		return -ENOPROTOOPT;
+
 	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
 		return -EINVAL;
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_generic.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2017-01-13 08:40:23.000000000 -0500
@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c
 		  const struct sk_buff *skb,
 		  enum ip_conntrack_info ctinfo)
 {
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout);
 	return NF_ACCEPT;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2017-01-13 08:40:23.000000000 -0500
@@ -104,7 +104,7 @@ static int icmp_packet(struct ip_conntra
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+		ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout);
 	}
 
 	return NF_ACCEPT;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2017-01-13 08:40:23.000000000 -0500
@@ -94,7 +94,7 @@ unsigned int ip_ct_tcp_timeout_close =  
    to ~13-30min depending on RTO. */
 unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static const unsigned int * tcp_timeouts[]
+const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -761,7 +761,7 @@ static int tcp_in_window(struct ip_connt
 	} else {
 		res = 0;
 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
-		    ip_ct_tcp_be_liberal)
+		    ve_ip_ct_tcp_be_liberal)
 			res = 1;
 		if (!res && LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
@@ -1041,9 +1041,11 @@ static int tcp_packet(struct ip_conntrac
 	    && (new_state == TCP_CONNTRACK_FIN_WAIT
 	    	|| new_state == TCP_CONNTRACK_CLOSE))
 		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+	timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans &&
+		ve_ip_ct_tcp_timeouts[new_state] >
+					ve_ip_ct_tcp_timeout_max_retrans
+		? ve_ip_ct_tcp_timeout_max_retrans :
+					ve_ip_ct_tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1116,7 +1118,7 @@ static int tcp_new(struct ip_conntrack *
 
 		tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
 		conntrack->proto.tcp.seen[1].flags = 0;
-	} else if (ip_ct_tcp_loose == 0) {
+	} else if (ve_ip_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return 0;
 	} else {
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2017-01-13 08:40:23.000000000 -0500
@@ -71,12 +71,12 @@ static int udp_packet(struct ip_conntrac
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
 		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
-				   ip_ct_udp_timeout_stream);
+				   ve_ip_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
 			ip_conntrack_event_cache(IPCT_STATUS, skb);
 	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout);
 
 	return NF_ACCEPT;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_standalone.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_standalone.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_conntrack_standalone.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_conntrack_standalone.c	2017-01-13 08:40:23.000000000 -0500
@@ -27,6 +27,7 @@
 #include <net/checksum.h>
 #include <net/ip.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -46,9 +47,31 @@
 
 MODULE_LICENSE("GPL");
 
+int ip_conntrack_disable_ve0 = 0;
+module_param(ip_conntrack_disable_ve0, int, 0440);
+
 extern atomic_t ip_conntrack_count;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#else
+#define ve_ip_conntrack_count	ip_conntrack_count
+#endif
 DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
+/* Prior to 2.6.15, we had a ip_conntrack_enable_ve0 param. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO KBUILD_MODNAME
+	       ": parameter ip_conntrack_enable_ve0 is obsoleted. In ovzkernel"
+	       " >= 2.6.15 connection tracking on hardware node is enabled by "
+	       "default, use ip_conntrack_disable_ve0=1 parameter to "
+	       "disable.\n");
+	return 0;
+}
+module_param_call(ip_conntrack_enable_ve0, warn_set, NULL, NULL, 0);
+
 static int kill_proto(struct ip_conntrack *i, void *data)
 {
 	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == 
@@ -89,8 +112,8 @@ static struct list_head *ct_get_first(st
 	for (st->bucket = 0;
 	     st->bucket < ip_conntrack_htable_size;
 	     st->bucket++) {
-		if (!list_empty(&ip_conntrack_hash[st->bucket]))
-			return ip_conntrack_hash[st->bucket].next;
+		if (!list_empty(&ve_ip_conntrack_hash[st->bucket]))
+			return ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return NULL;
 }
@@ -100,10 +123,10 @@ static struct list_head *ct_get_next(str
 	struct ct_iter_state *st = seq->private;
 
 	head = head->next;
-	while (head == &ip_conntrack_hash[st->bucket]) {
+	while (head == &ve_ip_conntrack_hash[st->bucket]) {
 		if (++st->bucket >= ip_conntrack_htable_size)
 			return NULL;
-		head = ip_conntrack_hash[st->bucket].next;
+		head = ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return head;
 }
@@ -239,7 +262,7 @@ static struct file_operations ct_file_op
 /* expects */
 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct list_head *e = &ip_conntrack_expect_list;
+	struct list_head *e = &ve_ip_conntrack_expect_list;
 	loff_t i;
 
 	/* strange seq_file api calls stop even if we fail,
@@ -251,7 +274,7 @@ static void *exp_seq_start(struct seq_fi
 
 	for (i = 0; i <= *pos; i++) {
 		e = e->next;
-		if (e == &ip_conntrack_expect_list)
+		if (e == &ve_ip_conntrack_expect_list)
 			return NULL;
 	}
 	return e;
@@ -264,7 +287,7 @@ static void *exp_seq_next(struct seq_fil
 	++*pos;
 	e = e->next;
 
-	if (e == &ip_conntrack_expect_list)
+	if (e == &ve_ip_conntrack_expect_list)
 		return NULL;
 
 	return e;
@@ -349,7 +372,7 @@ static void ct_cpu_seq_stop(struct seq_f
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+	unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count);
 	struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -559,6 +582,28 @@ int ip_conntrack_checksum = 1;
 
 /* From ip_conntrack_core.c */
 extern int ip_conntrack_max;
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_ct_sysctl_header \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
+#define ve_ip_ct_net_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_net_table)
+#define ve_ip_ct_ipv4_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
+#define ve_ip_ct_netfilter_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
+#define ve_ip_ct_sysctl_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
+#else
+#define ve_ip_conntrack_max		ip_conntrack_max
+static struct ctl_table_header *ip_ct_sysctl_header;
+#define ve_ip_ct_sysctl_header		ip_ct_sysctl_header
+#define ve_ip_ct_net_table		ip_ct_net_table
+#define ve_ip_ct_ipv4_table		ip_ct_ipv4_table
+#define ve_ip_ct_netfilter_table	ip_ct_netfilter_table
+#define ve_ip_ct_sysctl_table		ip_ct_sysctl_table
+#endif
 extern unsigned int ip_conntrack_htable_size;
 
 /* From ip_conntrack_proto_tcp.c */
@@ -589,8 +634,6 @@ extern unsigned int ip_ct_generic_timeou
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static struct ctl_table_header *ip_ct_sysctl_header;
-
 static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
@@ -807,6 +850,84 @@ static ctl_table ip_ct_net_table[] = {
 };
 
 EXPORT_SYMBOL(ip_ct_log_invalid);
+
+#ifdef CONFIG_VE_IPTABLES
+static void ip_conntrack_sysctl_cleanup(void)
+{
+	if (!ve_is_super(get_exec_env()))
+		free_sysctl_clone(ve_ip_ct_net_table);
+
+	ve_ip_ct_net_table = NULL;
+	ve_ip_ct_ipv4_table = NULL;
+	ve_ip_ct_netfilter_table = NULL;
+	ve_ip_ct_sysctl_table = NULL;
+}
+
+static int ip_conntrack_sysctl_init(void)
+{
+	if (ve_is_super(get_exec_env())) {
+		ve_ip_ct_net_table = ip_ct_net_table;
+		ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
+		ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
+		ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
+	} else {
+		ve_ip_ct_net_table = clone_sysctl_template(ip_ct_net_table);
+		if (ve_ip_ct_net_table == NULL)
+			return -ENOMEM;
+
+		ve_ip_ct_ipv4_table = ve_ip_ct_net_table[0].child;
+		ve_ip_ct_netfilter_table = ve_ip_ct_ipv4_table[0].child;
+		ve_ip_ct_sysctl_table = ve_ip_ct_netfilter_table[0].child;
+		/* make ip_conntrack_checksum read only inside container */
+		ve_ip_ct_sysctl_table[3].mode = 0444;
+	}
+
+	ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
+	ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max;
+	ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count;
+	/* skip ve_ip_ct_sysctl_table[2].data as it is read-only and common
+	 * for all environments */
+	ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent;
+	BUG_ON(ve_ip_ct_sysctl_table[4].ctl_name
+			!= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT);
+	ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[1];
+	ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv;
+	ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[2];
+	ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established;
+	ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[3];
+	ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
+	ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[4];
+	ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait;
+	ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[5];
+	ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack;
+	ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[6];
+	ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait;
+	ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[7];
+	ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close;
+	ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_tcp_timeouts[8];
+	ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
+	ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout;
+	ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
+	ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_udp_timeout_stream;
+	ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
+	ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_icmp_timeout;
+	ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
+	ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_generic_timeout;
+	ve_ip_ct_log_invalid = ip_ct_log_invalid;
+	ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_log_invalid;
+	ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_tcp_loose = ip_ct_tcp_loose;
+	ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_loose;
+	ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal;
+	ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_be_liberal;
+	ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans;
+	BUG_ON(ve_ip_ct_sysctl_table[20].ctl_name
+			!= NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS);
+	ve_ip_ct_sysctl_table[20].data = &ve_ip_ct_tcp_max_retrans;
+	return 0;
+}
+#endif /*CONFIG_VE*/
 #endif /* CONFIG_SYSCTL */
 
 /* FIXME: Allow NULL functions and sub in pointers to generic for
@@ -816,11 +937,11 @@ int ip_conntrack_protocol_register(struc
 	int ret = 0;
 
 	write_lock_bh(&ip_conntrack_lock);
-	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+	if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_ct_protos[proto->proto] = proto;
+	ve_ip_ct_protos[proto->proto] = proto;
  out:
 	write_unlock_bh(&ip_conntrack_lock);
 	return ret;
@@ -829,7 +950,7 @@ int ip_conntrack_protocol_register(struc
 void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
 {
 	write_lock_bh(&ip_conntrack_lock);
-	ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+	ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
 	write_unlock_bh(&ip_conntrack_lock);
 
 	/* Somebody could be still looking at the proto in bh. */
@@ -839,16 +960,22 @@ void ip_conntrack_protocol_unregister(st
 	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
 }
 
-static int __init ip_conntrack_standalone_init(void)
+int init_iptable_conntrack(void)
 {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
 #endif
 	int ret = 0;
 
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	ret = ip_conntrack_init();
 	if (ret < 0)
-		return ret;
+		goto cleanup_unget;
+
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
 
 #ifdef CONFIG_PROC_FS
 	ret = -ENOMEM;
@@ -859,12 +986,14 @@ static int __init ip_conntrack_standalon
 					&exp_file_ops);
 	if (!proc_exp) goto cleanup_proc;
 
-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-	if (!proc_stat)
-		goto cleanup_proc_exp;
+	if (ve_is_super(get_exec_env())) {
+		proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+		if (!proc_stat)
+			goto cleanup_proc_exp;
 
-	proc_stat->proc_fops = &ct_cpu_seq_fops;
-	proc_stat->owner = THIS_MODULE;
+		proc_stat->proc_fops = &ct_cpu_seq_fops;
+		proc_stat->owner = THIS_MODULE;
+	}
 #endif
 
 	ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
@@ -873,22 +1002,32 @@ static int __init ip_conntrack_standalon
 		goto cleanup_proc_stat;
 	}
 #ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
-	if (ip_ct_sysctl_header == NULL) {
+#ifdef CONFIG_VE_IPTABLES
+	ret = ip_conntrack_sysctl_init();
+	if (ret < 0)
+		goto cleanup_sysctl;
+#endif
+	ret = -ENOMEM;
+	ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
+	if (ve_ip_ct_sysctl_header == NULL) {
 		printk("ip_conntrack: can't register to sysctl.\n");
-		ret = -ENOMEM;
-		goto cleanup_hooks;
+		goto cleanup_sysctl2;
 	}
 #endif
-	return ret;
+	return 0;
 
 #ifdef CONFIG_SYSCTL
- cleanup_hooks:
+ cleanup_sysctl2:
+#ifdef CONFIG_VE_IPTABLES
+	ip_conntrack_sysctl_cleanup();
+ cleanup_sysctl:
+#endif
 	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
 #endif
  cleanup_proc_stat:
 #ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
+	if (ve_is_super(get_exec_env()))
+		remove_proc_entry("ip_conntrack", proc_net_stat);
  cleanup_proc_exp:
 	proc_net_remove("ip_conntrack_expect");
  cleanup_proc:
@@ -896,25 +1035,59 @@ static int __init ip_conntrack_standalon
  cleanup_init:
 #endif /* CONFIG_PROC_FS */
 	ip_conntrack_cleanup();
+ cleanup_unget:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
-static void __exit ip_conntrack_standalone_fini(void)
+void fini_iptable_conntrack(void)
 {
 	synchronize_net();
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup;
 #ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(ip_ct_sysctl_header);
+ 	unregister_sysctl_table(ve_ip_ct_sysctl_header);
+#ifdef CONFIG_VE_IPTABLES
+	ip_conntrack_sysctl_cleanup();
+#endif
 #endif
 	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
 #ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
+	if (ve_is_super(get_exec_env()))
+		remove_proc_entry("ip_conntrack", proc_net_stat);
 	proc_net_remove("ip_conntrack_expect");
 	proc_net_remove("ip_conntrack");
 #endif /* CONFIG_PROC_FS */
+cleanup:
 	ip_conntrack_cleanup();
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
+}
+
+static int __init ip_conntrack_standalone_init(void)
+{
+	int err;
+
+	err = init_iptable_conntrack();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_conntrack);
+	KSYMRESOLVE(fini_iptable_conntrack);
+	KSYMMODRESOLVE(ip_conntrack);
+	return 0;
 }
 
-module_init(ip_conntrack_standalone_init);
+static void __exit ip_conntrack_standalone_fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack);
+	KSYMUNRESOLVE(init_iptable_conntrack);
+	KSYMUNRESOLVE(fini_iptable_conntrack);
+	fini_iptable_conntrack();
+}
+
+subsys_initcall(ip_conntrack_standalone_init);
 module_exit(ip_conntrack_standalone_fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
@@ -931,16 +1104,21 @@ EXPORT_SYMBOL_GPL(ip_conntrack_unregiste
 EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
 EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
 #endif
+EXPORT_SYMBOL(ip_conntrack_disable_ve0);
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(ip_ct_get_tuple);
 EXPORT_SYMBOL(ip_ct_get_tuplepr);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_destroyed);
+#endif
 EXPORT_SYMBOL(need_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(virt_ip_conntrack_helper_register);
+EXPORT_SYMBOL(virt_ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
 EXPORT_SYMBOL(__ip_ct_refresh_acct);
 
@@ -950,14 +1128,18 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_expect_
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
+#endif
 EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
 
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
 EXPORT_SYMBOL(ip_conntrack_htable_size);
 EXPORT_SYMBOL(ip_conntrack_lock);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_hash);
+#endif
 EXPORT_SYMBOL(ip_conntrack_untracked);
 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_core.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_core.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_core.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_core.c	2017-01-13 08:40:40.000000000 -0500
@@ -21,6 +21,8 @@
 #include <linux/icmp.h>
 #include <linux/udp.h>
 #include <linux/jhash.h>
+#include <linux/nfcalls.h>
+#include <ub/ub_mem.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -46,15 +48,24 @@ DEFINE_RWLOCK(ip_nat_lock);
 /* Calculated at init based on memory size */
 static unsigned int ip_nat_htable_size;
 
-static struct list_head *bysource;
-
 #define MAX_IP_NAT_PROTO 256
+
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_nat_bysource	\
+	(get_exec_env()->_ip_conntrack->_ip_nat_bysource)
+#define ve_ip_nat_protos	\
+	(get_exec_env()->_ip_conntrack->_ip_nat_protos)
+#else
+static struct list_head *bysource;
+#define ve_ip_nat_bysource	bysource
 static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+#define ve_ip_nat_protos	ip_nat_protos
+#endif
 
 static inline struct ip_nat_protocol *
 __ip_nat_proto_find(u_int8_t protonum)
 {
-	return ip_nat_protos[protonum];
+	return ve_ip_nat_protos[protonum];
 }
 
 struct ip_nat_protocol *
@@ -85,9 +96,12 @@ EXPORT_SYMBOL_GPL(ip_nat_proto_put);
 static inline unsigned int
 hash_by_src(const struct ip_conntrack_tuple *tuple)
 {
+	unsigned int hash;
+
 	/* Original src, to ensure we map it consistently if poss. */
-	return jhash_3words(tuple->src.ip, tuple->src.u.all,
-			    tuple->dst.protonum, 0) % ip_nat_htable_size;
+	hash = jhash_3words(tuple->src.ip, tuple->src.u.all,
+			    tuple->dst.protonum, 0);
+	return ((u64)hash * ip_nat_htable_size) >> 32;
 }
 
 /* Noone using conntrack by the time this called. */
@@ -101,18 +115,6 @@ static void ip_nat_cleanup_conntrack(str
 	write_unlock_bh(&ip_nat_lock);
 }
 
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong.  Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-				      oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
 /* Is this tuple already taken? (not by us) */
 int
 ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -177,7 +179,7 @@ find_appropriate_src(const struct ip_con
 	struct ip_conntrack *ct;
 
 	read_lock_bh(&ip_nat_lock);
-	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
+	list_for_each_entry(ct, &ve_ip_nat_bysource[h], nat.info.bysource) {
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
 			invert_tuplepr(result,
@@ -234,7 +236,8 @@ find_best_ips_proto(struct ip_conntrack_
 	minip = ntohl(range->min_ip);
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
-	*var_ipp = htonl(minip + j % (maxip - minip + 1));
+	j = ((u64)j * (maxip - minip + 1)) >> 32;
+	*var_ipp = htonl(minip + j);
 }
 
 /* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
@@ -299,13 +302,22 @@ get_unique_tuple(struct ip_conntrack_tup
 	ip_nat_proto_put(proto);
 }
 
+void ip_nat_hash_conntrack(struct ip_conntrack *conntrack)
+{
+	unsigned int srchash
+		= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	write_lock_bh(&ip_nat_lock);
+	list_add(&conntrack->nat.info.bysource, &ve_ip_nat_bysource[srchash]);
+	write_unlock_bh(&ip_nat_lock);
+}
+EXPORT_SYMBOL_GPL(ip_nat_hash_conntrack);
+
 unsigned int
 ip_nat_setup_info(struct ip_conntrack *conntrack,
 		  const struct ip_nat_range *range,
 		  unsigned int hooknum)
 {
 	struct ip_conntrack_tuple curr_tuple, new_tuple;
-	struct ip_nat_info *info = &conntrack->nat.info;
 	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
 	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
@@ -340,14 +352,8 @@ ip_nat_setup_info(struct ip_conntrack *c
 	}
 
 	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
-		unsigned int srchash
-			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				      .tuple);
-		write_lock_bh(&ip_nat_lock);
-		list_add(&info->bysource, &bysource[srchash]);
-		write_unlock_bh(&ip_nat_lock);
-	}
+	if (have_to_hash)
+		ip_nat_hash_conntrack(conntrack);
 
 	/* It's done. */
 	if (maniptype == IP_NAT_MANIP_DST)
@@ -386,12 +392,12 @@ manip_pkt(u_int16_t proto,
 	iph = (void *)(*pskb)->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
-		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->saddr, target->src.ip,
+					    iph->check);
 		iph->saddr = target->src.ip;
 	} else {
-		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
+					    iph->check);
 		iph->daddr = target->dst.ip;
 	}
 	return 1;
@@ -431,10 +437,10 @@ unsigned int ip_nat_packet(struct ip_con
 EXPORT_SYMBOL_GPL(ip_nat_packet);
 
 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-				  struct ip_conntrack *ct,
-				  enum ip_nat_manip_type manip,
-				  enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum,
+				  struct sk_buff **pskb)
 {
 	struct {
 		struct icmphdr icmp;
@@ -442,7 +448,9 @@ int ip_nat_icmp_reply_translation(struct
 	} *inside;
 	struct ip_conntrack_tuple inner, target;
 	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
+	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
 
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -451,12 +459,8 @@ int ip_nat_icmp_reply_translation(struct
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
-		hdrlen = (*pskb)->nh.iph->ihl * 4;
-		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
-						(*pskb)->len - hdrlen, 0)))
-			return 0;
-	}
+	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+		return 0;
 
 	/* Must be RELATED */
 	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -496,11 +500,13 @@ int ip_nat_icmp_reply_translation(struct
 		return 0;
 
 	/* Reloading "inside" here since manip_pkt inner. */
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	inside->icmp.checksum = 0;
-	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-						       (*pskb)->len - hdrlen,
-						       0));
+	if (!skb_partial_checksummed(*pskb)) {
+		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+							       (*pskb)->len - hdrlen,
+							       0));
+	}
 
 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
@@ -529,11 +535,11 @@ int ip_nat_protocol_register(struct ip_n
 	int ret = 0;
 
 	write_lock_bh(&ip_nat_lock);
-	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+	if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_nat_protos[proto->protonum] = proto;
+	ve_ip_nat_protos[proto->protonum] = proto;
  out:
 	write_unlock_bh(&ip_nat_lock);
 	return ret;
@@ -544,7 +550,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
 void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
 {
 	write_lock_bh(&ip_nat_lock);
-	ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
 	write_unlock_bh(&ip_nat_lock);
 
 	/* Someone could be still looking at the proto in a bh. */
@@ -597,40 +603,59 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_
 EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
 #endif
 
-static int __init ip_nat_init(void)
+static int ip_nat_init(void)
 {
 	size_t i;
+	int ret;
 
-	/* Leave them the same for the moment. */
-	ip_nat_htable_size = ip_conntrack_htable_size;
+	if (ve_is_super(get_exec_env()))
+		ip_nat_htable_size = ip_conntrack_htable_size;
 
 	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
-	if (!bysource)
-		return -ENOMEM;
+	ret = -ENOMEM;
+	ve_ip_nat_bysource =
+		ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2);
+	if (!ve_ip_nat_bysource)
+		goto nomem;
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_protos =
+		ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL);
+	if (!ve_ip_nat_protos)
+		goto nomem2;
+#endif
 
 	/* Sew in builtin protocols. */
 	write_lock_bh(&ip_nat_lock);
 	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		ip_nat_protos[i] = &ip_nat_unknown_protocol;
-	ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
-	ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
-	ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+		ve_ip_nat_protos[i] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+	ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+	ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
 	write_unlock_bh(&ip_nat_lock);
 
 	for (i = 0; i < ip_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
+		INIT_LIST_HEAD(&ve_ip_nat_bysource[i]);
 	}
 
 	/* FIXME: Man, this is a hack.  <SIGH> */
-	IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
-	ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
-	IP_NF_ASSERT(ip_ct_nat_offset == NULL);
-	ip_ct_nat_offset = &ip_nat_get_offset;
+	IP_NF_ASSERT(ve_ip_conntrack_destroyed == NULL);
+	ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+	if (ve_is_super(get_exec_env())) {
+		IP_NF_ASSERT(ip_ct_nat_offset == NULL);
+		ip_ct_nat_offset = &ip_nat_get_offset;
+	}
 
-	/* Initialize fake conntrack so that NAT will skip it */
-	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	if (ve_is_super(get_exec_env()))
+		/* Initialize fake conntrack so that NAT will skip it */
+		ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
 	return 0;
+#ifdef CONFIG_VE_IPTABLES
+nomem2:
+#endif
+	vfree(ve_ip_nat_bysource);
+nomem:
+	return ret;
 }
 
 /* Clear NAT section of all conntracks, in case we're loaded again. */
@@ -641,15 +666,43 @@ static int clean_nat(struct ip_conntrack
 	return 0;
 }
 
-static void __exit ip_nat_cleanup(void)
+static void ip_nat_cleanup(void)
 {
 	ip_ct_iterate_cleanup(&clean_nat, NULL);
-	ip_ct_nat_offset = NULL;
-	ip_conntrack_destroyed = NULL;
-	vfree(bysource);
+	if (ve_is_super(get_exec_env()))
+		ip_ct_nat_offset = NULL;
+	ve_ip_conntrack_destroyed = NULL;
+	vfree(ve_ip_nat_bysource);
+	ve_ip_nat_bysource = NULL;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_nat_protos);
+	ve_ip_nat_protos = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	err = ip_nat_init();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(ip_nat_init);
+	KSYMRESOLVE(ip_nat_cleanup);
+	KSYMMODRESOLVE(ip_nat);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat);
+	KSYMUNRESOLVE(ip_nat_cleanup);
+	KSYMUNRESOLVE(ip_nat_init);
+	ip_nat_cleanup();
 }
 
 MODULE_LICENSE("GPL");
 
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
+fs_initcall(init);
+module_exit(fini);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_ftp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_ftp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_ftp.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_ftp.c	2017-01-13 08:40:23.000000000 -0500
@@ -19,6 +19,7 @@
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -154,18 +155,49 @@ static unsigned int ip_nat_ftp(struct sk
 	return NF_ACCEPT;
 }
 
-static void __exit ip_nat_ftp_fini(void)
+#ifdef CONFIG_VE_IPTABLES
+#undef ve_ip_nat_ftp_hook
+#define ve_ip_nat_ftp_hook \
+		(get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook)
+#endif
+int init_iptable_nat_ftp(void)
 {
-	ip_nat_ftp_hook = NULL;
+	BUG_ON(ve_ip_nat_ftp_hook);
+
+	if (!ve_is_super(get_exec_env()) && !try_module_get(THIS_MODULE))
+		return 0;
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_ftp_hook = (ip_nat_helper_func)ip_nat_ftp;
+#else
+	ve_ip_nat_ftp_hook = ip_nat_ftp;
+#endif
+	return 0;
+}
+
+void fini_iptable_nat_ftp(void)
+{
+	ve_ip_nat_ftp_hook = NULL;
 	/* Make sure noone calls it, meanwhile. */
 	synchronize_net();
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
+}
+
+static void __exit ip_nat_ftp_fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_ftp);
+	KSYMUNRESOLVE(init_iptable_nat_ftp);
+	KSYMUNRESOLVE(fini_iptable_nat_ftp);
+	fini_iptable_nat_ftp();
 }
 
 static int __init ip_nat_ftp_init(void)
 {
-	BUG_ON(ip_nat_ftp_hook);
-	ip_nat_ftp_hook = ip_nat_ftp;
-	return 0;
+	KSYMRESOLVE(init_iptable_nat_ftp);
+	KSYMRESOLVE(fini_iptable_nat_ftp);
+	KSYMMODRESOLVE(ip_nat_ftp);
+	return init_iptable_nat_ftp();
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_helper.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_helper.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_helper.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_helper.c	2017-01-13 08:40:41.000000000 -0500
@@ -95,6 +95,9 @@ s16 ip_nat_get_offset(const struct ip_co
 	struct ip_nat_seq *this_way;
 	s16 offset;
 
+	if (!(ct->status & IPS_NAT_DONE_MASK))
+		return 0;
+
 	this_way = &ct->nat.info.seq[dir];
 	spin_lock_bh(&ip_nat_seqofs_lock);
 	offset = after(seq, this_way->correction_pos)
@@ -183,7 +186,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff 
 {
 	struct iphdr *iph;
 	struct tcphdr *tcph;
-	int datalen;
+	int oldlen, datalen;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return 0;
@@ -198,13 +201,23 @@ ip_nat_mangle_tcp_packet(struct sk_buff 
 	iph = (*pskb)->nh.iph;
 	tcph = (void *)iph + iph->ihl*4;
 
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
 	datalen = (*pskb)->len - iph->ihl*4;
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
-				   csum_partial((char *)tcph, datalen, 0));
+
+	if (!skb_partial_checksummed(*pskb)) {
+		tcph->check = 0;
+		tcph->check = tcp_v4_check(tcph, datalen,
+					   iph->saddr, iph->daddr,
+					   csum_partial((char *)tcph,
+							datalen, 0));
+	} else
+		tcph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   tcph->check, 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -237,6 +250,7 @@ ip_nat_mangle_udp_packet(struct sk_buff 
 {
 	struct iphdr *iph;
 	struct udphdr *udph;
+	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
 	iph = (*pskb)->nh.iph;
@@ -254,22 +268,33 @@ ip_nat_mangle_udp_packet(struct sk_buff 
 
 	iph = (*pskb)->nh.iph;
 	udph = (void *)iph + iph->ihl*4;
+
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	udph->len = htons((*pskb)->len - iph->ihl*4);
+	datalen = (*pskb)->len - iph->ihl*4;
+	udph->len = htons(datalen);
+
+	if (!udph->check && !skb_partial_checksummed(*pskb))
+		return 1;
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (udph->check) {
+	if (!skb_partial_checksummed(*pskb)) {
 		int datalen = (*pskb)->len - iph->ihl * 4;
 		udph->check = 0;
 		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 		                                datalen, IPPROTO_UDP,
 		                                csum_partial((char *)udph,
 		                                             datalen, 0));
-	}
-
+		if (!udph->check)
+			udph->check = -1;
+	} else
+		udph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   udph->check, 1);
 	return 1;
 }
 EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -309,11 +334,14 @@ sack_adjust(struct sk_buff *skb,
 			ntohl(sack->start_seq), new_start_seq,
 			ntohl(sack->end_seq), new_end_seq);
 
-		tcph->check = 
-			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
-					   ip_nat_cheat_check(~sack->end_seq, 
-						   	      new_end_seq,
-							      tcph->check));
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->start_seq,
+						   new_start_seq,
+						   tcph->check, 0);
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->end_seq,
+						   new_end_seq,
+						   tcph->check, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -397,10 +425,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
 	newack = htonl(newack);
 
-	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
-					 ip_nat_cheat_check(~tcph->ack_seq, 
-					 		    newack, 
-							    tcph->check));
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+					   tcph->check, 0);
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+					   tcph->check, 0);
 
 	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_irc.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_irc.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_irc.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_irc.c	2017-01-13 08:40:23.000000000 -0500
@@ -23,6 +23,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 
 #if 0
 #define DEBUGP printk
@@ -96,18 +97,44 @@ static unsigned int help(struct sk_buff 
 	return ret;
 }
 
-static void __exit ip_nat_irc_fini(void)
+#ifdef CONFIG_VE_IPTABLES
+#undef ve_ip_nat_irc_hook
+#define ve_ip_nat_irc_hook \
+		(get_exec_env()->_ip_conntrack->_ip_nat_irc_hook)
+#endif
+
+int init_iptable_nat_irc(void)
+{
+	BUG_ON(ve_ip_nat_irc_hook);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_irc_hook = (ip_nat_helper_func)help;
+#else
+	ve_ip_nat_irc_hook = help;
+#endif
+	return 0;
+}
+
+void fini_iptable_nat_irc(void)
 {
-	ip_nat_irc_hook = NULL;
+	ve_ip_nat_irc_hook = NULL;
 	/* Make sure noone calls it, meanwhile. */
 	synchronize_net();
 }
 
+static void __exit ip_nat_irc_fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_irc);
+	KSYMUNRESOLVE(init_iptable_nat_irc);
+	KSYMUNRESOLVE(fini_iptable_nat_irc);
+	fini_iptable_nat_irc();
+}
+
 static int __init ip_nat_irc_init(void)
 {
-	BUG_ON(ip_nat_irc_hook);
-	ip_nat_irc_hook = help;
-	return 0;
+	KSYMRESOLVE(init_iptable_nat_irc);
+	KSYMRESOLVE(fini_iptable_nat_irc);
+	KSYMMODRESOLVE(ip_nat_irc);
+	return init_iptable_nat_irc();
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_gre.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_gre.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_gre.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_gre.c	2017-01-13 08:40:40.000000000 -0500
@@ -130,9 +130,10 @@ gre_manip_pkt(struct sk_buff **pskb,
 			if (greh->csum) {
 				/* FIXME: Never tested this code... */
 				*(gre_csum(greh)) = 
-					ip_nat_cheat_check(~*(gre_key(greh)),
+					nf_proto_csum_update(*pskb,
+							~*(gre_key(greh)),
 							tuple->dst.u.gre.key,
-							*(gre_csum(greh)));
+							*(gre_csum(greh)), 0);
 			}
 			*(gre_key(greh)) = tuple->dst.u.gre.key;
 			break;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_icmp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_icmp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_icmp.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_icmp.c	2017-01-13 08:40:40.000000000 -0500
@@ -67,9 +67,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
 
 	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
 
-	hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
-					    tuple->src.u.icmp.id,
-					    hdr->checksum);
+	hdr->checksum = nf_proto_csum_update(*pskb,
+					     hdr->un.echo.id ^ 0xFFFF,
+					     tuple->src.u.icmp.id,
+					     hdr->checksum, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_tcp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_tcp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_tcp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_tcp.c	2017-01-13 08:40:40.000000000 -0500
@@ -138,15 +138,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-#ifdef CONFIG_XEN
-	if ((*pskb)->proto_csum_blank)
-		hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-	else
-#endif
-		hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(oldport ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+	hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+	hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+					  hdr->check, 0);
 	return 1;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_udp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_udp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_proto_udp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_proto_udp.c	2017-01-13 08:40:40.000000000 -0500
@@ -122,16 +122,16 @@ udp_manip_pkt(struct sk_buff **pskb,
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check) /* 0 is a special case meaning no checksum */
-#ifdef CONFIG_XEN
-		if ((*pskb)->proto_csum_blank)
-			hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-		else
-#endif
-			hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(*portptr ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+
+	if (hdr->check || skb_partial_checksummed(*pskb)) {
+		hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+						  hdr->check, 1);
+		hdr->check = nf_proto_csum_update(*pskb,
+						  *portptr ^ 0xFFFF, newport,
+						  hdr->check, 0);
+		if (!hdr->check)
+			hdr->check = -1;
+	}
 	*portptr = newport;
 	return 1;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_rule.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_rule.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_rule.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_rule.c	2017-01-13 08:40:23.000000000 -0500
@@ -23,6 +23,7 @@
 #define ASSERT_WRITE_LOCK(x)
 
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
@@ -34,6 +35,13 @@
 #define DEBUGP(format, args...)
 #endif
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_nat_table		\
+	(get_exec_env()->_ip_conntrack->_ip_nat_table)
+#else
+#define ve_ip_nat_table		&nat_table
+#endif
+
 #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
 
 static struct
@@ -41,7 +49,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} nat_initial_table __initdata
+} nat_initial_table
 = { { "nat", NAT_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] = 0,
@@ -238,7 +246,7 @@ int ip_nat_rule_find(struct sk_buff **ps
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL);
 
 	if (ret == NF_ACCEPT) {
 		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -266,34 +274,58 @@ static struct ipt_target ipt_dnat_reg = 
 	.checkentry	= ipt_dnat_checkentry,
 };
 
-int __init ip_nat_rule_init(void)
+int ip_nat_rule_init(void)
 {
 	int ret;
 
-	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-	if (ret != 0)
-		return ret;
-	ret = ipt_register_target(&ipt_snat_reg);
-	if (ret != 0)
-		goto unregister_table;
-
-	ret = ipt_register_target(&ipt_dnat_reg);
-	if (ret != 0)
-		goto unregister_snat;
+	if (!ve_is_super(get_exec_env()) ||
+			!ip_conntrack_disable_ve0) {
+		struct ipt_table *tmp_table;
+
+		tmp_table = ipt_register_table(&nat_table,
+					&nat_initial_table.repl);
+		if (IS_ERR(tmp_table))
+			return PTR_ERR(tmp_table);
+#ifdef CONFIG_VE_IPTABLES
+		ve_ip_nat_table = tmp_table;
+#endif
+	}
 
-	return ret;
+	if (ve_is_super(get_exec_env())) {
+		ret = ipt_register_target(&ipt_snat_reg);
+		if (ret != 0)
+			goto unregister_table;
+
+		ret = ipt_register_target(&ipt_dnat_reg);
+		if (ret != 0)
+			goto unregister_snat;
+	}
+	return 0;
 
  unregister_snat:
 	ipt_unregister_target(&ipt_snat_reg);
  unregister_table:
-	ipt_unregister_table(&nat_table);
+	if (!ip_conntrack_disable_ve0) {
+		ipt_unregister_table(ve_ip_nat_table);
+#ifdef CONFIG_VE_IPTABLES
+		ve_ip_nat_table = NULL;
+#endif
+	}
 
 	return ret;
 }
 
 void ip_nat_rule_cleanup(void)
 {
-	ipt_unregister_target(&ipt_dnat_reg);
-	ipt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(&nat_table);
+	if (ve_is_super(get_exec_env())) {
+		ipt_unregister_target(&ipt_dnat_reg);
+		ipt_unregister_target(&ipt_snat_reg);
+	}
+	if (!ve_is_super(get_exec_env()) ||
+			!ip_conntrack_disable_ve0) {
+		ipt_unregister_table(ve_ip_nat_table);
+#ifdef CONFIG_VE_IPTABLES
+		ve_ip_nat_table = NULL;
+#endif
+	}
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_standalone.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_standalone.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_nat_standalone.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_nat_standalone.c	2017-01-13 08:40:40.000000000 -0500
@@ -29,6 +29,7 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/spinlock.h>
+#include <linux/nfcalls.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -116,10 +117,15 @@ ip_nat_fn(unsigned int hooknum,
 	if (ct == &ip_conntrack_untracked)
 		return NF_ACCEPT;
 
+#if defined(CONFIG_VE)
+	if ((*pskb)->ip_summed == CHECKSUM_HW && out != NULL)
+			(*pskb)->proto_csum_blank = 1;
+#else
 	/* If we had a hardware checksum before, it's now invalid */
 	if ((*pskb)->ip_summed == CHECKSUM_HW)
 		if (skb_checksum_help(*pskb, (out == NULL)))
 			return NF_DROP;
+#endif
 
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
@@ -146,8 +152,8 @@ ip_nat_fn(unsigned int hooknum,
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
-							   CTINFO2DIR(ctinfo)))
+			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+							   hooknum, pskb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -355,21 +361,19 @@ static struct nf_hook_ops ip_nat_ops[] =
 	},
 };
 
-static int __init ip_nat_standalone_init(void)
+int init_iptable_nat(void)
 {
 	int ret = 0;
 
-	need_conntrack();
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
 
-#ifdef CONFIG_XFRM
-	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
-#endif
 	ret = ip_nat_rule_init();
 	if (ret < 0) {
 		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_decode_session;
+ 		goto out_modput;
 	}
+
 	ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
 	if (ret < 0) {
 		printk("ip_nat_init: can't register hooks.\n");
@@ -379,25 +383,64 @@ static int __init ip_nat_standalone_init
 
  cleanup_rule_init:
 	ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
+ out_modput:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
-static void __exit ip_nat_standalone_fini(void)
+void fini_iptable_nat(void)
 {
 	nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
 	ip_nat_rule_cleanup();
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
+}
+
+static int __init ip_nat_standalone_init(void)
+{
+	int err;
+
+	need_conntrack();
+
+#ifdef CONFIG_XFRM
+	BUG_ON(ip_nat_decode_session != NULL);
+	ip_nat_decode_session = nat_decode_session;
+#endif
+	if (!ip_conntrack_disable_ve0)
+		err = init_iptable_nat();
+	else
+		err = ip_nat_rule_init();
+	if (err < 0) {
+#ifdef CONFIG_XFRM
+		ip_nat_decode_session = NULL;
+		synchronize_net();
+#endif
+		return err;
+	}
+
+	KSYMRESOLVE(init_iptable_nat);
+	KSYMRESOLVE(fini_iptable_nat);
+	KSYMMODRESOLVE(iptable_nat);
+	return 0;
+}
+
+static void __exit ip_nat_standalone_fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_nat);
+	KSYMUNRESOLVE(init_iptable_nat);
+	KSYMUNRESOLVE(fini_iptable_nat);
+	if (!ip_conntrack_disable_ve0)
+		fini_iptable_nat();
+	else
+		ip_nat_rule_cleanup();
 #ifdef CONFIG_XFRM
 	ip_nat_decode_session = NULL;
 	synchronize_net();
 #endif
 }
 
-module_init(ip_nat_standalone_init);
+fs_initcall(ip_nat_standalone_init);
 module_exit(ip_nat_standalone_fini);
 
 MODULE_LICENSE("GPL");
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_queue.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_queue.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_queue.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_queue.c	2017-01-13 08:40:23.000000000 -0500
@@ -515,7 +515,7 @@ ipq_rcv_skb(struct sk_buff *skb)
 	if (type <= IPQM_BASE)
 		return;
 		
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (security_netlink_recv(skb, CAP_VE_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 	
 	write_lock_bh(&queue_lock);
@@ -547,12 +547,15 @@ ipq_rcv_sk(struct sock *sk, int len)
 {
 	struct sk_buff *skb;
 	unsigned int qlen;
+	struct ve_struct *env;
 
 	mutex_lock(&ipqnl_mutex);
 			
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
 		skb = skb_dequeue(&sk->sk_receive_queue);
+		env = set_exec_env(skb->owner_env);
 		ipq_rcv_skb(skb);
+		(void)set_exec_env(env);
 		kfree_skb(skb);
 	}
 		
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/iptable_filter.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/iptable_filter.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/iptable_filter.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/iptable_filter.c	2017-01-13 08:40:23.000000000 -0500
@@ -12,12 +12,20 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables filter table");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_filter	(get_exec_env()->_ve_ipt_filter_pf)
+#else
+#define	ve_packet_filter	&packet_filter
+#endif
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
 
 static struct
@@ -25,7 +33,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} initial_table __initdata 
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_LOCAL_IN] = 0,
@@ -90,7 +98,7 @@ ipt_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -108,7 +116,7 @@ ipt_local_out_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
@@ -139,22 +147,19 @@ static struct nf_hook_ops ipt_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init iptable_filter_init(void)
+int init_iptable_filter(void)
 {
 	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
-
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	struct ipt_table *tmp_filter;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_filter, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_filter = ipt_register_table(&packet_filter,
+			&initial_table.repl);
+	if (IS_ERR(tmp_filter))
+		return PTR_ERR(tmp_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = tmp_filter;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
@@ -164,14 +169,50 @@ static int __init iptable_filter_init(vo
 	return ret;
 
  cleanup_table:
-	ipt_unregister_table(&packet_filter);
+	ipt_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
 	return ret;
 }
 
-static void __exit iptable_filter_fini(void)
+void fini_iptable_filter(void)
 {
 	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-	ipt_unregister_table(&packet_filter);
+	ipt_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
+}
+
+static int __init iptable_filter_init(void)
+{
+	int err;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_iptable_filter();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_filter);
+	KSYMRESOLVE(fini_iptable_filter);
+	KSYMMODRESOLVE(iptable_filter);
+	return 0;
+}
+
+static void __exit iptable_filter_fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_filter);
+	KSYMUNRESOLVE(init_iptable_filter);
+	KSYMUNRESOLVE(fini_iptable_filter);
+	fini_iptable_filter();
 }
 
 module_init(iptable_filter_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/iptable_mangle.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/iptable_mangle.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/iptable_mangle.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/iptable_mangle.c	2017-01-13 08:40:23.000000000 -0500
@@ -16,6 +16,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 #include <linux/ip.h>
 
 MODULE_LICENSE("GPL");
@@ -34,7 +35,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[5];
 	struct ipt_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] 	= 0,
@@ -111,6 +112,13 @@ static struct ipt_table packet_mangler =
 	.af		= AF_INET,
 };
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_mangler	(get_exec_env()->_ipt_mangle_table)
+#else
+#define ve_packet_mangler	&packet_mangler
+#endif
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_route_hook(unsigned int hook,
@@ -119,7 +127,7 @@ ipt_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -148,7 +156,8 @@ ipt_local_hook(unsigned int hook,
 	daddr = (*pskb)->nh.iph->daddr;
 	tos = (*pskb)->nh.iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
+
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
 	    && ((*pskb)->nh.iph->saddr != saddr
@@ -200,14 +209,19 @@ static struct nf_hook_ops ipt_ops[] = {
 	},
 };
 
-static int __init iptable_mangle_init(void)
+int init_iptable_mangle(void)
 {
 	int ret;
+	struct ipt_table *tmp_mangler;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_mangler, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_mangler = ipt_register_table(&packet_mangler,
+			&initial_table.repl);
+	if (IS_ERR(tmp_mangler))
+		return PTR_ERR(tmp_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = tmp_mangler;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
@@ -217,14 +231,42 @@ static int __init iptable_mangle_init(vo
 	return ret;
 
  cleanup_table:
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
 	return ret;
 }
 
-static void __exit iptable_mangle_fini(void)
+void fini_iptable_mangle(void)
 {
 	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
+}
+
+static int __init iptable_mangle_init(void)
+{
+	int err;
+
+	err = init_iptable_mangle();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_iptable_mangle);
+	KSYMRESOLVE(fini_iptable_mangle);
+	KSYMMODRESOLVE(iptable_mangle);
+	return 0;
+}
+
+static void __exit iptable_mangle_fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_mangle);
+	KSYMUNRESOLVE(init_iptable_mangle);
+	KSYMUNRESOLVE(fini_iptable_mangle);
+	fini_iptable_mangle();
 }
 
 module_init(iptable_mangle_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/iptable_raw.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/iptable_raw.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/iptable_raw.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/iptable_raw.c	2017-01-13 08:40:23.000000000 -0500
@@ -118,12 +118,13 @@ static struct nf_hook_ops ipt_ops[] = {
 
 static int __init iptable_raw_init(void)
 {
+	struct ipt_table *tmp;
 	int ret;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_raw, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp = ipt_register_table(&packet_raw, &initial_table.repl);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	/* Register hooks */
 	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_tables.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_tables.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ip_tables.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ip_tables.c	2017-01-13 08:40:40.000000000 -0500
@@ -29,9 +29,11 @@
 #include <linux/proc_fs.h>
 #include <linux/err.h>
 #include <linux/cpumask.h>
+#include <ub/ub_mem.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -389,8 +391,8 @@ mark_source_chains(struct xt_table_info 
 				= (void *)ipt_get_target(e);
 
 			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
-				       hook, pos, e->comefrom);
+				ve_printk(VE_LOG, "iptables: loop hook %u pos "
+					"%u %08X.\n", hook, pos, e->comefrom);
 				return 0;
 			}
 			e->comefrom
@@ -404,6 +406,13 @@ mark_source_chains(struct xt_table_info 
 			    && unconditional(&e->ip)) {
 				unsigned int oldpos, size;
 
+				if (t->verdict < -NF_MAX_VERDICT - 1) {
+					duprintf("mark_source_chains: bad "
+						"negative verdict (%i)\n",
+							t->verdict);
+					return 0;
+				}
+
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
@@ -441,6 +450,14 @@ mark_source_chains(struct xt_table_info 
 				if (strcmp(t->target.u.user.name,
 					   IPT_STANDARD_TARGET) == 0
 				    && newpos >= 0) {
+					if (newpos > newinfo->size -
+						sizeof(struct ipt_entry)) {
+						duprintf("mark_source_chains: "
+							"bad verdict (%i)\n",
+								newpos);
+						return 0;
+					}
+
 					/* This a jump; chase it. */
 					duprintf("Jump rule %u -> %u\n",
 						 pos, newpos);
@@ -473,29 +490,32 @@ cleanup_match(struct ipt_entry_match *m,
 	return 0;
 }
 
-static inline int
-standard_check(const struct ipt_entry_target *t,
-	       unsigned int max_offset)
+static inline int check_match(struct ipt_entry_match *m, const char *name,
+				const struct ipt_ip *ip, unsigned int hookmask,
+				unsigned int *i)
 {
-	struct ipt_standard_target *targ = (void *)t;
+	struct ipt_match *match;
+	int ret;
 
-	/* Check standard info. */
-	if (targ->verdict >= 0
-	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
-		duprintf("ipt_standard_check: bad verdict (%i)\n",
-			 targ->verdict);
-		return 0;
-	}
-	if (targ->verdict < -NF_MAX_VERDICT - 1) {
-		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
-			 targ->verdict);
-		return 0;
+	match = m->u.kernel.match;
+	ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
+			     name, hookmask, ip->proto,
+			     ip->invflags & IPT_INV_PROTO);
+	if (!ret && m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ip, match, m->data,
+					      m->u.match_size - sizeof(*m),
+					      hookmask)) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		ret = -EINVAL;
 	}
-	return 1;
+	if (!ret)
+		(*i)++;
+	return ret;
 }
 
 static inline int
-check_match(struct ipt_entry_match *m,
+find_check_match(struct ipt_entry_match *m,
 	    const char *name,
 	    const struct ipt_ip *ip,
 	    unsigned int hookmask,
@@ -508,62 +528,85 @@ check_match(struct ipt_entry_match *m,
 						   m->u.user.revision),
 					"ipt_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
 
-	ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
-			     name, hookmask, ip->proto,
-			     ip->invflags & IPT_INV_PROTO);
+	ret = check_match(m, name, ip, hookmask, i);
 	if (ret)
 		goto err;
 
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ip, match, m->data,
-					      m->u.match_size - sizeof(*m),
-					      hookmask)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
-	(*i)++;
 	return 0;
 err:
 	module_put(m->u.kernel.match->me);
 	return ret;
 }
 
-static struct ipt_target ipt_standard_target;
-
-static inline int
-check_entry(struct ipt_entry *e, const char *name, unsigned int size,
-	    unsigned int *i)
+static inline int check_target(struct ipt_entry *e, const char *name)
 {
 	struct ipt_entry_target *t;
 	struct ipt_target *target;
 	int ret;
-	unsigned int j;
+
+	t = ipt_get_target(e);
+	target = t->u.kernel.target;
+	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
+			      name, e->comefrom, e->ip.proto,
+			      e->ip.invflags & IPT_INV_PROTO);
+	if (!ret && t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
+						      t->u.target_size
+						      - sizeof(*t),
+						      e->comefrom)) {
+		duprintf("check_target: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+	}
+	return ret;
+
+}
+
+static inline int check_entry(struct ipt_entry *e, const char *name)
+{
+	struct ipt_entry_target *t;
 
 	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		duprintf("check_entry: ip check failed %p %s.\n", e, name);
 		return -EINVAL;
 	}
 
 	if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset)
 		return -EINVAL;
 
+
+	t = ipt_get_target(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline int
+find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+	    unsigned int *i)
+{
+	struct ipt_entry_target *t;
+	struct ipt_target *target;
+	int ret;
+	unsigned int j;
+
+	ret = check_entry(e, name);
+	if (ret != 0)
+		return ret;
+
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+	ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, e->comefrom,
+									&j);
 	if (ret != 0)
 		goto cleanup_matches;
 
 	t = ipt_get_target(e);
-	ret = -EINVAL;
-	if (e->target_offset + t->u.target_size > e->next_offset)
-		goto cleanup_matches;
 	target = try_then_request_module(xt_find_target(AF_INET,
 						     t->u.user.name,
 						     t->u.user.revision),
@@ -575,28 +618,10 @@ check_entry(struct ipt_entry *e, const c
 	}
 	t->u.kernel.target = target;
 
-	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, e->ip.proto,
-			      e->ip.invflags & IPT_INV_PROTO);
+	ret = check_target(e, name);
 	if (ret)
 		goto err;
 
-	if (t->u.kernel.target == &ipt_standard_target) {
-		if (!standard_check(t, size)) {
-			ret = -EINVAL;
-			goto cleanup_matches;
-		}
-	} else if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
-						      e->comefrom)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
 	(*i)++;
 	return 0;
  err:
@@ -726,17 +751,18 @@ translate_table(const char *name,
 		}
 	}
 
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
+		return -ELOOP;
+
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry, name, size, &i);
-
-	if (ret != 0)
-		goto cleanup;
-
-	ret = -ELOOP;
-	if (!mark_source_chains(newinfo, valid_hooks, entry0))
-		goto cleanup;
+				find_check_entry, name, size, &i);
+	if (ret != 0) {
+		IPT_ENTRY_ITERATE(entry0, newinfo->size,
+				  cleanup_entry, &i);
+		return ret;
+	}
 
 	/* And one copy for every other CPU */
 	for_each_possible_cpu(i) {
@@ -744,9 +770,6 @@ translate_table(const char *name,
 			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
-	return 0;
-cleanup:
-	IPT_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i);
 	return ret;
 }
 
@@ -817,7 +840,7 @@ static inline struct xt_counters * alloc
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc_node(countersize, numa_node_id());
+	counters = ub_vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -908,13 +931,13 @@ copy_entries_to_user(unsigned int total_
 #ifdef CONFIG_COMPAT
 struct compat_delta {
 	struct compat_delta *next;
-	u_int16_t offset;
+	unsigned int offset;
 	short delta;
 };
 
 static struct compat_delta *compat_offsets = NULL;
 
-static int compat_add_offset(u_int16_t offset, short delta)
+static int compat_add_offset(unsigned int offset, short delta)
 {
 	struct compat_delta *tmp;
 
@@ -946,7 +969,7 @@ static void compat_flush_offsets(void)
 	}
 }
 
-static short compat_calc_jump(u_int16_t offset)
+static short compat_calc_jump(unsigned int offset)
 {
 	struct compat_delta *tmp;
 	short delta;
@@ -1031,7 +1054,7 @@ static int compat_calc_entry(struct ipt_
 		void *base, struct xt_table_info *newinfo)
 {
 	struct ipt_entry_target *t;
-	u_int16_t entry_offset;
+	unsigned int entry_offset;
 	int off, i, ret;
 
 	off = 0;
@@ -1194,7 +1217,7 @@ __do_replace(const char *name, unsigned 
 	void *loc_cpu_old_entry;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = ub_vmalloc_best(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
@@ -1371,7 +1394,7 @@ do_add_counters(void __user *user, unsig
 	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len - size, numa_node_id());
+	paddc = ub_vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
@@ -1474,7 +1497,7 @@ out:
 }
 
 static inline int
-compat_check_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct ipt_entry_match *m,
 	    const char *name,
 	    const struct ipt_ip *ip,
 	    unsigned int hookmask,
@@ -1502,6 +1525,31 @@ compat_check_calc_match(struct ipt_entry
 }
 
 static inline int
+compat_release_match(struct ipt_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+static inline int
+compat_release_entry(struct ipt_entry *e, unsigned int *i)
+{
+	struct ipt_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	/* Cleanup all matches */
+	IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+	t = ipt_get_target(e);
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+static inline int
 check_compat_entry_size_and_hooks(struct ipt_entry *e,
 			   struct xt_table_info *newinfo,
 			   unsigned int *size,
@@ -1514,7 +1562,7 @@ check_compat_entry_size_and_hooks(struct
 {
 	struct ipt_entry_target *t;
 	struct ipt_target *target;
-	u_int16_t entry_offset;
+	unsigned int entry_offset;
 	int ret, off, h, j;
 
 	duprintf("check_compat_entry_size_and_hooks %p\n", e);
@@ -1531,35 +1579,29 @@ check_compat_entry_size_and_hooks(struct
 		return -EINVAL;
 	}
 
-	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-		return -EINVAL;
-	}
+	ret = check_entry(e, name);
+	if (ret)
+		return ret;
 
-	if (e->target_offset + sizeof(struct compat_xt_entry_target) >
-								e->next_offset)
-		return -EINVAL;
 
 	off = 0;
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
+	ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip,
 			e->comefrom, &off, &j);
 	if (ret != 0)
-		goto out;
+		goto release_matches;
 
 	t = ipt_get_target(e);
-	ret = -EINVAL;
-	if (e->target_offset + t->u.target_size > e->next_offset)
-		goto out;
 	target = try_then_request_module(xt_find_target(AF_INET,
 						     t->u.user.name,
 						     t->u.user.revision),
 					 "ipt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
-		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+							t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
-		goto out;
+		goto release_matches;
 	}
 	t->u.kernel.target = target;
 
@@ -1587,39 +1629,22 @@ check_compat_entry_size_and_hooks(struct
 	(*i)++;
 	return 0;
 out:
-	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	module_put(t->u.kernel.target->me);
+release_matches:
+	IPT_MATCH_ITERATE(e, compat_release_match, &j);
 	return ret;
 }
 
 static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
-	void **dstptr, compat_uint_t *size, const char *name,
-	const struct ipt_ip *ip, unsigned int hookmask)
+	void **dstptr, compat_uint_t *size)
 {
-	struct ipt_entry_match *dm;
 	struct ipt_match *match;
-	int ret;
 
-	dm = (struct ipt_entry_match *)*dstptr;
 	match = m->u.kernel.match;
 	if (match->compat)
 		match->compat(m, dstptr, size, COMPAT_FROM_USER);
 	else
 		xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
-
-	ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
-			     name, hookmask, ip->proto,
-			     ip->invflags & IPT_INV_PROTO);
-	if (ret)
-		return ret;
-
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
-					      dm->u.match_size - sizeof(*dm),
-					      hookmask)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
-		return -EINVAL;
-	}
 	return 0;
 }
 
@@ -1639,10 +1664,9 @@ static int compat_copy_entry_from_user(s
 	memcpy(de, e, sizeof(struct ipt_entry));
 
 	*dstptr += sizeof(struct compat_ipt_entry);
-	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-			name, &de->ip, de->comefrom);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size);
 	if (ret)
-		goto out;
+		return ret;
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = ipt_get_target(e);
 	target = t->u.kernel.target;
@@ -1658,29 +1682,28 @@ static int compat_copy_entry_from_user(s
 		if ((unsigned char *)de - base < newinfo->underflow[h])
 			newinfo->underflow[h] -= origsize - *size;
 	}
+	return ret;
+}
 
-	t = ipt_get_target(de);
-	target = t->u.kernel.target;
-	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, e->ip.proto,
-			      e->ip.invflags & IPT_INV_PROTO);
+static inline int check_entry_data(struct ipt_entry *e, const char *name,
+					unsigned int *i)
+{
+	int j, ret;
+
+	j = 0;
+	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+	if (ret != 0)
+		goto cleanup_matches;
+
+	ret = check_target(e, name);
 	if (ret)
-		goto out;
+		goto cleanup_matches;
 
-	ret = -EINVAL;
-	if (t->u.kernel.target == &ipt_standard_target) {
-		if (!standard_check(t, *size))
-			goto out;
-	} else if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, de, target,
-				t->data, t->u.target_size - sizeof(*t),
-				de->comefrom)) {
-		duprintf("ip_tables: compat: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		goto out;
-	}
-	ret = 0;
-out:
+	(*i)++;
+	return 0;
+
+cleanup_matches:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
@@ -1694,7 +1717,7 @@ translate_compat_table(const char *name,
 		unsigned int *hook_entries,
 		unsigned int *underflows)
 {
-	unsigned int i;
+	unsigned int i, j;
 	struct xt_table_info *newinfo, *info;
 	void *pos, *entry0, *entry1;
 	unsigned int size;
@@ -1731,18 +1754,18 @@ translate_compat_table(const char *name,
 	}
 
 	/* Check hooks all assigned */
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (j = 0; j < NF_IP_NUMHOOKS; j++) {
 		/* Only hooks which are valid */
-		if (!(valid_hooks & (1 << i)))
+		if (!(valid_hooks & (1 << j)))
 			continue;
-		if (info->hook_entry[i] == 0xFFFFFFFF) {
+		if (info->hook_entry[j] == 0xFFFFFFFF) {
 			duprintf("Invalid hook entry %u %u\n",
-				 i, hook_entries[i]);
+				 j, hook_entries[j]);
 			goto out_unlock;
 		}
-		if (info->underflow[i] == 0xFFFFFFFF) {
+		if (info->underflow[j] == 0xFFFFFFFF) {
 			duprintf("Invalid underflow %u %u\n",
-				 i, underflows[i]);
+				 j, underflows[j]);
 			goto out_unlock;
 		}
 	}
@@ -1753,9 +1776,9 @@ translate_compat_table(const char *name,
 		goto out_unlock;
 
 	newinfo->number = number;
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-		newinfo->hook_entry[i] = info->hook_entry[i];
-		newinfo->underflow[i] = info->underflow[i];
+	for (j = 0; j < NF_IP_NUMHOOKS; j++) {
+		newinfo->hook_entry[j] = info->hook_entry[j];
+		newinfo->underflow[j] = info->underflow[j];
 	}
 	entry1 = newinfo->entries[raw_smp_processor_id()];
 	pos = entry1;
@@ -1772,6 +1795,18 @@ translate_compat_table(const char *name,
 	if (!mark_source_chains(newinfo, valid_hooks, entry1))
 		goto free_newinfo;
 
+	j = 0;
+	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, check_entry_data, name,
+							&j);
+	if (ret) {
+		i -= j;
+		IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, j,
+						compat_release_entry, &i);
+		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &j);
+		xt_free_table_info(newinfo);
+		return ret;
+	}
+
 	/* And one copy for every other CPU */
 	for_each_possible_cpu(i)
 		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
@@ -1785,8 +1820,10 @@ translate_compat_table(const char *name,
 free_newinfo:
 	xt_free_table_info(newinfo);
 out:
+	IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &i);
 	return ret;
 out_unlock:
+	compat_flush_offsets();
 	xt_compat_unlock(AF_INET);
 	goto out;
 }
@@ -1848,15 +1885,22 @@ compat_do_replace(void __user *user, uns
 	return ret;
 }
 
+static int do_ipt_set_ctl(struct sock *, int, void __user *, unsigned int);
+
 static int
 compat_do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user,
 		unsigned int len)
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
+
 	switch (cmd) {
 	case IPT_SO_SET_REPLACE:
 		ret = compat_do_replace(user, len);
@@ -1867,8 +1911,7 @@ compat_do_ipt_set_ctl(struct sock *sk,	i
 		break;
 
 	default:
-		duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
-		ret = -EINVAL;
+		ret = do_ipt_set_ctl(sk, cmd, user, len);
 	}
 
 	return ret;
@@ -2002,11 +2045,21 @@ compat_get_entries(struct compat_ipt_get
 	return ret;
 }
 
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
 static int
 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
 	int ret;
 
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+		return -EPERM;
+
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
+
 	switch (cmd) {
 	case IPT_SO_GET_INFO:
 		ret = get_info(user, len, 1);
@@ -2015,8 +2068,7 @@ compat_do_ipt_get_ctl(struct sock *sk, i
 		ret = compat_get_entries(user, len);
 		break;
 	default:
-		duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
-		ret = -EINVAL;
+		ret = do_ipt_get_ctl(sk, cmd, user, len);
 	}
 	return ret;
 }
@@ -2027,9 +2079,14 @@ do_ipt_set_ctl(struct sock *sk,	int cmd,
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
+
 	switch (cmd) {
 	case IPT_SO_SET_REPLACE:
 		ret = do_replace(user, len);
@@ -2052,9 +2109,14 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
+
 	switch (cmd) {
 	case IPT_SO_GET_INFO:
 		ret = get_info(user, len, 0);
@@ -2099,17 +2161,18 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 	return ret;
 }
 
-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
+struct xt_table *ipt_register_table(struct xt_table *table,
+		const struct ipt_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
 	static struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+		= { 0, 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
 	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* choose the copy on our node/cpu
 	 * but dont care of preemption
@@ -2124,28 +2187,30 @@ int ipt_register_table(struct xt_table *
 			      repl->underflow);
 	if (ret != 0) {
 		xt_free_table_info(newinfo);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	ret = xt_register_table(table, &bootstrap, newinfo);
-	if (ret != 0) {
+	table = virt_xt_register_table(table, &bootstrap, newinfo);
+	if (IS_ERR(table))
 		xt_free_table_info(newinfo);
-		return ret;
-	}
 
-	return 0;
+	return table;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *me;
 
- 	private = xt_unregister_table(table);
+	me = table->me;
+ 	private = virt_xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(me);
 	xt_free_table_info(private);
 }
 
@@ -2250,12 +2315,30 @@ static struct ipt_match icmp_matchstruct
 	.checkentry	= icmp_checkentry,
 };
 
+static int init_iptables(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	if (get_exec_env()->_xt_tables[AF_INET].next != NULL)
+		return -EEXIST;
+#endif
+
+	return xt_proto_init(AF_INET);
+}
+
+static void fini_iptables(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	get_exec_env()->_xt_tables[AF_INET].next = NULL;
+#endif
+	xt_proto_fini(AF_INET);
+}
+
 static int __init ip_tables_init(void)
 {
 	int ret;
 
-	ret = xt_proto_init(AF_INET);
-	if (ret < 0)
+	ret = init_iptables();
+	if (ret)
 		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
@@ -2274,6 +2357,9 @@ static int __init ip_tables_init(void)
 	if (ret < 0)
 		goto err5;
 
+	KSYMRESOLVE(init_iptables);
+	KSYMRESOLVE(fini_iptables);
+	KSYMMODRESOLVE(ip_tables);
 	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 
@@ -2284,24 +2370,25 @@ err4:
 err3:
 	xt_unregister_target(&ipt_standard_target);
 err2:
-	xt_proto_fini(AF_INET);
+	fini_iptables();
 err1:
 	return ret;
 }
 
 static void __exit ip_tables_fini(void)
 {
+	KSYMMODUNRESOLVE(ip_tables);
+	KSYMUNRESOLVE(init_iptables);
+	KSYMUNRESOLVE(fini_iptables);
 	nf_unregister_sockopt(&ipt_sockopts);
-
 	xt_unregister_match(&icmp_matchstruct);
 	xt_unregister_target(&ipt_error_target);
 	xt_unregister_target(&ipt_standard_target);
-
-	xt_proto_fini(AF_INET);
+	fini_iptables();
 }
 
 EXPORT_SYMBOL(ipt_register_table);
 EXPORT_SYMBOL(ipt_unregister_table);
 EXPORT_SYMBOL(ipt_do_table);
-module_init(ip_tables_init);
+subsys_initcall(ip_tables_init);
 module_exit(ip_tables_fini);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_CLUSTERIP.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_CLUSTERIP.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_CLUSTERIP.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_CLUSTERIP.c	2017-01-13 08:40:40.000000000 -0500
@@ -283,7 +283,7 @@ clusterip_hashfn(struct sk_buff *skb, st
 	}
 
 	/* node numbers are 1..n, not 0..n */
-	return ((hashval % config->num_total_nodes)+1);
+	return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
 }
 
 static inline int
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_ECN.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_ECN.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_ECN.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_ECN.c	2017-01-13 08:40:40.000000000 -0500
@@ -52,7 +52,7 @@ static inline int
 set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
 {
 	struct tcphdr _tcph, *tcph;
-	u_int16_t diffs[2];
+	u_int16_t oldval;
 
 	/* Not enought header? */
 	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +70,25 @@ set_ect_tcp(struct sk_buff **pskb, const
 		return 0;
 	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
 
+#if defined(CONFIG_VE)
+	if ((*pskb)->ip_summed == CHECKSUM_HW && !inward)
+		(*pskb)->proto_csum_blank = 1;
+#else
 	if ((*pskb)->ip_summed == CHECKSUM_HW &&
 	    skb_checksum_help(*pskb, inward))
 		return 0;
+#endif
 
-	diffs[0] = ((u_int16_t *)tcph)[6];
+	oldval = ((u_int16_t *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
 		tcph->ece = einfo->proto.tcp.ece;
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
-	diffs[1] = ((u_int16_t *)tcph)[6];
-	diffs[0] = diffs[0] ^ 0xFFFF;
 
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
-		tcph->check = csum_fold(csum_partial((char *)diffs,
-						     sizeof(diffs),
-						     tcph->check^0xFFFF));
+	tcph->check = nf_proto_csum_update((*pskb),
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 	return 1;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_hashlimit.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_hashlimit.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_hashlimit.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_hashlimit.c	2017-01-13 08:40:23.000000000 -0500
@@ -31,6 +31,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/list.h>
+#include <linux/sched.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_hashlimit.h>
@@ -44,7 +45,15 @@ MODULE_AUTHOR("Harald Welte <laforge@net
 MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
 
 /* need to declare this at the top */
+#if defined(CONFIG_VE_IPTABLES)
+#define hashlimit_procdir	\
+		(get_exec_env()->_ipt_hashlimit->hashlimit_procdir)
+#define hashlimit_htables	\
+		(get_exec_env()->_ipt_hashlimit->hashlimit_htables)
+#else
 static struct proc_dir_entry *hashlimit_procdir;
+static HLIST_HEAD(hashlimit_htables);
+#endif /* CONFIG_VE_IPTABLES */
 static struct file_operations dl_file_ops;
 
 /* hash table crap */
@@ -92,9 +101,12 @@ struct ipt_hashlimit_htable {
 
 static DEFINE_SPINLOCK(hashlimit_lock);	/* protects htables list */
 static DEFINE_MUTEX(hlimit_mutex);	/* additional checkentry protection */
-static HLIST_HEAD(hashlimit_htables);
+
 static kmem_cache_t *hashlimit_cachep __read_mostly;
 
+static int init_ipt_hashlimit(void);
+static void fini_ipt_hashlimit(void);
+
 static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
 {
 	return (ent->dst.dst_ip == b->dst_ip 
@@ -506,6 +518,9 @@ hashlimit_checkentry(const char *tablena
 	if (r->name[sizeof(r->name) - 1] != '\0')
 		return 0;
 
+	if (init_ipt_hashlimit())
+		return 0;
+
 	/* This is the best we've got: We cannot release and re-grab lock,
 	 * since checkentry() is called before ip_tables.c grabs ipt_mutex.  
 	 * We also cannot grab the hashtable spinlock, since htable_create will 
@@ -533,13 +548,99 @@ hashlimit_destroy(const struct xt_match 
 	struct ipt_hashlimit_info *r = matchinfo;
 
 	htable_put(r->hinfo);
+	if (!ve_is_super(get_exec_env()) && hlist_empty(&hashlimit_htables))
+		fini_ipt_hashlimit();
+}
+
+#ifdef CONFIG_COMPAT
+static int hashlimit_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_match *pm;
+	struct ipt_hashlimit_info *pinfo;
+	struct compat_ipt_hashlimit_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct xt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct compat_xt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct ipt_hashlimit_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_ipt_hashlimit_info));
+	memcpy(&rinfo, pinfo, offsetof(struct compat_ipt_hashlimit_info, hinfo));
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_match),
+				&rinfo, sizeof(struct compat_ipt_hashlimit_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int hashlimit_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_match *pm;
+	struct xt_entry_match *dstpm;
+	struct compat_ipt_hashlimit_info *pinfo;
+	struct ipt_hashlimit_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_xt_entry_match *)match;
+	dstpm = (struct xt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memset(*dstptr, 0, sizeof(struct xt_entry_match));
+	memcpy(*dstptr, pm, sizeof(struct compat_xt_entry_match));
+	pinfo = (struct compat_ipt_hashlimit_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct ipt_hashlimit_info));
+	memcpy(&rinfo, pinfo, offsetof(struct compat_ipt_hashlimit_info, hinfo));
+	memcpy(*dstptr + sizeof(struct xt_entry_match), &rinfo,
+		sizeof(struct ipt_hashlimit_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int hashlimit_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_hashlimit_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_hashlimit_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = hashlimit_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = hashlimit_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
 }
+#endif /*CONFIG_COMPAT*/
 
 static struct ipt_match ipt_hashlimit = {
 	.name		= "hashlimit",
 	.match		= hashlimit_match,
 	.matchsize	= sizeof(struct ipt_hashlimit_info),
 	.checkentry	= hashlimit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= hashlimit_compat,
+#endif
 	.destroy	= hashlimit_destroy,
 	.me		= THIS_MODULE
 };
@@ -648,6 +749,51 @@ static struct file_operations dl_file_op
 	.release = seq_release
 };
 
+static int init_ipt_hashlimit(void)
+{
+	struct ve_struct *env;
+
+	env = get_exec_env();
+#if defined(CONFIG_VE_IPTABLES)
+	if (env->_ipt_hashlimit)
+		return 0;
+
+	env->_ipt_hashlimit = kmalloc(sizeof(struct ve_ipt_hashlimit), GFP_KERNEL);
+	if (!env->_ipt_hashlimit)
+		return -ENOMEM;
+
+	memset(env->_ipt_hashlimit, 0, sizeof(struct ve_ipt_hashlimit));
+#endif
+
+	INIT_HLIST_HEAD(&hashlimit_htables);
+
+	hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net);
+	if (!hashlimit_procdir)
+		goto out_mem;
+
+	return 0;
+
+out_mem:
+#if defined(CONFIG_VE_IPTABLES)
+	kfree(env->_ipt_hashlimit);
+	env->_ipt_hashlimit = NULL;
+#endif
+	return -ENOMEM;
+}
+
+static void fini_ipt_hashlimit(void)
+{
+	struct ve_struct *env;
+	env = get_exec_env();
+
+	remove_proc_entry("ipt_hashlimit", proc_net);
+
+#if defined(CONFIG_VE_IPTABLES)
+	kfree(env->_ipt_hashlimit);
+	env->_ipt_hashlimit = NULL;
+#endif
+}
+
 static int init_or_fini(int fini)
 {
 	int ret = 0;
@@ -669,17 +815,14 @@ static int init_or_fini(int fini)
 		goto cleanup_unreg_match;
 	}
 
-	hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net);
-	if (!hashlimit_procdir) {
-		printk(KERN_ERR "Unable to create proc dir entry\n");
-		ret = -ENOMEM;
+	ret = init_ipt_hashlimit();
+	if (ret)
 		goto cleanup_free_slab;
-	}
 
 	return ret;
 
 cleanup:
-	remove_proc_entry("ipt_hashlimit", proc_net);
+	fini_ipt_hashlimit();
 cleanup_free_slab:
 	kmem_cache_destroy(hashlimit_cachep);
 cleanup_unreg_match:
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_LOG.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_LOG.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_LOG.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_LOG.c	2017-01-13 08:40:23.000000000 -0500
@@ -51,32 +51,32 @@ static void dump_packet(const struct nf_
 
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
-		printk("TRUNCATED");
+		ve_printk(VE_LOG, "TRUNCATED");
 		return;
 	}
 
 	/* Important fields:
 	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
 	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+	ve_printk(VE_LOG, "SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
 	       NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
 
 	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+	ve_printk(VE_LOG, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
 	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
 	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
 
 	/* Max length: 6 "CE DF MF " */
 	if (ntohs(ih->frag_off) & IP_CE)
-		printk("CE ");
+		ve_printk(VE_LOG, "CE ");
 	if (ntohs(ih->frag_off) & IP_DF)
-		printk("DF ");
+		ve_printk(VE_LOG, "DF ");
 	if (ntohs(ih->frag_off) & IP_MF)
-		printk("MF ");
+		ve_printk(VE_LOG, "MF ");
 
 	/* Max length: 11 "FRAG:65535 " */
 	if (ntohs(ih->frag_off) & IP_OFFSET)
-		printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+		ve_printk(VE_LOG, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
 
 	if ((logflags & IPT_LOG_IPOPT)
 	    && ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -87,15 +87,15 @@ static void dump_packet(const struct nf_
 		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
 					optsize, _opt);
 		if (op == NULL) {
-			printk("TRUNCATED");
+			ve_printk(VE_LOG, "TRUNCATED");
 			return;
 		}
 
 		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		printk("OPT (");
+		ve_printk(VE_LOG, "OPT (");
 		for (i = 0; i < optsize; i++)
-			printk("%02X", op[i]);
-		printk(") ");
+			ve_printk(VE_LOG, "%02X", op[i]);
+		ve_printk(VE_LOG, ") ");
 	}
 
 	switch (ih->protocol) {
@@ -103,7 +103,7 @@ static void dump_packet(const struct nf_
 		struct tcphdr _tcph, *th;
 
 		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
+		ve_printk(VE_LOG, "PROTO=TCP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -112,41 +112,41 @@ static void dump_packet(const struct nf_
 		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					sizeof(_tcph), &_tcph);
 		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
 		if (logflags & IPT_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
+			ve_printk(VE_LOG, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
+		ve_printk(VE_LOG, "WINDOW=%u ", ntohs(th->window));
 		/* Max length: 9 "RES=0x3F " */
-		printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		ve_printk(VE_LOG, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
 		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
 		if (th->cwr)
-			printk("CWR ");
+			ve_printk(VE_LOG, "CWR ");
 		if (th->ece)
-			printk("ECE ");
+			ve_printk(VE_LOG, "ECE ");
 		if (th->urg)
-			printk("URG ");
+			ve_printk(VE_LOG, "URG ");
 		if (th->ack)
-			printk("ACK ");
+			ve_printk(VE_LOG, "ACK ");
 		if (th->psh)
-			printk("PSH ");
+			ve_printk(VE_LOG, "PSH ");
 		if (th->rst)
-			printk("RST ");
+			ve_printk(VE_LOG, "RST ");
 		if (th->syn)
-			printk("SYN ");
+			ve_printk(VE_LOG, "SYN ");
 		if (th->fin)
-			printk("FIN ");
+			ve_printk(VE_LOG, "FIN ");
 		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
+		ve_printk(VE_LOG, "URGP=%u ", ntohs(th->urg_ptr));
 
 		if ((logflags & IPT_LOG_TCPOPT)
 		    && th->doff * 4 > sizeof(struct tcphdr)) {
@@ -159,15 +159,15 @@ static void dump_packet(const struct nf_
 						iphoff+ih->ihl*4+sizeof(_tcph),
 						optsize, _opt);
 			if (op == NULL) {
-				printk("TRUNCATED");
+				ve_printk(VE_LOG, "TRUNCATED");
 				return;
 			}
 
 			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
+			ve_printk(VE_LOG, "OPT (");
 			for (i = 0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
+				ve_printk(VE_LOG, "%02X", op[i]);
+			ve_printk(VE_LOG, ") ");
 		}
 		break;
 	}
@@ -175,7 +175,7 @@ static void dump_packet(const struct nf_
 		struct udphdr _udph, *uh;
 
 		/* Max length: 10 "PROTO=UDP " */
-		printk("PROTO=UDP ");
+		ve_printk(VE_LOG, "PROTO=UDP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -184,13 +184,13 @@ static void dump_packet(const struct nf_
 		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_udph), &_udph);
 		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ",
 		       ntohs(uh->source), ntohs(uh->dest),
 		       ntohs(uh->len));
 		break;
@@ -216,7 +216,7 @@ static void dump_packet(const struct nf_
 			    [ICMP_ADDRESSREPLY] = 12 };
 
 		/* Max length: 11 "PROTO=ICMP " */
-		printk("PROTO=ICMP ");
+		ve_printk(VE_LOG, "PROTO=ICMP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -225,19 +225,19 @@ static void dump_packet(const struct nf_
 		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					 sizeof(_icmph), &_icmph);
 		if (ich == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+		ve_printk(VE_LOG, "TYPE=%u CODE=%u ", ich->type, ich->code);
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		if (ich->type <= NR_ICMP_TYPES
 		    && required_len[ich->type]
 		    && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
@@ -246,19 +246,19 @@ static void dump_packet(const struct nf_
 		case ICMP_ECHOREPLY:
 		case ICMP_ECHO:
 			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
+			ve_printk(VE_LOG, "ID=%u SEQ=%u ",
 			       ntohs(ich->un.echo.id),
 			       ntohs(ich->un.echo.sequence));
 			break;
 
 		case ICMP_PARAMETERPROB:
 			/* Max length: 14 "PARAMETER=255 " */
-			printk("PARAMETER=%u ",
+			ve_printk(VE_LOG, "PARAMETER=%u ",
 			       ntohl(ich->un.gateway) >> 24);
 			break;
 		case ICMP_REDIRECT:
 			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			printk("GATEWAY=%u.%u.%u.%u ",
+			ve_printk(VE_LOG, "GATEWAY=%u.%u.%u.%u ",
 			       NIPQUAD(ich->un.gateway));
 			/* Fall through */
 		case ICMP_DEST_UNREACH:
@@ -266,16 +266,16 @@ static void dump_packet(const struct nf_
 		case ICMP_TIME_EXCEEDED:
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
-				printk("[");
+				ve_printk(VE_LOG, "[");
 				dump_packet(info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
-				printk("] ");
+				ve_printk(VE_LOG, "] ");
 			}
 
 			/* Max length: 10 "MTU=65535 " */
 			if (ich->type == ICMP_DEST_UNREACH
 			    && ich->code == ICMP_FRAG_NEEDED)
-				printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+				ve_printk(VE_LOG, "MTU=%u ", ntohs(ich->un.frag.mtu));
 		}
 		break;
 	}
@@ -287,26 +287,26 @@ static void dump_packet(const struct nf_
 			break;
 		
 		/* Max length: 9 "PROTO=AH " */
-		printk("PROTO=AH ");
+		ve_printk(VE_LOG, "PROTO=AH ");
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_ahdr), &_ahdr);
 		if (ah == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(ah->spi));
+		ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah->spi));
 		break;
 	}
 	case IPPROTO_ESP: {
 		struct ip_esp_hdr _esph, *eh;
 
 		/* Max length: 10 "PROTO=ESP " */
-		printk("PROTO=ESP ");
+		ve_printk(VE_LOG, "PROTO=ESP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -315,25 +315,25 @@ static void dump_packet(const struct nf_
 		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_esph), &_esph);
 		if (eh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(eh->spi));
+		ve_printk(VE_LOG, "SPI=0x%x ", ntohl(eh->spi));
 		break;
 	}
 	/* Max length: 10 "PROTO 255 " */
 	default:
-		printk("PROTO=%u ", ih->protocol);
+		ve_printk(VE_LOG, "PROTO=%u ", ih->protocol);
 	}
 
 	/* Max length: 15 "UID=4294967295 " */
  	if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- 			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+ 			ve_printk(VE_LOG, "UID=%u ", skb->sk->sk_socket->file->f_uid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 
@@ -374,7 +374,7 @@ ipt_log_packet(unsigned int pf,
 		loginfo = &default_loginfo;
 
 	spin_lock_bh(&log_lock);
-	printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+	ve_printk(VE_LOG, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
 	       prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
@@ -384,29 +384,29 @@ ipt_log_packet(unsigned int pf,
 		struct net_device *physoutdev = skb->nf_bridge->physoutdev;
 
 		if (physindev && in != physindev)
-			printk("PHYSIN=%s ", physindev->name);
+			ve_printk(VE_LOG, "PHYSIN=%s ", physindev->name);
 		if (physoutdev && out != physoutdev)
-			printk("PHYSOUT=%s ", physoutdev->name);
+			ve_printk(VE_LOG, "PHYSOUT=%s ", physoutdev->name);
 	}
 #endif
 
 	if (in && !out) {
 		/* MAC logging for input chain only. */
-		printk("MAC=");
+		ve_printk(VE_LOG, "MAC=");
 		if (skb->dev && skb->dev->hard_header_len
 		    && skb->mac.raw != (void*)skb->nh.iph) {
 			int i;
 			unsigned char *p = skb->mac.raw;
 			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
-				printk("%02x%c", *p,
+				ve_printk(VE_LOG, "%02x%c", *p,
 				       i==skb->dev->hard_header_len - 1
 				       ? ' ':':');
 		} else
-			printk(" ");
+			ve_printk(VE_LOG, " ");
 	}
 
 	dump_packet(loginfo, skb, 0);
-	printk("\n");
+	ve_printk(VE_LOG, "\n");
 	spin_unlock_bh(&log_lock);
 }
 
@@ -481,7 +481,7 @@ static int __init ipt_log_init(void)
 		/* we cannot make module load fail here, since otherwise
 		 * iptables userspace would abort */
 	}
-	
+
 	return 0;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_MASQUERADE.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_MASQUERADE.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_MASQUERADE.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_MASQUERADE.c	2017-01-13 08:40:23.000000000 -0500
@@ -108,6 +108,7 @@ masquerade_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
+#if 0
 static inline int
 device_cmp(struct ip_conntrack *i, void *ifindex)
 {
@@ -163,6 +164,7 @@ static struct notifier_block masq_dev_no
 static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
+#endif
 
 static struct ipt_target masquerade = {
 	.name		= "MASQUERADE",
@@ -180,12 +182,16 @@ static int __init ipt_masquerade_init(vo
 
 	ret = ipt_register_target(&masquerade);
 
+#if 0
+/*	These notifiers are unnecessary and may
+	lead to oops in virtual environments */
 	if (ret == 0) {
 		/* Register for device down reports */
 		register_netdevice_notifier(&masq_dev_notifier);
 		/* Register IP address change reports */
 		register_inetaddr_notifier(&masq_inet_notifier);
 	}
+#endif
 
 	return ret;
 }
@@ -193,8 +199,8 @@ static int __init ipt_masquerade_init(vo
 static void __exit ipt_masquerade_fini(void)
 {
 	ipt_unregister_target(&masquerade);
-	unregister_netdevice_notifier(&masq_dev_notifier);
-	unregister_inetaddr_notifier(&masq_inet_notifier);	
+/*	unregister_netdevice_notifier(&masq_dev_notifier);
+	unregister_inetaddr_notifier(&masq_inet_notifier);	*/
 }
 
 module_init(ipt_masquerade_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_recent.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_recent.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_recent.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_recent.c	2017-01-13 08:40:23.000000000 -0500
@@ -45,6 +45,18 @@ MODULE_PARM_DESC(ip_pkt_list_tot, "numbe
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
 
+#include <linux/sched.h>
+
+#if defined(CONFIG_VE_IPTABLES)
+#define tables		(get_exec_env()->_ipt_recent->tables)
+#define proc_dir	(get_exec_env()->_ipt_recent->proc_dir)
+#else
+static LIST_HEAD(tables);
+static struct proc_dir_entry	*proc_dir;
+#endif /* CONFIG_VE_IPTABLES */
+
+static int init_ipt_recent(void);
+static void fini_ipt_recent(void);
 
 struct recent_entry {
 	struct list_head	list;
@@ -68,12 +80,10 @@ struct recent_table {
 	struct list_head	iphash[0];
 };
 
-static LIST_HEAD(tables);
 static DEFINE_SPINLOCK(recent_lock);
 static DEFINE_MUTEX(recent_mutex);
 
 #ifdef CONFIG_PROC_FS
-static struct proc_dir_entry	*proc_dir;
 static struct file_operations	recent_fops;
 #endif
 
@@ -262,6 +272,9 @@ ipt_recent_checkentry(const char *tablen
 	    strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN)
 		return 0;
 
+	if (init_ipt_recent())
+		return 0;
+
 	mutex_lock(&recent_mutex);
 	t = recent_table_lookup(info->name);
 	if (t != NULL) {
@@ -306,6 +319,13 @@ ipt_recent_destroy(const struct xt_match
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+#ifdef CONFIG_VE_IPTABLES
+	if (!env->_ipt_recent)
+		return;
+#endif
 
 	mutex_lock(&recent_mutex);
 	t = recent_table_lookup(info->name);
@@ -320,6 +340,8 @@ ipt_recent_destroy(const struct xt_match
 		recent_table_free(t);
 	}
 	mutex_unlock(&recent_mutex);
+	if ((!ve_is_super(env)) && list_empty(&tables))
+		fini_ipt_recent();
 }
 
 #ifdef CONFIG_PROC_FS
@@ -480,6 +502,59 @@ static struct ipt_match recent_match = {
 	.me		= THIS_MODULE,
 };
 
+static int init_ipt_recent(void)
+{
+	struct ve_struct *env;
+	int err = 0;
+
+	env = get_exec_env();
+#ifdef CONFIG_VE_IPTABLES
+	if (env->_ipt_recent)
+		return 0;
+
+	env->_ipt_recent =
+		kmalloc(sizeof(struct ve_ipt_recent), GFP_KERNEL);
+	if (!env->_ipt_recent) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memset(env->_ipt_recent, 0, sizeof(struct ve_ipt_recent));
+
+	INIT_LIST_HEAD(&tables);
+#endif
+#ifdef CONFIG_PROC_FS
+	if (err)
+		return err;
+	proc_dir = proc_mkdir("ipt_recent", proc_net);
+	if (proc_dir == NULL) {
+		err = -ENOMEM;
+		goto out_mem;
+	}
+#endif
+out:
+	return err;
+out_mem:
+#ifdef CONFIG_VE_IPTABLES
+	kfree(env->_ipt_recent);
+#endif
+	goto out;
+}
+
+static void fini_ipt_recent(void)
+{
+	struct ve_struct *env;
+
+	env = get_exec_env();
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ipt_recent", proc_net);
+#endif
+#ifdef CONFIG_VE_IPTABLES
+	if (env->_ipt_recent)
+		kfree(env->_ipt_recent);
+	env->_ipt_recent = NULL;
+#endif
+}
+
 static int __init ipt_recent_init(void)
 {
 	int err;
@@ -489,25 +564,24 @@ static int __init ipt_recent_init(void)
 	ip_list_hash_size = 1 << fls(ip_list_tot);
 
 	err = ipt_register_match(&recent_match);
-#ifdef CONFIG_PROC_FS
 	if (err)
 		return err;
-	proc_dir = proc_mkdir("ipt_recent", proc_net);
-	if (proc_dir == NULL) {
+
+	err = init_ipt_recent();
+	if (err) {
 		ipt_unregister_match(&recent_match);
-		err = -ENOMEM;
+		return err;
 	}
-#endif
-	return err;
+
+	return 0;
 }
 
 static void __exit ipt_recent_exit(void)
 {
 	BUG_ON(!list_empty(&tables));
+
+	fini_ipt_recent();
 	ipt_unregister_match(&recent_match);
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ipt_recent", proc_net);
-#endif
 }
 
 module_init(ipt_recent_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_REDIRECT.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_REDIRECT.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_REDIRECT.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_REDIRECT.c	2017-01-13 08:40:23.000000000 -0500
@@ -84,8 +84,14 @@ redirect_target(struct sk_buff **pskb,
 		
 		rcu_read_lock();
 		indev = __in_dev_get_rcu((*pskb)->dev);
-		if (indev && (ifa = indev->ifa_list))
+		if (indev && (ifa = indev->ifa_list)) {
+			/* because of venet device specific, we should use
+			 * second ifa in the list */
+			if (IN_LOOPBACK(ntohl(ifa->ifa_local)) &&
+					ifa->ifa_next)
+				ifa = ifa->ifa_next;
 			newdst = ifa->ifa_local;
+		}
 		rcu_read_unlock();
 
 		if (!newdst)
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_REJECT.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_REJECT.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_REJECT.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_REJECT.c	2017-01-13 08:40:40.000000000 -0500
@@ -168,6 +168,8 @@ static void send_reset(struct sk_buff *o
 		tcph->ack = 1;
 	}
 
+	skb_reset_proto_csum(nskb);
+	nskb->ip_summed = CHECKSUM_NONE;
 	tcph->rst	= 1;
 	tcph->check	= tcp_v4_check(tcph, sizeof(struct tcphdr),
 				       niph->saddr, niph->daddr,
@@ -274,7 +276,7 @@ static int check(const char *tablename,
 	const struct ipt_entry *e = e_void;
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-		printk("REJECT: ECHOREPLY no longer supported.\n");
+		ve_printk(VE_LOG, "REJECT: ECHOREPLY no longer supported.\n");
 		return 0;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_TCPMSS.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_TCPMSS.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_TCPMSS.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_TCPMSS.c	2017-01-13 08:40:40.000000000 -0500
@@ -27,14 +27,6 @@ MODULE_DESCRIPTION("iptables TCP MSS mod
 #define DEBUGP(format, args...)
 #endif
 
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-                                      oldcheck^0xFFFF));
-}
-
 static inline unsigned int
 optlen(const u_int8_t *opt, unsigned int offset)
 {
@@ -62,9 +54,14 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
+#if defined(CONFIG_VE)
+	if ((*pskb)->ip_summed == CHECKSUM_HW && out != NULL)
+		(*pskb)->proto_csum_blank = 1;
+#else
 	if ((*pskb)->ip_summed == CHECKSUM_HW &&
 	    skb_checksum_help(*pskb, out == NULL))
 		return NF_DROP;
+#endif
 
 	iph = (*pskb)->nh.iph;
 	tcplen = (*pskb)->len - iph->ihl*4;
@@ -119,9 +116,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = (newmss & 0x00ff);
 
-			tcph->check = cheat_check(htons(oldmss)^0xFFFF,
-						  htons(newmss),
-						  tcph->check);
+			tcph->check = nf_proto_csum_update(*pskb,
+							   htons(oldmss)^0xFFFF,
+							   htons(newmss),
+							   tcph->check, 0);
 
 			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
 			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
@@ -161,8 +159,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
  	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
-				  htons(tcplen + TCPOLEN_MSS), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   htons(tcplen) ^ 0xFFFF,
+					   htons(tcplen + TCPOLEN_MSS),
+					   tcph->check, 1);
 	tcplen += TCPOLEN_MSS;
 
 	opt[0] = TCPOPT_MSS;
@@ -170,16 +170,19 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = (newmss & 0x00ff);
 
-	tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
+					   tcph->check, 0);
 
 	oldval = ((u_int16_t *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	tcph->check = cheat_check(oldval ^ 0xFFFF,
-				  ((u_int16_t *)tcph)[6], tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 
 	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
-	iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
-				 newtotlen, iph->check);
+	iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
+				    newtotlen, iph->check);
 	iph->tot_len = newtotlen;
 
 	DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
@@ -224,13 +227,14 @@ ipt_tcpmss_checkentry(const char *tablen
 			((hook_mask & ~((1 << NF_IP_FORWARD)
 			   	| (1 << NF_IP_LOCAL_OUT)
 			   	| (1 << NF_IP_POST_ROUTING))) != 0)) {
-		printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+		ve_printk(VE_LOG, "TCPMSS: path-MTU clamping only supported in"
+				" FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return 0;
 	}
 
 	if (IPT_MATCH_ITERATE(e, find_syn_match))
 		return 1;
-	printk("TCPMSS: Only works on TCP SYN packets\n");
+	ve_printk(VE_LOG, "TCPMSS: Only works on TCP SYN packets\n");
 	return 0;
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_TOS.c kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_TOS.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/netfilter/ipt_TOS.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/netfilter/ipt_TOS.c	2017-01-13 08:40:23.000000000 -0500
@@ -66,7 +66,7 @@ checkentry(const char *tablename,
 	    && tos != IPTOS_RELIABILITY
 	    && tos != IPTOS_MINCOST
 	    && tos != IPTOS_NORMALSVC) {
-		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+		ve_printk(VE_LOG, KERN_WARNING "TOS: bad tos value %#x\n", tos);
 		return 0;
 	}
 	return 1;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/proc.c kernel-2.6.18-417.el5-028stab121/net/ipv4/proc.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/proc.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/proc.c	2017-01-13 08:40:22.000000000 -0500
@@ -60,6 +60,9 @@ static int fold_prot_inuse(struct proto 
  */
 static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
+	if (!ve_is_super(get_exec_env()))
+		return 0;
+
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
@@ -281,7 +284,7 @@ static void icmpmsg_put(struct seq_file 
 
 	count = 0;
 	for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
-		val = fold_field((void **) icmpmsg_statistics, i);
+		val = fold_field((void **) ve_icmpmsg_statistics, i);
 		if (val) {
 			type[count] = i;
 			vals[count++] = val;
@@ -307,18 +310,18 @@ static void icmp_put(struct seq_file *se
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu",
-		fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS),
-		fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS));
+		fold_field((void **) ve_icmp_statistics, ICMP_MIB_INMSGS),
+		fold_field((void **) ve_icmp_statistics, ICMP_MIB_INERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			fold_field((void **) icmpmsg_statistics,
+			fold_field((void **) ve_icmpmsg_statistics,
 				icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
-		fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS),
-		fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS));
+		fold_field((void **) ve_icmp_statistics, ICMP_MIB_OUTMSGS),
+		fold_field((void **) ve_icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			fold_field((void **) icmpmsg_statistics,
+			fold_field((void **) ve_icmpmsg_statistics,
 				icmpmibmap[i].index | 0x100 ));
 }
 
@@ -335,11 +338,12 @@ static int snmp_seq_show(struct seq_file
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-			ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+			ve_ipv4_devconf.forwarding ? 1 : 2,
+			sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics, 
+			   fold_field((void **) ve_ip_statistics, 
 				      snmp4_ipstats_list[i].entry));
 
 	icmp_put(seq);  /* RFC 2011 compatibility */
@@ -354,11 +358,11 @@ static int snmp_seq_show(struct seq_file
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics, 
+				   fold_field((void **) ve_tcp_statistics, 
 					      snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
+				   fold_field((void **) ve_tcp_statistics,
 					      snmp4_tcp_list[i].entry));
 	}
 
@@ -369,7 +373,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics, 
+			   fold_field((void **) ve_udp_statistics, 
 				      snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -405,7 +409,7 @@ static int netstat_seq_show(struct seq_f
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics, 
+			   fold_field((void **) ve_net_statistics, 
 				      snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
@@ -415,7 +419,7 @@ static int netstat_seq_show(struct seq_f
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **)ip_statistics,
+			   fold_field((void **)ve_ip_statistics,
 					   snmp4_ipextstats_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -439,20 +443,20 @@ int __init ip_misc_proc_init(void)
 {
 	int rc = 0;
 
-	if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
+	if (!proc_glob_fops_create("net/netstat", S_IRUGO, &netstat_seq_fops))
 		goto out_netstat;
 
-	if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
+	if (!proc_glob_fops_create("net/snmp", S_IRUGO, &snmp_seq_fops))
 		goto out_snmp;
 
-	if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
+	if (!proc_glob_fops_create("net/sockstat", S_IRUGO, &sockstat_seq_fops))
 		goto out_sockstat;
 out:
 	return rc;
 out_sockstat:
-	proc_net_remove("snmp");
+	remove_proc_glob_entry("net/snmp", NULL);
 out_snmp:
-	proc_net_remove("netstat");
+	remove_proc_glob_entry("net/netstat", NULL);
 out_netstat:
 	rc = -ENOMEM;
 	goto out;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/raw.c kernel-2.6.18-417.el5-028stab121/net/ipv4/raw.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/raw.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/raw.c	2017-01-13 08:40:21.000000000 -0500
@@ -113,7 +113,8 @@ struct sock *__raw_v4_lookup(struct sock
 		if (inet->num == num 					&&
 		    !(inet->daddr && inet->daddr != raddr) 		&&
 		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
+		    ve_accessible_strict(sk->owner_env, get_exec_env()))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -568,6 +569,14 @@ static void raw_close(struct sock *sk, l
 	sk_common_release(sk);
 }
 
+static int raw_destroy(struct sock *sk)
+{
+	lock_sock(sk);
+	ip_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
 /* This gets rid of all the nasties in af_inet. -DaveM */
 static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -778,6 +787,7 @@ struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
 	.close		   = raw_close,
+	.destroy	   = raw_destroy,
 	.connect	   = ip4_datagram_connect,
 	.disconnect	   = udp_disconnect,
 	.ioctl		   = raw_ioctl,
@@ -813,8 +823,12 @@ static struct sock *raw_get_first(struct
 		struct hlist_node *node;
 
 		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-			if (sk->sk_family == PF_INET)
+			if (sk->sk_family == PF_INET) {
+				if (!ve_accessible(sk->owner_env,
+							get_exec_env()))
+					continue;
 				goto found;
+			}
 	}
 	sk = NULL;
 found:
@@ -828,8 +842,13 @@ static struct sock *raw_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET)
+			continue;
+		if (ve_accessible(sk->owner_env, get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
 		sk = sk_head(&raw_v4_htable[state->bucket]);
@@ -946,13 +965,13 @@ static struct file_operations raw_seq_fo
 
 int __init raw_proc_init(void)
 {
-	if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
+	if (!proc_glob_fops_create("net/raw", S_IRUGO, &raw_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void __init raw_proc_exit(void)
 {
-	proc_net_remove("raw");
+	remove_proc_glob_entry("net/raw", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/route.c kernel-2.6.18-417.el5-028stab121/net/ipv4/route.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/route.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/route.c	2017-01-13 08:40:21.000000000 -0500
@@ -117,6 +117,8 @@
 
 #define RT_GC_TIMEOUT (300*HZ)
 
+int ip_rt_src_check		= 1;
+
 static int ip_rt_min_delay		= 2 * HZ;
 static int ip_rt_max_delay		= 10 * HZ;
 static int ip_rt_max_size;
@@ -270,11 +272,28 @@ static unsigned int rt_hash_code(u32 dad
 	rt_hash_code((__force u32)(__be32)(daddr),\
 		     (__force u32)(__be32)(saddr) ^ ((idx) << 5))
 
+void prepare_rt_cache(void)
+{
+#ifdef CONFIG_VE
+	struct rtable *r;
+	int i;
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) {
+			r->fl.owner_env = get_ve0();
+		}
+		spin_unlock_bh(rt_hash_lock_addr(i));
+	}
+#endif
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	int bucket;
 };
 
+static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
 static struct rtable *rt_cache_get_first(struct seq_file *seq)
 {
 	struct rtable *r = NULL;
@@ -287,6 +306,8 @@ static struct rtable *rt_cache_get_first
 			break;
 		rcu_read_unlock_bh();
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		r = rt_cache_get_next(seq, r);
 	return r;
 }
 
@@ -294,6 +315,7 @@ static struct rtable *rt_cache_get_next(
 {
 	struct rt_cache_iter_state *st = rcu_dereference(seq->private);
 
+loop:
 	r = r->u.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
@@ -302,6 +324,8 @@ static struct rtable *rt_cache_get_next(
 		rcu_read_lock_bh();
 		r = rt_hash_table[st->bucket].chain;
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		goto loop;
 	return r;
 }
 
@@ -587,7 +611,8 @@ static inline int compare_keys(struct fl
 {
 	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
 	       fl1->oif     == fl2->oif &&
-	       fl1->iif     == fl2->iif;
+	       fl1->iif     == fl2->iif &&
+	       ve_accessible_strict(fl1->owner_env, fl2->owner_env);
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
@@ -742,28 +767,107 @@ nofree:
 	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
 }
 
+typedef unsigned long rt_flush_gen_t;
+
+#ifdef CONFIG_VE
+
+static rt_flush_gen_t rt_flush_gen;
+
+/* called under rt_flush_lock */
+static void set_rt_flush_required(struct ve_struct *env)
+{
+	/*
+	 * If the global generation rt_flush_gen is equal to G, then
+	 * the pass considering entries labelled by G is yet to come.
+	 */
+	env->rt_flush_required = rt_flush_gen;
+}
+
+static spinlock_t rt_flush_lock;
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	rt_flush_gen_t g;
+
+	spin_lock_bh(&rt_flush_lock);
+	g = rt_flush_gen++;
+	spin_unlock_bh(&rt_flush_lock);
+	return g;
+}
+
+static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
+{
+	/* can be checked without the lock */
+	return env->rt_flush_required >= gen;
+}
+
+#else
+
+static void set_rt_flush_required(struct ve_struct *env)
+{
+}
+
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	return 0;
+}
+
+#endif
+
 /* This can run from both BH and non-BH contexts, the latter
  * in the case of a forced flush event.
  */
 static void rt_run_flush(unsigned long dummy)
 {
 	int i;
-	struct rtable *rth, *next;
+	struct rtable * rth, * next;
+	struct rtable * tail;
+	rt_flush_gen_t gen;
 
 	rt_deadline = 0;
 
 	get_random_bytes(&rt_hash_rnd, 4);
 
+	gen = reset_rt_flush_required();
+
 	for (i = rt_hash_mask; i >= 0; i--) {
+#ifdef CONFIG_VE
+		struct rtable ** prev, * p;
+
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.rt_next)
+			if (!check_rt_flush_required(tail->fl.owner_env, gen))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.rt_next;
+			if (!check_rt_flush_required(p->fl.owner_env, gen)) {
+				prev = &p->u.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+
+#else
 		if (!in_softirq() && need_resched())
 			cond_resched();
 		spin_lock_bh(rt_hash_lock_addr(i));
 		rth = rt_hash_table[i].chain;
 		if (rth)
 			rt_hash_table[i].chain = NULL;
+		tail = NULL;
+
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.rt_next;
 			rt_free(rth);
 		}
@@ -802,6 +906,8 @@ void rt_cache_flush(int delay)
 			delay = tmo;
 	}
 
+	set_rt_flush_required(get_exec_env());
+
 	if (delay <= 0) {
 		spin_unlock_bh(&rt_flush_lock);
 		rt_run_flush(0);
@@ -814,12 +920,34 @@ void rt_cache_flush(int delay)
 	mod_timer(&rt_flush_timer, now+delay);
 	spin_unlock_bh(&rt_flush_lock);
 }
+EXPORT_SYMBOL(rt_cache_flush);
 
 static void rt_secret_rebuild(unsigned long dummy)
 {
+	int i;
+	struct rtable *rth, *next;
 	unsigned long now = jiffies;
 
-	rt_cache_flush(0);
+	spin_lock_bh(&rt_flush_lock);
+	del_timer(&rt_flush_timer);
+	spin_unlock_bh(&rt_flush_lock);
+
+	rt_deadline = 0;
+	get_random_bytes(&rt_hash_rnd, 4);
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+		if (rth)
+			rt_hash_table[i].chain = NULL;
+		spin_unlock_bh(rt_hash_lock_addr(i));
+
+		for (; rth; rth = next) {
+			next = rth->u.rt_next;
+			rt_free(rth);
+		}
+	}
+
 	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
 }
 
@@ -1263,6 +1391,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 	u32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
 	struct netevent_redirect netevent;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
 
 	if (!in_dev)
 		return;
@@ -1297,6 +1428,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				if (rth->fl.fl4_dst != daddr ||
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.oif != ikeys[k] ||
+#ifdef CONFIG_VE
+				    !ve_accessible_strict(rth->fl.owner_env,
+					    		  ve) ||
+#endif
 				    rth->fl.iif != 0) {
 					rthp = &rth->u.rt_next;
 					continue;
@@ -1335,6 +1470,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
 				rt->u.dst.xfrm		= NULL;
+#ifdef CONFIG_VE
+				rt->fl.owner_env = ve;
+#endif
 
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
@@ -1784,6 +1922,9 @@ static int ip_route_input_mc(struct sk_b
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -1929,6 +2070,9 @@ static inline int __mkroute_input(struct
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
@@ -2174,6 +2318,9 @@ local_input:
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -2258,6 +2405,9 @@ int ip_route_input(struct sk_buff *skb, 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == skb->nfmark &&
 #endif
+#ifdef CONFIG_VE
+		    rth->fl.owner_env == get_exec_env() &&
+#endif
 		    rth->fl.fl4_tos == tos) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
@@ -2385,6 +2535,9 @@ static inline int __mkroute_output(struc
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->rt_dst	= fl->fl4_dst;
 	rth->rt_src	= fl->fl4_src;
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
@@ -2553,10 +2706,13 @@ static int ip_route_output_slow(struct r
 		    ZERONET(oldflp->fl4_src))
 			goto out;
 
-		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
-			goto out;
+		if (ip_rt_src_check) {
+			/* It is equivalent to
+			   inet_addr_type(saddr) == RTN_LOCAL */
+			dev_out = ip_dev_find(oldflp->fl4_src);
+			if (dev_out == NULL)
+				goto out;
+		}
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
@@ -2583,6 +2739,12 @@ static int ip_route_output_slow(struct r
 			   Luckily, this hack is good workaround.
 			 */
 
+			if (dev_out == NULL) {
+				dev_out = ip_dev_find(oldflp->fl4_src);
+				if (dev_out == NULL)
+					goto out;
+			}
+
 			fl.oif = dev_out->ifindex;
 			goto make_route;
 		}
@@ -2732,6 +2894,7 @@ int __ip_route_output_key(struct rtable 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
 #endif
+		    ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
 
@@ -2931,7 +3094,7 @@ static int rt_fill_info(struct sk_buff *
 		u32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    ipv4_devconf.mc_forwarding) {
+		    ve_ipv4_devconf.mc_forwarding) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
@@ -3082,22 +3245,22 @@ void ip_rt_multicast_event(struct in_dev
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
+int ipv4_flush_delay;
 
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		rt_cache_flush(flush_delay);
+		rt_cache_flush(ipv4_flush_delay);
 		return 0;
 	} 
 
 	return -EINVAL;
 }
 
-static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						int __user *name,
 						int nlen,
 						void __user *oldval,
@@ -3180,7 +3343,7 @@ ctl_table ipv4_route_table[] = {
         {
 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
 		.procname	= "flush",
-		.data		= &flush_delay,
+		.data		= &ipv4_flush_delay,
 		.maxlen		= sizeof(int),
 		.mode		= 0200,
 		.proc_handler	= &ipv4_sysctl_rtcache_flush,
@@ -3484,15 +3647,18 @@ int __init ip_rt_init(void)
 #ifdef CONFIG_PROC_FS
 	{
 	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 
-			    		     proc_net_stat))) {
+
+	if (!proc_glob_fops_create("net/rt_cache",
+				S_IRUGO, &rt_cache_seq_fops))
+		return -ENOMEM;
+
+	if (!(rtstat_pde = create_proc_glob_entry("net/stat/rt_cache",
+				S_IRUGO, NULL)))
 		return -ENOMEM;
-	}
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
 	}
 #ifdef CONFIG_NET_CLS_ROUTE
-	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+	create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
 #endif
 #endif
 #ifdef CONFIG_XFRM
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/sysctl_net_ipv4.c kernel-2.6.18-417.el5-028stab121/net/ipv4/sysctl_net_ipv4.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/sysctl_net_ipv4.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/sysctl_net_ipv4.c	2017-01-13 08:40:41.000000000 -0500
@@ -23,6 +23,10 @@
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
+extern unsigned int sysctl_ve_ifa_nr;
+
+int sysctl_tcp_use_sg = 1;
+EXPORT_SYMBOL(sysctl_tcp_use_sg);
 
 #ifdef CONFIG_SYSCTL
 static int zero;
@@ -35,16 +39,15 @@ struct ipv4_config ipv4_config;
 
 #ifdef CONFIG_SYSCTL
 
-static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int val = ipv4_devconf.forwarding;
+	int val = ve_ipv4_devconf.forwarding;
 	int ret;
 
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
-	if (write && ipv4_devconf.forwarding != val) {
+	if (write && ve_ipv4_devconf.forwarding != val) {
 		rtnl_lock();
 		inet_forward_change();
 		rtnl_unlock();
@@ -54,7 +57,7 @@ int ipv4_sysctl_forward(ctl_table *ctl, 
 	return ret;
 }
 
-static int ipv4_sysctl_forward_strategy(ctl_table *table,
+int ipv4_sysctl_forward_strategy(ctl_table *table,
 			 int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
 			 void __user *newval, size_t newlen, 
@@ -398,7 +401,8 @@ ctl_table ipv4_table[] = {
 		.data		= &sysctl_tcp_syncookies,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_ve_immutable,
+		.virt_handler	= 1,
 	},
 #endif
 	{
@@ -506,6 +510,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_TCP_USE_SG,
+		.procname	= "tcp_use_sg",
+		.data		= &sysctl_tcp_use_sg,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 
 #endif
 	{
@@ -699,6 +711,22 @@ ctl_table ipv4_table[] = {
 		.extra1		= &zero
 	},
 	{
+		.ctl_name       = NET_TCP_MAX_TW_KMEM_FRACTION,
+		.procname       = "tcp_max_tw_kmem_fraction",
+		.data           = &sysctl_tcp_max_tw_kmem_fraction,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = &proc_dointvec
+	},
+	{
+		.ctl_name       = NET_TCP_MAX_TW_BUCKETS_UB,
+		.procname       = "tcp_max_tw_buckets_ub",
+		.data           = &sysctl_tcp_max_tw_buckets_ub,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = &proc_dointvec
+	},
+	{
 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
 		.procname	= "tcp_no_metrics_save",
 		.data		= &sysctl_tcp_nometrics_save,
@@ -844,6 +872,14 @@ ctl_table ipv4_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "ve-ifa-nr",
+		.data		= &sysctl_ve_ifa_nr,
+		.maxlen		= sizeof(sysctl_ve_ifa_nr),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/tcp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/tcp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp.c	2017-01-13 08:40:23.000000000 -0500
@@ -247,6 +247,7 @@
  *	TCP_CLOSE		socket is finished
  */
 
+#include <linux/kmem_cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
@@ -265,6 +266,10 @@
 #include <net/ip.h>
 #include <net/netdma.h>
 
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
@@ -322,6 +327,7 @@ unsigned int tcp_poll(struct file *file,
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	int check_send_space;
 
 	sock_poll_wait(file, sk->sk_sleep, wait);
 	if (sk->sk_state == TCP_LISTEN)
@@ -334,6 +340,21 @@ unsigned int tcp_poll(struct file *file,
 
 	mask = 0;
 
+	check_send_space = 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
+		unsigned long size;
+		size = MAX_TCP_HEADER + tp->mss_cache;
+		if (size > SOCK_MIN_UBCSPACE)
+			size = SOCK_MIN_UBCSPACE;
+		size = skb_charge_size(size);
+		if (ub_sock_makewres_tcp(sk, size)) {
+			check_send_space = 0;
+			ub_sock_sndqueueadd_tcp(sk, size);
+		}
+	}
+#endif
+
 	/*
 	 * POLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
@@ -377,7 +398,7 @@ unsigned int tcp_poll(struct file *file,
 		     sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
 			mask |= POLLIN | POLLRDNORM;
 
-		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+		if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
 			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
@@ -533,16 +554,24 @@ static ssize_t do_tcp_sendpages(struct s
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
+		unsigned long chargesize = 0;
 
 		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
 new_segment:
+			chargesize = 0;
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
+			chargesize = skb_charge_size(MAX_TCP_HEADER +
+					tp->mss_cache);
+			if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+				goto wait_for_ubspace;
 			skb = sk_stream_alloc_pskb(sk, 0, 0,
 						   sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
+			ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+			chargesize = 0;
 
 			skb_entail(sk, tp, skb);
 			copy = size_goal;
@@ -598,10 +627,15 @@ new_segment:
 wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+		ub_sock_retwres_tcp(sk, chargesize,
+			skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
+		chargesize = 0;
+wait_for_ubspace:
 		if (copied)
 			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+		err = __sk_stream_wait_memory(sk, &timeo, chargesize);
+		if (err != 0)
 			goto do_error;
 
 		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -638,9 +672,6 @@ ssize_t tcp_sendpage(struct socket *sock
 	return res;
 }
 
-#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
-#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
-
 static inline int select_size(struct sock *sk, struct tcp_sock *tp)
 {
 	int tmp = tp->mss_cache;
@@ -700,6 +731,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 	while (--iovlen >= 0) {
 		int seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
+		unsigned long chargesize = 0;
 
 		iov++;
 
@@ -710,18 +742,27 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 
 			if (!sk->sk_send_head ||
 			    (copy = size_goal - skb->len) <= 0) {
+				unsigned long size;
 
 new_segment:
 				/* Allocate new segment. If the interface is SG,
 				 * allocate skb fitting to single page.
 				 */
+				chargesize = 0;
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
-
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
-							   0, sk->sk_allocation);
+				size = select_size(sk, tp);
+				chargesize = skb_charge_size(MAX_TCP_HEADER +
+						size);
+				if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+					goto wait_for_ubspace;
+				skb = sk_stream_alloc_pskb(sk, size, 0,
+						sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
+				ub_skb_set_charge(skb, sk, chargesize,
+						UB_TCPSNDBUF);
+				chargesize = 0;
 
 				/*
 				 * Check whether we can use HW checksum.
@@ -767,6 +808,7 @@ new_segment:
 				} else if (page) {
 					if (off == PAGE_SIZE) {
 						put_page(page);
+						ub_sock_tcp_detachpage(sk);
 						TCP_PAGE(sk) = page = NULL;
 						off = 0;
 					}
@@ -780,6 +822,9 @@ new_segment:
 					goto wait_for_memory;
 
 				if (!page) {
+					chargesize = PAGE_SIZE;
+					if (ub_sock_tcp_chargepage(sk) < 0)
+						goto wait_for_ubspace;
 					/* Allocate new cache page. */
 					if (!(page = sk_stream_alloc_page(sk)))
 						goto wait_for_memory;
@@ -811,7 +856,8 @@ new_segment:
 					} else if (off + copy < PAGE_SIZE) {
 						get_page(page);
 						TCP_PAGE(sk) = page;
-					}
+					} else
+						ub_sock_tcp_detachpage(sk);
 				}
 
 				TCP_OFF(sk) = off + copy;
@@ -842,10 +888,15 @@ new_segment:
 wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+			ub_sock_retwres_tcp(sk, chargesize,
+				skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
+			chargesize = 0;
+wait_for_ubspace:
 			if (copied)
 				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+			err = __sk_stream_wait_memory(sk, &timeo, chargesize);
+			if (err != 0)
 				goto do_error;
 
 			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -943,7 +994,18 @@ void tcp_cleanup_rbuf(struct sock *sk, i
 #if TCP_DEBUG
 	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
-	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+	if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
+		printk("KERNEL: assertion: skb==NULL || "
+				"before(tp->copied_seq, skb->end_seq)\n");
+		printk("VE%u pid %d comm %.16s\n", 
+				(get_exec_env() ? VEID(get_exec_env()) : 0),
+				current->pid, current->comm);
+		printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
+				tp->copied_seq, tp->rcv_nxt);
+		printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
+				skb->len, TCP_SKB_CB(skb)->seq, 
+				TCP_SKB_CB(skb)->end_seq);
+	}
 #endif
 
 	if (inet_csk_ack_scheduled(sk)) {
@@ -1193,7 +1255,23 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 				goto found_ok_skb;
 			if (skb->h.th->fin)
 				goto found_fin_ok;
-			BUG_TRAP(flags & MSG_PEEK);
+			if (!(flags & MSG_PEEK)) {
+				printk("KERNEL: assertion: flags&MSG_PEEK\n");
+				printk("VE%u pid %d comm %.16s\n", 
+						(get_exec_env() ? 
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags,
+						(int)len, tp->copied_seq,
+						tp->rcv_nxt);
+				printk("skb->len=%d, *seq=%d, skb->seq=%d, "
+						"skb->end_seq=%d, offset=%d\n",
+						skb->len, *seq, 
+						TCP_SKB_CB(skb)->seq,
+						TCP_SKB_CB(skb)->end_seq, 
+						offset);
+			}
 			skb = skb->next;
 		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
 
@@ -1256,8 +1334,19 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 
 			tp->ucopy.len = len;
 
-			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
-				 (flags & (MSG_PEEK | MSG_TRUNC)));
+			if (!(tp->copied_seq == tp->rcv_nxt || 
+						(flags&(MSG_PEEK|MSG_TRUNC)))) {
+				printk("KERNEL: assertion: tp->copied_seq == "
+						"tp->rcv_nxt || ...\n");
+				printk("VE%u pid %d comm %.16s\n", 
+						(get_exec_env() ?
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags,
+						(int)len, tp->copied_seq,
+						tp->rcv_nxt);
+			}
 
 			/* Ugly... If prequeue is not empty, we have to
 			 * process it before releasing socket, otherwise
@@ -1638,7 +1727,7 @@ adjudge_to_death:
 	state = sk->sk_state;
 	sock_hold(sk);
 	sock_orphan(sk);
-	atomic_inc(sk->sk_prot->orphan_count);
+	ub_inc_orphan_count(sk);
 
 	/* It is the last release_sock in its life. It will remove backlog. */
 	release_sock(sk);
@@ -1688,13 +1777,19 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
+		int orphans = ub_get_orphan_count(sk);
+
 		sk_mem_reclaim(sk);
-		if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
-		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
-			if (net_ratelimit())
+		if (ub_too_many_orphans(sk, orphans)) {
+			if (net_ratelimit()) {
+				int ubid = 0;
+#ifdef CONFIG_USER_RESOURCE
+				ubid = sock_has_ubc(sk) ?
+				   top_beancounter(sock_bc(sk)->ub)->ub_uid : 0;
+#endif
 				printk(KERN_INFO "TCP: too many of orphaned "
-				       "sockets\n");
+				       "sockets (%d in CT%d)\n", orphans, ubid);
+			}
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
@@ -1770,6 +1865,7 @@ int tcp_disconnect(struct sock *sk, int 
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
 	tp->bytes_acked = 0;
+	tp->advmss = 65535;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
@@ -2370,6 +2466,7 @@ int tcp_gro_complete(struct sk_buff *skb
 EXPORT_SYMBOL(tcp_gro_complete);
 
 extern void __skb_cb_too_small_for_tcp(int, int);
+extern unsigned int nr_free_lowpages(void);
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
@@ -2387,6 +2484,7 @@ void __init tcp_init(void)
 	struct sk_buff *skb = NULL;
 	unsigned long limit;
 	int order, i, max_share;
+	unsigned long goal;
 
 	if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
 		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
@@ -2395,7 +2493,7 @@ void __init tcp_init(void)
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
+				  SLAB_HWCACHE_ALIGN | SLAB_UBC, NULL, NULL);
 	if (!tcp_hashinfo.bind_bucket_cachep)
 		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
 
@@ -2453,10 +2551,19 @@ void __init tcp_init(void)
 		sysctl_max_syn_backlog = 128;
 	}
 
+	goal = nr_free_lowpages() / 6;
+	while (order >= 3 && (1536<<order) > goal)
+		order--;
+
 	sysctl_tcp_mem[0] =  768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
 	sysctl_tcp_mem[2] = 1536 << order;
 
+	if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 4096)
+		sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 4096;
+	if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 4096)
+		sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 4096;
+
 	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
 	max_share = min(4UL*1024*1024, limit);
 
@@ -2487,4 +2594,4 @@ EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
 EXPORT_SYMBOL(tcp_statistics);
-EXPORT_SYMBOL_GPL(tcp_cleanup_rbuf);
+EXPORT_SYMBOL(tcp_cleanup_rbuf);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/tcp_input.c kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_input.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/tcp_input.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_input.c	2017-01-13 08:40:17.000000000 -0500
@@ -72,6 +72,8 @@
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
+#include <ub/ub_tcp.h>
+
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
 int sysctl_tcp_sack = 1;
@@ -254,7 +256,7 @@ static void tcp_grow_window(struct sock 
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_memory_pressure) {
+	    ub_tcp_rmem_allows_expand(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -323,6 +325,8 @@ static void tcp_init_buffer_space(struct
 
 	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	ub_tcp_update_maxadvmss(sk);
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -334,7 +338,7 @@ static void tcp_clamp_window(struct sock
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !tcp_memory_pressure &&
+	    !ub_tcp_memory_pressure(sk) &&
 	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
@@ -3169,8 +3173,28 @@ static void tcp_ofo_queue(struct sock *s
 	}
 }
 
+static int tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
+static inline int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_stream_rmem_schedule(sk, skb)) {
+
+		if (tcp_prune_queue(sk) < 0)
+			return -1;
+
+		if (!sk_stream_rmem_schedule(sk, skb)) {
+			if (!tcp_prune_ofo_queue(sk))
+				return -1;
+
+			if (!sk_stream_rmem_schedule(sk, skb))
+				return -1;
+		}
+	}
+	return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = skb->h.th;
@@ -3220,12 +3244,9 @@ static void tcp_data_queue(struct sock *
 		if (eaten <= 0) {
 queue_and_out:
 			if (eaten < 0 &&
-			    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-			     !sk_rmem_schedule(sk, skb->truesize))) {
-				if (tcp_prune_queue(sk) < 0 ||
-				    !sk_rmem_schedule(sk, skb->truesize))
-					goto drop;
-			}
+			    tcp_try_rmem_schedule(sk, skb))
+				goto drop_part;
+
 			skb_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
@@ -3268,6 +3289,12 @@ out_of_window:
 drop:
 		__kfree_skb(skb);
 		return;
+
+drop_part:
+		if (after(tp->copied_seq, tp->rcv_nxt))
+			tp->rcv_nxt = tp->copied_seq;
+		__kfree_skb(skb);
+		return;
 	}
 
 	/* Out of window. F.e. zero window probe. */
@@ -3294,12 +3321,8 @@ drop:
 
 	TCP_ECN_check_ce(tp, skb);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb->truesize)) {
-		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_rmem_schedule(sk, skb->truesize))
-			goto drop;
-	}
+	if (tcp_try_rmem_schedule(sk, skb))
+		goto drop;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -3439,6 +3462,10 @@ tcp_collapse(struct sock *sk, struct sk_
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+		if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
+			kfree_skb(nskb);
+			return;
+		}
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
 		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
@@ -3518,6 +3545,32 @@ static void tcp_collapse_ofo_queue(struc
 	}
 }
 
+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int res = 0;
+
+	if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_stream_mem_reclaim(sk);
+		res = 1;
+	}
+	return res;
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -3535,7 +3588,7 @@ static int tcp_prune_queue(struct sock *
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk, tp);
-	else if (tcp_memory_pressure)
+	else if (ub_tcp_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -3551,20 +3604,7 @@ static int tcp_prune_queue(struct sock *
 	/* Collapsing did not help, destructive actions follow.
 	 * This must not ever occur. */
 
-	/* First, purge the out_of_order queue. */
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
-
-		/* Reset SACK state.  A conforming SACK implementation will
-		 * do the same at a timeout based retransmit.  When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tp->rx_opt.sack_ok)
-			tcp_sack_reset(&tp->rx_opt);
-		sk_mem_reclaim(sk);
-	}
+	tcp_prune_ofo_queue(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
@@ -3613,7 +3653,7 @@ static int tcp_should_expand_sndbuf(stru
 		return 0;
 
 	/* If we are under global TCP memory pressure, do not expand.  */
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		return 0;
 
 	/* If we are under soft global TCP memory pressure, do not expand.  */
@@ -4058,6 +4098,10 @@ int tcp_rcv_established(struct sock *sk,
 
 				if ((int)skb->truesize > sk->sk_forward_alloc)
 					goto step5;
+				/* This is OK not to try to free memory here.
+				 * Do this below on slow path. Den */
+				if (ub_tcprcvbuf_charge(sk, skb) < 0)
+					goto step5;
 
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/tcp_ipv4.c kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_ipv4.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/tcp_ipv4.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_ipv4.c	2017-01-13 08:40:41.000000000 -0500
@@ -73,6 +73,8 @@
 #include <net/netdma.h>
 #include <net/secure_seq.h>
 
+#include <ub/ub_tcp.h>
+
 #include <linux/inet.h>
 #include <linux/ipv6.h>
 #include <linux/stddef.h>
@@ -627,7 +629,8 @@ static void tcp_v4_timewait_ack(struct s
 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
-			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
+		tcptw->tw_rcv_wnd >> (tw->tw_rcv_wscale& TW_WSCALE_MASK),
+		tcptw->tw_ts_recent);
 
 	inet_twsk_put(tw);
 }
@@ -693,8 +696,9 @@ static void syn_flood_warning(struct sk_
 	if (time_after(jiffies, (warntime + HZ * 60))) {
 		warntime = jiffies;
 		printk(KERN_INFO
-		       "possible SYN flooding on port %d. Sending cookies.\n",
-		       ntohs(skb->h.th->dest));
+		       "possible SYN flooding on ctid %u, port %d. "
+		       "Sending cookies.\n",
+		       skb->owner_env->veid, ntohs(tcp_hdr(skb)->dest));
 	}
 }
 #endif
@@ -730,6 +734,7 @@ struct request_sock_ops tcp_request_sock
 	.destructor	=	tcp_v4_reqsk_destructor,
 	.send_reset	=	tcp_v4_send_reset,
 };
+EXPORT_SYMBOL_GPL(tcp_request_sock_ops);
 
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
@@ -1009,12 +1014,15 @@ static int tcp_v4_checksum_init(struct s
  */
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	struct user_beancounter *ub;
+
+	ub = set_exec_ub(sock_bc(sk)->ub);
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
@@ -1028,7 +1036,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 		if (nsk != sk) {
 			if (tcp_child_process(sk, nsk, skb))
 				goto reset;
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1036,6 +1044,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1047,7 +1058,7 @@ discard:
 	 * might be destroyed here. This current version compiles correctly,
 	 * but you have been warned.
 	 */
-	return 0;
+	goto restore_context;
 
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -1304,6 +1315,8 @@ static int tcp_v4_init_sock(struct sock 
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache = 536;
 
+	tp->advmss = 65535; /* max value */
+
 	tp->reordering = sysctl_tcp_reordering;
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
@@ -1353,6 +1366,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	 * If sendmsg cached page exists, toss it.
 	 */
 	if (sk->sk_sndmsg_page) {
+		/* queue is empty, uncharge */
+		ub_sock_tcp_detachpage(sk);
 		__free_page(sk->sk_sndmsg_page);
 		sk->sk_sndmsg_page = NULL;
 	}
@@ -1367,16 +1382,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
+static inline struct inet_timewait_sock *tw_head(struct hlist_head *head,
+		envid_t veid)
 {
-	return hlist_empty(head) ? NULL :
-		list_entry(head->first, struct inet_timewait_sock, tw_node);
+	struct inet_timewait_sock *tw;
+	struct hlist_node *pos;
+
+	if (hlist_empty(head))
+		return NULL;
+	hlist_for_each_entry(tw, pos, head, tw_node) {
+		if (!ve_accessible_veid(tw->tw_owner_env, veid))
+			continue;
+		return tw;
+	}
+	return NULL;
 }
 
-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
+static inline struct inet_timewait_sock *
+	tw_next(struct inet_timewait_sock *tw, envid_t veid)
 {
-	return tw->tw_node.next ?
-		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+	while (1) {
+		if (tw->tw_node.next == NULL)
+			return NULL;
+		tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node);
+		if (!ve_accessible_veid(tw->tw_owner_env, veid))
+			continue;
+		return tw;
+	}
+	return NULL;	/* make compiler happy */
 }
 
 static void *listening_get_next(struct seq_file *seq, void *cur)
@@ -1385,7 +1418,9 @@ static void *listening_get_next(struct s
 	struct hlist_node *node;
 	struct sock *sk = cur;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	if (!sk) {
 		st->bucket = 0;
 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
@@ -1425,6 +1460,8 @@ get_req:
 	}
 get_sk:
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(sk->owner_env, ve))
+			continue;
 		if (sk->sk_family == st->family) {
 			cur = sk;
 			goto out;
@@ -1465,7 +1502,9 @@ static void *established_get_first(struc
 {
 	struct tcp_iter_state* st = seq->private;
 	void *rc = NULL;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_node *node;
@@ -1476,6 +1515,8 @@ static void *established_get_first(struc
 
 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+			if (!ve_accessible(sk->owner_env, ve))
+				continue;
 			if (sk->sk_family != st->family) {
 				continue;
 			}
@@ -1485,6 +1526,8 @@ static void *established_get_first(struc
 		st->state = TCP_SEQ_STATE_TIME_WAIT;
 		inet_twsk_for_each(tw, node,
 				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
+			if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
+				continue;
 			if (tw->tw_family != st->family) {
 				continue;
 			}
@@ -1504,16 +1547,17 @@ static void *established_get_next(struct
 	struct inet_timewait_sock *tw;
 	struct hlist_node *node;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	++st->num;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
-		tw = tw_next(tw);
+		tw = tw_next(tw, VEID(ve));
 get_tw:
-		while (tw && tw->tw_family != st->family) {
-			tw = tw_next(tw);
-		}
+		while (tw && tw->tw_family != st->family)
+			tw = tw_next(tw, VEID(ve));
 		if (tw) {
 			cur = tw;
 			goto out;
@@ -1535,12 +1579,15 @@ get_tw:
 		sk = sk_next(sk);
 
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(sk->owner_env, ve))
+			continue;
 		if (sk->sk_family == st->family)
 			goto found;
 	}
 
 	st->state = TCP_SEQ_STATE_TIME_WAIT;
-	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
+	tw = tw_head(&tcp_hashinfo.ehash[st->bucket +
+			tcp_hashinfo.ehash_size].chain, VEID(ve));
 	goto get_tw;
 found:
 	cur = sk;
@@ -1685,7 +1732,7 @@ int tcp_proc_register(struct tcp_seq_afi
 	afinfo->seq_fops->llseek	= seq_lseek;
 	afinfo->seq_fops->release	= seq_release_private;
 	
-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
 	if (p)
 		p->data = afinfo;
 	else
@@ -1697,7 +1744,8 @@ void tcp_proc_unregister(struct tcp_seq_
 {
 	if (!afinfo)
 		return;
-	proc_net_remove(afinfo->name);
+
+	remove_proc_glob_entry(afinfo->name, NULL);
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 
 }
 
@@ -1837,7 +1885,7 @@ out:
 static struct file_operations tcp4_seq_fops;
 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "tcp",
+	.name		= "net/tcp",
 	.family		= AF_INET,
 	.seq_show	= tcp4_seq_show,
 	.seq_fops	= &tcp4_seq_fops,
@@ -1932,6 +1980,92 @@ void __init tcp_v4_init(struct net_proto
 		panic("Failed to create the TCP control socket.\n");
 }
 
+#ifdef CONFIG_VE
+static void tcp_kill_ve_onesk(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Check the assumed state of the socket. */
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk(KERN_WARNING "Killing sk: dead %d, state %d, "
+			"wrseq %u unseq %u, wrqu %d.\n",
+			sock_flag(sk, SOCK_DEAD), sk->sk_state,
+			tp->write_seq, tp->snd_una,
+			!skb_queue_empty(&sk->sk_write_queue));
+		sk->sk_err = ECONNRESET;
+		sk->sk_error_report(sk);
+	}
+
+	tcp_send_active_reset(sk, GFP_ATOMIC);
+	switch (sk->sk_state) {
+		case TCP_FIN_WAIT1:
+		case TCP_CLOSING:
+			/* In these 2 states the peer may want us to retransmit
+			 * some data and/or FIN.  Entering "resetting mode"
+			 * instead.
+			 */
+			tcp_time_wait(sk, TCP_CLOSE, 0);
+			break;
+		case TCP_FIN_WAIT2:
+			/* By some reason the socket may stay in this state
+			 * without turning into a TW bucket.  Fix it.
+			 */
+			tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
+			break;
+		default:
+			/* Just jump into CLOSED state. */
+			tcp_done(sk);
+			break;
+	}
+}
+
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
+{
+	struct inet_ehash_bucket *head;
+	int i, retry;
+
+	/* alive */
+again:
+	retry = 0;
+	local_bh_disable();
+	head = tcp_hashinfo.ehash;
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+more_work:
+		write_lock(&head[i].lock);
+		sk_for_each(sk, node, &head[i].chain) {
+			if (ve_accessible_strict(sk->owner_env, envid)) {
+				sock_hold(sk);
+				write_unlock(&head[i].lock);
+
+				bh_lock_sock(sk);
+				if (sock_owned_by_user(sk)) {
+					retry = 1;
+					bh_unlock_sock(sk);
+					sock_put(sk);
+					break;
+				}
+				/* sk might have disappeared from the hash before
+				 * we got the lock */
+				if (sk->sk_state != TCP_CLOSE)
+					tcp_kill_ve_onesk(sk);
+				bh_unlock_sock(sk);
+				sock_put(sk);
+				goto more_work;
+			}
+		}
+		write_unlock(&head[i].lock);
+	}
+	local_bh_enable();
+	if (retry) {
+		schedule_timeout_interruptible(HZ);
+		goto again;
+	}
+}
+EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
+#endif
+
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(tcp_hashinfo);
 EXPORT_SYMBOL(tcp_prot);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/tcp_minisocks.c kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_minisocks.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/tcp_minisocks.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_minisocks.c	2017-01-13 08:40:21.000000000 -0500
@@ -28,6 +28,9 @@
 #include <net/inet_common.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
+
 #ifdef CONFIG_SYSCTL
 #define SYNC_INIT 0 /* let the user enable it */
 #else
@@ -38,6 +41,11 @@ int sysctl_tcp_syncookies = SYNC_INIT; 
 EXPORT_SYMBOL(sysctl_tcp_syncookies);
 
 int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_max_tw_kmem_fraction = 384;
+int sysctl_tcp_max_tw_buckets_ub = 16536;
+
+EXPORT_SYMBOL(sysctl_tcp_max_tw_kmem_fraction);
+EXPORT_SYMBOL(sysctl_tcp_max_tw_buckets_ub);
 
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
@@ -54,6 +62,7 @@ struct inet_timewait_death_row tcp_death
 	.twcal_hand	= -1,
 	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
 					    (unsigned long)&tcp_death_row),
+	.ub_managed	= 1,
 };
 
 EXPORT_SYMBOL_GPL(tcp_death_row);
@@ -282,7 +291,8 @@ void tcp_time_wait(struct sock *sk, int 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
 		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
 
-	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
+	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets &&
+			ub_timewait_check(sk, &tcp_death_row))
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
@@ -295,6 +305,8 @@ void tcp_time_wait(struct sock *sk, int 
 		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+		if (sk->sk_user_data != NULL)
+			tw->tw_rcv_wscale |= TW_WSCALE_SPEC;
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		if (tw->tw_family == PF_INET6) {
@@ -308,6 +320,8 @@ void tcp_time_wait(struct sock *sk, int 
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
+		tw->tw_owner_env = VEID(sk->owner_env);
+
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 
@@ -331,8 +345,15 @@ void tcp_time_wait(struct sock *sk, int 
 		 * socket up.  We've got bigger problems than
 		 * non-graceful socket closings.
 		 */
-		if (net_ratelimit())
-			printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+		if (net_ratelimit()) {
+			int ubid = 0;
+#ifdef CONFIG_USER_RESOURCE
+			ubid = sock_has_ubc(sk) ? 
+				top_beancounter(sock_bc(sk)->ub)->ub_uid : 0;
+#endif
+			printk(KERN_INFO "TCP: time wait bucket table "
+			       "overflow (CT%d)\n", ubid);
+		}
 	}
 
 	tcp_update_metrics(sk);
@@ -356,6 +377,8 @@ struct sock *tcp_create_openreq_child(st
 		struct tcp_sock *newtp;
 
 		/* Now setup tcp_sock */
+		newsk->owner_env = sk->owner_env;
+
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
 		newtp->rcv_nxt = treq->rcv_isn + 1;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/tcp_output.c kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_output.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/tcp_output.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_output.c	2017-01-13 08:40:23.000000000 -0500
@@ -42,6 +42,9 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
@@ -339,6 +342,13 @@ static void tcp_syn_build_options(__u32 
 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
 }
 
+static int skb_header_size(struct sock *sk, int tcp_hlen)
+{
+	struct ip_options *opt = inet_sk(sk)->opt;
+	return tcp_hlen + sizeof(struct iphdr) +
+		(opt ? opt->optlen : 0)	+ ETH_HLEN /* For hard header */;
+}
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -357,6 +367,7 @@ static int tcp_transmit_skb(struct sock 
 	struct tcp_sock *tp;
 	struct tcp_skb_cb *tcb;
 	int tcp_header_size;
+	int header_size;
 	struct tcphdr *th;
 	int sysctl_flags;
 	int err;
@@ -411,7 +422,21 @@ static int tcp_transmit_skb(struct sock 
 				    (tp->rx_opt.eff_sacks *
 				     TCPOLEN_SACK_PERBLOCK));
 	}
-		
+
+	/* Unfortunately, we can have skb from outside world here
+	 * with size insufficient for header. It is impossible to make
+	 * guess when we queue skb, so the decision should be made
+	 * here. Den
+	 */
+	header_size = skb_header_size(sk, tcp_header_size);
+	if (skb->data - header_size < skb->head) {
+		int delta = header_size - skb_headroom(skb);
+		err = pskb_expand_head(skb, SKB_DATA_ALIGN(delta),
+				0, GFP_ATOMIC);
+		if (err)
+			return err;
+	}
+
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
@@ -489,6 +514,8 @@ static int tcp_transmit_skb(struct sock 
 #undef SYSCTL_FLAG_SACK
 }
 
+EXPORT_SYMBOL(tcp_transmit_skb);
+
 
 /* This routine just queue's the buffer 
  *
@@ -554,15 +581,21 @@ int tcp_fragment(struct sock *sk, struct
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-		return -ENOMEM;
+	if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			return -ENOMEM;
+		ub_skb_uncharge(skb);
+		ub_tcpsndbuf_charge_forced(sk, skb);
+	}
 
 	/* Get a new skb... force flag on. */
 	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
 
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
@@ -859,7 +892,7 @@ unsigned int tcp_current_mss(struct sock
 	return mss_now;
 }
 
-EXPORT_SYMBOL_GPL(tcp_current_mss);
+EXPORT_SYMBOL(tcp_current_mss);
 
 /* Congestion window validation. (RFC2861) */
 
@@ -1072,6 +1105,11 @@ static int tso_fragment(struct sock *sk,
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
+
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
 	buff->truesize += nlen;
@@ -1396,7 +1434,7 @@ void __tcp_push_pending_frames(struct so
 			tcp_check_probe_timer(sk, tp);
 	}
 }
-EXPORT_SYMBOL_GPL(__tcp_push_pending_frames);
+EXPORT_SYMBOL(__tcp_push_pending_frames);
 
 /* Send _single_ skb sitting at the send head. This function requires
  * true push pending frames to setup probe timer etc.
@@ -1437,6 +1475,8 @@ void tcp_push_one(struct sock *sk, unsig
 	}
 }
 
+EXPORT_SYMBOL(tcp_push_one);
+
 /* This function returns the amount that we can raise the
  * usable window based on the following constraints
  *  
@@ -1510,7 +1550,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < full_space/2) {
 		icsk->icsk_ack.quick = 0;
 
-		if (tcp_memory_pressure)
+		if (ub_tcp_shrink_rcvbuf(sk))
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
 
 		if (free_space < mss)
@@ -1944,6 +1984,7 @@ void tcp_send_fin(struct sock *sk)
 				break;
 			yield();
 		}
+		ub_tcpsndbuf_charge_forced(sk, skb);
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
@@ -1961,6 +2002,7 @@ void tcp_send_fin(struct sock *sk)
 	}
 	__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
 }
+EXPORT_SYMBOL(tcp_send_fin);
 
 /* We get here when a process closes a file descriptor (either due to
  * an explicit close() or as a byproduct of exit()'ing) and there
@@ -1995,6 +2037,7 @@ void tcp_send_active_reset(struct sock *
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
+EXPORT_SYMBOL(tcp_send_active_reset);
 
 /* WARNING: This routine must only be called when we have already sent
  * a SYN packet that crossed the incoming SYN that caused this routine
@@ -2015,6 +2058,10 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
+			if (ub_tcpsndbuf_charge(sk, nskb) < 0) {
+				kfree_skb(nskb);
+				return -ENOMEM;
+			}
 			__skb_unlink(skb, &sk->sk_write_queue);
 			skb_header_release(nskb);
 			__skb_queue_head(&sk->sk_write_queue, nskb);
@@ -2114,6 +2161,7 @@ static void tcp_connect_init(struct sock
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
+	static int once = 0;
 
 	/* We'll fix this up when we get a response from the other end.
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -2128,9 +2176,23 @@ static void tcp_connect_init(struct sock
 	tcp_mtup_init(sk);
 	tcp_sync_mss(sk, dst_mtu(dst));
 
+	if (!once && dst_metric(dst, RTAX_ADVMSS) == 0) {
+		once = 1;
+
+		printk("Oops in connect_init! dst->advmss=%d\n",
+						dst_metric(dst, RTAX_ADVMSS));
+		printk("dst: pmtu=%u\n", dst_metric(dst, RTAX_MTU));
+		printk("sk->state=%d, tp: ack.rcv_mss=%d, mss_cache=%d, "
+				"advmss=%d, user_mss=%d\n",
+				sk->sk_state, inet_csk(sk)->icsk_ack.rcv_mss,
+				tp->mss_cache, tp->advmss, tp->rx_opt.user_mss);
+	}
+
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->advmss == 0)
+		tp->advmss = 1460;
 	tcp_initialize_rcv_mss(sk);
 
 	tcp_select_initial_window(tcp_full_space(sk),
@@ -2171,6 +2233,10 @@ int tcp_connect(struct sock *sk)
 	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOBUFS;
+	}
 
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/tcp_timer.c kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_timer.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/tcp_timer.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/tcp_timer.c	2017-01-13 08:40:23.000000000 -0500
@@ -22,6 +22,8 @@
 
 #include <linux/module.h>
 #include <net/tcp.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_tcp.h>
 
 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
@@ -32,6 +34,8 @@ int sysctl_tcp_retries1 = TCP_RETR1;
 int sysctl_tcp_retries2 = TCP_RETR2;
 int sysctl_tcp_orphan_retries;
 
+EXPORT_SYMBOL(sysctl_tcp_keepalive_time);
+
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
 static void tcp_keepalive_timer (unsigned long data);
@@ -67,7 +71,8 @@ static void tcp_write_err(struct sock *s
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = atomic_read(&tcp_orphan_count);
+	int orphans = ub_get_orphan_count(sk);
+	int orph = orphans;
 
 	/* If peer does not open window for long time, or did not transmit 
 	 * anything for long time, penalize it. */
@@ -78,12 +83,16 @@ static int tcp_out_of_resources(struct s
 	if (sk->sk_err_soft)
 		orphans <<= 1;
 
-	if (orphans >= sysctl_tcp_max_orphans ||
-	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
-		if (net_ratelimit())
-			printk(KERN_INFO "Out of socket memory\n");
-
+	if (ub_too_many_orphans(sk, orphans)) {
+		if (net_ratelimit()) {
+			int ubid = 0;
+#ifdef CONFIG_USER_RESOURCE
+			ubid = sock_has_ubc(sk) ?
+				top_beancounter(sock_bc(sk)->ub)->ub_uid : 0;
+#endif
+			printk(KERN_INFO "Orphaned socket dropped "
+			       "(%d,%d in CT%d)\n", orph, orphans, ubid);
+		}
 		/* Catch exceptional cases, when connection requires reset.
 		 *      1. Last segment was sent recently. */
 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
@@ -207,9 +216,12 @@ static int tcp_write_timeout(struct sock
 static void tcp_delack_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	env = set_exec_env(sk->owner_env);
+
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
@@ -258,13 +270,16 @@ static void tcp_delack_timer(unsigned lo
 	TCP_CHECK_TIMER(sk);
 
 out:
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		sk_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
+EXPORT_SYMBOL(tcp_delack_timer);
+
 static void tcp_probe_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -314,11 +329,14 @@ static void tcp_probe_timer(struct sock 
  *	The TCP retransmit timer.
  */
 
-static void tcp_retransmit_timer(struct sock *sk)
+static noinline void tcp_retransmit_timer(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	env = set_exec_env(sk->owner_env);
+
 	if (!tp->packets_out)
 		goto out;
 
@@ -415,15 +433,21 @@ out_reset_timer:
 	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
 		__sk_dst_reset(sk);
 
-out:;
+out:
+	(void)set_exec_env(env);
 }
 
+EXPORT_SYMBOL(tcp_retransmit_timer);
+
 static void tcp_write_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
+	env = set_exec_env(sk->owner_env);
+
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later */
@@ -457,8 +481,11 @@ out:
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
+EXPORT_SYMBOL(tcp_write_timer);
+
 /*
  *	Timer for listening sockets
  */
@@ -484,10 +511,13 @@ void tcp_set_keepalive(struct sock *sk, 
 static void tcp_keepalive_timer (unsigned long data)
 {
 	struct sock *sk = (struct sock *) data;
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 elapsed;
 
+	env = set_exec_env(sk->owner_env);
+
 	/* Only process if socket is not in use. */
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -567,4 +597,7 @@ death:	
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
+
+EXPORT_SYMBOL(tcp_keepalive_timer);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv4/udp.c kernel-2.6.18-417.el5-028stab121/net/ipv4/udp.c
--- kernel-2.6.18-417.el5.orig/net/ipv4/udp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv4/udp.c	2017-01-13 08:40:40.000000000 -0500
@@ -117,7 +117,8 @@
 DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
 EXPORT_SYMBOL(udp_statistics);
 
-struct hlist_head udp_hash[UDP_HTABLE_SIZE];
+struct hlist_head *udp_hash;
+unsigned int udp_hash_size;    /* must be power of 2 */
 DEFINE_RWLOCK(udp_hash_lock);
 
 int sysctl_udp_mem[3] __read_mostly;
@@ -131,16 +132,27 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
 atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
-static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+/**
+ *  udp_get_port  -  common port lookup for IPv4 and IPv6
+ *
+ *  @sk:          socket struct in question
+ *  @snum:        port number to look up
+ *  @saddr_cmp:   AF-dependent comparison of bound local IP addresses
+ */
+int udp_get_port(struct sock *sk, unsigned short snum,
+		 int (*saddr_cmp)(const struct sock *, const struct sock *))
 {
 	struct hlist_node *node;
 	struct sock *sk2;
 	struct inet_sock *inet = inet_sk(sk);
+	struct ve_struct *env;
 
+	env = sk->owner_env;
 	write_lock_bh(&udp_hash_lock);
 	if (!snum) {
-		int i, low, high, remaining;
+		int low, high;
 		unsigned rover, best, best_size_so_far;
+		unsigned int i, iterations, remaining;
 
 		inet_get_local_port_range(&low, &high);
 		remaining = (high - low) + 1;
@@ -148,16 +160,18 @@ static int udp_v4_get_port(struct sock *
 		best_size_so_far = UINT_MAX;
 		best = rover = net_random() % remaining + low;
 
-		if (!udp_lport_inuse(rover) &&
+		if (!udp_lport_inuse(rover, env) &&
 		    !inet_is_reserved_local_port(rover))
 			goto gotit;
 
+		iterations = min(udp_hash_size, remaining);
+
 		/* 1st pass: look for empty (or shortest) hash chain */
-		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+		for (i = 0; i < iterations; i++) {
 			struct hlist_head *list;
 			int size = 0;
 
-			list = &udp_hash[rover & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(rover, VEID(env))];
 			if (hlist_empty(list) &&
 			    !inet_is_reserved_local_port(rover))
 				goto gotit;
@@ -169,21 +183,25 @@ static int udp_v4_get_port(struct sock *
 			best = rover;
 		next:
 			/* fold back if end of range */
-			if (++rover > high)
+			if (++rover > high) {
 				rover = low + ((rover - low)
-				            & (UDP_HTABLE_SIZE - 1));
+				            & (udp_hash_size - 1));
+				if (rover >= high)
+					rover = low;
+			}
 		}
 		/* 2nd pass: find hole in shortest hash chain */
 		rover = best;
-		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
-			if (!udp_lport_inuse(rover) &&
+		do {
+			if (!udp_lport_inuse(rover, env) &&
 			    !inet_is_reserved_local_port(rover))
 				goto gotit;
-			rover += UDP_HTABLE_SIZE;
+			rover += udp_hash_size;
 			if (rover > high)
 				rover = low + ((rover - low)
-				            & (UDP_HTABLE_SIZE - 1));
-		}
+				            & (udp_hash_size - 1));
+		} while (rover != best);
+
 		/* All ports in use! */
 		goto fail;
 
@@ -191,25 +209,24 @@ gotit:
 		snum = rover;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			struct inet_sock *inet2 = inet_sk(sk2);
 
 			if (inet2->num == snum &&
 			    sk2 != sk &&
-			    !ipv6_only_sock(sk2) &&
+			    ve_accessible_strict(sk2->owner_env, env) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
 			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!inet2->rcv_saddr ||
-			     !inet->rcv_saddr ||
-			     inet2->rcv_saddr == inet->rcv_saddr) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse))
+			    (*saddr_cmp)(sk, sk2) &&
+			    (sk->sk_reuse != 2 &&
+			     (!sk2->sk_reuse || !sk->sk_reuse)))
 				goto fail;
 		}
 	}
 	inet->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
 
 		sk_add_node(sk, h);
 		sock_prot_inc_use(sk->sk_prot);
@@ -222,6 +239,20 @@ fail:
 	return 1;
 }
 
+static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+{
+	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
+
+	return 	(!ipv6_only_sock(sk2)  &&
+		 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
+		  inet1->rcv_saddr == inet2->rcv_saddr));
+}
+
+static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+	return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
+}
+
 static void udp_v4_hash(struct sock *sk)
 {
 	BUG();
@@ -247,11 +278,15 @@ static struct sock *udp_v4_lookup_longwa
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+		if (inet->num == hnum &&
+		    ve_accessible_strict(sk->owner_env, env) &&
+		    !ipv6_only_sock(sk)) {
 			int score = (sk->sk_family == PF_INET ? 1 : 0);
 			if (inet->rcv_saddr) {
 				if (inet->rcv_saddr != daddr)
@@ -1126,7 +1161,8 @@ static int udp_v4_mcast_deliver(struct s
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+				VEID(skb->owner_env))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
@@ -1482,10 +1518,14 @@ static struct sock *udp_get_first(struct
 {
 	struct sock *sk;
 	struct udp_iter_state *state = seq->private;
+	struct ve_struct *env;
 
-	for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+	env = get_exec_env();
+	for (state->bucket = 0; state->bucket < udp_hash_size; ++state->bucket) {
 		struct hlist_node *node;
 		sk_for_each(sk, node, &udp_hash[state->bucket]) {
+			if (!ve_accessible(sk->owner_env, env))
+				continue;
 			if (sk->sk_family == state->family)
 				goto found;
 		}
@@ -1502,10 +1542,15 @@ static struct sock *udp_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != state->family);
+		if (!sk)
+			break;
+		if (sk->sk_family != state->family)
+			continue;
+		if (ve_accessible(sk->owner_env, get_exec_env()))
+			break;
+	} while (1);
 
-	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
+	if (!sk && ++state->bucket < udp_hash_size) {
 		sk = sk_head(&udp_hash[state->bucket]);
 		goto try_again;
 	}
@@ -1588,7 +1633,7 @@ int udp_proc_register(struct udp_seq_afi
 	afinfo->seq_fops->llseek	= seq_lseek;
 	afinfo->seq_fops->release	= seq_release_private;
 
-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
 	if (p)
 		p->data = afinfo;
 	else
@@ -1600,7 +1645,8 @@ void udp_proc_unregister(struct udp_seq_
 {
 	if (!afinfo)
 		return;
-	proc_net_remove(afinfo->name);
+
+	remove_proc_glob_entry(afinfo->name, NULL);
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
 }
 
@@ -1643,7 +1689,7 @@ static int udp4_seq_show(struct seq_file
 static struct file_operations udp4_seq_fops;
 static struct udp_seq_afinfo udp4_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "udp",
+	.name		= "net/udp",
 	.family		= AF_INET,
 	.seq_show	= udp4_seq_show,
 	.seq_fops	= &udp4_seq_fops,
@@ -1660,9 +1706,21 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+static __initdata unsigned long uhash_entries;
+static int __init set_uhash_entries(char *str)
+{
+	if (!str)
+		return 0;
+
+	uhash_entries = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("uhash_entries=", set_uhash_entries);
+
 void __init udp_init(void)
 {
 	unsigned long limit;
+	unsigned int i;
 
 	/* Set the pressure threshold up by the same strategy of TCP. It is a
 	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
@@ -1675,14 +1733,27 @@ void __init udp_init(void)
 	sysctl_udp_mem[1] = limit;
 	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
 
+	udp_hash = alloc_large_system_hash("UDP bind", sizeof(struct list_head),
+					   uhash_entries,
+					   (num_physpages >= 256 * 1024) ? 17 : 19,
+					   0,
+					   &udp_hash_size,
+					   NULL,
+					   64*1024);
+	udp_hash_size = 1 << udp_hash_size;
+	for (i = 0; i < udp_hash_size; i++)
+		INIT_HLIST_HEAD(udp_hash + i);
+
 	sysctl_udp_rmem_min = SK_MEM_QUANTUM;
 	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
 }
 
 EXPORT_SYMBOL(udp_disconnect);
 EXPORT_SYMBOL(udp_hash);
+EXPORT_SYMBOL(udp_hash_size);
 EXPORT_SYMBOL(udp_hash_lock);
 EXPORT_SYMBOL(udp_ioctl);
+EXPORT_SYMBOL(udp_get_port);
 EXPORT_SYMBOL(udp_prot);
 EXPORT_SYMBOL(udp_sendmsg);
 EXPORT_SYMBOL(udp_poll);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/addrconf.c kernel-2.6.18-417.el5-028stab121/net/ipv6/addrconf.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/addrconf.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/addrconf.c	2017-01-13 08:40:41.000000000 -0500
@@ -99,6 +99,7 @@
 #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
 
 #ifdef CONFIG_SYSCTL
+static struct addrconf_sysctl_table * __addrconf_sysctl_register(struct inet6_dev *idev, struct net_device_extended *ext, char *devname, int ifindex, struct ipv6_devconf *p);
 static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
 #endif
@@ -130,8 +131,6 @@ static DEFINE_SPINLOCK(addrconf_verify_l
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
-static int addrconf_ifdown(struct net_device *dev, int how);
-
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
@@ -147,7 +146,7 @@ static int ipv6_chk_same_addr(const stru
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf global_ipv6_devconf = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -183,7 +182,7 @@ struct ipv6_devconf_extensions ipv6_devc
 	.accept_dad = 1,
 };
 
-struct ipv6_devconf ipv6_devconf_dflt = {
+struct ipv6_devconf global_ipv6_devconf_dflt = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -220,6 +219,12 @@ struct ipv6_devconf_extensions ipv6_devc
 
 int sysctl_ip6_odad = 0;
 
+#ifdef CONFIG_VE
+#define ipv6_devconf_dflt	(*(get_exec_env()->_ipv6_devconf_dflt))
+#else
+#define ipv6_devconf_dflt	global_ipv6_devconf_dflt
+#endif
+
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
@@ -430,9 +435,8 @@ static struct inet6_dev * ipv6_add_dev(s
 	    dev->type == ARPHRD_TUNNEL ||
 	    dev->type == ARPHRD_NONE ||
 	    dev->type == ARPHRD_SIT) {
-		printk(KERN_INFO
-		       "%s: Disabled Privacy Extensions\n",
-		       dev->name);
+		ADBG((KERN_INFO "%s: Disabled Privacy Extensions\n",
+			dev->name));
 		ndev->cnf.use_tempaddr = -1;
 	} else {
 		in6_dev_hold(ndev);
@@ -519,8 +523,8 @@ static void addrconf_forward_change(void
 		rcu_read_lock();
 		idev = __in6_dev_get(dev);
 		if (idev) {
-			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			int changed = (!idev->cnf.forwarding) ^ (!ve_ipv6_devconf.forwarding);
+			idev->cnf.forwarding = ve_ipv6_devconf.forwarding;
 			if (changed)
 				dev_forward_change(idev);
 		}
@@ -615,7 +619,7 @@ ipv6_add_addr(struct inet6_dev *idev, co
 		goto out;
 	}
 
-	ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+	ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC_UBC);
 
 	if (ifa == NULL) {
 		ADBG(("ipv6_add_addr: malloc failed\n"));
@@ -783,12 +787,12 @@ static void ipv6_del_addr(struct inet6_i
 	}
 	write_unlock_bh(&idev->lock);
 
+	addrconf_del_timer(ifp);
+
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
 	atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
 
-	addrconf_del_timer(ifp);
-
 	/*
 	 * Purge or update corresponding prefix
 	 *
@@ -1252,9 +1256,10 @@ int ipv6_chk_addr(struct in6_addr *addr,
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
-		    !(ifp->flags&IFA_F_TENTATIVE)) {
+		    !(ifp->flags&IFA_F_TENTATIVE) &&
+		    ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) {
 			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
+			    !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict))
 				break;
 		}
 	}
@@ -1270,7 +1275,9 @@ int ipv6_chk_same_addr(const struct in6_
 
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
-			if (dev == NULL || ifp->idev->dev == dev)
+			if ((dev == NULL &&
+			     ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env()))
+			    || ifp->idev->dev == dev)
 				break;
 		}
 	}
@@ -1284,9 +1291,10 @@ struct inet6_ifaddr * ipv6_get_ifaddr(st
 
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-		if (ipv6_addr_equal(&ifp->addr, addr)) {
+		if (ipv6_addr_equal(&ifp->addr, addr) &&
+		    ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) {
 			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+			    !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict)) {
 				in6_ifa_hold(ifp);
 				break;
 			}
@@ -1816,7 +1824,7 @@ ok:
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 			if (sysctl_ip6_odad &&
-			    !ipv6_devconf.forwarding)
+			    !ve_ipv6_devconf.forwarding)
 				addr_flags = IFA_F_OPTIMISTIC;
 #endif
 
@@ -1987,7 +1995,7 @@ err_exit:
 /*
  *	Manual configuration of address on an interface
  */
-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
+int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
 			  __u32 prefered_lft, __u32 valid_lft)
 {
 	struct inet6_ifaddr *ifp;
@@ -2078,7 +2086,7 @@ int addrconf_add_ifaddr(void __user *arg
 	struct in6_ifreq ireq;
 	int err;
 	
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 	
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2096,7 +2104,7 @@ int addrconf_del_ifaddr(void __user *arg
 	struct in6_ifreq ireq;
 	int err;
 	
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2241,7 +2249,7 @@ static void addrconf_add_linklocal(struc
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	if (sysctl_ip6_odad &&
-	    !ipv6_devconf.forwarding)
+	    !ve_ipv6_devconf.forwarding)
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
@@ -2500,7 +2508,7 @@ static struct notifier_block ipv6_dev_no
 	.priority = 0
 };
 
-static int addrconf_ifdown(struct net_device *dev, int how)
+int addrconf_ifdown(struct net_device *dev, int how)
 {
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, **bifa;
@@ -2509,7 +2517,7 @@ static int addrconf_ifdown(struct net_de
 
 	ASSERT_RTNL();
 
-	if (dev == &loopback_dev && how == 1)
+	if ((dev->flags & IFF_LOOPBACK) && how == 1)
 		how = 0;
 
 	rt6_ifdown(dev);
@@ -2522,7 +2530,7 @@ static int addrconf_ifdown(struct net_de
 	/* Step 1: remove reference to ipv6 device from parent device.
 	           Do not dev_put!
 	 */
-	if (how == 1) {
+	if (how) {
 		idev->dead = 1;
 
 		/* protected by rtnl_lock */
@@ -2554,12 +2562,12 @@ static int addrconf_ifdown(struct net_de
 	write_lock_bh(&idev->lock);
 
 	/* Step 3: clear flags for stateless addrconf */
-	if (how != 1)
+	if (!how)
 		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
 	/* Step 4: clear address list */
 #ifdef CONFIG_IPV6_PRIVACY
-	if (how == 1 && del_timer(&idev->regen_timer))
+	if (how && del_timer(&idev->regen_timer))
 		in6_dev_put(idev);
 
 	/* clear tempaddr list */
@@ -2602,7 +2610,7 @@ put_ifa:
 
 	/* Step 5: Discard multicast list */
 
-	if (how == 1)
+	if (how)
 		ipv6_mc_destroy_dev(idev);
 	else
 		ipv6_mc_down(idev);
@@ -2613,7 +2621,7 @@ put_ifa:
 	
 	/* Shot the device (if unregistered) */
 
-	if (how == 1) {
+	if (how) {
 #ifdef CONFIG_SYSCTL
 		addrconf_sysctl_unregister(&idev->cnf);
 		neigh_sysctl_unregister(idev->nd_parms);
@@ -2624,10 +2632,12 @@ put_ifa:
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(addrconf_ifdown);
 
 static void addrconf_rs_timer(unsigned long data)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env);
 
 	if (ifp->idev->cnf.forwarding)
 		goto out;
@@ -2666,6 +2676,7 @@ static void addrconf_rs_timer(unsigned l
 
 out:
 	in6_ifa_put(ifp);
+	set_exec_env(old_env);
 }
 
 /*
@@ -2707,7 +2718,8 @@ static void addrconf_dad_start(struct in
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
 	    (ext && ext->ipv6_devconf_ext.accept_dad < 1) ||
 	    !(dev->flags&IFF_MULTICAST) ||
-	    !(ifp->flags&IFA_F_TENTATIVE)) {
+	    !(ifp->flags&IFA_F_TENTATIVE) ||
+	    dev->owner_env->disable_net) {
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
@@ -2748,6 +2760,7 @@ static void addrconf_dad_timer(unsigned 
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr unspec;
 	struct in6_addr mcaddr;
+	struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env);
 
 	if (!ifp->probes && addrconf_dad_end(ifp))
 		goto out;
@@ -2790,6 +2803,7 @@ static void addrconf_dad_timer(unsigned 
 	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
 out:
 	in6_ifa_put(ifp);
+	set_exec_env(old_env);
 }
 
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
@@ -2857,8 +2871,11 @@ static struct inet6_ifaddr *if6_get_firs
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		ifa = inet6_addr_lst[state->bucket];
-		if (ifa)
-			break;
+		while (ifa) {
+			if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env()))
+				return ifa;
+			ifa = ifa->lst_next;
+		}
 	}
 	return ifa;
 }
@@ -2869,6 +2886,11 @@ static struct inet6_ifaddr *if6_get_next
 
 	ifa = ifa->lst_next;
 try_again:
+	while (ifa) {
+		if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env()))
+			break;
+		ifa = ifa->lst_next;
+	}
 	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
 		ifa = inet6_addr_lst[state->bucket];
 		goto try_again;
@@ -2959,14 +2981,14 @@ static struct file_operations if6_fops =
 
 int __init if6_proc_init(void)
 {
-	if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
+	if (!proc_glob_fops_create("net/if_inet6", S_IRUGO, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void if6_proc_exit(void)
 {
-	proc_net_remove("if_inet6");
+	remove_proc_glob_entry("net/if_inet6", NULL);
 }
 #endif	/* CONFIG_PROC_FS */
 
@@ -2979,6 +3001,7 @@ static void addrconf_verify(unsigned lon
 	struct inet6_ifaddr *ifp;
 	unsigned long now, next;
 	int i;
+	struct ve_struct *old_env;
 
 	spin_lock_bh(&addrconf_verify_lock);
 	now = jiffies;
@@ -2999,6 +3022,8 @@ restart:
 			if (ifp->flags & IFA_F_PERMANENT)
 				continue;
 
+			old_env = set_exec_env(ifp->idev->dev->owner_env);
+
 			spin_lock(&ifp->lock);
 			age = (now - ifp->tstamp) / HZ;
 
@@ -3014,9 +3039,11 @@ restart:
 				in6_ifa_hold(ifp);
 				read_unlock(&addrconf_hash_lock);
 				ipv6_del_addr(ifp);
+				set_exec_env(old_env);
 				goto restart;
 			} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
 				spin_unlock(&ifp->lock);
+				set_exec_env(old_env);
 				continue;
 			} else if (age >= ifp->prefered_lft) {
 				/* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
@@ -3038,6 +3065,7 @@ restart:
 
 					ipv6_ifa_notify(0, ifp);
 					in6_ifa_put(ifp);
+					set_exec_env(old_env);
 					goto restart;
 				}
 #ifdef CONFIG_IPV6_PRIVACY
@@ -3059,6 +3087,7 @@ restart:
 						ipv6_create_tempaddr(ifpub, ifp);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
+						set_exec_env(old_env);
 						goto restart;
 					}
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -3071,6 +3100,7 @@ restart:
 					next = ifp->tstamp + ifp->prefered_lft * HZ;
 				spin_unlock(&ifp->lock);
 			}
+			set_exec_env(old_env);
 		}
 		read_unlock(&addrconf_hash_lock);
 	}
@@ -3801,7 +3831,7 @@ int addrconf_sysctl_forward(ctl_table *c
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
+		if (valp != &ve_ipv6_devconf.forwarding) {
 			if ((!*valp) ^ (!val)) {
 				struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
 				if (idev == NULL)
@@ -3809,7 +3839,7 @@ int addrconf_sysctl_forward(ctl_table *c
 				dev_forward_change(idev);
 			}
 		} else {
-			ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+			ipv6_devconf_dflt.forwarding = ve_ipv6_devconf.forwarding;
 			addrconf_forward_change();
 		}
 		if (*valp)
@@ -3852,7 +3882,7 @@ static int addrconf_sysctl_forward_strat
 	}
 
 	if (valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
+		if (valp != &ve_ipv6_devconf.forwarding) {
 			struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
 			int changed;
 			if (unlikely(idev == NULL))
@@ -3966,7 +3996,7 @@ static struct addrconf_sysctl_table
         	{
 			.ctl_name	=	NET_IPV6_FORWARDING,
 			.procname	=	"forwarding",
-         		.data		=	&ipv6_devconf.forwarding,
+         		.data		=	&global_ipv6_devconf.forwarding,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&addrconf_sysctl_forward,
@@ -3975,7 +4005,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_HOP_LIMIT,
 			.procname	=	"hop_limit",
-         		.data		=	&ipv6_devconf.hop_limit,
+         		.data		=	&global_ipv6_devconf.hop_limit,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	proc_dointvec,
@@ -3983,7 +4013,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MTU,
 			.procname	=	"mtu",
-			.data		=	&ipv6_devconf.mtu6,
+			.data		=	&global_ipv6_devconf.mtu6,
          		.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3991,7 +4021,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA,
 			.procname	=	"accept_ra",
-         		.data		=	&ipv6_devconf.accept_ra,
+         		.data		=	&global_ipv6_devconf.accept_ra,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3999,7 +4029,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_REDIRECTS,
 			.procname	=	"accept_redirects",
-         		.data		=	&ipv6_devconf.accept_redirects,
+         		.data		=	&global_ipv6_devconf.accept_redirects,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4007,7 +4037,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_AUTOCONF,
 			.procname	=	"autoconf",
-         		.data		=	&ipv6_devconf.autoconf,
+         		.data		=	&global_ipv6_devconf.autoconf,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4015,7 +4045,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_DAD_TRANSMITS,
 			.procname	=	"dad_transmits",
-         		.data		=	&ipv6_devconf.dad_transmits,
+         		.data		=	&global_ipv6_devconf.dad_transmits,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4023,7 +4053,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICITS,
 			.procname	=	"router_solicitations",
-         		.data		=	&ipv6_devconf.rtr_solicits,
+         		.data		=	&global_ipv6_devconf.rtr_solicits,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4031,7 +4061,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICIT_INTERVAL,
 			.procname	=	"router_solicitation_interval",
-         		.data		=	&ipv6_devconf.rtr_solicit_interval,
+         		.data		=	&global_ipv6_devconf.rtr_solicit_interval,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec_jiffies,
@@ -4040,7 +4070,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICIT_DELAY,
 			.procname	=	"router_solicitation_delay",
-         		.data		=	&ipv6_devconf.rtr_solicit_delay,
+         		.data		=	&global_ipv6_devconf.rtr_solicit_delay,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec_jiffies,
@@ -4049,7 +4079,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_FORCE_MLD_VERSION,
 			.procname	=	"force_mld_version",
-         		.data		=	&ipv6_devconf.force_mld_version,
+         		.data		=	&global_ipv6_devconf.force_mld_version,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4075,7 +4105,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_USE_TEMPADDR,
 			.procname	=	"use_tempaddr",
-	 		.data		=	&ipv6_devconf.use_tempaddr,
+	 		.data		=	&global_ipv6_devconf.use_tempaddr,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -4083,7 +4113,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_TEMP_VALID_LFT,
 			.procname	=	"temp_valid_lft",
-	 		.data		=	&ipv6_devconf.temp_valid_lft,
+	 		.data		=	&global_ipv6_devconf.temp_valid_lft,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -4091,7 +4121,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_TEMP_PREFERED_LFT,
 			.procname	=	"temp_prefered_lft",
-	 		.data		=	&ipv6_devconf.temp_prefered_lft,
+	 		.data		=	&global_ipv6_devconf.temp_prefered_lft,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -4099,7 +4129,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_REGEN_MAX_RETRY,
 			.procname	=	"regen_max_retry",
-	 		.data		=	&ipv6_devconf.regen_max_retry,
+	 		.data		=	&global_ipv6_devconf.regen_max_retry,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -4107,7 +4137,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MAX_DESYNC_FACTOR,
 			.procname	=	"max_desync_factor",
-	 		.data		=	&ipv6_devconf.max_desync_factor,
+	 		.data		=	&global_ipv6_devconf.max_desync_factor,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -4116,7 +4146,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MAX_ADDRESSES,
 			.procname	=	"max_addresses",
-			.data		=	&ipv6_devconf.max_addresses,
+			.data		=	&global_ipv6_devconf.max_addresses,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
@@ -4124,7 +4154,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA_DEFRTR,
 			.procname	=	"accept_ra_defrtr",
-         		.data		=	&ipv6_devconf.accept_ra_defrtr,
+         		.data		=	&global_ipv6_devconf.accept_ra_defrtr,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4132,7 +4162,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA_PINFO,
 			.procname	=	"accept_ra_pinfo",
-         		.data		=	&ipv6_devconf.accept_ra_pinfo,
+         		.data		=	&global_ipv6_devconf.accept_ra_pinfo,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -4141,7 +4171,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA_RTR_PREF,
 			.procname	=	"accept_ra_rtr_pref",
-			.data		=	&ipv6_devconf.accept_ra_rtr_pref,
+			.data		=	&global_ipv6_devconf.accept_ra_rtr_pref,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
@@ -4149,7 +4179,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_PROBE_INTERVAL,
 			.procname	=	"router_probe_interval",
-			.data		=	&ipv6_devconf.rtr_probe_interval,
+			.data		=	&global_ipv6_devconf.rtr_probe_interval,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec_jiffies,
@@ -4159,7 +4189,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN,
 			.procname	=	"accept_ra_rt_info_max_plen",
-			.data		=	&ipv6_devconf.accept_ra_rt_info_max_plen,
+			.data		=	&global_ipv6_devconf.accept_ra_rt_info_max_plen,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
@@ -4216,26 +4246,28 @@ static struct addrconf_sysctl_table
 	},
 };
 
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+static struct addrconf_sysctl_table *
+__addrconf_sysctl_register(struct inet6_dev *idev,
+		struct net_device_extended *ext,
+		char *dev_name, int ifindex,
+		struct ipv6_devconf *p)
 {
 	int i;
-	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
-	struct net_device_extended *ext;
 	struct ipv6_devconf_extensions *dext;
-	char *dev_name = NULL;
 
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
-		return;
+		return NULL;
+
 	memcpy(t, &addrconf_sysctl, sizeof(*t));
 	for (i=0; t->addrconf_vars[i].data; i++) {
 		if (t->addrconf_vars[i].ctl_name <= NET_IPV6_SYSCTL_EXTENDED_BOUNDARY) {
-			t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+			t->addrconf_vars[i].data += (char*)p - (char*)&global_ipv6_devconf;
 			t->addrconf_vars[i].de = NULL;
 			t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+			t->addrconf_vars[i].owner_env = get_exec_env();
 		} else {
-			ext = (dev == NULL) ? NULL : dev_extended(dev);
 			if (ext == NULL) {
 				dext = &ipv6_devconf_extensions_dflt;
 				if (p != &ipv6_devconf_dflt)
@@ -4256,6 +4288,9 @@ static void addrconf_sysctl_register(str
 			 */
 			t->addrconf_vars[i].de = NULL;
 			t->addrconf_vars[i].extra1 = idev;
+			t->addrconf_vars[i].owner_env = get_exec_env();
+			if (!ve_is_super(get_exec_env()))
+				t->addrconf_vars[i].mode = 0444;
 			switch (t->addrconf_vars[i].ctl_name) {
 			case NET_IPV6_DISABLE:
 				t->addrconf_vars[i].data = &dext->disable_ipv6;
@@ -4269,13 +4304,6 @@ static void addrconf_sysctl_register(str
 		}
 
 	}
-	if (dev) {
-		dev_name = dev->name; 
-		t->addrconf_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
 
 	/* 
 	 * Make a copy of dev_name, because '.procname' is regarded as const 
@@ -4286,6 +4314,7 @@ static void addrconf_sysctl_register(str
 	if (!dev_name)
 	    goto free;
 
+	t->addrconf_dev[0].ctl_name = ifindex;
 	t->addrconf_dev[0].procname = dev_name;
 
 	t->addrconf_dev[0].child = t->addrconf_vars;
@@ -4300,9 +4329,7 @@ static void addrconf_sysctl_register(str
 	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
-	else
-		p->sysctl = t;
-	return;
+	return t;
 
 	/* error path */
  free_procname:
@@ -4310,7 +4337,21 @@ static void addrconf_sysctl_register(str
  free:
 	kfree(t);
 
-	return;
+	return NULL;
+}
+
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+{
+	struct net_device *dev;
+	char *dev_name;
+	int ifindex;
+
+	dev = idev->dev;
+	dev_name = dev->name; 
+	ifindex = dev->ifindex;
+
+	p->sysctl = __addrconf_sysctl_register(idev, dev_extended(dev),
+			dev_name, ifindex, p);
 }
 
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
@@ -4324,9 +4365,61 @@ static void addrconf_sysctl_unregister(s
 	}
 }
 
+#ifdef CONFIG_VE
+int addrconf_sysctl_init(struct ve_struct *ve)
+{
+	int err = 0;
+	struct ipv6_devconf *conf, *conf_def;
+
+	err = -ENOMEM;
 
-#endif
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		goto err1;
+
+	memcpy(conf, &global_ipv6_devconf, sizeof(*conf));
+	conf->sysctl = __addrconf_sysctl_register(NULL, NULL, "all",
+			NET_PROTO_CONF_ALL, conf);
+	if (!conf->sysctl)
+		goto err2;
+
+	conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
+	if (!conf_def)
+		goto err3;
+
+	memcpy(conf_def, &global_ipv6_devconf_dflt, sizeof(*conf_def));
+	conf_def->sysctl = __addrconf_sysctl_register(NULL, NULL, "default",
+			NET_PROTO_CONF_DEFAULT, conf_def);
+	if (!conf_def->sysctl)
+		goto err4;
 
+	ve->_ipv6_devconf = conf;
+	ve->_ipv6_devconf_dflt = conf_def;
+	return 0;
+
+err4:
+	kfree(conf_def);
+err3:
+	addrconf_sysctl_unregister(conf);
+err2:
+	kfree(conf);
+err1:
+	return err;
+}
+
+void addrconf_sysctl_fini(struct ve_struct *ve)
+{
+	addrconf_sysctl_unregister(ve->_ipv6_devconf);
+	addrconf_sysctl_unregister(ve->_ipv6_devconf_dflt);
+}
+
+void addrconf_sysctl_free(struct ve_struct *ve)
+{
+	kfree(ve->_ipv6_devconf);
+	kfree(ve->_ipv6_devconf_dflt);
+}
+#endif /* CONFIG_VE */
+#endif /* CONFIG_SYSCTL */
 /*
  *      Device notifier
  */
@@ -4355,6 +4448,11 @@ int __init addrconf_init(void)
 		return err;
 	}
 
+#ifdef CONFIG_VE
+	get_ve0()->_ipv6_devconf = &global_ipv6_devconf;
+	get_ve0()->_ipv6_devconf_dflt = &global_ipv6_devconf_dflt;
+#endif
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -4390,7 +4488,8 @@ int __init addrconf_init(void)
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl.sysctl_header =
 		register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
-	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+	__addrconf_sysctl_register(NULL, NULL, "default",
+			NET_PROTO_CONF_DEFAULT, &global_ipv6_devconf_dflt);
 #endif
 
 	return 0;
@@ -4407,8 +4506,8 @@ void __exit addrconf_cleanup(void)
 
 	rtnetlink_links[PF_INET6] = NULL;
 #ifdef CONFIG_SYSCTL
-	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	addrconf_sysctl_unregister(&ipv6_devconf);
+	addrconf_sysctl_unregister(&global_ipv6_devconf_dflt);
+	addrconf_sysctl_unregister(&global_ipv6_devconf);
 #endif
 
 	rtnl_lock();
@@ -4448,6 +4547,6 @@ void __exit addrconf_cleanup(void)
 	rtnl_unlock();
 
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("if_inet6");
+	remove_proc_glob_entry("net/if_inet6", NULL);
 #endif
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/af_inet6.c kernel-2.6.18-417.el5-028stab121/net/ipv6/af_inet6.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/af_inet6.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/af_inet6.c	2017-01-13 08:40:40.000000000 -0500
@@ -56,9 +56,13 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#include <net/inet6_connection_sock.h>
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
+#include <ub/ub_net.h>
+
+#include <linux/ve_proto.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -83,7 +87,7 @@ MODULE_PARM_DESC(disable, "Disable IPv6 
 module_param_named(disable_ipv6, ipv6_devconf_extensions_dflt.disable_ipv6, int, 0444);
 MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
 
-module_param_named(autoconf, ipv6_devconf_dflt.autoconf, int, 0444);
+module_param_named(autoconf, global_ipv6_devconf_dflt.autoconf, int, 0444);
 MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
 
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
@@ -153,6 +157,10 @@ lookup_protocol:
 			goto out_rcu_unlock;
 	}
 
+	err = vz_security_protocol_check(answer->protocol);
+	if (err < 0)
+		goto out_rcu_unlock;
+
 	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
@@ -170,6 +178,13 @@ lookup_protocol:
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET6, sock->type))
+		goto out_sk_free;
+	/* if charge was successful, sock_init_data() MUST be called to
+	 * set sk->sk_type. otherwise sk will be uncharged to wrong resource
+	 */
+
 	sock_init_data(sock, sk);
 
 	err = 0;
@@ -244,6 +259,9 @@ out:
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
+out_sk_free:
+	sk_free(sk);
+	return err;
 }
 
 
@@ -736,16 +754,16 @@ snmp6_mib_free(void *ptr[2])
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+	if (snmp6_mib_init((void **)ve_ipv6_statistics, sizeof (struct ipstats_mib),
 			   __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+	if (snmp6_mib_init((void **)ve_icmpv6_statistics, sizeof (struct icmpv6_mib),
 			   __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp6_mib_init((void **)icmpv6msg_statistics, sizeof (struct icmpv6msg_mib),
 			   __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+	if (snmp6_mib_init((void **)ve_udp_stats_in6, sizeof (struct udp_mib),
 			   __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	return 0;
@@ -753,9 +771,9 @@ static int __init init_ipv6_mibs(void)
 err_udp_mib:
 	snmp6_mib_free((void **)icmpv6msg_statistics);
 err_icmpmsg_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp6_mib_free((void **)ve_icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	snmp6_mib_free((void **)ve_ipv6_statistics);
 err_ip_mib:
 	return -ENOMEM;
 	
@@ -763,10 +781,29 @@ err_ip_mib:
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
-}
+	snmp6_mib_free((void **)ve_ipv6_statistics);
+	snmp6_mib_free((void **)ve_icmpv6_statistics);
+	snmp6_mib_free((void **)ve_udp_stats_in6);
+}
+
+static struct ve_ipv6_ops ve_ipv6_real_ops = {
+	.snmp_proc_init = ve_snmp_proc_init,
+	.snmp_proc_fini = ve_snmp_proc_fini,
+	.addrconf_sysctl_init = addrconf_sysctl_init,
+	.addrconf_sysctl_fini = addrconf_sysctl_fini,
+	.addrconf_sysctl_free = addrconf_sysctl_free,
+	.ndisc_init = ve_ndisc_init,
+	.ndisc_fini = ve_ndisc_fini,
+	.route_init = init_ve_route6,
+	.route_fini = fini_ve_route6,
+	.ifdown = addrconf_ifdown,
+	.frag_cleanup = ip6_frag_cleanup,
+	.addr_add = inet6_addr_add,
+	.sock_mc_join = ipv6_sock_mc_join,
+	.reqsk_alloc = __inet6_reqsk_alloc,
+	.reqsk_queue = inet6_csk_reqsk_queue_hash_add,
+	.make_sk_mapped = inet6_make_mapped,
+};
 
 static int __init inet6_init(void)
 {
@@ -774,6 +811,11 @@ static int __init inet6_init(void)
         struct list_head *r;
 	int err = 0;
 
+	if (find_module_by_name("vzmon")) {
+		printk(KERN_WARNING "Can't load ipv6 module due to service vz is started\n");
+		return -EBUSY;
+	}
+
 #ifdef MODULE
 #if 0 /* FIXME --RR */
 	if (!mod_member_present(&__this_module, can_unload))
@@ -892,6 +934,7 @@ static int __init inet6_init(void)
 
 	ipv6_packet_init();
 	err = 0;
+	ve_ipv6_ops_init(&ve_ipv6_real_ops);
 out:
 	return err;
 
@@ -942,6 +985,7 @@ static void __exit inet6_exit(void)
 	if (disable_ipv6_mod)
 		return;
 
+	BUG();
 	/* First of all disallow new sockets creation. */
 	sock_unregister(PF_INET6);
 #ifdef CONFIG_PROC_FS
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/anycast.c kernel-2.6.18-417.el5-028stab121/net/ipv6/anycast.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/anycast.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/anycast.c	2017-01-13 08:40:21.000000000 -0500
@@ -82,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, i
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev;
 	struct ipv6_ac_socklist *pac;
-	int	ishost = !ipv6_devconf.forwarding;
+	int	ishost = !ve_ipv6_devconf.forwarding;
 	int	err = 0;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -334,9 +334,7 @@ int ipv6_dev_ac_inc(struct net_device *d
 	idev->ac_list = aca;
 	write_unlock_bh(&idev->lock);
 
-	dst_hold(&rt->u.dst);
-	if (ip6_ins_rt(rt, NULL, NULL, NULL))
-		dst_release(&rt->u.dst);
+	ip6_ins_rt(rt, NULL, NULL, NULL);
 
 	addrconf_join_solict(dev, &aca->aca_addr);
 
@@ -378,10 +376,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	dst_hold(&aca->aca_rt->u.dst);
-	if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
-		dst_free(&aca->aca_rt->u.dst);
-	else
-		dst_release(&aca->aca_rt->u.dst);
+	ip6_del_rt(aca->aca_rt, NULL, NULL, NULL);
 
 	aca_put(aca);
 	return 0;
@@ -452,6 +447,8 @@ static inline struct ifacaddr6 *ac6_get_
 	     state->dev;
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (!idev)
 			continue;
@@ -482,6 +479,8 @@ static struct ifacaddr6 *ac6_get_next(st
 			state->idev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->idev = in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
@@ -576,7 +575,7 @@ static struct file_operations ac6_seq_fo
 
 int __init ac6_proc_init(void)
 {
-	if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
+	if (!proc_glob_fops_create("net/anycast6", S_IRUGO, &ac6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
@@ -584,7 +583,7 @@ int __init ac6_proc_init(void)
 
 void ac6_proc_exit(void)
 {
-	proc_net_remove("anycast6");
+	remove_proc_glob_entry("net/anycast6", NULL);
 }
 #endif
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/fib6_rules.c kernel-2.6.18-417.el5-028stab121/net/ipv6/fib6_rules.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/fib6_rules.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/fib6_rules.c	2017-01-13 08:40:22.000000000 -0500
@@ -33,7 +33,12 @@ struct fib6_rule
 	u8			tclass;
 };
 
-static struct fib_rules_ops fib6_rules_ops;
+static struct fib_rules_ops _fib6_rules_ops;
+#ifdef CONFIG_VE
+#define fib6_rules_ops (*get_exec_env()->_fib6_ops)
+#else
+#define fib6_rules_ops _fib6_rules_ops
+#endif
 
 static struct fib6_rule main_rule = {
 	.common = {
@@ -54,7 +59,82 @@ static struct fib6_rule local_rule = {
 	},
 };
 
-static LIST_HEAD(fib6_rules);
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+#ifdef CONFIG_VE
+static inline void init_rule_struct(struct fib6_rule *r,
+		u32 pref, unsigned char table, unsigned char action)
+{
+	memset(r, 0, sizeof(struct fib6_rule));
+	atomic_set(&r->common.refcnt, 1);
+	r->common.pref = pref;
+	r->common.table = table;
+	r->common.action = action;
+}
+#endif
+
+int fib6_rules_create(void)
+{
+#ifdef CONFIG_VE
+	struct fib6_rule *main_rule, *loc_rule;
+	struct fib_rules_ops *ops;
+
+	ops = kmalloc(sizeof(struct fib_rules_ops), GFP_KERNEL_UBC);
+	if (ops == NULL)
+		goto out_ops;
+	memcpy(ops, &_fib6_rules_ops, sizeof(struct fib_rules_ops));
+	INIT_LIST_HEAD(&ops->rules_list);
+
+	main_rule = kmalloc(sizeof(struct fib6_rule), GFP_KERNEL_UBC);
+	if (main_rule == NULL)
+		goto out_main;
+
+	loc_rule = kmalloc(sizeof(struct fib6_rule), GFP_KERNEL_UBC);
+	if (loc_rule == NULL)
+		goto out_loc;
+
+	init_rule_struct(main_rule, 0x7FFE, RT_TABLE_MAIN, RTN_UNICAST);
+	init_rule_struct(loc_rule, 0, RT_TABLE_LOCAL, RTN_UNICAST);
+
+	list_add_tail(&loc_rule->common.list, &ops->rules_list);
+	list_add_tail(&main_rule->common.list, &ops->rules_list);
+
+	get_exec_env()->_fib6_ops = ops;
+	fib_rules_register(ops);
+	return 0;
+
+out_loc:
+	kfree(main_rule);
+out_main:
+	kfree(ops);
+out_ops:
+	return -1;
+#else
+	return 0;
+#endif
+}
+
+void fib6_rules_destroy(void)
+{
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	struct fib_rule *r;
+	struct list_head *pos, *tmp;
+
+	ve = get_exec_env();
+	rtnl_lock();
+	list_for_each_safe (pos, tmp, &ve->_fib6_ops->rules_list) {
+		r = list_entry(pos, struct fib_rule, list);
+
+		list_del_rcu(pos);
+		fib_rule_put(r);
+	}
+	rtnl_unlock();
+
+	fib_rules_unregister(ve->_fib6_ops);
+	kfree(ve->_fib6_ops);
+#endif
+}
+#endif
 
 struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
 				   pol_lookup_t lookup)
@@ -279,7 +359,7 @@ static u32 fib6_rule_default_pref(void)
 	return 0x3FFF;
 }
 
-static struct fib_rules_ops fib6_rules_ops = {
+static struct fib_rules_ops _fib6_rules_ops = {
 	.family			= AF_INET6,
 	.rule_size		= sizeof(struct fib6_rule),
 	.action			= fib6_rule_action,
@@ -290,19 +370,25 @@ static struct fib_rules_ops fib6_rules_o
 	.default_pref		= fib6_rule_default_pref,
 	.nlgroup		= RTNLGRP_IPV6_RULE,
 	.policy			= fib6_rule_policy,
-	.rules_list		= &fib6_rules,
+	.rules_list		= LIST_HEAD_INIT(_fib6_rules_ops.rules_list),
 	.owner			= THIS_MODULE,
 };
 
 void __init fib6_rules_init(void)
 {
-	list_add_tail(&local_rule.common.list, &fib6_rules);
-	list_add_tail(&main_rule.common.list, &fib6_rules);
+#ifdef CONFIG_VE
+	get_ve0()->_fib6_ops = &_fib6_rules_ops;
+#endif
+	list_add_tail(&local_rule.common.list, &fib6_rules_ops.rules_list);
+	list_add_tail(&main_rule.common.list, &fib6_rules_ops.rules_list);
 
-	fib_rules_register(&fib6_rules_ops);
+	fib_rules_register(&_fib6_rules_ops);
 }
 
 void fib6_rules_cleanup(void)
 {
-	fib_rules_unregister(&fib6_rules_ops);
+	fib_rules_unregister(&_fib6_rules_ops);
+#ifdef CONFIG_VE
+	get_ve0()->_fib6_ops = NULL;
+#endif
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/inet6_connection_sock.c kernel-2.6.18-417.el5-028stab121/net/ipv6/inet6_connection_sock.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/inet6_connection_sock.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/inet6_connection_sock.c	2017-01-13 08:40:22.000000000 -0500
@@ -25,6 +25,8 @@
 #include <net/ip6_route.h>
 #include <net/sock.h>
 #include <net/inet6_connection_sock.h>
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
 
 int inet6_csk_bind_conflict(const struct sock *sk,
 			    const struct inet_bind_bucket *tb)
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/inet6_hashtables.c kernel-2.6.18-417.el5-028stab121/net/ipv6/inet6_hashtables.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/inet6_hashtables.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/inet6_hashtables.c	2017-01-13 08:40:21.000000000 -0500
@@ -69,14 +69,15 @@ struct sock *__inet6_lookup_established(
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
+	struct ve_struct *env = get_exec_env();
+	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport, VEID(env));
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
 
 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
 		/* For IPV6 do the cheaper port and family tests first. */
-		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif))
+		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
@@ -89,6 +90,7 @@ struct sock *__inet6_lookup_established(
 
 			if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
 			    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
+			    ve_accessible_strict(tw->tw_owner_env, VEID(env)) &&
 			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
 				goto hit;
 		}
@@ -111,9 +113,15 @@ struct sock *inet6_lookup_listener(struc
 	const struct hlist_node *node;
 	struct sock *result = NULL;
 	int score, hiscore = 0;
+	struct ve_struct *env;
+
+	env = get_exec_env();
 
 	read_lock(&hashinfo->lhash_lock);
-	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+	sk_for_each(sk, node, &hashinfo->listening_hash[
+			inet_lhashfn(hnum, VEID(env))]) {
+		if (!ve_accessible_strict(sk->owner_env, env))
+			continue;
 		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
 			
@@ -164,7 +172,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
 
 static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 				     struct sock *sk, const __u16 lport,
-				     struct inet_timewait_sock **twp)
+				     struct inet_timewait_sock **twp,
+				     struct ve_struct *ve)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -174,7 +183,7 @@ static int __inet6_check_established(str
 	const int dif = sk->sk_bound_dev_if;
 	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
 	const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
-						inet->dport);
+						inet->dport, VEID(ve));
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
@@ -194,7 +203,8 @@ static int __inet6_check_established(str
 		   sk2->sk_family	       == PF_INET6	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+		   ve_accessible_strict(tw->tw_owner_env, VEID(ve))) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -205,7 +215,7 @@ static int __inet6_check_established(str
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif, ve))
 			goto not_unique;
 	}
 
@@ -258,7 +268,9 @@ int inet6_hash_connect(struct inet_timew
  	struct inet_bind_hashbucket *head;
  	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *ve;
 
+	ve = sk->owner_env;
  	if (snum == 0) {
  		int i, port, low, high, remaining;
 		static u32 hint;
@@ -274,7 +286,8 @@ int inet6_hash_connect(struct inet_timew
 			port = low + (i + offset) % remaining;
 			if (inet_is_reserved_local_port(port))
 				continue;
- 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			head = &hinfo->bhash[inet_bhashfn(port,
+				hinfo->bhash_size, VEID(ve))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -282,20 +295,21 @@ int inet6_hash_connect(struct inet_timew
  			 * unique enough.
  			 */
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
+ 				if (tb->port == port &&
+				    ve_accessible_strict(tb->owner_env, ve)) {
  					BUG_TRAP(!hlist_empty(&tb->owners));
  					if (tb->fastreuse >= 0)
  						goto next_port;
  					if (!__inet6_check_established(death_row,
 								       sk, port,
-								       &tw))
+								       &tw, ve))
  						goto ok;
  					goto next_port;
  				}
  			}
 
  			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-						     head, port);
+						     head, port, ve);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -330,7 +344,7 @@ ok:
 		goto out;
  	}
 
- 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
  	tb   = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 
@@ -341,7 +355,7 @@ ok:
 	} else {
 		spin_unlock(&head->lock);
 		/* No definite answer... Walk to established hash table */
-		ret = __inet6_check_established(death_row, sk, snum, NULL);
+		ret = __inet6_check_established(death_row, sk, snum, NULL, ve);
 out:
 		local_bh_enable();
 		return ret;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/ip6_fib.c kernel-2.6.18-417.el5-028stab121/net/ipv6/ip6_fib.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/ip6_fib.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/ip6_fib.c	2017-01-13 08:40:21.000000000 -0500
@@ -155,12 +155,28 @@ static struct fib6_table fib6_main_tbl =
 	},
 };
 
+#ifdef CONFIG_VE
+static inline void prepare_fib6_table(void)
+{
+	get_ve0()->_fib6_table = &fib6_main_tbl;
+}
+
+#define fib6_main_tbl	(*(get_exec_env()->_fib6_table))
+#else
+#define prepare_fib6_table()	do { } while (0)
+#endif
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB_TABLE_HASHSZ 256
 #else
 #define FIB_TABLE_HASHSZ 1
 #endif
+
+#ifdef CONFIG_VE
+#define fib_table_hash	(get_exec_env()->_fib6_table_hash)
+#else
 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+#endif
 
 static void fib6_link_table(struct fib6_table *tb)
 {
@@ -174,11 +190,16 @@ static void fib6_link_table(struct fib6_
 
 	h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
 
-	/*
-	 * No protection necessary, this is the only list mutatation
-	 * operation, tables never disappear once they exist.
-	 */
+	write_lock_bh(&tb->tb6_lock);
 	hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+	write_unlock_bh(&tb->tb6_lock);
+}
+
+static void fib6_unlink_table(struct fib6_table *tb)
+{
+	write_lock_bh(&tb->tb6_lock);
+	hlist_del_rcu(&tb->tb6_hlist);
+	write_unlock_bh(&tb->tb6_lock);
 }
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -190,6 +211,16 @@ static struct fib6_table fib6_local_tbl 
 	},
 };
 
+#ifdef CONFIG_VE
+static inline void prepare_fib6_local_table(void)
+{
+	get_ve0()->_fib6_local_table = &fib6_local_tbl;
+}
+#define fib6_local_tbl	(*(get_exec_env())->_fib6_local_table)
+#else
+#define prepare_fib6_local_table()	do { } while (0)
+#endif
+
 static struct fib6_table *fib6_alloc_table(u32 id)
 {
 	struct fib6_table *table;
@@ -242,12 +273,18 @@ struct fib6_table *fib6_get_table(u32 id
 	return NULL;
 }
 
-static void __init fib6_tables_init(void)
+void fib6_tables_init(void)
 {
 	fib6_link_table(&fib6_main_tbl);
 	fib6_link_table(&fib6_local_tbl);
 }
 
+void fib6_tables_cleanup(void)
+{
+	fib6_unlink_table(&fib6_main_tbl);
+	fib6_unlink_table(&fib6_local_tbl);
+}
+
 #else
 
 struct fib6_table *fib6_new_table(u32 id)
@@ -266,11 +303,16 @@ struct dst_entry *fib6_rule_lookup(struc
 	return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
 }
 
-static void __init fib6_tables_init(void)
+void fib6_tables_init(void)
 {
 	fib6_link_table(&fib6_main_tbl);
 }
 
+void fib6_tables_cleanup(void)
+{
+	fib6_unlink_table(&fib6_main_tbl);
+}
+
 #endif
 
 static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1355,9 +1397,11 @@ void fib6_clean_all(int (*func)(struct r
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 		hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
 					 tb6_hlist) {
+			struct ve_struct *old_env = set_exec_env(table->owner_env);
 			write_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(&table->tb6_root, func, prune, arg);
 			write_unlock_bh(&table->tb6_lock);
+			set_exec_env(old_env);
 		}
 	}
 	rcu_read_unlock();
@@ -1425,6 +1469,8 @@ static int fib6_age(struct rt6_info *rt,
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
+LIST_HEAD(fib6_table_list);
+
 void fib6_run_gc(unsigned long dummy)
 {
 	if (dummy != ~0UL) {
@@ -1457,11 +1503,15 @@ int __init fib6_init(void)
 {
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 					   NULL, NULL);
 	if (!fib6_node_kmem)
 		return -ENOMEM;
 
+	prepare_fib6_table();
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	prepare_fib6_local_table();
+#endif
 	fib6_tables_init();
 
 	return 0;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/ip6_flowlabel.c kernel-2.6.18-417.el5-028stab121/net/ipv6/ip6_flowlabel.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/ip6_flowlabel.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/ip6_flowlabel.c	2017-01-13 08:40:21.000000000 -0500
@@ -415,6 +415,9 @@ int ipv6_flowlabel_opt(struct sock *sk, 
 	struct ipv6_fl_socklist *sfl, **sflp;
 	struct ip6_flowlabel *fl;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (optlen < sizeof(freq))
 		return -EINVAL;
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/ip6_output.c kernel-2.6.18-417.el5-028stab121/net/ipv6/ip6_output.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/ip6_output.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/ip6_output.c	2017-01-13 08:40:41.000000000 -0500
@@ -154,6 +154,7 @@ int ip6_output(struct sk_buff *skb)
 	else
 		return ip6_output2(skb);
 }
+EXPORT_SYMBOL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
@@ -323,7 +324,7 @@ int ip6_forward(struct sk_buff *skb)
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	
-	if (ipv6_devconf.forwarding == 0)
+	if (ve_ipv6_devconf.forwarding == 0)
 		goto error;
 
 	if (skb_warn_if_lro(skb))
@@ -334,6 +335,9 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 	}
 
+	if (skb->pkt_type != PACKET_HOST)
+		goto drop;
+
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/*
@@ -423,6 +427,20 @@ int ip6_forward(struct sk_buff *skb)
 		return -EMSGSIZE;
 	}
 
+	/*
+	 * We try to optimize forwarding of VE packets:
+	 * do not decrement TTL (and so save skb_cow)
+	 * during forwarding of outgoing pkts from VE.
+	 * For incoming pkts we still do ttl decr,
+	 * since such skb is not cloned and does not require
+	 * actual cow. So, there is at least one place
+	 * in pkts path with mandatory ttl decr, that is
+	 * sufficient to prevent routing loops.
+	 */
+	hdr = skb->nh.ipv6h;
+	if (skb->dev->features & NETIF_F_VENET) /* src is VENET device */
+		goto no_ttl_decr;
+
 	if (skb_cow(skb, dst->dev->hard_header_len)) {
 		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 		goto drop;
@@ -434,6 +452,7 @@ int ip6_forward(struct sk_buff *skb)
  
 	hdr->hop_limit--;
 
+no_ttl_decr:
 	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/mcast.c kernel-2.6.18-417.el5-028stab121/net/ipv6/mcast.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/mcast.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/mcast.c	2017-01-13 08:40:26.000000000 -0500
@@ -156,7 +156,7 @@ static int ip6_mc_leave_src(struct sock 
 #define IGMP6_UNSOLICITED_IVAL	(10*HZ)
 #define MLD_QRV_DEFAULT		2
 
-#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
+#define MLD_V1_SEEN(idev) (ve_ipv6_devconf.force_mld_version == 1 || \
 		(idev)->cnf.force_mld_version == 1 || \
 		((idev)->mc_v1_seen && \
 		time_before(jiffies, (idev)->mc_v1_seen)))
@@ -2168,15 +2168,18 @@ static void igmp6_leave_group(struct ifm
 static void mld_gq_timer_expire(unsigned long data)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct ve_struct *old_env = set_exec_env(idev->dev->owner_env);
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
 	__in6_dev_put(idev);
+	set_exec_env(old_env);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct ve_struct *old_env = set_exec_env(idev->dev->owner_env);
 
 	mld_send_cr(idev);
 	if (idev->mc_ifc_count) {
@@ -2185,6 +2188,7 @@ static void mld_ifc_timer_expire(unsigne
 			mld_ifc_start_timer(idev, idev->mc_maxdelay);
 	}
 	__in6_dev_put(idev);
+	set_exec_env(old_env);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
@@ -2199,6 +2203,7 @@ static void mld_ifc_event(struct inet6_d
 static void igmp6_timer_handler(unsigned long data)
 {
 	struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
+	struct ve_struct *old_env = set_exec_env(ma->idev->dev->owner_env);
 
 	if (MLD_V1_SEEN(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
@@ -2210,6 +2215,7 @@ static void igmp6_timer_handler(unsigned
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
 	spin_unlock(&ma->mca_lock);
 	ma_put(ma);
+	set_exec_env(old_env);
 }
 
 /* Device going down */
@@ -2327,6 +2333,8 @@ static inline struct ifmcaddr6 *igmp6_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (!idev)
 			continue;
@@ -2357,6 +2365,8 @@ static struct ifmcaddr6 *igmp6_mc_get_ne
 			state->idev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->idev = in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
@@ -2471,6 +2481,8 @@ static inline struct ip6_sf_list *igmp6_
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2510,6 +2522,8 @@ static struct ip6_sf_list *igmp6_mcf_get
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+				continue;
 			state->idev = in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
@@ -2651,8 +2665,8 @@ int __init igmp6_init(struct net_proto_f
 	np->hop_limit = 1;
 
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
-	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+	proc_glob_fops_create("net/igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+	proc_glob_fops_create("net/mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
 #endif
 
 	return 0;
@@ -2664,7 +2678,7 @@ void igmp6_cleanup(void)
 	igmp6_socket = NULL; /* for safety */
 
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("mcfilter6");
-	proc_net_remove("igmp6");
+	remove_proc_glob_entry("net/mcfilter6", NULL);
+	remove_proc_glob_entry("net/igmp6", NULL);
 #endif
 }
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/ndisc.c kernel-2.6.18-417.el5-028stab121/net/ipv6/ndisc.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/ndisc.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/ndisc.c	2017-01-13 08:40:23.000000000 -0500
@@ -124,7 +124,7 @@ static struct neigh_ops ndisc_direct_ops
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table nd_tbl = {
+struct neigh_table global_nd_tbl = {
 	.family =	AF_INET6,
 	.entry_size =	sizeof(struct neighbour) + sizeof(struct in6_addr),
 	.key_len =	sizeof(struct in6_addr),
@@ -135,7 +135,7 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
-		.tbl =			&nd_tbl,
+		.tbl =			&global_nd_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	 1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -602,7 +602,9 @@ static void ndisc_send_na(struct net_dev
 			inc_opt = 0;
 	}
 
-	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+	skb = sock_alloc_send_skb(sk,
+				  (MAX_HEADER + sizeof(struct ipv6hdr) +
+				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
 
 	if (skb == NULL) {
@@ -715,7 +717,9 @@ void ndisc_send_ns(struct net_device *de
 	if (send_llinfo)
 		len += ndisc_opt_addr_space(dev);
 
-	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+	skb = sock_alloc_send_skb(sk,
+				  (MAX_HEADER + sizeof(struct ipv6hdr) +
+				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
 	if (skb == NULL) {
 		ND_PRINTK0(KERN_ERR
@@ -812,7 +816,9 @@ void ndisc_send_rs(struct net_device *de
 	if (send_sllao)
 		len += ndisc_opt_addr_space(dev);
 
-        skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+        skb = sock_alloc_send_skb(sk,
+				  (MAX_HEADER + sizeof(struct ipv6hdr) +
+				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
 	if (skb == NULL) {
 		ND_PRINTK0(KERN_ERR
@@ -1642,7 +1648,9 @@ void ndisc_send_redirect(struct sk_buff 
 	rd_len &= ~0x7;
 	len += rd_len;
 
-	buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+	buff = sock_alloc_send_skb(sk,
+				   (MAX_HEADER + sizeof(struct ipv6hdr) +
+				    len + LL_RESERVED_SPACE(dev)),
 				   1, &err);
 	if (buff == NULL) {
 		ND_PRINTK0(KERN_ERR
@@ -1926,7 +1934,9 @@ int __init ndisc_init(struct net_proto_f
          * Initialize the neighbour table
          */
 	
-	neigh_table_init(&nd_tbl);
+	get_ve0()->ve_nd_tbl = &global_nd_tbl;
+	if (neigh_table_init(&nd_tbl))
+		panic("cannot initialize IPv6 NDISC tables\n");
 
 #ifdef CONFIG_SYSCTL
 	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
@@ -1949,3 +1959,51 @@ void ndisc_cleanup(void)
 	sock_release(ndisc_socket);
 	ndisc_socket = NULL; /* For safety. */
 }
+
+#ifdef CONFIG_VE
+int ve_ndisc_init(struct ve_struct *ve)
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_nd_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_nd_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_nd_tbl) = global_nd_tbl;
+	ve->ve_nd_tbl->parms.tbl = ve->ve_nd_tbl;
+	old_env = set_exec_env(ve);
+	err = neigh_table_init(ve->ve_nd_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
+			      "ipv6",
+			      &ndisc_ifinfo_sysctl_change,
+			      &ndisc_ifinfo_sysctl_strategy);
+#endif
+	err = 0;
+
+out:
+	set_exec_env(old_env);
+	return err;
+
+out_free:
+	kfree(ve->ve_nd_tbl);
+	ve->ve_nd_tbl = NULL;
+	goto out;
+}
+
+void ve_ndisc_fini(struct ve_struct *ve)
+{
+	if (ve->ve_nd_tbl) {
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_unregister(&ve->ve_nd_tbl->parms);
+#endif
+		ve->ve_nd_tbl->kmem_cachep = NULL;
+		neigh_table_clear(ve->ve_nd_tbl);
+		kfree(ve->ve_nd_tbl);
+		ve->ve_nd_tbl = NULL;
+	}
+}
+#endif /* CONFIG_VE */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6_queue.c kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6_queue.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6_queue.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6_queue.c	2017-01-13 08:40:23.000000000 -0500
@@ -505,7 +505,7 @@ ipq_rcv_skb(struct sk_buff *skb)
 	if (type <= IPQM_BASE)
 		return;
 	
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (security_netlink_recv(skb, CAP_VE_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);	
 
 	write_lock_bh(&queue_lock);
@@ -541,8 +541,11 @@ ipq_rcv_sk(struct sock *sk, int len)
 	mutex_lock(&ipqnl_mutex);
 			
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		struct ve_struct *env;
 		skb = skb_dequeue(&sk->sk_receive_queue);
+		env = set_exec_env(skb->owner_env);
 		ipq_rcv_skb(skb);
+		(void)set_exec_env(env);
 		kfree_skb(skb);
 	}
 		
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6table_filter.c kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6table_filter.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6table_filter.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6table_filter.c	2017-01-13 08:40:23.000000000 -0500
@@ -11,12 +11,20 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables filter table");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_filter	(get_exec_env()->_ve_ip6t_filter_pf)
+#else
+#define	ve_packet_filter	&packet_filter
+#endif
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
 
 /* Standard entry. */
@@ -43,7 +51,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[3];
 	struct ip6t_error term;
-} initial_table __initdata
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
       { [NF_IP6_LOCAL_IN] = 0,
@@ -108,7 +116,7 @@ ip6t_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -128,7 +136,7 @@ ip6t_local_out_hook(unsigned int hook,
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
@@ -159,22 +167,19 @@ static struct nf_hook_ops ip6t_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init ip6table_filter_init(void)
+int init_ip6table_filter(void)
 {
 	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
-
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	struct ip6t_table *tmp_filter;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_filter, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_filter = ip6t_register_table(&packet_filter,
+			&initial_table.repl);
+	if (IS_ERR(tmp_filter))
+		return PTR_ERR(tmp_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = tmp_filter;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
@@ -184,14 +189,50 @@ static int __init ip6table_filter_init(v
 	return ret;
 
  cleanup_table:
-	ip6t_unregister_table(&packet_filter);
+	ip6t_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
 	return ret;
 }
 
-static void __exit ip6table_filter_fini(void)
+void fini_ip6table_filter(void)
 {
 	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
-	ip6t_unregister_table(&packet_filter);
+	ip6t_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
+}
+
+static int __init ip6table_filter_init(void)
+{
+	int err;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_ip6table_filter();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_filter);
+	KSYMRESOLVE(fini_ip6table_filter);
+	KSYMMODRESOLVE(ip6table_filter);
+	return 0;
+}
+
+static void __exit ip6table_filter_fini(void)
+{
+	KSYMMODUNRESOLVE(ip6table_filter);
+	KSYMUNRESOLVE(init_ip6table_filter);
+	KSYMUNRESOLVE(fini_ip6table_filter);
+	fini_ip6table_filter();
 }
 
 module_init(ip6table_filter_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6table_mangle.c kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6table_mangle.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6table_mangle.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6table_mangle.c	2017-01-13 08:40:23.000000000 -0500
@@ -12,6 +12,7 @@
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -53,7 +54,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[5];
 	struct ip6t_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
       { [NF_IP6_PRE_ROUTING] 	= 0,
@@ -130,6 +131,13 @@ static struct ip6t_table packet_mangler 
 	.af		= AF_INET6,
 };
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_mangler	(get_exec_env()->_ip6t_mangle_table)
+#else
+#define ve_packet_mangler	&packet_mangler
+#endif
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_route_hook(unsigned int hook,
@@ -138,7 +146,7 @@ ip6t_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -174,7 +182,7 @@ ip6t_local_hook(unsigned int hook,
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 
 	if (ret != NF_DROP && ret != NF_STOLEN 
 		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
@@ -225,14 +233,19 @@ static struct nf_hook_ops ip6t_ops[] = {
 	},
 };
 
-static int __init ip6table_mangle_init(void)
+int init_ip6table_mangle(void)
 {
 	int ret;
+	struct ip6t_table *tmp_mangler;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_mangler = ip6t_register_table(&packet_mangler,
+			&initial_table.repl);
+	if (IS_ERR(tmp_mangler))
+		return PTR_ERR(tmp_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = tmp_mangler;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
@@ -242,14 +255,42 @@ static int __init ip6table_mangle_init(v
 	return ret;
 
  cleanup_table:
-	ip6t_unregister_table(&packet_mangler);
+	ip6t_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
 	return ret;
 }
 
-static void __exit ip6table_mangle_fini(void)
+void fini_ip6table_mangle(void)
 {
 	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
-	ip6t_unregister_table(&packet_mangler);
+	ip6t_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
+}
+
+static int __init ip6table_mangle_init(void)
+{
+	int err;
+
+	err = init_ip6table_mangle();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_mangle);
+	KSYMRESOLVE(fini_ip6table_mangle);
+	KSYMMODRESOLVE(ip6table_mangle);
+	return 0;
+}
+
+static void __exit ip6table_mangle_fini(void)
+{
+	KSYMMODUNRESOLVE(ip6table_mangle);
+	KSYMUNRESOLVE(init_ip6table_mangle);
+	KSYMUNRESOLVE(fini_ip6table_mangle);
+	fini_ip6table_mangle();
 }
 
 module_init(ip6table_mangle_init);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6table_raw.c kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6table_raw.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6table_raw.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6table_raw.c	2017-01-13 08:40:23.000000000 -0500
@@ -145,11 +145,12 @@ static struct nf_hook_ops ip6t_ops[] = {
 static int __init ip6table_raw_init(void)
 {
 	int ret;
+	struct ip6t_table *tmp;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_raw, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp = ip6t_register_table(&packet_raw, &initial_table.repl);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	/* Register hooks */
 	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6_tables.c kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6_tables.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6_tables.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6_tables.c	2017-01-13 08:40:40.000000000 -0500
@@ -32,9 +32,13 @@
 #include <linux/mutex.h>
 #include <linux/proc_fs.h>
 #include <linux/cpumask.h>
+#include <ub/ub_mem.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/nfcalls.h>
+
+#include <net/compat.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -94,6 +98,9 @@ do {								\
 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
 #endif
 
+static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
+
+
 /* Check for an extension */
 int 
 ip6t_ext_hdr(u8 nexthdr)
@@ -431,8 +438,8 @@ mark_source_chains(struct xt_table_info 
 				= (void *)ip6t_get_target(e);
 
 			if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
-				       hook, pos, e->comefrom);
+				ve_printk(VE_LOG, "iptables: loop hook %u pos %u %08X.\n",
+					hook, pos, e->comefrom);
 				return 0;
 			}
 			e->comefrom
@@ -446,6 +453,13 @@ mark_source_chains(struct xt_table_info 
 			    && unconditional(&e->ipv6)) {
 				unsigned int oldpos, size;
 
+				if (t->verdict < -NF_MAX_VERDICT - 1) {
+					duprintf("mark_source_chains: bad "
+						"negative verdict (%i)\n",
+							t->verdict);
+					return 0;
+				}
+
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
@@ -483,6 +497,14 @@ mark_source_chains(struct xt_table_info 
 				if (strcmp(t->target.u.user.name,
 					   IP6T_STANDARD_TARGET) == 0
 				    && newpos >= 0) {
+					if (newpos > newinfo->size -
+						sizeof(struct ip6t_entry)) {
+						duprintf("mark_source_chains: "
+							"bad verdict (%i)\n",
+								newpos);
+						return 0;
+					}
+
 					/* This a jump; chase it. */
 					duprintf("Jump rule %u -> %u\n",
 						 pos, newpos);
@@ -581,23 +603,40 @@ err:
 static struct ip6t_target ip6t_standard_target;
 
 static inline int
-check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
-	    unsigned int *i)
+check_entry(struct ip6t_entry *e, const char *name)
 {
 	struct ip6t_entry_target *t;
-	struct ip6t_target *target;
-	int ret;
-	unsigned int j;
 
 	if (!ip6_checkentry(&e->ipv6)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		duprintf("check_entry: ip check failed %p %s.\n", e, name);
 		return -EINVAL;
 	}
 
-	if (e->target_offset + sizeof(struct ip6t_entry_target) >
-								e->next_offset)
+	if (e->target_offset + sizeof(struct ip6t_entry_target) > e->next_offset)
+		return -EINVAL;
+
+
+	t = ip6t_get_target(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
 		return -EINVAL;
 
+	return 0;
+}
+
+static int
+find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
+		 unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+	struct ip6t_target *target;
+	int ret;
+	unsigned int j;
+
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry(e, name);
+	if (ret != 0)
+		return ret;
+
 	j = 0;
 	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
 	if (ret != 0)
@@ -650,6 +689,30 @@ check_entry(struct ip6t_entry *e, const 
 }
 
 static inline int
+check_target(struct ip6t_entry *e, const char *name)
+{
+	struct ip6t_entry_target *t;
+	struct ip6t_target *target;
+	int ret;
+
+	t = ip6t_get_target(e);
+	target = t->u.kernel.target;
+	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
+			      name, e->comefrom, e->ipv6.proto,
+			      e->ipv6.invflags & IP6T_INV_PROTO);
+	if (!ret && t->u.kernel.target->checkentry &&
+	    !t->u.kernel.target->checkentry(name, e, target, t->data,
+		    			    t->u.target_size - sizeof(*t),
+					    e->comefrom)) {
+		duprintf("check_target: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+	}
+	return ret;
+
+}
+
+static inline int
 check_entry_size_and_hooks(struct ip6t_entry *e,
 			   struct xt_table_info *newinfo,
 			   unsigned char *base,
@@ -723,7 +786,7 @@ translate_table(const char *name,
 		const unsigned int *underflows)
 {
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
 	newinfo->size = size;
 	newinfo->number = number;
@@ -772,14 +835,15 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry, name, size, &i);
-
-	if (ret != 0)
-		goto cleanup;
+				find_check_entry, name, size, &i);
+	if (ret != 0) {
+		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
+				  cleanup_entry, &i);
+		return ret;
+	}
 
-	ret = -ELOOP;
 	if (!mark_source_chains(newinfo, valid_hooks, entry0))
-		goto cleanup;
+		return -ELOOP;
 
 	/* And one copy for every other CPU */
 	for_each_possible_cpu(i) {
@@ -787,9 +851,6 @@ translate_table(const char *name,
 			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
-	return 0;
-cleanup:
-	IP6T_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i);
 	return ret;
 }
 
@@ -866,7 +927,7 @@ copy_entries_to_user(unsigned int total_
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = ub_vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
@@ -931,6 +992,243 @@ copy_entries_to_user(unsigned int total_
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_delta {
+	struct compat_delta	*next;
+	unsigned int		offset;
+	short			delta;
+};
+
+static struct compat_delta *compat_offsets;
+
+static int compat_add_offset(unsigned int offset, short delta)
+{
+	struct compat_delta *tmp;
+
+	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	tmp->offset			= offset;
+	tmp->delta			= delta;
+	if (compat_offsets) {
+		tmp->next		= compat_offsets->next;
+		compat_offsets->next	= tmp;
+	} else {
+		compat_offsets		= tmp;
+		tmp->next		= NULL;
+	}
+	return 0;
+}
+
+static void compat_flush_offsets(void)
+{
+	struct compat_delta *tmp, *next;
+
+	if (compat_offsets) {
+		for(tmp = compat_offsets; tmp; tmp = next) {
+			next = tmp->next;
+			kfree(tmp);
+		}
+		compat_offsets = NULL;
+	}
+}
+
+static short compat_calc_jump(unsigned int offset)
+{
+	struct compat_delta *tmp;
+	short delta = 0;
+
+	for(tmp = compat_offsets; tmp; tmp = tmp->next)
+		if (tmp->offset < offset)
+			delta += tmp->delta;
+	return delta;
+}
+
+struct compat_ip6t_standard_target {
+	struct compat_xt_entry_target		target;
+	compat_int_t				verdict;
+};
+
+struct compat_ip6t_standard {
+	struct compat_ip6t_entry		entry;
+	struct compat_ip6t_standard_target	target;
+};
+
+#define IP6T_ST_LEN		XT_ALIGN(sizeof(struct ip6t_standard_target))
+#define IP6T_ST_COMPAT_LEN	COMPAT_XT_ALIGN(sizeof(struct compat_ip6t_standard_target))
+#define IP6T_ST_OFFSET		(IP6T_ST_LEN - IP6T_ST_COMPAT_LEN)
+
+static int ip6t_compat_standard_fn(void *target, void **dstptr, int *size, int convert)
+{
+	struct compat_ip6t_standard_target compat_st, *pcompat_st;
+	struct ip6t_standard_target st, *pst;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pst = target;
+			memcpy(&compat_st.target, &pst->target, sizeof(compat_st.target));
+			compat_st.verdict = pst->verdict;
+			if (compat_st.verdict > 0)
+				compat_st.verdict -= compat_calc_jump(compat_st.verdict);
+			compat_st.target.u.user.target_size = IP6T_ST_COMPAT_LEN;
+			if (copy_to_user(*dstptr, &compat_st, IP6T_ST_COMPAT_LEN))
+				ret = -EFAULT;
+			*size	-= IP6T_ST_OFFSET;
+			*dstptr	+= IP6T_ST_COMPAT_LEN;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_st = target;
+			memcpy(&st.target, &pcompat_st->target, IP6T_ST_COMPAT_LEN);
+			st.verdict = pcompat_st->verdict;
+			if (st.verdict > 0)
+				st.verdict += compat_calc_jump(st.verdict);
+			st.target.u.user.target_size = IP6T_ST_LEN;
+			memcpy(*dstptr, &st, IP6T_ST_LEN);
+			*size	+= IP6T_ST_OFFSET;
+			*dstptr	+= IP6T_ST_LEN;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size	+= IP6T_ST_OFFSET;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+static inline int
+compat_calc_match(struct ip6t_entry_match *m, int *size)
+{
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+	else
+		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	return 0;
+}
+
+static int compat_calc_entry(const struct ip6t_entry *e,
+			     const struct xt_table_info *info,
+			     const void *base, struct xt_table_info *newinfo)
+{
+	struct ip6t_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - base;
+
+	IP6T_MATCH_ITERATE(e, compat_calc_match, &off);
+
+	t = ip6t_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	else
+		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+
+	newinfo->size -= off;
+
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+		if (info->hook_entry[i] && (e < (struct ip6t_entry *)(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] &&  (e < (struct ip6t_entry *)(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	void *loc_cpu_entry;
+	int i;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	/*
+	 * We dont care about newinfo->entries[] since
+	 * we don't even touch them.
+	 */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->size	= info->size;
+	newinfo->number = info->number;
+
+	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+		newinfo->hook_entry[i]	= info->hook_entry[i];
+		newinfo->underflow[i]	= info->underflow[i];
+	}
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size,
+				  compat_calc_entry, info, loc_cpu_entry, newinfo);
+}
+#endif
+
+static int get_info(void __user *user, const int *len, int compat)
+{
+	char name[IP6T_TABLE_MAXNAMELEN];
+	struct ip6t_table *t;
+	int ret;
+
+	if (*len != sizeof(struct ip6t_getinfo)) {
+		duprintf("length %u != %zu\n", *len,
+			 sizeof(struct ip6t_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[sizeof(name)-1] = '\0';
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(AF_INET6);
+#endif
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
+				    "ip6table_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct ip6t_getinfo info;
+		const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+		struct xt_table_info xt_info;
+
+		if (compat) {
+			ret = compat_table_info(private, &xt_info);
+			compat_flush_offsets();
+			private = &xt_info;
+		}
+#endif
+		memset(&info, 0, sizeof(info));
+		memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry));
+		memcpy(info.underflow,  private->underflow,  sizeof(info.underflow));
+		info.valid_hooks	= t->valid_hooks;
+		info.num_entries	= private->number;
+		info.size		= private->size;
+		strcpy(info.name, name);
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(AF_INET6);
+#endif
+	return ret;
+}
+
 static int
 get_entries(const struct ip6t_get_entries *entries,
 	    struct ip6t_get_entries __user *uptr)
@@ -959,73 +1257,49 @@ get_entries(const struct ip6t_get_entrie
 }
 
 static int
-do_replace(void __user *user, unsigned int len)
+__do_replace(const char *name, unsigned int valid_hooks,
+	     struct xt_table_info *newinfo, unsigned int num_counters,
+	     void __user *counters_ptr)
 {
-	int ret;
-	struct ip6t_replace tmp;
-	struct xt_table *t;
-	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
-	void *loc_cpu_entry, *loc_cpu_old_entry;
-
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
-
-	/* overflow check */
-	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-		return -ENOMEM;
-	tmp.name[sizeof(tmp.name)-1] = 0;
-
-	newinfo = xt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
-
-	/* choose the copy that is on our node/cpu */
-	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
+	unsigned long counters_size;
+	void *loc_cpu_old_entry;
+	struct ip6t_table *t;
+	int ret = 0;
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+	counters_size = num_counters * sizeof(struct xt_counters);
+	counters = ub_vmalloc_best(counters_size);
 	if (!counters) {
 		ret = -ENOMEM;
-		goto free_newinfo;
+		goto out;
 	}
+	memset(counters, 0, counters_size);
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("ip_tables: Translated table\n");
-
-	t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
-				    "ip6table_%s", tmp.name);
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
+				    "ip6table_%s", name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
 	/* Update module usage count based on number of rules */
 	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
 		oldinfo->number, oldinfo->initial_entries, newinfo->number);
-	if ((oldinfo->number > oldinfo->initial_entries) || 
-	    (newinfo->number <= oldinfo->initial_entries)) 
+	if ((oldinfo->number > oldinfo->initial_entries) ||
+	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 	if ((oldinfo->number > oldinfo->initial_entries) &&
 	    (newinfo->number <= oldinfo->initial_entries))
@@ -1035,10 +1309,11 @@ do_replace(void __user *user, unsigned i
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, NULL);
+
 	xt_free_table_info(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
 	xt_table_unlock(t);
@@ -1048,16 +1323,65 @@ do_replace(void __user *user, unsigned i
 	module_put(t->me);
 	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
 	vfree(counters);
+ out:
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ip6t_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks,
+			   newinfo, tmp.num_counters,
+			   tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
 static inline int
 add_counter_to_entry(struct ip6t_entry *e,
 		     const struct xt_counters addme[],
@@ -1079,31 +1403,62 @@ add_counter_to_entry(struct ip6t_entry *
 }
 
 static int
-do_add_counters(void __user *user, unsigned int len)
+do_add_counters(const void __user *user, unsigned int len, int compat)
 {
-	unsigned int i;
-	struct xt_counters_info tmp, *paddc;
-	struct xt_table_info *private;
+	const struct xt_table_info *private;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	const void *loc_cpu_entry;
+	unsigned int num_counters, i;
 	struct xt_table *t;
-	int ret = 0;
-	void *loc_cpu_entry;
+	char *name;
+	void *ptmp;
+	int size, ret = 0;
+
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
+
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	/* Header */
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters	= compat_tmp.num_counters;
+		name		= compat_tmp.name;
+		compat_tmp.name[sizeof(compat_tmp.name)-1] = 0;
+	} else
+#endif
+	{
+		num_counters	= tmp.num_counters;
+		name		= tmp.name;
+		tmp.name[sizeof(tmp.name)-1] = 0;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = ub_vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	/* Counters to be added */
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = xt_find_table_lock(AF_INET6, tmp.name);
+	t = xt_find_table_lock(AF_INET6, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1111,19 +1466,16 @@ do_add_counters(void __user *user, unsig
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != tmp.num_counters) {
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_entry,
-			  private->size,
-			  add_counter_to_entry,
-			  paddc->counters,
-			  &i);
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size,
+			   add_counter_to_entry, paddc, &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
 	xt_table_unlock(t);
@@ -1134,82 +1486,870 @@ do_add_counters(void __user *user, unsig
 	return ret;
 }
 
-static int
-do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
-{
-	int ret;
+#ifdef CONFIG_COMPAT
+struct compat_ip6t_replace {
+	char			name[IP6T_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;
+	u32			size;
+	u32			hook_entry[NF_IP6_NUMHOOKS];
+	u32			underflow[NF_IP6_NUMHOOKS];
+	u32			num_counters;
+	compat_uptr_t		counters;
+	struct compat_ip6t_entry entries[0];
+};
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
+int compat_xt6_match_to_user(const struct xt_entry_match *m,
+			     void __user **dstptr, unsigned int *size)
+{
+	const struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match __user *cm = *dstptr;
+	int off =  XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(match->matchsize);
+	u_int16_t msize = m->u.user.match_size - off;
+
+	duprintf("%s: %s %d %d\n",
+		 __func__,
+		 m->u.kernel.match->name,
+		 m->u.user.match_size,
+		 msize);
+
+	if (copy_to_user(cm, m, sizeof(*cm)) ||
+	    put_user(msize, &cm->u.user.match_size) ||
+	    copy_to_user(cm->u.user.name, m->u.kernel.match->name,
+			 strlen(m->u.kernel.match->name) + 1))
+		return -EFAULT;
 
-	switch (cmd) {
-	case IP6T_SO_SET_REPLACE:
-		ret = do_replace(user, len);
-		break;
+	if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
+		return -EFAULT;
+
+	*size	-= off;
+	*dstptr	+= msize;
+	return 0;
+}
+
+static inline int
+compat_copy_match_to_user(struct ip6t_entry_match *m, void __user **dstptr, compat_uint_t *size)
+{
+	if (m->u.kernel.match->compat) {
+		struct compat_xt_entry_match __user *cm = *dstptr;
+		int ret = 0;
+
+		ret = m->u.kernel.match->compat(m, dstptr, size, COMPAT_TO_USER);
+		if (ret)
+			goto match_err;
+
+		if (copy_to_user(cm->u.user.name, m->u.kernel.match->name,
+				 strlen(m->u.kernel.match->name) + 1))
+			ret = -EFAULT;
+match_err:
+		duprintf("%s: %s %d\n", __func__,
+			 m->u.kernel.match->name, ret);
+		return ret;
+	} else
+		return compat_xt6_match_to_user(m, dstptr, size);
+}
+
+int compat_xt6_target_to_user(const struct xt_entry_target *t,
+			      void __user **dstptr, unsigned int *size)
+{
+	const struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target __user *ct = *dstptr;
+	int off = XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(target->targetsize);
+	u_int16_t tsize = t->u.user.target_size - off;
+
+	duprintf("%s: %s %d %d\n",
+		 __func__,
+		 t->u.kernel.target->name,
+		 t->u.user.target_size,
+		 tsize);
+
+	if (copy_to_user(ct, t, sizeof(*ct)) ||
+	    put_user(tsize, &ct->u.user.target_size) ||
+	    copy_to_user(ct->u.user.name, t->u.kernel.target->name,
+			 strlen(t->u.kernel.target->name) + 1))
+		return -EFAULT;
+
+	if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
+		return -EFAULT;
+
+	*size	-= off;
+	*dstptr	+= tsize;
+	return 0;
+}
+
+static int compat_copy_entry_to_user(struct ip6t_entry *e,
+				     void __user **dstptr,
+				     compat_uint_t *size,
+				     struct xt_counters *counters,
+				     unsigned int *i)
+{
+	struct compat_ip6t_entry __user *ce;
+	struct ip6t_entry_target __user *t;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	int ret;
+
+	ret = -EFAULT;
+	origsize = *size;
+	ce = (struct compat_ip6t_entry __user *)*dstptr;
+	if (copy_to_user(ce, e, sizeof(struct compat_ip6t_entry)) != 0 ||
+	    copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])) != 0)
+		return -EFAULT;
+
+	*dstptr	+= sizeof(struct compat_ip6t_entry);
+	*size	-= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+	ret = IP6T_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
+	target_offset = e->target_offset - (origsize - *size);
+	if (ret)
+		goto out;
+
+	t = ip6t_get_target(e);
+	if (t->u.kernel.target->compat) {
+		struct compat_xt_entry_target __user *ct = *dstptr;
+		ret = t->u.kernel.target->compat(t, dstptr, size, COMPAT_TO_USER);
+		if (ret)
+			goto target_err;
+
+		if (copy_to_user(ct->u.user.name, t->u.kernel.target->name,
+				 strlen(t->u.kernel.target->name) + 1)) {
+			ret = -EFAULT;
+			goto target_err;
+		}
+target_err:
+		duprintf("%s: %s %d\n", __func__,
+			 t->u.kernel.target->name, ret);
+	} else
+		ret = compat_xt6_target_to_user(t, dstptr, size);
+	if (ret)
+		goto out;
+
+	ret = -EFAULT;
+	next_offset = e->next_offset - (origsize - *size);
+	if (put_user(target_offset, &ce->target_offset) != 0 ||
+	    put_user(next_offset, &ce->next_offset) != 0)
+		goto out;
+
+	(*i)++;
+	return 0;
+out:
+	return ret;
+}
+
+static int
+compat_find_calc_match(struct ip6t_entry_match *m,
+	    const char *name,
+	    const struct ip6t_ip6 *ipv6,
+	    unsigned int hookmask,
+	    int *size, int *i)
+{
+	struct ip6t_match *match;
+
+	duprintf("compat_find_calc_match: lookup for `%s':%d\n",
+		 m->u.user.name, m->u.user.revision);
+
+	match = try_then_request_module(xt_find_match(AF_INET6,
+						      m->u.user.name,
+						      m->u.user.revision),
+					"ip6t_%s", m->u.user.name);
+	if (IS_ERR(match) || !match) {
+		duprintf("compat_find_calc_match: `%s' not found\n",
+				m->u.user.name);
+		return match ? PTR_ERR(match) : -ENOENT;
+	}
+	m->u.kernel.match = match;
+
+	duprintf("compat_find_calc_match: found `%s':%d\n",
+		 match->name, match->revision);
+
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+	else
+		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+
+	(*i)++;
+	return 0;
+}
+
+static inline int
+compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+static inline int
+compat_release_entry(struct ip6t_entry *e, unsigned int *i, bool compat_matches)
+{
+	struct ip6t_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	/* Cleanup all matches */
+	if (compat_matches)
+		COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL);
+	else
+		IP6T_MATCH_ITERATE(e, compat_release_match, NULL);
+	t = ip6t_get_target(e);
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+void
+compat_xt6_target_from_user(struct xt_entry_target *t,
+			    void **dstptr, unsigned int *size)
+{
+	const struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+	int pad, off = XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(target->targetsize);
+	u_int16_t tsize = ct->u.user.target_size;
+
+	t = *dstptr;
+	memcpy(t, ct, sizeof(*ct));
+	memcpy(t->data, ct->data, tsize - sizeof(*ct));
+	pad = XT_ALIGN(target->targetsize) - target->targetsize;
+	if (pad > 0)
+		memset(t->data + target->targetsize, 0, pad);
+
+	tsize += off;
+	t->u.user.target_size = tsize;
+
+	*size	+= off;
+	*dstptr	+= tsize;
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  unsigned char *base,
+				  unsigned char *limit,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows,
+				  unsigned int *i,
+				  const char *name)
+{
+	struct ip6t_entry_target *t;
+	struct ip6t_target *target;
+	unsigned int entry_offset;
+	int ret, off, h, j;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
+	    (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_ip6t_entry) +
+			     sizeof(struct compat_xt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct ip6t_entry *)e, name);
+	if (ret)
+		return ret;
+
+	/* offset due to counters alignment */
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ipv6,
+					e->comefrom, &off, &j);
+	if (ret != 0)
+		goto release_matches;
+
+	t = compat_ip6t_get_target(e);
+	target = try_then_request_module(xt_find_target(AF_INET6,
+							t->u.user.name,
+							t->u.user.revision),
+					 "ip6t_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+			 t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto release_matches;
+	}
+	t->u.kernel.target = target;
+
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	else
+		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+
+	/* new in-kernel table will be bigger of course */
+	*size += off;
+
+	/* remember it for further use */
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		goto out;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	memset(&e->counters, 0, sizeof(e->counters));
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+out:
+	module_put(t->u.kernel.target->me);
+release_matches:
+	COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, &j);
+	return ret;
+}
+
+static int
+compat_xt6_match_from_user(struct xt_entry_match *m,
+			   void **dstptr, compat_uint_t *size)
+{
+	const struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+	int pad, off = XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(match->matchsize);
+	u_int16_t msize = cm->u.user.match_size;
+
+	m = *dstptr;
+	memcpy(m, cm, sizeof(*cm));
+	memcpy(m->data, cm->data, msize - sizeof(*cm));
+	pad = XT_ALIGN(match->matchsize) - match->matchsize;
+	if (pad > 0)
+		memset(m->data + match->matchsize, 0, pad);
+
+	msize += off;
+	m->u.user.match_size = msize;
+
+	*size	+= off;
+	*dstptr	+= msize;
+	return 0;
+}
+
+static int
+compat_copy_match_from_user(struct ip6t_entry_match *m,
+			    void **dstptr, compat_uint_t *size)
+{
+	struct ip6t_match *match = m->u.kernel.match;
+	if (match->compat)
+		match->compat(m, dstptr, size, COMPAT_FROM_USER);
+	else
+		compat_xt6_match_from_user(m, dstptr, size);
+	return 0;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+			    unsigned int *size, const char *name,
+			    struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct ip6t_entry_target *t;
+	struct ip6t_target *target;
+	struct ip6t_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	ret = 0;
+	origsize = *size;
+	de = (struct ip6t_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ip6t_entry));
+	memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+	*dstptr	+= sizeof(struct ip6t_entry);
+	*size	+= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+	ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size);
+	if (ret)
+		return ret;
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = compat_ip6t_get_target(e);
+	target = t->u.kernel.target;
+	if (target->compat)
+		target->compat(t, dstptr, size, COMPAT_FROM_USER);
+	else
+		compat_xt6_target_from_user(t, dstptr, size);
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+	return ret;
+}
+
+static int
+compat_check_match(struct ip6t_entry_match *m, const char *name,
+		   const struct ip6t_ip6 *ipv6, unsigned int hookmask,
+		   unsigned int *i)
+{
+	struct ip6t_match *match;
+	int ret;
+
+	match = m->u.kernel.match;
+	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
+			     name, hookmask, ipv6->proto,
+			     ipv6->invflags & IP6T_INV_PROTO);
+	if (!ret && m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
+					      m->u.match_size - sizeof(*m),
+					      hookmask)) {
+		duprintf("compat_check_match: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		ret = -EINVAL;
+	}
+	if (!ret)
+		(*i)++;
+	return ret;
+}
+
+static int
+compat_check_entry_data(struct ip6t_entry *e, const char *name, unsigned int *i)
+{
+	int j, ret;
+
+	j = 0;
+	ret = IP6T_MATCH_ITERATE(e, compat_check_match, name, &e->ipv6, e->comefrom, &j);
+	if (ret != 0)
+		goto cleanup_matches;
+
+	ret = check_target(e, name);
+	if (ret)
+		goto cleanup_matches;
+
+	(*i)++;
+	return 0;
+
+cleanup_matches:
+	IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+struct compat_ip6t_get_entries {
+	char				name[IP6T_TABLE_MAXNAMELEN];
+	compat_uint_t			size;
+	struct compat_ip6t_entry	entrytable[0];
+};
+
+static inline struct xt_counters *alloc_counters(struct ip6t_table *table)
+{
+	struct xt_table_info *private = table->private;
+	struct xt_counters *counters;
+	unsigned int countersize;
+
+	/*
+	 * We need atomic snapshot of counters: rest doesn't change
+	 * (other than comefrom, which userspace doesn't care about).
+	 */
+	countersize	= sizeof(struct xt_counters) * private->number;
+	counters	= ub_vmalloc_node(countersize, numa_node_id());
+
+	if (counters == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memset(counters, 0, countersize);
+
+	/* First, sum counters... */
+	write_lock_bh(&table->lock);
+	get_counters(private, counters);
+	write_unlock_bh(&table->lock);
+
+	return counters;
+}
+
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+			    void __user *ce)
+{
+	struct xt_table_info *private = table->private;
+	struct xt_counters *counters;
+	unsigned int size;
+	void *loc_cpu_entry;
+	void __user *pos;
+	unsigned int i;
+	int ret = 0;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/*
+	 * Choose the copy that is on our node/cpu.
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu).
+	 */
+	loc_cpu_entry	= private->entries[raw_smp_processor_id()];
+	pos		= ce;
+	size		= total_size;
+
+	i = 0;
+	ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size,
+				 compat_copy_entry_to_user, &pos, &size,
+				 counters, &i);
+	if (ret)
+		goto free_counters;
+	ret = 0;
+
+free_counters:
+	vfree(counters);
+	return ret;
+}
+
+static int
+compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len)
+{
+	struct compat_ip6t_get_entries get;
+	struct xt_table *t;
+	int ret;
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	get.name[sizeof(get.name)-1] = 0;
+
+	if (*len != sizeof(get) + get.size) {
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
+
+	xt_compat_lock(AF_INET6);
+	t = xt_find_table_lock(AF_INET6, get.name);
+	if (t && !IS_ERR(t)) {
+		const struct xt_table_info *private = t->private;
+		struct xt_table_info newinfo;
+
+		duprintf("t->private->number = %u\n", private->number);
+
+		ret = compat_table_info(private, &newinfo);
+		if (!ret && get.size == newinfo.size) {
+			ret = compat_copy_entries_to_user(private->size, t,
+							  uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		compat_flush_offsets();
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	xt_compat_unlock(AF_INET6);
+	return ret;
+}
+
+static int
+compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+		return -EPERM;
+
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET6].next)
+		return -ENOENT;
+#endif
+
+	switch (cmd) {
+	case IP6T_SO_GET_INFO:
+		ret = get_info(user, len, 1);
+		break;
+	case IP6T_SO_GET_ENTRIES:
+		ret = compat_get_entries(user, len);
+		break;
+	default:
+		ret = do_ip6t_get_ctl(sk, cmd, user, len);
+	}
+	return ret;
+}
+
+static int
+translate_compat_table(const char *name,
+		       unsigned int valid_hooks,
+		       struct xt_table_info **pinfo,
+		       void **pentry0,
+		       unsigned int total_size,
+		       unsigned int num_entries,
+		       unsigned int *hook_entries,
+		       unsigned int *underflows)
+{
+	struct xt_table_info *newinfo, *info;
+	void *pos, *entry0, *entry1;
+	unsigned int i, j, size;
+	int ret;
+
+	info		= *pinfo;
+	entry0		= *pentry0;
+	size		= total_size;
+	info->number	= num_entries;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+		info->hook_entry[i]	= 0xFFFFFFFF;
+		info->underflow[i]	= 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	i = 0;
+	xt_compat_lock(AF_INET6);
+	compat_flush_offsets();
+	/* Walk through entries, checking offsets. */
+	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
+					check_compat_entry_size_and_hooks,
+					info, &size, entry0,
+					entry0 + total_size,
+					hook_entries, underflows, &i, name);
+	if (ret != 0)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (i != num_entries) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 i, num_entries);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (j = 0; j < NF_IP6_NUMHOOKS; j++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << j)))
+			continue;
+		if (info->hook_entry[j] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 j, hook_entries[j]);
+			goto out_unlock;
+		}
+		if (info->underflow[j] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 j, underflows[j]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = xt_alloc_table_info(size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = num_entries;
+	for (j = 0; j < NF_IP6_NUMHOOKS; j++) {
+		newinfo->hook_entry[j]	= info->hook_entry[j];
+		newinfo->underflow[j]	= info->underflow[j];
+	}
+
+	entry1	= newinfo->entries[raw_smp_processor_id()];
+	pos	= entry1;
+	size	= total_size;
+	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
+					compat_copy_entry_from_user, &pos, &size,
+					name, newinfo, entry1);
+	compat_flush_offsets();
+	xt_compat_unlock(AF_INET6);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
+		goto free_newinfo;
+
+	j = 0;
+	ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry_data, name, &j);
+	if (ret) {
+		i -= j;
+		IP6T_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, j,
+					    compat_release_entry, &i, false);
+		IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &j);
+		xt_free_table_info(newinfo);
+		return ret;
+	}
+
+	/* And one copy for every other CPU */
+	for_each_possible_cpu(i)
+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+			memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+	*pinfo = newinfo;
+	*pentry0 = entry1;
+	xt_free_table_info(info);
+	return 0;
+
+free_newinfo:
+	xt_free_table_info(newinfo);
+out:
+	COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &i, true);
+	return ret;
+out_unlock:
+	compat_flush_offsets();
+	xt_compat_unlock(AF_INET6);
+	goto out;
+}
+
+static int
+compat_do_replace(struct compat_ip6t_replace __user *user, unsigned int len)
+{
+	struct compat_ip6t_replace repl;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+	int ret;
+
+	/*
+	 * The header of a table, no entries yet
+	 */
+	if (copy_from_user(&repl, user, sizeof(repl)) != 0)
+		return -EFAULT;
+	repl.name[sizeof(repl.name)-1] = 0;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(repl) + repl.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (repl.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (repl.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	/*
+	 * The new in-kernel table, it's allocated with a smaller
+	 * size than we need in kernel but translate_compat_table
+	 * will substitute it with a properly sized table.
+	 */
+	newinfo = xt_alloc_table_info(repl.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, (void *)user + sizeof(repl), repl.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	/*
+	 * Along with relocating (increasing in size) new table
+	 * it remembers the offsets in offsets list
+	 */
+	ret = translate_compat_table(repl.name, repl.valid_hooks,
+				     &newinfo, &loc_cpu_entry, repl.size,
+				     repl.num_entries, repl.hook_entry, repl.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(repl.name, repl.valid_hooks, newinfo, repl.num_counters,
+			   compat_ptr(repl.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	duprintf("compat_do_replace: %d\n", ret);
+	return ret;
+}
+
+static int
+compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
+		       unsigned int len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+		return -EPERM;
+
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET6].next)
+		return -ENOENT;
+#endif
+
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		ret = compat_do_replace((struct compat_ip6t_replace *)user, len);
+		break;
 
 	case IP6T_SO_SET_ADD_COUNTERS:
-		ret = do_add_counters(user, len);
+		ret = do_add_counters(user, len, 1);
 		break;
 
 	default:
-		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
+		duprintf("compat_do_ip6t_set_ctl:  unknown request %i\n", cmd);
 		ret = -EINVAL;
+
 	}
 
 	return ret;
 }
+#endif /* CONFIG_COMPAT */
 
 static int
-do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
-	switch (cmd) {
-	case IP6T_SO_GET_INFO: {
-		char name[IP6T_TABLE_MAXNAMELEN];
-		struct xt_table *t;
-
-		if (*len != sizeof(struct ip6t_getinfo)) {
-			duprintf("length %u != %u\n", *len,
-				 sizeof(struct ip6t_getinfo));
-			ret = -EINVAL;
-			break;
-		}
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET6].next)
+		return -ENOENT;
+#endif
 
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		ret = do_replace(user, len);
+		break;
 
-		t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
-					    "ip6table_%s", name);
-		if (t && !IS_ERR(t)) {
-			struct ip6t_getinfo info;
-			struct xt_table_info *private = t->private;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = private->number;
-			info.size = private->size;
-			memcpy(info.name, name, sizeof(info.name));
+	case IP6T_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(user, len, 0);
+		break;
 
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-			xt_table_unlock(t);
-			module_put(t->me);
-		} else
-			ret = t ? PTR_ERR(t) : -ENOENT;
+	default:
+		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
 	}
-	break;
+
+	return ret;
+}
+
+static int
+do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+		return -EPERM;
+
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET6].next)
+		return -ENOENT;
+#endif
+
+	switch (cmd) {
+	case IP6T_SO_GET_INFO:
+		ret = get_info(user, len, 0);
+		break;
 
 	case IP6T_SO_GET_ENTRIES: {
 		struct ip6t_get_entries get;
@@ -1263,18 +2403,18 @@ do_ip6t_get_ctl(struct sock *sk, int cmd
 	return ret;
 }
 
-int ip6t_register_table(struct xt_table *table,
+struct ip6t_table *ip6t_register_table(struct xt_table *table,
 			const struct ip6t_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
 	static struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+		= { 0, 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
 	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* choose the copy on our node/cpu */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
@@ -1287,28 +2427,29 @@ int ip6t_register_table(struct xt_table 
 			      repl->underflow);
 	if (ret != 0) {
 		xt_free_table_info(newinfo);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	ret = xt_register_table(table, &bootstrap, newinfo);
-	if (ret != 0) {
+	table = virt_xt_register_table(table, &bootstrap, newinfo);
+	if (IS_ERR(table))
 		xt_free_table_info(newinfo);
-		return ret;
-	}
-
-	return 0;
+	return table;
 }
 
 void ip6t_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *me;
 
-	private = xt_unregister_table(table);
+	me = table->me;
+	private = virt_xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(me);
 	xt_free_table_info(private);
 }
 
@@ -1375,6 +2516,9 @@ static struct ip6t_target ip6t_standard_
 	.name		= IP6T_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= AF_INET6,
+#ifdef CONFIG_COMPAT
+	.compat		= ip6t_compat_standard_fn,
+#endif
 };
 
 static struct ip6t_target ip6t_error_target = {
@@ -1389,9 +2533,15 @@ static struct nf_sockopt_ops ip6t_sockop
 	.set_optmin	= IP6T_BASE_CTL,
 	.set_optmax	= IP6T_SO_SET_MAX+1,
 	.set		= do_ip6t_set_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_set	= compat_do_ip6t_set_ctl,
+#endif
 	.get_optmin	= IP6T_BASE_CTL,
 	.get_optmax	= IP6T_SO_GET_MAX+1,
 	.get		= do_ip6t_get_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_get	= compat_do_ip6t_get_ctl,
+#endif
 };
 
 static struct ip6t_match icmp6_matchstruct = {
@@ -1403,12 +2553,30 @@ static struct ip6t_match icmp6_matchstru
 	.family		= AF_INET6,
 };
 
+static int init_ip6tables(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	if (get_exec_env()->_xt_tables[AF_INET6].next != NULL)
+		return -EEXIST;
+#endif
+
+	return xt_proto_init(AF_INET6);
+}
+
+static void fini_ip6tables(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	get_exec_env()->_xt_tables[AF_INET6].next = NULL;
+#endif
+	xt_proto_fini(AF_INET6);
+}
+
 static int __init ip6_tables_init(void)
 {
 	int ret;
 
-	ret = xt_proto_init(AF_INET6);
-	if (ret < 0)
+	ret = init_ip6tables();
+	if (ret)
 		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
@@ -1427,6 +2595,9 @@ static int __init ip6_tables_init(void)
 	if (ret < 0)
 		goto err5;
 
+	KSYMRESOLVE(init_ip6tables);
+	KSYMRESOLVE(fini_ip6tables);
+	KSYMMODRESOLVE(ip6_tables);
 	printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 
@@ -1437,18 +2608,21 @@ err4:
 err3:
 	xt_unregister_target(&ip6t_standard_target);
 err2:
-	xt_proto_fini(AF_INET6);
+	fini_ip6tables();
 err1:
 	return ret;
 }
 
 static void __exit ip6_tables_fini(void)
 {
+	KSYMMODUNRESOLVE(ip6_tables);
+	KSYMUNRESOLVE(init_ip6tables);
+	KSYMUNRESOLVE(fini_ip6tables);
 	nf_unregister_sockopt(&ip6t_sockopts);
 	xt_unregister_match(&icmp6_matchstruct);
 	xt_unregister_target(&ip6t_error_target);
 	xt_unregister_target(&ip6t_standard_target);
-	xt_proto_fini(AF_INET6);
+	fini_ip6tables();
 }
 
 /*
@@ -1533,5 +2707,5 @@ EXPORT_SYMBOL(ip6t_do_table);
 EXPORT_SYMBOL(ip6t_ext_hdr);
 EXPORT_SYMBOL(ipv6_find_hdr);
 
-module_init(ip6_tables_init);
+subsys_initcall(ip6_tables_init);
 module_exit(ip6_tables_fini);
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6t_LOG.c kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6t_LOG.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/netfilter/ip6t_LOG.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/netfilter/ip6t_LOG.c	2017-01-13 08:40:23.000000000 -0500
@@ -59,15 +59,15 @@ static void dump_packet(const struct nf_
 
 	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
 	if (ih == NULL) {
-		printk("TRUNCATED");
+		ve_printk(VE_LOG, "TRUNCATED");
 		return;
 	}
 
 	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
-	printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr));
+	ve_printk(VE_LOG, "SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr));
 
 	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
-	printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+	ve_printk(VE_LOG, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
 	       ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
 	       (ntohl(*(u_int32_t *)ih) & 0x0ff00000) >> 20,
 	       ih->hop_limit,
@@ -81,34 +81,34 @@ static void dump_packet(const struct nf_
 
 		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
 		if (hp == NULL) {
-			printk("TRUNCATED");
+			ve_printk(VE_LOG, "TRUNCATED");
 			return;
 		}
 
 		/* Max length: 48 "OPT (...) " */
 		if (logflags & IP6T_LOG_IPOPT)
-			printk("OPT ( ");
+			ve_printk(VE_LOG, "OPT ( ");
 
 		switch (currenthdr) {
 		case IPPROTO_FRAGMENT: {
 			struct frag_hdr _fhdr, *fh;
 
-			printk("FRAG:");
+			ve_printk(VE_LOG, "FRAG:");
 			fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
 						&_fhdr);
 			if (fh == NULL) {
-				printk("TRUNCATED ");
+				ve_printk(VE_LOG, "TRUNCATED ");
 				return;
 			}
 
 			/* Max length: 6 "65535 " */
-			printk("%u ", ntohs(fh->frag_off) & 0xFFF8);
+			ve_printk(VE_LOG, "%u ", ntohs(fh->frag_off) & 0xFFF8);
 
 			/* Max length: 11 "INCOMPLETE " */
 			if (fh->frag_off & htons(0x0001))
-				printk("INCOMPLETE ");
+				ve_printk(VE_LOG, "INCOMPLETE ");
 
-			printk("ID:%08x ", ntohl(fh->identification));
+			ve_printk(VE_LOG, "ID:%08x ", ntohl(fh->identification));
 
 			if (ntohs(fh->frag_off) & 0xFFF8)
 				fragment = 1;
@@ -122,7 +122,7 @@ static void dump_packet(const struct nf_
 		case IPPROTO_HOPOPTS:
 			if (fragment) {
 				if (logflags & IP6T_LOG_IPOPT)
-					printk(")");
+					ve_printk(VE_LOG, ")");
 				return;
 			}
 			hdrlen = ipv6_optlen(hp);
@@ -133,10 +133,10 @@ static void dump_packet(const struct nf_
 				struct ip_auth_hdr _ahdr, *ah;
 
 				/* Max length: 3 "AH " */
-				printk("AH ");
+				ve_printk(VE_LOG, "AH ");
 
 				if (fragment) {
-					printk(")");
+					ve_printk(VE_LOG, ")");
 					return;
 				}
 
@@ -147,13 +147,13 @@ static void dump_packet(const struct nf_
 					 * Max length: 26 "INCOMPLETE [65535 	
 					 *  bytes] )"
 					 */
-					printk("INCOMPLETE [%u bytes] )",
+					ve_printk(VE_LOG, "INCOMPLETE [%u bytes] )",
 					       skb->len - ptr);
 					return;
 				}
 
 				/* Length: 15 "SPI=0xF1234567 */
-				printk("SPI=0x%x ", ntohl(ah->spi));
+				ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah->spi));
 
 			}
 
@@ -164,10 +164,10 @@ static void dump_packet(const struct nf_
 				struct ip_esp_hdr _esph, *eh;
 
 				/* Max length: 4 "ESP " */
-				printk("ESP ");
+				ve_printk(VE_LOG, "ESP ");
 
 				if (fragment) {
-					printk(")");
+					ve_printk(VE_LOG, ")");
 					return;
 				}
 
@@ -177,23 +177,23 @@ static void dump_packet(const struct nf_
 				eh = skb_header_pointer(skb, ptr, sizeof(_esph),
 							&_esph);
 				if (eh == NULL) {
-					printk("INCOMPLETE [%u bytes] )",
+					ve_printk(VE_LOG, "INCOMPLETE [%u bytes] )",
 					       skb->len - ptr);
 					return;
 				}
 
 				/* Length: 16 "SPI=0xF1234567 )" */
-				printk("SPI=0x%x )", ntohl(eh->spi) );
+				ve_printk(VE_LOG, "SPI=0x%x )", ntohl(eh->spi) );
 
 			}
 			return;
 		default:
 			/* Max length: 20 "Unknown Ext Hdr 255" */
-			printk("Unknown Ext Hdr %u", currenthdr);
+			ve_printk(VE_LOG, "Unknown Ext Hdr %u", currenthdr);
 			return;
 		}
 		if (logflags & IP6T_LOG_IPOPT)
-			printk(") ");
+			ve_printk(VE_LOG, ") ");
 
 		currenthdr = hp->nexthdr;
 		ptr += hdrlen;
@@ -204,7 +204,7 @@ static void dump_packet(const struct nf_
 		struct tcphdr _tcph, *th;
 
 		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
+		ve_printk(VE_LOG, "PROTO=TCP ");
 
 		if (fragment)
 			break;
@@ -212,40 +212,40 @@ static void dump_packet(const struct nf_
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
 		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", skb->len - ptr);
 			return;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
 		if (logflags & IP6T_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
+			ve_printk(VE_LOG, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
+		ve_printk(VE_LOG, "WINDOW=%u ", ntohs(th->window));
 		/* Max length: 9 "RES=0x3C " */
-		printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		ve_printk(VE_LOG, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
 		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
 		if (th->cwr)
-			printk("CWR ");
+			ve_printk(VE_LOG, "CWR ");
 		if (th->ece)
-			printk("ECE ");
+			ve_printk(VE_LOG, "ECE ");
 		if (th->urg)
-			printk("URG ");
+			ve_printk(VE_LOG, "URG ");
 		if (th->ack)
-			printk("ACK ");
+			ve_printk(VE_LOG, "ACK ");
 		if (th->psh)
-			printk("PSH ");
+			ve_printk(VE_LOG, "PSH ");
 		if (th->rst)
-			printk("RST ");
+			ve_printk(VE_LOG, "RST ");
 		if (th->syn)
-			printk("SYN ");
+			ve_printk(VE_LOG, "SYN ");
 		if (th->fin)
-			printk("FIN ");
+			ve_printk(VE_LOG, "FIN ");
 		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
+		ve_printk(VE_LOG, "URGP=%u ", ntohs(th->urg_ptr));
 
 		if ((logflags & IP6T_LOG_TCPOPT)
 		    && th->doff * 4 > sizeof(struct tcphdr)) {
@@ -258,15 +258,15 @@ static void dump_packet(const struct nf_
 						ptr + sizeof(struct tcphdr),
 						optsize, _opt);
 			if (op == NULL) {
-				printk("OPT (TRUNCATED)");
+				ve_printk(VE_LOG, "OPT (TRUNCATED)");
 				return;
 			}
 
 			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
+			ve_printk(VE_LOG, "OPT (");
 			for (i =0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
+				ve_printk(VE_LOG, "%02X", op[i]);
+			ve_printk(VE_LOG, ") ");
 		}
 		break;
 	}
@@ -274,7 +274,7 @@ static void dump_packet(const struct nf_
 		struct udphdr _udph, *uh;
 
 		/* Max length: 10 "PROTO=UDP " */
-		printk("PROTO=UDP ");
+		ve_printk(VE_LOG, "PROTO=UDP ");
 
 		if (fragment)
 			break;
@@ -282,12 +282,12 @@ static void dump_packet(const struct nf_
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
 		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", skb->len - ptr);
 			return;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ",
 		       ntohs(uh->source), ntohs(uh->dest),
 		       ntohs(uh->len));
 		break;
@@ -296,7 +296,7 @@ static void dump_packet(const struct nf_
 		struct icmp6hdr _icmp6h, *ic;
 
 		/* Max length: 13 "PROTO=ICMPv6 " */
-		printk("PROTO=ICMPv6 ");
+		ve_printk(VE_LOG, "PROTO=ICMPv6 ");
 
 		if (fragment)
 			break;
@@ -304,18 +304,18 @@ static void dump_packet(const struct nf_
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
 		if (ic == NULL) {
-			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", skb->len - ptr);
 			return;
 		}
 
 		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
+		ve_printk(VE_LOG, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
 
 		switch (ic->icmp6_type) {
 		case ICMPV6_ECHO_REQUEST:
 		case ICMPV6_ECHO_REPLY:
 			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
+			ve_printk(VE_LOG, "ID=%u SEQ=%u ",
 				ntohs(ic->icmp6_identifier),
 				ntohs(ic->icmp6_sequence));
 			break;
@@ -326,35 +326,35 @@ static void dump_packet(const struct nf_
 
 		case ICMPV6_PARAMPROB:
 			/* Max length: 17 "POINTER=ffffffff " */
-			printk("POINTER=%08x ", ntohl(ic->icmp6_pointer));
+			ve_printk(VE_LOG, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
 			/* Fall through */
 		case ICMPV6_DEST_UNREACH:
 		case ICMPV6_PKT_TOOBIG:
 		case ICMPV6_TIME_EXCEED:
 			/* Max length: 3+maxlen */
 			if (recurse) {
-				printk("[");
+				ve_printk(VE_LOG, "[");
 				dump_packet(info, skb, ptr + sizeof(_icmp6h),
 					    0);
-				printk("] ");
+				ve_printk(VE_LOG, "] ");
 			}
 
 			/* Max length: 10 "MTU=65535 " */
 			if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
-				printk("MTU=%u ", ntohl(ic->icmp6_mtu));
+				ve_printk(VE_LOG, "MTU=%u ", ntohl(ic->icmp6_mtu));
 		}
 		break;
 	}
 	/* Max length: 10 "PROTO=255 " */
 	default:
-		printk("PROTO=%u ", currenthdr);
+		ve_printk(VE_LOG, "PROTO=%u ", currenthdr);
 	}
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+			ve_printk(VE_LOG, "UID=%u ", skb->sk->sk_socket->file->f_uid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 }
@@ -382,14 +382,14 @@ ip6t_log_packet(unsigned int pf,
 		loginfo = &default_loginfo;
 
 	spin_lock_bh(&log_lock);
-	printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 
+	ve_printk(VE_LOG, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 
 		prefix,
 		in ? in->name : "",
 		out ? out->name : "");
 	if (in && !out) {
 		unsigned int len;
 		/* MAC logging for input chain only. */
-		printk("MAC=");
+		ve_printk(VE_LOG, "MAC=");
 		if (skb->dev && (len = skb->dev->hard_header_len) &&
 		    skb->mac.raw != skb->nh.raw) {
 			unsigned char *p = skb->mac.raw;
@@ -401,23 +401,23 @@ ip6t_log_packet(unsigned int pf,
 
 			if (p != NULL) {
 				for (i = 0; i < len; i++)
-					printk("%02x%s", p[i],
+					ve_printk(VE_LOG, "%02x%s", p[i],
 					       i == len - 1 ? "" : ":");
 			}
-			printk(" ");
+			ve_printk(VE_LOG, " ");
 
 			if (skb->dev->type == ARPHRD_SIT) {
 				struct iphdr *iph = (struct iphdr *)skb->mac.raw;
-				printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
+				ve_printk(VE_LOG, "TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
 				       NIPQUAD(iph->saddr),
 				       NIPQUAD(iph->daddr));
 			}
 		} else
-			printk(" ");
+			ve_printk(VE_LOG, " ");
 	}
 
 	dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
-	printk("\n");
+	ve_printk(VE_LOG, "\n");
 	spin_unlock_bh(&log_lock);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/proc.c kernel-2.6.18-417.el5-028stab121/net/ipv6/proc.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/proc.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/proc.c	2017-01-13 08:40:23.000000000 -0500
@@ -24,13 +24,18 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
+#include <linux/ve.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+#define proc_net_devsnmp6	(get_exec_env()->_proc_net_devsnmp6)
+#else
 static struct proc_dir_entry *proc_net_devsnmp6;
+#endif
 
 static int fold_prot_inuse(struct proto *proto)
 {
@@ -187,10 +192,10 @@ static int snmp6_seq_show(struct seq_fil
 		snmp6_seq_show_item(seq, (void **)idev->statsx.icmpv6, snmp6_icmp6_list);
 		snmp6_seq_show_icmpv6msg(seq, (void **)idev->statsx.icmpv6msg);
 	} else {
-		snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
-		snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
+		snmp6_seq_show_item(seq, (void **)ve_ipv6_statistics, snmp6_ipstats_list);
+		snmp6_seq_show_item(seq, (void **)ve_icmpv6_statistics, snmp6_icmp6_list);
 		snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
-		snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+		snmp6_seq_show_item(seq, (void **)ve_udp_stats_in6, snmp6_udp6_list);
 	}
 	return 0;
 }
@@ -254,15 +259,25 @@ int snmp6_unregister_dev(struct inet6_de
 	return 0;
 }
 
+int ve_snmp_proc_init(struct ve_struct *ve)
+{
+	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
+	return proc_net_devsnmp6 == NULL ? -ENOMEM : 0;
+}
+
+void ve_snmp_proc_fini(struct ve_struct *ve)
+{
+	proc_net_remove("dev_snmp6");
+}
+
 int __init ipv6_misc_proc_init(void)
 {
 	int rc = 0;
 
-	if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
+	if (!proc_glob_fops_create("net/snmp6", S_IRUGO, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
-	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
-	if (!proc_net_devsnmp6)
+	if (ve_snmp_proc_init(get_ve0()))
 		goto proc_dev_snmp6_fail;
 
 	if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
@@ -271,9 +286,9 @@ out:
 	return rc;
 
 proc_sockstat6_fail:
-	proc_net_remove("dev_snmp6");
+	ve_snmp_proc_fini(get_ve0());
 proc_dev_snmp6_fail:
-	proc_net_remove("snmp6");
+	remove_proc_glob_entry("net/snmp6", NULL);
 proc_snmp6_fail:
 	rc = -ENOMEM;
 	goto out;
@@ -282,7 +297,7 @@ proc_snmp6_fail:
 void ipv6_misc_proc_exit(void)
 {
 	proc_net_remove("sockstat6");
-	proc_net_remove("dev_snmp6");
+	ve_snmp_proc_fini(get_ve0());
 	proc_net_remove("snmp6");
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/raw.c kernel-2.6.18-417.el5-028stab121/net/ipv6/raw.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/raw.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/raw.c	2017-01-13 08:40:21.000000000 -0500
@@ -99,6 +99,10 @@ struct sock *__raw_v6_lookup(struct sock
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
+			if (!ve_accessible_strict(sk->owner_env,
+						get_exec_env()))
+				continue;
+
 			if (!ipv6_addr_any(&np->rcv_saddr)) {
 				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
 					goto found;
@@ -1062,6 +1066,14 @@ static void rawv6_close(struct sock *sk,
 	sk_common_release(sk);
 }
 
+static int raw6_destroy(struct sock *sk)
+{
+	lock_sock(sk);
+	ip6_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
 static int rawv6_init_sk(struct sock *sk)
 {
 	if (inet_sk(sk)->num == IPPROTO_ICMPV6) {
@@ -1076,6 +1088,7 @@ struct proto rawv6_prot = {
 	.name		   = "RAWv6",
 	.owner		   = THIS_MODULE,
 	.close		   = rawv6_close,
+	.destroy	   = raw6_destroy,
 	.connect	   = ip6_datagram_connect,
 	.disconnect	   = udp_disconnect,
 	.ioctl		   = rawv6_ioctl,
@@ -1125,8 +1138,13 @@ static struct sock *raw6_get_next(struct
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET6);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET6)
+			continue;
+		if (ve_accessible(sk->owner_env, get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
 		sk = sk_head(&raw_v6_htable[state->bucket]);
@@ -1244,13 +1262,13 @@ static struct file_operations raw6_seq_f
 
 int __init raw6_proc_init(void)
 {
-	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
+	if (!proc_glob_fops_create("net/raw6", S_IRUGO, &raw6_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void raw6_proc_exit(void)
 {
-	proc_net_remove("raw6");
+	remove_proc_glob_entry("net/raw6", NULL);
 }
 #endif	/* CONFIG_PROC_FS */
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/reassembly.c kernel-2.6.18-417.el5-028stab121/net/ipv6/reassembly.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/reassembly.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/reassembly.c	2017-01-13 08:40:23.000000000 -0500
@@ -95,6 +95,7 @@ struct frag_queue
 #define FIRST_IN		2
 #define LAST_IN			1
 	__u16			nhoffset;
+	struct ve_struct *owner_env;
 };
 
 /* Hash table. */
@@ -293,6 +294,9 @@ static void ip6_frag_expire(unsigned lon
 {
 	struct frag_queue *fq = (struct frag_queue *) data;
 	struct net_device *dev = NULL;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(fq->owner_env);
 
 	spin_lock(&fq->lock);
 
@@ -326,6 +330,8 @@ out:
 		dev_put(dev);
 	spin_unlock(&fq->lock);
 	fq_put(fq, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -345,7 +351,8 @@ static struct frag_queue *ip6_frag_inter
 	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
 		if (fq->id == fq_in->id && 
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
+		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr) &&
+		    fq->owner_env == get_exec_env()) {
 			atomic_inc(&fq->refcnt);
 			write_unlock(&ip6_frag_lock);
 			fq_in->last_in |= COMPLETE;
@@ -387,6 +394,7 @@ ip6_frag_create(u32 id, struct in6_addr 
 	fq->timer.data = (long) fq;
 	spin_lock_init(&fq->lock);
 	atomic_set(&fq->refcnt, 1);
+	fq->owner_env = get_exec_env();
 
 	return ip6_frag_intern(fq);
 
@@ -408,7 +416,8 @@ fq_find(u32 id, struct in6_addr *src, st
 	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
 		if (fq->id == id && 
 		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
+		    ipv6_addr_equal(dst, &fq->daddr) &&
+		    fq->owner_env == get_exec_env()) {
 			atomic_inc(&fq->refcnt);
 			read_unlock(&ip6_frag_lock);
 			return fq;
@@ -709,6 +718,9 @@ static int ipv6_frag_rcv(struct sk_buff 
 		    fq->meat == fq->len)
 			ret = ip6_frag_reasm(fq, skbp, dev);
 
+		if (ret > 0)
+			(*skbp)->owner_env = skb->owner_env;
+
 		spin_unlock(&fq->lock);
 		fq_put(fq, NULL);
 		return ret;
@@ -719,6 +731,43 @@ static int ipv6_frag_rcv(struct sk_buff 
 	return -1;
 }
 
+void ip6_frag_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	local_bh_disable();
+	do {
+		progress = 0;
+		for (i = 0; i < IP6Q_HASHSZ; i++) {
+			struct frag_queue *fq;
+			struct hlist_node *p, *n;
+
+			if (hlist_empty(&ip6_frag_hash[i]))
+				continue;
+inner_restart:
+			read_lock(&ip6_frag_lock);
+			hlist_for_each_entry_safe(fq, p, n,
+					&ip6_frag_hash[i], list) {
+				if (!ve_accessible_strict(fq->owner_env, envid))
+					continue;
+				atomic_inc(&fq->refcnt);
+				read_unlock(&ip6_frag_lock);
+
+				spin_lock(&fq->lock);
+				if (!(fq->last_in&COMPLETE))
+					fq_kill(fq);
+				spin_unlock(&fq->lock);
+
+				fq_put(fq, NULL);
+				progress = 1;
+				goto inner_restart;
+			}
+			read_unlock(&ip6_frag_lock);
+		}
+	} while(progress);
+	local_bh_enable();
+}
+
 static struct inet6_protocol frag_protocol =
 {
 	.handler	=	ipv6_frag_rcv,
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/route.c kernel-2.6.18-417.el5-028stab121/net/ipv6/route.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/route.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/route.c	2017-01-13 08:40:41.000000000 -0500
@@ -51,7 +51,6 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <linux/rtnetlink.h>
-#include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/netevent.h>
 
@@ -142,7 +141,6 @@ struct rt6_info ip6_null_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -ENETUNREACH,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -168,7 +166,6 @@ struct rt6_info ip6_prohibit_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -EACCES,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -188,7 +185,6 @@ struct rt6_info ip6_blk_hole_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -EINVAL,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -781,7 +777,7 @@ out2:
 	return rt;
 }
 
-void ip6_route_input(struct sk_buff *skb)
+void __ip6_route_input(struct sk_buff *skb, struct in6_addr *daddr)
 {
 	struct ipv6hdr *iph = skb->nh.ipv6h;
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
@@ -789,7 +785,7 @@ void ip6_route_input(struct sk_buff *skb
 		.iif = skb->dev->ifindex,
 		.nl_u = {
 			.ip6_u = {
-				.daddr = iph->daddr,
+				.daddr = *daddr,
 				.saddr = iph->saddr,
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
 				.fwmark = skb->nfmark,
@@ -800,11 +796,17 @@ void ip6_route_input(struct sk_buff *skb
 		.proto = iph->nexthdr,
 	};
 
-	if (rt6_need_strict(&iph->daddr))
+	if (rt6_need_strict(daddr))
 		flags |= RT6_LOOKUP_F_IFACE;
 
 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
 }
+EXPORT_SYMBOL(__ip6_route_input);
+
+void ip6_route_input(struct sk_buff *skb)
+{
+	__ip6_route_input(skb, &skb->nh.ipv6h->daddr);
+}
 
 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 					     struct flowi *fl, int flags)
@@ -1142,7 +1144,7 @@ static int ipv6_get_mtu(struct net_devic
 
 int ipv6_get_hoplimit(struct net_device *dev)
 {
-	int hoplimit = ipv6_devconf.hop_limit;
+	int hoplimit = ve_ipv6_devconf.hop_limit;
 	struct inet6_dev *idev;
 
 	idev = in6_dev_get(dev);
@@ -1596,10 +1598,14 @@ void rt6_pmtu_discovery(struct in6_addr 
 {
 	struct rt6_info *rt, *nrt;
 	int allfrag = 0;
-
+again:
 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
 	if (rt == NULL)
 		return;
+	if (unlikely(rt6_check_expired(rt))) {
+		ip6_del_rt(rt, NULL, NULL, NULL);
+		goto again;
+	}
 
 	if (pmtu >= dst_mtu(&rt->u.dst))
 		goto out;
@@ -1828,7 +1834,7 @@ int ipv6_route_ioctl(unsigned int cmd, v
 	switch(cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 		err = copy_from_user(&rtmsg, arg,
 				     sizeof(struct in6_rtmsg));
@@ -1944,10 +1950,12 @@ struct rt6_info *addrconf_dst_alloc(stru
 		rt->rt6i_flags |= RTF_ANYCAST;
 	else
 		rt->rt6i_flags |= RTF_LOCAL;
-	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
-	if (rt->rt6i_nexthop == NULL) {
+	rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
+	if (IS_ERR(rt->rt6i_nexthop)) {
+		void *err = rt->rt6i_nexthop;
+		rt->rt6i_nexthop = NULL;
 		dst_free((struct dst_entry *) rt);
-		return ERR_PTR(-ENOMEM);
+		return err;
 	}
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -2163,8 +2171,12 @@ static int rt6_fill_node(struct sk_buff 
 		goto rtattr_failure;
 	if (rt->u.dst.neighbour)
 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
+	if (rt->u.dst.dev) {
+		struct net_device *odev = rt->rt6i_dev;
+		if (rt == &ip6_null_entry)
+			odev = &loopback_dev;
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &odev->ifindex);
+	}
 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	if (rt->rt6i_expires)
@@ -2532,9 +2544,15 @@ int __init ip6_route_init(void)
 {
 	struct proc_dir_entry *p;
 
+	ip6_null_entry.u.dst.dev = &loopback_dev;
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	ip6_prohibit_entry.u.dst.dev = &loopback_dev;
+	ip6_blk_hole_entry.u.dst.dev = &loopback_dev;
+#endif
+
 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
 						     sizeof(struct rt6_info),
-						     0, SLAB_HWCACHE_ALIGN,
+						     0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						     NULL, NULL);
 	if (!ip6_dst_ops.kmem_cachep)
 		return -ENOMEM;
@@ -2547,9 +2565,11 @@ int __init ip6_route_init(void)
 	}
 
 #ifdef 	CONFIG_PROC_FS
-	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
-	if (p)
+	p = create_proc_glob_entry("net/ipv6_route", 0, NULL);
+	if (p) {
 		p->owner = THIS_MODULE;
+		p->get_info = rt6_proc_info;
+	}
 
 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
 #endif
@@ -2568,7 +2588,7 @@ void ip6_route_cleanup(void)
 	fib6_rules_cleanup();
 #endif
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("ipv6_route");
+	remove_proc_entry("net/ipv6_route", NULL);
 	proc_net_remove("rt6_stats");
 #endif
 #ifdef CONFIG_XFRM
@@ -2578,3 +2598,53 @@ void ip6_route_cleanup(void)
 	fib6_gc_cleanup();
 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
 }
+
+#ifdef CONFIG_VE
+int init_ve_route6(struct ve_struct *ve)
+{
+	ve->_fib6_table = kzalloc(sizeof(struct fib6_table), GFP_KERNEL_UBC);
+	if (!ve->_fib6_table)
+		return -ENOMEM;
+	ve->_fib6_table->owner_env = ve;
+	ve->_fib6_table->tb6_id	= RT6_TABLE_MAIN;
+	ve->_fib6_table->tb6_root.leaf = &ip6_null_entry;
+	ve->_fib6_table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT |
+								RTN_RTINFO;
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	ve->_fib6_local_table = kzalloc(sizeof(struct fib6_table),
+							GFP_KERNEL_UBC);
+	if (!ve->_fib6_local_table)
+		goto fail_local;
+	ve->_fib6_local_table->owner_env = ve;
+	ve->_fib6_local_table->tb6_id	= RT6_TABLE_LOCAL;
+	ve->_fib6_local_table->tb6_root.leaf = &ip6_null_entry;
+	ve->_fib6_local_table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT |
+								RTN_RTINFO;
+	if (fib6_rules_create() < 0)
+		goto fail_rules;
+#endif
+	fib6_tables_init();
+	return 0;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+fail_rules:
+	kfree(ve->_fib6_local_table);
+fail_local:
+	kfree(ve->_fib6_table);
+	return -ENOMEM;
+#endif
+}
+
+void fini_ve_route6(struct ve_struct *ve)
+{
+	if (ve->_fib6_table) {
+		rt6_ifdown(NULL);
+		fib6_tables_cleanup();
+		kfree(ve->_fib6_table);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+		kfree(ve->_fib6_local_table);
+		fib6_rules_destroy();
+#endif
+	}
+}
+#endif
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/sit.c kernel-2.6.18-417.el5-028stab121/net/ipv6/sit.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/sit.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/sit.c	2017-01-13 08:40:26.000000000 -0500
@@ -53,6 +53,13 @@
 #include <net/xfrm.h>
 #include <net/dsfield.h>
 
+#include <linux/vzcalluser.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_exports.h>
+
 /*
    This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
 
@@ -62,40 +69,43 @@
 #define HASH_SIZE  16
 #define HASH(addr) ((addr^(addr>>4))&0xF)
 
-static int ipip6_fb_tunnel_init(struct net_device *dev);
-static int ipip6_tunnel_init(struct net_device *dev);
-static void ipip6_tunnel_setup(struct net_device *dev);
+struct ve_sit {
+	struct net_device *ipip6_fb_tunnel_dev;
 
-static struct net_device *ipip6_fb_tunnel_dev;
+	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+	struct ip_tunnel *tunnels_r[HASH_SIZE];
+	struct ip_tunnel *tunnels_l[HASH_SIZE];
+	struct ip_tunnel *tunnels_wc[1];
+	struct ip_tunnel **tunnels[4];
+};
 
-static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_r[HASH_SIZE];
-static struct ip_tunnel *tunnels_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_wc[1];
-static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+static int ipip6_fb_tunnel_init(struct net_device *dev);
+static void ipip6_tunnel_setup(struct net_device *dev);
+static int ipip6_tunnel_init(struct net_device *dev);
 
 static DEFINE_RWLOCK(ipip6_lock);
 
-static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
+static struct ip_tunnel * ipip6_tunnel_lookup(struct ve_sit *vs,
+		u32 remote, u32 local)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(local);
 	struct ip_tunnel *t;
 
-	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+	for (t = vs->tunnels_r_l[h0^h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 	}
-	for (t = tunnels_r[h0]; t; t = t->next) {
+	for (t = vs->tunnels_r[h0]; t; t = t->next) {
 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 			return t;
 	}
-	for (t = tunnels_l[h1]; t; t = t->next) {
+	for (t = vs->tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 			return t;
 	}
-	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+	if ((t = vs->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 		return t;
 	return NULL;
 }
@@ -106,6 +116,7 @@ static struct ip_tunnel ** ipip6_bucket(
 	u32 local = t->parms.iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
+	struct ve_sit *vs = get_exec_env()->ve_sit;
 
 	if (remote) {
 		prio |= 2;
@@ -115,7 +126,7 @@ static struct ip_tunnel ** ipip6_bucket(
 		prio |= 1;
 		h ^= HASH(local);
 	}
-	return &tunnels[prio][h];
+	return &vs->tunnels[prio][h];
 }
 
 static void ipip6_tunnel_unlink(struct ip_tunnel *t)
@@ -151,6 +162,7 @@ static struct ip_tunnel * ipip6_tunnel_l
 	unsigned h = 0;
 	int prio = 0;
 	char name[IFNAMSIZ];
+	struct ve_sit *vs = get_exec_env()->ve_sit;
 
 	if (remote) {
 		prio |= 2;
@@ -160,7 +172,7 @@ static struct ip_tunnel * ipip6_tunnel_l
 		prio |= 1;
 		h ^= HASH(local);
 	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = &vs->tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
@@ -204,9 +216,11 @@ failed:
 
 static void ipip6_tunnel_uninit(struct net_device *dev)
 {
-	if (dev == ipip6_fb_tunnel_dev) {
+	struct ve_sit *vs = dev->owner_env->ve_sit;
+
+	if (dev == vs->ipip6_fb_tunnel_dev) {
 		write_lock_bh(&ipip6_lock);
-		tunnels_wc[0] = NULL;
+		vs->tunnels_wc[0] = NULL;
 		write_unlock_bh(&ipip6_lock);
 		dev_put(dev);
 	} else {
@@ -228,6 +242,10 @@ static void ipip6_err(struct sk_buff *sk
 	int type = skb->h.icmph->type;
 	int code = skb->h.icmph->code;
 	struct ip_tunnel *t;
+	struct ve_struct *ve;
+
+	if (skb->owner_env->ve_sit == NULL)
+		return;
 
 	switch (type) {
 	default:
@@ -258,7 +276,13 @@ static void ipip6_err(struct sk_buff *sk
 	}
 
 	read_lock(&ipip6_lock);
-	t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
+
+	ve = skb->owner_env;
+	if (ve->ve_sit == NULL)
+		goto out;
+
+	t = ipip6_tunnel_lookup(ve->ve_sit, iph->daddr, iph->saddr);
+
 	if (t == NULL || t->parms.iph.daddr == 0)
 		goto out;
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
@@ -369,14 +393,17 @@ static int ipip6_rcv(struct sk_buff *skb
 {
 	struct iphdr *iph;
 	struct ip_tunnel *tunnel;
+	struct ve_struct *ve;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+	ve = set_exec_env(skb->owner_env);
+
+	if (ve->ve_sit == NULL || !pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
 	iph = skb->nh.iph;
 
 	read_lock(&ipip6_lock);
-	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+	if ((tunnel = ipip6_tunnel_lookup(ve->ve_sit, iph->saddr, iph->daddr)) != NULL) {
 		secpath_reset(skb);
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = skb->data;
@@ -392,6 +419,7 @@ static int ipip6_rcv(struct sk_buff *skb
 		ipip6_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
 		read_unlock(&ipip6_lock);
+		set_exec_env(ve);
 		return 0;
 	}
 
@@ -399,6 +427,7 @@ static int ipip6_rcv(struct sk_buff *skb
 	read_unlock(&ipip6_lock);
 out:
 	kfree_skb(skb);
+	set_exec_env(ve);
 	return 0;
 }
 
@@ -436,6 +465,9 @@ static int ipip6_tunnel_xmit(struct sk_b
 	int    mtu;
 	struct in6_addr *addr6;	
 	int addr_type;
+	struct ve_struct *ve;
+
+	ve = set_exec_env(dev->owner_env);
 
 	if (tunnel->recursion++) {
 		tunnel->stat.collisions++;
@@ -539,8 +571,7 @@ static int ipip6_tunnel_xmit(struct sk_b
 			ip_rt_put(rt);
   			stats->tx_dropped++;
 			dev_kfree_skb(skb);
-			tunnel->recursion--;
-			return 0;
+			goto out;
 		}
 		if (skb->sk)
 			skb_set_owner_w(new_skb, skb->sk);
@@ -579,7 +610,9 @@ static int ipip6_tunnel_xmit(struct sk_b
 	nf_reset(skb);
 
 	IPTUNNEL_XMIT();
+out:
 	tunnel->recursion--;
+	set_exec_env(ve);
 	return 0;
 
 tx_error_icmp:
@@ -587,8 +620,7 @@ tx_error_icmp:
 tx_error:
 	stats->tx_errors++;
 	dev_kfree_skb(skb);
-	tunnel->recursion--;
-	return 0;
+	goto out;
 }
 
 static void ipip6_tunnel_bind_dev(struct net_device *dev)
@@ -633,11 +665,15 @@ ipip6_tunnel_ioctl (struct net_device *d
 	int err = 0;
 	struct ip_tunnel_parm p;
 	struct ip_tunnel *t;
+	struct ve_sit *vs = get_exec_env()->ve_sit;
+
+	/* this ioctl is called only having a fb dev in VE */
+	BUG_ON(vs == NULL);
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
 		t = NULL;
-		if (dev == ipip6_fb_tunnel_dev) {
+		if (dev == vs->ipip6_fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 				err = -EFAULT;
 				break;
@@ -654,7 +690,7 @@ ipip6_tunnel_ioctl (struct net_device *d
 	case SIOCADDTUNNEL:
 	case SIOCCHGTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto done;
 
 		err = -EFAULT;
@@ -670,7 +706,7 @@ ipip6_tunnel_ioctl (struct net_device *d
 
 		t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 
-		if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+		if (dev != vs->ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
 					err = -EEXIST;
@@ -712,10 +748,10 @@ ipip6_tunnel_ioctl (struct net_device *d
 
 	case SIOCDELTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			goto done;
 
-		if (dev == ipip6_fb_tunnel_dev) {
+		if (dev == vs->ipip6_fb_tunnel_dev) {
 			err = -EFAULT;
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 				goto done;
@@ -723,7 +759,7 @@ ipip6_tunnel_ioctl (struct net_device *d
 			if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t == netdev_priv(ipip6_fb_tunnel_dev))
+			if (t == netdev_priv(vs->ipip6_fb_tunnel_dev))
 				goto done;
 			dev = t->dev;
 		}
@@ -751,6 +787,108 @@ static int ipip6_tunnel_change_mtu(struc
 	return 0;
 }
 
+static void cpt_dump_sit(struct net_device *dev,
+		struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	struct cpt_tunnel_image v;
+	struct ip_tunnel *t;
+
+	t = netdev_priv(dev);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_VOID;
+
+	/* mark fb dev */
+	v.cpt_tnl_flags = CPT_TUNNEL_SIT;
+	if (dev == get_exec_env()->ve_sit->ipip6_fb_tunnel_dev)
+		v.cpt_tnl_flags |= CPT_TUNNEL_FBDEV;
+
+	v.cpt_i_flags = t->parms.i_flags;
+	v.cpt_o_flags = t->parms.o_flags;
+	v.cpt_i_key = t->parms.i_key;
+	v.cpt_o_key = t->parms.o_key;
+
+	BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
+	memcpy(&v.cpt_iphdr, &t->parms.iph, sizeof(t->parms.iph));
+
+	ops->write(&v, sizeof(v), ctx);
+}
+
+static int rst_restore_sit(loff_t start, struct cpt_netdev_image *di,
+			struct cpt_ops *ops, struct cpt_context *ctx)
+{
+	int err = -ENODEV;
+	struct cpt_tunnel_image v;
+	struct net_device *dev;
+	struct ip_tunnel *t;
+	loff_t pos;
+	int fbdev;
+
+	pos = start + di->cpt_hdrlen;
+	err = ops->get_object(CPT_OBJ_NET_IPIP_TUNNEL,
+			pos, &v, sizeof(v), ctx);
+	if (err)
+		return err;
+
+	/* some sanity */
+	if (v.cpt_content != CPT_CONTENT_VOID)
+		return -EINVAL;
+
+	if (!(v.cpt_tnl_flags & CPT_TUNNEL_SIT))
+		return 1;
+
+	if (v.cpt_tnl_flags & CPT_TUNNEL_FBDEV) {
+		fbdev = 1;
+		err = 0;
+		dev = get_exec_env()->ve_sit->ipip6_fb_tunnel_dev;
+	} else {
+		fbdev = 0;
+		err = -ENOMEM;
+		dev = alloc_netdev(sizeof(struct ip_tunnel), di->cpt_name,
+				ipip6_tunnel_setup);
+		if (!dev)
+			goto out;
+	}
+
+	t = netdev_priv(dev);
+	t->parms.i_flags = v.cpt_i_flags;
+	t->parms.o_flags = v.cpt_o_flags;
+	t->parms.i_key = v.cpt_i_key;
+	t->parms.o_key = v.cpt_o_key;
+
+	BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
+	memcpy(&t->parms.iph, &v.cpt_iphdr, sizeof(t->parms.iph));
+
+	if (!fbdev) {
+		dev->init = ipip6_tunnel_init;
+		err = register_netdevice(dev);
+		if (err) {
+			free_netdev(dev);
+			goto out;
+		}
+
+		dev_hold(dev);
+		ipip6_tunnel_link(t);
+	}
+out:
+	return err;
+}
+
+static struct net_device_stats *cpt_sit_stats_ptr(struct net_device *dev)
+{
+	return &((struct ip_tunnel *)netdev_priv(dev))->stat;
+}
+
+static struct dev_cpt_ops sit_cpt_ops = {
+	.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL,
+	.name = "sit",
+	.dump = cpt_dump_sit,
+	.restore = rst_restore_sit,
+	.stats = cpt_sit_stats_ptr,
+};
+
 static void ipip6_tunnel_setup(struct net_device *dev)
 {
 	SET_MODULE_OWNER(dev);
@@ -767,6 +905,10 @@ static void ipip6_tunnel_setup(struct ne
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
+
+	dev->features		|= NETIF_F_VIRTUAL;
+
+	dev->cpt_ops = &sit_cpt_ops;
 }
 
 static int ipip6_tunnel_init(struct net_device *dev)
@@ -786,7 +928,7 @@ static int ipip6_tunnel_init(struct net_
 	return 0;
 }
 
-static int __init ipip6_fb_tunnel_init(struct net_device *dev)
+static int ipip6_fb_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
@@ -800,7 +942,7 @@ static int __init ipip6_fb_tunnel_init(s
 	iph->ttl		= 64;
 
 	dev_hold(dev);
-	tunnels_wc[0]		= tunnel;
+	get_exec_env()->ve_sit->tunnels_wc[0] = tunnel;
 	return 0;
 }
 
@@ -809,27 +951,110 @@ static struct net_protocol sit_protocol 
 	.err_handler	=	ipip6_err,
 };
 
-static void __exit sit_destroy_tunnels(void)
+static void sit_destroy_tunnels(struct ve_struct *ve)
 {
 	int prio;
+	struct ve_sit *vs = ve->ve_sit;
 
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
 			struct ip_tunnel *t;
-			while ((t = tunnels[prio][h]) != NULL)
+			while ((t = vs->tunnels[prio][h]) != NULL)
 				unregister_netdevice(t->dev);
 		}
 	}
 }
 
+static int sit_create_tunnels(struct ve_struct *ve)
+{
+	int err;
+	struct ve_sit *vs;
+
+	vs = kzalloc(sizeof(struct ve_sit), GFP_KERNEL);
+	if (vs == NULL)	/* bugfix: checked "ve" before, i.e. the wrong pointer */
+		return -ENOMEM;
+
+	vs->tunnels[0] = vs->tunnels_wc;
+	vs->tunnels[1] = vs->tunnels_l;
+	vs->tunnels[2] = vs->tunnels_r;
+	vs->tunnels[3] = vs->tunnels_r_l;
+
+	ve->ve_sit = vs;
+
+	vs->ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", 
+					   ipip6_tunnel_setup);
+	if (!vs->ipip6_fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	vs->ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
+
+	if ((err = register_netdev(vs->ipip6_fb_tunnel_dev)))
+		goto err2;
+
+	return 0;
+
+err2:
+	free_netdev(vs->ipip6_fb_tunnel_dev);
+err1:
+	kfree(vs); ve->ve_sit = NULL;	/* bugfix: vs was leaked on this path */
+	return err;
+}
+
+static int ve_sit_init(void *x)
+{
+	int err;
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_SIT))
+		return 0;
+
+	err = sit_create_tunnels(ve);
+	if (err == 0)
+		__module_get(THIS_MODULE);
+
+	return err;
+}
+
+static void ve_sit_fini(void *x)
+{
+	struct ve_struct *ve = x;
+
+	if (!(ve->features & VE_FEATURE_SIT)) {
+		BUG_ON(ve->ve_sit != NULL);
+		return;
+	}
+
+	rtnl_lock();
+	sit_destroy_tunnels(ve);
+	unregister_netdevice(ve->ve_sit->ipip6_fb_tunnel_dev);
+	rtnl_unlock();
+
+	kfree(ve->ve_sit);
+	ve->ve_sit = NULL;
+
+	module_put(THIS_MODULE);
+}
+
+static struct ve_hook sit_hook = {
+	.owner = THIS_MODULE,
+	.priority = HOOK_PRIO_NET,
+	.init = ve_sit_init,
+	.fini = ve_sit_fini,
+};
+
 void __exit sit_cleanup(void)
 {
+	unregister_dev_cpt_ops(&sit_cpt_ops);
+	ve_hook_unregister(&sit_hook);
+
 	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
 
 	rtnl_lock();
-	sit_destroy_tunnels();
-	unregister_netdevice(ipip6_fb_tunnel_dev);
+	sit_destroy_tunnels(get_ve0());
+	unregister_netdevice(get_ve0()->ve_sit->ipip6_fb_tunnel_dev);
 	rtnl_unlock();
 }
 
@@ -844,22 +1069,15 @@ int __init sit_init(void)
 		return -EAGAIN;
 	}
 
-	ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", 
-					   ipip6_tunnel_setup);
-	if (!ipip6_fb_tunnel_dev) {
-		err = -ENOMEM;
-		goto err1;
-	}
 
-	ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
-
-	if ((err =  register_netdev(ipip6_fb_tunnel_dev)))
-		goto err2;
+	err = sit_create_tunnels(get_ve0());
+	if (err)
+		goto err1;
 
+	ve_hook_register(VE_SS_CHAIN, &sit_hook);
+	register_dev_cpt_ops(&sit_cpt_ops);
  out:
 	return err;
- err2:
-	free_netdev(ipip6_fb_tunnel_dev);
  err1:
 	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
 	goto out;
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/tcp_ipv6.c kernel-2.6.18-417.el5-028stab121/net/ipv6/tcp_ipv6.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/tcp_ipv6.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/tcp_ipv6.c	2017-01-13 08:40:41.000000000 -0500
@@ -62,6 +62,8 @@
 #include <net/timewait_sock.h>
 #include <net/secure_seq.h>
 
+#include <ub/ub_tcp.h>
+
 #include <asm/uaccess.h>
 
 #include <linux/proc_fs.h>
@@ -530,13 +532,15 @@ static inline void syn_flood_warning(str
 #ifdef CONFIG_SYN_COOKIES
 	if (sysctl_tcp_syncookies)
 		printk(KERN_INFO
-		       "TCPv6: Possible SYN flooding on port %d. "
-		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
+		       "TCPv6: Possible SYN flooding on ctid %u, port %d. "
+		       "Sending cookies.\n", 
+		       skb->owner_env->veid, ntohs(tcp_hdr(skb)->dest));
 	else
 #endif
 		printk(KERN_INFO
-		       "TCPv6: Possible SYN flooding on port %d. "
-		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
+		       "TCPv6: Possible SYN flooding on ctid %u, port %d. "
+		       "Dropping request.\n", 
+		       skb->owner_env->veid, ntohs(tcp_hdr(skb)->dest));
 }
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -554,6 +558,11 @@ struct request_sock_ops tcp6_request_soc
 	.send_reset	=	tcp_v6_send_reset
 };
 
+struct request_sock *__inet6_reqsk_alloc(void)
+{
+	return reqsk_alloc(&tcp6_request_sock_ops);
+}
+
 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
 	.twsk_unique	= tcp_twsk_unique,
@@ -1136,6 +1145,8 @@ static int tcp_v6_do_rcv(struct sock *sk
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp;
 	struct sk_buff *opt_skb = NULL;
+	struct user_beancounter *ub;
+
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
 	   goes to IPv4 receive handler and backlogged.
@@ -1148,6 +1159,8 @@ static int tcp_v6_do_rcv(struct sock *sk
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
+	ub = set_exec_ub(sock_bc(sk)->ub);
+
 	if (sk_filter(sk, skb, 0))
 		goto discard;
 
@@ -1179,7 +1192,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
 			goto ipv6_pktoptions;
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
@@ -1200,7 +1213,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 				goto reset;
 			if (opt_skb)
 				__kfree_skb(opt_skb);
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1210,6 +1223,9 @@ static int tcp_v6_do_rcv(struct sock *sk
 	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
 		goto ipv6_pktoptions;
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1218,7 +1234,7 @@ discard:
 	if (opt_skb)
 		__kfree_skb(opt_skb);
 	kfree_skb(skb);
-	return 0;
+	goto restore_context;
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
 	goto discard;
@@ -1250,7 +1266,7 @@ ipv6_pktoptions:
 
 	if (opt_skb)
 		kfree_skb(opt_skb);
-	return 0;
+	goto restore_context;
 }
 
 static int tcp_v6_rcv(struct sk_buff **pskb)
@@ -1445,6 +1461,15 @@ static struct inet_connection_sock_af_op
 #endif
 };
 
+void inet6_make_mapped(struct sock *sk)
+{
+	if (sk->sk_type == SOCK_STREAM &&
+			sk->sk_protocol == IPPROTO_TCP) {
+		inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
+		sk->sk_backlog_rcv = tcp_v4_do_rcv;
+	}
+}
+
 /* NOTE: A lot of things set to zero explicitly by call to
  *       sk_alloc() so need not be done here.
  */
@@ -1649,7 +1674,7 @@ out:
 static struct file_operations tcp6_seq_fops;
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "tcp6",
+	.name		= "net/tcp6",
 	.family		= AF_INET6,
 	.seq_show	= tcp6_seq_show,
 	.seq_fops	= &tcp6_seq_fops,
diff -upr kernel-2.6.18-417.el5.orig/net/ipv6/udp.c kernel-2.6.18-417.el5-028stab121/net/ipv6/udp.c
--- kernel-2.6.18-417.el5.orig/net/ipv6/udp.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/ipv6/udp.c	2017-01-13 08:40:40.000000000 -0500
@@ -40,7 +40,6 @@
 
 #include <net/sock.h>
 #include <net/snmp.h>
-
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 #include <net/protocol.h>
@@ -61,91 +60,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
 
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- */
 static int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	struct sock *sk2;
-	struct hlist_node *node;
-
-	write_lock_bh(&udp_hash_lock);
-	if (!snum) {
-		int i, low, high, remaining;
-		unsigned rover, best, best_size_so_far;
-
-		inet_get_local_port_range(&low, &high);
-		remaining = (high - low) + 1;
-
-		best_size_so_far = UINT_MAX;
-		best = rover = net_random() % remaining + low;
-
-		if (!udp_lport_inuse(rover) &&
-		    !inet_is_reserved_local_port(rover))
-			goto gotit;
-
-		/* 1st pass: look for empty (or shortest) hash chain */
-		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
-			int size = 0;
-			struct hlist_head *list;
-
-			list = &udp_hash[rover & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list) &&
-			    !inet_is_reserved_local_port(rover))
-				goto gotit;
-
-			sk_for_each(sk2, node, list)
-				if (++size >= best_size_so_far)
-					goto next;
-			best_size_so_far = size;
-			best = rover;
-		next:
-			/* fold back if end of range */
-			if (++rover > high)
-				rover = low + ((rover - low)
-				            & (UDP_HTABLE_SIZE - 1));
-		}
-		/* 2nd pass: find hole in shortest hash chain */
-		rover = best;
-		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
-			if (!udp_lport_inuse(rover) &&
-			    !inet_is_reserved_local_port(rover))
-				goto gotit;
-			rover += UDP_HTABLE_SIZE;
-			if (rover > high)
-				rover = low + ((rover - low)
-				            & (UDP_HTABLE_SIZE - 1));
-		}
-		/* All ports in use! */
-		goto fail;
-
-gotit:
-		snum = rover;
-	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			if (inet_sk(sk2)->num == snum &&
-			    sk2 != sk &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse) &&
-			    ipv6_rcv_saddr_equal(sk, sk2))
-				goto fail;
-		}
-	}
-
-	inet_sk(sk)->num = snum;
-	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
-		sock_prot_inc_use(sk->sk_prot);
-	}
-	write_unlock_bh(&udp_hash_lock);
-	return 0;
-
-fail:
-	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 static void udp_v6_hash(struct sock *sk)
@@ -170,12 +87,15 @@ static struct sock *udp_v6_lookup(struct
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
  	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && sk->sk_family == PF_INET6) {
+		if (inet->num == hnum && sk->sk_family == PF_INET6 &&
+				ve_accessible_strict(sk->owner_env, env)) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			int score = 0;
 			if (inet->dport) {
@@ -438,7 +358,8 @@ static void udpv6_mcast_deliver(struct u
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+				VEID(skb->owner_env))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
@@ -1143,7 +1064,7 @@ static int udp6_seq_show(struct seq_file
 static struct file_operations udp6_seq_fops;
 static struct udp_seq_afinfo udp6_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "udp6",
+	.name		= "net/udp6",
 	.family		= AF_INET6,
 	.seq_show	= udp6_seq_show,
 	.seq_fops	= &udp6_seq_fops,
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/core.c kernel-2.6.18-417.el5-028stab121/net/netfilter/core.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/core.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/core.c	2017-01-13 08:40:40.000000000 -0500
@@ -61,13 +61,30 @@ struct list_head nf_hooks[NPROTO][NF_MAX
 #endif
 EXPORT_SYMBOL(nf_hooks);
 static DEFINE_SPINLOCK(nf_hook_lock);
+#ifdef CONFIG_VE_IPTABLES
+#define VE_NF_HOOKS(env, x, y) \
+	((struct list_head (*)[NF_MAX_HOOKS])((env)->_nf_hooks))[(x)][(y)]
+#else
+#define VE_NF_HOOKS(env, x, y) nf_hooks[(x)][(y)]
+#endif
 
 int nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct list_head *i;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+	if (!ve_is_super(env)) {
+		struct nf_hook_ops *tmp;
+		tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+		memcpy(tmp, reg, sizeof(struct nf_hook_ops));
+		reg = tmp;
+	}
 
 	spin_lock_bh(&nf_hook_lock);
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
+	list_for_each(i, &VE_NF_HOOKS(env, reg->pf, reg->hooknum)) {
 		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
 			break;
 	}
@@ -81,11 +98,29 @@ EXPORT_SYMBOL(nf_register_hook);
 
 void nf_unregister_hook(struct nf_hook_ops *reg)
 {
+	struct nf_hook_ops *i;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+	if (!ve_is_super(env)) {
+		list_for_each_entry_rcu(i,
+			&VE_NF_HOOKS(env, reg->pf, reg->hooknum), list) {
+			if (reg->hook == i->hook) {
+				reg = i;
+				break;
+			}
+		}
+		if (reg != i)
+			return;
+	}
+
 	spin_lock_bh(&nf_hook_lock);
 	list_del_rcu(&reg->list);
 	spin_unlock_bh(&nf_hook_lock);
 
 	synchronize_net();
+	if (!ve_is_super(env))
+		kfree(reg);
 }
 EXPORT_SYMBOL(nf_unregister_hook);
 
@@ -170,13 +205,15 @@ int nf_hook_slow(int pf, unsigned int ho
 	struct list_head *elem;
 	unsigned int verdict;
 	int ret = 0;
+	struct ve_struct *env;
 
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
-	elem = &nf_hooks[pf][hook];
+	env = get_exec_env();
+	elem = &VE_NF_HOOKS(env, pf, hook);
 next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+	verdict = nf_iterate(&VE_NF_HOOKS(env, pf, hook), pskb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
@@ -226,6 +263,28 @@ copy_skb:
 }
 EXPORT_SYMBOL(skb_make_writable);
 
+u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
+{
+	u_int32_t diff[] = { oldval, newval };
+
+	return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
+}
+EXPORT_SYMBOL(nf_csum_update);
+
+u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+			       u_int32_t oldval, u_int32_t newval,
+			       u_int16_t csum, int pseudohdr)
+{
+	if (!skb_partial_checksummed(skb)) {
+		csum = nf_csum_update(oldval, newval, csum);
+		if (skb->ip_summed == CHECKSUM_HW && pseudohdr)
+			skb->csum = nf_csum_update(oldval, newval, skb->csum);
+	} else if (pseudohdr)
+		csum = ~nf_csum_update(oldval, newval, ~csum);
+
+	return csum;
+}
+EXPORT_SYMBOL(nf_proto_csum_update);
 
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
@@ -249,13 +308,54 @@ struct proc_dir_entry *proc_net_netfilte
 EXPORT_SYMBOL(proc_net_netfilter);
 #endif
 
-void __init netfilter_init(void)
+void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
 {
 	int i, h;
 	for (i = 0; i < NPROTO; i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
-			INIT_LIST_HEAD(&nf_hooks[i][h]);
+			INIT_LIST_HEAD(&nh[i][h]);
 	}
+}
+
+int init_netfilter(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *envid;
+
+	envid = get_exec_env();
+	envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
+	if (envid->_nf_hooks == NULL)
+		return -ENOMEM;
+
+	/* FIXME: charge ubc */
+
+	init_nf_hooks(envid->_nf_hooks);
+	return 0;
+#else
+	init_nf_hooks(nf_hooks);
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(init_netfilter);
+
+#ifdef CONFIG_VE_IPTABLES
+void fini_netfilter(void)
+{
+	struct ve_struct *envid;
+
+	envid = get_exec_env();
+	/* kfree(NULL) is a no-op, no need to check */
+	kfree(envid->_nf_hooks);
+	envid->_nf_hooks = NULL;
+
+	/* FIXME: uncharge ubc */
+}
+EXPORT_SYMBOL(fini_netfilter);
+#endif
+
+void __init netfilter_init(void)
+{
+	if (init_netfilter()) panic("netfilter: cannot allocate nf_hooks");
 
 #ifdef CONFIG_PROC_FS
 	proc_net_netfilter = proc_mkdir("netfilter", proc_net);
@@ -268,3 +368,4 @@ void __init netfilter_init(void)
 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
 }
+
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/nfnetlink.c kernel-2.6.18-417.el5-028stab121/net/netfilter/nfnetlink.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/nfnetlink.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/nfnetlink.c	2017-01-13 08:40:19.000000000 -0500
@@ -228,7 +228,7 @@ static int nfnetlink_rcv_msg(struct sk_b
 		 NFNL_SUBSYS_ID(nlh->nlmsg_type),
 		 NFNL_MSG_TYPE(nlh->nlmsg_type));
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
+	if (security_netlink_recv(skb, CAP_VE_NET_ADMIN)) {
 		DEBUGP("missing CAP_NET_ADMIN\n");
 		*errp = -EPERM;
 		return -1;
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/nf_queue.c kernel-2.6.18-417.el5-028stab121/net/netfilter/nf_queue.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/nf_queue.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/nf_queue.c	2017-01-13 08:40:23.000000000 -0500
@@ -185,12 +185,12 @@ void nf_reinject(struct sk_buff *skb, st
 	/* Drop reference to owner of hook which queued us. */
 	module_put(info->elem->owner);
 
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
+	list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
 		if (i == elem) 
   			break;
   	}
   
-	if (i == &nf_hooks[info->pf][info->hook]) {
+	if (i == &ve_nf_hooks[info->pf][info->hook]) {
 		/* The module which sent it to userspace is gone. */
 		NFDEBUG("%s: module disappeared, dropping packet.\n",
 			__FUNCTION__);
@@ -211,7 +211,7 @@ void nf_reinject(struct sk_buff *skb, st
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+		verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
 				     &skb, info->hook, 
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/nf_sockopt.c kernel-2.6.18-417.el5-028stab121/net/netfilter/nf_sockopt.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/nf_sockopt.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/nf_sockopt.c	2017-01-13 08:40:23.000000000 -0500
@@ -80,6 +80,11 @@ static int nf_sockopt(struct sock *sk, i
 	struct nf_sockopt_ops *ops;
 	int ret;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_nf_hooks)
+		return -ENOPROTOOPT;
+#endif
+
 	if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;
 
@@ -138,6 +143,11 @@ static int compat_nf_sockopt(struct sock
 	struct nf_sockopt_ops *ops;
 	int ret;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_nf_hooks)
+		return -ENOPROTOOPT;
+#endif
+
 	if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;
 
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/x_tables.c kernel-2.6.18-417.el5-028stab121/net/netfilter/x_tables.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/x_tables.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/x_tables.c	2017-01-13 08:40:23.000000000 -0500
@@ -24,6 +24,10 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
+#include <linux/nfcalls.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
 
 
 MODULE_LICENSE("GPL");
@@ -42,6 +46,14 @@ struct xt_af {
 
 static struct xt_af *xt;
 
+#ifdef CONFIG_VE_IPTABLES
+/* include ve.h and define get_exec_env */
+#include <linux/sched.h>
+#define xt_tables(af)	(get_exec_env()->_xt_tables[af])
+#else
+#define xt_tables(af)	xt[af].tables
+#endif
+
 #ifdef DEBUG_IP_FIREWALL_USER
 #define duprintf(format, args...) printk(format , ## args)
 #else
@@ -60,6 +72,46 @@ static const char *xt_prefix[NPROTO] = {
 	[NF_ARP]	= "arp",
 };
 
+#ifdef CONFIG_USER_RESOURCE
+static inline struct user_beancounter *xt_table_ub(struct xt_table_info *info)
+{
+	struct user_beancounter *ub;
+
+	for (ub = mem_ub(info); ub->parent != NULL; ub = ub->parent);
+	return ub;
+}
+
+static void uncharge_xtables(struct xt_table_info *info, unsigned long size)
+{
+	struct user_beancounter *ub;
+
+	ub = xt_table_ub(info);
+	uncharge_beancounter(ub, UB_NUMXTENT, size);
+}
+
+static int recharge_xtables(int check_ub,
+		struct xt_table_info *new, struct xt_table_info *old)
+{
+	struct user_beancounter *ub;
+	long change;
+
+	ub = xt_table_ub(new);
+	BUG_ON(check_ub && ub != xt_table_ub(old));
+
+	change = (long)new->number - (long)old->number;
+	if (change > 0) {
+		if (charge_beancounter(ub, UB_NUMXTENT, change, UB_SOFT))
+			return -ENOMEM;
+	} else if (change < 0)
+		uncharge_beancounter(ub, UB_NUMXTENT, -change);
+
+	return 0;
+}
+#else
+#define recharge_xtables(c, new, old)	(0)
+#define uncharge_xtables(info, s)	do { } while (0)
+#endif	/* CONFIG_USER_RESOURCE */
+
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
@@ -71,7 +123,7 @@ xt_register_target(struct xt_target *tar
 		return ret;
 	list_add(&target->list, &xt[af].target);
 	mutex_unlock(&xt[af].mutex);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(xt_register_target);
 
@@ -98,7 +150,7 @@ xt_register_match(struct xt_match *match
 	list_add(&match->list, &xt[af].match);
 	mutex_unlock(&xt[af].mutex);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(xt_register_match);
 
@@ -248,24 +300,25 @@ int xt_check_match(const struct xt_match
 		   unsigned short proto, int inv_proto)
 {
 	if (XT_ALIGN(match->matchsize) != size) {
-		printk("%s_tables: %s match: invalid size %Zu != %u\n",
-		       xt_prefix[family], match->name,
-		       XT_ALIGN(match->matchsize), size);
+		ve_printk(VE_LOG, "%s_tables: %s match: invalid size %Zu != "
+			"%u\n", xt_prefix[family], match->name,
+			XT_ALIGN(match->matchsize), size);
 		return -EINVAL;
 	}
 	if (match->table && strcmp(match->table, table)) {
-		printk("%s_tables: %s match: only valid in %s table, not %s\n",
-		       xt_prefix[family], match->name, match->table, table);
+		ve_printk(VE_LOG, "%s_tables: %s match: only valid in %s table,"
+			" not %s\n", xt_prefix[family], match->name,
+			match->table, table);
 		return -EINVAL;
 	}
 	if (match->hooks && (hook_mask & ~match->hooks) != 0) {
-		printk("%s_tables: %s match: bad hook_mask %u\n",
+		ve_printk(VE_LOG, "%s_tables: %s match: bad hook_mask %u\n",
 		       xt_prefix[family], match->name, hook_mask);
 		return -EINVAL;
 	}
 	if (match->proto && (match->proto != proto || inv_proto)) {
-		printk("%s_tables: %s match: only valid for protocol %u\n",
-		       xt_prefix[family], match->name, match->proto);
+		ve_printk(VE_LOG, "%s_tables: %s match: only valid for protocol"
+			" %u\n", xt_prefix[family], match->name, match->proto);
 		return -EINVAL;
 	}
 	return 0;
@@ -325,24 +378,26 @@ int xt_check_target(const struct xt_targ
 		    unsigned short proto, int inv_proto)
 {
 	if (XT_ALIGN(target->targetsize) != size) {
-		printk("%s_tables: %s target: invalid size %Zu != %u\n",
-		       xt_prefix[family], target->name,
-		       XT_ALIGN(target->targetsize), size);
+		ve_printk(VE_LOG, "%s_tables: %s target: invalid size %Zu != "
+			"%u\n", xt_prefix[family], target->name,
+			XT_ALIGN(target->targetsize), size);
 		return -EINVAL;
 	}
 	if (target->table && strcmp(target->table, table)) {
-		printk("%s_tables: %s target: only valid in %s table, not %s\n",
-		       xt_prefix[family], target->name, target->table, table);
+		ve_printk(VE_LOG, "%s_tables: %s target: only valid in %s "
+			"table, not %s\n", xt_prefix[family], target->name,
+			target->table, table);
 		return -EINVAL;
 	}
 	if (target->hooks && (hook_mask & ~target->hooks) != 0) {
-		printk("%s_tables: %s target: bad hook_mask %u\n",
+		ve_printk(VE_LOG, "%s_tables: %s target: bad hook_mask %u\n",
 		       xt_prefix[family], target->name, hook_mask);
 		return -EINVAL;
 	}
 	if (target->proto && (target->proto != proto || inv_proto)) {
-		printk("%s_tables: %s target: only valid for protocol %u\n",
-		       xt_prefix[family], target->name, target->proto);
+		ve_printk(VE_LOG, "%s_tables: %s target: only valid for "
+			"protocol %u\n", xt_prefix[family], target->name,
+			target->proto);
 		return -EINVAL;
 	}
 	return 0;
@@ -406,19 +461,19 @@ struct xt_table_info *xt_alloc_table_inf
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
 		return NULL;
 
-	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
+	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL_UBC);
 	if (!newinfo)
 		return NULL;
 
-	newinfo->size = size;
+	newinfo->alloc_size = newinfo->size = size;
 
 	for_each_possible_cpu(cpu) {
 		if (size <= PAGE_SIZE)
 			newinfo->entries[cpu] = kmalloc_node(size,
-							GFP_KERNEL,
+							GFP_KERNEL_UBC,
 							cpu_to_node(cpu));
 		else
-			newinfo->entries[cpu] = vmalloc_node(size,
+			newinfo->entries[cpu] = ub_vmalloc_node(size,
 							cpu_to_node(cpu));
 
 		if (newinfo->entries[cpu] == NULL) {
@@ -436,7 +491,7 @@ void xt_free_table_info(struct xt_table_
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		if (info->size <= PAGE_SIZE)
+		if (info->alloc_size <= PAGE_SIZE)
 			kfree(info->entries[cpu]);
 		else
 			vfree(info->entries[cpu]);
@@ -453,7 +508,7 @@ struct xt_table *xt_find_table_lock(int 
 	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
 		return ERR_PTR(-EINTR);
 
-	list_for_each_entry(t, &xt[af].tables, list)
+	list_for_each_entry(t, &xt_tables(af), list)
 		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
 			return t;
 	mutex_unlock(&xt[af].mutex);
@@ -501,6 +556,13 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 	oldinfo = private;
+
+	if (recharge_xtables(num_counters != 0, newinfo, oldinfo)) {
+		write_unlock_bh(&table->lock);
+		*error = -ENOMEM;
+		return NULL;
+	}
+
 	table->private = newinfo;
 	newinfo->initial_entries = oldinfo->initial_entries;
 	write_unlock_bh(&table->lock);
@@ -521,7 +583,7 @@ int xt_register_table(struct xt_table *t
 		return ret;
 
 	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&xt[table->af].tables, table->name)) {
+	if (list_named_find(&xt_tables(table->af), table->name)) {
 		ret = -EEXIST;
 		goto unlock;
 	}
@@ -538,7 +600,7 @@ int xt_register_table(struct xt_table *t
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	list_prepend(&xt[table->af].tables, table);
+	list_prepend(&xt_tables(table->af), table);
 
 	ret = 0;
  unlock:
@@ -547,19 +609,67 @@ int xt_register_table(struct xt_table *t
 }
 EXPORT_SYMBOL_GPL(xt_register_table);
 
+struct xt_table * virt_xt_register_table(struct xt_table *table,
+		      struct xt_table_info *bootstrap,
+		      struct xt_table_info *newinfo)
+{
+	int ret;
+	struct module *mod = table->me;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct xt_table *tmp;
+		__module_get(mod);
+		ret = -ENOMEM;
+		tmp = ub_kmalloc(sizeof(struct xt_table), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, table, sizeof(struct xt_table));
+		table = tmp;
+	}
+
+	ret = xt_register_table(table, bootstrap, newinfo);
+	if (ret)
+		goto out;
+
+	return table;
+out:
+	if (!ve_is_super(get_exec_env())) {
+		kfree(table);
+nomem:
+		module_put(mod);
+	}
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(virt_xt_register_table);
+
 void *xt_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 
 	mutex_lock(&xt[table->af].mutex);
 	private = table->private;
-	LIST_DELETE(&xt[table->af].tables, table);
+	LIST_DELETE(&xt_tables(table->af), table);
 	mutex_unlock(&xt[table->af].mutex);
 
+	uncharge_xtables(private, private->number);
+
 	return private;
 }
 EXPORT_SYMBOL_GPL(xt_unregister_table);
 
+void *virt_xt_unregister_table(struct xt_table *table)
+{
+	void *ret;
+
+	ret = xt_unregister_table(table);
+	if (!ve_is_super(get_exec_env())) {
+		module_put(table->me);
+		kfree(table);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virt_xt_unregister_table);
+
 #ifdef CONFIG_PROC_FS
 static char *xt_proto_prefix[NPROTO] = {
 	[AF_INET]	= "ip",
@@ -594,7 +704,7 @@ static struct list_head *type2list(u_int
 		list = &xt[af].match;
 		break;
 	case TABLE:
-		list = &xt[af].tables;
+		list = &xt_tables(af);
 		break;
 	default:
 		list = NULL;
@@ -707,6 +817,7 @@ int xt_proto_init(int af)
 		return -EINVAL;
 
 
+	INIT_LIST_HEAD(&xt_tables(af));
 #ifdef CONFIG_PROC_FS
 	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
@@ -795,6 +906,6 @@ static void __exit xt_fini(void)
 	kfree(xt);
 }
 
-module_init(xt_init);
+subsys_initcall(xt_init);
 module_exit(xt_fini);
 
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_connlimit.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_connlimit.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_connlimit.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_connlimit.c	2017-01-13 08:40:41.000000000 -0500
@@ -156,6 +156,9 @@ connlimit_mt(const struct sk_buff *skb, 
 	int connections;
 	struct iphdr *iph;
 
+	if (!ve_ip_ct_initialized())
+		goto hotdrop;
+
 	ct = ip_conntrack_get(skb, &ctinfo);
 	if (ct != NULL)
 		tuple_ptr = &ct->tuplehash[0].tuple;
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_connmark.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_connmark.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_connmark.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_connmark.c	2017-01-13 08:40:15.000000000 -0500
@@ -82,11 +82,102 @@ destroy(const struct xt_match *match, vo
 #endif
 }
 
+#ifdef CONFIG_COMPAT
+static int connmark_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_match *pm;
+	struct xt_connmark_info *pinfo;
+	struct compat_xt_connmark_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct xt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct compat_xt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_connmark_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_connmark_info));
+	/* mark & mask fit in 32bit due to check in checkentry() */
+	rinfo.mark = (compat_ulong_t)pinfo->mark;
+	rinfo.mask = (compat_ulong_t)pinfo->mask;
+	rinfo.invert = pinfo->invert;
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_match),
+				&rinfo, sizeof(struct compat_xt_connmark_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int connmark_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_match *pm;
+	struct xt_entry_match *dstpm;
+	struct compat_xt_connmark_info *pinfo;
+	struct xt_connmark_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_xt_entry_match *)match;
+	dstpm = (struct xt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memset(*dstptr, 0, sizeof(struct xt_entry_match));
+	memcpy(*dstptr, pm, sizeof(struct compat_xt_entry_match));
+
+	pinfo = (struct compat_xt_connmark_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct xt_connmark_info));
+	rinfo.mark = pinfo->mark;
+	rinfo.mask = pinfo->mask;
+	rinfo.invert = pinfo->invert;
+
+	memcpy(*dstptr + sizeof(struct xt_entry_match), &rinfo,
+		sizeof(struct xt_connmark_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int connmark_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_connmark_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_connmark_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = connmark_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = connmark_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif /*CONFIG_COMPAT*/
+
 static struct xt_match connmark_match = {
 	.name		= "connmark",
 	.match		= match,
 	.matchsize	= sizeof(struct xt_connmark_info),
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= connmark_compat,
+#endif
 	.destroy	= destroy,
 	.family		= AF_INET,
 	.me		= THIS_MODULE
@@ -97,6 +188,9 @@ static struct xt_match connmark6_match =
 	.match		= match,
 	.matchsize	= sizeof(struct xt_connmark_info),
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= connmark_compat,
+#endif
 	.destroy	= destroy,
 	.family		= AF_INET6,
 	.me		= THIS_MODULE
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_CONNMARK.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_CONNMARK.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_CONNMARK.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_CONNMARK.c	2017-01-13 08:40:15.000000000 -0500
@@ -97,11 +97,102 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int connmark_reg_compat_to_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_target *pt;
+	struct xt_connmark_target_info *pinfo;
+	struct compat_xt_connmark_target_info rinfo;
+	u_int16_t tsize;
+
+	pt = (struct xt_entry_target *)target;
+	tsize = pt->u.user.target_size;
+	if (__copy_to_user(*dstptr, pt, sizeof(struct compat_xt_entry_target)))
+		return -EFAULT;
+	pinfo = (struct xt_connmark_target_info *)pt->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_connmark_target_info));
+	/* mark & mask fit in 32bit due to check in checkentry() */
+	rinfo.mark = (compat_ulong_t)pinfo->mark;
+	rinfo.mask = (compat_ulong_t)pinfo->mask;
+	rinfo.mode = pinfo->mode;
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_target),
+			&rinfo, sizeof(struct compat_xt_connmark_target_info)))
+		return -EFAULT;
+	tsize -= off;
+	if (put_user(tsize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int connmark_reg_compat_from_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_target *pt;
+	struct xt_entry_target *dstpt;
+	struct compat_xt_connmark_target_info *pinfo;
+	struct xt_connmark_target_info rinfo;
+	u_int16_t tsize;
+
+	pt = (struct compat_xt_entry_target *)target;
+	dstpt = (struct xt_entry_target *)*dstptr;
+	tsize = pt->u.user.target_size;
+	memset(*dstptr, 0, sizeof(struct xt_entry_target));
+	memcpy(*dstptr, pt, sizeof(struct compat_xt_entry_target));
+
+	pinfo = (struct compat_xt_connmark_target_info *)pt->data;
+	memset(&rinfo, 0, sizeof(struct xt_connmark_target_info));
+	rinfo.mark = pinfo->mark;
+	rinfo.mask = pinfo->mask;
+	rinfo.mode = pinfo->mode;
+
+	memcpy(*dstptr + sizeof(struct xt_entry_target),
+				&rinfo, sizeof(struct xt_connmark_target_info));
+	tsize += off;
+	dstpt->u.user.target_size = tsize;
+	*size += off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int connmark_reg_compat(void *target, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_connmark_target_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_connmark_target_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = connmark_reg_compat_to_user(target,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = connmark_reg_compat_from_user(target,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif /*CONFIG_COMPAT*/
+
 static struct xt_target connmark_reg = {
 	.name		= "CONNMARK",
 	.target		= target,
 	.targetsize	= sizeof(struct xt_connmark_target_info),
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= connmark_reg_compat,
+#endif
 	.family		= AF_INET,
 	.me		= THIS_MODULE
 };
@@ -111,6 +202,9 @@ static struct xt_target connmark6_reg = 
 	.target		= target,
 	.targetsize	= sizeof(struct xt_connmark_target_info),
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= connmark_reg_compat,
+#endif
 	.family		= AF_INET6,
 	.me		= THIS_MODULE
 };
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_conntrack.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_conntrack.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_conntrack.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_conntrack.c	2017-01-13 08:40:23.000000000 -0500
@@ -229,12 +229,109 @@ destroy(const struct xt_match *match, vo
 #endif
 }
 
+#ifdef CONFIG_COMPAT
+static int conntrack_match_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_match *pm;
+	struct xt_conntrack_info *pinfo;
+	struct compat_xt_conntrack_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct xt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct compat_xt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_conntrack_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_conntrack_info));
+	/* expires_{min,max} fit in 32bit cause they are read only args */
+	memcpy(&rinfo, pinfo,
+		offsetof(struct compat_xt_conntrack_info, expires_min));
+	rinfo.expires_min = (compat_ulong_t)pinfo->expires_min;
+	rinfo.expires_max = (compat_ulong_t)pinfo->expires_max;
+	rinfo.flags = pinfo->flags;
+	rinfo.invflags = pinfo->invflags;
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_match),
+			&rinfo, sizeof(struct compat_xt_conntrack_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int conntrack_match_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_match *pm;
+	struct xt_entry_match *dstpm;
+	struct compat_xt_conntrack_info *pinfo;
+	struct xt_conntrack_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_xt_entry_match *)match;
+	dstpm = (struct xt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memset(*dstptr, 0, sizeof(struct xt_entry_match));
+	memcpy(*dstptr, pm, sizeof(struct compat_xt_entry_match));
+
+	pinfo = (struct compat_xt_conntrack_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct xt_conntrack_info));
+	memcpy(&rinfo, pinfo,
+		offsetof(struct compat_xt_conntrack_info, expires_min));
+	rinfo.expires_min = pinfo->expires_min;
+	rinfo.expires_max = pinfo->expires_max;
+	rinfo.flags = pinfo->flags;
+	rinfo.invflags = pinfo->invflags;
+
+	memcpy(*dstptr + sizeof(struct xt_entry_match),
+				&rinfo, sizeof(struct xt_conntrack_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int conntrack_match_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_conntrack_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_conntrack_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = conntrack_match_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = conntrack_match_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif /*CONFIG_COMPAT*/
+
 static struct xt_match conntrack_match = {
 	.name		= "conntrack",
 	.match		= match,
 	.checkentry	= checkentry,
 	.destroy	= destroy,
 	.matchsize	= sizeof(struct xt_conntrack_info),
+#ifdef CONFIG_COMPAT
+	.compat		= conntrack_match_compat,
+#endif
 	.family		= AF_INET,
 	.me		= THIS_MODULE,
 };
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_limit.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_limit.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_limit.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_limit.c	2017-01-13 08:40:23.000000000 -0500
@@ -118,7 +118,7 @@ ipt_limit_checkentry(const char *tablena
 	/* Check for overflow. */
 	if (r->burst == 0
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Overflow in xt_limit, try lower: %u/%u\n",
+		ve_printk(VE_LOG, "Overflow in xt_limit, try lower: %u/%u\n",
 		       r->avg, r->burst);
 		return 0;
 	}
@@ -136,11 +136,96 @@ ipt_limit_checkentry(const char *tablena
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int ipt_limit_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_match *pm;
+	struct xt_rateinfo *pinfo;
+	struct compat_xt_rateinfo rinfo;
+	u_int16_t msize;
+
+	pm = (struct xt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct compat_xt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_rateinfo));
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_match),
+				&rinfo, sizeof(struct compat_xt_rateinfo)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int ipt_limit_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_match *pm;
+	struct xt_entry_match *dstpm;
+	struct compat_xt_rateinfo *pinfo;
+	struct xt_rateinfo rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_xt_entry_match *)match;
+	dstpm = (struct xt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_xt_entry_match));
+	pinfo = (struct compat_xt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct xt_rateinfo));
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	memcpy(*dstptr + sizeof(struct xt_entry_match),
+				&rinfo, sizeof(struct xt_rateinfo));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int ipt_limit_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_rateinfo)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_rateinfo));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = ipt_limit_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = ipt_limit_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 static struct xt_match ipt_limit_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.matchsize	= sizeof(struct xt_rateinfo),
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.family		= AF_INET,
 	.me		= THIS_MODULE,
 };
@@ -149,6 +234,9 @@ static struct xt_match limit6_reg = {
 	.match		= ipt_limit_match,
 	.matchsize	= sizeof(struct xt_rateinfo),
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.family		= AF_INET6,
 	.me		= THIS_MODULE,
 };
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_mark.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_mark.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_mark.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_mark.c	2017-01-13 08:40:15.000000000 -0500
@@ -51,11 +51,102 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int mark_match_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_match *pm;
+	struct xt_mark_info *pinfo;
+	struct compat_xt_mark_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct xt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct compat_xt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_mark_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_mark_info));
+	/* mark & mask fit in 32bit due to check in checkentry() */
+	rinfo.mark = (compat_ulong_t)pinfo->mark;
+	rinfo.mask = (compat_ulong_t)pinfo->mask;
+	rinfo.invert = pinfo->invert;
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_match),
+			&rinfo, sizeof(struct compat_xt_mark_info)))
+		return -EFAULT;
+	msize -= off;
+	if (put_user(msize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int mark_match_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_match *pm;
+	struct xt_entry_match *dstpm;
+	struct compat_xt_mark_info *pinfo;
+	struct xt_mark_info rinfo;
+	u_int16_t msize;
+
+	pm = (struct compat_xt_entry_match *)match;
+	dstpm = (struct xt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memset(*dstptr, 0, sizeof(struct xt_entry_match));
+	memcpy(*dstptr, pm, sizeof(struct compat_xt_entry_match));
+
+	pinfo = (struct compat_xt_mark_info *)pm->data;
+	memset(&rinfo, 0, sizeof(struct xt_mark_info));
+	rinfo.mark = pinfo->mark;
+	rinfo.mask = pinfo->mask;
+	rinfo.invert = pinfo->invert;
+
+	memcpy(*dstptr + sizeof(struct xt_entry_match),
+				&rinfo, sizeof(struct xt_mark_info));
+	msize += off;
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;
+	return 0;
+}
+
+static int mark_match_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_mark_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_mark_info));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = mark_match_compat_to_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = mark_match_compat_from_user(match,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif /*CONFIG_COMPAT*/
+
 static struct xt_match mark_match = {
 	.name		= "mark",
 	.match		= match,
 	.matchsize	= sizeof(struct xt_mark_info),
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= mark_match_compat,
+#endif
 	.family		= AF_INET,
 	.me		= THIS_MODULE,
 };
@@ -65,6 +156,9 @@ static struct xt_match mark6_match = {
 	.match		= match,
 	.matchsize	= sizeof(struct xt_mark_info),
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= mark_match_compat,
+#endif
 	.family		= AF_INET6,
 	.me		= THIS_MODULE,
 };
diff -upr kernel-2.6.18-417.el5.orig/net/netfilter/xt_MARK.c kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_MARK.c
--- kernel-2.6.18-417.el5.orig/net/netfilter/xt_MARK.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netfilter/xt_MARK.c	2017-01-13 08:40:23.000000000 -0500
@@ -82,7 +82,8 @@ checkentry_v0(const char *tablename,
 	struct xt_mark_target_info *markinfo = targinfo;
 
 	if (markinfo->mark > 0xffffffff) {
-		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		ve_printk(VE_LOG, KERN_WARNING "MARK: Only supports 32bit wide"
+								" mark\n");
 		return 0;
 	}
 	return 1;
@@ -101,17 +102,104 @@ checkentry_v1(const char *tablename,
 	if (markinfo->mode != XT_MARK_SET
 	    && markinfo->mode != XT_MARK_AND
 	    && markinfo->mode != XT_MARK_OR) {
-		printk(KERN_WARNING "MARK: unknown mode %u\n",
+		ve_printk(VE_LOG, KERN_WARNING "MARK: unknown mode %u\n",
 		       markinfo->mode);
 		return 0;
 	}
 	if (markinfo->mark > 0xffffffff) {
-		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		ve_printk(VE_LOG, KERN_WARNING "MARK: Only supports 32bit wide"
+								" mark\n");
 		return 0;
 	}
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int mark_reg_v1_compat_to_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct xt_entry_target *pt;
+	struct xt_mark_target_info_v1 *pinfo;
+	struct compat_xt_mark_target_info_v1 rinfo;
+	u_int16_t tsize;
+
+	pt = (struct xt_entry_target *)target;
+	tsize = pt->u.user.target_size;
+	if (__copy_to_user(*dstptr, pt, sizeof(struct compat_xt_entry_target)))
+		return -EFAULT;
+	pinfo = (struct xt_mark_target_info_v1 *)pt->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_mark_target_info_v1));
+	/* mark fit in 32bit due to check in checkentry() */
+	rinfo.mark = (compat_ulong_t)pinfo->mark;
+	rinfo.mode = pinfo->mode;
+	if (__copy_to_user(*dstptr + sizeof(struct compat_xt_entry_target),
+			&rinfo, sizeof(struct compat_xt_mark_target_info_v1)))
+		return -EFAULT;
+	tsize -= off;
+	if (put_user(tsize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int mark_reg_v1_compat_from_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct compat_xt_entry_target *pt;
+	struct xt_entry_target *dstpt;
+	struct compat_xt_mark_target_info_v1 *pinfo;
+	struct xt_mark_target_info_v1 rinfo;
+	u_int16_t tsize;
+
+	pt = (struct compat_xt_entry_target *)target;
+	dstpt = (struct xt_entry_target *)*dstptr;
+	tsize = pt->u.user.target_size;
+	memset(*dstptr, 0, sizeof(struct xt_entry_target));
+	memcpy(*dstptr, pt, sizeof(struct compat_xt_entry_target));
+
+	pinfo = (struct compat_xt_mark_target_info_v1 *)pt->data;
+	memset(&rinfo, 0, sizeof(struct xt_mark_target_info_v1));
+	rinfo.mark = pinfo->mark;
+	rinfo.mode = pinfo->mode;
+
+	memcpy(*dstptr + sizeof(struct xt_entry_target),
+				&rinfo, sizeof(struct xt_mark_target_info_v1));
+	tsize += off;
+	dstpt->u.user.target_size = tsize;
+	*size += off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int mark_reg_v1_compat(void *target, void **dstptr,
+		int *size, int convert)
+{
+	int ret, off;
+
+	off = XT_ALIGN(sizeof(struct xt_mark_target_info_v1)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_mark_target_info_v1));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = mark_reg_v1_compat_to_user(target,
+					dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = mark_reg_v1_compat_from_user(target,
+					dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif /*CONFIG_COMPAT*/
+
 static struct xt_target ipt_mark_reg_v0 = {
 	.name		= "MARK",
 	.target		= target_v0,
@@ -129,6 +217,9 @@ static struct xt_target ipt_mark_reg_v1 
 	.targetsize	= sizeof(struct xt_mark_target_info_v1),
 	.table		= "mangle",
 	.checkentry	= checkentry_v1,
+#ifdef CONFIG_COMPAT
+	.compat		= mark_reg_v1_compat,
+#endif
 	.me		= THIS_MODULE,
 	.family		= AF_INET,
 	.revision	= 1,
diff -upr kernel-2.6.18-417.el5.orig/net/netlink/af_netlink.c kernel-2.6.18-417.el5-028stab121/net/netlink/af_netlink.c
--- kernel-2.6.18-417.el5.orig/net/netlink/af_netlink.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netlink/af_netlink.c	2017-01-13 08:40:40.000000000 -0500
@@ -60,36 +60,17 @@
 #include <net/sock.h>
 #include <net/scm.h>
 #include <net/netlink.h>
+#include <net/netlink_sock.h>
 
-#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
+#include <ub/ub_sk.h>
 
-struct netlink_sock {
-	/* struct sock has to be the first member of netlink_sock */
-	struct sock		sk;
-	u32			pid;
-	u32			dst_pid;
-	u32			dst_group;
-	u32			flags;
-	u32			subscriptions;
-	u32			ngroups;
-	unsigned long		*groups;
-	unsigned long		state;
-	wait_queue_head_t	wait;
-	struct netlink_callback	*cb;
-	spinlock_t		cb_lock;
-	void			(*data_ready)(struct sock *sk, int bytes);
-	struct module		*module;
-	kernel_cap_t		f_eff_cap;
-};
+#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 
 #define NETLINK_KERNEL_SOCKET	0x1
 #define NETLINK_RECV_PKTINFO	0x2
 
-static inline struct netlink_sock *nlk_sk(struct sock *sk)
-{
-	return (struct netlink_sock *)sk;
-}
-
 struct nl_pid_hash {
 	struct hlist_head *table;
 	unsigned long rehash_time;
@@ -219,7 +200,10 @@ static __inline__ struct sock *netlink_l
 	read_lock(&nl_table_lock);
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(sk, node, head) {
-		if (nlk_sk(sk)->pid == pid) {
+		/* VEs should find sockets, created by kernel */
+		if ((nlk_sk(sk)->pid == pid) &&
+				(!pid || ve_accessible_strict(sk->owner_env,
+							      get_exec_env()))){
 			sock_hold(sk);
 			goto found;
 		}
@@ -337,7 +321,9 @@ static int netlink_insert(struct sock *s
 	head = nl_pid_hashfn(hash, pid);
 	len = 0;
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid)
+		if ((nlk_sk(osk)->pid == pid) &&
+				ve_accessible_strict(osk->owner_env,
+					get_exec_env()))
 			break;
 		len++;
 	}
@@ -390,6 +376,8 @@ static int __netlink_create(struct socke
 	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
 	if (!sk)
 		return -ENOMEM;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock_init_data(sock, sk);
 
@@ -400,6 +388,10 @@ static int __netlink_create(struct socke
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
 	return 0;
+
+out_free:
+	sk_free(sk);
+	return -ENOMEM;
 }
 
 static int netlink_create(struct socket *sock, int protocol)
@@ -459,6 +451,7 @@ static int netlink_release(struct socket
 		return 0;
 
 	netlink_remove(sk);
+	sock_orphan(sk);
 	nlk = nlk_sk(sk);
 
 	spin_lock(&nlk->cb_lock);
@@ -473,7 +466,6 @@ static int netlink_release(struct socket
 	/* OK. Socket is unlinked, and, therefore,
 	   no new packets will arrive */
 
-	sock_orphan(sk);
 	sock->sk = NULL;
 	wake_up_interruptible_all(&nlk->wait);
 
@@ -514,7 +506,7 @@ static int netlink_autobind(struct socke
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = current->tgid;
+	s32 pid = virt_pid(current);
 	int err;
 	static s32 rover = -4097;
 
@@ -523,7 +515,9 @@ retry:
 	netlink_table_grab();
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid) {
+		if ((nlk_sk(osk)->pid == pid) &&
+				ve_accessible_strict(osk->owner_env,
+					get_exec_env())) {
 			/* Bind collision, search negative pid values. */
 			pid = rover--;
 			if (rover > -4097)
@@ -548,7 +542,7 @@ retry:
 static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
 { 
 	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
-	       capable(CAP_NET_ADMIN);
+	       capable(CAP_VE_NET_ADMIN);
 } 
 
 static void
@@ -820,6 +814,21 @@ static inline struct sk_buff *netlink_tr
 	return skb;
 }
 
+static int netlink_rcv_sync(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (nlk->netlink_rcv == NULL)
+		return 0;
+
+	ret = skb->len;
+	skb_set_owner_r(skb, sk);
+	nlk->netlink_rcv(skb);
+	sock_put(sk);
+	return ret;
+}
+
 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
 {
 	struct sock *sk;
@@ -835,6 +844,13 @@ retry:
 		kfree_skb(skb);
 		return PTR_ERR(sk);
 	}
+
+	if (nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET) {
+		err = netlink_rcv_sync(sk, skb);
+		if (err != 0)
+			return err;
+	}
+
 	err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
 	if (err == 1)
 		goto retry;
@@ -893,6 +909,9 @@ static inline int do_one_broadcast(struc
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), sk->owner_env))
+		goto out;
+
 	if (p->failure) {
 		netlink_overrun(sk);
 		goto out;
@@ -990,6 +1009,9 @@ static inline int do_one_set_err(struct 
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), sk->owner_env))
+		goto out;
+
 	sk->sk_err = p->code;
 	sk->sk_error_report(sk);
 out:
@@ -1125,13 +1147,18 @@ static int netlink_sendmsg(struct kiocb 
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sockaddr_nl *addr=msg->msg_name;
+	struct sockaddr_nl *addr = msg->msg_name;
 	u32 dst_pid;
-	u32 dst_group;
 	struct sk_buff *skb;
 	int err;
 	struct scm_cookie scm;
 	u32 netlink_skb_flags = 0;
+	struct sock *dstsk;
+	long timeo;
+	int no_ubc, no_buf;
+	unsigned long chargesize;
+
+	DECLARE_WAITQUEUE(wait, current);
 
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -1142,19 +1169,18 @@ static int netlink_sendmsg(struct kiocb 
 	if (err < 0)
 		return err;
 
+	/* Broadcasts from user to kernel are disabled. This is OK
+	 * according to ANK */
 	if (msg->msg_namelen) {
 		if (addr->nl_family != AF_NETLINK)
 			return -EINVAL;
 		dst_pid = addr->nl_pid;
-		dst_group = ffs(addr->nl_groups);
-		if ((dst_group || dst_pid) &&
+		if ((addr->nl_groups || dst_pid) &&
 		    !netlink_allowed(sock, NL_NONROOT_SEND))
 			return -EPERM;
 		netlink_skb_flags |= NETLINK_SKB_DST;
-	} else {
+	} else
 		dst_pid = nlk->dst_pid;
-		dst_group = nlk->dst_group;
-	}
 
 	if (!nlk->pid) {
 		err = netlink_autobind(sock);
@@ -1167,12 +1193,12 @@ static int netlink_sendmsg(struct kiocb 
 		goto out;
 	err = -ENOBUFS;
 	skb = alloc_skb(len, GFP_KERNEL);
-	if (skb==NULL)
+	if (skb == NULL)
 		goto out;
 
 	NETLINK_CB(skb).pid	= nlk->pid;
 	NETLINK_CB(skb).dst_pid = dst_pid;
-	NETLINK_CB(skb).dst_group = dst_group;
+	NETLINK_CB(skb).dst_group = 0;
 	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
 	selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
@@ -1186,25 +1212,93 @@ static int netlink_sendmsg(struct kiocb 
 	 */
 
 	err = -EFAULT;
-	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
+		goto out_free;
 
 	err = security_netlink_send(sk, skb);
-	if (err) {
-		kfree_skb(skb);
-		goto out;
+	if (err)
+		goto out_free;
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
+retry:
+	dstsk = netlink_getsockbypid(sk, dst_pid);
+	if (IS_ERR(dstsk)) {
+		err = PTR_ERR(dstsk);
+		goto out_free;
+	}
+	if (nlk_sk(dstsk)->flags & NETLINK_KERNEL_SOCKET) {
+		err = netlink_rcv_sync(dstsk, skb);
+		if (err != 0)
+			return err;
 	}
 
-	if (dst_group) {
-		atomic_inc(&skb->users);
-		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
+	nlk = nlk_sk(dstsk);
+#ifdef NL_EMULATE_DEV
+	if (nlk->handler) {
+		skb_orphan(skb);
+		err = nlk->handler(protocol, skb);
+		goto out_put;
 	}
-	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+#endif
+
+	/* BTW, it could be done once, before the retry loop */
+	chargesize = skb_charge_fullsize(skb);
+	no_ubc = ub_sock_getwres_other(sk, chargesize);
+	no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		test_bit(0, &nlk->state);
+	if (no_ubc || no_buf) {
+		wait_queue_head_t *sleep;
+
+		if (!no_ubc)
+			ub_sock_retwres_other(sk, chargesize,
+					      SOCK_MIN_UBCSPACE_CH);
+		err = -EAGAIN;
+		if (timeo == 0) {
+			kfree_skb(skb);
+			goto out_put;
+		}
+
+		/* wake up comes to different queues */
+		sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
+		__set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(sleep, &wait);
 
+		/* this if can't be moved upper because ub_sock_snd_queue_add()
+		 * may change task state to TASK_RUNNING */
+		if (no_ubc)
+			ub_sock_sndqueueadd_other(sk, chargesize);
+
+		if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		     test_bit(0, &nlk->state) || no_ubc) &&
+		    !sock_flag(dstsk, SOCK_DEAD))
+			timeo = schedule_timeout(timeo);
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(sleep, &wait);
+		if (no_ubc)
+			ub_sock_sndqueuedel(sk);
+		sock_put(dstsk);
+
+		if (!signal_pending(current))
+			goto retry;
+		err = sock_intr_errno(timeo);
+		goto out_free;
+	}
+
+	skb_orphan(skb);
+	skb_set_owner_r(skb, dstsk);
+	ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
+	skb_queue_tail(&dstsk->sk_receive_queue, skb);
+	dstsk->sk_data_ready(dstsk, len);
+	err = len;
+out_put:
+	sock_put(dstsk);
 out:
 	return err;
+
+out_free:
+	kfree_skb(skb);
+	return err;
 }
 
 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
@@ -1322,6 +1416,7 @@ netlink_kernel_create(int unit, unsigned
 
 	nlk = nlk_sk(sk);
 	nlk->flags |= NETLINK_KERNEL_SOCKET;
+	nlk->netlink_rcv = NULL;
 
 	netlink_table_grab();
 	nl_table[unit].groups = groups;
@@ -1367,6 +1462,10 @@ static int netlink_dump(struct sock *sk)
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
+	if (ub_nlrcvbuf_charge(skb, sk) < 0) {
+		kfree_skb(skb);
+		return -EACCES;
+	}
 
 	spin_lock(&nlk->cb_lock);
 
@@ -1428,9 +1527,9 @@ int netlink_dump_start(struct sock *ssk,
 		return -ECONNREFUSED;
 	}
 	nlk = nlk_sk(sk);
-	/* A dump is in progress... */
+	/* A dump or destruction is in progress... */
 	spin_lock(&nlk->cb_lock);
-	if (nlk->cb) {
+	if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
 		spin_unlock(&nlk->cb_lock);
 		netlink_destroy_callback(cb);
 		sock_put(sk);
@@ -1534,8 +1633,18 @@ void netlink_run_queue(struct sock *sk, 
 		*qlen = skb_queue_len(&sk->sk_receive_queue);
 
 	for (; *qlen; (*qlen)--) {
+		int ret;
+		struct ve_struct *old_env;
+		struct user_beancounter *old_ub;
 		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (netlink_rcv_skb(skb, cb)) {
+
+		old_env = set_exec_env(skb->owner_env);
+		old_ub = set_exec_ub(skb_bc(skb)->ub);
+		ret = netlink_rcv_skb(skb, cb);
+		(void)set_exec_ub(old_ub);
+		(void)set_exec_env(old_env);
+
+		if (ret) {
 			if (skb->len)
 				skb_queue_head(&sk->sk_receive_queue, skb);
 			else {
@@ -1819,6 +1928,7 @@ static int __init netlink_proto_init(voi
 
 	sock_register(&netlink_family_ops);
 #ifdef CONFIG_PROC_FS
+	/* FIXME: virtualize before give access from VEs */
 	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
 #endif
 	/* The netlink device handler may be needed early. */ 
diff -upr kernel-2.6.18-417.el5.orig/net/netlink/attr.c kernel-2.6.18-417.el5-028stab121/net/netlink/attr.c
--- kernel-2.6.18-417.el5.orig/net/netlink/attr.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netlink/attr.c	2017-01-13 08:40:21.000000000 -0500
@@ -152,7 +152,7 @@ int nla_parse(struct nlattr *tb[], int m
 	}
 
 	if (unlikely(rem > 0))
-		printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
+		ve_printk(VE_LOG, KERN_WARNING "netlink: %d bytes leftover after parsing "
 		       "attributes.\n", rem);
 
 	err = 0;
diff -upr kernel-2.6.18-417.el5.orig/net/netlink/genetlink.c kernel-2.6.18-417.el5-028stab121/net/netlink/genetlink.c
--- kernel-2.6.18-417.el5.orig/net/netlink/genetlink.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/netlink/genetlink.c	2017-01-13 08:40:19.000000000 -0500
@@ -311,7 +311,8 @@ static int genl_rcv_msg(struct sk_buff *
 		goto errout;
 	}
 
-	if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) {
+	if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb,
+				CAP_VE_NET_ADMIN)) {
 		err = -EPERM;
 		goto errout;
 	}
diff -upr kernel-2.6.18-417.el5.orig/net/packet/af_packet.c kernel-2.6.18-417.el5-028stab121/net/packet/af_packet.c
--- kernel-2.6.18-417.el5.orig/net/packet/af_packet.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/packet/af_packet.c	2017-01-13 08:40:34.000000000 -0500
@@ -79,6 +79,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
+#include <ub/ub_net.h>
+
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
 #endif
@@ -213,6 +215,9 @@ struct packet_sock {
 	unsigned int            pg_vec_order;
 	unsigned int		pg_vec_pages;
 	unsigned int		pg_vec_len;
+	enum tpacket_versions	tp_version;
+	unsigned int		tp_hdrlen;
+	unsigned int		tp_reserve;
 #endif
 };
 
@@ -230,17 +235,54 @@ extern int skb_checksum_setup(struct sk_
 
 #ifdef CONFIG_PACKET_MMAP
 
-static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
+static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
+				 int status)
 {
 	unsigned int pg_vec_pos, frame_offset;
-	char *frame;
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
 
 	pg_vec_pos = position / po->frames_per_block;
 	frame_offset = position % po->frames_per_block;
 
-	frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
-	
-	return frame;
+	h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
+	switch (po->tp_version) {
+	case TPACKET_V1:
+		if (status != (h.h1->tp_status ? TP_STATUS_USER :
+					       TP_STATUS_KERNEL))
+			return NULL;
+		break;
+	case TPACKET_V2:
+	case TPACKET_V1_COMPAT:
+		if (status != (h.h2->tp_status ? TP_STATUS_USER :
+					       TP_STATUS_KERNEL))
+			return NULL;
+		break;
+	}
+	return h.raw;
+}
+
+static void __packet_set_status(struct packet_sock *po, void *frame, int status)
+{
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
+
+	h.raw = frame;
+	switch (po->tp_version) {
+	case TPACKET_V1:
+		h.h1->tp_status = status;
+		break;
+	case TPACKET_V2:
+	case TPACKET_V1_COMPAT:
+		h.h2->tp_status = status;
+		break;
+	}
 }
 #endif
 
@@ -294,7 +336,8 @@ static int packet_rcv_spkt(struct sk_buf
 	 *	so that this procedure is noop.
 	 */
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+			!ve_accessible(skb->owner_env, sk->owner_env))
 		goto out;
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -486,6 +529,11 @@ static int packet_rcv(struct sk_buff *sk
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(skb->owner_env, sk->owner_env))
+		goto drop;
+
+	skb_orphan(skb);
+
 	skb->dev = dev;
 
 	if (dev->hard_header) {
@@ -556,6 +604,9 @@ static int packet_rcv(struct sk_buff *sk
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
+	if (ub_sockrcvbuf_charge(sk, skb))
+		goto drop_n_acct;
+
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
 	dst_release(skb->dst);
@@ -592,12 +643,16 @@ static int tpacket_rcv(struct sk_buff *s
 	struct sock *sk;
 	struct packet_sock *po;
 	struct sockaddr_ll *sll;
-	struct tpacket_hdr *h;
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
 	u8 * skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned snaplen;
 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
-	unsigned short macoff, netoff;
+	unsigned short macoff, netoff, hdrlen;
 	struct sk_buff *copy_skb = NULL;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
@@ -606,6 +661,11 @@ static int tpacket_rcv(struct sk_buff *s
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(skb->owner_env, sk->owner_env))
+		goto drop;
+
+	skb_orphan(skb);
+
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb->mac.raw);
@@ -631,10 +691,13 @@ static int tpacket_rcv(struct sk_buff *s
 		status |= TP_STATUS_CSUMNOTREADY;
 
 	if (sk->sk_type == SOCK_DGRAM) {
-		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
+		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
+				  po->tp_reserve;
 	} else {
 		unsigned maclen = skb->nh.raw - skb->data;
-		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
+		netoff = TPACKET_ALIGN(po->tp_hdrlen +
+				       (maclen < 16 ? 16 : maclen)) +
+			po->tp_reserve;
 		macoff = netoff - maclen;
 	}
 
@@ -656,10 +719,15 @@ static int tpacket_rcv(struct sk_buff *s
 			snaplen = 0;
 	}
 
+	if (copy_skb &&
+	    ub_sockrcvbuf_charge(sk, copy_skb)) {
+		spin_lock(&sk->sk_receive_queue.lock);
+		goto ring_is_full;
+	}
+
 	spin_lock(&sk->sk_receive_queue.lock);
-	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
-	
-	if (h->tp_status)
+	h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
+	if (!h.raw)
 		goto ring_is_full;
 	po->head = po->head != po->frame_max ? po->head+1 : 0;
 	po->stats.tp_packets++;
@@ -671,20 +739,46 @@ static int tpacket_rcv(struct sk_buff *s
 		status &= ~TP_STATUS_LOSING;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-	skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
+	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
-	h->tp_len = skb->len;
-	h->tp_snaplen = snaplen;
-	h->tp_mac = macoff;
-	h->tp_net = netoff;
-	if (skb->tstamp.off_sec == 0) { 
+	if (skb->tstamp.off_sec == 0) {
 		__net_timestamp(skb);
 		sock_enable_timestamp(sk);
 	}
-	h->tp_sec = skb->tstamp.off_sec;
-	h->tp_usec = skb->tstamp.off_usec;
 
-	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
+	switch (po->tp_version) {
+	case TPACKET_V1:
+		h.h1->tp_len = skb->len;
+		h.h1->tp_snaplen = snaplen;
+		h.h1->tp_mac = macoff;
+		h.h1->tp_net = netoff;
+		h.h1->tp_sec = skb->tstamp.off_sec;
+		h.h1->tp_usec = skb->tstamp.off_usec;
+		hdrlen = sizeof(*h.h1);
+		break;
+	case TPACKET_V2:
+		h.h2->tp_len = skb->len;
+		h.h2->tp_snaplen = snaplen;
+		h.h2->tp_mac = macoff;
+		h.h2->tp_net = netoff;
+		h.h2->tp_sec = skb->tstamp.off_sec;
+		h.h2->tp_nsec = skb->tstamp.off_usec * NSEC_PER_USEC;
+		hdrlen = sizeof(*h.h2);
+		break;
+	case TPACKET_V1_COMPAT:
+		h.h2->tp_len = skb->len;
+		h.h2->tp_snaplen = snaplen;
+		h.h2->tp_mac = macoff;
+		h.h2->tp_net = netoff;
+		h.h2->tp_sec = skb->tstamp.off_sec;
+		h.h2->tp_nsec = skb->tstamp.off_usec;
+		hdrlen = sizeof(*h.h2);
+		break;
+	default:
+		BUG();
+	}
+
+	sll = h.raw + TPACKET_ALIGN(hdrlen);
 	sll->sll_halen = 0;
 	if (dev->hard_header_parse)
 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
@@ -697,14 +791,14 @@ static int tpacket_rcv(struct sk_buff *s
 	else
 		sll->sll_ifindex = dev->ifindex;
 
-	h->tp_status = status;
+	__packet_set_status(po, h.raw, status);
 	mb();
 
 	{
 		struct page *p_start, *p_end;
-		u8 *h_end = (u8 *)h + macoff + snaplen - 1;
+		u8 *h_end = h.raw + macoff + snaplen - 1;
 
-		p_start = virt_to_page(h);
+		p_start = virt_to_page(h.raw);
 		p_end = virt_to_page(h_end);
 		while (p_start <= p_end) {
 			flush_dcache_page(p_start);
@@ -1033,6 +1127,8 @@ static int packet_create(struct socket *
 	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
 	if (sk == NULL)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock->ops = &packet_ops;
 #ifdef CONFIG_SOCK_PACKET
@@ -1071,6 +1167,9 @@ static int packet_create(struct socket *
 	sk_add_node(sk, &packet_sklist);
 	write_unlock_bh(&packet_sklist_lock);
 	return(0);
+
+out_free:
+	sk_free(sk);
 out:
 	return err;
 }
@@ -1416,6 +1515,38 @@ packet_setsockopt(struct socket *sock, i
 		pkt_sk(sk)->copy_thresh = val;
 		return 0;
 	}
+	case PACKET_VERSION:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (po->pg_vec)
+			return -EBUSY;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		switch (val) {
+		case TPACKET_V1:
+		case TPACKET_V2:
+			po->tp_version = val;
+			return 0;
+		default:
+			return -EINVAL;
+		}
+	}
+	case PACKET_RESERVE:
+	{
+		unsigned int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (po->pg_vec)
+			return -EBUSY;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		po->tp_reserve = val;
+		return 0;
+	}
 #endif
 	case PACKET_AUXDATA:
 	{
@@ -1491,6 +1622,47 @@ static int packet_getsockopt(struct sock
 
 		data = &val;
 		break;
+#ifdef CONFIG_PACKET_MMAP
+	case PACKET_VERSION:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		switch (po->tp_version) {
+			case TPACKET_V1_COMPAT:
+				val = TPACKET_V1; break;
+			default:
+				val = po->tp_version;
+		}
+		data = &val;
+		break;
+	case PACKET_HDRLEN:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		if (copy_from_user(&val, optval, len))
+			return -EFAULT;
+#ifdef CONFIG_IA32_EMULATION
+		if (val == TPACKET_V1 && test_thread_flag(TIF_IA32))
+			val = TPACKET_V1_COMPAT;
+#endif
+		switch (val) {
+		case TPACKET_V1:
+			val = sizeof(struct tpacket_hdr);
+			break;
+		case TPACKET_V2:
+		case TPACKET_V1_COMPAT:
+			val = sizeof(struct tpacket2_hdr);
+			break;
+		default:
+			return -EINVAL;
+		}
+		data = &val;
+		break;
+	case PACKET_RESERVE:
+		if (len > sizeof(unsigned int))
+			len = sizeof(unsigned int);
+		val = po->tp_reserve;
+		data = &val;
+		break;
+#endif
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1508,11 +1680,16 @@ static int packet_notifier(struct notifi
 	struct sock *sk;
 	struct hlist_node *node;
 	struct net_device *dev = (struct net_device*)data;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	read_lock(&packet_sklist_lock);
 	sk_for_each(sk, node, &packet_sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
+		if (!ve_accessible_strict(sk->owner_env, ve))
+			continue;
+
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 #ifdef CONFIG_PACKET_MULTICAST
@@ -1620,11 +1797,8 @@ static unsigned int packet_poll(struct f
 	spin_lock_bh(&sk->sk_receive_queue.lock);
 	if (po->pg_vec) {
 		unsigned last = po->head ? po->head-1 : po->frame_max;
-		struct tpacket_hdr *h;
 
-		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
-
-		if (h->tp_status)
+		if (packet_lookup_frame(po, last, TP_STATUS_USER))
 			mask |= POLLIN | POLLRDNORM;
 	}
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1723,11 +1897,26 @@ static int packet_set_ring(struct sock *
 		if (unlikely(po->pg_vec))
 			return -EBUSY;
 
+#ifdef CONFIG_IA32_EMULATION
+		if (po->tp_version == TPACKET_V1 && test_thread_flag(TIF_IA32))
+			po->tp_version = TPACKET_V1_COMPAT;
+#endif
+		switch (po->tp_version) {
+		case TPACKET_V1:
+			po->tp_hdrlen = TPACKET_HDRLEN;
+			break;
+		case TPACKET_V2:
+		case TPACKET_V1_COMPAT:
+			po->tp_hdrlen = TPACKET2_HDRLEN;
+			break;
+		}
+
 		if (unlikely((int)req->tp_block_size <= 0))
 			return -EINVAL;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			return -EINVAL;
-		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
+		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
+						  po->tp_reserve))
 			return -EINVAL;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
@@ -1747,13 +1936,11 @@ static int packet_set_ring(struct sock *
 
 		l = 0;
 		for (i = 0; i < req->tp_block_nr; i++) {
-			char *ptr = pg_vec[i];
-			struct tpacket_hdr *header;
+			void *ptr = pg_vec[i];
 			int k;
 
 			for (k = 0; k < po->frames_per_block; k++) {
-				header = (struct tpacket_hdr *) ptr;
-				header->tp_status = TP_STATUS_KERNEL;
+				__packet_set_status(po, ptr, TP_STATUS_KERNEL);
 				ptr += req->tp_frame_size;
 			}
 		}
@@ -1923,6 +2110,8 @@ static inline struct sock *packet_seq_id
 	struct hlist_node *node;
 
 	sk_for_each(s, node, &packet_sklist) {
+		if (!ve_accessible(s->owner_env, get_exec_env()))
+			continue;
 		if (!off--)
 			return s;
 	}
@@ -1938,9 +2127,14 @@ static void *packet_seq_start(struct seq
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
-	return  (v == SEQ_START_TOKEN) 
-		? sk_head(&packet_sklist) 
-		: sk_next((struct sock*)v) ;
+	do {
+		v = (v == SEQ_START_TOKEN) 
+			? sk_head(&packet_sklist) 
+			: sk_next((struct sock*)v);
+	} while (v != NULL &&
+			!ve_accessible(((struct sock*)v)->owner_env,
+				get_exec_env()));
+	return v;
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
@@ -1996,7 +2190,7 @@ static struct file_operations packet_seq
 
 static void __exit packet_exit(void)
 {
-	proc_net_remove("packet");
+	remove_proc_glob_entry("net/packet", NULL);
 	unregister_netdevice_notifier(&packet_netdev_notifier);
 	sock_unregister(PF_PACKET);
 	proto_unregister(&packet_proto);
@@ -2011,7 +2205,7 @@ static int __init packet_init(void)
 
 	sock_register(&packet_family_ops);
 	register_netdevice_notifier(&packet_netdev_notifier);
-	proc_net_fops_create("packet", 0, &packet_seq_fops);
+	proc_glob_fops_create("net/packet", 0, &packet_seq_fops);
 out:
 	return rc;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/rds/connection.c kernel-2.6.18-417.el5-028stab121/net/rds/connection.c
--- kernel-2.6.18-417.el5.orig/net/rds/connection.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/rds/connection.c	2017-01-13 08:40:21.000000000 -0500
@@ -52,7 +52,7 @@ static struct hlist_head *rds_conn_bucke
 {
 	/* Pass NULL, don't need struct net for hash */
 	unsigned long hash = inet_ehashfn(be32_to_cpu(laddr), 0,
-					  be32_to_cpu(faddr), 0);
+					  be32_to_cpu(faddr), 0, 0);
 	return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/sched/sch_cbq.c kernel-2.6.18-417.el5-028stab121/net/sched/sch_cbq.c
--- kernel-2.6.18-417.el5.orig/net/sched/sch_cbq.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sched/sch_cbq.c	2017-01-13 08:40:16.000000000 -0500
@@ -931,8 +931,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int 
 
 			if (cl->deficit <= 0) {
 				q->active[prio] = cl;
-				cl = cl->next_alive;
 				cl->deficit += cl->quantum;
+				cl = cl->next_alive;
 			}
 			return skb;
 
@@ -1098,6 +1098,38 @@ static void cbq_adjust_levels(struct cbq
 	} while ((this = this->tparent) != NULL);
 }
 
+#define DEFQSCALE 1000
+
+static void check_quantum(struct cbq_sched_data *q, int prio)
+{
+	struct cbq_class *cl;
+	unsigned h;
+
+	if (q->quanta[prio] / q->nclasses[prio])
+		return;
+	
+	printk("sch_cbq: Oops: invalid q->quanta[%d]=%u for nclasses=%d\n",
+	       prio, q->quanta[prio], q->nclasses[prio]);
+	
+	printk("Recalculate quanta\nquanta=");
+	q->quanta[prio] = 0;
+	for (h = 0; h < 16; h++) {
+		for (cl = q->classes[h]; cl; cl = cl->next) {
+			if (cl->priority == prio) {
+				printk("%ld + ", cl->weight);
+				q->quanta[prio] += cl->weight;
+			}
+		}
+	}
+	printk("\n");
+
+	if (q->quanta[prio] / q->nclasses[prio])
+		return;
+	
+	printk("Weight of the classes is bad, force default quantum\n");
+	q->quanta[prio] = q->nclasses[prio] * DEFQSCALE;
+}
+
 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 {
 	struct cbq_class *cl;
@@ -1108,17 +1140,20 @@ static void cbq_normalize_quanta(struct 
 
 	for (h=0; h<16; h++) {
 		for (cl = q->classes[h]; cl; cl = cl->next) {
+			long mtu;
 			/* BUGGGG... Beware! This expression suffer of
 			   arithmetic overflows!
 			 */
 			if (cl->priority == prio) {
-				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
-					q->quanta[prio];
-			}
-			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
-				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
+				check_quantum(q, prio); /* Avoid div by zero! */
+				cl->quantum = (cl->weight * cl->allot) /
+					(q->quanta[prio] / q->nclasses[prio]);
 			}
+			mtu = cl->qdisc->dev->mtu;
+			if (cl->quantum <= mtu/2)
+				cl->quantum = mtu/2 + 1;
+			else if (cl->quantum > 32*mtu) 
+				cl->quantum = 32*mtu;
 		}
 	}
 }
diff -upr kernel-2.6.18-417.el5.orig/net/sched/sch_generic.c kernel-2.6.18-417.el5-028stab121/net/sched/sch_generic.c
--- kernel-2.6.18-417.el5.orig/net/sched/sch_generic.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sched/sch_generic.c	2017-01-13 08:40:21.000000000 -0500
@@ -95,6 +95,7 @@ static inline int qdisc_restart(struct n
 
 	/* Dequeue packet */
 	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
+		struct ve_struct *envid;
 		unsigned nolock = (dev->features & NETIF_F_LLTX);
 
 		dev->gso_skb = NULL;
@@ -108,6 +109,7 @@ static inline int qdisc_restart(struct n
 		 * of lock congestion it should return -1 and the packet
 		 * will be requeued.
 		 */
+		envid = set_exec_env(skb->owner_env);
 		if (!nolock) {
 			if (!netif_tx_trylock(dev)) {
 			collision:
@@ -122,6 +124,7 @@ static inline int qdisc_restart(struct n
 					kfree_skb(skb);
 					if (net_ratelimit())
 						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				__get_cpu_var(netdev_rx_stat).cpu_collision++;
@@ -142,6 +145,7 @@ static inline int qdisc_restart(struct n
 						netif_tx_unlock(dev);
 					}
 					spin_lock(&dev->queue_lock);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				if (ret == NETDEV_TX_LOCKED && nolock) {
@@ -175,6 +179,7 @@ requeue:
 		else
 			q->ops->requeue(skb, q);
 		netif_schedule(dev);
+		(void)set_exec_env(envid);
 		return 1;
 	}
 	BUG_ON((int) q->q.qlen < 0);
@@ -649,3 +654,4 @@ EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_lock_tree);
 EXPORT_SYMBOL(qdisc_unlock_tree);
+EXPORT_SYMBOL(dev_shutdown);
diff -upr kernel-2.6.18-417.el5.orig/net/sched/sch_teql.c kernel-2.6.18-417.el5-028stab121/net/sched/sch_teql.c
--- kernel-2.6.18-417.el5.orig/net/sched/sch_teql.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sched/sch_teql.c	2017-01-13 08:40:19.000000000 -0500
@@ -189,6 +189,9 @@ static int teql_qdisc_init(struct Qdisc 
 	struct teql_master *m = (struct teql_master*)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (dev->hard_header_len > m->dev->hard_header_len)
 		return -EINVAL;
 
diff -upr kernel-2.6.18-417.el5.orig/net/socket.c kernel-2.6.18-417.el5-028stab121/net/socket.c
--- kernel-2.6.18-417.el5.orig/net/socket.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/socket.c	2017-01-13 08:40:40.000000000 -0500
@@ -86,6 +86,8 @@
 #include <linux/kmod.h>
 #include <linux/audit.h>
 #include <linux/wireless.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 #include <trace/socket.h>
 
 #include <asm/uaccess.h>
@@ -97,10 +99,10 @@
 #include <linux/netfilter.h>
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
-			 size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
-			  size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			 unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+			  unsigned long nr_segs, loff_t pos);
 static int sock_mmap(struct file *file, struct vm_area_struct * vma);
 
 static int sock_close(struct inode *inode, struct file *file);
@@ -207,15 +209,6 @@ static DEFINE_PER_CPU(int, sockets_in_us
  *	divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR	128		/* 108 for Unix domain - 
-					   16 for IP, 16 for IPX,
-					   24 for IPv6,
-					   about 80 for AX.25 
-					   must be at least one bigger than
-					   the AF_UNIX size (see net/unix/af_unix.c
-					   :unix_mkname()).  
-					 */
-					 
 /**
  *	move_addr_to_kernel	-	copy a socket address into kernel space
  *	@uaddr: Address in user space
@@ -375,11 +368,11 @@ static struct dentry_operations sockfs_d
  *	but we take care of internal coherence yet.
  */
 
-static int sock_alloc_fd(struct file **filep)
+static int sock_alloc_fd(struct file **filep, int flags)
 {
 	int fd;
 
-	fd = get_unused_fd();
+	fd = get_unused_fd_flags(flags);
 	if (likely(fd >= 0)) {
 		struct file *file = get_empty_filp();
 
@@ -393,7 +386,7 @@ static int sock_alloc_fd(struct file **f
 	return fd;
 }
 
-static int sock_attach_fd(struct socket *sock, struct file *file)
+static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
 {
 	struct qstr this;
 	char name[32];
@@ -414,20 +407,20 @@ static int sock_attach_fd(struct socket 
 	sock->file = file;
 	file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
 	file->f_mode = FMODE_READ | FMODE_WRITE;
-	file->f_flags = O_RDWR;
+	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->f_pos = 0;
 	file->private_data = sock;
 
 	return 0;
 }
 
-int sock_map_fd(struct socket *sock)
+int sock_map_fd_flags(struct socket *sock, int flags)
 {
 	struct file *newfile;
-	int fd = sock_alloc_fd(&newfile);
+	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
-		int err = sock_attach_fd(sock, newfile);
+		int err = sock_attach_fd(sock, newfile, flags);
 
 		if (unlikely(err < 0)) {
 			put_filp(newfile);
@@ -439,6 +432,11 @@ int sock_map_fd(struct socket *sock)
 	return fd;
 }
 
+int sock_map_fd(struct socket *sock)
+{
+	return sock_map_fd_flags(sock, 0);
+}
+
 static struct socket *sock_from_file(struct file *file, int *err)
 {
 	struct inode *inode;
@@ -533,7 +531,7 @@ struct socket *sock_alloc(void)
 	return sock;
 }
 
-EXPORT_SYMBOL_GPL(sock_alloc);
+EXPORT_SYMBOL(sock_alloc);
 
 /*
  *	In theory you can't get an open on this inode, but /proc provides
@@ -562,6 +560,9 @@ const struct file_operations bad_sock_fo
  
 void sock_release(struct socket *sock)
 {
+	if (sock->sk)
+		ub_sock_sndqueuedel(sock->sk);
+
 	if (sock->ops) {
 		struct module *owner = sock->ops->owner;
 
@@ -731,7 +732,7 @@ static ssize_t sock_sendpage(struct file
 }
 
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
-		char __user *ubuf, size_t size, struct sock_iocb *siocb)
+		struct sock_iocb *siocb)
 {
 	if (!is_sync_kiocb(iocb)) {
 		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -741,9 +742,6 @@ static struct sock_iocb *alloc_sock_iocb
 	}
 
 	siocb->kiocb = iocb;
-	siocb->async_iov.iov_base = ubuf;
-	siocb->async_iov.iov_len = size;
-
 	iocb->private = siocb;
 	return siocb;
 }
@@ -786,21 +784,21 @@ static ssize_t sock_readv(struct file *f
 	return ret;
 }
 
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
-			 size_t count, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
 	if (pos != 0)
 		return -ESPIPE;
-	if (count == 0)		/* Match SYS5 behaviour */
+
+	if (iocb->ki_left == 0)	/* Match SYS5 behaviour */
 		return 0;
 
-	x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+	x = alloc_sock_iocb(iocb, &siocb);
 	if (!x)
 		return -ENOMEM;
-	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 }
 
 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
@@ -843,22 +841,22 @@ static ssize_t sock_writev(struct file *
 	return ret;
 }
 
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
-			  size_t count, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+			  unsigned long nr_segs, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
 	if (pos != 0)
 		return -ESPIPE;
-	if (count == 0)		/* Match SYS5 behaviour */
+
+	if (iocb->ki_left == 0)	/* Match SYS5 behaviour */
 		return 0;
 
-	x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+	x = alloc_sock_iocb(iocb, &siocb);
 	if (!x)
 		return -ENOMEM;
 
-	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 }
 
 
@@ -1158,6 +1156,49 @@ int sock_wake_async(struct socket *sock,
 	return 0;
 }
 
+int vz_security_family_check(int family)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		return 0;
+
+	switch (family) {
+	case PF_UNSPEC:
+	case PF_PACKET:
+	case PF_NETLINK:
+	case PF_UNIX:
+	case PF_INET:
+	case PF_INET6:
+	case PF_PPPOX:
+		break;
+	default:
+		return -EAFNOSUPPORT;
+        }
+#endif
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vz_security_family_check);
+
+int vz_security_protocol_check(int protocol)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		return 0;
+
+	switch (protocol) {
+	case  IPPROTO_IP:
+	case  IPPROTO_TCP:
+	case  IPPROTO_UDP:
+	case  IPPROTO_RAW:
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+#endif
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vz_security_protocol_check);
+
 static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
 {
 	int err;
@@ -1185,6 +1226,11 @@ static int __sock_create(int family, int
 		family = PF_PACKET;
 	}
 
+	/* VZ compatibility layer */
+	err = vz_security_family_check(family);
+	if (err < 0)
+		return err;
+
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
@@ -1285,12 +1331,22 @@ asmlinkage long sys_socket(int family, i
 {
 	int retval;
 	struct socket *sock;
+	int flags;
+
+	/* Check the SOCK_* constants for consistency.  */
+	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
+	BUILD_BUG_ON((SOCK_CLOEXEC | SOCK_NONBLOCK) & SOCK_TYPE_MASK);
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock);
+	retval = sock_map_fd_flags(sock, flags);
 	if (retval < 0)
 		goto out_release;
 
@@ -1312,6 +1368,12 @@ asmlinkage long sys_socketpair(int famil
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
 	struct file *newfile1, *newfile2;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	/*
 	 * Obtain the first socket and check if the underlying protocol
@@ -1330,13 +1392,13 @@ asmlinkage long sys_socketpair(int famil
 	if (err < 0) 
 		goto out_release_both;
 
-	fd1 = sock_alloc_fd(&newfile1);
+	fd1 = sock_alloc_fd(&newfile1, flags);
 	if (unlikely(fd1 < 0)) {
 		err = fd1;
 		goto out_release_both;
 	}
 
-	fd2 = sock_alloc_fd(&newfile2);
+	fd2 = sock_alloc_fd(&newfile2, flags);
 	if (unlikely(fd2 < 0)) {
 		err = fd2;
 		put_filp(newfile1);
@@ -1344,12 +1406,12 @@ asmlinkage long sys_socketpair(int famil
 		goto out_release_both;
 	}
 
-	err = sock_attach_fd(sock1, newfile1);
+	err = sock_attach_fd(sock1, newfile1, flags);
 	if (unlikely(err < 0)) {
 		goto out_fd2;
 	}
 
-	err = sock_attach_fd(sock2, newfile2);
+	err = sock_attach_fd(sock2, newfile2, flags);
 	if (unlikely(err < 0)) {
 		fput(newfile1);
 		goto out_fd1;
@@ -1465,13 +1527,17 @@ asmlinkage long sys_listen(int fd, int b
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
+asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
 	int err, len, newfd, fput_needed;
 	char address[MAX_SOCK_ADDR];
 
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+		return -EINVAL;
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1489,16 +1555,16 @@ asmlinkage long sys_accept(int fd, struc
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile);
+	newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
 		goto out_put;
 	}
 
-	err = sock_attach_fd(newsock, newfile);
+	err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
 	if (err < 0)
-		goto out_fd;
+		goto out_fd_simple;
 
 	err = security_socket_accept(sock, newsock);
 	if (err)
@@ -1534,12 +1600,22 @@ out_put:
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
+out_fd_simple:
+	sock_release(newsock);
+	put_filp(newfile);
+	put_unused_fd(newfd);
+	goto out_put;
 out_fd:
 	fput(newfile);
 	put_unused_fd(newfd);
 	goto out_put;
 }
 
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
+{
+	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
+}
 
 /*
  *	Attempt to connect to a socket with the server address.  The address
@@ -2268,6 +2344,10 @@ asmlinkage long sys_socketcall(int call,
 			err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
 					   (struct timespec __user *)a[4]);
 			break;
+		case SYS_ACCEPT4:
+			err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], a[3]);
+			break;
 		default:
 			err = -EINVAL;
 			break;
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/cache.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/cache.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/cache.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/cache.c	2017-01-13 08:40:23.000000000 -0500
@@ -315,6 +315,36 @@ static struct file_operations cache_flus
 static void do_cache_clean(void *data);
 static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
 
+struct cache_detail *cache_alloc(struct cache_detail *orig, int hsize)
+{
+	struct cache_detail *n;
+	struct cache_head **table;
+
+	n = kmemdup(orig, sizeof(struct cache_detail), GFP_KERNEL);
+	if (n == NULL)
+		return NULL;
+
+	table = kzalloc(hsize * sizeof(struct cache_head *), GFP_KERNEL);
+	if (table == NULL) {
+		kfree(n);
+		return NULL;
+	}
+
+	n->hash_table = table;
+	return n;
+}
+EXPORT_SYMBOL(cache_alloc);
+
+void cache_free(struct cache_detail *cd)
+{
+	if (cache_unregister(cd))
+		printk("Can't unregister cache %s\n", cd->name);
+	else
+		kfree(cd->hash_table);
+	kfree(cd);
+}
+EXPORT_SYMBOL(cache_free);
+
 void cache_register(struct cache_detail *cd)
 {
 	cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/clnt.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/clnt.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/clnt.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/clnt.c	2017-01-13 08:40:23.000000000 -0500
@@ -65,6 +65,39 @@ static u32 *	call_header(struct rpc_task
 static u32 *	call_verify(struct rpc_task *task);
 
 
+/*
+ * Grand abort timeout (stop the client if occures)
+ */
+int xprt_abort_timeout = RPC_MAX_ABORT_TIMEOUT;
+EXPORT_SYMBOL(xprt_abort_timeout);
+
+static int rpc_abort_hard(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt;
+	clnt = task->tk_client;
+
+	if (clnt->cl_pr_time == 0) {
+		clnt->cl_pr_time = jiffies;
+		return 0;
+	}
+	if (!clnt->cl_broken && xprt_abort_timeout == RPC_MAX_ABORT_TIMEOUT)
+		return 0;
+	if (time_before(jiffies, clnt->cl_pr_time + xprt_abort_timeout * HZ))
+		return 0;
+
+	printk(KERN_ERR "CT#%u: RPC client %p (server %s) is marked 'broken'. "
+		"Unmount/mount to get it working again.\n",
+		get_exec_env()->veid, clnt, clnt->cl_server);
+	clnt->cl_broken = 1;
+	rpc_killall_tasks(clnt);
+	return -ETIMEDOUT;
+}
+
+static void rpc_abort_clear(struct rpc_task *task)
+{
+	task->tk_client->cl_pr_time = 0;
+}
+
 static int
 rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
 {
@@ -184,10 +217,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(system_utsname.nodename);
+	clnt->cl_nodelen = strlen(utsname()->nodename);
 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
 		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+	memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
 	return clnt;
 
 out_no_auth:
@@ -285,6 +318,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
 	new->cl_dead = 0;
+	new->cl_broken = 0;
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
@@ -322,8 +356,9 @@ rpc_shutdown_client(struct rpc_clnt *cln
 	}
 
 	if (atomic_read(&clnt->cl_users) < 0) {
-		printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
-				clnt, atomic_read(&clnt->cl_users));
+		printk(KERN_ERR "CT#%u: RPC: rpc_shutdown_client clnt %p "
+			"tasks=%d\n", get_exec_env()->veid,
+			clnt, atomic_read(&clnt->cl_users));
 #ifdef RPC_DEBUG
 		rpc_show_tasks();
 #endif
@@ -372,19 +407,29 @@ out_free:
 	return 0;
 }
 
+/* For synchronization while rpc client becomes dead */
+DEFINE_SPINLOCK(rpc_client_lock);
+EXPORT_SYMBOL(rpc_client_lock);
+
 /*
  * Release an RPC client
  */
 void
 rpc_release_client(struct rpc_clnt *clnt)
 {
+	int destroy_flag;
+
 	dprintk("RPC:      rpc_release_client(%p, %d)\n",
 				clnt, atomic_read(&clnt->cl_users));
 
-	if (!atomic_dec_and_test(&clnt->cl_users))
+	if (!atomic_dec_and_lock(&clnt->cl_users, &rpc_client_lock)) {
 		return;
+	}
 	wake_up(&destroy_wait);
-	if (clnt->cl_oneshot || clnt->cl_dead)
+	destroy_flag = clnt->cl_oneshot | clnt->cl_dead;
+	spin_unlock(&rpc_client_lock);
+
+	if (destroy_flag)
 		rpc_destroy_client(clnt);
 }
 
@@ -486,7 +531,7 @@ int rpc_call_sync(struct rpc_clnt *clnt,
 	int		status;
 
 	/* If this client is slain all further I/O fails */
-	if (clnt->cl_dead) 
+	if (clnt->cl_dead || clnt->cl_broken) 
 		return -EIO;
 
 	BUG_ON(flags & RPC_TASK_ASYNC);
@@ -527,7 +572,7 @@ rpc_call_async(struct rpc_clnt *clnt, st
 
 	/* If this client is slain all further I/O fails */
 	status = -EIO;
-	if (clnt->cl_dead) 
+	if (clnt->cl_dead || clnt->cl_broken) 
 		goto out_release;
 
 	flags |= RPC_TASK_ASYNC;
@@ -839,6 +884,7 @@ call_bind_status(struct rpc_task *task)
 	if (task->tk_status >= 0) {
 		dprintk("RPC: %4d call_bind_status (status %d)\n",
 					task->tk_pid, task->tk_status);
+		rpc_abort_clear(task);
 		task->tk_status = 0;
 		task->tk_action = call_connect;
 		return;
@@ -929,7 +975,7 @@ call_connect_status(struct rpc_task *tas
 	case -ENOTCONN:
 	case -EAGAIN:
 		task->tk_action = call_bind;
-		if (!RPC_IS_SOFT(task))
+		if (!RPC_IS_SOFT(task) && !rpc_abort_hard(task))
 			return;
 		/* if soft mounted, test if we've timed out */
 	case -ETIMEDOUT:
@@ -1035,8 +1081,8 @@ call_status(struct rpc_task *task)
 		rpc_exit(task, status);
 		break;
 	default:
-		printk("%s: RPC call returned error %d\n",
-			       clnt->cl_protname, -status);
+		printk("CT#%u: %s: RPC call returned error %d\n",
+			get_exec_env()->veid, clnt->cl_protname, -status);
 		rpc_exit(task, status);
 	}
 }
@@ -1059,16 +1105,18 @@ call_timeout(struct rpc_task *task)
 	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
 	task->tk_timeouts++;
 
-	if (RPC_IS_SOFT(task)) {
-		printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
-				clnt->cl_protname, clnt->cl_server);
+	if (RPC_IS_SOFT(task) || rpc_abort_hard(task)) {
+		printk(KERN_NOTICE "CT#%u: %s: server %s not responding, "
+			"timed out\n", get_exec_env()->veid,
+			clnt->cl_protname, clnt->cl_server);
 		rpc_exit(task, -EIO);
 		return;
 	}
 
 	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
-		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
+		printk(KERN_NOTICE "CT#%u: %s: server %s not responding, "
+			"still trying\n", get_exec_env()->veid,
 			clnt->cl_protname, clnt->cl_server);
 	}
 	rpc_force_rebind(clnt);
@@ -1094,13 +1142,14 @@ call_decode(struct rpc_task *task)
 				task->tk_pid, task->tk_status);
 
 	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
-		printk(KERN_NOTICE "%s: server %s OK\n",
+		printk(KERN_NOTICE "CT#%u: %s: server %s OK\n",
+			get_exec_env()->veid,
 			clnt->cl_protname, clnt->cl_server);
 		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
 	}
 
 	if (task->tk_status < 12) {
-		if (!RPC_IS_SOFT(task)) {
+		if (!RPC_IS_SOFT(task) && !rpc_abort_hard(task)) {
 			task->tk_action = call_bind;
 			clnt->cl_stats->rpcretrans++;
 			goto out_retry;
@@ -1111,6 +1160,7 @@ call_decode(struct rpc_task *task)
 		goto out_retry;
 	}
 
+	rpc_abort_clear(task);
 	/*
 	 * Ensure that we see all writes made by xprt_complete_rqst()
 	 * before it changed req->rq_received.
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/pmap_clnt.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/pmap_clnt.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/pmap_clnt.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/pmap_clnt.c	2017-01-13 08:40:23.000000000 -0500
@@ -234,7 +234,7 @@ rpc_register(u32 prog, u32 vers, int pro
 	error = rpc_call_sync(pmap_clnt, &msg, 0);
 
 	if (error < 0) {
-		printk(KERN_WARNING
+		ve_printk(VE_LOG, KERN_WARNING
 			"RPC: failed to contact portmap (errno %d).\n",
 			error);
 	}
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/rpc_pipe.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/rpc_pipe.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/rpc_pipe.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/rpc_pipe.c	2017-01-13 08:40:26.000000000 -0500
@@ -437,8 +437,15 @@ static struct rpc_filelist authfiles[] =
 struct vfsmount *rpc_get_mount(void)
 {
 	int err;
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
 
+	ve = set_exec_env(get_ve0());
+	ub = set_exec_ub(get_ub0());
 	err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count);
+	set_exec_ub(ub);
+	set_exec_env(ve);
+
 	if (err != 0)
 		return ERR_PTR(err);
 	return rpc_mount;
@@ -838,6 +845,13 @@ init_once(void * foo, kmem_cache_t * cac
 
 int register_rpc_pipefs(void)
 {
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	if (!ve_is_super(ve))
+		return register_ve_fs_type(ve, &rpc_pipe_fs_type,
+				&ve->rpc_pipefs_fstype, NULL);
+
 	rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
 				sizeof(struct rpc_inode),
 				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -851,6 +865,14 @@ int register_rpc_pipefs(void)
 
 void unregister_rpc_pipefs(void)
 {
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	if (!ve_is_super(ve)) {
+		unregister_ve_fs_type(ve->rpc_pipefs_fstype, NULL);
+		return;
+	}
+
 	if (kmem_cache_destroy(rpc_inode_cachep))
 		printk(KERN_WARNING "RPC: unable to free inode cache\n");
 	unregister_filesystem(&rpc_pipe_fs_type);
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/sched.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/sched.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/sched.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/sched.c	2017-01-13 08:40:40.000000000 -0500
@@ -29,6 +29,9 @@
 static int			rpc_task_id;
 #endif
 
+static int rpc_serialize = 0;
+module_param(rpc_serialize, int, 0440);
+
 /*
  * RPC slabs and memory pools
  */
@@ -64,6 +67,7 @@ static LIST_HEAD(all_tasks);
  * rpciod-related stuff
  */
 struct workqueue_struct *rpciod_workqueue;
+DECLARE_RWSEM(rpc_async_task_lock);
 
 /*
  * Spinlock for other critical sections of code.
@@ -282,6 +286,14 @@ static void rpc_set_active(struct rpc_ta
 #endif
 	/* Add to global list of all tasks */
 	list_add_tail(&task->tk_task, &all_tasks);
+
+	/* Prevent the task to run if client is marked as dead */
+	if (task->tk_client != NULL && (task->tk_client->cl_dead ||
+					task->tk_client->cl_broken)) {
+		task->tk_flags |= RPC_TASK_KILLED;
+		rpc_exit(task, -EIO);
+		rpc_wake_up_task(task);
+	}
 	spin_unlock(&rpc_sched_lock);
 }
 
@@ -357,16 +369,6 @@ static void rpc_make_runnable(struct rpc
 }
 
 /*
- * Place a newly initialized task on the workqueue.
- */
-static inline void
-rpc_schedule_run(struct rpc_task *task)
-{
-	rpc_set_active(task);
-	rpc_make_runnable(task);
-}
-
-/*
  * Prepare for sleeping on a wait queue.
  * By always appending tasks to the list we ensure FIFO behavior.
  * NB: An RPC task will only receive interrupt-driven events as long
@@ -708,7 +710,10 @@ void rpc_release_calldata(const struct r
 static int __rpc_execute(struct rpc_task *task)
 {
 	int		status = 0;
+	struct ve_struct *env, *old_env;
 
+	env = get_ve(task->tk_client->cl_xprt->owner_env);
+	old_env = set_exec_env(env);
 	dprintk("RPC: %4d rpc_execute flgs %x\n",
 				task->tk_pid, task->tk_flags);
 
@@ -762,10 +767,16 @@ static int __rpc_execute(struct rpc_task
 		rpc_clear_running(task);
 		if (RPC_IS_ASYNC(task)) {
 			/* Careful! we may have raced... */
-			if (RPC_IS_QUEUED(task))
+			if (RPC_IS_QUEUED(task)) {
+				(void)set_exec_env(old_env);
+				put_ve(env);
 				return 0;
-			if (rpc_test_and_set_running(task))
+			}
+			if (rpc_test_and_set_running(task)) {
+				(void)set_exec_env(old_env);
+				put_ve(env);
 				return 0;
+			}
 			continue;
 		}
 
@@ -794,6 +805,8 @@ static int __rpc_execute(struct rpc_task
 	dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status);
 	/* Release all resources associated with the task */
 	rpc_release_task(task);
+	(void)set_exec_env(old_env);
+	put_ve(env);
 	return status;
 }
 
@@ -816,7 +829,9 @@ rpc_execute(struct rpc_task *task)
 
 static void rpc_async_schedule(void *arg)
 {
+	down_read(&rpc_async_task_lock);
 	__rpc_execute((struct rpc_task *)arg);
+	up_read(&rpc_async_task_lock);
 }
 
 /**
@@ -1154,10 +1169,12 @@ fail:
 
 void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
 {
+	rpc_set_active(child);
+
 	spin_lock_bh(&childq.lock);
 	/* N.B. Is it possible for the child to have already finished? */
 	__rpc_sleep_on(&childq, task, func, NULL);
-	rpc_schedule_run(child);
+	rpc_make_runnable(child);
 	spin_unlock_bh(&childq.lock);
 }
 
@@ -1188,6 +1205,55 @@ void rpc_killall_tasks(struct rpc_clnt *
 	spin_unlock(&rpc_sched_lock);
 }
 
+void rpc_kill_client(struct rpc_clnt *clnt)
+{
+	if (!IS_ERR(clnt)) {
+		clnt->cl_broken = 1;
+		clnt->cl_pr_time = jiffies - xprt_abort_timeout * HZ - 1;
+		rpc_killall_tasks(clnt);
+	}
+}
+
+void rpc_killall_ve_tasks(struct ve_struct *ve)
+{
+	struct rpc_task	*rovr;
+	struct list_head *le;
+	struct rpc_clnt *clnt;
+
+	dprintk("RPC:      killing all tasks for CT%u\n", ve->veid);
+
+	/* Make sure no async RPC task is in progress */
+	down_write(&rpc_async_task_lock);
+
+	/*
+	 * Spin lock all_tasks to prevent changes...
+	 */
+	spin_lock(&rpc_sched_lock);
+	alltask_for_each(rovr, le, &all_tasks) {
+		clnt = rovr->tk_client;
+		if (!clnt)
+			continue;
+
+		if (!ve_accessible_strict(clnt->cl_xprt->owner_env, ve))
+			continue;
+
+		if (RPC_IS_ACTIVATED(rovr)) {
+			rovr->tk_flags |= RPC_TASK_KILLED;
+			rpc_exit(rovr, -EIO);
+			rpc_wake_up_task(rovr);
+		}
+
+		if (clnt->cl_broken)
+			continue;
+
+		clnt->cl_broken = 1;
+		clnt->cl_pr_time = jiffies - xprt_abort_timeout * HZ - 1;
+	}
+	spin_unlock(&rpc_sched_lock);
+
+	up_write(&rpc_async_task_lock);
+}
+
 int rpciod_up(void)
 {
 	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
@@ -1209,7 +1275,12 @@ static int rpciod_start(void)
 	 * Create the rpciod thread and wait for it to start.
 	 */
 	dprintk("RPC:       creating workqueue rpciod\n");
-	wq = create_workqueue("rpciod");
+	if (rpc_serialize) {
+		wq = create_singlethread_workqueue("rpciod");
+	} else {
+		wq = create_workqueue("rpciod");
+	}
+
 	rpciod_workqueue = wq;
 	return rpciod_workqueue != NULL;
 }
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/stats.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/stats.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/stats.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/stats.c	2017-01-13 08:40:23.000000000 -0500
@@ -25,7 +25,9 @@
 
 #define RPCDBG_FACILITY	RPCDBG_MISC
 
+#ifndef CONFIG_VE
 struct proc_dir_entry	*proc_net_rpc = NULL;
+#endif
 
 /*
  * Get RPC client stats
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/sunrpc_syms.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/sunrpc_syms.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/sunrpc_syms.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/sunrpc_syms.c	2017-01-13 08:40:23.000000000 -0500
@@ -22,6 +22,7 @@
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 
+#include <linux/ve_proto.h>
 
 /* RPC scheduler */
 EXPORT_SYMBOL(rpc_execute);
@@ -44,6 +45,8 @@ EXPORT_SYMBOL(rpc_bind_new_program);
 EXPORT_SYMBOL(rpc_destroy_client);
 EXPORT_SYMBOL(rpc_shutdown_client);
 EXPORT_SYMBOL(rpc_killall_tasks);
+EXPORT_SYMBOL(rpc_kill_client);
+EXPORT_SYMBOL(rpc_async_task_lock);
 EXPORT_SYMBOL(rpc_call_sync);
 EXPORT_SYMBOL(rpc_call_async);
 EXPORT_SYMBOL(rpc_call_setup);
@@ -73,6 +76,7 @@ EXPORT_SYMBOL(put_rpccred);
 
 /* RPC server stuff */
 EXPORT_SYMBOL(svc_create);
+EXPORT_SYMBOL(__svc_create);
 EXPORT_SYMBOL(svc_create_thread);
 EXPORT_SYMBOL(svc_exit_thread);
 EXPORT_SYMBOL(svc_destroy);
@@ -140,7 +144,56 @@ EXPORT_SYMBOL(nlm_debug);
 
 extern int register_rpc_pipefs(void);
 extern void unregister_rpc_pipefs(void);
-extern struct cache_detail ip_map_cache;
+extern int ve_ip_map_init(void);
+extern void ve_ip_map_exit(void);
+extern void rpc_killall_ve_tasks(struct ve_struct *ve);
+
+static int ve_rpc_init(void *d)
+{
+	int err = -ENOMEM;
+
+	rpc_proc_init();
+	if (proc_net_rpc == NULL)
+		goto err_proc;
+
+	err = ve_ip_map_init();
+	if (err)
+		goto err_map;
+
+	err = register_rpc_pipefs();
+	if (err)
+		goto err_pipefs;
+
+	return 0;
+
+err_pipefs:
+	ve_ip_map_exit();
+err_map:
+	rpc_proc_exit();
+err_proc:
+	return err;
+}
+
+static void ve_rpc_fini(void *d)
+{
+	struct ve_struct *ve = d;
+
+	rpc_killall_ve_tasks(ve);
+
+	if (!ve->rpc_pipefs_fstype)
+		return;
+
+	unregister_rpc_pipefs();
+	ve_ip_map_exit();
+	rpc_proc_exit();
+}
+
+static struct ve_hook rpc_hook = {
+	.init = ve_rpc_init,
+	.fini = ve_rpc_fini,
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_NET_PRE,
+};
 
 static int __init
 init_sunrpc(void)
@@ -157,7 +210,9 @@ init_sunrpc(void)
 #ifdef CONFIG_PROC_FS
 	rpc_proc_init();
 #endif
-	cache_register(&ip_map_cache);
+	ve_ip_map_init();
+
+	ve_hook_register(VE_SS_CHAIN, &rpc_hook);
 out:
 	return err;
 }
@@ -165,10 +220,11 @@ out:
 static void __exit
 cleanup_sunrpc(void)
 {
+	ve_hook_unregister(&rpc_hook);
+
 	unregister_rpc_pipefs();
 	rpc_destroy_mempool();
-	if (cache_unregister(&ip_map_cache))
-		printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n");
+	ve_ip_map_exit();
 #ifdef RPC_DEBUG
 	rpc_unregister_sysctl();
 #endif
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/svcauth_unix.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/svcauth_unix.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/svcauth_unix.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/svcauth_unix.c	2017-01-13 08:40:23.000000000 -0500
@@ -259,8 +259,11 @@ static int ip_map_show(struct seq_file *
 	return 0;
 }
 	
+#ifdef CONFIG_VE
+#define ip_map_cache	(*(get_exec_env()->_ip_map_cache))
+#endif
 
-struct cache_detail ip_map_cache = {
+static struct cache_detail __ip_map_cache = {
 	.owner		= THIS_MODULE,
 	.hash_size	= IP_HASHMAX,
 	.hash_table	= ip_table,
@@ -275,6 +278,24 @@ struct cache_detail ip_map_cache = {
 	.alloc		= ip_map_alloc,
 };
 
+int ve_ip_map_init(void)
+{
+	struct cache_detail *cd;
+
+	cd = cache_alloc(&__ip_map_cache, IP_HASHMAX);
+	if (cd == NULL)
+		return -ENOMEM;
+
+	cache_register(cd);
+	get_exec_env()->_ip_map_cache = cd;
+	return 0;
+}
+
+void ve_ip_map_exit(void)
+{
+	cache_free(get_exec_env()->_ip_map_cache);
+}
+
 static struct ip_map *ip_map_lookup(char *class, struct in_addr addr)
 {
 	struct ip_map ip;
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/svc.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/svc.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/svc.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/svc.c	2017-01-13 08:40:23.000000000 -0500
@@ -27,18 +27,20 @@
  * Create an RPC service
  */
 struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize)
+__svc_create(struct svc_program *prog, unsigned int bufsize, struct svc_stat *stat)
 {
 	struct svc_serv	*serv;
 	int vers;
 	unsigned int xdrsize;
 
+	BUG_ON(stat == NULL);
+
 	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
 		return NULL;
 	serv->sv_name      = prog->pg_name;
 	serv->sv_program   = prog;
 	serv->sv_nrthreads = 1;
-	serv->sv_stats     = prog->pg_stats;
+	serv->sv_stats     = stat;
 	if (bufsize > RPCSVC_MAXPAYLOAD)
 		bufsize = RPCSVC_MAXPAYLOAD;
 	serv->sv_max_payload = bufsize? bufsize : 4096;
@@ -69,6 +71,12 @@ svc_create(struct svc_program *prog, uns
 	return serv;
 }
 
+struct svc_serv *
+svc_create(struct svc_program *prog, unsigned int bufsize)
+{
+	return __svc_create(prog, bufsize, prog->pg_stats);
+}
+
 /*
  * Destroy an RPC service
  */
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/svcsock.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/svcsock.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/svcsock.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/svcsock.c	2017-01-13 08:40:22.000000000 -0500
@@ -393,6 +393,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	size_t		base = xdr->page_base;
 	unsigned int	pglen = xdr->page_len;
 	unsigned int	flags = MSG_MORE;
+	struct ve_struct *old_env;
+
+	old_env = set_exec_env(sock->sk->owner_env);
 
 	slen = xdr->len;
 
@@ -459,6 +462,8 @@ out:
 			rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
 		rqstp->rq_addr.sin_addr.s_addr);
 
+	(void)set_exec_env(old_env);
+
 	return len;
 }
 
@@ -520,8 +525,11 @@ svc_recv_available(struct svc_sock *svsk
 {
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
+	struct ve_struct *old_env;
 
+	old_env = set_exec_env(sock->sk->owner_env);
 	err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
+	(void)set_exec_env(old_env);
 
 	return (err >= 0)? avail : err;
 }
@@ -536,6 +544,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 	struct socket	*sock;
 	struct sockaddr_in daddr;
 	int		len, alen;
+	struct ve_struct *old_env;
 
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
 	sock = rqstp->rq_sock->sk_sock;
@@ -547,7 +556,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 
 	msg.msg_flags	= MSG_DONTWAIT;
 
+	old_env = set_exec_env(sock->sk->owner_env);
 	len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+	(void)set_exec_env(old_env);
 
 	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
 	 * possibly we should cache this in the svc_sock structure
@@ -1565,6 +1576,8 @@ svc_delete_socket(struct svc_sock *svsk)
 	serv = svsk->sk_server;
 	sk = svsk->sk_sk;
 
+	/* XXX: serialization? */
+	sk->sk_user_data = NULL;
 	sk->sk_state_change = svsk->sk_ostate;
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/sysctl.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/sysctl.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/sysctl.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/sysctl.c	2017-01-13 08:40:22.000000000 -0500
@@ -123,6 +123,8 @@ static unsigned int min_slot_table_size 
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
+static int xprt_min_abort_timeout = RPC_MIN_ABORT_TIMEOUT;
+static int xprt_max_abort_timeout = RPC_MAX_ABORT_TIMEOUT;
 
 static ctl_table debug_table[] = {
 	{
@@ -201,6 +203,17 @@ static ctl_table debug_table[] = {
 		.extra1		= &xprt_min_resvport_limit,
 		.extra2		= &xprt_max_resvport_limit
 	},
+	{
+		.ctl_name	= CTL_ABORT_TIMEOUT,
+		.procname	= "abort_timeout",
+		.data		= &xprt_abort_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &xprt_min_abort_timeout,
+		.extra2		= &xprt_max_abort_timeout
+	},
 	{ .ctl_name = 0 }
 };
 
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/timer.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/timer.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/timer.c	2006-11-29 14:28:40.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/timer.c	2017-01-13 08:40:27.000000000 -0500
@@ -21,6 +21,7 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/timer.h>
+#include <linux/module.h>
 
 #define RPC_RTO_MAX (60*HZ)
 #define RPC_RTO_INIT (HZ/5)
@@ -42,6 +43,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigne
 		rt->ntimeouts[i] = 0;
 	}
 }
+EXPORT_SYMBOL(rpc_init_rtt);
 
 /*
  * NB: When computing the smoothed RTT and standard deviation,
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/xprt.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/xprt.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/xprt.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/xprt.c	2017-01-13 08:40:23.000000000 -0500
@@ -482,10 +482,13 @@ int xprt_adjust_timeout(struct rpc_rqst 
 static void xprt_autoclose(void *args)
 {
 	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct ve_struct *ve;
 
+	ve = set_exec_env(xprt->owner_env);
 	xprt_disconnect(xprt);
 	xprt->ops->close(xprt);
 	xprt_release_write(xprt, NULL);
+	(void)set_exec_env(ve);
 }
 
 /**
@@ -892,6 +895,7 @@ static struct rpc_xprt *xprt_setup(int p
 
 	xprt->addr = *ap;
 	xprt->tcp_flags |= XPRT_SRCADDR_PRESENT;
+	xprt->owner_env = get_ve(get_exec_env());
 
 	switch (proto) {
 	case IPPROTO_UDP:
@@ -907,6 +911,7 @@ static struct rpc_xprt *xprt_setup(int p
 		break;
 	}
 	if (result) {
+		put_ve(xprt->owner_env);
 		kfree(xprt);
 		return ERR_PTR(result);
 	}
@@ -974,6 +979,7 @@ static void xprt_destroy(struct kref *kr
 	del_timer_sync(&xprt->timer);
 	cancel_work_sync(&xprt->task_cleanup);
 	xprt->ops->destroy(xprt);
+	put_ve(xprt->owner_env);
 	kfree(xprt);
 }
 /**
@@ -985,6 +991,7 @@ void xprt_put(struct rpc_xprt *xprt)
 {
 	kref_put(&xprt->kref, xprt_destroy);
 }
+EXPORT_SYMBOL(xprt_put);
 
 /**
  * xprt_get - return a reference to an RPC transport.
@@ -996,3 +1003,6 @@ struct rpc_xprt *xprt_get(struct rpc_xpr
 	kref_get(&xprt->kref);
 	return xprt;
 }
+EXPORT_SYMBOL(xprt_get);
+
+EXPORT_SYMBOL(xprt_disconnect);
diff -upr kernel-2.6.18-417.el5.orig/net/sunrpc/xprtsock.c kernel-2.6.18-417.el5-028stab121/net/sunrpc/xprtsock.c
--- kernel-2.6.18-417.el5.orig/net/sunrpc/xprtsock.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/sunrpc/xprtsock.c	2017-01-13 08:40:23.000000000 -0500
@@ -449,18 +449,23 @@ out_release:
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
-	struct socket *sock = xprt->sock;
-	struct sock *sk = xprt->inet;
-
-	if (!sk)
-		goto clear_close_wait;
+	struct socket *sock;
+	struct sock *sk;
 
 	dprintk("RPC:      xs_close xprt %p\n", xprt);
 
-	write_lock_bh(&sk->sk_callback_lock);
-	xprt->inet = NULL;
+	spin_lock_bh(&xprt->transport_lock);
+	if (xprt->sock == NULL) {
+		spin_unlock_bh(&xprt->transport_lock);
+		goto clear_close_wait;
+	}
+	sock = xprt->sock;
+	sk = xprt->inet;
 	xprt->sock = NULL;
+	xprt->inet = NULL;
+	spin_unlock_bh(&xprt->transport_lock);
 
+	write_lock_bh(&sk->sk_callback_lock);
 	sk->sk_user_data = NULL;
 	sk->sk_data_ready = xprt->old_data_ready;
 	sk->sk_state_change = xprt->old_state_change;
@@ -1081,7 +1086,13 @@ static void xs_udp_connect_worker(void *
 	struct rpc_xprt *xprt = (struct rpc_xprt *) args;
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
+	struct ve_struct *ve, *old_ve;
 
+	ve = xprt->owner_env;
+	old_ve = set_exec_env(ve);
+	down_read(&ve->op_sem);
+	if (!ve->is_running)
+		goto out;
 	if (xprt->shutdown || xprt->addr.sin_port == 0)
 		goto out;
 
@@ -1128,6 +1139,8 @@ static void xs_udp_connect_worker(void *
 out:
 	xprt_wake_pending_tasks(xprt, status);
 	xprt_clear_connecting(xprt);
+	up_read(&ve->op_sem);
+	(void)set_exec_env(old_ve);
 }
 
 /*
@@ -1176,7 +1189,13 @@ static void xs_tcp_connect_worker(void *
 	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
+	struct ve_struct *ve, *old_ve;
 
+	ve = xprt->owner_env;
+	old_ve = set_exec_env(ve);
+	down_read(&ve->op_sem);
+	if (!ve->is_running)
+		goto out;
 	if (xprt->shutdown || xprt->addr.sin_port == 0)
 		goto out;
 
@@ -1256,6 +1275,8 @@ out:
 	xprt_wake_pending_tasks(xprt, status);
 out_clear:
 	xprt_clear_connecting(xprt);
+	up_read(&ve->op_sem);
+	(void)set_exec_env(old_ve);
 }
 
 /**
diff -upr kernel-2.6.18-417.el5.orig/net/tux/proto_http.c kernel-2.6.18-417.el5-028stab121/net/tux/proto_http.c
--- kernel-2.6.18-417.el5.orig/net/tux/proto_http.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/tux/proto_http.c	2017-01-13 08:40:20.000000000 -0500
@@ -1256,7 +1256,7 @@ static void send_ret_redirect (tux_req_t
 		size += req->host_len;
 	else {
 		down_read(&uts_sem);
-		uts_len = strlen(system_utsname.nodename);
+		uts_len = strlen(init_utsname()->nodename);
 		size += uts_len;
 	}
 	if (req->objectname[0] != '/')
@@ -1280,7 +1280,7 @@ static void send_ret_redirect (tux_req_t
 		memcpy(buf, req->host, req->host_len);
 		buf += req->host_len;
 	} else {
-		memcpy(buf, system_utsname.nodename, uts_len);
+		memcpy(buf, init_utsname()->nodename, uts_len);
 		up_read(&uts_sem);
 		buf += uts_len;
 	}
diff -upr kernel-2.6.18-417.el5.orig/net/unix/af_unix.c kernel-2.6.18-417.el5-028stab121/net/unix/af_unix.c
--- kernel-2.6.18-417.el5.orig/net/unix/af_unix.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/unix/af_unix.c	2017-01-13 08:40:41.000000000 -0500
@@ -117,6 +117,9 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 int sysctl_unix_max_dgram_qlen = 10;
 
 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
@@ -252,6 +255,8 @@ static struct sock *__unix_find_socket_b
 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
+		if (!ve_accessible_strict(s->owner_env, get_exec_env()))
+			continue;
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			goto found;
@@ -275,24 +280,34 @@ static inline struct sock *unix_find_soc
 	return s;
 }
 
-static struct sock *unix_find_socket_byinode(struct inode *i)
+static inline struct sock *__unix_find_socket_byinode(struct inode *i, int check_listen)
 {
 	struct sock *s;
 	struct hlist_node *node;
 
-	spin_lock(&unix_table_lock);
 	sk_for_each(s, node,
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->dentry;
 
+		if (check_listen && unix_sk(s)->sk.sk_state != TCP_LISTEN)
+			continue;
+
 		if(dentry && dentry->d_inode == i)
-		{
-			sock_hold(s);
 			goto found;
-		}
 	}
 	s = NULL;
 found:
+	return s;
+}
+
+static struct sock *unix_find_socket_byinode(struct inode *i)
+{
+	struct sock *s;
+
+	spin_lock(&unix_table_lock);
+	s = __unix_find_socket_byinode(i, 0);
+	if (s != NULL)
+		sock_hold(s);
 	spin_unlock(&unix_table_lock);
 	return s;
 }
@@ -456,7 +471,7 @@ static int unix_listen(struct socket *so
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.pid	= virt_tgid(current);
 	sk->sk_peercred.uid	= current->euid;
 	sk->sk_peercred.gid	= current->egid;
 	err = 0;
@@ -578,6 +593,8 @@ static struct sock * unix_create1(struct
 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
 	if (!sk)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_sk_free;
 
 	atomic_inc(&unix_nr_socks);
 
@@ -599,6 +616,9 @@ static struct sock * unix_create1(struct
 	unix_insert_socket(unix_sockets_unbound, sk);
 out:
 	return sk;
+out_sk_free:
+	sk_free(sk);
+	return NULL;
 }
 
 static int unix_create(struct socket *sock, int protocol)
@@ -744,6 +764,62 @@ fail:
 	return NULL;
 }
 
+int unix_attach_addr(struct sock *sk, struct sockaddr_un *sunaddr, int addr_len)
+{
+	int err;
+	unsigned hash;
+	struct unix_address *addr;
+
+	err = unix_mkname(sunaddr, addr_len, &hash);
+	if (err < 0)
+		return err;
+
+	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
+	if (addr == NULL)
+		return -ENOMEM;
+
+	memcpy(addr->name, sunaddr, addr_len);
+	addr->len = addr_len;
+	addr->hash = hash ^ sk->sk_type;
+	atomic_set(&addr->refcnt, 1);
+	unix_sk(sk)->addr = addr;
+
+	return 0;
+}
+EXPORT_SYMBOL(unix_attach_addr);
+
+int unix_bind_path(struct sock *sk, struct dentry *dentry, struct vfsmount *mnt)
+{
+	struct hlist_head *list;
+	struct unix_sock *u;
+
+	u = unix_sk(sk);
+	BUG_ON(u->addr == NULL);
+
+	spin_lock(&unix_table_lock);
+
+	if (sk->sk_state == TCP_LISTEN) {
+		if (__unix_find_socket_byinode(dentry->d_inode, 1)) {
+			spin_unlock(&unix_table_lock);
+			dput(dentry);
+			mntput(mnt);
+			return -EBUSY;
+		}
+	}
+
+	list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
+
+	u->dentry = dentry;
+	u->mnt = mnt;
+
+	__unix_remove_socket(sk);
+	__unix_insert_socket(list, sk);
+
+	spin_unlock(&unix_table_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(unix_bind_path);
 
 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -959,6 +1035,7 @@ static int unix_stream_connect(struct so
 	int st;
 	int err;
 	long timeo;
+	unsigned long chargesize;
 
 	err = unix_mkname(sunaddr, addr_len, &hash);
 	if (err < 0)
@@ -987,6 +1064,10 @@ static int unix_stream_connect(struct so
 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
 	if (skb == NULL)
 		goto out;
+	chargesize = skb_charge_fullsize(skb);
+	if (ub_sock_getwres_other(newsk, chargesize) < 0)
+		goto out;	
+	ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
 
 restart:
 	/*  Find listening sock. */
@@ -1072,7 +1153,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.pid	= virt_tgid(current);
 	newsk->sk_peercred.uid	= current->euid;
 	newsk->sk_peercred.gid	= current->egid;
 	newu = unix_sk(newsk);
@@ -1133,7 +1214,7 @@ static int unix_socketpair(struct socket
 	sock_hold(skb);
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
@@ -1237,7 +1318,7 @@ static void unix_detach_fds(struct scm_c
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+void unix_destruct_fds(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
@@ -1248,6 +1329,7 @@ static void unix_destruct_fds(struct sk_
 	scm_destroy(&scm);
 	sock_wfree(skb);
 }
+EXPORT_SYMBOL_GPL(unix_destruct_fds);
 
 #define MAX_RECURSION_LEVEL 4
 extern struct sock * unix_get_socket(struct file *filp);
@@ -1500,6 +1582,16 @@ static int unix_stream_sendmsg(struct ki
 
 		size = len-sent;
 
+		if (msg->msg_flags & MSG_DONTWAIT)
+			ub_sock_makewres_other(sk, skb_charge_size(size));
+		if (sock_bc(sk) != NULL) {
+			unsigned long res = sock_bc(sk)->poll_reserv;
+
+			if (res >= SOCK_MIN_UBCSPACE &&
+			    skb_charge_size(size) > res)
+				size = skb_charge_datalen(res);
+		}
+
 		/* Keep two messages in the pipe so it schedules better */
 		if (size > ((sk->sk_sndbuf >> 1) - 64))
 			size = (sk->sk_sndbuf >> 1) - 64;
@@ -1511,7 +1603,8 @@ static int unix_stream_sendmsg(struct ki
 		 *	Grab a buffer
 		 */
 		 
-		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+		skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
+				msg->msg_flags&MSG_DONTWAIT, &err);
 
 		if (skb==NULL)
 			goto out_err;
@@ -1959,6 +2052,7 @@ static unsigned int unix_poll(struct fil
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ub_res;
 
 	sock_poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -1971,6 +2065,10 @@ static unsigned int unix_poll(struct fil
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP;
 
+	no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+	if (no_ub_res)
+		ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sk->sk_shutdown & RCV_SHUTDOWN))
@@ -1984,7 +2082,7 @@ static unsigned int unix_poll(struct fil
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
-	if (unix_writable(sk))
+	if (!no_ub_res && unix_writable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
 	return mask;
@@ -2136,7 +2234,7 @@ static int __init af_unix_init(void)
 
 	sock_register(&unix_family_ops);
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create("unix", 0, &unix_seq_fops);
+	proc_glob_fops_create("net/unix", 0, &unix_seq_fops);
 #endif
 	unix_sysctl_register();
 out:
@@ -2147,7 +2245,7 @@ static void __exit af_unix_exit(void)
 {
 	sock_unregister(PF_UNIX);
 	unix_sysctl_unregister();
-	proc_net_remove("unix");
+	remove_proc_glob_entry("net/unix", NULL);
 	proto_unregister(&unix_proto);
 }
 
diff -upr kernel-2.6.18-417.el5.orig/net/unix/garbage.c kernel-2.6.18-417.el5-028stab121/net/unix/garbage.c
--- kernel-2.6.18-417.el5.orig/net/unix/garbage.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/unix/garbage.c	2017-01-13 08:40:24.000000000 -0500
@@ -81,6 +81,7 @@
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
 #include <linux/wait.h>
+#include <linux/module.h>
 
 #include <net/sock.h>
 #include <net/af_unix.h>
@@ -139,6 +140,7 @@ void unix_inflight(struct file *fp)
 		spin_unlock(&unix_gc_lock);
 	}
 }
+EXPORT_SYMBOL_GPL(unix_notinflight);
 
 void unix_notinflight(struct file *fp)
 {
diff -upr kernel-2.6.18-417.el5.orig/net/xfrm/xfrm_user.c kernel-2.6.18-417.el5-028stab121/net/xfrm/xfrm_user.c
--- kernel-2.6.18-417.el5.orig/net/xfrm/xfrm_user.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/net/xfrm/xfrm_user.c	2017-01-13 08:40:19.000000000 -0500
@@ -1558,7 +1558,7 @@ static int xfrm_user_rcv_msg(struct sk_b
 	link = &xfrm_dispatch[type];
 
 	/* All operations require privileges, even GET */
-	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
+	if (security_netlink_recv(skb, CAP_VE_NET_ADMIN)) {
 		*errp = -EPERM;
 		return -1;
 	}
diff -upr kernel-2.6.18-417.el5.orig/scripts/mod/modpost.c kernel-2.6.18-417.el5-028stab121/scripts/mod/modpost.c
--- kernel-2.6.18-417.el5.orig/scripts/mod/modpost.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/scripts/mod/modpost.c	2017-01-13 08:40:16.000000000 -0500
@@ -1291,16 +1291,20 @@ static void add_header(struct buffer *b,
 /**
  * Record CRCs for unresolved symbols
  **/
-static void add_versions(struct buffer *b, struct module *mod)
+static int add_versions(struct buffer *b, struct module *mod)
 {
 	struct symbol *s, *exp;
-
+	int err;
+	
+	err = 0;
 	for (s = mod->unres; s; s = s->next) {
 		exp = find_symbol(s->name);
 		if (!exp || exp->module == mod) {
-			if (have_vmlinux && !s->weak)
+			if (have_vmlinux && !s->weak) {
 				warn("\"%s\" [%s.ko] undefined!\n",
 				     s->name, mod->name);
+				err = 1;
+			}
 			continue;
 		}
 		s->module = exp->module;
@@ -1309,7 +1313,7 @@ static void add_versions(struct buffer *
 	}
 
 	if (!modversions)
-		return;
+		return err;
 
 	buf_printf(b, "\n");
 	buf_printf(b, "static const struct modversion_info ____versions[]\n");
@@ -1329,6 +1333,7 @@ static void add_versions(struct buffer *
 	}
 
 	buf_printf(b, "};\n");
+	return err;
 }
 
 static void add_depends(struct buffer *b, struct module *mod,
@@ -1593,7 +1598,7 @@ int main(int argc, char **argv)
 	char *dump_write = NULL;
 	char *markers_read = NULL;
 	char *markers_write = NULL;
-	int opt;
+	int opt, err;
 
 	while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) {
 		switch(opt) {
@@ -1639,6 +1644,7 @@ int main(int argc, char **argv)
 		check_exports(mod);
 	}
 
+	err = 0;
 	for (mod = modules; mod; mod = mod->next) {
 		if (mod->skip)
 			continue;
@@ -1646,7 +1652,7 @@ int main(int argc, char **argv)
 		buf.pos = 0;
 
 		add_header(&buf, mod);
-		add_versions(&buf, mod);
+		err |= add_versions(&buf, mod);
 		add_depends(&buf, mod, modules);
 		add_moddevtable(&buf, mod);
 		add_srcversion(&buf, mod);
@@ -1664,5 +1670,5 @@ int main(int argc, char **argv)
 	if (markers_write)
 		write_markers(markers_write);
 
-	return 0;
+	return err;
 }
diff -upr kernel-2.6.18-417.el5.orig/security/commoncap.c kernel-2.6.18-417.el5-028stab121/security/commoncap.c
--- kernel-2.6.18-417.el5.orig/security/commoncap.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/security/commoncap.c	2017-01-13 08:40:40.000000000 -0500
@@ -34,6 +34,10 @@ EXPORT_SYMBOL(cap_netlink_send);
 
 int cap_netlink_recv(struct sk_buff *skb, int cap)
 {
+	if (likely(cap == CAP_VE_NET_ADMIN) &&
+			cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
+		return 0;
+
 	if (!cap_raised(NETLINK_CB(skb).eff_cap, cap))
 		return -EPERM;
 	return 0;
@@ -196,7 +200,7 @@ int cap_inode_setxattr(struct dentry *de
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -205,7 +209,7 @@ int cap_inode_removexattr(struct dentry 
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -311,10 +315,11 @@ void cap_task_reparent_to_init (struct t
 
 int cap_syslog (int type)
 {
-	if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	if (dmesg_restrict && !capable(CAP_SYS_ADMIN) &&
+		 ve_is_super(get_exec_env()))
+			return -EPERM;
 
-	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
+	if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
diff -upr kernel-2.6.18-417.el5.orig/security/Kconfig kernel-2.6.18-417.el5-028stab121/security/Kconfig
--- kernel-2.6.18-417.el5.orig/security/Kconfig	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/security/Kconfig	2017-01-13 08:40:41.000000000 -0500
@@ -4,6 +4,8 @@
 
 menu "Security options"
 
+source grsecurity/Kconfig
+
 config KEYS
 	bool "Enable access key retention support"
 	help
@@ -47,13 +49,13 @@ config SECURITY_DMESG_RESTRICT
 	  syslog via dmesg(8).
 
 	  If this option is not selected, no restrictions will be enforced
-	  unless the dmesg_restrict sysctl is explicitly set to (1).
+	  unless the dmesg_restrict sysctl is explicitly set to 1.
 
 	  If you are unsure how to answer this question, answer N.
 
 config SECURITY
 	bool "Enable different security models"
-	depends on SYSFS
+	depends on SYSFS && !VE
 	help
 	  This allows you to choose different security modules to be
 	  configured into your kernel.
diff -upr kernel-2.6.18-417.el5.orig/security/min_addr.c kernel-2.6.18-417.el5-028stab121/security/min_addr.c
--- kernel-2.6.18-417.el5.orig/security/min_addr.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/security/min_addr.c	2017-01-13 08:40:21.000000000 -0500
@@ -34,12 +34,21 @@ int mmap_min_addr_handler(struct ctl_tab
 {
 	int ret;
 
-	if (write && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
+	if (!ve_is_super(get_exec_env())) {
+		if (write) {
+			if (!capable(CAP_VE_SYS_ADMIN))
+				return -EPERM;
+			return 0;
+		}
+	} else {
+		if (write && !capable(CAP_SYS_RAWIO))
+			return -EPERM;
+	}
 
 	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
 
-	update_mmap_min_addr();
+	if (write)
+		update_mmap_min_addr();
 
 	return ret;
 }
diff -upr kernel-2.6.18-417.el5.orig/security/selinux/hooks.c kernel-2.6.18-417.el5-028stab121/security/selinux/hooks.c
--- kernel-2.6.18-417.el5.orig/security/selinux/hooks.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/security/selinux/hooks.c	2017-01-13 08:40:40.000000000 -0500
@@ -1839,7 +1839,7 @@ static inline void flush_unauthorized_fi
 	mutex_lock(&tty_mutex);
 	tty = current->signal->tty;
 	if (tty) {
-		file_list_lock();
+		file_list_lock(&tty->tty_files);
 		if (!list_empty(&tty->tty_files)) {
 			struct inode *inode;
 
@@ -1848,7 +1848,7 @@ static inline void flush_unauthorized_fi
 			   than using file_has_perm, as this particular open
 			   file may belong to another process and we are only
 			   interested in the inode-based check here. */
-			file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list);
+			file = list_first_entry(&tty->tty_files.fl_list, struct file, f_u.fu_list);
 			inode = file->f_dentry->d_inode;
 			if (inode_has_perm(current, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
@@ -1857,7 +1857,7 @@ static inline void flush_unauthorized_fi
 				current->signal->tty_old_pgrp = 0;
 			}
 		}
-		file_list_unlock();
+		file_list_unlock(&tty->tty_files);
 	}
 	mutex_unlock(&tty_mutex);
 
@@ -4671,12 +4671,12 @@ static int selinux_setprocattr(struct ta
 			struct task_struct *g, *t;
 			struct mm_struct *mm = p->mm;
 			read_lock(&tasklist_lock);
-			do_each_thread(g, t)
+			do_each_thread_ve(g, t)
 				if (t->mm == mm && t != p) {
 					read_unlock(&tasklist_lock);
 					return -EPERM;
 				}
-			while_each_thread(g, t);
+			while_each_thread_ve(g, t);
 			read_unlock(&tasklist_lock);
                 }
 
diff -upr kernel-2.6.18-417.el5.orig/security/selinux/Kconfig kernel-2.6.18-417.el5-028stab121/security/selinux/Kconfig
--- kernel-2.6.18-417.el5.orig/security/selinux/Kconfig	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/security/selinux/Kconfig	2017-01-13 08:40:19.000000000 -0500
@@ -1,6 +1,6 @@
 config SECURITY_SELINUX
 	bool "NSA SELinux Support"
-	depends on SECURITY_NETWORK && AUDIT && NET && INET
+	depends on SECURITY_NETWORK && AUDIT && NET && INET && !VE
 	select NETWORK_SECMARK
 	default n
 	help
diff -upr kernel-2.6.18-417.el5.orig/security/selinux/selinuxfs.c kernel-2.6.18-417.el5-028stab121/security/selinux/selinuxfs.c
--- kernel-2.6.18-417.el5.orig/security/selinux/selinuxfs.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/security/selinux/selinuxfs.c	2017-01-13 08:40:40.000000000 -0500
@@ -943,8 +943,10 @@ static struct file_operations sel_commit
  * fs/proc/generic.c proc_kill_inodes */
 static void sel_remove_bools(struct dentry *de)
 {
-	struct list_head *p, *node;
+	struct list_head *node;
 	struct super_block *sb = de->d_sb;
+	struct file *filp;
+	int cpu;
 
 	spin_lock(&dcache_lock);
 	node = de->d_subdirs.next;
@@ -965,9 +967,8 @@ static void sel_remove_bools(struct dent
 
 	spin_unlock(&dcache_lock);
 
-	file_list_lock();
-	list_for_each(p, &sb->s_files) {
-		struct file * filp = list_entry(p, struct file, f_u.fu_list);
+	file_list_lock_sb(sb);
+	for_each_sb_file(filp, sb, cpu) {
 		struct dentry * dentry = filp->f_dentry;
 
 		if (dentry->d_parent != de) {
@@ -975,7 +976,7 @@ static void sel_remove_bools(struct dent
 		}
 		filp->f_op = NULL;
 	}
-	file_list_unlock();
+	file_list_unlock_sb(sb);
 }
 
 #define BOOL_DIR_NAME "booleans"
diff -upr kernel-2.6.18-417.el5.orig/sound/core/info.c kernel-2.6.18-417.el5-028stab121/sound/core/info.c
--- kernel-2.6.18-417.el5.orig/sound/core/info.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/sound/core/info.c	2017-01-13 08:40:22.000000000 -0500
@@ -545,7 +545,7 @@ int __init snd_info_init(void)
 {
 	struct proc_dir_entry *p;
 
-	p = snd_create_proc_entry("asound", S_IFDIR | S_IRUGO | S_IXUGO, &proc_root);
+	p = snd_create_proc_entry("asound", S_IFDIR | S_IRUGO | S_IXUGO, NULL);
 	if (p == NULL)
 		return -ENOMEM;
 	snd_proc_root = p;
diff -upr kernel-2.6.18-417.el5.orig/sound/core/info_oss.c kernel-2.6.18-417.el5-028stab121/sound/core/info_oss.c
--- kernel-2.6.18-417.el5.orig/sound/core/info_oss.c	2017-01-13 07:39:15.000000000 -0500
+++ kernel-2.6.18-417.el5-028stab121/sound/core/info_oss.c	2017-01-13 08:40:15.000000000 -0500
@@ -96,11 +96,11 @@ static void snd_sndstat_proc_read(struct
 {
 	snd_iprintf(buffer, "Sound Driver:3.8.1a-980706 (ALSA v" CONFIG_SND_VERSION " emulation code)\n");
 	snd_iprintf(buffer, "Kernel: %s %s %s %s %s\n",
-		    system_utsname.sysname,
-		    system_utsname.nodename,
-		    system_utsname.release,
-		    system_utsname.version,
-		    system_utsname.machine);
+		    init_utsname()->sysname,
+		    init_utsname()->nodename,
+		    init_utsname()->release,
+		    init_utsname()->version,
+		    init_utsname()->machine);
 	snd_iprintf(buffer, "Config options: 0\n");
 	snd_iprintf(buffer, "\nInstalled drivers: \n");
 	snd_iprintf(buffer, "Type 10: ALSA emulation\n");
