diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d5ed94d..ff1fea8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -15,6 +15,7 @@ config X86_64
 ### Arch settings
 config X86
 	def_bool y
+	select HAVE_IPIPE_HOSTRT if IPIPE
 	select HAVE_AOUT if X86_32
 	select HAVE_READQ
 	select HAVE_WRITEQ
@@ -155,7 +156,7 @@ config GENERIC_CALIBRATE_DELAY
 
 config GENERIC_TIME_VSYSCALL
 	bool
-	default X86_64
+	default X86_64 || IPIPE
 
 config ARCH_HAS_CPU_RELAX
 	def_bool y
@@ -507,6 +508,7 @@ config SCHED_OMIT_FRAME_POINTER
 
 menuconfig PARAVIRT_GUEST
 	bool "Paravirtualized guest support"
+	depends on !IPIPE
 	---help---
 	  Say Y here to get to see options related to running Linux under
 	  various hypervisors.  This option alone does not add any kernel code.
@@ -539,6 +541,7 @@ source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
 	bool "Enable paravirtualization code"
+	depends on !IPIPE
 	---help---
 	  This changes the kernel so it can modify itself when it is run
 	  under a hypervisor, potentially improving performance significantly
@@ -780,6 +783,8 @@ config IRQ_TIME_ACCOUNTING
 
 source "kernel/Kconfig.preempt"
 
+source "kernel/ipipe/Kconfig"
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
 	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3c89694..0d9dd89 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -415,7 +415,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+#ifdef CONFIG_IPIPE
+#define ack_APIC_irq() do { } while(0)
+static inline void __ack_APIC_irq(void)
+#else /* !CONFIG_IPIPE */
+#define __ack_APIC_irq() ack_APIC_irq()
 static inline void ack_APIC_irq(void)
+#endif /* CONFIG_IPIPE */
 {
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction
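
(The hunk above swaps the two acknowledge entry points: with CONFIG_IPIPE, ack_APIC_irq() as called from the generic kernel paths becomes a no-op, the pipeline being expected to have acknowledged the vector earlier, while __ack_APIC_irq() performs the real acknowledge. The user-space sketch below is illustrative only and not part of the patch; the real acknowledge writes the local APIC EOI register, stubbed out here.)

#include <stdio.h>

#define CONFIG_IPIPE 1			/* assumed for the example */

static void real_apic_eoi(void) { puts("EOI written"); }

#ifdef CONFIG_IPIPE
/* Generic callers become no-ops; the pipeline already handled the ack. */
#define ack_APIC_irq()		do { } while (0)
static inline void __ack_APIC_irq(void) { real_apic_eoi(); }
#else
/* Without the pipeline, both names perform the real acknowledge. */
#define __ack_APIC_irq()	ack_APIC_irq()
static inline void ack_APIC_irq(void) { real_apic_eoi(); }
#endif

int main(void)
{
	ack_APIC_irq();		/* no-op under CONFIG_IPIPE */
	__ack_APIC_irq();	/* always the real acknowledge */
	return 0;
}
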
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 8ac7695..a6a2239 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -152,6 +152,7 @@
 # define MAX_LOCAL_APIC 32768
 #endif
 
+#ifndef __ASSEMBLY__
 /*
  * All x86-64 systems are xAPIC compatible.
  * In the following, "apicid" is a physical APIC ID.
@@ -427,4 +428,7 @@ struct local_apic {
 #else
  #define BAD_APICID 0xFFFFu
 #endif
+
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab..f2292de 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,7 +16,11 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
+#ifdef CONFIG_IPIPE
+.irpc idx, "012"
+#else
 .irpc idx, "01234567"
+#endif
 BUILD_INTERRUPT3(invalidate_interrupt\idx,
 		 (INVALIDATE_TLB_VECTOR_START)+\idx,
 		 smp_invalidate_interrupt)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5..7b1216f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -35,6 +35,13 @@ extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
 extern void mce_self_interrupt(void);
+#ifdef CONFIG_IPIPE
+void ipipe_ipi0(void);
+void ipipe_ipi1(void);
+void ipipe_ipi2(void);
+void ipipe_ipi3(void);
+void ipipe_ipiX(void);
+#endif
 
 extern void invalidate_interrupt(void);
 extern void invalidate_interrupt0(void);
@@ -124,6 +131,7 @@ extern void smp_apic_timer_interrupt(struct pt_regs *);
 extern void smp_spurious_interrupt(struct pt_regs *);
 extern void smp_x86_platform_ipi(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
+extern void smp_perf_pending_interrupt(struct pt_regs *);
 #ifdef CONFIG_X86_IO_APIC
 extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
 #endif
@@ -136,6 +144,7 @@ extern void smp_invalidate_interrupt(struct pt_regs *);
 #else
 extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
+extern asmlinkage void smp_reboot_interrupt(void);
 #endif
 
 extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ef32890..e824edc 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -310,11 +310,14 @@ static inline void __clear_fpu(struct task_struct *tsk)
 static inline void kernel_fpu_begin(void)
 {
 	struct thread_info *me = current_thread_info();
+	unsigned long flags;
 	preempt_disable();
+	local_irq_save_hw_cond(flags);
 	if (me->status & TS_USEDFPU)
 		__save_init_fpu(me->task);
 	else
 		clts();
+	local_irq_restore_hw_cond(flags);
 }
 
 static inline void kernel_fpu_end(void)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index a203659..637db3a 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask;
 #define SLAVE_ICW4_DEFAULT	0x01
 #define PIC_ICW4_AEOI		2
 
-extern raw_spinlock_t i8259A_lock;
+IPIPE_DECLARE_RAW_SPINLOCK(i8259A_lock);
 
 /* the PIC may need a careful delay on some platforms, hence specific calls */
 static inline unsigned char inb_pic(unsigned int port)
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b72282..6574056 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -68,6 +68,9 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
 	 * to the APIC.
 	 */
 	unsigned int cfg;
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
 
 	/*
 	 * Wait for idle.
@@ -83,6 +86,8 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	native_apic_mem_write(APIC_ICR, cfg);
+
+	local_irq_restore_hw(flags);
 }
 
 /*
diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h
new file mode 100644
index 0000000..708a3d9
--- /dev/null
+++ b/arch/x86/include/asm/ipipe.h
@@ -0,0 +1,158 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe.h
+ *
+ *   Copyright (C) 2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_H
+#define __X86_IPIPE_H
+
+#ifdef CONFIG_IPIPE
+
+#ifndef IPIPE_ARCH_STRING
+#define IPIPE_ARCH_STRING	"2.11-03"
+#define IPIPE_MAJOR_NUMBER	2
+#define IPIPE_MINOR_NUMBER	11
+#define IPIPE_PATCH_NUMBER	3
+#endif
+
+DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
+
+DECLARE_PER_CPU(unsigned long, __ipipe_cr2);
+
+static inline unsigned __ipipe_get_irq_vector(int irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+	unsigned __ipipe_get_ioapic_irq_vector(int irq);
+	return __ipipe_get_ioapic_irq_vector(irq);
+#elif defined(CONFIG_X86_LOCAL_APIC)
+	return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ?
+		ipipe_apic_irq_vector(irq) : irq + IRQ0_VECTOR;
+#else
+	return irq + IRQ0_VECTOR;
+#endif
+}
+
+#ifdef CONFIG_X86_32
+# include "ipipe_32.h"
+#else
+# include "ipipe_64.h"
+#endif
+
+/*
+ * The logical processor id and the current Linux task are read from the PDA,
+ * so this is always safe, regardless of the underlying stack.
+ */
+#define ipipe_processor_id()	raw_smp_processor_id()
+#define ipipe_safe_current()	current
+
+#define prepare_arch_switch(next)		\
+do {						\
+	ipipe_schedule_notify(current, next);	\
+	local_irq_disable_hw();			\
+} while(0)
+
+#define task_hijacked(p)						\
+	({ int x = __ipipe_root_domain_p;				\
+	if (x) local_irq_enable_hw(); !x; })
+
+struct ipipe_domain;
+
+struct ipipe_sysinfo {
+	int sys_nr_cpus;	/* Number of CPUs on board */
+	int sys_hrtimer_irq;	/* hrtimer device IRQ */
+	u64 sys_hrtimer_freq;	/* hrtimer device frequency */
+	u64 sys_hrclock_freq;	/* hrclock device frequency */
+	u64 sys_cpu_freq;	/* CPU frequency (Hz) */
+};
+
+/* Private interface -- Internal use only */
+
+#define __ipipe_check_platform()	do { } while(0)
+#define __ipipe_init_platform()		do { } while(0)
+#define __ipipe_enable_irq(irq)		irq_to_desc(irq)->chip->enable(irq)
+#define __ipipe_disable_irq(irq)	irq_to_desc(irq)->chip->disable(irq)
+#define __ipipe_tick_irq		__ipipe_hrtimer_irq /* compat */
+
+#ifdef CONFIG_SMP
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
+#else
+#define __ipipe_hook_critical_ipi(ipd) do { } while(0)
+#endif
+
+#define __ipipe_disable_irqdesc(ipd, irq)	do { } while(0)
+
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_enable_pipeline(void);
+
+void __ipipe_do_critical_sync(unsigned irq, void *cookie);
+
+void __ipipe_serial_debug(const char *fmt, ...);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define ipipe_update_tick_evtdev(evtdev)				\
+	do {								\
+		if (strcmp((evtdev)->name, "lapic") == 0)		\
+			__ipipe_hrtimer_irq =				\
+				ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); \
+		else							\
+			__ipipe_hrtimer_irq = 0;			\
+	} while (0)
+#else
+#define ipipe_update_tick_evtdev(evtdev)				\
+	__ipipe_hrtimer_irq = 0
+#endif
+
+int __ipipe_check_lapic(void);
+
+int __ipipe_check_tickdev(const char *devname);
+
+#define __ipipe_syscall_watched_p(p, sc)	\
+	(ipipe_notifier_enabled_p(p) || (unsigned long)sc >= NR_syscalls)
+
+#define __ipipe_root_tick_p(regs)	((regs)->flags & X86_EFLAGS_IF)
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_update_tick_evtdev(evtdev)	do { } while (0)
+#define task_hijacked(p)			0
+
+#endif /* CONFIG_IPIPE */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE)
+#define __ipipe_move_root_irq(irq)					\
+	do {								\
+		if (irq < NR_IRQS) {					\
+			struct irq_desc *desc = irq_to_desc(irq);	\
+			struct irq_chip *chip = desc->irq_data.chip;	\
+			if (chip->irq_move)				\
+				chip->irq_move(&desc->irq_data);	\
+		}							\
+	} while (0)
+#else /* !(CONFIG_SMP && CONFIG_IPIPE) */
+#define __ipipe_move_root_irq(irq)	do { } while (0)
+#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */
+
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_LOCAL_APIC)
+void ipipe_init_vector_irq(int cpu);
+#else
+static inline void ipipe_init_vector_irq(int cpu) { }
+#endif
+
+#endif	/* !__X86_IPIPE_H */
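
(__ipipe_get_irq_vector() above maps a pipeline IRQ number back to its hardware vector; for APIC-backed IRQs it relies on the linear IRQ <-> vector translation done by ipipe_apic_irq_vector()/ipipe_apic_vector_irq(), defined in ipipe_base.h later in this patch. A self-contained round-trip of that arithmetic is sketched below for illustration only; NR_IRQS is an assumed value and the snippet is not part of the patch.)

#include <assert.h>

#define NR_IRQS			256	/* assumed value, for illustration only */
#define FIRST_SYSTEM_VECTOR	0xea	/* as defined by this patch in irq_vectors.h */
#define IPIPE_FIRST_APIC_IRQ	NR_IRQS

#define ipipe_apic_irq_vector(irq)  ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
#define ipipe_apic_vector_irq(vec)  ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)

int main(void)
{
	int irq = IPIPE_FIRST_APIC_IRQ + 3;	/* an APIC "system" IRQ */

	/* The two mappings are exact inverses of each other. */
	assert(ipipe_apic_vector_irq(ipipe_apic_irq_vector(irq)) == irq);
	return 0;
}
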
diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h
new file mode 100644
index 0000000..26be5e3
--- /dev/null
+++ b/arch/x86/include/asm/ipipe_32.h
@@ -0,0 +1,105 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe_32.h
+ *
+ *   Copyright (C) 2002-2005 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_32_H
+#define __X86_IPIPE_32_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <linux/ipipe_percpu.h>
+#include <asm/ptrace.h>
+
+#define ipipe_read_tsc(t)  __asm__ __volatile__("rdtsc" : "=A" (t))
+
+#define ipipe_tsc2ns(t) \
+({ \
+	unsigned long long delta = (t)*1000; \
+	do_div(delta, cpu_khz/1000+1); \
+	(unsigned long)delta; \
+})
+
+#define ipipe_tsc2us(t) \
+({ \
+    unsigned long long delta = (t); \
+    do_div(delta, cpu_khz/1000+1); \
+    (unsigned long)delta; \
+})
+
+/* Private interface -- Internal use only */
+
+extern unsigned int cpu_khz;
+extern int __ipipe_hrtimer_irq;
+#define __ipipe_cpu_freq	({ unsigned long long __freq = 1000ULL * cpu_khz; __freq; })
+#define __ipipe_hrtimer_freq	({ unsigned long long __freq = cpu_has_apic?__ipipe_cpu_freq:PIT_TICK_RATE; __freq; })
+#define __ipipe_hrclock_freq	({ unsigned long long __freq = cpu_has_tsc?__ipipe_cpu_freq:PIT_TICK_RATE; __freq; })
+
+int __ipipe_handle_irq(struct pt_regs *regs);
+
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
+{
+      __asm__("bsrl %1, %0":"=r"(ul)
+      :	"r"(ul));
+	return ul;
+}
+
+struct irq_desc;
+
+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc);
+
+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
+
+static inline void __do_root_xirq(ipipe_irq_handler_t handler,
+				  unsigned int irq)
+{
+	struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
+
+	regs->orig_ax = ~__ipipe_get_irq_vector(irq);
+
+	__asm__ __volatile__("pushfl\n\t"
+			     "orl %[x86if],(%%esp)\n\t"
+			     "pushl %%cs\n\t"
+			     "pushl $__xirq_end\n\t"
+			     "pushl %%eax\n\t"
+			     "pushl %%gs\n\t"
+			     "pushl %%fs\n\t"
+			     "pushl %%es\n\t"
+			     "pushl %%ds\n\t"
+			     "pushl %%eax\n\t"
+			     "pushl %%ebp\n\t"
+			     "pushl %%edi\n\t"
+			     "pushl %%esi\n\t"
+			     "pushl %%edx\n\t"
+			     "pushl %%ecx\n\t"
+			     "pushl %%ebx\n\t"
+			     "movl  %2,%%eax\n\t"
+			     "call *%1\n\t"
+			     "jmp ret_from_intr\n\t"
+			     "__xirq_end: cli\n"
+			     : /* no output */
+			     : "a" (~irq), "r" (handler), "rm" (regs),
+			       [x86if] "i" (X86_EFLAGS_IF));
+}
+
+#define __ipipe_do_root_xirq(ipd, irq)			\
+	__do_root_xirq((ipd)->irqs[irq].handler, irq)
+
+#endif	/* !__X86_IPIPE_32_H */
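
(ipipe_tsc2ns() and ipipe_tsc2us() above scale a raw TSC delta by cpu_khz. The user-space sketch below mirrors that arithmetic for illustration only: cpu_khz is an assumed value and do_div() is replaced by plain 64-bit division; it is not part of the patch.)

#include <stdio.h>
#include <stdint.h>

static const uint32_t cpu_khz = 2400000;	/* assumed 2.4 GHz TSC */

static unsigned long tsc2ns(uint64_t t)
{
	uint64_t delta = t * 1000;

	delta /= cpu_khz / 1000 + 1;	/* stands in for do_div() */
	return (unsigned long)delta;
}

static unsigned long tsc2us(uint64_t t)
{
	uint64_t delta = t;

	delta /= cpu_khz / 1000 + 1;
	return (unsigned long)delta;
}

int main(void)
{
	/* ~2.4 million cycles is roughly one millisecond on the assumed clock */
	printf("%lu ns, %lu us\n", tsc2ns(2400000), tsc2us(2400000));
	return 0;
}
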
diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h
new file mode 100644
index 0000000..0fb92b4
--- /dev/null
+++ b/arch/x86/include/asm/ipipe_64.h
@@ -0,0 +1,116 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe_64.h
+ *
+ *   Copyright (C) 2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_64_H
+#define __X86_IPIPE_64_H
+
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/ipipe_percpu.h>
+#ifdef CONFIG_SMP
+#include <asm/mpspec.h>
+#include <linux/thread_info.h>
+#endif
+
+#define ipipe_read_tsc(t)  do {		\
+	unsigned int __a,__d;			\
+	asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
+	(t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
+} while(0)
+
+extern unsigned int cpu_khz;
+extern int __ipipe_hrtimer_irq;
+#define __ipipe_cpu_freq	({ unsigned long long __freq = (1000ULL * cpu_khz); __freq; })
+#define __ipipe_hrtimer_freq	__ipipe_cpu_freq
+#define __ipipe_hrclock_freq	__ipipe_cpu_freq
+
+#define ipipe_tsc2ns(t)	(((t) * 1000UL) / (__ipipe_cpu_freq / 1000000UL))
+#define ipipe_tsc2us(t)	((t) / (__ipipe_cpu_freq / 1000000UL))
+
+/* Private interface -- Internal use only */
+
+int __ipipe_handle_irq(struct pt_regs *regs);
+
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
+{
+      __asm__("bsrq %1, %0":"=r"(ul)
+	      :	"rm"(ul));
+      return ul;
+}
+
+struct irq_desc;
+
+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc);
+
+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
+
+static inline void __do_root_xirq(ipipe_irq_handler_t handler,
+				  unsigned int irq)
+{
+	struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
+
+	regs->orig_ax = ~__ipipe_get_irq_vector(irq);
+
+	__asm__ __volatile__("movq  %%rsp, %%rax\n\t"
+			     "pushq $0\n\t"
+			     "pushq %%rax\n\t"
+			     "pushfq\n\t"
+			     "orq %[x86if],(%%rsp)\n\t"
+			     "pushq %[kernel_cs]\n\t"
+			     "pushq $__xirq_end\n\t"
+			     "pushq %[vector]\n\t"
+			     "subq  $9*8,%%rsp\n\t"
+			     "movq  %%rdi,8*8(%%rsp)\n\t"
+			     "movq  %%rsi,7*8(%%rsp)\n\t"
+			     "movq  %%rdx,6*8(%%rsp)\n\t"
+			     "movq  %%rcx,5*8(%%rsp)\n\t"
+			     "movq  %%rax,4*8(%%rsp)\n\t"
+			     "movq  %%r8,3*8(%%rsp)\n\t"
+			     "movq  %%r9,2*8(%%rsp)\n\t"
+			     "movq  %%r10,1*8(%%rsp)\n\t"
+			     "movq  %%r11,(%%rsp)\n\t"
+			     "call  *%[handler]\n\t"
+			     "cli\n\t"
+			     "jmp exit_intr\n\t"
+			     "__xirq_end: cli\n"
+			     : /* no output */
+			     : [kernel_cs] "i" (__KERNEL_CS),
+			       [vector] "rm" (regs->orig_ax),
+			       [handler] "r" (handler), "D" (regs),
+			       [x86if] "i" (X86_EFLAGS_IF)
+			     : "rax");
+}
+
+#define __ipipe_do_root_xirq(ipd, irq)			\
+	__do_root_xirq((ipd)->irqs[irq].handler, irq)
+
+#ifdef CONFIG_PREEMPT
+#define __ipipe_check_root_resched()			\
+	(preempt_count() == 0 && need_resched() &&	\
+	 per_cpu(irq_count, ipipe_processor_id()) < 0)
+#else
+#define __ipipe_check_root_resched()	0
+#endif
+
+#endif	/* !__X86_IPIPE_64_H */
diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h
new file mode 100644
index 0000000..80c7289
--- /dev/null
+++ b/arch/x86/include/asm/ipipe_base.h
@@ -0,0 +1,216 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe_base.h
+ *
+ *   Copyright (C) 2007-2009 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_BASE_H
+#define __X86_IPIPE_BASE_H
+
+#include <linux/threads.h>
+#include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
+#include <asm/bitsperlong.h>
+
+#ifdef CONFIG_X86_32
+#define IPIPE_NR_FAULTS		33 /* 32 from IDT + iret_error */
+#else
+#define IPIPE_NR_FAULTS		32
+#endif
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+/*
+ * System interrupts are mapped beyond the last defined external IRQ
+ * number.
+ */
+#define IPIPE_NR_XIRQS		(NR_IRQS + 32)
+#define IPIPE_FIRST_APIC_IRQ	NR_IRQS
+#define IPIPE_SERVICE_VECTOR0	(INVALIDATE_TLB_VECTOR_END + 1)
+#define IPIPE_SERVICE_IPI0	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0)
+#define IPIPE_SERVICE_VECTOR1	(INVALIDATE_TLB_VECTOR_END + 2)
+#define IPIPE_SERVICE_IPI1	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1)
+#define IPIPE_SERVICE_VECTOR2	(INVALIDATE_TLB_VECTOR_END + 3)
+#define IPIPE_SERVICE_IPI2	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2)
+#define IPIPE_SERVICE_VECTOR3	(INVALIDATE_TLB_VECTOR_END + 4)
+#define IPIPE_SERVICE_IPI3	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3)
+#ifdef CONFIG_SMP
+#define IPIPE_CRITICAL_VECTOR	(INVALIDATE_TLB_VECTOR_END + 5)
+#define IPIPE_CRITICAL_IPI	ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
+#endif
+#define ipipe_apic_irq_vector(irq)  ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
+#define ipipe_apic_vector_irq(vec)  ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
+#else /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */
+#define IPIPE_NR_XIRQS		NR_IRQS
+#endif /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */
+
+#define ipipe_ipi_p(ipi) \
+	(ipi >= IPIPE_SERVICE_IPI0 && ipi <= IPIPE_SERVICE_IPI3)
+
+/* Pseudo-vectors used for kernel events */
+#define IPIPE_FIRST_EVENT	IPIPE_NR_FAULTS
+#define IPIPE_EVENT_SYSCALL	(IPIPE_FIRST_EVENT)
+#define IPIPE_EVENT_SCHEDULE	(IPIPE_FIRST_EVENT + 1)
+#define IPIPE_EVENT_SIGWAKE	(IPIPE_FIRST_EVENT + 2)
+#define IPIPE_EVENT_SETSCHED	(IPIPE_FIRST_EVENT + 3)
+#define IPIPE_EVENT_INIT	(IPIPE_FIRST_EVENT + 4)
+#define IPIPE_EVENT_EXIT	(IPIPE_FIRST_EVENT + 5)
+#define IPIPE_EVENT_CLEANUP	(IPIPE_FIRST_EVENT + 6)
+#define IPIPE_EVENT_RETURN	(IPIPE_FIRST_EVENT + 7)
+#define IPIPE_EVENT_HOSTRT	(IPIPE_FIRST_EVENT + 8)
+#define IPIPE_LAST_EVENT	IPIPE_EVENT_HOSTRT
+#define IPIPE_NR_EVENTS		(IPIPE_LAST_EVENT + 1)
+
+#define ex_do_divide_error			0
+#define ex_do_debug				1
+/* NMI not pipelined. */
+#define ex_do_int3				3
+#define ex_do_overflow				4
+#define ex_do_bounds				5
+#define ex_do_invalid_op			6
+#define ex_do_device_not_available		7
+/* Double fault not pipelined. */
+#define ex_do_coprocessor_segment_overrun	9
+#define ex_do_invalid_TSS			10
+#define ex_do_segment_not_present		11
+#define ex_do_stack_segment			12
+#define ex_do_general_protection		13
+#define ex_do_page_fault			14
+#define ex_do_spurious_interrupt_bug		15
+#define ex_do_coprocessor_error			16
+#define ex_do_alignment_check			17
+#define ex_machine_check_vector			18
+#define ex_reserved				ex_machine_check_vector
+#define ex_do_simd_coprocessor_error		19
+#define ex_do_iret_error			32
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
+
+#include <asm/alternative.h>
+
+#ifdef CONFIG_X86_32
+#define GET_ROOT_STATUS_ADDR					\
+	"pushfl; cli;"						\
+	"movl %%fs:this_cpu_off, %%eax;"			\
+	"lea ipipe_percpu_darray(%%eax), %%eax;"
+#define PUT_ROOT_STATUS_ADDR	"popfl;"
+#define TEST_AND_SET_ROOT_STATUS \
+	"btsl $0,(%%eax);"
+#define TEST_ROOT_STATUS \
+	"btl $0,(%%eax);"
+#define ROOT_TEST_CLOBBER_LIST  "eax"
+#else /* CONFIG_X86_64 */
+#define GET_ROOT_STATUS_ADDR					\
+	"pushfq; cli;"						\
+	"movq %%gs:this_cpu_off, %%rax;"		\
+	"lea ipipe_percpu_darray(%%rax), %%rax;"
+#define PUT_ROOT_STATUS_ADDR	"popfq;"
+#define TEST_AND_SET_ROOT_STATUS \
+	"btsl $0,(%%rax);"
+#define TEST_ROOT_STATUS \
+	"btl $0,(%%rax);"
+#define ROOT_TEST_CLOBBER_LIST  "rax"
+#endif /* CONFIG_X86_64 */
+
+static inline void __ipipe_stall_root(void)
+{
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     TEST_AND_SET_ROOT_STATUS
+			     PUT_ROOT_STATUS_ADDR
+			     : : : ROOT_TEST_CLOBBER_LIST, "memory");
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     TEST_AND_SET_ROOT_STATUS
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : ROOT_TEST_CLOBBER_LIST, "memory");
+	return oldbit;
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     TEST_ROOT_STATUS
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : ROOT_TEST_CLOBBER_LIST);
+	return oldbit;
+}
+
+#else /* !CONFIG_SMP */
+
+#if __GNUC__ >= 4
+/* Alias to ipipe_root_cpudom_var(status) */
+extern unsigned long __ipipe_root_status;
+#else
+extern unsigned long *const __ipipe_root_status_addr;
+#define __ipipe_root_status	(*__ipipe_root_status_addr)
+#endif
+
+static inline void __ipipe_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	__asm__ __volatile__("btsl $0,%0;"
+			     :"+m" (*p) : : "memory");
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btsl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit), "+m" (*p)
+			     : : "memory");
+	return oldbit;
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit)
+			     :"m" (*p));
+	return oldbit;
+}
+
+#endif /* !CONFIG_SMP */
+
+void __ipipe_halt_root(void);
+
+void __ipipe_serial_debug(const char *fmt, ...);
+
+#endif	/* !__ASSEMBLY__ */
+
+#define __IPIPE_FEATURE_SYSINFO_V2	1
+
+#endif	/* !__X86_IPIPE_BASE_H */
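
(The helpers above keep the root domain's virtual interrupt mask in bit 0 of a per-CPU status word: stalling sets the bit, and the test-and-stall variants report whether it was already set -- the inline asm returns 0 or -1 via sbb, but callers only care about zero/non-zero. The plain-C rendering below is illustrative only; it ignores the per-CPU addressing and atomicity details and is not part of the patch.)

#include <assert.h>

static unsigned long root_status;	/* stands in for the per-CPU status word */

static inline void stall_root(void)
{
	root_status |= 1UL;		/* bit 0 set == root domain stalled */
}

static inline unsigned long test_and_stall_root(void)
{
	unsigned long oldbit = root_status & 1UL;

	root_status |= 1UL;
	return oldbit;			/* non-zero if it was already stalled */
}

static inline unsigned long test_root(void)
{
	return root_status & 1UL;
}

int main(void)
{
	assert(test_root() == 0);
	assert(test_and_stall_root() == 0);	/* first caller stalls the root domain */
	assert(test_root() != 0);
	assert(test_and_stall_root() != 0);	/* already stalled */
	return 0;
}
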
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894..9c9f5f3 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -96,10 +96,17 @@
 #define THRESHOLD_APIC_VECTOR		0xf9
 #define REBOOT_VECTOR			0xf8
 
+#ifdef CONFIG_IPIPE
+/* f0-f2 used for TLB flush, f3-f7 reserved for the I-pipe */
+#define INVALIDATE_TLB_VECTOR_END	0xf2
+#define INVALIDATE_TLB_VECTOR_START	0xf0
+#define NUM_INVALIDATE_TLB_VECTORS	3
+#else /* !CONFIG_IPIPE */
 /* f0-f7 used for spreading out TLB flushes: */
 #define INVALIDATE_TLB_VECTOR_END	0xf7
 #define INVALIDATE_TLB_VECTOR_START	0xf0
 #define NUM_INVALIDATE_TLB_VECTORS	   8
+#endif
 
 /*
  * Local APIC timer IRQ vector is on a different priority level,
@@ -120,6 +127,9 @@
 
 #define UV_BAU_MESSAGE			0xea
 
+/* I-pipe: lowest vector number among the system vectors defined above */
+#define FIRST_SYSTEM_VECTOR		0xea
+
 /*
  * Self IPI vector for machine checks
  */
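
(With the hunk above, only vectors 0xf0-0xf2 remain dedicated to TLB shootdowns, freeing 0xf3-0xf7. Combined with the IPIPE_SERVICE_VECTOR*/IPIPE_CRITICAL_VECTOR definitions from ipipe_base.h earlier in this patch, the layout works out as summarized below; the enum is illustrative only and not part of the patch.)

/* Illustrative summary of the 0xf0-0xf7 vector block under CONFIG_IPIPE. */
enum ipipe_high_vectors {
	INVALIDATE_TLB_V0	= 0xf0,	/* 0xf0-0xf2: 3 TLB-shootdown vectors */
	INVALIDATE_TLB_V2	= 0xf2,	/* == INVALIDATE_TLB_VECTOR_END       */
	IPIPE_SERVICE_V0	= 0xf3,	/* IPIPE_SERVICE_VECTOR0              */
	IPIPE_SERVICE_V1	= 0xf4,
	IPIPE_SERVICE_V2	= 0xf5,
	IPIPE_SERVICE_V3	= 0xf6,
	IPIPE_CRITICAL_V	= 0xf7,	/* IPIPE_CRITICAL_VECTOR (SMP only)   */
};
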
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 5745ce8..fe9c928 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -4,6 +4,11 @@
 #include <asm/processor-flags.h>
 
 #ifndef __ASSEMBLY__
+
+#include <linux/ipipe_base.h>
+#include <linux/ipipe_trace.h>
+#include <linux/compiler.h>
+
 /*
  * Interrupt control:
  */
@@ -54,6 +59,13 @@ static inline void native_halt(void)
 	asm volatile("hlt": : :"memory");
 }
 
+static inline int native_irqs_disabled(void)
+{
+	unsigned long flags = native_save_fl();
+
+	return !(flags & X86_EFLAGS_IF);
+}
+
 #endif
 
 #ifdef CONFIG_PARAVIRT
@@ -63,22 +75,46 @@ static inline void native_halt(void)
 
 static inline unsigned long arch_local_save_flags(void)
 {
+#ifdef CONFIG_IPIPE
+ 	unsigned long flags;
+
+	flags = (!__ipipe_test_root()) << 9;
+	barrier();
+	return flags;
+#else
 	return native_save_fl();
+#endif
 }
 
 static inline void arch_local_irq_restore(unsigned long flags)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_restore_root(!(flags & X86_EFLAGS_IF));
+#else
 	native_restore_fl(flags);
+#endif
 }
 
 static inline void arch_local_irq_disable(void)
 {
+#ifdef CONFIG_IPIPE
+	ipipe_check_context(ipipe_root_domain);
+	__ipipe_stall_root();
+	barrier();
+#else
 	native_irq_disable();
+#endif
 }
 
 static inline void arch_local_irq_enable(void)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_unstall_root();
+#else
 	native_irq_enable();
+#endif
 }
 
 /*
@@ -87,7 +123,12 @@ static inline void arch_local_irq_enable(void)
  */
 static inline void arch_safe_halt(void)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_halt_root();
+#else
 	native_safe_halt();
+#endif
 }
 
 /*
@@ -99,6 +140,20 @@ static inline void halt(void)
 	native_halt();
 }
 
+/* Merge virtual+real interrupt mask bits into a single word. */
+static inline unsigned long arch_mangle_irq_bits(int virt, unsigned long real)
+{
+	return (real & ~(1L << 31)) | ((unsigned long)(virt != 0) << 31);
+}
+
+/* Converse operation of arch_mangle_irq_bits() */
+static inline int arch_demangle_irq_bits(unsigned long *x)
+{
+	int virt = (*x & (1L << 31)) != 0;
+	*x &= ~(1L << 31);
+	return virt;
+}
+
 /*
  * For spinlocks, etc:
  */
@@ -113,6 +168,14 @@ static inline unsigned long arch_local_irq_save(void)
 #define ENABLE_INTERRUPTS(x)	sti
 #define DISABLE_INTERRUPTS(x)	cli
 
+#ifdef CONFIG_IPIPE
+#define ENABLE_INTERRUPTS_HW_COND	sti
+#define DISABLE_INTERRUPTS_HW_COND	cli
+#else /* !CONFIG_IPIPE */
+#define ENABLE_INTERRUPTS_HW_COND
+#define DISABLE_INTERRUPTS_HW_COND
+#endif /* !CONFIG_IPIPE */
+
 #ifdef CONFIG_X86_64
 #define SWAPGS	swapgs
 /*
@@ -163,6 +226,80 @@ static inline int arch_irqs_disabled(void)
 	return arch_irqs_disabled_flags(flags);
 }
 
+/*
+ * FIXME: we should really align on native_* at some point, instead of
+ * introducing yet another layer (i.e. *_hw()).
+ */
+#define local_irq_save_hw_notrace(flags)	\
+	do {					\
+		(flags) = native_save_fl();	\
+		native_irq_disable();		\
+	} while (0)
+
+static inline void local_irq_restore_hw_notrace(unsigned long flags)
+{
+	native_restore_fl(flags);
+}
+
+static inline void local_irq_disable_hw_notrace(void)
+{
+	native_irq_disable();
+}
+
+static inline void local_irq_enable_hw_notrace(void)
+{
+	native_irq_enable();
+}
+
+static inline int irqs_disabled_hw(void)
+{
+	return native_irqs_disabled();
+}
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+
+#define local_irq_disable_hw() do {			\
+		if (!native_irqs_disabled()) {		\
+			native_irq_disable();		\
+			ipipe_trace_begin(0x80000000);	\
+		}					\
+	} while (0)
+
+#define local_irq_enable_hw() do {			\
+		if (native_irqs_disabled()) {		\
+			ipipe_trace_end(0x80000000);	\
+			native_irq_enable();		\
+		}					\
+	} while (0)
+
+#define local_irq_save_hw(flags) do {			\
+		(flags) = native_save_fl();		\
+		if ((flags) & X86_EFLAGS_IF) {		\
+			native_irq_disable();		\
+			ipipe_trace_begin(0x80000001);	\
+		}					\
+	} while (0)
+
+#define local_irq_restore_hw(flags) do {		\
+		if ((flags) & X86_EFLAGS_IF)		\
+			ipipe_trace_end(0x80000001);	\
+		native_restore_fl(flags);		\
+	} while (0)
+
+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+
+#define local_irq_save_hw(flags)	local_irq_save_hw_notrace(flags)
+#define local_irq_restore_hw(flags)	local_irq_restore_hw_notrace(flags)
+#define local_irq_enable_hw()		local_irq_enable_hw_notrace()
+#define local_irq_disable_hw()		local_irq_disable_hw_notrace()
+
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+
+#define local_save_flags_hw(flags)			\
+	do {						\
+		(flags) = native_save_fl();		\
+	} while (0)
+
 #else
 
 #ifdef CONFIG_X86_64
@@ -181,7 +318,10 @@ static inline int arch_irqs_disabled(void)
 	pushl %eax;				\
 	pushl %ecx;				\
 	pushl %edx;				\
+	pushfl;					\
+	sti;					\
 	call lockdep_sys_exit;			\
+	popfl;					\
 	popl %edx;				\
 	popl %ecx;				\
 	popl %eax;
@@ -190,8 +330,38 @@ static inline int arch_irqs_disabled(void)
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+# ifdef CONFIG_IPIPE
+#  ifdef CONFIG_X86_64
+#   define TRACE_IRQS_ON			\
+	call trace_hardirqs_on_thunk;		\
+	pushq %rax;				\
+	PER_CPU(ipipe_percpu_darray, %rax);	\
+	btrl $0,(%rax);				\
+	popq %rax
+#   define TRACE_IRQS_OFF			\
+	pushq %rax;				\
+	PER_CPU(ipipe_percpu_darray, %rax);	\
+	btsl $0,(%rax);				\
+	popq %rax;				\
+	call trace_hardirqs_off_thunk
+#  else /* CONFIG_X86_32 */
+#   define TRACE_IRQS_ON			\
+	call trace_hardirqs_on_thunk;		\
+	pushl %eax;				\
+	PER_CPU(ipipe_percpu_darray, %eax);	\
+	btrl $0,(%eax);				\
+	popl %eax
+#   define TRACE_IRQS_OFF			\
+	pushl %eax;				\
+	PER_CPU(ipipe_percpu_darray, %eax);	\
+	btsl $0,(%eax);				\
+	popl %eax;				\
+	call trace_hardirqs_off_thunk
+#  endif /* CONFIG_X86_32 */
+# else /* !CONFIG_IPIPE */
 #  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk;
 #  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk;
+# endif /* !CONFIG_IPIPE */
 #else
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
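
(arch_mangle_irq_bits()/arch_demangle_irq_bits() above fold the virtual masking state into bit 31 of the hardware flags word, so a single value can carry both the real X86_EFLAGS_IF state and the virtual one. The self-contained round-trip below is illustrative only and not part of the patch.)

#include <assert.h>

#define X86_EFLAGS_IF	0x200UL		/* hardware interrupt-enable flag */

static inline unsigned long mangle_irq_bits(int virt, unsigned long real)
{
	return (real & ~(1UL << 31)) | ((unsigned long)(virt != 0) << 31);
}

static inline int demangle_irq_bits(unsigned long *x)
{
	int virt = (*x & (1UL << 31)) != 0;

	*x &= ~(1UL << 31);
	return virt;
}

int main(void)
{
	/* Hardware IRQs enabled, root domain virtually stalled. */
	unsigned long flags = mangle_irq_bits(1, X86_EFLAGS_IF);

	assert(demangle_irq_bits(&flags) == 1);	/* virtual bit recovered...      */
	assert(flags == X86_EFLAGS_IF);		/* ...and the real flags intact  */
	return 0;
}
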
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8b5393e..25b3411 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -30,11 +30,14 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
+static inline void __switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk)
 {
 	unsigned cpu = smp_processor_id();
 
+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL
+	WARN_ON_ONCE(!irqs_disabled_hw());
+#endif
 	if (likely(prev != next)) {
 #ifdef CONFIG_SMP
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
@@ -71,10 +74,23 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 }
 
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	unsigned long flags;
+	local_irq_save_hw_cond(flags);
+	__switch_mm(prev, next, tsk);
+	local_irq_restore_hw_cond(flags);
+}
+
+#define ipipe_mm_switch_protect(flags)	local_irq_save_hw_cond(flags)
+#define ipipe_mm_switch_unprotect(flags) \
+	local_irq_restore_hw_cond(flags)
+
 #define activate_mm(prev, next)			\
 do {						\
 	paravirt_activate_mm((prev), (next));	\
-	switch_mm((prev), (next), NULL);	\
+	__switch_mm((prev), (next), NULL);	\
 } while (0);
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf..e28697e 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,7 +1,11 @@
 #ifndef _ASM_X86_PAGE_64_DEFS_H
 #define _ASM_X86_PAGE_64_DEFS_H
 
+#ifdef CONFIG_IPIPE
+#define THREAD_ORDER	2
+#else
 #define THREAD_ORDER	1
+#endif
 #define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
 #define CURRENT_MASK (~(THREAD_SIZE - 1))
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636ce..b53f573 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -439,6 +439,7 @@ struct thread_struct {
 	unsigned short		ds;
 	unsigned short		fsindex;
 	unsigned short		gsindex;
+	unsigned long		rip;
 #endif
 #ifdef CONFIG_X86_32
 	unsigned long		ip;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3e..ee98176 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -127,8 +127,12 @@ do {									\
 #define switch_to(prev, next, last) \
 	asm volatile(SAVE_CONTEXT					  \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
+ 	     "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */	  \
 	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
-	     "call __switch_to\n\t"					  \
+ 	     "pushq %P[threadrip](%[next])\n\t" /* restore RIP */	  \
+ 	     "jmp __switch_to\n\t"					  \
+  	     ".globl thread_return\n\t"					  \
+	     "thread_return:\n\t"					  \
 	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
 	     __switch_canary						  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
@@ -140,6 +144,7 @@ do {									\
 	       __switch_canary_oparam					  \
 	     : [next] "S" (next), [prev] "D" (prev),			  \
 	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+	       [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [_tif_fork] "i" (_TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
@@ -307,8 +312,13 @@ static inline void native_wbinvd(void)
 #else
 #define read_cr0()	(native_read_cr0())
 #define write_cr0(x)	(native_write_cr0(x))
+#ifdef CONFIG_IPIPE
+#define read_cr2()	__raw_get_cpu_var(__ipipe_cr2)
+#define write_cr2(x)	__raw_get_cpu_var(__ipipe_cr2) = (x)
+#else /* !CONFIG_IPIPE */
 #define read_cr2()	(native_read_cr2())
 #define write_cr2(x)	(native_write_cr2(x))
+#endif /* !CONFIG_IPIPE */
 #define read_cr3()	(native_read_cr3())
 #define write_cr3(x)	(native_write_cr3(x))
 #define read_cr4()	(native_read_cr4())
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0310da6..248f58c 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -83,8 +83,8 @@ extern int panic_on_unrecovered_nmi;
 void math_error(struct pt_regs *, int, int);
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
-asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void mce_threshold_interrupt(void);
 #endif
+asmlinkage void smp_thermal_interrupt(void);
 
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 1ca132f..f8b9a98 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -14,6 +14,7 @@
  */
 typedef unsigned long long cycles_t;
 
+extern struct clocksource clocksource_tsc;
 extern unsigned int cpu_khz;
 extern unsigned int tsc_khz;
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2..ca78912 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
+obj-$(CONFIG_IPIPE)		+= ipipe.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d7..65ca733 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -475,7 +475,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -495,7 +495,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 		break;
 	}
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 /*
@@ -1007,7 +1007,7 @@ void lapic_shutdown(void)
 	if (!cpu_has_apic && !apic_from_smp_config())
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 #ifdef CONFIG_X86_32
 	if (!enabled_via_apicbase)
@@ -1017,7 +1017,12 @@ void lapic_shutdown(void)
 		disable_local_APIC();
 
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
+}
+
+int __ipipe_check_lapic(void)
+{
+	return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY);
 }
 
 /*
@@ -1265,7 +1270,7 @@ void __cpuinit setup_local_APIC(void)
 			value = apic_read(APIC_ISR + i*0x10);
 			for (j = 31; j >= 0; j--) {
 				if (value & (1<<j)) {
-					ack_APIC_irq();
+					__ack_APIC_irq();
 					acked++;
 				}
 			}
@@ -1795,7 +1800,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	 */
 	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
+		__ack_APIC_irq();
 
 	inc_irq_stat(irq_spurious_count);
 
@@ -2064,13 +2069,13 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	disable_local_APIC();
 
 	if (intr_remapping_enabled)
 		disable_intr_remapping();
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return 0;
 }
 
@@ -2085,7 +2090,7 @@ static int lapic_resume(struct sys_device *dev)
 	if (!apic_pm_state.active)
 		return 0;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	if (intr_remapping_enabled) {
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
@@ -2151,7 +2156,7 @@ static int lapic_resume(struct sys_device *dev)
 		free_ioapic_entries(ioapic_entries);
 	}
 restore:
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 
 	return ret;
 }
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17..0820b62 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -72,9 +72,9 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e437778..2c0f203 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -73,8 +73,8 @@
  */
 int sis_apic_bug = -1;
 
-static DEFINE_RAW_SPINLOCK(ioapic_lock);
-static DEFINE_RAW_SPINLOCK(vector_lock);
+static IPIPE_DEFINE_RAW_SPINLOCK(ioapic_lock);
+static IPIPE_DEFINE_RAW_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -329,6 +329,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP)
+
 static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
@@ -352,6 +354,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
 	return false;
 }
 
+#endif /* !CONFIG_IPIPE || CONFIG_SMP */
+
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -515,18 +519,24 @@ static void io_apic_sync(struct irq_pin_list *entry)
 	readl(&io_apic->data);
 }
 
-static void mask_ioapic(struct irq_cfg *cfg)
+static inline void __mask_ioapic(struct irq_cfg *cfg)
+{
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+
+static void mask_ioapic(unsigned int irq, struct irq_cfg *cfg)
 {
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	ipipe_irq_lock(irq);
+	__mask_ioapic(cfg);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void mask_ioapic_irq(struct irq_data *data)
 {
-	mask_ioapic(data->chip_data);
+	mask_ioapic(data->irq, data->chip_data);
 }
 
 static void __unmask_ioapic(struct irq_cfg *cfg)
@@ -534,18 +544,19 @@ static void __unmask_ioapic(struct irq_cfg *cfg)
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
-static void unmask_ioapic(struct irq_cfg *cfg)
+static void unmask_ioapic(unsigned int irq, struct irq_cfg *cfg)
 {
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	__unmask_ioapic(cfg);
+	ipipe_irq_unlock(irq);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void unmask_ioapic_irq(struct irq_data *data)
 {
-	unmask_ioapic(data->chip_data);
+	unmask_ioapic(data->irq, data->chip_data);
 }
 
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -1214,6 +1225,7 @@ void __setup_vector_irq(int cpu)
 		if (!cpumask_test_cpu(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
+	ipipe_init_vector_irq(cpu);
 	raw_spin_unlock(&vector_lock);
 }
 
@@ -2151,6 +2163,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
 			was_pending = 1;
 	}
 	__unmask_ioapic(data->chip_data);
+	ipipe_irq_unlock(irq);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -2324,7 +2337,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 	irq_enter();
 
 	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+	for (vector = FIRST_EXTERNAL_VECTOR + 1; vector < NR_VECTORS;
+	     vector++) {
 		unsigned int irq;
 		unsigned int irr;
 		struct irq_desc *desc;
@@ -2404,35 +2418,19 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { }
 
 static void ack_apic_edge(struct irq_data *data)
 {
+#ifndef CONFIG_IPIPE
 	irq_complete_move(data->chip_data);
 	move_native_irq(data->irq);
-	ack_APIC_irq();
+#endif /* CONFIG_IPIPE */
+	__ack_APIC_irq();
 }
 
 atomic_t irq_mis_count;
 
-/*
- * IO-APIC versions below 0x20 don't support EOI register.
- * For the record, here is the information about various versions:
- *     0Xh     82489DX
- *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
- *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
- *     30h-FFh Reserved
- *
- * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
- * version as 0x2. This is an error with documentation and these ICH chips
- * use io-apic's of version 0x20.
- *
- * For IO-APIC's with EOI register, we use that to do an explicit EOI.
- * Otherwise, we simulate the EOI message manually by changing the trigger
- * mode to edge and then back to level, with RTE being masked during this.
-*/
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void __eoi_ioapic_irq(struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		if (mp_ioapics[entry->apic].apicver >= 0x20) {
 			/*
@@ -2450,21 +2448,82 @@ static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 			__unmask_and_level_IO_APIC_irq(entry);
 		}
 	}
+}
+
+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP)
+
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
+
+static inline void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(cfg);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+#endif /* !IPIPE || SMP */
+
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+
+static void move_xxapic_irq(struct irq_data *data)
+{
+	unsigned int irq = data->irq;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = data->chip_data;
+
+	if (desc->handle_irq == &handle_edge_irq) {
+		raw_spin_lock(&desc->lock);
+		irq_complete_move(cfg);
+		move_native_irq(irq);
+		raw_spin_unlock(&desc->lock);
+	} else if (desc->handle_irq == &handle_fasteoi_irq) {
+		raw_spin_lock(&desc->lock);
+		irq_complete_move(cfg);
+		if (irq_remapped(cfg))
+			eoi_ioapic_irq(irq, cfg);
+		if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+			if (!io_apic_level_ack_pending(cfg))
+				move_masked_irq(irq);
+			unmask_ioapic(irq, cfg);
+		}
+		raw_spin_unlock(&desc->lock);
+	} else
+		WARN_ON_ONCE(1);
+}
+
+#endif /* CONFIG_IPIPE && CONFIG_SMP */
+
 static void ack_apic_level(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
-	int i, do_unmask_irq = 0, irq = data->irq;
 	unsigned long v;
+	int i;
+#ifndef CONFIG_IPIPE
+	int do_unmask_irq = 0, irq = data->irq;
 
 	irq_complete_move(cfg);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
 	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
-		mask_ioapic(cfg);
+		mask_ioapic(irq, cfg);
 	}
 #endif
 
@@ -2552,19 +2611,38 @@ static void ack_apic_level(struct irq_data *data)
 		 */
 		if (!io_apic_level_ack_pending(cfg))
 			move_masked_irq(irq);
-		unmask_ioapic(cfg);
+		unmask_ioapic(irq, cfg);
 	}
+#else /* CONFIG_IPIPE */
+	/*
+	 * Prevent low priority IRQs grabbed by high priority domains
+	 * from being delayed, waiting for a high priority interrupt
+	 * handler running in a low priority domain to complete.
+	 * This code assumes hw interrupts off.
+	 */
+	i = cfg->vector;
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+	raw_spin_lock(&ioapic_lock);
+	if (unlikely(!(v & (1 << (i & 0x1f))))) {
+		/* IO-APIC erratum: see comment above. */
+		atomic_inc(&irq_mis_count);
+		__eoi_ioapic_irq(cfg);
+	}
+	__mask_ioapic(cfg);
+	raw_spin_unlock(&ioapic_lock);
+	__ack_APIC_irq();
+#endif /* CONFIG_IPIPE */
 }
 
 #ifdef CONFIG_INTR_REMAP
 static void ir_ack_apic_edge(struct irq_data *data)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(struct irq_data *data)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 	eoi_ioapic_irq(data->irq, data->chip_data);
 }
 #endif /* CONFIG_INTR_REMAP */
@@ -2578,6 +2656,9 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_eoi		= ack_apic_level,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= ioapic_set_affinity,
+#ifdef CONFIG_IPIPE
+	.irq_move		= move_xxapic_irq,
+#endif
 #endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
@@ -2592,6 +2673,9 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 	.irq_eoi		= ir_ack_apic_level,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= ir_ioapic_set_affinity,
+#ifdef CONFIG_IPIPE
+	.irq_move		= move_xxapic_irq,
+#endif
 #endif
 #endif
 	.irq_retrigger		= ioapic_retrigger_irq,
@@ -2636,23 +2720,29 @@ static inline void init_IO_APIC_traps(void)
 
 static void mask_lapic_irq(struct irq_data *data)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	local_irq_save_hw_cond(flags);
+	ipipe_irq_lock(data->irq);
 	v = apic_read(APIC_LVT0);
 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	local_irq_restore_hw_cond(flags);
 }
 
 static void unmask_lapic_irq(struct irq_data *data)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	local_irq_save_hw_cond(flags);
 	v = apic_read(APIC_LVT0);
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	ipipe_irq_unlock(data->irq);
+	local_irq_restore_hw_cond(flags);
 }
 
 static void ack_lapic_irq(struct irq_data *data)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 static struct irq_chip lapic_chip __read_mostly = {
@@ -2660,6 +2750,9 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.irq_mask	= mask_lapic_irq,
 	.irq_unmask	= unmask_lapic_irq,
 	.irq_ack	= ack_lapic_irq,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move	= move_xxapic_irq,
+#endif
 };
 
 static void lapic_register_intr(int irq)
@@ -2818,7 +2911,7 @@ static inline void __init check_timer(void)
 			int idx;
 			idx = find_irq_entry(apic1, pin1, mp_INT);
 			if (idx != -1 && irq_trigger(idx))
-				unmask_ioapic(cfg);
+				unmask_ioapic(0, cfg);
 		}
 		if (timer_irq_works()) {
 			if (disable_timer_pin_1 > 0)
@@ -2861,6 +2954,10 @@ static inline void __init check_timer(void)
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
 	lapic_register_intr(0);
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64)
+	irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq;
+	irq_to_desc(0)->ipipe_end = __ipipe_end_edge_irq;
+#endif
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	legacy_pic->unmask(0);
 
@@ -3245,6 +3342,9 @@ static struct irq_chip msi_chip = {
 	.irq_ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= msi_set_affinity,
+#ifdef CONFIG_IPIPE
+	.irq_move		= move_xxapic_irq,
+#endif
 #endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
@@ -3257,6 +3357,9 @@ static struct irq_chip msi_ir_chip = {
 	.irq_ack		= ir_ack_apic_edge,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= ir_msi_set_affinity,
+#ifdef CONFIG_IPIPE
+	.irq_move		= move_xxapic_irq,
+#endif
 #endif
 #endif
 	.irq_retrigger		= ioapic_retrigger_irq,
@@ -3558,6 +3661,9 @@ static struct irq_chip ht_irq_chip = {
 	.irq_ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= ht_set_affinity,
+#ifdef CONFIG_IPIPE
+	.irq_move		= move_xxapic_irq,
+#endif
 #endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
@@ -3859,6 +3965,18 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
 	return 0;
 }
 
+#ifdef CONFIG_IPIPE
+unsigned __ipipe_get_ioapic_irq_vector(int irq)
+{
+	if (irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS)
+		return ipipe_apic_irq_vector(irq);
+	else if (irq == IRQ_MOVE_CLEANUP_VECTOR)
+		return irq;
+	else
+		return irq_cfg(irq)->vector;
+}
+#endif /* CONFIG_IPIPE */
+
 /*
  * This function currently is only a helper for the i386 smp boot process where
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e0..f5ad117 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -29,12 +29,12 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
 	 * to an arbitrary mask, so I do a unicast to each CPU instead.
 	 * - mbligh
 	 */
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
 				query_cpu), vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
@@ -46,14 +46,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 
 	/* See Hack comment above */
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
 			continue;
 		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
 				 query_cpu), vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
@@ -68,12 +68,12 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 	 * should be modified to do 1 message per cluster ID - mbligh
 	 */
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask)
 		__default_send_IPI_dest_field(
 			apic->cpu_to_logical_apicid(query_cpu), vector,
 			apic->dest_logical);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
@@ -85,7 +85,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 
 	/* See Hack comment above */
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
 			continue;
@@ -93,7 +93,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 			apic->cpu_to_logical_apicid(query_cpu), vector,
 			apic->dest_logical);
 		}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 #ifdef CONFIG_X86_32
@@ -109,10 +109,10 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
 	if (WARN_ONCE(!mask, "empty IPI mask"))
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
 	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 void default_send_IPI_allbutself(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59..23baa6d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -61,13 +61,13 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		__x2apic_send_IPI_dest(
 			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 			vector, apic->dest_logical);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void
@@ -79,7 +79,7 @@ static void
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
 			continue;
@@ -87,7 +87,7 @@ static void
 				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 				vector, apic->dest_logical);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void x2apic_send_IPI_allbutself(int vector)
@@ -98,7 +98,7 @@ static void x2apic_send_IPI_allbutself(int vector)
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_online_cpu(query_cpu) {
 		if (query_cpu == this_cpu)
 			continue;
@@ -106,7 +106,7 @@ static void x2apic_send_IPI_allbutself(int vector)
 				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 				vector, apic->dest_logical);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38..19bccee 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -62,12 +62,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void
@@ -79,14 +79,14 @@ static void
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu != this_cpu)
 			__x2apic_send_IPI_dest(
 				per_cpu(x86_cpu_to_apicid, query_cpu),
 				vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void x2apic_send_IPI_allbutself(int vector)
@@ -97,14 +97,14 @@ static void x2apic_send_IPI_allbutself(int vector)
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for_each_online_cpu(query_cpu) {
 		if (query_cpu == this_cpu)
 			continue;
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 68a3343..f7e0927 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -18,7 +18,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
 
 	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
 	rcr = getCx86(CX86_RCR_BASE + reg);
 	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 
 	shift = ((unsigned char *) base)[1] & 0x0f;
 	*base >>= PAGE_SHIFT;
@@ -178,6 +178,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
 			  unsigned long size, mtrr_type type)
 {
 	unsigned char arr, arr_type, arr_size;
+	unsigned long flags;
 
 	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
 
@@ -221,6 +222,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
 		}
 	}
 
+	local_irq_save_hw(flags);
+
 	prepare_set();
 
 	base <<= PAGE_SHIFT;
@@ -230,6 +233,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
 	setCx86(CX86_RCR_BASE + reg, arr_type);
 
 	post_set();
+
+	local_irq_restore_hw(flags);
 }
 
 typedef struct {
@@ -247,8 +252,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
 
 static void cyrix_set_all(void)
 {
+	unsigned long flags;
 	int i;
 
+	local_irq_save_hw(flags);
 	prepare_set();
 
 	/* the CCRs are not contiguous */
@@ -263,6 +270,7 @@ static void cyrix_set_all(void)
 	}
 
 	post_set();
+	local_irq_restore_hw(flags);
 }
 
 static const struct mtrr_ops cyrix_mtrr_ops = {
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9f27228..caef22b 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -717,7 +717,7 @@ static void generic_set_all(void)
 	unsigned long mask, count;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	prepare_set();
 
 	/* Actually set the state */
@@ -727,7 +727,7 @@ static void generic_set_all(void)
 	pat_init();
 
 	post_set();
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 
 	/* Use the atomic bitops to update the global mask */
 	for (count = 0; count < sizeof mask * 8; ++count) {
@@ -751,12 +751,12 @@ static void generic_set_all(void)
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
 			     unsigned long size, mtrr_type type)
 {
-	unsigned long flags;
+	unsigned long flags, _flags;
 	struct mtrr_var_range *vr;
 
 	vr = &mtrr_state.var_ranges[reg];
 
-	local_irq_save(flags);
+	local_irq_save_full(flags, _flags);
 	prepare_set();
 
 	if (size == 0) {
@@ -777,7 +777,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 	}
 
 	post_set();
-	local_irq_restore(flags);
+	local_irq_restore_full(flags, _flags);
 }
 
 int generic_validate_add_page(unsigned long base, unsigned long size,
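
Note: generic_set_mtrr() now uses local_irq_save_full(flags, _flags) / local_irq_restore_full(flags, _flags), which pair the virtual (root-domain) disable with the real hardware disable, so the MTRR update is protected against both deferred Linux interrupts and co-kernel interrupts. A rough equivalent in terms of the primitives sketched above -- the sketch_ macros are illustrative, not the patch's actual definitions:

#define sketch_irq_save_full(vflags, rflags)				\
	do {								\
		local_irq_save(vflags);		/* virtual: stall the root domain */	\
		(rflags) = sketch_irq_save_hw();	/* real: clear EFLAGS.IF */	\
	} while (0)

#define sketch_irq_restore_full(vflags, rflags)			\
	do {								\
		sketch_irq_restore_hw(rflags);				\
		local_irq_restore(vflags);				\
	} while (0)
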
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723..89c7380 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -344,6 +344,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 	local_irq_enable();
 	do_exit(SIGBUS);
 }
+EXPORT_SYMBOL_GPL(die_nmi);
 
 static int __init oops_setup(char *s)
 {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 74cc1ed..479f528 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -91,6 +91,9 @@ void show_registers(struct pt_regs *regs)
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
 		TASK_COMM_LEN, current->comm, task_pid_nr(current),
 		current_thread_info(), current, task_thread_info(current));
+#ifdef CONFIG_IPIPE
+	printk(KERN_EMERG "I-pipe domain %s\n", ipipe_current_domain->name);
+#endif /* CONFIG_IPIPE */
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index a6b6fcf..7862049 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -283,6 +283,11 @@ void show_registers(struct pt_regs *regs)
 	printk("CPU %d ", cpu);
 	print_modules();
 	__show_regs(regs, 1);
+#ifdef CONFIG_IPIPE
+	if (ipipe_current_domain != ipipe_root_domain)
+		printk("I-pipe domain %s\n", ipipe_current_domain->name);
+	else
+#endif /* CONFIG_IPIPE */
 	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
 		cur->comm, cur->pid, task_thread_info(cur), cur);
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ca3b0e..bc97baf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -44,6 +44,7 @@
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
+#include <asm/ipipe_base.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
@@ -80,6 +81,58 @@
 
 #define nr_syscalls ((syscall_table_size)/4)
 
+#ifdef CONFIG_IPIPE
+#define CATCH_ROOT_SYSCALL(bypass_check,bypass_nocheck)	\
+				movl  %esp,%eax ; \
+				call __ipipe_syscall_root ; \
+				testl  %eax,%eax ; \
+				movl PT_EAX(%esp),%eax ; \
+				js    bypass_check ; \
+				jne   bypass_nocheck ; \
+				movl PT_ORIG_EAX(%esp),%eax
+#define PUSH_XCODE(v)		pushl $ ex_ ## v
+#define PUSH_XCODE_CFI(v)	pushl $ ex_ ## v ; CFI_ADJUST_CFA_OFFSET 4
+#define PUSH_XVEC(v)		pushl $ ex_ ## v
+#define PUSH_XVEC_CFI(v)	pushl $ ex_ ## v ; CFI_ADJUST_CFA_OFFSET 4
+#define HANDLE_EXCEPTION(code)	movl %code,%ecx ; \
+				call __ipipe_handle_exception ; \
+				testl %eax,%eax	; \
+				jnz restore_nocheck
+#define DIVERT_EXCEPTION(code)	movl $(__USER_DS), %ecx	; \
+				movl %ecx, %ds ; \
+				movl %ecx, %es ; \
+				movl %esp, %eax	; \
+				movl $ex_ ## code,%edx ; \
+				call __ipipe_divert_exception ; \
+				testl %eax,%eax	; \
+				jnz restore_nocheck
+#define PREEMPT_SCHEDULE_IRQ	call __ipipe_preempt_schedule_irq
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+# define IPIPE_TRACE_IRQ_ENTER \
+	lea PT_EIP-4(%esp), %ebp; \
+	movl PT_ORIG_EAX(%esp), %eax; \
+	call ipipe_trace_begin
+# define IPIPE_TRACE_IRQ_EXIT \
+	pushl %eax; \
+	movl PT_ORIG_EAX+4(%esp), %eax; \
+	call ipipe_trace_end; \
+	popl %eax
+#else  /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+#define IPIPE_TRACE_IRQ_ENTER
+#define IPIPE_TRACE_IRQ_EXIT
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+#else /* !CONFIG_IPIPE */
+#define CATCH_ROOT_SYSCALL(bypass_check,bypass_nocheck)
+#define PUSH_XCODE(v)			pushl $v
+#define PUSH_XCODE_CFI(v)		pushl $v ; CFI_ADJUST_CFA_OFFSET 4
+#define PUSH_XVEC(v)			pushl v
+#define PUSH_XVEC_CFI(v)		pushl v ; CFI_ADJUST_CFA_OFFSET 4
+#define HANDLE_EXCEPTION(code)		call *%code
+#define DIVERT_EXCEPTION(code)
+#define PREEMPT_SCHEDULE_IRQ		call preempt_schedule_irq
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_PREEMPT
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -296,6 +349,7 @@
 .endm
 
 ENTRY(ret_from_fork)
+	ENABLE_INTERRUPTS_HW_COND
 	CFI_STARTPROC
 	pushl_cfi %eax
 	call schedule_tail
@@ -323,7 +377,7 @@ END(ret_from_fork)
 	RING0_PTREGS_FRAME
 ret_from_exception:
 	preempt_stop(CLBR_ANY)
-ret_from_intr:
+ENTRY(ret_from_intr)
 	GET_THREAD_INFO(%ebp)
 check_userspace:
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
@@ -356,7 +410,7 @@ need_resched:
 	jz restore_all
 	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz restore_all
-	call preempt_schedule_irq
+	PREEMPT_SCHEDULE_IRQ
 	jmp need_resched
 END(resume_kernel)
 #endif
@@ -416,6 +470,7 @@ sysenter_past_esp:
 .previous
 
 	GET_THREAD_INFO(%ebp)
+	CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_exit)
 
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz sysenter_audit
@@ -424,6 +479,7 @@ sysenter_do_call:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+sysenter_tail:
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
@@ -498,6 +554,7 @@ ENTRY(system_call)
 	pushl_cfi %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+ 	CATCH_ROOT_SYSCALL(syscall_exit,restore_nocheck)
 					# system call tracing in operation / emulation
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
@@ -536,7 +593,7 @@ irq_return:
 .section .fixup,"ax"
 ENTRY(iret_exc)
 	pushl $0			# no error code
-	pushl $do_iret_error
+  	PUSH_XCODE(do_iret_error)
 	jmp error_code
 .previous
 .section __ex_table,"a"
@@ -602,6 +659,7 @@ work_pending:
 	testb $_TIF_NEED_RESCHED, %cl
 	jz work_notifysig
 work_resched:
+	ENABLE_INTERRUPTS_HW_COND
 	call schedule
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
@@ -617,6 +675,7 @@ work_resched:
 
 work_notifysig:				# deal with pending signals and
 					# notify-resume requests
+	ENABLE_INTERRUPTS_HW_COND
 #ifdef CONFIG_VM86
 	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 	movl %esp, %eax
@@ -819,6 +878,49 @@ END(irq_entries_start)
 END(interrupt)
 .previous
 
+#ifdef CONFIG_IPIPE
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
+	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
+	SAVE_ALL
+	IPIPE_TRACE_IRQ_ENTER
+	movl %esp, %eax
+	call __ipipe_handle_irq
+	IPIPE_TRACE_IRQ_EXIT
+	testl %eax,%eax
+	jnz  ret_from_intr
+	jmp restore_nocheck
+	CFI_ENDPROC
+
+	.pushsection .kprobes.text, "ax"
+#define BUILD_INTERRUPT3(name, nr, fn)	\
+ENTRY(name)				\
+	RING0_INT_FRAME;		\
+	pushl $~(nr);			\
+	CFI_ADJUST_CFA_OFFSET 4;	\
+	SAVE_ALL;			\
+	IPIPE_TRACE_IRQ_ENTER;		\
+ 	movl %esp, %eax;		\
+	call __ipipe_handle_irq;	\
+	IPIPE_TRACE_IRQ_EXIT;		\
+	testl %eax,%eax;		\
+	jnz  ret_from_intr;		\
+	jmp restore_nocheck;	\
+	CFI_ENDPROC
+
+#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0)
+	BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1)
+	BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2)
+	BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3)
+#ifdef CONFIG_SMP
+	BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR)
+#endif
+#endif
+
+#else /* !CONFIG_IPIPE */
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -852,13 +954,15 @@ ENDPROC(name)
 
 #define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
 
+#endif /* !CONFIG_IPIPE */
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include <asm/entry_arch.h>
 
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi $do_coprocessor_error
+ 	PUSH_XCODE_CFI(do_coprocessor_error)
 	jmp error_code
 	CFI_ENDPROC
 END(coprocessor_error)
@@ -868,7 +972,7 @@ ENTRY(simd_coprocessor_error)
 	pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
 	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661:	pushl_cfi $do_general_protection
+661:	PUSH_XCODE_CFI(do_general_protection)
 662:
 .section .altinstructions,"a"
 	.balign 4
@@ -879,11 +983,11 @@ ENTRY(simd_coprocessor_error)
 	.byte 664f-663f
 .previous
 .section .altinstr_replacement,"ax"
-663:	pushl $do_simd_coprocessor_error
+663:	PUSH_XCODE(do_simd_coprocessor_error)
 664:
 .previous
 #else
-	pushl_cfi $do_simd_coprocessor_error
+	PUSH_XCODE_CFI(do_simd_coprocessor_error)
 #endif
 	jmp error_code
 	CFI_ENDPROC
@@ -892,7 +996,7 @@ END(simd_coprocessor_error)
 ENTRY(device_not_available)
 	RING0_INT_FRAME
 	pushl_cfi $-1			# mark this as an int
-	pushl_cfi $do_device_not_available
+	PUSH_XCODE_CFI(do_device_not_available)
 	jmp error_code
 	CFI_ENDPROC
 END(device_not_available)
@@ -915,7 +1019,7 @@ END(native_irq_enable_sysexit)
 ENTRY(overflow)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi $do_overflow
+	PUSH_XCODE_CFI(do_overflow)
 	jmp error_code
 	CFI_ENDPROC
 END(overflow)
@@ -923,7 +1027,7 @@ END(overflow)
 ENTRY(bounds)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi $do_bounds
+	PUSH_XCODE_CFI(do_bounds)
 	jmp error_code
 	CFI_ENDPROC
 END(bounds)
@@ -931,7 +1035,7 @@ END(bounds)
 ENTRY(invalid_op)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi $do_invalid_op
+	PUSH_XCODE_CFI(do_invalid_op)
 	jmp error_code
 	CFI_ENDPROC
 END(invalid_op)
@@ -939,35 +1043,35 @@ END(invalid_op)
 ENTRY(coprocessor_segment_overrun)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi $do_coprocessor_segment_overrun
+	PUSH_XCODE_CFI(do_coprocessor_segment_overrun)
 	jmp error_code
 	CFI_ENDPROC
 END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
-	pushl_cfi $do_invalid_TSS
+	PUSH_XCODE_CFI(do_invalid_TSS)
 	jmp error_code
 	CFI_ENDPROC
 END(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
-	pushl_cfi $do_segment_not_present
+	PUSH_XCODE_CFI(do_segment_not_present)
 	jmp error_code
 	CFI_ENDPROC
 END(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
-	pushl_cfi $do_stack_segment
+	PUSH_XCODE_CFI(do_stack_segment)
 	jmp error_code
 	CFI_ENDPROC
 END(stack_segment)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
-	pushl_cfi $do_alignment_check
+	PUSH_XCODE_CFI(do_alignment_check)
 	jmp error_code
 	CFI_ENDPROC
 END(alignment_check)
@@ -975,7 +1079,7 @@ END(alignment_check)
 ENTRY(divide_error)
 	RING0_INT_FRAME
 	pushl_cfi $0			# no error code
-	pushl_cfi $do_divide_error
+	PUSH_XCODE_CFI(do_divide_error)
 	jmp error_code
 	CFI_ENDPROC
 END(divide_error)
@@ -984,7 +1088,7 @@ END(divide_error)
 ENTRY(machine_check)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi machine_check_vector
+	PUSH_XVEC_CFI(machine_check_vector)
 	jmp error_code
 	CFI_ENDPROC
 END(machine_check)
@@ -993,7 +1097,7 @@ END(machine_check)
 ENTRY(spurious_interrupt_bug)
 	RING0_INT_FRAME
 	pushl_cfi $0
-	pushl_cfi $do_spurious_interrupt_bug
+	PUSH_XCODE_CFI(do_spurious_interrupt_bug)
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
@@ -1223,7 +1327,7 @@ syscall_table_size=(.-sys_call_table)
 
 ENTRY(page_fault)
 	RING0_EC_FRAME
-	pushl_cfi $do_page_fault
+	PUSH_XCODE_CFI(do_page_fault)
 	ALIGN
 error_code:
 	/* the function address is in %gs's slot on the stack */
@@ -1260,9 +1364,11 @@ error_code:
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
+#ifndef CONFIG_IPIPE
 	TRACE_IRQS_OFF
+#endif
 	movl %esp,%eax			# pt_regs pointer
-	call *%edi
+	HANDLE_EXCEPTION(edi)
 	jmp ret_from_exception
 	CFI_ENDPROC
 END(page_fault)
@@ -1302,6 +1408,7 @@ debug_stack_correct:
 	pushl_cfi $-1			# mark this as an int
 	SAVE_ALL
 	TRACE_IRQS_OFF
+ 	DIVERT_EXCEPTION(do_debug)
 	xorl %edx,%edx			# error code 0
 	movl %esp,%eax			# pt_regs pointer
 	call do_debug
@@ -1392,6 +1499,7 @@ ENTRY(int3)
 	pushl_cfi $-1			# mark this as an int
 	SAVE_ALL
 	TRACE_IRQS_OFF
+ 	DIVERT_EXCEPTION(do_int3)
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_int3
@@ -1401,7 +1509,7 @@ END(int3)
 
 ENTRY(general_protection)
 	RING0_EC_FRAME
-	pushl_cfi $do_general_protection
+	PUSH_XCODE_CFI(do_general_protection)
 	jmp error_code
 	CFI_ENDPROC
 END(general_protection)
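
Note: the PUSH_XCODE()/HANDLE_EXCEPTION() and DIVERT_EXCEPTION() macros above route traps through __ipipe_handle_exception() / __ipipe_divert_exception() (added in arch/x86/kernel/ipipe.c below) instead of calling the Linux do_* handler directly. As far as this patch shows, the contract is: a non-zero return means a co-kernel domain consumed the trap, so the stub short-circuits to restore_nocheck; a zero return means the matching Linux handler has already been invoked and the normal ret_from_exception path follows. A hypothetical C rendering of that contract (a sketch of the assembly above, not actual kernel code):

/* ex_code is the ex_do_* index pushed by PUSH_XCODE(). */
void sketch_exception_entry(struct pt_regs *regs, long error_code, int ex_code)
{
	if (__ipipe_handle_exception(regs, error_code, ex_code))
		return;		/* trap absorbed by a co-kernel domain */

	/* Otherwise the Linux do_* handler already ran inside
	 * __ipipe_handle_exception(), and the usual ret_from_exception
	 * work follows. */
}

The same convention applies to the 64-bit zeroentry/errorentry macros in entry_64.S.
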
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bbd5c80..4e177f3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -48,6 +48,7 @@
 #include <asm/unistd.h>
 #include <asm/thread_info.h>
 #include <asm/hw_irq.h>
+#include <asm/ipipe_base.h>
 #include <asm/page_types.h>
 #include <asm/irqflags.h>
 #include <asm/paravirt.h>
@@ -61,6 +62,13 @@
 #define __AUDIT_ARCH_LE	   0x40000000
 
 	.code64
+
+#ifdef CONFIG_IPIPE
+#define PREEMPT_SCHEDULE_IRQ		call __ipipe_preempt_schedule_irq
+#else /* !CONFIG_IPIPE */
+#define PREEMPT_SCHEDULE_IRQ		call preempt_schedule_irq
+#endif /* !CONFIG_IPIPE */
+
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -326,16 +334,21 @@ ENTRY(save_args)
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
 1:	incl PER_CPU_VAR(irq_count)
+#ifndef CONFIG_IPIPE
 	jne 2f
 	popq_cfi %rax			/* move return address... */
 	mov PER_CPU_VAR(irq_stack_ptr),%rsp
 	EMPTY_FRAME 0
 	pushq_cfi %rbp			/* backlink for unwinder */
 	pushq_cfi %rax			/* ... to the new stack */
+#endif
 	/*
 	 * We entered an interrupt context - irqs are off:
 	 */
-2:	TRACE_IRQS_OFF
+2:
+#ifndef CONFIG_IPIPE
+	TRACE_IRQS_OFF
+#endif
 	ret
 	CFI_ENDPROC
 END(save_args)
@@ -400,6 +413,7 @@ ENTRY(ret_from_fork)
 
 	pushq_cfi kernel_eflags(%rip)
 	popfq_cfi				# reset kernel eflags
+  	ENABLE_INTERRUPTS_HW_COND
 
 	call schedule_tail			# rdi: 'prev' task parameter
 
@@ -475,6 +489,17 @@ ENTRY(system_call_after_swapgs)
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
+#ifdef CONFIG_IPIPE
+	pushq %rdi
+	pushq %rax
+	leaq -(ARGOFFSET-16)(%rsp),%rdi	# regs for handler
+	call	__ipipe_syscall_root_thunk
+	testl %eax, %eax
+	popq %rax
+	popq %rdi
+	js    ret_from_sys_call
+	jnz   sysret_fastexit
+#endif
 	GET_THREAD_INFO(%rcx)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
 	jnz tracesys
@@ -504,6 +529,7 @@ sysret_check:
 	 * sysretq will re-enable interrupts:
 	 */
 	TRACE_IRQS_ON
+sysret_fastexit:
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
@@ -515,6 +541,8 @@ sysret_check:
 	/* Handle reschedules */
 	/* edx:	work, edi: workmask */
 sysret_careful:
+	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),%edx
+	jnz ret_from_sys_call_trace
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
 	TRACE_IRQS_ON
@@ -524,6 +552,16 @@ sysret_careful:
 	popq_cfi %rdi
 	jmp sysret_check
 
+ret_from_sys_call_trace:
+	TRACE_IRQS_ON
+	sti
+	SAVE_REST
+	FIXUP_TOP_OF_STACK %rdi
+	movq %rsp,%rdi
+	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	RESTORE_REST
+	jmp int_ret_from_sys_call
+
 	/* Handle a signal */
 sysret_signal:
 	TRACE_IRQS_ON
@@ -791,7 +829,29 @@ END(interrupt)
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	call save_args
 	PARTIAL_FRAME 0
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+	pushq %rbp
+	leaq RIP-8(%rdi), %rbp	# make interrupted address show up in trace
+	pushq %rdi
+	movq ORIG_RAX(%rdi), %rdi	# IRQ number
+	notq %rdi			# ...is inverted, fix up
+	call ipipe_trace_begin
+	popq %rdi
+	popq %rbp
+
+	call \func
+
+	pushq %rbp
+	pushq %rax
+	movq 8-ARGOFFSET+ORIG_RAX(%rbp), %rdi
+	leaq 8-ARGOFFSET+RIP-8(%rbp), %rbp
+	notq %rdi
+	call ipipe_trace_end
+	popq %rax
+	popq %rbp
+#else
 	call \func
+#endif
 	.endm
 
 /*
@@ -804,9 +864,27 @@ END(interrupt)
 	 */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
+#ifdef CONFIG_IPIPE
+	XCPT_FRAME
+	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
+	interrupt __ipipe_handle_irq
+	testl %eax, %eax
+	jnz ret_from_intr
+	decl PER_CPU_VAR(irq_count)
+	leaveq
+	CFI_RESTORE		rbp
+	CFI_DEF_CFA_REGISTER	rsp
+	CFI_ADJUST_CFA_OFFSET	-8
+	addq $8, %rsp
+	CFI_ADJUST_CFA_OFFSET	-8
+	testl $3,CS-ARGOFFSET(%rsp)
+	jz restore_args
+	jmp retint_swapgs_notrace
+#else /* !CONFIG_IPIPE */
 	XCPT_FRAME
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
+#endif /* !CONFIG_IPIPE */
 	/* 0(%rsp): old_rsp-ARGOFFSET */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -821,7 +899,7 @@ ret_from_intr:
 	/* we did not save rbx, restore only from ARGOFFSET */
 	addq $8, %rsp
 	CFI_ADJUST_CFA_OFFSET	-8
-exit_intr:
+ENTRY(exit_intr)
 	GET_THREAD_INFO(%rcx)
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_kernel
@@ -841,20 +919,20 @@ retint_check:
 	jnz  retint_careful
 
 retint_swapgs:		/* return to user-space */
+	TRACE_IRQS_IRETQ
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_IRETQ
+retint_swapgs_notrace:
 	SWAPGS
+retint_noswapgs:
 	jmp restore_args
 
 retint_restore_args:	/* return to kernel space */
-	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_IRETQ
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
-	TRACE_IRQS_IRETQ
 restore_args:
 	RESTORE_ARGS 0,8,0
 
@@ -934,7 +1012,15 @@ ENTRY(retint_kernel)
 	jnc  retint_restore_args
 	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
-	call preempt_schedule_irq
+#ifdef CONFIG_IPIPE
+	/*
+	 * We may have preempted call_softirq before __do_softirq raised or
+	 * after it lowered the preemption counter.
+	 */
+	cmpl $0,PER_CPU_VAR(irq_count)
+	jge  retint_restore_args
+#endif
+	PREEMPT_SCHEDULE_IRQ
 	jmp exit_intr
 #endif
 
@@ -952,11 +1038,29 @@ END(common_interrupt)
 ENTRY(\sym)
 	INTR_FRAME
 	pushq_cfi $~(\num)
+#ifdef CONFIG_IPIPE
+	interrupt __ipipe_handle_irq
+	testl %eax, %eax
+	jnz ret_from_intr
+	decl PER_CPU_VAR(irq_count)
+	leaveq
+	CFI_RESTORE		rbp
+	CFI_DEF_CFA_REGISTER	rsp
+	CFI_ADJUST_CFA_OFFSET	-8
+	addq $8, %rsp
+	CFI_ADJUST_CFA_OFFSET	-8
+	testl $3,CS-ARGOFFSET(%rsp)
+	jz restore_args
+	jmp retint_swapgs_notrace
+	CFI_ENDPROC
+	.endm
+#else /* !CONFIG_IPIPE */
 	interrupt \do_sym
 	jmp ret_from_intr
 	CFI_ENDPROC
 END(\sym)
 .endm
+#endif /* !CONFIG_IPIPE */
 
 #ifdef CONFIG_SMP
 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
@@ -975,7 +1079,11 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
 #ifdef CONFIG_SMP
+#ifdef CONFIG_IPIPE
+.irpc idx, "012"
+#else
 .irpc idx, "01234567"
+#endif
 apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
 	invalidate_interrupt\idx smp_invalidate_interrupt
 .endr
@@ -1013,7 +1121,7 @@ apicinterrupt IRQ_WORK_VECTOR \
 /*
  * Exception entry points.
  */
-.macro zeroentry sym do_sym
+.macro zeroentry sym do_sym ex_code
 ENTRY(\sym)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1024,13 +1132,28 @@ ENTRY(\sym)
 	DEFAULT_FRAME 0
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
+#ifdef CONFIG_IPIPE
+	movq $\ex_code,%rdx
+	call __ipipe_handle_exception   /* handle(regs, error_code, ex_code) */
+	TRACE_IRQS_OFF
+	testl %eax, %eax
+	jz error_exit
+	movl %ebx,%eax
+	RESTORE_REST
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	testl %eax,%eax
+	jne retint_noswapgs
+	jmp retint_swapgs_notrace
+#else /* !CONFIG_IPIPE */
+	TRACE_IRQS_OFF
 	call \do_sym
+#endif /* !CONFIG_IPIPE */
 	jmp error_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
 .endm
 
-.macro paranoidzeroentry sym do_sym
+.macro paranoidzeroentry sym do_sym ex_code=0
 ENTRY(\sym)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1038,17 +1161,32 @@ ENTRY(\sym)
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
 	call save_paranoid
-	TRACE_IRQS_OFF
 	movq %rsp,%rdi		/* pt_regs pointer */
+#ifdef CONFIG_IPIPE
+	.if \ex_code
+	movq $\ex_code,%rsi
+	call __ipipe_divert_exception   /* handle(regs, ex_code) */
+	TRACE_IRQS_OFF
+	testl %eax,%eax
+	jnz 1f
+	movq %rsp,%rdi
+	.endif
+#else
+	TRACE_IRQS_OFF
+#endif
 	xorl %esi,%esi		/* no error code */
 	call \do_sym
+#ifdef CONFIG_IPIPE
+	xorl %eax,%eax		/* tell paranoid_exit to propagate the exception */
+1:
+#endif
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
 .endm
 
 #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
-.macro paranoidzeroentry_ist sym do_sym ist
+.macro paranoidzeroentry_ist sym do_sym ist ex_code=0
 ENTRY(\sym)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1058,16 +1196,29 @@ ENTRY(\sym)
 	call save_paranoid
 	TRACE_IRQS_OFF
 	movq %rsp,%rdi		/* pt_regs pointer */
+#ifdef CONFIG_IPIPE
+	.if \ex_code
+	movq $\ex_code,%rsi
+	call __ipipe_divert_exception   /* handle(regs, ex_code) */
+	testl %eax,%eax
+	jnz 1f
+	movq %rsp,%rdi
+	.endif
+#endif
 	xorl %esi,%esi		/* no error code */
 	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
 	call \do_sym
 	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
+#ifdef CONFIG_IPIPE
+	xorl %eax,%eax		/* tell paranoid_exit to propagate the exception */
+1:
+#endif
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
 .endm
 
-.macro errorentry sym do_sym
+.macro errorentry sym do_sym ex_code
 ENTRY(\sym)
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1078,14 +1229,29 @@ ENTRY(\sym)
 	movq %rsp,%rdi			/* pt_regs pointer */
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
+#ifdef CONFIG_IPIPE
+	movq $\ex_code,%rdx
+	call __ipipe_handle_exception   /* handle(regs, error_code, ex_code) */
+	TRACE_IRQS_OFF
+	testl %eax, %eax
+	jz error_exit
+	movl %ebx,%eax
+	RESTORE_REST
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	testl %eax,%eax
+	jne retint_noswapgs
+	jmp retint_swapgs_notrace
+#else /* !CONFIG_IPIPE */
+	TRACE_IRQS_OFF
 	call \do_sym
+#endif /* !CONFIG_IPIPE */
 	jmp error_exit			/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
 .endm
 
 	/* error code is on the stack already */
-.macro paranoiderrorentry sym do_sym
+.macro paranoiderrorentry sym do_sym ex_code=0
 ENTRY(\sym)
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1095,27 +1261,40 @@ ENTRY(\sym)
 	DEFAULT_FRAME 0
 	TRACE_IRQS_OFF
 	movq %rsp,%rdi			/* pt_regs pointer */
+#ifdef CONFIG_IPIPE
+	.if \ex_code
+	movq $\ex_code,%rsi
+	call __ipipe_divert_exception   /* handle(regs, ex_code) */
+	testl %eax,%eax
+	jnz 1f
+	movq %rsp,%rdi
+	.endif
+#endif
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
 	call \do_sym
+#ifdef CONFIG_IPIPE
+	xorl %eax,%eax			/* tell paranoid_exit to propagate the exception */
+1:
+#endif
 	jmp paranoid_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
 .endm
 
-zeroentry divide_error do_divide_error
-zeroentry overflow do_overflow
-zeroentry bounds do_bounds
-zeroentry invalid_op do_invalid_op
-zeroentry device_not_available do_device_not_available
+zeroentry divide_error do_divide_error ex_do_divide_error
+zeroentry overflow do_overflow ex_do_overflow
+zeroentry bounds do_bounds ex_do_bounds
+zeroentry invalid_op do_invalid_op ex_do_invalid_op
+zeroentry device_not_available do_device_not_available ex_do_device_not_available
 paranoiderrorentry double_fault do_double_fault
-zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
-errorentry invalid_TSS do_invalid_TSS
-errorentry segment_not_present do_segment_not_present
-zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
-zeroentry coprocessor_error do_coprocessor_error
-errorentry alignment_check do_alignment_check
-zeroentry simd_coprocessor_error do_simd_coprocessor_error
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun ex_do_coprocessor_segment_overrun
+errorentry invalid_TSS do_invalid_TSS ex_do_invalid_TSS
+errorentry segment_not_present do_segment_not_present ex_do_segment_not_present
+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug ex_do_spurious_interrupt_bug
+zeroentry coprocessor_error do_coprocessor_error ex_do_coprocessor_error
+errorentry alignment_check do_alignment_check ex_do_alignment_check
+zeroentry simd_coprocessor_error do_simd_coprocessor_error ex_do_simd_coprocessor_error
 
 	/* Reload gs selector with exception handling */
 	/* edi:  new selector */
@@ -1199,15 +1378,19 @@ ENTRY(call_softirq)
 	CFI_REL_OFFSET rbp,0
 	mov  %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
+	DISABLE_INTERRUPTS_HW_COND
 	incl PER_CPU_VAR(irq_count)
 	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
+	ENABLE_INTERRUPTS_HW_COND
 	push  %rbp			# backlink for old unwinder
 	call __do_softirq
+	DISABLE_INTERRUPTS_HW_COND
 	leaveq
 	CFI_RESTORE		rbp
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET   -8
 	decl PER_CPU_VAR(irq_count)
+	ENABLE_INTERRUPTS_HW_COND
 	ret
 	CFI_ENDPROC
 END(call_softirq)
@@ -1319,16 +1502,16 @@ apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
  */
 	.pushsection .kprobes.text, "ax"
 
-paranoidzeroentry_ist debug do_debug DEBUG_STACK
-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+paranoidzeroentry_ist debug do_debug DEBUG_STACK ex_do_debug
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ex_do_int3
 paranoiderrorentry stack_segment do_stack_segment
 #ifdef CONFIG_XEN
 zeroentry xen_debug do_debug
 zeroentry xen_int3 do_int3
 errorentry xen_stack_segment do_stack_segment
 #endif
-errorentry general_protection do_general_protection
-errorentry page_fault do_page_fault
+errorentry general_protection do_general_protection ex_do_general_protection
+errorentry page_fault do_page_fault ex_do_page_fault
 #ifdef CONFIG_KVM_GUEST
 errorentry async_page_fault do_async_page_fault
 #endif
@@ -1354,8 +1537,13 @@ ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
+paranoid_notrace:
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz paranoid_restore
+#ifdef CONFIG_IPIPE
+	testl %eax,%eax
+	jnz paranoid_swapgs
+#endif
 	testl $3,CS(%rsp)
 	jnz   paranoid_userspace
 paranoid_swapgs:
@@ -1426,7 +1614,6 @@ ENTRY(error_entry)
 error_swapgs:
 	SWAPGS
 error_sti:
-	TRACE_IRQS_OFF
 	ret
 
 /*
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2dfd315..3253978 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/ipipe.h>
 
 #include <asm/i8253.h>
 #include <asm/hpet.h>
@@ -130,6 +131,12 @@ static cycle_t pit_read(struct clocksource *cs)
 	int count;
 	u32 jifs;
 
+#ifdef CONFIG_IPIPE
+	if (!__ipipe_pipeline_head_p(ipipe_root_domain))
+		/* We don't really own the PIT. */
+		return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count;
+#endif /* CONFIG_IPIPE */
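+	/*
+	 * Reading of the fallback above (an interpretation, not stated by
+	 * the patch itself): jiffies * LATCH converts the full ticks elapsed
+	 * so far into PIT cycles, and (LATCH - 1) - old_count adds the
+	 * cycles already consumed within the current tick, old_count being
+	 * the down-counter value captured by the last genuine PIT read. This
+	 * keeps pit_read() roughly monotonic when a co-kernel domain owns
+	 * PIT channel 0 and the latched count no longer tracks Linux time.
+	 */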
+
 	raw_spin_lock_irqsave(&i8253_lock, flags);
 	/*
 	 * Although our caller may have the read side of xtime_lock,
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 20757cb..269c884 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,7 +32,7 @@
 static void init_8259A(int auto_eoi);
 
 static int i8259A_auto_eoi;
-DEFINE_RAW_SPINLOCK(i8259A_lock);
+IPIPE_DEFINE_RAW_SPINLOCK(i8259A_lock);
 
 /*
  * 8259A PIC functions to handle ISA devices:
@@ -60,6 +60,7 @@ static void mask_8259A_irq(unsigned int irq)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
+	ipipe_irq_lock(irq);
 	cached_irq_mask |= mask;
 	if (irq & 8)
 		outb(cached_slave_mask, PIC_SLAVE_IMR);
@@ -75,15 +76,18 @@ static void disable_8259A_irq(struct irq_data *data)
 
 static void unmask_8259A_irq(unsigned int irq)
 {
-	unsigned int mask = ~(1 << irq);
+	unsigned int mask = (1 << irq);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
+	if (cached_irq_mask & mask) {
+		cached_irq_mask &= ~mask;
+		if (irq & 8)
+ 			outb(cached_slave_mask, PIC_SLAVE_IMR);
+		else
+			outb(cached_master_mask, PIC_MASTER_IMR);
+		ipipe_irq_unlock(irq);
+	}
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
@@ -170,6 +174,18 @@ static void mask_and_ack_8259A(struct irq_data *data)
 	 */
 	if (cached_irq_mask & irqmask)
 		goto spurious_8259A_irq;
+#ifdef CONFIG_IPIPE
+	if (irq == 0) {
+		/*
+		 * Fast timer ack -- don't mask (unless supposedly
+		 * spurious). We trace outb's in order to detect
+		 * broken hardware inducing large delays.
+		 */
+		outb(0x60, PIC_MASTER_CMD);	/* Specific EOI to master. */
+		raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+		return;
+	}
+#endif /* CONFIG_IPIPE */
 	cached_irq_mask |= irqmask;
 
 handle_real_irq:
diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c
new file mode 100644
index 0000000..b2e2a5b
--- /dev/null
+++ b/arch/x86/kernel/ipipe.c
@@ -0,0 +1,857 @@
+/*   -*- linux-c -*-
+ *   linux/arch/x86/kernel/ipipe.c
+ *
+ *   Copyright (C) 2002-2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Architecture-dependent I-PIPE support for x86.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/clockchips.h>
+#include <linux/kprobes.h>
+#include <linux/ipipe_tickdev.h>
+#include <asm/unistd.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#include <asm/io.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif	/* CONFIG_X86_IO_APIC */
+#include <asm/apic.h>
+#endif	/* CONFIG_X86_LOCAL_APIC */
+#include <asm/traps.h>
+#include <asm/tsc.h>
+
+int __ipipe_hrtimer_irq = 0;	/* Legacy timer */
+
+DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
+
+DEFINE_PER_CPU(unsigned long, __ipipe_cr2);
+EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2);
+
+/*
+ * ipipe_trigger_irq() -- Push the interrupt at the front of the
+ * pipeline, just as if it had actually been received from a hw
+ * source. Also works for virtual interrupts.
+ */
+int ipipe_trigger_irq(unsigned int irq)
+{
+	struct pt_regs regs;
+	unsigned long flags;
+
+#ifdef CONFIG_IPIPE_DEBUG
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+	if (ipipe_virtual_irq_p(irq)) {
+		if (!test_bit(irq - IPIPE_VIRQ_BASE,
+			      &__ipipe_virtual_irq_map))
+			return -EINVAL;
+	} else if (irq_to_desc(irq) == NULL)
+		return -EINVAL;
+#endif
+	local_irq_save_hw(flags);
+	regs.flags = flags;
+	regs.orig_ax = irq;	/* Positive value - IRQ won't be acked */
+	regs.cs = __KERNEL_CS;
+	__ipipe_handle_irq(&regs);
+	local_irq_restore_hw(flags);
+
+	return 1;
+}
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *info)
+{
+	info->sys_nr_cpus = num_online_cpus();
+	info->sys_cpu_freq = __ipipe_cpu_freq;
+	info->sys_hrtimer_irq = __ipipe_hrtimer_irq;
+	info->sys_hrtimer_freq = __ipipe_hrtimer_freq;
+	info->sys_hrclock_freq = __ipipe_hrclock_freq;
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_UV
+asmlinkage void uv_bau_message_interrupt(struct pt_regs *regs);
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
+asmlinkage void smp_threshold_interrupt(void);
+#endif
+#ifdef CONFIG_X86_NEW_MCE
+asmlinkage void smp_mce_self_interrupt(void);
+#endif
+
+static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->ipipe_ack(irq, desc);
+}
+
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
+{
+	irq_to_desc(irq)->status &= ~IRQ_DISABLED;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static void __ipipe_noack_apic(unsigned irq, struct irq_desc *desc)
+{
+}
+
+static void __ipipe_ack_apic(unsigned irq, struct irq_desc *desc)
+{
+	__ack_APIC_irq();
+}
+
+static void __ipipe_null_handler(unsigned irq, void *cookie)
+{
+}
+
+void ipipe_init_vector_irq(int cpu)
+{
+	unsigned int vector;
+
+	per_cpu(vector_irq, cpu)[IRQ_MOVE_CLEANUP_VECTOR] =
+		IRQ_MOVE_CLEANUP_VECTOR;
+
+	for (vector = first_system_vector; vector < NR_VECTORS; vector++)
+		if (per_cpu(vector_irq, cpu)[vector] == -1)
+			per_cpu(vector_irq, cpu)[vector] =
+				ipipe_apic_vector_irq(vector);
+}
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw
+   interrupts are off, and secondary CPUs are still lost in space. */
+
+void __init __ipipe_enable_pipeline(void)
+{
+	unsigned int vector, irq;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+	/* Map the APIC system vectors. */
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
+			     (ipipe_irq_handler_t)&smp_apic_timer_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_spurious_interrupt,
+			     NULL,
+			     &__ipipe_noack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(ERROR_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_error_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(THERMAL_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_thermal_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_X86_THERMAL_VECTOR */
+
+#ifdef CONFIG_X86_MCE_THRESHOLD
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_threshold_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_X86_MCE_THRESHOLD */
+
+#ifdef CONFIG_X86_NEW_MCE
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(MCE_SELF_VECTOR),
+			     (ipipe_irq_handler_t)&smp_mce_self_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_X86_NEW_MCE */
+
+#ifdef CONFIG_X86_UV
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(UV_BAU_MESSAGE),
+			     (ipipe_irq_handler_t)&uv_bau_message_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_X86_UV */
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(X86_PLATFORM_IPI_VECTOR),
+			     (ipipe_irq_handler_t)&smp_x86_platform_ipi,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_IRQ_WORK
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IRQ_WORK_VECTOR),
+			     (ipipe_irq_handler_t)&irq_work_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_IRQ_WORK */
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_SMP
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(RESCHEDULE_VECTOR),
+			     (ipipe_irq_handler_t)&smp_reschedule_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	for (vector = INVALIDATE_TLB_VECTOR_START;
+	     vector <= INVALIDATE_TLB_VECTOR_END; ++vector)
+		ipipe_virtualize_irq(ipipe_root_domain,
+				     ipipe_apic_vector_irq(vector),
+				     (ipipe_irq_handler_t)&smp_invalidate_interrupt,
+				     NULL,
+				     &__ipipe_ack_apic,
+				     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR),
+			     (ipipe_irq_handler_t)&smp_call_function_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR),
+			     (ipipe_irq_handler_t)&smp_call_function_single_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     IRQ_MOVE_CLEANUP_VECTOR,
+			     (ipipe_irq_handler_t)&smp_irq_move_cleanup_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(REBOOT_VECTOR),
+			     (ipipe_irq_handler_t)&smp_reboot_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#else
+	(void)vector;
+#endif	/* CONFIG_SMP */
+
+	/* Finally, virtualize the remaining ISA and IO-APIC
+	 * interrupts. Interrupts which have already been virtualized
+	 * just produce a silent -EPERM error, since IPIPE_SYSTEM_MASK
+	 * has been passed for them; that's ok. */
+
+	for (irq = 0; irq < NR_IRQS; irq++)
+		/*
+		 * Fails for IPIPE_CRITICAL_IPI and IRQ_MOVE_CLEANUP_VECTOR,
+		 * but that's ok.
+		 */
+		ipipe_virtualize_irq(ipipe_root_domain,
+				     irq,
+				     (ipipe_irq_handler_t)&do_IRQ,
+				     NULL,
+				     &__ipipe_ack_irq,
+				     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/* Eventually allow these vectors to be reprogrammed. */
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK;
+#endif	/* CONFIG_X86_LOCAL_APIC */
+}
+
+#ifdef CONFIG_SMP
+
+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask)
+{
+	cpumask_t oldmask;
+
+	if (irq_to_desc(irq)->chip->set_affinity == NULL)
+		return CPU_MASK_NONE;
+
+	if (cpus_empty(cpumask))
+		return CPU_MASK_NONE; /* Return mask value -- no change. */
+
+	cpus_and(cpumask, cpumask, cpu_online_map);
+	if (cpus_empty(cpumask))
+		return CPU_MASK_NONE;	/* Error -- bad mask value or non-routable IRQ. */
+
+	cpumask_copy(&oldmask, irq_to_desc(irq)->affinity);
+	irq_to_desc(irq)->chip->set_affinity(irq, &cpumask);
+
+	return oldmask;
+}
+
+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask)
+{
+	unsigned long flags;
+	int self;
+
+	local_irq_save_hw(flags);
+
+	self = cpu_isset(ipipe_processor_id(),cpumask);
+	cpu_clear(ipipe_processor_id(), cpumask);
+
+	if (!cpus_empty(cpumask))
+		apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi));
+
+	if (self)
+		ipipe_trigger_irq(ipi);
+
+	local_irq_restore_hw(flags);
+
+	return 0;
+}
+
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd)
+{
+	ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic;
+	ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync;
+	ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL;
+	/* Immediately handle in the current domain but *never* pass */
+	ipd->irqs[IPIPE_CRITICAL_IPI].control =
+		IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK;
+}
+
+#endif	/* CONFIG_SMP */
+
+static inline void __fixup_if(int s, struct pt_regs *regs)
+{
+	/*
+	 * Have the saved hw state look like the domain stall bit, so
+	 * that __ipipe_unstall_iret_root() restores the proper
+	 * pipeline state for the root stage upon exit.
+	 */
+	if (s)
+		regs->flags &= ~X86_EFLAGS_IF;
+	else
+		regs->flags |= X86_EFLAGS_IF;
+}
+
+void __ipipe_halt_root(void)
+{
+	struct ipipe_percpu_domain_data *p;
+
+	/* Emulate sti+hlt sequence over the root domain. */
+
+	local_irq_disable_hw();
+
+	p = ipipe_root_cpudom_ptr();
+
+	trace_hardirqs_on();
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (unlikely(__ipipe_ipending_p(p))) {
+		__ipipe_sync_pipeline();
+		local_irq_enable_hw();
+	} else {
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+		ipipe_trace_end(0x8000000E);
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+		asm volatile("sti; hlt": : :"memory");
+	}
+}
+
+static void do_machine_check_vector(struct pt_regs *regs, long error_code)
+{
+#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_32
+	extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+	machine_check_vector(regs, error_code);
+#else
+	do_machine_check(regs, error_code);
+#endif
+#endif /* CONFIG_X86_MCE */
+}
+
+/* Work around genksyms's issue with over-qualification in decls. */
+
+typedef void dotraplinkage __ipipe_exhandler(struct pt_regs *, long);
+
+typedef __ipipe_exhandler *__ipipe_exptr;
+
+static __ipipe_exptr __ipipe_std_extable[] = {
+
+	[ex_do_divide_error] = &do_divide_error,
+	[ex_do_overflow] = &do_overflow,
+	[ex_do_bounds] = &do_bounds,
+	[ex_do_invalid_op] = &do_invalid_op,
+	[ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun,
+	[ex_do_invalid_TSS] = &do_invalid_TSS,
+	[ex_do_segment_not_present] = &do_segment_not_present,
+	[ex_do_stack_segment] = &do_stack_segment,
+	[ex_do_general_protection] = do_general_protection,
+	[ex_do_page_fault] = (__ipipe_exptr)&do_page_fault,
+	[ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug,
+	[ex_do_coprocessor_error] = &do_coprocessor_error,
+	[ex_do_alignment_check] = &do_alignment_check,
+	[ex_machine_check_vector] = &do_machine_check_vector,
+	[ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error,
+	[ex_do_device_not_available] = &do_device_not_available,
+#ifdef CONFIG_X86_32
+	[ex_do_iret_error] = &do_iret_error,
+#endif
+};
+
+#ifdef CONFIG_KGDB
+#include <linux/kgdb.h>
+
+static int __ipipe_xlate_signo[] = {
+
+	[ex_do_divide_error] = SIGFPE,
+	[ex_do_debug] = SIGTRAP,
+	[2] = -1,
+	[ex_do_int3] = SIGTRAP,
+	[ex_do_overflow] = SIGSEGV,
+	[ex_do_bounds] = SIGSEGV,
+	[ex_do_invalid_op] = SIGILL,
+	[ex_do_device_not_available] = -1,
+	[8] = -1,
+	[ex_do_coprocessor_segment_overrun] = SIGFPE,
+	[ex_do_invalid_TSS] = SIGSEGV,
+	[ex_do_segment_not_present] = SIGBUS,
+	[ex_do_stack_segment] = SIGBUS,
+	[ex_do_general_protection] = SIGSEGV,
+	[ex_do_page_fault] = SIGSEGV,
+	[ex_do_spurious_interrupt_bug] = -1,
+	[ex_do_coprocessor_error] = -1,
+	[ex_do_alignment_check] = SIGBUS,
+	[ex_machine_check_vector] = -1,
+	[ex_do_simd_coprocessor_error] = -1,
+	[20 ... 31] = -1,
+#ifdef CONFIG_X86_32
+	[ex_do_iret_error] = SIGSEGV,
+#endif
+};
+#endif /* CONFIG_KGDB */
+
+int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector)
+{
+	bool root_entry = false;
+	unsigned long flags = 0;
+	unsigned long cr2 = 0;
+
+	if (ipipe_root_domain_p) {
+		root_entry = true;
+
+		local_save_flags(flags);
+		/*
+		 * Replicate hw interrupt state into the virtual mask
+		 * before calling the I-pipe event handler over the
+		 * root domain. Also required later when calling the
+		 * Linux exception handler.
+		 */
+		if (irqs_disabled_hw())
+			local_irq_disable();
+	}
+#ifdef CONFIG_KGDB
+	/* catch exception KGDB is interested in over non-root domains */
+	else if (__ipipe_xlate_signo[vector] >= 0 &&
+		 !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector],
+					error_code, regs))
+		return 1;
+#endif /* CONFIG_KGDB */
+
+	if (vector == ex_do_page_fault)
+		cr2 = native_read_cr2();
+
+	if (unlikely(ipipe_trap_notify(vector, regs))) {
+		if (root_entry)
+			local_irq_restore_nosync(flags);
+		return 1;
+	}
+
+	if (likely(ipipe_root_domain_p)) {
+		/*
+		 * If root is not the topmost domain or in case we faulted in
+		 * the iret path of x86-32, regs.flags does not match the root
+		 * domain state. The fault handler or the low-level return
+		 * code may evaluate it. So fix this up, either by the root
+		 * state sampled on entry or, if we migrated to root, with the
+		 * current state.
+		 */
+		__fixup_if(root_entry ? raw_irqs_disabled_flags(flags) :
+					raw_irqs_disabled(), regs);
+	} else {
+		/* Detect unhandled faults over non-root domains. */
+		struct ipipe_domain *ipd = ipipe_current_domain;
+
+		/* Switch to root so that Linux can handle the fault cleanly. */
+		__ipipe_current_domain = ipipe_root_domain;
+
+		ipipe_trace_panic_freeze();
+
+		/* Always warn about user land and unfixable faults. */
+		if (user_mode_vm(regs) ||
+		    !search_exception_tables(instruction_pointer(regs))) {
+			printk(KERN_ERR "BUG: Unhandled exception over domain"
+			       " %s at 0x%lx - switching to ROOT\n",
+			       ipd->name, instruction_pointer(regs));
+			dump_stack();
+			ipipe_trace_panic_dump();
+#ifdef CONFIG_IPIPE_DEBUG
+		/* Also report fixable ones when debugging is enabled. */
+		} else {
+			printk(KERN_WARNING "WARNING: Fixable exception over "
+			       "domain %s at 0x%lx - switching to ROOT\n",
+			       ipd->name, instruction_pointer(regs));
+			dump_stack();
+			ipipe_trace_panic_dump();
+#endif /* CONFIG_IPIPE_DEBUG */
+		}
+	}
+
+	if (vector == ex_do_page_fault)
+		write_cr2(cr2);
+
+	__ipipe_std_extable[vector](regs, error_code);
+
+	/*
+	 * Relevant for 64-bit: Restore root domain state as the low-level
+	 * return code will not align it to regs.flags.
+	 */
+	if (root_entry)
+		local_irq_restore_nosync(flags);
+
+	return 0;
+}
+
+int __ipipe_divert_exception(struct pt_regs *regs, int vector)
+{
+	bool root_entry = false;
+	unsigned long flags = 0;
+
+	if (ipipe_root_domain_p) {
+		root_entry = true;
+
+		local_save_flags(flags);
+
+		if (irqs_disabled_hw()) {
+			/*
+			 * Same root state handling as in
+			 * __ipipe_handle_exception.
+			 */
+			local_irq_disable();
+		}
+	}
+#ifdef CONFIG_KGDB
+	/* catch int1 and int3 over non-root domains */
+	else {
+#ifdef CONFIG_X86_32
+		if (vector != ex_do_device_not_available)
+#endif
+		{
+			unsigned int condition = 0;
+
+			if (vector == 1)
+				get_debugreg(condition, 6);
+			if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs))
+				return 1;
+		}
+	}
+#endif /* CONFIG_KGDB */
+
+	if (unlikely(ipipe_trap_notify(vector, regs))) {
+		if (root_entry)
+			local_irq_restore_nosync(flags);
+		return 1;
+	}
+
+	/* see __ipipe_handle_exception */
+	if (likely(ipipe_root_domain_p))
+		__fixup_if(root_entry ? raw_irqs_disabled_flags(flags) :
+					raw_irqs_disabled(), regs);
+	/*
+	 * No need to restore root state in the 64-bit case, the Linux handler
+	 * and the return code will take care of it.
+	 */
+
+	return 0;
+}
+
+int __ipipe_syscall_root(struct pt_regs *regs)
+{
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * This routine either returns:
+	 * 0 -- if the syscall is to be passed to Linux;
+	 * >0 -- if the syscall should not be passed to Linux, and no
+	 * tail work should be performed;
+	 * <0 -- if the syscall should not be passed to Linux but the
+	 * tail work has to be performed (for handling signals etc).
+	 */
+
+	if (!__ipipe_syscall_watched_p(current, regs->orig_ax) ||
+	    !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL))
+		return 0;
+
+	ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs);
+
+	local_irq_save_hw(flags);
+
+	if (current->ipipe_flags & PF_EVTRET) {
+		current->ipipe_flags &= ~PF_EVTRET;
+		__ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs);
+	}
+
+	if (!ipipe_root_domain_p)
+		return 1;
+
+	/*
+	 * If allowed, sync pending VIRQs before _TIF_NEED_RESCHED is
+	 * tested.
+	 */
+	if (__ipipe_ipending_p(ipipe_root_cpudom_ptr()))
+		__ipipe_sync_pipeline();
+
+	if (!ret)
+		local_irq_restore_hw(flags);
+
+	return -ret;
+}
+
+/*
+ * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic
+ * interrupt protection log is maintained here for each domain.	 Hw
+ * interrupts are off on entry.
+ */
+int __ipipe_handle_irq(struct pt_regs *regs)
+{
+	struct ipipe_domain *this_domain, *next_domain;
+	int irq, vector = regs->orig_ax;
+	struct list_head *head, *pos;
+	struct pt_regs *tick_regs;
+	int m_ack;
+
+	if (vector < 0) {
+		irq = __get_cpu_var(vector_irq)[~vector];
+		BUG_ON(irq < 0);
+		m_ack = 0;
+	} else { /* This is a self-triggered one. */
+		irq = vector;
+		m_ack = 1;
+	}
+
+	this_domain = ipipe_current_domain;
+
+	if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))
+		head = &this_domain->p_link;
+	else {
+		head = __ipipe_pipeline.next;
+		next_domain = list_entry(head, struct ipipe_domain, p_link);
+		if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) {
+			if (!m_ack && next_domain->irqs[irq].acknowledge)
+				next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq));
+			__ipipe_dispatch_wired(next_domain, irq);
+			goto finalize_nosync;
+		}
+	}
+
+	/* Ack the interrupt. */
+
+	pos = head;
+
+	while (pos != &__ipipe_pipeline) {
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+		if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) {
+			__ipipe_set_irq_pending(next_domain, irq);
+			if (!m_ack && next_domain->irqs[irq].acknowledge) {
+				next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq));
+				m_ack = 1;
+			}
+		}
+		if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control))
+			break;
+		pos = next_domain->p_link.next;
+	}
+
+	/*
+	 * If the interrupt preempted the head domain, then do not
+	 * even try to walk the pipeline, unless an interrupt is
+	 * pending for it.
+	 */
+	if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) &&
+	    !__ipipe_ipending_p(ipipe_head_cpudom_ptr()))
+		goto finalize_nosync;
+
+	/*
+	 * Now walk the pipeline, yielding control to the highest
+	 * priority domain that has pending interrupt(s) or
+	 * immediately to the current domain if the interrupt has been
+	 * marked as 'sticky'. This search does not go beyond the
+	 * current domain in the pipeline.
+	 */
+
+	__ipipe_walk_pipeline(head);
+
+finalize_nosync:
+
+	/*
+	 * Given our deferred dispatching model for regular IRQs, we
+	 * only record CPU regs for the last timer interrupt, so that
+	 * the timer handler charges CPU times properly. It is assumed
+	 * that other interrupt handlers don't actually care for such
+	 * information.
+	 */
+
+	if (irq == __ipipe_hrtimer_irq) {
+		tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs);
+		tick_regs->flags = regs->flags;
+		tick_regs->cs = regs->cs;
+		tick_regs->ip = regs->ip;
+		tick_regs->bp = regs->bp;
+#ifdef CONFIG_X86_64
+		tick_regs->ss = regs->ss;
+		tick_regs->sp = regs->sp;
+#endif
+		if (!__ipipe_root_domain_p)
+			tick_regs->flags &= ~X86_EFLAGS_IF;
+	}
+
+	if (user_mode(regs) && (current->ipipe_flags & PF_EVTRET) != 0) {
+		current->ipipe_flags &= ~PF_EVTRET;
+		__ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs);
+	}
+
+	if (!__ipipe_root_domain_p ||
+	    test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		return 0;
+
+	return 1;
+}
+
+int __ipipe_check_tickdev(const char *devname)
+{
+	int ret = 1;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (!strcmp(devname, "lapic")) {
+		ret = __ipipe_check_lapic();
+		if (ret)
+			return ret;
+		printk(KERN_INFO "I-pipe: cannot use LAPIC as a tick device\n");
+		if (cpu_has_amd_erratum(amd_erratum_400))
+			printk(KERN_INFO "I-pipe: disable C1E power state in your BIOS\n");
+	}
+#endif
+
+	return ret;
+}
+
+#ifdef CONFIG_X86_32
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+		       struct clocksource *clock, u32 mult)
+{
+	if (clock == &clocksource_tsc)
+		ipipe_update_hostrt(wall_time, clock);
+}
+
+void update_vsyscall_tz(void)
+{
+}
+#endif /* CONFIG_X86_32 */
+
+EXPORT_SYMBOL_GPL(__ipipe_hrtimer_irq);
+
+#ifdef CONFIG_SPARSE_IRQ
+EXPORT_SYMBOL_GPL(irq_to_desc);
+#endif
+struct task_struct *__switch_to(struct task_struct *prev_p,
+				struct task_struct *next_p);
+EXPORT_SYMBOL_GPL(__switch_to);
+EXPORT_SYMBOL_GPL(show_stack);
+
+EXPORT_PER_CPU_SYMBOL_GPL(init_tss);
+#ifdef CONFIG_SMP
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate);
+#endif /* CONFIG_SMP */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+EXPORT_SYMBOL(tasklist_lock);
+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */
+
+#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64)
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
+#endif
+
+EXPORT_SYMBOL(__ipipe_halt_root);
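
Note on the contract between __ipipe_handle_irq() and the entry_32.S/entry_64.S stubs that call it: the return value selects the exit path. This is why, under CONFIG_IPIPE, common_interrupt no longer calls do_IRQ directly -- the pipeline decides, per IRQ and per domain, whether Linux sees the interrupt now, later, or not at all. A hypothetical C rendering of the caller side (mirrors the assembly; the sketch_ helpers stand in for the exit paths and are not actual kernel code):

/* Hypothetical helpers standing in for the assembly exit paths. */
extern void sketch_ret_from_intr(struct pt_regs *regs);
extern void sketch_restore_and_iret(struct pt_regs *regs);

static void sketch_irq_entry(struct pt_regs *regs)
{
	if (__ipipe_handle_irq(regs)) {
		/* Root domain is current and not stalled: run the normal
		 * ret_from_intr epilogue (softirqs, preemption, signals). */
		sketch_ret_from_intr(regs);
	} else {
		/* The IRQ was only logged for deferred replay, or it went to
		 * a higher-priority domain: restore the saved registers and
		 * iret without the Linux interrupt-exit work. */
		sketch_restore_and_iret(regs);
	}
}
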
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 387b6a0..386fa0c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -39,7 +39,7 @@ void ack_bad_irq(unsigned int irq)
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
@@ -232,11 +232,12 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	unsigned vector = ~regs->orig_ax;
 	unsigned irq;
 
+	irq = __this_cpu_read(vector_irq[vector]);
+	__ipipe_move_root_irq(irq);
+
 	exit_idle();
 	irq_enter();
 
-	irq = __this_cpu_read(vector_irq[vector]);
-
 	if (!handle_irq(irq, regs)) {
 		ack_APIC_irq();
 
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e97..8aff110 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -167,11 +167,13 @@ static void __init smp_intr_init(void)
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+#ifndef CONFIG_IPIPE
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
 	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+#endif
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -186,6 +188,10 @@ static void __init smp_intr_init(void)
 
 	/* IPI used for rebooting/stopping */
 	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32)
+	/* IPI for critical lock */
+	alloc_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX);
+#endif
 #endif
 #endif /* CONFIG_SMP */
 }
@@ -220,6 +226,13 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
 # endif
 
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32)
+	alloc_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0);
+	alloc_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1);
+	alloc_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2);
+	alloc_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3);
+#endif
+	ipipe_init_vector_irq(0);
 #endif
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff45541..77a6ee4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -37,6 +37,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 		if (ret)
 			return ret;
 		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+	} else {
+#ifdef CONFIG_IPIPE
+		/* Unconditionally allocate; the RT domain may need it. */
+		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
+		ret = fpu_alloc(&dst->thread.fpu);
+		if (ret)
+			return ret;
+#endif
 	}
 	return 0;
 }
@@ -58,6 +66,10 @@ void arch_task_cache_init(void)
         	kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
 				  SLAB_PANIC | SLAB_NOTRACK, NULL);
+#ifdef CONFIG_IPIPE
+	memset(&current->thread.fpu, 0, sizeof(current->thread.fpu));
+	fpu_alloc(&current->thread.fpu);
+#endif
 }
 
 /*
@@ -406,7 +418,7 @@ EXPORT_SYMBOL(default_idle);
 
 void stop_this_cpu(void *dummy)
 {
-	local_irq_disable();
+	local_irq_disable_hw();
 	/*
 	 * Remove this CPU:
 	 */
@@ -602,6 +614,11 @@ static void c1e_idle(void)
 
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
+#ifdef CONFIG_IPIPE
+#define default_to_mwait (boot_option_idle_override == IDLE_FORCE_MWAIT)
+#else
+#define default_to_mwait 1
+#endif
 #ifdef CONFIG_SMP
 	if (pm_idle == poll_idle && smp_num_siblings > 1) {
 		printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
@@ -611,7 +628,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 	if (pm_idle)
 		return;
 
-	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+	if (default_to_mwait && cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
 		/*
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d12878..60a9773 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -252,10 +252,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+#ifndef CONFIG_IPIPE	/* Lazily handled, init_fpu() will reset the state. */
 	/*
 	 * Free the old FP and other extended state
 	 */
 	free_thread_xstate(current);
+#endif
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
@@ -292,7 +294,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
-	int cpu = smp_processor_id();
+	int cpu = raw_smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	bool preload_fpu;
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8..87bfc16 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -56,6 +56,8 @@ asmlinkage extern void ret_from_fork(void);
 DEFINE_PER_CPU(unsigned long, old_rsp);
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
+asmlinkage extern void thread_return(void);
+
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 
 void idle_notifier_register(struct notifier_block *n)
@@ -273,6 +275,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 	p->thread.usersp = me->thread.usersp;
+	p->thread.rip = (unsigned long) thread_return;
 
 	set_tsk_thread_flag(p, TIF_FORK);
 
@@ -339,10 +342,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
 	set_fs(USER_DS);
+#ifndef CONFIG_IPIPE	/* Lazily handled, init_fpu() will reset the state. */
 	/*
 	 * Free the old FP and other extended state
 	 */
 	free_thread_xstate(current);
+#endif
 }
 
 void
@@ -375,7 +380,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
-	int cpu = smp_processor_id();
+	int cpu = raw_smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
 	bool preload_fpu;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f65e5b5..32d8f15 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/unistd.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 
@@ -1412,6 +1413,10 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
 	bool step;
 
+#ifdef CONFIG_IPIPE
+	if (syscall_get_nr(current, regs) >= NR_syscalls)
+		return;
+#endif
 	if (unlikely(current->audit_context))
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 513deac..8021cf4 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -188,9 +188,9 @@ static void native_stop_other_cpus(int wait)
 			udelay(1);
 	}
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	disable_local_APIC();
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 08776a9..141f878 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void)
 /*
  * Activate a secondary processor.
  */
-notrace static void __cpuinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
 {
 	/*
 	 * Don't put *anything* before cpu_init(), SMP booting is too
@@ -894,7 +894,7 @@ do_rest:
 int __cpuinit native_cpu_up(unsigned int cpu)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
-	unsigned long flags;
+	unsigned long flags, _flags;
 	int err;
 
 	WARN_ON(irqs_disabled());
@@ -933,9 +933,9 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	 * Check TSC synchronization with the AP (keep irqs disabled
 	 * while doing so):
 	 */
-	local_irq_save(flags);
+	local_irq_save_full(flags, _flags);
 	check_tsc_sync_source(cpu);
-	local_irq_restore(flags);
+	local_irq_restore_full(flags, _flags);
 
 	while (!cpu_online(cpu)) {
 		cpu_relax();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b6716..efd5a6d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -730,6 +730,7 @@ void __math_state_restore(void)
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
 		stts();
+		local_irq_enable_hw_cond();
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
@@ -752,6 +753,7 @@ asmlinkage void math_state_restore(void)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
+	unsigned long flags;
 
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
@@ -768,9 +770,11 @@ asmlinkage void math_state_restore(void)
 		local_irq_disable();
 	}
 
+	local_irq_save_hw_cond(flags);
 	clts();				/* Allow maths ops (or we recurse) */
 
 	__math_state_restore();
+	local_irq_restore_hw_cond(flags);
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ffe5755..1bd34e9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -409,9 +409,9 @@ unsigned long native_calibrate_tsc(void)
 	unsigned long flags, latch, ms, fast_calibrate;
 	int hpet = is_hpet_enabled(), i, loopmin;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	fast_calibrate = quick_pit_calibrate();
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	if (fast_calibrate)
 		return fast_calibrate;
 
@@ -454,11 +454,11 @@ unsigned long native_calibrate_tsc(void)
 		 * calibration, which will take at least 50ms, and
 		 * read the end value.
 		 */
-		local_irq_save(flags);
+		local_irq_save_hw(flags);
 		tsc1 = tsc_read_refs(&ref1, hpet);
 		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
 		tsc2 = tsc_read_refs(&ref2, hpet);
-		local_irq_restore(flags);
+		local_irq_restore_hw(flags);
 
 		/* Pick the lowest PIT TSC calibration so far */
 		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
@@ -741,7 +741,7 @@ core_initcall(cpufreq_tsc);
 
 /* clocksource code */
 
-static struct clocksource clocksource_tsc;
+struct clocksource clocksource_tsc;
 
 /*
  * We compare the TSC to the cycle_last value in the clocksource
@@ -787,7 +787,7 @@ static void resume_tsc(struct clocksource *cs)
 	clocksource_tsc.cycle_last = 0;
 }
 
-static struct clocksource clocksource_tsc = {
+struct clocksource clocksource_tsc = {
 	.name                   = "tsc",
 	.rating                 = 300,
 	.read                   = read_tsc,
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f875..6fe6883 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -148,12 +148,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 		do_exit(SIGSEGV);
 	}
 
+	local_irq_disable_hw_cond();
 	tss = &per_cpu(init_tss, get_cpu());
 	current->thread.sp0 = current->thread.saved_sp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
 	load_sp0(tss, &current->thread);
 	current->thread.saved_sp0 = 0;
 	put_cpu();
+	local_irq_enable_hw_cond();
 
 	ret = KVM86->regs32;
 
@@ -324,12 +326,14 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	tsk->thread.saved_fs = info->regs32->fs;
 	tsk->thread.saved_gs = get_user_gs(info->regs32);
 
+	local_irq_disable_hw_cond();
 	tss = &per_cpu(init_tss, get_cpu());
 	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
 	load_sp0(tss, &tsk->thread);
 	put_cpu();
+	local_irq_enable_hw_cond();
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c..2ca4253 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -32,6 +32,7 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/notifier.h>
+#include <linux/ipipe_tickdev.h>
 
 #include <asm/vsyscall.h>
 #include <asm/pgtable.h>
@@ -90,6 +91,9 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 	vsyscall_gtod_data.wall_to_monotonic = *wtm;
 	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+
+	if (clock == &clocksource_tsc)
+		ipipe_update_hostrt(wall_time, clock);
 }
 
 /* RED-PEN may want to readd seq locking, but then the variable should be
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d8a15a1..15482e6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3572,7 +3572,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	clgi();
 
-	local_irq_enable();
+	local_irq_enable_hw();
 
 	asm volatile (
 		"push %%"R"bp; \n\t"
@@ -3653,7 +3653,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	reload_tss(vcpu);
 
-	local_irq_disable();
+	local_irq_disable_hw();
 
 	stgi();
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf89ec2..5ef8af9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -883,9 +883,11 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
 {
-	preempt_disable();
+	unsigned long flags;
+
+	ipipe_preempt_disable(flags);
 	__vmx_load_host_state(vmx);
-	preempt_enable();
+	ipipe_preempt_enable(flags);
 }
 
 /*
@@ -1097,6 +1099,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	unsigned long *msr_bitmap;
 
 	vmx_load_host_state(vmx);
+	local_irq_disable_hw_cond();
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(&vmx->vcpu)) {
@@ -1126,6 +1129,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		move_msr_up(vmx, index, save_nmsrs++);
 
 	vmx->save_nmsrs = save_nmsrs;
+	local_irq_enable_hw_cond();
 
 	if (cpu_has_vmx_msr_bitmap()) {
 		if (is_long_mode(&vmx->vcpu))
@@ -4171,7 +4175,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	vmx->vcpu.cpu = cpu;
 	err = vmx_vcpu_setup(vmx);
+	local_irq_disable_hw_cond();
 	vmx_vcpu_put(&vmx->vcpu);
+	local_irq_enable_hw_cond();
 	put_cpu();
 	if (err)
 		goto free_vmcs;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bcc0efc..edb12fe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <linux/ipipe.h>
 #include <linux/user-return-notifier.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
@@ -109,6 +110,7 @@ struct kvm_shared_msrs_global {
 struct kvm_shared_msrs {
 	struct user_return_notifier urn;
 	bool registered;
+	bool dirty;
 	struct kvm_shared_msr_values {
 		u64 host;
 		u64 curr;
@@ -163,22 +165,36 @@ static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 		vcpu->arch.apf.gfns[i] = ~0;
 }
 
+static void kvm_restore_shared_msrs(struct kvm_shared_msrs *locals)
+{
+	struct kvm_shared_msr_values *values;
+	unsigned long flags;
+	unsigned int slot;
+
+	local_irq_save_hw_cond(flags);
+	if (locals->dirty) {
+		for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
+			values = &locals->values[slot];
+			if (values->host != values->curr) {
+				wrmsrl(shared_msrs_global.msrs[slot],
+				       values->host);
+				values->curr = values->host;
+			}
+		}
+		locals->dirty = false;
+	}
+	local_irq_restore_hw_cond(flags);
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
-	unsigned slot;
 	struct kvm_shared_msrs *locals
 		= container_of(urn, struct kvm_shared_msrs, urn);
-	struct kvm_shared_msr_values *values;
 
-	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
-		values = &locals->values[slot];
-		if (values->host != values->curr) {
-			wrmsrl(shared_msrs_global.msrs[slot], values->host);
-			values->curr = values->host;
-		}
-	}
+	kvm_restore_shared_msrs(locals);
 	locals->registered = false;
 	user_return_notifier_unregister(urn);
+	ipipe_unregister_root_preempt_handler();
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
@@ -224,6 +240,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 		return;
 	smsr->values[slot].curr = value;
 	wrmsrl(shared_msrs_global.msrs[slot], value);
+	smsr->dirty = true;
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
@@ -2110,9 +2127,27 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+	unsigned long flags;
+
+	local_irq_save_hw_cond(flags);
+
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = native_read_tsc();
+
+	if (!smsr->dirty)
+		ipipe_unregister_root_preempt_handler();
+	local_irq_restore_hw_cond(flags);
+}
+
+static void kvm_ipipe_root_preempt(void *cookie)
+{
+	struct kvm_vcpu *vcpu = cookie;
+
+	kvm_arch_vcpu_put(vcpu);
+	kvm_restore_shared_msrs(&__get_cpu_var(shared_msrs));
+	ipipe_unregister_root_preempt_handler();
 }
 
 static int is_efer_nx(void)
@@ -5207,6 +5242,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	preempt_disable();
+	local_irq_disable();
+	local_irq_disable_hw_cond();
+
+	ipipe_register_root_preempt_handler(kvm_ipipe_root_preempt, vcpu);
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
@@ -5216,12 +5255,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	atomic_set(&vcpu->guest_mode, 1);
 	smp_wmb();
 
-	local_irq_disable();
-
 	if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
 	    || need_resched() || signal_pending(current)) {
 		atomic_set(&vcpu->guest_mode, 0);
 		smp_wmb();
+		local_irq_enable_hw_cond();
 		local_irq_enable();
 		preempt_enable();
 		kvm_x86_ops->cancel_injection(vcpu);
@@ -5258,6 +5296,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	atomic_set(&vcpu->guest_mode, 0);
 	smp_wmb();
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 
 	++vcpu->stat.exits;
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index c9f2d9b..78d780a 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
 	void *p;
 	int i;
 
-	if (unlikely(in_interrupt()))
+	if (unlikely(!ipipe_root_domain_p || in_interrupt()))
 		return __memcpy(to, from, len);
 
 	p = to;
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5..98609ae 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -65,6 +65,10 @@
 	thunk lockdep_sys_exit_thunk,lockdep_sys_exit
 #endif
 	
+#ifdef CONFIG_IPIPE
+	thunk_retrax __ipipe_syscall_root_thunk,__ipipe_syscall_root
+#endif
+
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
 	SAVE_ARGS
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f87..ad63b6e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -352,9 +352,9 @@ void vmalloc_sync_all(void)
  *
  * This assumes no large pages in there.
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static inline int vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
-	pgd_t *pgd, *pgd_ref;
+	pgd_t *pgd_ref;
 	pud_t *pud, *pud_ref;
 	pmd_t *pmd, *pmd_ref;
 	pte_t *pte, *pte_ref;
@@ -370,7 +370,6 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	 * happen within a race in page table update. In the later
 	 * case just flush:
 	 */
-	pgd = pgd_offset(current->active_mm, address);
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
@@ -418,6 +417,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	return 0;
 }
 
+static noinline __kprobes int vmalloc_fault(unsigned long address)
+{
+	pgd_t *pgd = pgd_offset(current->active_mm, address);
+	return vmalloc_sync_one(pgd, address);
+}
+
 static const char errata93_warning[] =
 KERN_ERR 
 "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
@@ -973,6 +978,9 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	/* Get the faulting address: */
 	address = read_cr2();
 
+	if (!__ipipe_pipeline_head_p(ipipe_root_domain))
+		local_irq_enable_hw_cond();
+
 	/*
 	 * Detect and handle instructions that would cause a page fault for
 	 * both a tracked kernel page and a userspace page.
@@ -1164,3 +1172,43 @@ good_area:
 
 	up_read(&mm->mmap_sem);
 }
+
+#ifdef CONFIG_IPIPE
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_X86_32
+	unsigned long next, addr = start;
+
+	do {
+		unsigned long flags;
+		struct page *page;
+
+		next = pgd_addr_end(addr, end);
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru)
+			vmalloc_sync_one(page_address(page), addr);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+
+	} while (addr = next, addr != end);
+#else
+	unsigned long next, addr = start;
+	int ret = 0;
+
+	do {
+		struct page *page;
+
+		next = pgd_addr_end(addr, end);
+		spin_lock(&pgd_lock);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			ret = vmalloc_sync_one(pgd, addr);
+			if (ret)
+				break;
+		}
+		spin_unlock(&pgd_lock);
+		addr = next;
+	} while (!ret && addr != end);
+#endif
+}
+#endif /* CONFIG_IPIPE */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724..072f1f2 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -61,11 +61,15 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
  */
 void leave_mm(int cpu)
 {
+	unsigned long flags;
+
 	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		BUG();
+	local_irq_save_hw_cond(flags);
 	cpumask_clear_cpu(cpu,
 			  mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
 	load_cr3(swapper_pg_dir);
+	local_irq_restore_hw_cond(flags);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
 
@@ -196,6 +200,9 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 		apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
 			      INVALIDATE_TLB_VECTOR_START + sender);
 
+#ifdef CONFIG_IPIPE
+		WARN_ON_ONCE(irqs_disabled_hw());
+#endif
 		while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
 			cpu_relax();
 	}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 3796f99..a27ebec 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1688,6 +1688,11 @@ static int __init uv_bau_init(void)
 
 	uv_enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
+#ifdef CONFIG_IPIPE
+	for_each_possible_cpu(cur_cpu)
+		per_cpu(vector_irq, cur_cpu)[vector] =
+			ipipe_apic_vector_irq(vector);
+#endif
 
 	for_each_possible_blade(uvhub) {
 		if (uv_blade_nr_possible_cpus(uvhub)) {
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 834842a..86f2406 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -20,7 +20,7 @@
  * With multiple simultaneous hypertransport irq devices it might pay
  * to make this more fine grained.  But start with simple, stupid, and correct.
  */
-static DEFINE_SPINLOCK(ht_irq_lock);
+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock);
 
 struct ht_irq_cfg {
 	struct pci_dev *dev;
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d..4bd63f3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -771,6 +771,7 @@ static int exec_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk;
 	struct mm_struct * old_mm, *active_mm;
+	unsigned long flags;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -794,12 +795,14 @@ static int exec_mmap(struct mm_struct *mm)
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
 	tsk->mm = mm;
+	ipipe_mm_switch_protect(flags);
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
 	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
 		atomic_dec(&old_mm->oom_disable_count);
 		atomic_inc(&tsk->mm->oom_disable_count);
 	}
+	ipipe_mm_switch_unprotect(flags);
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
 	if (old_mm) {
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5e4f776..a18c14f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -142,6 +142,10 @@ static const char *task_state_array[] = {
 	"x (dead)",		/*  64 */
 	"K (wakekill)",		/* 128 */
 	"W (waking)",		/* 256 */
+#ifdef CONFIG_IPIPE
+	"A (atomic switch)",	/* 512 */
+	"N (wakeup disabled)",	/* 1024 */
+#endif
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index e994197..4914a26 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -58,11 +58,11 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	unsigned long flags;
 	int temp;
 
-	raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */
+	local_irq_save_hw(flags); /* Don't trace it in an irqsoff handler */
 	temp = v->counter;
 	temp += i;
 	v->counter = temp;
-	raw_local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 
 	return temp;
 }
@@ -79,11 +79,11 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	unsigned long flags;
 	int temp;
 
-	raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */
+	local_irq_save_hw(flags);
 	temp = v->counter;
 	temp -= i;
 	v->counter = temp;
-	raw_local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 
 	return temp;
 }
@@ -145,9 +145,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 	unsigned long flags;
 
 	mask = ~mask;
-	raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */
+	local_irq_save_hw(flags);
 	*addr &= mask;
-	raw_local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 /* Assume that atomic operations are already serializing */
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index ecc44a8..5caf6e9 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -21,20 +21,20 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
  * this is the substitute */
 #define _atomic_spin_lock_irqsave(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);	\
-	local_irq_save(f);			\
+	local_irq_save_hw(f);			\
 	arch_spin_lock(s);			\
 } while(0)
 
 #define _atomic_spin_unlock_irqrestore(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);		\
 	arch_spin_unlock(s);				\
-	local_irq_restore(f);				\
+	local_irq_restore_hw(f);			\
 } while(0)
 
 
 #else
-#  define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
-#  define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
+#  define _atomic_spin_lock_irqsave(l,f) do { local_irq_save_hw(f); } while (0)
+#  define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore_hw(f); } while (0)
 #endif
 
 /*
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index 2533fdd..41865db 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 	if (size == 8 && sizeof(unsigned long) != 8)
 		wrong_size_cmpxchg(ptr);
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	switch (size) {
 	case 1: prev = *(u8 *)ptr;
 		if (prev == old)
@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 	default:
 		wrong_size_cmpxchg(ptr);
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return prev;
 }
 
@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
 	u64 prev;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	prev = *(u64 *)ptr;
 	if (prev == old)
 		*(u64 *)ptr = new;
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return prev;
 }
 
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index d17784e..d2a6f27 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -67,6 +67,20 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 
 #define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
 #define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
+#ifdef CONFIG_IPIPE
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+extern int __ipipe_check_percpu_access(void);
+#define __ipipe_local_cpu_offset				\
+	({							\
+		WARN_ON_ONCE(__ipipe_check_percpu_access());	\
+		__my_cpu_offset;				\
+	})
+#else
+#define __ipipe_local_cpu_offset  __my_cpu_offset
+#endif
+#define __ipipe_get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(&(var), __ipipe_local_cpu_offset))
+#endif /* CONFIG_IPIPE */
 
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
@@ -82,6 +96,7 @@ extern void setup_per_cpu_areas(void);
 #define per_cpu(var, cpu)	(*((void)(cpu), VERIFY_PERCPU_PTR(&(var))))
 #define __get_cpu_var(var)	(*VERIFY_PERCPU_PTR(&(var)))
 #define __raw_get_cpu_var(var)	(*VERIFY_PERCPU_PTR(&(var)))
+#define __ipipe_get_cpu_var(var) __raw_get_cpu_var(var)
 #define this_cpu_ptr(ptr)	per_cpu_ptr(ptr, 0)
 #define __this_cpu_ptr(ptr)	this_cpu_ptr(ptr)
 
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 32f9fd6..0fcf21b 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -217,6 +217,7 @@ extern void irq_exit(void);
 
 #define nmi_enter()						\
 	do {							\
+		ipipe_nmi_enter();				\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
 		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
@@ -233,6 +234,7 @@ extern void irq_exit(void);
 		BUG_ON(!in_nmi());				\
 		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
+		ipipe_nmi_exit();				\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h
new file mode 100644
index 0000000..a0a8a0d
--- /dev/null
+++ b/include/linux/ipipe.h
@@ -0,0 +1,781 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe.h
+ *
+ * Copyright (C) 2002-2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_H
+#define __LINUX_IPIPE_H
+
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/linkage.h>
+#include <linux/ipipe_base.h>
+#include <asm/ipipe.h>
+#include <asm/bug.h>
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+#include <linux/cpumask.h>
+#include <asm/system.h>
+
+static inline int ipipe_disable_context_check(int cpu)
+{
+	return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0);
+}
+
+static inline void ipipe_restore_context_check(int cpu, int old_state)
+{
+	per_cpu(ipipe_percpu_context_check, cpu) = old_state;
+}
+
+static inline void ipipe_context_check_off(void)
+{
+	int cpu;
+	for_each_online_cpu(cpu)
+		per_cpu(ipipe_percpu_context_check, cpu) = 0;
+}
+
+#else	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+static inline int ipipe_disable_context_check(int cpu)
+{
+	return 0;
+}
+
+static inline void ipipe_restore_context_check(int cpu, int old_state) { }
+
+static inline void ipipe_context_check_off(void) { }
+
+#endif	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL
+#define IPIPE_WARN(c)		WARN_ON(c)
+#define IPIPE_WARN_ONCE(c)	WARN_ON_ONCE(c)
+#else
+#define IPIPE_WARN(c)		do { (void)(c); } while (0)
+#define IPIPE_WARN_ONCE(c)	do { (void)(c); } while (0)
+#endif
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_VERSION_STRING	IPIPE_ARCH_STRING
+#define IPIPE_RELEASE_NUMBER	((IPIPE_MAJOR_NUMBER << 16) | \
+				 (IPIPE_MINOR_NUMBER <<  8) | \
+				 (IPIPE_PATCH_NUMBER))
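+
+/*
+ * The release number packs one byte per component; e.g. a hypothetical
+ * 2.11.3 release would encode as 0x020b03.
+ */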
+
+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
+#endif /* !BROKEN_BUILTIN_RETURN_ADDRESS */
+
+#define IPIPE_ROOT_PRIO		100
+#define IPIPE_ROOT_ID		0
+#define IPIPE_ROOT_NPTDKEYS	4	/* Must be <= BITS_PER_LONG */
+
+#define IPIPE_RESET_TIMER	0x1
+#define IPIPE_GRAB_TIMER	0x2
+
+/* Global domain flags */
+#define IPIPE_SPRINTK_FLAG	0	/* Synchronous printk() allowed */
+#define IPIPE_AHEAD_FLAG	1	/* Domain always heads the pipeline */
+
+/* Interrupt control bits */
+#define IPIPE_HANDLE_FLAG	0
+#define IPIPE_PASS_FLAG		1
+#define IPIPE_ENABLE_FLAG	2
+#define IPIPE_DYNAMIC_FLAG	IPIPE_HANDLE_FLAG
+#define IPIPE_STICKY_FLAG	3
+#define IPIPE_SYSTEM_FLAG	4
+#define IPIPE_LOCK_FLAG		5
+#define IPIPE_WIRED_FLAG	6
+#define IPIPE_EXCLUSIVE_FLAG	7
+
+#define IPIPE_HANDLE_MASK	(1 << IPIPE_HANDLE_FLAG)
+#define IPIPE_PASS_MASK		(1 << IPIPE_PASS_FLAG)
+#define IPIPE_ENABLE_MASK	(1 << IPIPE_ENABLE_FLAG)
+#define IPIPE_DYNAMIC_MASK	IPIPE_HANDLE_MASK
+#define IPIPE_STICKY_MASK	(1 << IPIPE_STICKY_FLAG)
+#define IPIPE_SYSTEM_MASK	(1 << IPIPE_SYSTEM_FLAG)
+#define IPIPE_LOCK_MASK		(1 << IPIPE_LOCK_FLAG)
+#define IPIPE_WIRED_MASK	(1 << IPIPE_WIRED_FLAG)
+#define IPIPE_EXCLUSIVE_MASK	(1 << IPIPE_EXCLUSIVE_FLAG)
+
+#define IPIPE_DEFAULT_MASK	(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK)
+#define IPIPE_STDROOT_MASK	(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK)
+
+#define IPIPE_EVENT_SELF        0x80000000
+
+#define IPIPE_NR_CPUS		NR_CPUS
+
+/* This accessor assumes hw IRQs are off on SMP; allows assignment. */
+#define __ipipe_current_domain	__ipipe_get_cpu_var(ipipe_percpu_domain)
+/* This read-only accessor makes sure that hw IRQs are off on SMP. */
+#define ipipe_current_domain				\
+	({						\
+		struct ipipe_domain *__ipd__;		\
+		unsigned long __flags__;		\
+		local_irq_save_hw_smp(__flags__);	\
+		__ipd__ = __ipipe_current_domain;	\
+		local_irq_restore_hw_smp(__flags__);	\
+		__ipd__;				\
+	})
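+
+/*
+ * In short: __ipipe_current_domain is for code already running with hw
+ * IRQs off (or which needs to assign the per-cpu slot), whereas
+ * ipipe_current_domain is the safe, read-only form for any other
+ * context.
+ */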
+
+#define ipipe_virtual_irq_p(irq)	((irq) >= IPIPE_VIRQ_BASE && \
+					 (irq) < IPIPE_NR_IRQS)
+
+#define IPIPE_SAME_HANDLER	((ipipe_irq_handler_t)(-1))
+
+struct irq_desc;
+
+typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc);
+
+typedef int (*ipipe_event_handler_t)(unsigned event,
+				     struct ipipe_domain *from,
+				     void *data);
+struct ipipe_domain {
+
+	int slot;			/* Slot number in percpu domain data array. */
+	struct list_head p_link;	/* Link in pipeline */
+	ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */
+	unsigned long long evself;	/* Self-monitored event bits. */
+
+	struct ipipe_irqdesc {
+		unsigned long control;
+		ipipe_irq_ackfn_t acknowledge;
+		ipipe_irq_handler_t handler;
+		void *cookie;
+	} ____cacheline_aligned irqs[IPIPE_NR_IRQS];
+
+	int priority;
+	void *pdd;
+	unsigned long flags;
+	unsigned domid;
+	const char *name;
+	struct mutex mutex;
+};
+
+#define IPIPE_HEAD_PRIORITY	(-1) /* For domains always heading the pipeline */
+
+struct ipipe_domain_attr {
+
+	unsigned domid;		/* Domain identifier -- Magic value set by caller */
+	const char *name;	/* Domain name -- Warning: won't be dup'ed! */
+	int priority;		/* Priority in interrupt pipeline */
+	void (*entry) (void);	/* Domain entry point */
+	void *pdd;		/* Per-domain (opaque) data pointer */
+};
+
+#define __ipipe_irq_cookie(ipd, irq)		(ipd)->irqs[irq].cookie
+#define __ipipe_irq_handler(ipd, irq)		(ipd)->irqs[irq].handler
+#define __ipipe_cpudata_irq_hits(ipd, cpu, irq)	ipipe_percpudom(ipd, irqall, cpu)[irq]
+
+extern unsigned __ipipe_printk_virq;
+
+extern unsigned long __ipipe_virtual_irq_map;
+
+extern struct list_head __ipipe_pipeline;
+
+extern int __ipipe_event_monitors[];
+
+typedef void (*ipipe_root_preempt_handler_t)(void *cookie);
+
+DECLARE_PER_CPU(ipipe_root_preempt_handler_t, __ipipe_root_preempt_handler);
+DECLARE_PER_CPU(void *, __ipipe_root_preempt_cookie);
+
+/* Private interface */
+
+void ipipe_init_early(void);
+
+void ipipe_init(void);
+
+#ifdef CONFIG_PROC_FS
+void ipipe_init_proc(void);
+
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_init_tracer(void);
+#else /* !CONFIG_IPIPE_TRACE */
+#define __ipipe_init_tracer()       do { } while(0)
+#endif /* CONFIG_IPIPE_TRACE */
+
+#else	/* !CONFIG_PROC_FS */
+#define ipipe_init_proc()	do { } while(0)
+#endif	/* CONFIG_PROC_FS */
+
+void __ipipe_init_stage(struct ipipe_domain *ipd);
+
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd);
+
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd);
+
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd);
+
+void __ipipe_flush_printk(unsigned irq, void *cookie);
+
+void __ipipe_walk_pipeline(struct list_head *pos);
+
+void __ipipe_pend_irq(unsigned irq, struct list_head *head);
+
+int __ipipe_dispatch_event(unsigned event, void *data);
+
+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq);
+
+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq);
+
+void __ipipe_sync_stage(void);
+
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq);
+
+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end);
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_irq_lock(unsigned irq)
+{
+	__ipipe_lock_irq(__ipipe_current_domain, ipipe_processor_id(), irq);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_irq_unlock(unsigned irq)
+{
+	__ipipe_unlock_irq(__ipipe_current_domain, irq);
+}
+
+#ifndef __ipipe_sync_pipeline
+#define __ipipe_sync_pipeline() __ipipe_sync_stage()
+#endif
+
+#ifndef __ipipe_do_root_xirq
+#define __ipipe_do_root_xirq(ipd, irq)			\
+	(ipd)->irqs[irq].handler(irq, (ipd)->irqs[irq].cookie)
+#endif
+
+#ifndef __ipipe_check_root_resched
+#ifdef CONFIG_PREEMPT
+#define __ipipe_check_root_resched()	\
+	(preempt_count() == 0 && need_resched())
+#else
+#define __ipipe_check_root_resched()	0
+#endif
+#endif
+
+#ifndef __ipipe_run_irqtail
+#define __ipipe_run_irqtail(irq) do { } while(0)
+#endif
+
+#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next)
+
+#define __ipipe_ipending_p(p)	((p)->irqpend_himap != 0)
+
+/*
+ * Keep the following as a macro, so that client code can check for
+ * support of the invariant pipeline head optimization.
+ */
+#define __ipipe_pipeline_head() \
+	list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link)
+
+#define local_irq_enable_hw_cond()		local_irq_enable_hw()
+#define local_irq_disable_hw_cond()		local_irq_disable_hw()
+#define local_irq_save_hw_cond(flags)		local_irq_save_hw(flags)
+#define local_irq_restore_hw_cond(flags)	local_irq_restore_hw(flags)
+
+#ifdef CONFIG_SMP
+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask);
+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask);
+#define local_irq_save_hw_smp(flags)		local_irq_save_hw(flags)
+#define local_irq_restore_hw_smp(flags)		local_irq_restore_hw(flags)
+#else /* !CONFIG_SMP */
+#define local_irq_save_hw_smp(flags)		do { (void)(flags); } while(0)
+#define local_irq_restore_hw_smp(flags)		do { } while(0)
+#endif /* CONFIG_SMP */
+
+#define local_irq_save_full(vflags, rflags)		\
+	do {						\
+		local_irq_save(vflags);			\
+		local_irq_save_hw(rflags);		\
+	} while(0)
+
+#define local_irq_restore_full(vflags, rflags)		\
+	do {						\
+		local_irq_restore_hw(rflags);		\
+		local_irq_restore(vflags);		\
+	} while(0)
+
+static inline void __local_irq_restore_nosync(unsigned long x)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr();
+
+	if (raw_irqs_disabled_flags(x)) {
+		__set_bit(IPIPE_STALL_FLAG, &p->status);
+		trace_hardirqs_off();
+	} else {
+		trace_hardirqs_on();
+		__clear_bit(IPIPE_STALL_FLAG, &p->status);
+	}
+}
+
+static inline void local_irq_restore_nosync(unsigned long x)
+{
+	unsigned long flags;
+	local_irq_save_hw_smp(flags);
+	__local_irq_restore_nosync(x);
+	local_irq_restore_hw_smp(flags);
+}
+
+#define __ipipe_root_domain_p	(__ipipe_current_domain == ipipe_root_domain)
+#define ipipe_root_domain_p	(ipipe_current_domain == ipipe_root_domain)
+
+/* This has to be called with hw IRQs off. */
+#define __ipipe_head_domain_p   __ipipe_pipeline_head_p(__ipipe_current_domain)
+
+static inline int __ipipe_event_monitored_p(int ev)
+{
+	if (__ipipe_event_monitors[ev] > 0)
+		return 1;
+
+	return (ipipe_current_domain->evself & (1LL << ev)) != 0;
+}
+
+#define ipipe_notifier_enabled_p(p)	\
+	(((p)->ipipe_flags) & PF_EVNOTIFY)
+
+#define ipipe_sigwake_notify(p)						\
+	do {								\
+		if (ipipe_notifier_enabled_p(p) &&			\
+		    __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE))	\
+			__ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE, p);	\
+	} while (0)
+
+#define ipipe_exit_notify(p)						\
+	do {								\
+		if (ipipe_notifier_enabled_p(p) &&			\
+		    __ipipe_event_monitored_p(IPIPE_EVENT_EXIT))	\
+			__ipipe_dispatch_event(IPIPE_EVENT_EXIT, p);	\
+	} while (0)
+
+#define ipipe_setsched_notify(p)					\
+	do {								\
+		if (ipipe_notifier_enabled_p(p) &&			\
+		    __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED))	\
+			__ipipe_dispatch_event(IPIPE_EVENT_SETSCHED, p); \
+	} while (0)
+
+#define ipipe_schedule_notify(prev, next)				\
+do {									\
+	if ((ipipe_notifier_enabled_p(next) ||				\
+	     ipipe_notifier_enabled_p(prev)) &&				\
+	    __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE))		\
+		__ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE, next);	\
+} while (0)
+
+#define ipipe_trap_notify(ex, regs)					\
+	({								\
+		unsigned long __flags__;				\
+		int __ret__ = 0;					\
+		local_irq_save_hw_smp(__flags__);			\
+		if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \
+		     ipipe_notifier_enabled_p(current)) &&		\
+		    __ipipe_event_monitored_p(ex)) {			\
+			local_irq_restore_hw_smp(__flags__);		\
+			__ret__ = __ipipe_dispatch_event(ex, regs);	\
+		} else							\
+			local_irq_restore_hw_smp(__flags__);		\
+		__ret__;						\
+	})
+
+#define ipipe_init_notify(p)						\
+	do {								\
+		if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT))	\
+			__ipipe_dispatch_event(IPIPE_EVENT_INIT, p);	\
+	} while (0)
+
+#define ipipe_cleanup_notify(mm)					\
+	do {								\
+		if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP))	\
+			__ipipe_dispatch_event(IPIPE_EVENT_CLEANUP, mm); \
+	} while (0)
+
+/* Public interface */
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr);
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd);
+
+void ipipe_suspend_domain(void);
+
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t acknowledge,
+			 unsigned modemask);
+
+int ipipe_control_irq(struct ipipe_domain *ipd,
+		      unsigned int irq,
+		      unsigned clrmask,
+		      unsigned setmask);
+
+unsigned ipipe_alloc_virq(void);
+
+int ipipe_free_virq(unsigned virq);
+
+int ipipe_trigger_irq(unsigned irq);
+
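+/*
+ * Rough usage sketch of the interface above; the domain, handler and
+ * attribute values below are made up for illustration:
+ *
+ *	struct ipipe_domain_attr attr;
+ *
+ *	ipipe_init_attr(&attr);
+ *	attr.name = "mydom";
+ *	attr.priority = IPIPE_HEAD_PRIORITY;
+ *	ipipe_register_domain(&mydom, &attr);
+ *	ipipe_virtualize_irq(&mydom, irq, my_handler, NULL, NULL,
+ *			     IPIPE_HANDLE_MASK | IPIPE_WIRED_MASK);
+ *
+ * my_handler() then receives the interrupt from the head stage and may
+ * pass it down to Linux with ipipe_propagate_irq(irq) when done.
+ */
+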
+static inline void __ipipe_propagate_irq(unsigned irq)
+{
+	struct list_head *next = __ipipe_current_domain->p_link.next;
+	if (next == &ipipe_root.p_link) {
+		/* Fast path: root must handle all interrupts. */
+		__ipipe_set_irq_pending(&ipipe_root, irq);
+		return;
+	}
+	__ipipe_pend_irq(irq, next);
+}
+
+static inline void __ipipe_schedule_irq(unsigned irq)
+{
+	__ipipe_pend_irq(irq, &__ipipe_current_domain->p_link);
+}
+
+static inline void __ipipe_schedule_irq_head(unsigned irq)
+{
+	__ipipe_set_irq_pending(__ipipe_pipeline_head(), irq);
+}
+
+static inline void __ipipe_schedule_irq_root(unsigned irq)
+{
+	__ipipe_set_irq_pending(&ipipe_root, irq);
+}
+
+static inline void ipipe_propagate_irq(unsigned irq)
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+	__ipipe_propagate_irq(irq);
+	local_irq_restore_hw(flags);
+}
+
+static inline void ipipe_schedule_irq(unsigned irq)
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+	__ipipe_schedule_irq(irq);
+	local_irq_restore_hw(flags);
+}
+
+static inline void ipipe_schedule_irq_head(unsigned irq)
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+	__ipipe_schedule_irq_head(irq);
+	local_irq_restore_hw(flags);
+}
+
+static inline void ipipe_schedule_irq_root(unsigned irq)
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+	__ipipe_schedule_irq_root(irq);
+	local_irq_restore_hw(flags);
+}
+
+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd);
+
+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd);
+
+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd);
+
+static inline void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	ipipe_test_and_unstall_pipeline_from(ipd);
+}
+
+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+					  unsigned long x);
+
+static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd)
+{
+	return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline void ipipe_stall_pipeline_head(void)
+{
+	local_irq_disable_hw();
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
+}
+
+static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
+{
+	local_irq_disable_hw();
+	return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
+}
+
+void ipipe_unstall_pipeline_head(void);
+
+void __ipipe_restore_pipeline_head(unsigned long x);
+
+static inline void ipipe_restore_pipeline_head(unsigned long x)
+{
+#ifdef CONFIG_IPIPE_DEBUG
+	if (WARN_ON_ONCE(!irqs_disabled_hw()))
+		local_irq_disable_hw();
+#endif
+	if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1)
+		__ipipe_restore_pipeline_head(x);
+}
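+
+/*
+ * These calls are meant to be paired; a typical protected section
+ * looks like (sketch only):
+ *
+ *	unsigned long flags = ipipe_test_and_stall_pipeline_head();
+ *	... code running with the head stage stalled and hw IRQs off ...
+ *	ipipe_restore_pipeline_head(flags);
+ */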
+
+#define ipipe_unstall_pipeline() \
+	ipipe_unstall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_and_unstall_pipeline() \
+	ipipe_test_and_unstall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_pipeline() \
+	ipipe_test_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_and_stall_pipeline() \
+	ipipe_test_and_stall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_stall_pipeline() \
+	ipipe_stall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_restore_pipeline(x) \
+	ipipe_restore_pipeline_from(ipipe_current_domain, (x))
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr);
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
+
+void __ipipe_do_critical_sync(unsigned irq, void *cookie);
+
+unsigned long ipipe_critical_enter(void (*syncfn) (void));
+
+void ipipe_critical_exit(unsigned long flags);
+
+void ipipe_prepare_panic(void);
+
+static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd)
+{
+	set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
+}
+
+static inline void ipipe_set_printk_async(struct ipipe_domain *ipd)
+{
+	clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
+}
+
+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline int ipipe_test_foreign_stack(void)
+{
+	/* Must be called hw interrupts off. */
+	return test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status));
+}
+
+#ifndef ipipe_safe_current
+#define ipipe_safe_current()					\
+({								\
+	struct task_struct *p;					\
+	unsigned long flags;					\
+	local_irq_save_hw_smp(flags);				\
+	p = ipipe_test_foreign_stack() ? &init_task : current;	\
+	local_irq_restore_hw_smp(flags);			\
+	p; \
+})
+#endif
+
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned event,
+					ipipe_event_handler_t handler);
+
+cpumask_t ipipe_set_irq_affinity(unsigned irq,
+				 cpumask_t cpumask);
+
+int ipipe_send_ipi(unsigned ipi,
+		   cpumask_t cpumask);
+
+int ipipe_setscheduler_root(struct task_struct *p,
+			    int policy,
+			    int prio);
+
+int ipipe_reenter_root(struct task_struct *prev,
+		       int policy,
+		       int prio);
+
+int ipipe_alloc_ptdkey(void);
+
+int ipipe_free_ptdkey(int key);
+
+int ipipe_set_ptd(int key,
+		  void *value);
+
+void *ipipe_get_ptd(int key);
+
+struct vm_area_struct;
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk);
+int ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma);
+
+static inline void ipipe_nmi_enter(void)
+{
+	int cpu = ipipe_processor_id();
+
+	per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status);
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+	per_cpu(ipipe_saved_context_check_state, cpu) =
+		ipipe_disable_context_check(cpu);
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+}
+
+static inline void ipipe_nmi_exit(void)
+{
+	int cpu = ipipe_processor_id();
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+	ipipe_restore_context_check
+		(cpu, per_cpu(ipipe_saved_context_check_state, cpu));
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+
+	if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu)))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+}
+
+#define ipipe_enable_notifier(p)			\
+	do {						\
+		(p)->ipipe_flags |= PF_EVNOTIFY;	\
+	} while (0)
+
+#define ipipe_disable_notifier(p)				\
+	do {							\
+		(p)->ipipe_flags &= ~(PF_EVNOTIFY|PF_EVTRET);	\
+	} while (0)
+
+/* hw IRQs off. */
+#define ipipe_return_notify(p)						\
+	do {								\
+		if (ipipe_notifier_enabled_p(p) &&			\
+		    __ipipe_event_monitored_p(IPIPE_EVENT_RETURN))	\
+			(p)->ipipe_flags |= PF_EVTRET;			\
+	} while (0)
+
+#define ipipe_clear_flags(p)		do { (p)->ipipe_flags = 0; } while (0)
+
+static inline void
+ipipe_register_root_preempt_handler(ipipe_root_preempt_handler_t handler,
+				    void *cookie)
+{
+	int cpu = ipipe_processor_id();
+
+	per_cpu(__ipipe_root_preempt_cookie, cpu) = cookie;
+	barrier();
+	per_cpu(__ipipe_root_preempt_handler, cpu) = handler;
+}
+
+static inline void ipipe_unregister_root_preempt_handler(void)
+{
+	per_cpu(__ipipe_root_preempt_handler, ipipe_processor_id()) = NULL;
+}
+
+static inline void ipipe_root_preempt_notify(void)
+{
+	ipipe_root_preempt_handler_t handler;
+	int cpu = ipipe_processor_id();
+
+	handler = per_cpu(__ipipe_root_preempt_handler, cpu);
+	if (unlikely(handler))
+		handler(per_cpu(__ipipe_root_preempt_cookie, cpu));
+}
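+
+/*
+ * The root preemption hook is armed from code like KVM's
+ * vcpu_enter_guest() so that per-cpu state shared with the guest (e.g.
+ * user-return MSRs) can be restored before a higher priority domain
+ * takes over the CPU. Note that the cookie is published before the
+ * handler pointer, so a preemption racing with registration always
+ * sees a consistent pair.
+ */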
+
+#else	/* !CONFIG_IPIPE */
+
+#define ipipe_init_early()		do { } while(0)
+#define ipipe_init()			do { } while(0)
+#define ipipe_suspend_domain()		do { } while(0)
+#define ipipe_sigwake_notify(p)		do { } while(0)
+#define ipipe_setsched_notify(p)	do { } while(0)
+#define ipipe_init_notify(p)		do { } while(0)
+#define ipipe_exit_notify(p)		do { } while(0)
+#define ipipe_cleanup_notify(mm)	do { } while(0)
+#define ipipe_trap_notify(t,r)		0
+#define ipipe_init_proc()		do { } while(0)
+
+#define ipipe_register_root_preempt_handler(h, c)	do { } while (0)
+#define ipipe_unregister_root_preempt_handler()		do { } while (0)
+#define ipipe_root_preempt_notify()			do { } while (0)
+
+static inline void __ipipe_pin_range_globally(unsigned long start,
+					      unsigned long end)
+{
+}
+
+static inline int ipipe_test_foreign_stack(void)
+{
+	return 0;
+}
+
+#define local_irq_enable_hw_cond()		do { } while(0)
+#define local_irq_disable_hw_cond()		do { } while(0)
+#define local_irq_save_hw_cond(flags)		do { (void)(flags); } while(0)
+#define local_irq_restore_hw_cond(flags)	do { } while(0)
+#define local_irq_save_hw_smp(flags)		do { (void)(flags); } while(0)
+#define local_irq_restore_hw_smp(flags)		do { } while(0)
+
+#define ipipe_irq_lock(irq)		do { } while(0)
+#define ipipe_irq_unlock(irq)		do { } while(0)
+
+#define __ipipe_root_domain_p		1
+#define ipipe_root_domain_p		1
+#define ipipe_safe_current()		current
+#define ipipe_processor_id()		smp_processor_id()
+#define ipipe_clear_flags(p)		do { } while (0)
+
+#define ipipe_nmi_enter()		do { } while (0)
+#define ipipe_nmi_exit()		do { } while (0)
+
+#define local_irq_disable_head()	local_irq_disable()
+
+#define local_irq_save_full(vflags, rflags)	do { (void)(vflags); local_irq_save(rflags); } while(0)
+#define local_irq_restore_full(vflags, rflags)	do { (void)(vflags); local_irq_restore(rflags); } while(0)
+#define local_irq_restore_nosync(vflags)	local_irq_restore(vflags)
+
+#define __ipipe_pipeline_head_p(ipd)	1
+
+#endif	/* CONFIG_IPIPE */
+
+#endif	/* !__LINUX_IPIPE_H */
diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h
new file mode 100644
index 0000000..f725a66
--- /dev/null
+++ b/include/linux/ipipe_base.h
@@ -0,0 +1,134 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_base.h
+ *
+ * Copyright (C) 2002-2007 Philippe Gerum.
+ *               2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_BASE_H
+#define __LINUX_IPIPE_BASE_H
+
+#ifdef CONFIG_IPIPE
+
+#include <asm/ipipe_base.h>
+
+#define __bpl_up(x)		(((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1))
+/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */
+#define IPIPE_NR_VIRQS		BITS_PER_LONG
+/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */
+#define IPIPE_VIRQ_BASE		__bpl_up(IPIPE_NR_XIRQS)
+/* Total number of IRQ slots */
+#define IPIPE_NR_IRQS		(IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS)
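+
+/*
+ * For instance, with a hypothetical IPIPE_NR_XIRQS of 224 on a 32bit
+ * machine: IPIPE_VIRQ_BASE = 224 (already a multiple of 32),
+ * IPIPE_NR_VIRQS = 32, hence IPIPE_NR_IRQS = 256 slots overall.
+ */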
+
+#define IPIPE_IRQ_LOMAPSZ	(IPIPE_NR_IRQS / BITS_PER_LONG)
+#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG
+/*
+ * We need a 3-level mapping. This allows us to handle up to 32k IRQ
+ * vectors on 32bit machines, 256k on 64bit ones.
+ */
+#define __IPIPE_3LEVEL_IRQMAP	1
+#define IPIPE_IRQ_MDMAPSZ	(__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG)
+#else
+/*
+ * 2-level mapping is enough. This allows us to handle up to 1024 IRQ
+ * vectors on 32bit machines, 4096 on 64bit ones.
+ */
+#define __IPIPE_2LEVEL_IRQMAP	1
+#endif
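+
+/*
+ * Continuing the 32bit example above: with 256 IRQ slots,
+ * IPIPE_IRQ_LOMAPSZ = 256 / 32 = 8 words, which fits into a single
+ * top-level word, so the 2-level mapping applies.
+ */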
+
+/* Per-cpu pipeline status */
+#define IPIPE_STALL_FLAG	0	/* Stalls a pipeline stage -- guaranteed at bit #0 */
+#define IPIPE_NOSTACK_FLAG	1	/* Domain currently runs on a foreign stack */
+
+#define IPIPE_STALL_MASK	(1L << IPIPE_STALL_FLAG)
+#define IPIPE_NOSTACK_MASK	(1L << IPIPE_NOSTACK_FLAG)
+
+typedef void (*ipipe_irq_handler_t)(unsigned int irq,
+				    void *cookie);
+
+extern struct ipipe_domain ipipe_root;
+
+#define ipipe_root_domain (&ipipe_root)
+
+void __ipipe_unstall_root(void);
+
+void __ipipe_restore_root(unsigned long x);
+
+#define ipipe_preempt_disable(flags)		\
+	do {					\
+		local_irq_save_hw(flags);	\
+		if (__ipipe_root_domain_p)	\
+			preempt_disable();	\
+	} while (0)
+
+#define ipipe_preempt_enable(flags)			\
+	do {						\
+		if (__ipipe_root_domain_p) {		\
+			preempt_enable_no_resched();	\
+			local_irq_restore_hw(flags);	\
+			preempt_check_resched();	\
+		} else					\
+			local_irq_restore_hw(flags);	\
+	} while (0)
+
+#define ipipe_get_cpu(flags)	({ ipipe_preempt_disable(flags); ipipe_processor_id(); })
+#define ipipe_put_cpu(flags)	ipipe_preempt_enable(flags)
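+
+/*
+ * These helpers only touch the Linux preemption count when the root
+ * domain is current; from any other domain they amount to hard IRQ
+ * masking alone. Intended pattern (sketch only):
+ *
+ *	unsigned long flags;
+ *	int cpu = ipipe_get_cpu(flags);
+ *	... per-cpu work safe against both domains ...
+ *	ipipe_put_cpu(flags);
+ */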
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+void ipipe_check_context(struct ipipe_domain *border_ipd);
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { }
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+/* Generic features */
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#define __IPIPE_FEATURE_REQUEST_TICKDEV		1
+#endif
+#define __IPIPE_FEATURE_DELAYED_ATOMICSW	1
+#define __IPIPE_FEATURE_FASTPEND_IRQ		1
+#define __IPIPE_FEATURE_TRACE_EVENT		1
+#define __IPIPE_FEATURE_ENABLE_NOTIFIER		1
+#ifdef CONFIG_HAVE_IPIPE_HOSTRT
+#define __IPIPE_FEATURE_HOSTRT			1
+#endif
+#define __IPIPE_FEATURE_PREPARE_PANIC		1
+#define __IPIPE_FEATURE_CONTROL_IRQ		1
+#define __IPIPE_FEATURE_ROOTPREEMPT_NOTIFIER	1
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_preempt_disable(flags)	   \
+	do {				   \
+		preempt_disable();	   \
+		(void)(flags);		   \
+	} while (0)
+#define ipipe_preempt_enable(flags)	preempt_enable()
+
+#define ipipe_get_cpu(flags)		({ (void)(flags); get_cpu(); })
+#define ipipe_put_cpu(flags)		\
+	do {				\
+		(void)(flags);		\
+		put_cpu();		\
+	} while (0)
+
+#define ipipe_check_context(ipd)	do { } while(0)
+
+#endif	/* CONFIG_IPIPE */
+
+#endif	/* !__LINUX_IPIPE_BASE_H */
diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h
new file mode 100644
index 0000000..5382208
--- /dev/null
+++ b/include/linux/ipipe_lock.h
@@ -0,0 +1,241 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_lock.h
+ *
+ *   Copyright (C) 2009 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_LOCK_H
+#define __LINUX_IPIPE_LOCK_H
+
+typedef struct {
+	arch_spinlock_t arch_lock;
+} __ipipe_spinlock_t;
+
+#define ipipe_spinlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *)
+
+#define std_spinlock_raw_p(lock)					\
+	__builtin_types_compatible_p(typeof(lock), raw_spinlock_t *)
+
+#define std_spinlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), spinlock_t *)
+
+#define ipipe_spinlock(lock)	((__ipipe_spinlock_t *)(lock))
+#define std_spinlock_raw(lock)	((raw_spinlock_t *)(lock))
+#define std_spinlock(lock)	((spinlock_t *)(lock))
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			(flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \
+		else if (std_spinlock_p(lock))				\
+			__real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \
+		else __bad_lock_type();					\
+	} while (0)
+
+#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags)				\
+	({								\
+		int __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \
+		else if (std_spinlock_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \
+		else __bad_lock_type();					\
+		__ret__;						\
+	 })
+
+#define PICK_SPINTRYLOCK_IRQ(lock)					\
+	({								\
+		int __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \
+		else if (std_spinlock_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \
+		else __bad_lock_type();					\
+		__ret__;						\
+	 })
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)				\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			__ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \
+		else {							\
+			__ipipe_spin_unlock_debug(flags);		\
+			if (std_spinlock_raw_p(lock))			\
+				__real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \
+			else if (std_spinlock_p(lock))			\
+				__real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \
+		}							\
+	} while (0)
+
+#define PICK_SPINOP(op, lock)						\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+		else if (std_spinlock_raw_p(lock))			\
+			__real_raw_spin##op(std_spinlock_raw(lock));	\
+		else if (std_spinlock_p(lock))				\
+			__real_raw_spin##op(&std_spinlock(lock)->rlock); \
+		else __bad_lock_type();					\
+	} while (0)
+
+#define PICK_SPINOP_RET(op, lock, type)					\
+	({								\
+		type __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+		else if (std_spinlock_raw_p(lock))			\
+			__ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \
+		else if (std_spinlock_p(lock))				\
+			__ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \
+		else { __ret__ = -1; __bad_lock_type(); }		\
+		__ret__;						\
+	})
+
+#define arch_spin_lock_init(lock)					\
+	do {								\
+		IPIPE_DEFINE_SPINLOCK(__lock__);			\
+		*((ipipe_spinlock_t *)lock) = __lock__;			\
+	} while (0)
+
+#define arch_spin_lock_irq(lock)					\
+	do {								\
+		local_irq_disable_hw();					\
+		arch_spin_lock(lock);					\
+	} while (0)
+
+#define arch_spin_unlock_irq(lock)					\
+	do {								\
+		arch_spin_unlock(lock);					\
+		local_irq_enable_hw();					\
+	} while (0)
+
+typedef struct {
+	arch_rwlock_t arch_lock;
+} __ipipe_rwlock_t;
+
+#define ipipe_rwlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *)
+
+#define std_rwlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), rwlock_t *)
+
+#define ipipe_rwlock(lock)	((__ipipe_rwlock_t *)(lock))
+#define std_rwlock(lock)	((rwlock_t *)(lock))
+
+#define PICK_RWOP(op, lock)						\
+	do {								\
+		if (ipipe_rwlock_p(lock))				\
+			arch##op(&ipipe_rwlock(lock)->arch_lock);	\
+		else if (std_rwlock_p(lock))				\
+			_raw##op(std_rwlock(lock));			\
+		else __bad_lock_type();					\
+	} while (0)
+
+extern int __bad_lock_type(void);
+
+#ifdef CONFIG_IPIPE
+
+#define ipipe_spinlock_t		__ipipe_spinlock_t
+#define IPIPE_DEFINE_RAW_SPINLOCK(x)	ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
+#define IPIPE_DECLARE_RAW_SPINLOCK(x)	extern ipipe_spinlock_t x
+#define IPIPE_DEFINE_SPINLOCK(x)	IPIPE_DEFINE_RAW_SPINLOCK(x)
+#define IPIPE_DECLARE_SPINLOCK(x)	IPIPE_DECLARE_RAW_SPINLOCK(x)
+
+#define IPIPE_SPIN_LOCK_UNLOCKED	\
+	(__ipipe_spinlock_t) {	.arch_lock = __ARCH_SPIN_LOCK_UNLOCKED }
+
+#define spin_lock_irqsave_cond(lock, flags) \
+	spin_lock_irqsave(lock, flags)
+
+#define spin_unlock_irqrestore_cond(lock, flags) \
+	spin_unlock_irqrestore(lock, flags)
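+
+/*
+ * e.g. a lock which may also be taken from a non-root domain can be
+ * declared as
+ *
+ *	static IPIPE_DEFINE_SPINLOCK(foo_lock);
+ *
+ * (foo_lock being an arbitrary name) and still be used through the
+ * regular spin_lock_irqsave()/spin_unlock_irqrestore() API; the
+ * PICK_* helpers above dispatch on the lock type at compile time,
+ * and hard IRQs remain disabled while an I-pipe lock is held.
+ */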
+
+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock);
+
+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock);
+
+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock);
+
+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock);
+
+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
+				 unsigned long *x);
+
+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
+				    unsigned long x);
+
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
+
+void __ipipe_spin_unlock_irqcomplete(unsigned long x);
+
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+void __ipipe_spin_unlock_debug(unsigned long flags);
+#else
+#define __ipipe_spin_unlock_debug(flags)  do { } while (0)
+#endif
+
+#define ipipe_rwlock_t			__ipipe_rwlock_t
+#define IPIPE_DEFINE_RWLOCK(x)		ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED
+#define IPIPE_DECLARE_RWLOCK(x)		extern ipipe_rwlock_t x
+
+#define IPIPE_RW_LOCK_UNLOCKED	\
+	(__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED }
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_spinlock_t		spinlock_t
+#define IPIPE_DEFINE_SPINLOCK(x)	DEFINE_SPINLOCK(x)
+#define IPIPE_DECLARE_SPINLOCK(x)	extern spinlock_t x
+#define IPIPE_SPIN_LOCK_UNLOCKED	SPIN_LOCK_UNLOCKED
+#define IPIPE_DEFINE_RAW_SPINLOCK(x)	DEFINE_RAW_SPINLOCK(x)
+#define IPIPE_DECLARE_RAW_SPINLOCK(x)	extern raw_spinlock_t x
+
+#define spin_lock_irqsave_cond(lock, flags)		\
+	do {						\
+		(void)(flags);				\
+		spin_lock(lock);			\
+	} while(0)
+
+#define spin_unlock_irqrestore_cond(lock, flags)	\
+	spin_unlock(lock)
+
+#define __ipipe_spin_lock_irq(lock)		do { } while (0)
+#define __ipipe_spin_unlock_irq(lock)		do { } while (0)
+#define __ipipe_spin_lock_irqsave(lock)		0
+#define __ipipe_spin_trylock_irq(lock)		1
+#define __ipipe_spin_trylock_irqsave(lock, x)	({ (void)(x); 1; })
+#define __ipipe_spin_unlock_irqrestore(lock, x)	do { (void)(x); } while (0)
+#define __ipipe_spin_unlock_irqbegin(lock)	do { } while (0)
+#define __ipipe_spin_unlock_irqcomplete(x)	do { (void)(x); } while (0)
+#define __ipipe_spin_unlock_debug(flags)	do { } while (0)
+
+#define ipipe_rwlock_t			rwlock_t
+#define IPIPE_DEFINE_RWLOCK(x)		DEFINE_RWLOCK(x)
+#define IPIPE_DECLARE_RWLOCK(x)		extern rwlock_t x
+#define IPIPE_RW_LOCK_UNLOCKED		RW_LOCK_UNLOCKED
+
+#endif /* !CONFIG_IPIPE */
+
+#endif /* !__LINUX_IPIPE_LOCK_H */
diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h
new file mode 100644
index 0000000..0b42e8c
--- /dev/null
+++ b/include/linux/ipipe_percpu.h
@@ -0,0 +1,89 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_percpu.h
+ *
+ *   Copyright (C) 2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_PERCPU_H
+#define __LINUX_IPIPE_PERCPU_H
+
+#include <asm/percpu.h>
+#include <asm/ptrace.h>
+
+struct ipipe_domain;
+
+struct ipipe_percpu_domain_data {
+	unsigned long status;	/* <= Must be first in struct. */
+	unsigned long irqpend_himap;
+#ifdef __IPIPE_3LEVEL_IRQMAP
+	unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ];
+#endif
+	unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ];
+	unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ];
+	unsigned long irqall[IPIPE_NR_IRQS];
+	u64 evsync;
+};
+
+/*
+ * CAREFUL: all accessors based on __raw_get_cpu_var() found in this
+ * file must only be used while hw interrupts are off, so that CPU
+ * migration cannot happen regardless of the running domain.
+ */
+#ifdef CONFIG_SMP
+#define ipipe_percpudom_ptr(ipd, cpu)	\
+	(&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot])
+#define ipipe_cpudom_ptr(ipd)	\
+	(&__ipipe_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot])
+#else
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]);
+#define ipipe_percpudom_ptr(ipd, cpu)	\
+	(per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot])
+#define ipipe_cpudom_ptr(ipd)	\
+	(__ipipe_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot])
+#endif
+#define ipipe_percpudom(ipd, var, cpu)	(ipipe_percpudom_ptr(ipd, cpu)->var)
+#define ipipe_cpudom_var(ipd, var)	(ipipe_cpudom_ptr(ipd)->var)
+
+#define IPIPE_ROOT_SLOT			0
+#define IPIPE_HEAD_SLOT			(CONFIG_IPIPE_DOMAINS - 1)
+
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]);
+
+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain);
+
+DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root);
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+DECLARE_PER_CPU(int, ipipe_percpu_context_check);
+DECLARE_PER_CPU(int, ipipe_saved_context_check_state);
+#endif
+
+#define ipipe_root_cpudom_ptr()		\
+	(&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT])
+
+#define ipipe_root_cpudom_var(var)	ipipe_root_cpudom_ptr()->var
+
+#define ipipe_this_cpudom_var(var)	\
+	ipipe_cpudom_var(__ipipe_current_domain, var)
+
+#define ipipe_head_cpudom_ptr()		\
+	(&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT])
+
+#define ipipe_head_cpudom_var(var)	ipipe_head_cpudom_ptr()->var
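+
+/*
+ * e.g. the stall bit of the root domain may be tested on the local
+ * CPU (hw IRQs off) with
+ *
+ *	test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+ */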
+
+#endif	/* !__LINUX_IPIPE_PERCPU_H */
diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h
new file mode 100644
index 0000000..572e391
--- /dev/null
+++ b/include/linux/ipipe_tickdev.h
@@ -0,0 +1,87 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_tickdev.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_TICKDEV_H
+#define __LINUX_IPIPE_TICKDEV_H
+
+#include <linux/clocksource.h>
+
+#ifdef CONFIG_IPIPE
+/*
+ * NOTE: When modifying this structure, make sure to keep the Xenomai
+ * definition include/nucleus/vdso.h in sync.
+ */
+struct ipipe_hostrt_data {
+	short live;
+	seqcount_t seqcount;
+	time_t wall_time_sec;
+	u32 wall_time_nsec;
+	struct timespec wall_to_monotonic;
+	cycle_t cycle_last;
+	cycle_t mask;
+	u32 mult;
+	u32 shift;
+};
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+#include <linux/clockchips.h>
+
+struct tick_device;
+
+struct ipipe_tick_device {
+
+	void (*emul_set_mode)(enum clock_event_mode,
+			      struct clock_event_device *cdev);
+	int (*emul_set_tick)(unsigned long delta,
+			     struct clock_event_device *cdev);
+	void (*real_set_mode)(enum clock_event_mode mode,
+			      struct clock_event_device *cdev);
+	int (*real_set_tick)(unsigned long delta,
+			     struct clock_event_device *cdev);
+	struct tick_device *slave;
+	unsigned long real_max_delta_ns;
+	unsigned long real_mult;
+	int real_shift;
+};
+
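+/*
+ * ipipe_request_tickdev() interposes on the per-cpu tick device named
+ * 'devname'. When that device is the current tick device of 'cpu',
+ * its former mode (CLOCK_EVT_MODE_*) is returned and, unless shut
+ * down, the device is grabbed and *tmfreq is set to the timer
+ * frequency derived from its mult/shift pair; -EBUSY is returned when
+ * the device has already been grabbed for this cpu.
+ */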
+int ipipe_request_tickdev(const char *devname,
+			  void (*emumode)(enum clock_event_mode mode,
+					  struct clock_event_device *cdev),
+			  int (*emutick)(unsigned long evt,
+					 struct clock_event_device *cdev),
+			  int cpu, unsigned long *tmfreq);
+
+void ipipe_release_tickdev(int cpu);
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+#ifdef CONFIG_HAVE_IPIPE_HOSTRT
+void ipipe_update_hostrt(struct timespec *wall_time,
+			 struct clocksource *clock);
+#else /* !CONFIG_HAVE_IPIPE_HOSTRT */
+static inline void
+ipipe_update_hostrt(struct timespec *wall_time, struct clocksource *clock) {}
+#endif
+
+#endif /* CONFIG_IPIPE */
+
+#endif /* !__LINUX_IPIPE_TICKDEV_H */
diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h
new file mode 100644
index 0000000..627b354
--- /dev/null
+++ b/include/linux/ipipe_trace.h
@@ -0,0 +1,72 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_trace.h
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _LINUX_IPIPE_TRACE_H
+#define _LINUX_IPIPE_TRACE_H
+
+#ifdef CONFIG_IPIPE_TRACE
+
+#include <linux/types.h>
+
+void ipipe_trace_begin(unsigned long v);
+void ipipe_trace_end(unsigned long v);
+void ipipe_trace_freeze(unsigned long v);
+void ipipe_trace_special(unsigned char special_id, unsigned long v);
+void ipipe_trace_pid(pid_t pid, short prio);
+void ipipe_trace_event(unsigned char id, unsigned long delay_tsc);
+int ipipe_trace_max_reset(void);
+int ipipe_trace_frozen_reset(void);
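+
+/*
+ * e.g. a suspect code path may be marked for post-mortem inspection
+ * by freezing the trace log with an arbitrary marker value:
+ *
+ *	ipipe_trace_freeze(__LINE__);
+ *
+ * the collected data is then available under /proc/ipipe/trace/*.
+ */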
+
+#else /* !CONFIG_IPIPE_TRACE */
+
+#define ipipe_trace_begin(v)			do { (void)(v); } while(0)
+#define ipipe_trace_end(v)			do { (void)(v); } while(0)
+#define ipipe_trace_freeze(v)			do { (void)(v); } while(0)
+#define ipipe_trace_special(id, v)		do { (void)(id); (void)(v); } while(0)
+#define ipipe_trace_pid(pid, prio)		do { (void)(pid); (void)(prio); } while(0)
+#define ipipe_trace_event(id, delay_tsc)	do { (void)(id); (void)(delay_tsc); } while(0)
+#define ipipe_trace_max_reset()			do { } while(0)
+#define ipipe_trace_frozen_reset()		do { } while(0)
+
+#endif /* !CONFIG_IPIPE_TRACE */
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+void ipipe_trace_panic_freeze(void);
+void ipipe_trace_panic_dump(void);
+#else
+static inline void ipipe_trace_panic_freeze(void) { }
+static inline void ipipe_trace_panic_dump(void) { }
+#endif
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define ipipe_trace_irq_entry(irq)	ipipe_trace_begin(irq)
+#define ipipe_trace_irq_exit(irq)	ipipe_trace_end(irq)
+#define ipipe_trace_irqsoff()		ipipe_trace_begin(0x80000000UL)
+#define ipipe_trace_irqson()		ipipe_trace_end(0x80000000UL)
+#else
+#define ipipe_trace_irq_entry(irq)	do { (void)(irq);} while(0)
+#define ipipe_trace_irq_exit(irq)	do { (void)(irq);} while(0)
+#define ipipe_trace_irqsoff()		do { } while(0)
+#define ipipe_trace_irqson()		do { } while(0)
+#endif
+
+#endif	/* !_LINUX_IPIPE_TRACE_H */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80fcb53..3d93391 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -195,6 +195,9 @@ struct irq_chip {
 
 	void		(*irq_bus_lock)(struct irq_data *data);
 	void		(*irq_bus_sync_unlock)(struct irq_data *data);
+#ifdef CONFIG_IPIPE
+	void		(*irq_move)(struct irq_data *data);
+#endif /* CONFIG_IPIPE */
 
 	/* Currently used only by UML, might disappear one day.*/
 #ifdef CONFIG_IRQ_RELEASE_METHOD
@@ -343,7 +346,7 @@ extern int set_irq_data(unsigned int irq, void *data);
 extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
 extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
-extern struct irq_data *irq_get_irq_data(unsigned int irq);
+extern struct irq_data *irq_get_irq_data(unsigned int irq) __attribute__((const));
 
 static inline struct irq_chip *get_irq_chip(unsigned int irq)
 {
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c1a95b7..323a4b7 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -55,6 +55,12 @@ struct irq_desc {
 		};
 	};
 #endif
+#ifdef CONFIG_IPIPE
+	void			(*ipipe_ack)(unsigned int irq,
+					     struct irq_desc *desc);
+	void			(*ipipe_end)(unsigned int irq,
+					     struct irq_desc *desc);
+#endif /* CONFIG_IPIPE */
 
 	struct timer_rand_state *timer_rand_state;
 	unsigned int __percpu	*kstat_irqs;
@@ -131,6 +137,10 @@ static inline int irq_balancing_disabled(unsigned int irq)
 	return desc->status & IRQ_NO_BALANCING_MASK;
 }
 
+irq_flow_handler_t
+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle,
+		    int is_chained);
+
 /* caller has locked the irq_desc and both params are valid */
 static inline void __set_irq_handler_unlocked(int irq,
 					      irq_flow_handler_t handler)
@@ -138,6 +148,7 @@ static inline void __set_irq_handler_unlocked(int irq,
 	struct irq_desc *desc;
 
 	desc = irq_to_desc(irq);
+	handler = __fixup_irq_handler(desc, handler, 0);
 	desc->handle_irq = handler;
 }
 #endif
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 3bc4dca..fb1f848 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -24,7 +24,11 @@
 #else /* CONFIG_GENERIC_HARDIRQS */
 
 extern int nr_irqs;
+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SPARSE_IRQ)
 extern struct irq_desc *irq_to_desc(unsigned int irq);
+#else
+#define irq_to_desc(irq)	(&irq_desc[irq])
+#endif
 unsigned int irq_get_next_irq(unsigned int offset);
 
 # define for_each_irq_desc(irq, desc)					\
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 00cec4d..e4f44b9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -16,6 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/ipipe_base.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
 #include <linux/dynamic_debug.h>
@@ -116,9 +117,12 @@ struct user;
 
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+# define might_resched() do { \
+		ipipe_check_context(ipipe_root_domain); \
+		_cond_resched(); \
+	} while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() ipipe_check_context(ipipe_root_domain)
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 2e681d9..130b7d5 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -9,13 +9,20 @@
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
+#include <linux/ipipe_base.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
 #else
-# define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
-# define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+# define add_preempt_count(val)	do {		\
+    ipipe_check_context(ipipe_root_domain);	\
+    preempt_count() += (val);			\
+  } while (0)
+# define sub_preempt_count(val)	do {		\
+    ipipe_check_context(ipipe_root_domain);	\
+    preempt_count() -= (val);			\
+  } while (0)
 #endif
 
 #define inc_preempt_count() add_preempt_count(1)
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index bc2994e..5e2da8d 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -61,8 +61,8 @@ do {								\
 #define read_trylock(lock)	__cond_lock(lock, _raw_read_trylock(lock))
 #define write_trylock(lock)	__cond_lock(lock, _raw_write_trylock(lock))
 
-#define write_lock(lock)	_raw_write_lock(lock)
-#define read_lock(lock)		_raw_read_lock(lock)
+#define write_lock(lock)	PICK_RWOP(_write_lock, lock)
+#define read_lock(lock)		PICK_RWOP(_read_lock, lock)
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
@@ -96,8 +96,8 @@ do {								\
 #define read_lock_bh(lock)		_raw_read_lock_bh(lock)
 #define write_lock_irq(lock)		_raw_write_lock_irq(lock)
 #define write_lock_bh(lock)		_raw_write_lock_bh(lock)
-#define read_unlock(lock)		_raw_read_unlock(lock)
-#define write_unlock(lock)		_raw_write_unlock(lock)
+#define read_unlock(lock)		PICK_RWOP(_read_unlock, lock)
+#define write_unlock(lock)		PICK_RWOP(_write_unlock, lock)
 #define read_unlock_irq(lock)		_raw_read_unlock_irq(lock)
 #define write_unlock_irq(lock)		_raw_write_unlock_irq(lock)
 
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index 9c9f049..62c894150 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -141,7 +141,9 @@ static inline int __raw_write_trylock(rwlock_t *lock)
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) ||	\
+	defined(CONFIG_DEBUG_LOCK_ALLOC) ||	\
+	defined(CONFIG_IPIPE)
 
 static inline void __raw_read_lock(rwlock_t *lock)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2a5da91..061b662 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -62,6 +62,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/ipipe.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
@@ -190,9 +191,17 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_DEAD		64
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
+#ifdef CONFIG_IPIPE
+#define TASK_ATOMICSWITCH	512
+#define TASK_NOWAKEUP		1024
+#define TASK_STATE_MAX		2048
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWAN"
+#else  /* !CONFIG_IPIPE */
+#define TASK_ATOMICSWITCH	0
+#define TASK_NOWAKEUP		0
 #define TASK_STATE_MAX		512
-
 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
+#endif /* CONFIG_IPIPE */
 
 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -304,6 +313,15 @@ extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
+#ifdef CONFIG_IPIPE
+void update_root_process_times(struct pt_regs *regs);
+#else  /* !CONFIG_IPIPE */
+static inline void update_root_process_times(struct pt_regs *regs)
+{
+	update_process_times(user_mode(regs));
+}
+#endif /* CONFIG_IPIPE */
+
 extern void sched_show_task(struct task_struct *p);
 
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -358,7 +376,7 @@ extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
-asmlinkage void schedule(void);
+asmlinkage int schedule(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
 
 struct nsproxy;
@@ -435,6 +453,9 @@ extern int get_dumpable(struct mm_struct *mm);
 					/* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+#ifdef CONFIG_IPIPE
+#define MMF_VM_PINNED		31	/* ondemand load up and COW disabled */
+#endif
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
@@ -1478,6 +1499,10 @@ struct task_struct {
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
+#ifdef CONFIG_IPIPE
+	unsigned int ipipe_flags;
+	void *ptd[IPIPE_ROOT_NPTDKEYS];
+#endif
 
 	/*
 	 * cache last used pipe for splice
@@ -1753,6 +1778,10 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
 #define PF_FREEZER_NOSIG 0x80000000	/* Freezer won't send signals to it */
 
+/* p->ipipe_flags */
+#define PF_EVTRET	0x1	/* EVENT_RETURN is pending */
+#define PF_EVNOTIFY	0x2	/* Notify other domains about internal events */
+
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
  * tasks can access tsk->flags in readonly mode for example
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 80e5358..2e29b16 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -89,10 +89,12 @@
 # include <linux/spinlock_up.h>
 #endif
 
+#include <linux/ipipe_lock.h>
+
 #ifdef CONFIG_DEBUG_SPINLOCK
   extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
 				   struct lock_class_key *key);
-# define raw_spin_lock_init(lock)				\
+# define __real_raw_spin_lock_init(lock)			\
 do {								\
 	static struct lock_class_key __key;			\
 								\
@@ -100,9 +102,10 @@ do {								\
 } while (0)
 
 #else
-# define raw_spin_lock_init(lock)				\
+# define __real_raw_spin_lock_init(lock)			\
 	do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
+#define raw_spin_lock_init(lock)	PICK_SPINOP(_lock_init, lock)
 
 #define raw_spin_is_locked(lock)	arch_spin_is_locked(&(lock)->raw_lock)
 
@@ -165,9 +168,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  * various methods are defined as nops in the case they are not
  * required.
  */
-#define raw_spin_trylock(lock)	__cond_lock(lock, _raw_spin_trylock(lock))
+#define __real_raw_spin_trylock(lock)	__cond_lock(lock, _raw_spin_trylock(lock))
+#define raw_spin_trylock(lock)		PICK_SPINOP_RET(_trylock, lock, int)
 
-#define raw_spin_lock(lock)	_raw_spin_lock(lock)
+#define __real_raw_spin_lock(lock)	_raw_spin_lock(lock)
+#define raw_spin_lock(lock)		PICK_SPINOP(_lock, lock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define raw_spin_lock_nested(lock, subclass) \
@@ -185,7 +190,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define raw_spin_lock_irqsave(lock, flags)			\
+#define __real_raw_spin_lock_irqsave(lock, flags)	\
 	do {						\
 		typecheck(unsigned long, flags);	\
 		flags = _raw_spin_lock_irqsave(lock);	\
@@ -207,7 +212,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 #else
 
-#define raw_spin_lock_irqsave(lock, flags)		\
+#define __real_raw_spin_lock_irqsave(lock, flags)	\
 	do {						\
 		typecheck(unsigned long, flags);	\
 		_raw_spin_lock_irqsave(lock, flags);	\
@@ -218,34 +223,46 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 #endif
 
-#define raw_spin_lock_irq(lock)		_raw_spin_lock_irq(lock)
+#define raw_spin_lock_irqsave(lock, flags)  \
+	PICK_SPINLOCK_IRQSAVE(lock, flags)
+
+#define __real_raw_spin_lock_irq(lock)	_raw_spin_lock_irq(lock)
+#define raw_spin_lock_irq(lock)		PICK_SPINOP(_lock_irq, lock)
 #define raw_spin_lock_bh(lock)		_raw_spin_lock_bh(lock)
-#define raw_spin_unlock(lock)		_raw_spin_unlock(lock)
-#define raw_spin_unlock_irq(lock)	_raw_spin_unlock_irq(lock)
+#define __real_raw_spin_unlock(lock)	_raw_spin_unlock(lock)
+#define raw_spin_unlock(lock)		PICK_SPINOP(_unlock, lock)
+#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
+#define raw_spin_unlock_irq(lock)	PICK_SPINOP(_unlock_irq, lock)
 
-#define raw_spin_unlock_irqrestore(lock, flags)		\
+#define __real_raw_spin_unlock_irqrestore(lock, flags)		\
 	do {							\
 		typecheck(unsigned long, flags);		\
 		_raw_spin_unlock_irqrestore(lock, flags);	\
 	} while (0)
+#define raw_spin_unlock_irqrestore(lock, flags)	\
+	PICK_SPINUNLOCK_IRQRESTORE(lock, flags)
+
 #define raw_spin_unlock_bh(lock)	_raw_spin_unlock_bh(lock)
 
 #define raw_spin_trylock_bh(lock) \
 	__cond_lock(lock, _raw_spin_trylock_bh(lock))
 
-#define raw_spin_trylock_irq(lock) \
+#define __real_raw_spin_trylock_irq(lock) \
 ({ \
 	local_irq_disable(); \
-	raw_spin_trylock(lock) ? \
+	__real_raw_spin_trylock(lock) ? \
 	1 : ({ local_irq_enable(); 0;  }); \
 })
+#define raw_spin_trylock_irq(lock)	PICK_SPINTRYLOCK_IRQ(lock)
 
-#define raw_spin_trylock_irqsave(lock, flags) \
+#define __real_raw_spin_trylock_irqsave(lock, flags) \
 ({ \
 	local_irq_save(flags); \
 	raw_spin_trylock(lock) ? \
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
+#define raw_spin_trylock_irqsave(lock, flags)	\
+	PICK_SPINTRYLOCK_IRQSAVE(lock, flags)
 
 /**
  * raw_spin_can_lock - would raw_spin_trylock() succeed?
@@ -276,24 +293,17 @@ static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
 
 #define spin_lock_init(_lock)				\
 do {							\
-	spinlock_check(_lock);				\
-	raw_spin_lock_init(&(_lock)->rlock);		\
+	raw_spin_lock_init(_lock);			\
 } while (0)
 
-static inline void spin_lock(spinlock_t *lock)
-{
-	raw_spin_lock(&lock->rlock);
-}
+#define spin_lock(lock)		raw_spin_lock(lock)
 
 static inline void spin_lock_bh(spinlock_t *lock)
 {
 	raw_spin_lock_bh(&lock->rlock);
 }
 
-static inline int spin_trylock(spinlock_t *lock)
-{
-	return raw_spin_trylock(&lock->rlock);
-}
+#define spin_trylock(lock)	raw_spin_trylock(lock)
 
 #define spin_lock_nested(lock, subclass)			\
 do {								\
@@ -305,14 +315,11 @@ do {									\
 	raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock);	\
 } while (0)
 
-static inline void spin_lock_irq(spinlock_t *lock)
-{
-	raw_spin_lock_irq(&lock->rlock);
-}
+#define spin_lock_irq(lock)	raw_spin_lock_irq(lock)
 
 #define spin_lock_irqsave(lock, flags)				\
 do {								\
-	raw_spin_lock_irqsave(spinlock_check(lock), flags);	\
+	raw_spin_lock_irqsave(lock, flags);			\
 } while (0)
 
 #define spin_lock_irqsave_nested(lock, flags, subclass)			\
@@ -320,39 +327,28 @@ do {									\
 	raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \
 } while (0)
 
-static inline void spin_unlock(spinlock_t *lock)
-{
-	raw_spin_unlock(&lock->rlock);
-}
+#define spin_unlock(lock)	raw_spin_unlock(lock)
 
 static inline void spin_unlock_bh(spinlock_t *lock)
 {
 	raw_spin_unlock_bh(&lock->rlock);
 }
 
-static inline void spin_unlock_irq(spinlock_t *lock)
-{
-	raw_spin_unlock_irq(&lock->rlock);
-}
+#define spin_unlock_irq(lock)	raw_spin_unlock_irq(lock)
 
-static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
-{
-	raw_spin_unlock_irqrestore(&lock->rlock, flags);
-}
+#define spin_unlock_irqrestore(lock, flags)	\
+	raw_spin_unlock_irqrestore(lock, flags)
 
 static inline int spin_trylock_bh(spinlock_t *lock)
 {
 	return raw_spin_trylock_bh(&lock->rlock);
 }
 
-static inline int spin_trylock_irq(spinlock_t *lock)
-{
-	return raw_spin_trylock_irq(&lock->rlock);
-}
+#define spin_trylock_irq(lock)	raw_spin_trylock_irq(lock)
 
 #define spin_trylock_irqsave(lock, flags)			\
 ({								\
-	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
+	raw_spin_trylock_irqsave(lock, flags);			\
 })
 
 static inline void spin_unlock_wait(spinlock_t *lock)
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index e253ccd..378e01e 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -99,7 +99,9 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) ||	\
+	defined(CONFIG_DEBUG_LOCK_ALLOC) ||	\
+	defined(CONFIG_IPIPE)
 
 static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
 {
@@ -113,7 +115,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
 	 * do_raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 #else
 	do_raw_spin_lock_flags(lock, &flags);
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index b14f6a9..e400972 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -49,13 +49,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 /*
  * Read-write spinlocks. No debug version.
  */
-#define arch_read_lock(lock)		do { (void)(lock); } while (0)
-#define arch_write_lock(lock)		do { (void)(lock); } while (0)
-#define arch_read_trylock(lock)	({ (void)(lock); 1; })
-#define arch_write_trylock(lock)	({ (void)(lock); 1; })
-#define arch_read_unlock(lock)		do { (void)(lock); } while (0)
-#define arch_write_unlock(lock)	do { (void)(lock); } while (0)
-
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
@@ -65,6 +58,13 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 # define arch_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
 
+#define arch_read_lock(lock)		do { (void)(lock); } while (0)
+#define arch_write_lock(lock)		do { (void)(lock); } while (0)
+#define arch_read_trylock(lock)		({ (void)(lock); 1; })
+#define arch_write_trylock(lock)	({ (void)(lock); 1; })
+#define arch_read_unlock(lock)		do { (void)(lock); } while (0)
+#define arch_write_unlock(lock)		do { (void)(lock); } while (0)
+
 #define arch_spin_is_contended(lock)	(((void)(lock), 0))
 
 #define arch_read_can_lock(lock)	(((void)(lock), 1))
diff --git a/init/Kconfig b/init/Kconfig
index 47dd02f..e432dc88 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -93,6 +93,7 @@ config CROSS_COMPILE
 
 config LOCALVERSION
 	string "Local version - append to kernel release"
+	default "-ipipe"
 	help
 	  Append an extra string to the end of your kernel version.
 	  This will show up when you type uname, for example.
diff --git a/init/main.c b/init/main.c
index a7fcde2..5054188 100644
--- a/init/main.c
+++ b/init/main.c
@@ -562,7 +562,7 @@ asmlinkage void __init start_kernel(void)
 
 	cgroup_init_early();
 
-	local_irq_disable();
+	local_irq_disable_hw();
 	early_boot_irqs_disabled = true;
 
 /*
@@ -595,6 +595,7 @@ asmlinkage void __init start_kernel(void)
 	pidhash_init();
 	vfs_caches_init_early();
 	sort_main_extable();
+	ipipe_init_early();
 	trap_init();
 	mm_init();
 	/*
@@ -626,6 +627,11 @@ asmlinkage void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	/*
+	 * We need to wait for the interrupt and time subsystems to be
+	 * initialized before enabling the pipeline.
+	 */
+	ipipe_init();
 	profile_init();
 	if (!irqs_disabled())
 		printk(KERN_CRIT "start_kernel(): bug: interrupts were "
@@ -802,6 +808,7 @@ static void __init do_basic_setup(void)
 	init_tmpfs();
 	driver_init();
 	init_irq_proc();
+	ipipe_init_proc();
 	do_ctors();
 	do_initcalls();
 }
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe..3ec3ffc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
 obj-$(CONFIG_TINY_RCU) += rcutiny.o
 obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_IPIPE) += ipipe/
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 557a348..da81dae 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -989,6 +989,7 @@ NORET_TYPE void do_exit(long code)
 		acct_process();
 	trace_sched_process_exit(tsk);
 
+	ipipe_exit_notify(tsk);
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e4291..b0d6fe8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -552,6 +552,7 @@ void mmput(struct mm_struct *mm)
 		ksm_exit(mm);
 		khugepaged_exit(mm); /* must run before exit_mmap */
 		exit_mmap(mm);
+		ipipe_cleanup_notify(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
@@ -937,6 +938,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	new_flags |= PF_FORKNOEXEC;
 	new_flags |= PF_STARTING;
 	p->flags = new_flags;
+	ipipe_clear_flags(p);
 	clear_freeze_flag(p);
 }
 
@@ -1309,6 +1311,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+#ifdef CONFIG_IPIPE
+	p->ipipe_flags = 0;
+	memset(p->ptd, 0, sizeof(p->ptd));
+#endif /* CONFIG_IPIPE */
 	perf_event_fork(p);
 	return p;
 
@@ -1690,15 +1696,18 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 
 		if (new_mm) {
+			unsigned long flags;
 			mm = current->mm;
 			active_mm = current->active_mm;
 			current->mm = new_mm;
+			ipipe_mm_switch_protect(flags);
 			current->active_mm = new_mm;
 			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
 				atomic_dec(&mm->oom_disable_count);
 				atomic_inc(&new_mm->oom_disable_count);
 			}
 			activate_mm(active_mm, new_mm);
+			ipipe_mm_switch_unprotect(flags);
 			new_mm = mm;
 		}
 
diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig
new file mode 100644
index 0000000..693a7d2
--- /dev/null
+++ b/kernel/ipipe/Kconfig
@@ -0,0 +1,26 @@
+config IPIPE
+	bool "Interrupt pipeline"
+	default y
+	---help---
+	  Activate this option if you want the interrupt pipeline to be
+	  compiled in.
+
+config IPIPE_DOMAINS
+	int "Max domains"
+	depends on IPIPE
+	default 4
+	---help---
+	  The maximum number of I-pipe domains to run concurrently.
+
+config IPIPE_DELAYED_ATOMICSW
+	bool
+	depends on IPIPE
+	default n
+
+config IPIPE_UNMASKED_CONTEXT_SWITCH
+	bool
+	depends on IPIPE
+	default n
+
+config HAVE_IPIPE_HOSTRT
+	bool
diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug
new file mode 100644
index 0000000..ba87335
--- /dev/null
+++ b/kernel/ipipe/Kconfig.debug
@@ -0,0 +1,95 @@
+config IPIPE_DEBUG
+	bool "I-pipe debugging"
+	depends on IPIPE
+
+config IPIPE_DEBUG_CONTEXT
+	bool "Check for illicit cross-domain calls"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  Enable this feature to arm checkpoints in the kernel that
+	  verify the correct invocation context. On entry to critical
+	  Linux services, a warning is issued if the caller is not
+	  running over the root domain.
+
+config IPIPE_DEBUG_INTERNAL
+	bool "Enable internal debug checks"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  When this feature is enabled, I-pipe will perform internal
+	  consistency checks of its subsystems, e.g. on per-cpu variable
+	  access.
+
+config IPIPE_TRACE
+	bool "Latency tracing"
+	depends on IPIPE_DEBUG
+	select ARCH_WANT_FRAME_POINTERS
+	select FRAME_POINTER
+	select KALLSYMS
+	select PROC_FS
+	---help---
+	  Activate this option if you want to use per-function tracing of
+	  the kernel. The tracer will collect data via instrumentation
+	  features like the one below or with the help of explicit calls
+	  to ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+	  in-kernel tracing API. The collected data and runtime control
+	  are available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+	bool "Enable tracing on boot"
+	default y
+	---help---
+	  Disable this option if you want to arm the tracer after booting
+	  manually ("echo 1 > /proc/ipipe/trace/enable"). This can reduce
+	  boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+	bool "Instrument function entries"
+	default y
+	select FTRACE
+	select FUNCTION_TRACER
+	---help---
+	  When enabled, records every kernel function entry in the tracer
+	  log. While this slows down the system noticeably, it provides
+	  the highest level of information about the flow of events.
+	  However, it can be switched off in order to record only explicit
+	  I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+	bool "Trace IRQs-off times"
+	default y
+	---help---
+	  Activate this option if you want the I-pipe to trace the
+	  longest path executed with hard IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+	int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+	range 10 18
+	default 14
+	---help---
+	  The number of trace points to hold tracing data for each
+	  trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+	bool "Use vmalloc'ed trace buffer"
+	default y if EMBEDDED
+	---help---
+	  Instead of reserving static kernel data, the required buffer
+	  is allocated via vmalloc during boot-up when this option is
+	  enabled. This can help to start systems that are low on memory,
+	  but it slightly degrades overall performance. Try this option
+	  when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+	bool "Enable panic back traces"
+	default y
+	---help---
+	  Provides services to freeze and dump a back trace in panic
+	  situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
+	  as well as ordinary kernel oopses. You can control the number
+	  of printed back trace points via /proc/ipipe/trace.
+
+endif
diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile
new file mode 100644
index 0000000..6257dfa
--- /dev/null
+++ b/kernel/ipipe/Makefile
@@ -0,0 +1,3 @@
+
+obj-$(CONFIG_IPIPE)	+= core.o
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c
new file mode 100644
index 0000000..51a8be6
--- /dev/null
+++ b/kernel/ipipe/core.c
@@ -0,0 +1,2200 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/core.c
+ *
+ * Copyright (C) 2002-2005 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Architecture-independent I-PIPE core support.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/tick.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif	/* CONFIG_PROC_FS */
+#include <linux/ipipe_trace.h>
+#include <linux/ipipe_tickdev.h>
+#include <linux/irq.h>
+
+static int __ipipe_ptd_key_count;
+
+static unsigned long __ipipe_ptd_key_map;
+
+static unsigned long __ipipe_domain_slot_map;
+
+struct ipipe_domain ipipe_root;
+
+#ifdef CONFIG_SMP
+
+#define IPIPE_CRITICAL_TIMEOUT	1000000
+
+static cpumask_t __ipipe_cpu_sync_map;
+
+static cpumask_t __ipipe_cpu_lock_map;
+
+static cpumask_t __ipipe_cpu_pass_map;
+
+static unsigned long __ipipe_critical_lock;
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
+
+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
+
+static void (*__ipipe_cpu_sync) (void);
+
+#else /* !CONFIG_SMP */
+
+/*
+ * Create an alias to the unique root status, so that arch-dependent
+ * code may get simple and easy access to this percpu variable.  We
+ * also create an array of pointers to the percpu domain data; this
+ * tends to produce better code when reaching non-root domains. We
+ * make sure that the early boot code can dereference the pointer to
+ * the root domain data safely, by statically initializing its value
+ * (local_irq*() routines depend on this).
+ */
+#if __GNUC__ >= 4
+extern unsigned long __ipipe_root_status
+__attribute__((alias(__stringify(ipipe_percpu_darray))));
+EXPORT_SYMBOL(__ipipe_root_status);
+#else /* __GNUC__ < 4 */
+/*
+ * Work around a GCC 3.x issue making alias symbols unusable as
+ * constant initializers.
+ */
+unsigned long *const __ipipe_root_status_addr =
+	&__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status;
+EXPORT_SYMBOL(__ipipe_root_status_addr);
+#endif /* __GNUC__ < 4 */
+
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) =
+{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)ipipe_percpu_darray };
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr);
+#endif /* !CONFIG_SMP */
+
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) =
+{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
+
+DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root };
+
+DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock);
+
+LIST_HEAD(__ipipe_pipeline);
+
+unsigned long __ipipe_virtual_irq_map;
+
+#ifdef CONFIG_PRINTK
+unsigned __ipipe_printk_virq;
+#endif /* CONFIG_PRINTK */
+
+int __ipipe_event_monitors[IPIPE_NR_EVENTS];
+
+DEFINE_PER_CPU(ipipe_root_preempt_handler_t, __ipipe_root_preempt_handler);
+EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_root_preempt_handler);
+
+DEFINE_PER_CPU(void *, __ipipe_root_preempt_cookie);
+EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_root_preempt_cookie);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+
+static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device);
+
+int ipipe_request_tickdev(const char *devname,
+			  void (*emumode)(enum clock_event_mode mode,
+					  struct clock_event_device *cdev),
+			  int (*emutick)(unsigned long delta,
+					 struct clock_event_device *cdev),
+			  int cpu, unsigned long *tmfreq)
+{
+	struct ipipe_tick_device *itd;
+	struct tick_device *slave;
+	struct clock_event_device *evtdev;
+	unsigned long long freq;
+	unsigned long flags;
+	int status;
+
+	flags = ipipe_critical_enter(NULL);
+
+	itd = &per_cpu(ipipe_tick_cpu_device, cpu);
+
+	if (itd->slave != NULL) {
+		status = -EBUSY;
+		goto out;
+	}
+
+	slave = &per_cpu(tick_cpu_device, cpu);
+
+	if (strcmp(slave->evtdev->name, devname)) {
+		/*
+		 * No conflict so far with the current tick device,
+		 * check whether the requested device is sane and has
+		 * been blessed by the kernel.
+		 */
+		status = __ipipe_check_tickdev(devname) ?
+			CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN;
+		goto out;
+	}
+
+	/*
+	 * Our caller asks to use the same clock event device for
+	 * ticking as we do, so let's create a tick emulation device
+	 * to interpose on the set_next_event() method, so that we may
+	 * both manage the device in oneshot mode. Only the tick
+	 * emulation code will actually program the clockchip hardware
+	 * for the next shot, though.
+	 *
+	 * CAUTION: we still have to grab the tick device even when it
+	 * currently runs in periodic mode, since the kernel may switch
+	 * to oneshot dynamically (highres/no_hz tick mode).
+	 */
+
+	evtdev = slave->evtdev;
+	status = evtdev->mode;
+
+	if (status == CLOCK_EVT_MODE_SHUTDOWN)
+		goto out;
+
+	itd->slave = slave;
+	itd->emul_set_mode = emumode;
+	itd->emul_set_tick = emutick;
+	itd->real_set_mode = evtdev->set_mode;
+	itd->real_set_tick = evtdev->set_next_event;
+	itd->real_max_delta_ns = evtdev->max_delta_ns;
+	itd->real_mult = evtdev->mult;
+	itd->real_shift = evtdev->shift;
+	freq = (1000000000ULL * evtdev->mult) >> evtdev->shift;
+	*tmfreq = (unsigned long)freq;
+	evtdev->set_mode = emumode;
+	evtdev->set_next_event = emutick;
+	evtdev->max_delta_ns = ULONG_MAX;
+	evtdev->mult = 1;
+	evtdev->shift = 0;
+out:
+	ipipe_critical_exit(flags);
+
+	return status;
+}
+
+void ipipe_release_tickdev(int cpu)
+{
+	struct ipipe_tick_device *itd;
+	struct tick_device *slave;
+	struct clock_event_device *evtdev;
+	unsigned long flags;
+
+	flags = ipipe_critical_enter(NULL);
+
+	itd = &per_cpu(ipipe_tick_cpu_device, cpu);
+
+	if (itd->slave != NULL) {
+		slave = &per_cpu(tick_cpu_device, cpu);
+		evtdev = slave->evtdev;
+		evtdev->set_mode = itd->real_set_mode;
+		evtdev->set_next_event = itd->real_set_tick;
+		evtdev->max_delta_ns = itd->real_max_delta_ns;
+		evtdev->mult = itd->real_mult;
+		evtdev->shift = itd->real_shift;
+		itd->slave = NULL;
+	}
+
+	ipipe_critical_exit(flags);
+}
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+void __init ipipe_init_early(void)
+{
+	struct ipipe_domain *ipd = &ipipe_root;
+
+	/*
+	 * Do the early init stuff. At this point, the kernel does not
+	 * provide many services yet: be careful.
+	 */
+	__ipipe_check_platform(); /* Do platform dependent checks first. */
+
+	/*
+	 * A lightweight registration code for the root domain. We are
+	 * running on the boot CPU, hw interrupts are off, and
+	 * secondary CPUs are still lost in space.
+	 */
+
+	/* Reserve percpu data slot #0 for the root domain. */
+	ipd->slot = 0;
+	set_bit(0, &__ipipe_domain_slot_map);
+
+	ipd->name = "Linux";
+	ipd->domid = IPIPE_ROOT_ID;
+	ipd->priority = IPIPE_ROOT_PRIO;
+
+	__ipipe_init_stage(ipd);
+
+	list_add_tail(&ipd->p_link, &__ipipe_pipeline);
+
+	__ipipe_init_platform();
+
+#ifdef CONFIG_PRINTK
+	__ipipe_printk_virq = ipipe_alloc_virq();	/* Cannot fail here. */
+	ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk;
+	ipd->irqs[__ipipe_printk_virq].cookie = NULL;
+	ipd->irqs[__ipipe_printk_virq].acknowledge = NULL;
+	ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
+#endif /* CONFIG_PRINTK */
+}
+
+void __init ipipe_init(void)
+{
+	/* Now we may engage the pipeline. */
+	__ipipe_enable_pipeline();
+
+	printk(KERN_INFO "I-pipe %s: pipeline enabled.\n",
+	       IPIPE_VERSION_STRING);
+}
+
+void __ipipe_init_stage(struct ipipe_domain *ipd)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long status;
+	int cpu, n;
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpudom_ptr(ipd, cpu);
+		status = p->status;
+		memset(p, 0, sizeof(*p));
+		p->status = status;
+	}
+
+	for (n = 0; n < IPIPE_NR_IRQS; n++) {
+		ipd->irqs[n].acknowledge = NULL;
+		ipd->irqs[n].handler = NULL;
+		ipd->irqs[n].control = IPIPE_PASS_MASK;	/* Pass but don't handle */
+	}
+
+	for (n = 0; n < IPIPE_NR_EVENTS; n++)
+		ipd->evhand[n] = NULL;
+
+	ipd->evself = 0LL;
+	mutex_init(&ipd->mutex);
+
+	__ipipe_hook_critical_ipi(ipd);
+}
+
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
+{
+	ipipe_unstall_pipeline_from(ipd);
+
+#ifdef CONFIG_SMP
+	{
+		struct ipipe_percpu_domain_data *p;
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			p = ipipe_percpudom_ptr(ipd, cpu);
+			while (__ipipe_ipending_p(p))
+				cpu_relax();
+		}
+	}
+#else
+	__raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL;
+#endif
+
+	clear_bit(ipd->slot, &__ipipe_domain_slot_map);
+}
+
+void __ipipe_unstall_root(void)
+{
+	struct ipipe_percpu_domain_data *p;
+
+	local_irq_disable_hw();
+
+	/* This helps catch bad usage from assembly call sites. */
+	ipipe_check_context(ipipe_root_domain);
+
+	p = ipipe_root_cpudom_ptr();
+
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (unlikely(__ipipe_ipending_p(p)))
+		__ipipe_sync_pipeline();
+
+	local_irq_enable_hw();
+}
+
+void __ipipe_restore_root(unsigned long x)
+{
+	ipipe_check_context(ipipe_root_domain);
+
+	if (x)
+		__ipipe_stall_root();
+	else
+		__ipipe_unstall_root();
+}
+
+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long flags;
+	/*
+	 * We have to guard against both racy updates of the status
+	 * variable _and_ CPU migration at the same time, so disable
+	 * hw IRQs here.
+	 */
+	local_irq_save_hw(flags);
+
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
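+	/*
+	 * Stalling the head domain maps to disabling hw IRQs, so the
+	 * flags saved on entry are not restored in that case.
+	 */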
+	if (!__ipipe_pipeline_head_p(ipd))
+		local_irq_restore_hw(flags);
+}
+
+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long flags, x;
+
+	/* See ipipe_stall_pipeline_from() */
+	local_irq_save_hw(flags);
+
+	x = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (!__ipipe_pipeline_head_p(ipd))
+		local_irq_restore_hw(flags);
+
+	return x;
+}
+
+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long flags, x;
+	struct list_head *pos;
+
+	local_irq_save_hw(flags);
+
+	x = __test_and_clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (ipd == __ipipe_current_domain)
+		pos = &ipd->p_link;
+	else
+		pos = __ipipe_pipeline.next;
+
+	__ipipe_walk_pipeline(pos);
+
+	if (likely(__ipipe_pipeline_head_p(ipd)))
+		local_irq_enable_hw();
+	else
+		local_irq_restore_hw(flags);
+
+	return x;
+}
+
+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+					  unsigned long x)
+{
+	if (x)
+		ipipe_stall_pipeline_from(ipd);
+	else
+		ipipe_unstall_pipeline_from(ipd);
+}
+
+void ipipe_unstall_pipeline_head(void)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr();
+	struct ipipe_domain *head_domain;
+
+	local_irq_disable_hw();
+
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (unlikely(__ipipe_ipending_p(p))) {
+		head_domain = __ipipe_pipeline_head();
+		if (likely(head_domain == __ipipe_current_domain))
+			__ipipe_sync_pipeline();
+		else
+			__ipipe_walk_pipeline(&head_domain->p_link);
+	}
+
+	local_irq_enable_hw();
+}
+
+void __ipipe_restore_pipeline_head(unsigned long x) /* hw interrupt off */
+{
+	struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr();
+	struct ipipe_domain *head_domain;
+
+	if (x) {
+#ifdef CONFIG_DEBUG_KERNEL
+		static int warned;
+		if (!warned &&
+		    __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) {
+			/*
+			 * Already stalled, although
+			 * ipipe_restore_pipeline_head() should have
+			 * detected it? Warn once about it.
+			 */
+			local_irq_enable_hw();
+			warned = 1;
+			printk(KERN_WARNING
+			       "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n");
+			dump_stack();
+			local_irq_disable_hw();
+		}
+#else /* !CONFIG_DEBUG_KERNEL */
+		__set_bit(IPIPE_STALL_FLAG, &p->status);
+#endif /* CONFIG_DEBUG_KERNEL */
+	} else {
+		__clear_bit(IPIPE_STALL_FLAG, &p->status);
+		if (unlikely(__ipipe_ipending_p(p))) {
+			head_domain = __ipipe_pipeline_head();
+			if (likely(head_domain == __ipipe_current_domain))
+				__ipipe_sync_pipeline();
+			else
+				__ipipe_walk_pipeline(&head_domain->p_link);
+		}
+		local_irq_enable_hw();
+	}
+}
+
+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock)
+{
+	local_irq_disable_hw();
+	arch_spin_lock(&lock->arch_lock);
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+}
+
+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock)
+{
+	arch_spin_unlock(&lock->arch_lock);
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_enable_hw();
+}
+
+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock)
+{
+	unsigned long flags;
+	int s;
+
+	local_irq_save_hw(flags);
+	arch_spin_lock(&lock->arch_lock);
+	s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+
+	return arch_mangle_irq_bits(s, flags);
+}
+
+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
+				 unsigned long *x)
+{
+	unsigned long flags;
+	int s;
+
+	local_irq_save_hw(flags);
+	if (!arch_spin_trylock(&lock->arch_lock)) {
+		local_irq_restore_hw(flags);
+		return 0;
+	}
+	s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	*x = arch_mangle_irq_bits(s, flags);
+
+	return 1;
+}
+
+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
+				    unsigned long x)
+{
+	arch_spin_unlock(&lock->arch_lock);
+	if (!arch_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_restore_hw(x);
+}
+
+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock)
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+	if (!arch_spin_trylock(&lock->arch_lock)) {
+		local_irq_restore_hw(flags);
+		return 0;
+	}
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+
+	return 1;
+}
+
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
+{
+	arch_spin_unlock(&lock->arch_lock);
+}
+
+void __ipipe_spin_unlock_irqcomplete(unsigned long x)
+{
+	if (!arch_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_restore_hw(x);
+}
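+
+/*
+ * Usage sketch (illustration only, not taken from a real caller): the
+ * irqbegin/irqcomplete pair splits __ipipe_spin_unlock_irqrestore()
+ * so that code may run after the lock is dropped but before the
+ * virtual interrupt state is restored. 'some_lock' and
+ * 'do_lockless_work()' below are made-up names.
+ *
+ *	x = __ipipe_spin_lock_irqsave(&some_lock);
+ *	...
+ *	__ipipe_spin_unlock_irqbegin(&some_lock);
+ *	do_lockless_work();
+ *	__ipipe_spin_unlock_irqcomplete(x);
+ */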
+
+#ifdef __IPIPE_3LEVEL_IRQMAP
+
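+/*
+ * The pending IRQ log is a three-level bitmap: each bit of
+ * irqpend_himap covers one word of irqpend_mdmap, and each bit of
+ * irqpend_mdmap covers one word of irqpend_lomap, where the actual
+ * IRQ bits live. As a worked example (numbers assume
+ * BITS_PER_LONG == 64): irq 130 sets bit 130 in the lomap,
+ * bit 2 (130 / 64) in the mdmap and bit 0 (130 / (64 * 64)) in the
+ * himap.
+ */
+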
+/* Must be called hw IRQs off. */
+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
+					unsigned int irq)
+{
+	set_bit(irq, p->irqheld_map);
+	p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd);
+	int l0b, l1b;
+
+	IPIPE_WARN_ONCE(!irqs_disabled_hw());
+
+	l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+	l1b = irq / BITS_PER_LONG;
+
+	if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		set_bit(irq, p->irqpend_lomap);
+		set_bit(l1b, p->irqpend_mdmap);
+		set_bit(l0b, &p->irqpend_himap);
+	} else
+		set_bit(irq, p->irqheld_map);
+
+	p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned int irq)
+{
+	struct ipipe_percpu_domain_data *p;
+	int l0b, l1b;
+
+	IPIPE_WARN_ONCE(!irqs_disabled_hw());
+
+	/* Wired interrupts cannot be locked (it is useless). */
+	if (test_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control) ||
+	    test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+		return;
+
+	l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+	l1b = irq / BITS_PER_LONG;
+
+	p = ipipe_percpudom_ptr(ipd, cpu);
+	if (test_and_clear_bit(irq, p->irqpend_lomap)) {
+		set_bit(irq, p->irqheld_map);
+		if (p->irqpend_lomap[l1b] == 0) {
+			clear_bit(l1b, p->irqpend_mdmap);
+			if (p->irqpend_mdmap[l0b] == 0)
+				clear_bit(l0b, &p->irqpend_himap);
+		}
+	}
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned int irq)
+{
+	struct ipipe_percpu_domain_data *p;
+	int l0b, l1b, cpu;
+
+	IPIPE_WARN_ONCE(!irqs_disabled_hw());
+
+	if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG,
+					 &ipd->irqs[irq].control)))
+		return;
+
+	l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+	l1b = irq / BITS_PER_LONG;
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpudom_ptr(ipd, cpu);
+		if (test_and_clear_bit(irq, p->irqheld_map)) {
+			set_bit(irq, p->irqpend_lomap);
+			set_bit(l1b, p->irqpend_mdmap);
+			set_bit(l0b, &p->irqpend_himap);
+		}
+	}
+}
+
+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
+{
+	int l0b, l1b, l2b;
+	unsigned long l0m, l1m, l2m;
+	unsigned int irq;
+
+	l0m = p->irqpend_himap;
+	if (unlikely(l0m == 0))
+		return -1;
+
+	l0b = __ipipe_ffnz(l0m);
+	l1m = p->irqpend_mdmap[l0b];
+	if (unlikely(l1m == 0))
+		return -1;
+
+	l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG;
+	l2m = p->irqpend_lomap[l1b];
+	if (unlikely(l2m == 0))
+		return -1;
+
+	l2b = __ipipe_ffnz(l2m);
+	irq = l1b * BITS_PER_LONG + l2b;
+
+	clear_bit(irq, p->irqpend_lomap);
+	if (p->irqpend_lomap[l1b] == 0) {
+		clear_bit(l1b, p->irqpend_mdmap);
+		if (p->irqpend_mdmap[l0b] == 0)
+			clear_bit(l0b, &p->irqpend_himap);
+	}
+
+	return irq;
+}
+
+#else /* __IPIPE_2LEVEL_IRQMAP */
+
+/* Must be called hw IRQs off. */
+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
+					unsigned int irq)
+{
+	set_bit(irq, p->irqheld_map);
+	p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd);
+	int l0b = irq / BITS_PER_LONG;
+
+	IPIPE_WARN_ONCE(!irqs_disabled_hw());
+
+	if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		set_bit(irq, p->irqpend_lomap);
+		set_bit(l0b, &p->irqpend_himap);
+	} else
+		set_bit(irq, p->irqheld_map);
+
+	p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq)
+{
+	struct ipipe_percpu_domain_data *p;
+	int l0b = irq / BITS_PER_LONG;
+
+	IPIPE_WARN_ONCE(!irqs_disabled_hw());
+
+	/* Wired interrupts cannot be locked (it is useless). */
+	if (test_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control) ||
+	    test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+		return;
+
+	p = ipipe_percpudom_ptr(ipd, cpu);
+	if (test_and_clear_bit(irq, p->irqpend_lomap)) {
+		set_bit(irq, p->irqheld_map);
+		if (p->irqpend_lomap[l0b] == 0)
+			clear_bit(l0b, &p->irqpend_himap);
+	}
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)
+{
+	struct ipipe_percpu_domain_data *p;
+	int l0b = irq / BITS_PER_LONG, cpu;
+
+	IPIPE_WARN_ONCE(!irqs_disabled_hw());
+
+	if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG,
+					 &ipd->irqs[irq].control)))
+		return;
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpudom_ptr(ipd, cpu);
+		if (test_and_clear_bit(irq, p->irqheld_map)) {
+			set_bit(irq, p->irqpend_lomap);
+			set_bit(l0b, &p->irqpend_himap);
+		}
+	}
+}
+
+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
+{
+	unsigned long l0m, l1m;
+	int l0b, l1b;
+
+	l0m = p->irqpend_himap;
+	if (unlikely(l0m == 0))
+		return -1;
+
+	l0b = __ipipe_ffnz(l0m);
+	l1m = p->irqpend_lomap[l0b];
+	if (unlikely(l1m == 0))
+		return -1;
+
+	l1b = __ipipe_ffnz(l1m);
+	clear_bit(l1b, &p->irqpend_lomap[l0b]);
+	if (p->irqpend_lomap[l0b] == 0)
+		clear_bit(l0b, &p->irqpend_himap);
+
+	return l0b * BITS_PER_LONG + l1b;
+}
+
+#endif /* __IPIPE_2LEVEL_IRQMAP */
+
+/*
+ * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must
+ * be called with local hw interrupts disabled.
+ */
+void __ipipe_walk_pipeline(struct list_head *pos)
+{
+	struct ipipe_domain *this_domain = __ipipe_current_domain, *next_domain;
+	struct ipipe_percpu_domain_data *p, *np;
+
+	p = ipipe_cpudom_ptr(this_domain);
+
+	while (pos != &__ipipe_pipeline) {
+
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+		np = ipipe_cpudom_ptr(next_domain);
+
+		if (test_bit(IPIPE_STALL_FLAG, &np->status))
+			break;	/* Stalled stage -- do not go further. */
+
+		if (__ipipe_ipending_p(np)) {
+			if (next_domain == this_domain)
+				__ipipe_sync_pipeline();
+			else {
+
+				p->evsync = 0;
+				__ipipe_current_domain = next_domain;
+				ipipe_suspend_domain();	/* Sync stage and propagate interrupts. */
+
+				if (__ipipe_current_domain == next_domain)
+					__ipipe_current_domain = this_domain;
+				/*
+				 * Otherwise, something changed the current domain under our
+				 * feet recycling the register set; do not override the new
+				 * domain.
+				 */
+
+				if (__ipipe_ipending_p(p) &&
+				    !test_bit(IPIPE_STALL_FLAG, &p->status))
+					__ipipe_sync_pipeline();
+			}
+			break;
+		} else if (next_domain == this_domain)
+			break;
+
+		pos = next_domain->p_link.next;
+	}
+}
+
+/*
+ * ipipe_suspend_domain() -- Suspend the current domain, switching to
+ * the next one which has pending work down the pipeline.
+ */
+void ipipe_suspend_domain(void)
+{
+	struct ipipe_domain *this_domain, *next_domain;
+	struct ipipe_percpu_domain_data *p;
+	struct list_head *ln;
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+	this_domain = next_domain = __ipipe_current_domain;
+	p = ipipe_cpudom_ptr(this_domain);
+	p->status &= ~IPIPE_STALL_MASK;
+
+	if (__ipipe_ipending_p(p))
+		goto sync_stage;
+
+	for (;;) {
+		ln = next_domain->p_link.next;
+
+		if (ln == &__ipipe_pipeline)
+			break;
+
+		next_domain = list_entry(ln, struct ipipe_domain, p_link);
+		p = ipipe_cpudom_ptr(next_domain);
+
+		if (p->status & IPIPE_STALL_MASK)
+			break;
+
+		if (!__ipipe_ipending_p(p))
+			continue;
+
+		__ipipe_current_domain = next_domain;
+sync_stage:
+		__ipipe_sync_pipeline();
+
+		if (__ipipe_current_domain != next_domain)
+			/*
+			 * Something has changed the current domain under our
+			 * feet, recycling the register set; take note.
+			 */
+			this_domain = __ipipe_current_domain;
+	}
+
+	__ipipe_current_domain = this_domain;
+
+	local_irq_restore_hw(flags);
+}
+
+
+/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt.
+ * Virtual interrupts are handled in exactly the same way as their
+ * hw-generated counterparts with respect to pipelining.
+ */
+unsigned ipipe_alloc_virq(void)
+{
+	unsigned long flags, irq = 0;
+	int ipos;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (__ipipe_virtual_irq_map != ~0) {
+		ipos = ffz(__ipipe_virtual_irq_map);
+		set_bit(ipos, &__ipipe_virtual_irq_map);
+		irq = ipos + IPIPE_VIRQ_BASE;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return irq;
+}
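+
+/*
+ * Illustration only -- a client domain would typically pair this with
+ * ipipe_virtualize_irq(); 'my_domain' and 'my_virq_handler' below are
+ * made-up names, and error handling is elided. The virq may later be
+ * posted, e.g. through ipipe_trigger_irq().
+ *
+ *	unsigned int virq = ipipe_alloc_virq();
+ *	if (virq == 0)
+ *		return -EBUSY;
+ *	ipipe_virtualize_irq(&my_domain, virq, my_virq_handler,
+ *			     NULL, NULL, IPIPE_HANDLE_MASK);
+ */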
+
+/*
+ * ipipe_virtualize_irq() -- Set a per-domain pipelined interrupt
+ * handler.
+ */
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned int irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t acknowledge,
+			 unsigned modemask)
+{
+	ipipe_irq_handler_t old_handler;
+	struct irq_desc *desc;
+	unsigned long flags;
+	int ret = 0;
+
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+
+	if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
+		return -EPERM;
+
+	if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags))
+		/* Silently unwire interrupts for non-heading domains. */
+		modemask &= ~IPIPE_WIRED_MASK;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	old_handler = ipd->irqs[irq].handler;
+
+	if (handler == NULL) {
+		modemask &=
+		    ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK |
+		      IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK);
+
+		ipd->irqs[irq].handler = NULL;
+		ipd->irqs[irq].cookie = NULL;
+		ipd->irqs[irq].acknowledge = NULL;
+		ipd->irqs[irq].control = modemask;
+
+		if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) {
+			desc = irq_to_desc(irq);
+			if (old_handler && desc)
+				__ipipe_disable_irqdesc(ipd, irq);
+		}
+
+		goto unlock_and_exit;
+	}
+
+	if (handler == IPIPE_SAME_HANDLER) {
+		cookie = ipd->irqs[irq].cookie;
+		handler = old_handler;
+		if (handler == NULL) {
+			ret = -EINVAL;
+			goto unlock_and_exit;
+		}
+	} else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && old_handler) {
+		ret = -EBUSY;
+		goto unlock_and_exit;
+	}
+
+	/*
+	 * Wired interrupts can only be delivered to domains always
+	 * heading the pipeline, and using dynamic propagation.
+	 */
+	if ((modemask & IPIPE_WIRED_MASK) != 0) {
+		if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) {
+			ret = -EINVAL;
+			goto unlock_and_exit;
+		}
+		modemask |= IPIPE_HANDLE_MASK;
+	}
+
+	if ((modemask & IPIPE_STICKY_MASK) != 0)
+		modemask |= IPIPE_HANDLE_MASK;
+
+	if (acknowledge == NULL)
+		/*
+		 * Acknowledge handler unspecified for a hw interrupt:
+		 * use the Linux-defined handler instead.
+		 */
+		acknowledge = ipipe_root_domain->irqs[irq].acknowledge;
+
+	ipd->irqs[irq].handler = handler;
+	ipd->irqs[irq].cookie = cookie;
+	ipd->irqs[irq].acknowledge = acknowledge;
+	ipd->irqs[irq].control = modemask;
+
+	desc = irq_to_desc(irq);
+	if (desc == NULL)
+		goto unlock_and_exit;
+
+	if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) {
+		__ipipe_enable_irqdesc(ipd, irq);
+		/*
+		 * IRQ enable/disable state is domain-sensitive, so we
+		 * may not change it for another domain. What is
+		 * allowed, however, is forcing some domain to handle
+		 * an interrupt source, by passing the proper 'ipd'
+		 * descriptor, which may thus differ from
+		 * __ipipe_current_domain.
+		 */
+		if ((modemask & IPIPE_ENABLE_MASK) != 0) {
+			if (ipd != __ipipe_current_domain)
+				ret = -EPERM;
+			else
+				__ipipe_enable_irq(irq);
+		}
+	}
+
+unlock_and_exit:
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return ret;
+}
+
+/* ipipe_control_irq() -- Change control mode of a pipelined interrupt. */
+
+int ipipe_control_irq(struct ipipe_domain *ipd, unsigned int irq,
+		      unsigned clrmask, unsigned setmask)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+
+	flags = ipipe_critical_enter(NULL);
+
+	if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	if (ipd->irqs[irq].handler == NULL)
+		setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
+
+	if ((setmask & IPIPE_STICKY_MASK) != 0)
+		setmask |= IPIPE_HANDLE_MASK;
+
+	if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0)	/* If one goes, both go. */
+		clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
+
+	ipd->irqs[irq].control &= ~clrmask;
+	ipd->irqs[irq].control |= setmask;
+
+	if ((setmask & IPIPE_ENABLE_MASK) != 0)
+		__ipipe_enable_irq(irq);
+	else if ((clrmask & IPIPE_ENABLE_MASK) != 0)
+		__ipipe_disable_irq(irq);
+
+out:
+	ipipe_critical_exit(flags);
+
+	return ret;
+}
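+
+/*
+ * Sketch only: a domain already handling MY_IRQ could turn hw
+ * delivery on for it as below; 'my_domain' and 'MY_IRQ' are
+ * placeholders.
+ *
+ *	ipipe_control_irq(&my_domain, MY_IRQ, 0, IPIPE_ENABLE_MASK);
+ */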
+
+/* __ipipe_dispatch_event() -- Low-level event dispatcher. */
+
+int __ipipe_dispatch_event (unsigned event, void *data)
+{
+	struct ipipe_domain *start_domain, *this_domain, *next_domain;
+	struct ipipe_percpu_domain_data *np;
+	ipipe_event_handler_t evhand;
+	struct list_head *pos, *npos;
+	unsigned long flags;
+	int propagate = 1;
+
+	local_irq_save_hw(flags);
+
+	start_domain = this_domain = __ipipe_current_domain;
+
+	list_for_each_safe(pos, npos, &__ipipe_pipeline) {
+		/*
+		 * Note: Domain migration may occur while running
+		 * event or interrupt handlers, in which case the
+		 * current register set is going to be recycled for a
+		 * different domain than the initiating one. We take
+		 * care of that by always tracking the current domain
+		 * descriptor upon return from those handlers.
+		 */
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+		np = ipipe_cpudom_ptr(next_domain);
+
+		/*
+		 * Keep a cached copy of the handler's address since
+		 * ipipe_catch_event() may clear it under our feet.
+		 */
+		evhand = next_domain->evhand[event];
+
+		if (evhand != NULL) {
+			__ipipe_current_domain = next_domain;
+			np->evsync |= (1LL << event);
+			local_irq_restore_hw(flags);
+			propagate = !evhand(event, start_domain, data);
+			local_irq_save_hw(flags);
+			/*
+			 * We may have a migration issue here, if the
+			 * current task is migrated to another CPU on
+			 * behalf of the invoked handler, usually when
+			 * a syscall event is processed. However,
+			 * ipipe_catch_event() will make sure that a
+			 * CPU that clears a handler for any given
+			 * event will not attempt to wait for itself
+			 * to clear the evsync bit for that event,
+			 * which practically plugs the hole, without
+			 * resorting to a much more complex strategy.
+			 */
+			np->evsync &= ~(1LL << event);
+			if (__ipipe_current_domain != next_domain)
+				this_domain = __ipipe_current_domain;
+		}
+
+		/* NEVER sync the root stage here. */
+		if (next_domain != ipipe_root_domain &&
+		    __ipipe_ipending_p(np) &&
+		    !test_bit(IPIPE_STALL_FLAG, &np->status)) {
+			__ipipe_current_domain = next_domain;
+			__ipipe_sync_pipeline();
+			if (__ipipe_current_domain != next_domain)
+				this_domain = __ipipe_current_domain;
+		}
+
+		__ipipe_current_domain = this_domain;
+
+		if (next_domain == this_domain || !propagate)
+			break;
+	}
+
+	local_irq_restore_hw(flags);
+
+	return !propagate;
+}
+
+/*
+ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired
+ * interrupts are immediately and unconditionally delivered to the
+ * domain heading the pipeline upon receipt, and such domain must have
+ * been registered as an invariant head for the system (priority ==
+ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is
+ * to get an extra-fast dispatching path for those IRQs, by relying on
+ * a straightforward logic based on assumptions that must always be
+ * true for invariant head domains.  The following assumptions are
+ * made when dealing with such interrupts:
+ *
+ * 1- Wired interrupts are purely dynamic, i.e. the decision to
+ * propagate them down the pipeline must be made from the head domain
+ * ISR.
+ * 2- Wired interrupts cannot be shared or sticky.
+ * 3- The root domain cannot be an invariant pipeline head; as a
+ * consequence, the root domain cannot handle wired interrupts.
+ * 4- Wired interrupts must have a valid acknowledge handler for the
+ * head domain (if needed, see __ipipe_handle_irq).
+ *
+ * Called with hw interrupts off.
+ */
+
+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head);
+
+	if (test_bit(IPIPE_STALL_FLAG, &p->status)) {
+		__ipipe_set_irq_pending(head, irq);
+		return;
+	}
+
+	__ipipe_dispatch_wired_nocheck(head, irq);
+}
+
+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq) /* hw interrupts off */
+{
+	struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head);
+	struct ipipe_domain *old;
+
+	old = __ipipe_current_domain;
+	__ipipe_current_domain = head; /* Switch to the head domain. */
+
+	p->irqall[irq]++;
+	__set_bit(IPIPE_STALL_FLAG, &p->status);
+	barrier();
+	head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. */
+	__ipipe_run_irqtail(irq);
+	barrier();
+	p = ipipe_cpudom_ptr(head);
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (__ipipe_current_domain == head) {
+		__ipipe_current_domain = old;
+		if (old == head) {
+			if (__ipipe_ipending_p(p))
+				__ipipe_sync_pipeline();
+			return;
+		}
+	}
+
+	__ipipe_walk_pipeline(&head->p_link);
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define root_stall_after_handler()	local_irq_disable()
+#else
+#define root_stall_after_handler()	do { } while (0)
+#endif
+
+#ifdef CONFIG_PREEMPT
+
+asmlinkage void preempt_schedule_irq(void);
+
+void __ipipe_preempt_schedule_irq(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+
+	BUG_ON(!irqs_disabled_hw());
+	local_irq_save(flags);
+	local_irq_enable_hw();
+	preempt_schedule_irq(); /* Ok, may reschedule now. */
+	local_irq_disable_hw();
+
+	/*
+	 * Flush any pending interrupt that may have been logged after
+	 * preempt_schedule_irq() stalled the root stage before
+	 * returning to us, and now.
+	 */
+	p = ipipe_root_cpudom_ptr();
+	if (unlikely(__ipipe_ipending_p(p))) {
+		add_preempt_count(PREEMPT_ACTIVE);
+		trace_hardirqs_on();
+		__clear_bit(IPIPE_STALL_FLAG, &p->status);
+		__ipipe_sync_pipeline();
+		sub_preempt_count(PREEMPT_ACTIVE);
+	}
+
+	__local_irq_restore_nosync(flags);
+}
+
+#else /* !CONFIG_PREEMPT */
+
+#define __ipipe_preempt_schedule_irq()	do { } while (0)
+
+#endif	/* !CONFIG_PREEMPT */
+
+/*
+ * __ipipe_sync_stage() -- Flush the pending IRQs for the current
+ * domain (and processor). This routine flushes the interrupt log
+ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for
+ * more on the deferred interrupt scheme). Every interrupt that
+ * occurred while the pipeline was stalled gets played. WARNING:
+ * callers on SMP boxen should always check for CPU migration on
+ * return from this routine.
+ *
+ * This routine must be called with hw interrupts off.
+ */
+void __ipipe_sync_stage(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_domain *ipd;
+	int irq;
+
+	ipd = __ipipe_current_domain;
+	p = ipipe_cpudom_ptr(ipd);
+
+	__set_bit(IPIPE_STALL_FLAG, &p->status);
+	smp_wmb();
+
+	if (ipd == ipipe_root_domain)
+		trace_hardirqs_off();
+
+	for (;;) {
+		irq = __ipipe_next_irq(p);
+		if (irq < 0)
+			break;
+		/*
+		 * Make sure the compiler does not reorder wrongly, so
+		 * that all updates to maps are done before the
+		 * handler gets called.
+		 */
+		barrier();
+
+		if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+			continue;
+
+		if (!__ipipe_pipeline_head_p(ipd))
+			local_irq_enable_hw();
+
+		if (likely(ipd != ipipe_root_domain)) {
+			ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+			__ipipe_run_irqtail(irq);
+			local_irq_disable_hw();
+		} else if (ipipe_virtual_irq_p(irq)) {
+			irq_enter();
+			ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+			irq_exit();
+			root_stall_after_handler();
+			local_irq_disable_hw();
+			while (__ipipe_check_root_resched())
+				__ipipe_preempt_schedule_irq();
+		} else {
+			__ipipe_do_root_xirq(ipd, irq);
+			root_stall_after_handler();
+			local_irq_disable_hw();
+		}
+
+		p = ipipe_cpudom_ptr(__ipipe_current_domain);
+	}
+
+	if (ipd == ipipe_root_domain)
+		trace_hardirqs_on();
+
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+}
+
+/* ipipe_register_domain() -- Link a new domain to the pipeline. */
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct list_head *pos = NULL;
+	struct ipipe_domain *_ipd;
+	unsigned long flags;
+
+	if (!ipipe_root_domain_p) {
+		printk(KERN_WARNING
+		       "I-pipe: Only the root domain may register a new domain.\n");
+		return -EPERM;
+	}
+
+	flags = ipipe_critical_enter(NULL);
+
+	if (attr->priority == IPIPE_HEAD_PRIORITY) {
+		if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) {
+			ipipe_critical_exit(flags);
+			return -EAGAIN;	/* Cannot override current head. */
+		}
+		ipd->slot = IPIPE_HEAD_SLOT;
+	} else
+		ipd->slot = ffz(__ipipe_domain_slot_map);
+
+	if (ipd->slot < CONFIG_IPIPE_DOMAINS) {
+		set_bit(ipd->slot, &__ipipe_domain_slot_map);
+		list_for_each(pos, &__ipipe_pipeline) {
+			_ipd = list_entry(pos, struct ipipe_domain, p_link);
+			if (_ipd->domid == attr->domid)
+				break;
+		}
+	}
+
+	ipipe_critical_exit(flags);
+
+	if (pos != &__ipipe_pipeline) {
+		if (ipd->slot < CONFIG_IPIPE_DOMAINS)
+			clear_bit(ipd->slot, &__ipipe_domain_slot_map);
+		return -EBUSY;
+	}
+
+#ifndef CONFIG_SMP
+	/*
+	 * Set up the perdomain pointers for direct access to the
+	 * percpu domain data. This saves a costly multiply each time
+	 * we need to refer to the contents of the percpu domain data
+	 * array.
+	 */
+	__raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot];
+#endif
+
+	ipd->name = attr->name;
+	ipd->domid = attr->domid;
+	ipd->pdd = attr->pdd;
+	ipd->flags = 0;
+
+	if (attr->priority == IPIPE_HEAD_PRIORITY) {
+		ipd->priority = INT_MAX;
+		__set_bit(IPIPE_AHEAD_FLAG, &ipd->flags);
+	} else
+		ipd->priority = attr->priority;
+
+	__ipipe_init_stage(ipd);
+
+	INIT_LIST_HEAD(&ipd->p_link);
+
+#ifdef CONFIG_PROC_FS
+	__ipipe_add_domain_proc(ipd);
+#endif /* CONFIG_PROC_FS */
+
+	flags = ipipe_critical_enter(NULL);
+
+	list_for_each(pos, &__ipipe_pipeline) {
+		_ipd = list_entry(pos, struct ipipe_domain, p_link);
+		if (ipd->priority > _ipd->priority)
+			break;
+	}
+
+	list_add_tail(&ipd->p_link, pos);
+
+	ipipe_critical_exit(flags);
+
+	printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name);
+
+	if (attr->entry == NULL)
+		return 0;
+
+	/*
+	 * Finally, allow the new domain to perform its initialization
+	 * duties.
+	 */
+	local_irq_save_hw_smp(flags);
+	__ipipe_current_domain = ipd;
+	local_irq_restore_hw_smp(flags);
+	attr->entry();
+	local_irq_save_hw(flags);
+	__ipipe_current_domain = ipipe_root_domain;
+	p = ipipe_root_cpudom_ptr();
+
+	if (__ipipe_ipending_p(p) &&
+	    !test_bit(IPIPE_STALL_FLAG, &p->status))
+		__ipipe_sync_pipeline();
+
+	local_irq_restore_hw(flags);
+
+	return 0;
+}
+
+/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd)
+{
+	unsigned long flags;
+
+	if (!ipipe_root_domain_p) {
+		printk(KERN_WARNING
+		       "I-pipe: Only the root domain may unregister a domain.\n");
+		return -EPERM;
+	}
+
+	if (ipd == ipipe_root_domain) {
+		printk(KERN_WARNING
+		       "I-pipe: Cannot unregister the root domain.\n");
+		return -EPERM;
+	}
+#ifdef CONFIG_SMP
+	{
+		struct ipipe_percpu_domain_data *p;
+		unsigned int irq;
+		int cpu;
+
+		/*
+		 * In the SMP case, wait for the logged events to drain on
+		 * other processors before eventually removing the domain
+		 * from the pipeline.
+		 */
+
+		ipipe_unstall_pipeline_from(ipd);
+
+		flags = ipipe_critical_enter(NULL);
+
+		for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+			clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control);
+			clear_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control);
+			clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control);
+			set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control);
+		}
+
+		ipipe_critical_exit(flags);
+
+		for_each_online_cpu(cpu) {
+			p = ipipe_percpudom_ptr(ipd, cpu);
+			while (__ipipe_ipending_p(p))
+				cpu_relax();
+		}
+	}
+#endif	/* CONFIG_SMP */
+
+	mutex_lock(&ipd->mutex);
+
+#ifdef CONFIG_PROC_FS
+	__ipipe_remove_domain_proc(ipd);
+#endif /* CONFIG_PROC_FS */
+
+	/*
+	 * Simply remove the domain from the pipeline and we are almost done.
+	 */
+
+	flags = ipipe_critical_enter(NULL);
+	list_del_init(&ipd->p_link);
+	ipipe_critical_exit(flags);
+
+	__ipipe_cleanup_domain(ipd);
+
+	mutex_unlock(&ipd->mutex);
+
+	printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name);
+
+	return 0;
+}
+
+/*
+ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of
+ * a running interrupt handler to the next domain down the pipeline.
+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts
+ * to pend the interrupt for the current domain first.
+ * Must be called hw IRQs off.
+ */
+void __ipipe_pend_irq(unsigned irq, struct list_head *head)
+{
+	struct ipipe_domain *ipd;
+	struct list_head *ln;
+
+#ifdef CONFIG_IPIPE_DEBUG
+	BUG_ON(irq >= IPIPE_NR_IRQS ||
+	       (ipipe_virtual_irq_p(irq)
+		&& !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)));
+#endif
+	for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) {
+		ipd = list_entry(ln, struct ipipe_domain, p_link);
+		if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) {
+			__ipipe_set_irq_pending(ipd, irq);
+			return;
+		}
+	}
+}
+
+/* ipipe_free_virq() -- Release a virtual/soft interrupt. */
+
+int ipipe_free_virq(unsigned virq)
+{
+	if (!ipipe_virtual_irq_p(virq))
+		return -EINVAL;
+
+	clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
+
+	return 0;
+}
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr)
+{
+	attr->name = "anon";
+	attr->domid = 1;
+	attr->entry = NULL;
+	attr->priority = IPIPE_ROOT_PRIO;
+	attr->pdd = NULL;
+}
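+
+/*
+ * A minimal registration sequence, for illustration only; the domain
+ * object, name, id and entry hook below are made up:
+ *
+ *	struct ipipe_domain_attr attr;
+ *
+ *	ipipe_init_attr(&attr);
+ *	attr.name = "mydom";
+ *	attr.domid = 0x12345678;
+ *	attr.priority = IPIPE_HEAD_PRIORITY;
+ *	attr.entry = mydom_entry;
+ *	ipipe_register_domain(&my_domain, &attr);
+ */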
+
+/*
+ * ipipe_catch_event() -- Interpose or remove an event handler for a
+ * given domain.
+ */
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned event,
+					ipipe_event_handler_t handler)
+{
+	ipipe_event_handler_t old_handler;
+	unsigned long flags;
+	int self = 0, cpu;
+
+	if (event & IPIPE_EVENT_SELF) {
+		event &= ~IPIPE_EVENT_SELF;
+		self = 1;
+	}
+
+	if (event >= IPIPE_NR_EVENTS)
+		return NULL;
+
+	flags = ipipe_critical_enter(NULL);
+
+	old_handler = xchg(&ipd->evhand[event], handler);
+	if (old_handler == NULL) {
+		if (handler) {
+			if (self)
+				ipd->evself |= (1LL << event);
+			else
+				__ipipe_event_monitors[event]++;
+		}
+	} else if (!handler) {
+		if (ipd->evself & (1LL << event))
+			ipd->evself &= ~(1LL << event);
+		else
+			__ipipe_event_monitors[event]--;
+	} else if ((ipd->evself & (1LL << event)) && !self) {
+		__ipipe_event_monitors[event]++;
+		ipd->evself &= ~(1LL << event);
+	} else if (!(ipd->evself & (1LL << event)) && self) {
+		__ipipe_event_monitors[event]--;
+		ipd->evself |= (1LL << event);
+	}
+
+	ipipe_critical_exit(flags);
+
+	if (!handler && ipipe_root_domain_p) {
+		/*
+		 * If we cleared a handler on behalf of the root
+		 * domain, we have to wait for any current invocation
+		 * to drain, since our caller might subsequently unmap
+		 * the target domain. To this end, this code
+		 * synchronizes with __ipipe_dispatch_event(),
+		 * guaranteeing that either the dispatcher sees a null
+		 * handler in which case it discards the invocation
+		 * (which also prevents from entering a livelock), or
+		 * finds a valid handler and calls it. Symmetrically,
+		 * ipipe_catch_event() ensures that the called code
+		 * won't be unmapped under our feet until the event
+		 * synchronization flag is cleared for the given event
+		 * on all CPUs.
+		 */
+		preempt_disable();
+		cpu = smp_processor_id();
+		/*
+		 * Hack: this solves the potential migration issue
+		 * raised in __ipipe_dispatch_event(). This is a
+		 * work-around which makes the assumption that other
+		 * CPUs will subsequently either process at least one
+		 * interrupt for the target domain, or call
+		 * __ipipe_dispatch_event() without going through a
+		 * migration while running the handler at least once;
+		 * practically, this is safe on any normally running
+		 * system.
+		 */
+		ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event);
+		preempt_enable();
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event))
+				schedule_timeout_interruptible(HZ / 50);
+		}
+	}
+
+	return old_handler;
+}
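+
+/*
+ * Illustration only: interposing then removing an event handler on
+ * the root domain; MY_EVENT and 'my_event_handler' stand for an
+ * actual IPIPE_EVENT_* constant and handler.
+ *
+ *	old = ipipe_catch_event(ipipe_root_domain, MY_EVENT,
+ *				my_event_handler);
+ *	...
+ *	ipipe_catch_event(ipipe_root_domain, MY_EVENT, old);
+ */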
+
+cpumask_t ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask)
+{
+#ifdef CONFIG_SMP
+	if (irq >= IPIPE_NR_XIRQS)
+		/* Allow changing affinity of external IRQs only. */
+		return CPU_MASK_NONE;
+
+	if (num_online_cpus() > 1)
+		return __ipipe_set_irq_affinity(irq, cpumask);
+#endif /* CONFIG_SMP */
+
+	return CPU_MASK_NONE;
+}
+
+int ipipe_send_ipi(unsigned ipi, cpumask_t cpumask)
+{
+#ifdef CONFIG_SMP
+	if (!ipipe_ipi_p(ipi))
+		return -EINVAL;
+	return __ipipe_send_ipi(ipi, cpumask);
+#else /* !CONFIG_SMP */
+	return -EINVAL;
+#endif /* CONFIG_SMP */
+}
+
+#ifdef CONFIG_SMP
+
+/* Always called with hw interrupts off. */
+void __ipipe_do_critical_sync(unsigned irq, void *cookie)
+{
+	int cpu = ipipe_processor_id();
+
+	cpu_set(cpu, __ipipe_cpu_sync_map);
+
+	/* Now we are in sync with the lock requestor running on another
+	   CPU. Enter a spinning wait until it releases the global
+	   lock. */
+	spin_lock(&__ipipe_cpu_barrier);
+
+	/* Got it. Now get out. */
+
+	if (__ipipe_cpu_sync)
+		/* Call the sync routine if any. */
+		__ipipe_cpu_sync();
+
+	cpu_set(cpu, __ipipe_cpu_pass_map);
+
+	spin_unlock(&__ipipe_cpu_barrier);
+
+	cpu_clear(cpu, __ipipe_cpu_sync_map);
+}
+#endif	/* CONFIG_SMP */
+
+/*
+ * ipipe_critical_enter() -- Grab the superlock excluding all CPUs but
+ * the current one from a critical section. This lock is used when we
+ * must enforce a global critical section for a single CPU in a
+ * possibly SMP system, whatever context the other CPUs are running in.
+ */
+unsigned long ipipe_critical_enter(void (*syncfn)(void))
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+#ifdef CONFIG_SMP
+	if (num_online_cpus() > 1) {
+		int cpu = ipipe_processor_id();
+		cpumask_t allbutself;
+		unsigned long loops;
+
+		if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) {
+			while (test_and_set_bit(0, &__ipipe_critical_lock)) {
+				int n = 0;
+
+				local_irq_enable_hw();
+
+				do {
+					cpu_relax();
+				} while (++n < cpu);
+
+				local_irq_disable_hw();
+			}
+
+restart:
+			spin_lock(&__ipipe_cpu_barrier);
+
+			__ipipe_cpu_sync = syncfn;
+
+			cpus_clear(__ipipe_cpu_pass_map);
+			cpu_set(cpu, __ipipe_cpu_pass_map);
+
+			/*
+			 * Send the sync IPI to all processors but the current
+			 * one.
+			 */
+			cpus_andnot(allbutself, cpu_online_map,
+				    __ipipe_cpu_pass_map);
+			__ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself);
+
+			loops = IPIPE_CRITICAL_TIMEOUT;
+
+			while (!cpus_equal(__ipipe_cpu_sync_map, allbutself)) {
+				cpu_relax();
+
+				if (--loops == 0) {
+					/*
+					 * We ran into a deadlock due to a
+					 * contended rwlock. Cancel this round
+					 * and retry.
+					 */
+					__ipipe_cpu_sync = NULL;
+
+					spin_unlock(&__ipipe_cpu_barrier);
+
+					/*
+					 * Ensure all CPUs consumed the IPI to
+					 * avoid running __ipipe_cpu_sync
+					 * prematurely. This usually resolves
+					 * the deadlock reason too.
+					 */
+					while (!cpus_equal(cpu_online_map,
+							   __ipipe_cpu_pass_map))
+						cpu_relax();
+
+					goto restart;
+				}
+			}
+		}
+
+		atomic_inc(&__ipipe_critical_count);
+	}
+#endif	/* CONFIG_SMP */
+
+	return flags;
+}
+
+/* ipipe_critical_exit() -- Release the superlock. */
+
+void ipipe_critical_exit(unsigned long flags)
+{
+#ifdef CONFIG_SMP
+	if (num_online_cpus() > 1 &&
+	    atomic_dec_and_test(&__ipipe_critical_count)) {
+		spin_unlock(&__ipipe_cpu_barrier);
+
+		while (!cpus_empty(__ipipe_cpu_sync_map))
+			cpu_relax();
+
+		cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map);
+		clear_bit(0, &__ipipe_critical_lock);
+		smp_mb__after_clear_bit();
+	}
+#endif	/* CONFIG_SMP */
+
+	local_irq_restore_hw(flags);
+}
+
+#ifdef CONFIG_HAVE_IPIPE_HOSTRT
+/*
+ * NOTE: The architecture specific code must only call this function
+ * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled.
+ */
+void ipipe_update_hostrt(struct timespec *wall_time, struct clocksource *clock)
+{
+	struct ipipe_hostrt_data hostrt_data;
+
+	hostrt_data.live = 1;
+	hostrt_data.cycle_last = clock->cycle_last;
+	hostrt_data.mask = clock->mask;
+	hostrt_data.mult = clock->mult;
+	hostrt_data.shift = clock->shift;
+	hostrt_data.wall_time_sec = wall_time->tv_sec;
+	hostrt_data.wall_time_nsec = wall_time->tv_nsec;
+	hostrt_data.wall_to_monotonic = __get_wall_to_monotonic();
+
+	/* Note: The event receiver is responsible for providing
+	   proper locking. */
+	if (__ipipe_event_monitored_p(IPIPE_EVENT_HOSTRT))
+		__ipipe_dispatch_event(IPIPE_EVENT_HOSTRT, &hostrt_data);
+}
+#endif /* CONFIG_HAVE_IPIPE_HOSTRT */
+
+int ipipe_alloc_ptdkey(void)
+{
+	unsigned long flags;
+	int key = -1;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) {
+		key = ffz(__ipipe_ptd_key_map);
+		set_bit(key, &__ipipe_ptd_key_map);
+		__ipipe_ptd_key_count++;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return key;
+}
+
+int ipipe_free_ptdkey(int key)
+{
+	unsigned long flags;
+
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (test_and_clear_bit(key, &__ipipe_ptd_key_map))
+		__ipipe_ptd_key_count--;
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return 0;
+}
+
+int ipipe_set_ptd(int key, void *value)
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	current->ptd[key] = value;
+
+	return 0;
+}
+
+void *ipipe_get_ptd(int key)
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return NULL;
+
+	return current->ptd[key];
+}
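+
+/*
+ * Typical per-task data key lifecycle (illustration only;
+ * 'some_pointer' is a placeholder):
+ *
+ *	int key = ipipe_alloc_ptdkey();
+ *	if (key < 0)
+ *		return -EBUSY;
+ *	ipipe_set_ptd(key, some_pointer);
+ *	...
+ *	some_pointer = ipipe_get_ptd(key);
+ *	ipipe_free_ptdkey(key);
+ */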
+
+#ifdef CONFIG_PROC_FS
+
+struct proc_dir_entry *ipipe_proc_root;
+
+static int __ipipe_version_info_proc(char *page,
+				     char **start,
+				     off_t off, int count, int *eof, void *data)
+{
+	int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING);
+
+	len -= off;
+
+	if (len <= off + count)
+		*eof = 1;
+
+	*start = page + off;
+
+	if (len > count)
+		len = count;
+
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_common_info_show(struct seq_file *p, void *data)
+{
+	struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
+	char handling, stickiness, lockbit, exclusive, virtuality;
+
+	unsigned long ctlbits;
+	unsigned irq;
+
+	seq_printf(p, "       +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n");
+	seq_printf(p, "       |+---- Sticky\n");
+	seq_printf(p, "       ||+--- Locked\n");
+	seq_printf(p, "       |||+-- Exclusive\n");
+	seq_printf(p, "       ||||+- Virtual\n");
+	seq_printf(p, "[IRQ]  |||||\n");
+
+	mutex_lock(&ipd->mutex);
+
+	for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+		/* Remember to protect against
+		 * ipipe_virtual_irq/ipipe_control_irq if more fields
+		 * get involved. */
+		ctlbits = ipd->irqs[irq].control;
+
+		if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
+			/*
+			 * There might be a hole between the last external
+			 * IRQ and the first virtual one; skip it.
+			 */
+			continue;
+
+		if (ipipe_virtual_irq_p(irq)
+		    && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
+			/* Non-allocated virtual IRQ; skip it. */
+			continue;
+
+		/*
+		 * Statuses are as follows:
+		 * o "accepted" means handled _and_ passed down the pipeline.
+		 * o "grabbed" means handled, but the interrupt might be
+		 * terminated _or_ passed down the pipeline depending on
+		 * what the domain handler asks the I-pipe for.
+		 * o "wired" is basically the same as "grabbed", except that
+		 * the interrupt is unconditionally delivered to an invariant
+		 * pipeline head domain.
+		 * o "passed" means unhandled by the domain but passed
+		 * down the pipeline.
+		 * o "discarded" means unhandled and _not_ passed down the
+		 * pipeline. The interrupt merely disappears from the
+		 * current domain down to the end of the pipeline.
+		 */
+		if (ctlbits & IPIPE_HANDLE_MASK) {
+			if (ctlbits & IPIPE_PASS_MASK)
+				handling = 'A';
+			else if (ctlbits & IPIPE_WIRED_MASK)
+				handling = 'W';
+			else
+				handling = 'G';
+		} else if (ctlbits & IPIPE_PASS_MASK)
+			/* Do not output if no major action is taken. */
+			continue;
+		else
+			handling = 'D';
+
+		if (ctlbits & IPIPE_STICKY_MASK)
+			stickiness = 'S';
+		else
+			stickiness = '.';
+
+		if (ctlbits & IPIPE_LOCK_MASK)
+			lockbit = 'L';
+		else
+			lockbit = '.';
+
+		if (ctlbits & IPIPE_EXCLUSIVE_MASK)
+			exclusive = 'X';
+		else
+			exclusive = '.';
+
+		if (ipipe_virtual_irq_p(irq))
+			virtuality = 'V';
+		else
+			virtuality = '.';
+
+		seq_printf(p, " %3u:  %c%c%c%c%c\n",
+			     irq, handling, stickiness, lockbit, exclusive, virtuality);
+	}
+
+	seq_printf(p, "[Domain info]\n");
+
+	seq_printf(p, "id=0x%.8x\n", ipd->domid);
+
+	if (test_bit(IPIPE_AHEAD_FLAG, &ipd->flags))
+		seq_printf(p, "priority=topmost\n");
+	else
+		seq_printf(p, "priority=%d\n", ipd->priority);
+
+	mutex_unlock(&ipd->mutex);
+
+	return 0;
+}
+
+static int __ipipe_common_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data);
+}
+
+static struct file_operations __ipipe_info_proc_ops = {
+	.owner		= THIS_MODULE,
+	.open		= __ipipe_common_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd)
+{
+	struct proc_dir_entry *e;
+
+	e = create_proc_entry(ipd->name, 0444, ipipe_proc_root);
+	if (e) {
+		e->proc_fops = &__ipipe_info_proc_ops;
+		e->data = (void *)ipd;
+	}
+}
+
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd)
+{
+	remove_proc_entry(ipd->name, ipipe_proc_root);
+}
+
+void __init ipipe_init_proc(void)
+{
+	ipipe_proc_root = create_proc_entry("ipipe", S_IFDIR, 0);
+	create_proc_read_entry("version", 0444, ipipe_proc_root,
+			       &__ipipe_version_info_proc, NULL);
+	__ipipe_add_domain_proc(ipipe_root_domain);
+
+	__ipipe_init_tracer();
+}
+
+#endif	/* CONFIG_PROC_FS */
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 };
+DEFINE_PER_CPU(int, ipipe_saved_context_check_state);
+
+void ipipe_check_context(struct ipipe_domain *border_domain)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_domain *this_domain;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save_hw_smp(flags);
+
+	this_domain = __ipipe_current_domain;
+	p = ipipe_head_cpudom_ptr();
+	if (likely(this_domain->priority <= border_domain->priority &&
+		   !test_bit(IPIPE_STALL_FLAG, &p->status))) {
+		local_irq_restore_hw_smp(flags);
+		return;
+	}
+
+	cpu = ipipe_processor_id();
+	if (!per_cpu(ipipe_percpu_context_check, cpu)) {
+		local_irq_restore_hw_smp(flags);
+		return;
+	}
+
+	local_irq_restore_hw_smp(flags);
+
+	ipipe_context_check_off();
+	ipipe_trace_panic_freeze();
+	ipipe_set_printk_sync(__ipipe_current_domain);
+
+	if (this_domain->priority > border_domain->priority)
+		printk(KERN_ERR "I-pipe: Detected illicit call from domain "
+				"'%s'\n"
+		       KERN_ERR "        into a service reserved for domain "
+				"'%s' and below.\n",
+		       this_domain->name, border_domain->name);
+	else
+		printk(KERN_ERR "I-pipe: Detected stalled topmost domain, "
+				"probably caused by a bug.\n"
+				"        A critical section may have been "
+				"left unterminated.\n");
+	dump_stack();
+	ipipe_trace_panic_dump();
+}
+
+EXPORT_SYMBOL(ipipe_check_context);
+
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+
+int notrace __ipipe_check_percpu_access(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_domain *this_domain;
+	unsigned long flags;
+	int ret = 0;
+
+	local_irq_save_hw_notrace(flags);
+
+	this_domain = __raw_get_cpu_var(ipipe_percpu_domain);
+
+	/*
+	 * Only the root domain may implement preemptive CPU migration
+	 * of tasks, so anything above in the pipeline should be fine.
+	 */
+	if (this_domain->priority > IPIPE_ROOT_PRIO)
+		goto out;
+
+	if (raw_irqs_disabled_flags(flags))
+		goto out;
+
+	/*
+	 * Last chance: hw interrupts were enabled on entry while
+	 * running over the root domain, but the root stage might be
+	 * currently stalled, in which case preemption would be
+	 * disabled, and no migration could occur.
+	 */
+	if (this_domain == ipipe_root_domain) {
+		p = ipipe_root_cpudom_ptr();
+		if (test_bit(IPIPE_STALL_FLAG, &p->status))
+			goto out;
+	}
+	/*
+	 * Our caller may end up accessing the wrong per-cpu variable
+	 * instance due to CPU migration; tell it to complain about
+	 * this.
+	 */
+	ret = 1;
+out:
+	local_irq_restore_hw_notrace(flags);
+
+	return ret;
+}
+
+void __ipipe_spin_unlock_debug(unsigned long flags)
+{
+	/*
+	 * We catch a nasty issue where spin_unlock_irqrestore() on a
+	 * regular kernel spinlock is about to re-enable hw interrupts
+	 * in a section entered with hw irqs off. This is clearly the
+	 * sign of massive breakage coming. The usual suspect is a
+	 * regular spinlock which was overlooked and used within a
+	 * section which must run with hw irqs disabled.
+	 */
+	WARN_ON_ONCE(!raw_irqs_disabled_flags(flags) && irqs_disabled_hw());
+}
+EXPORT_SYMBOL(__ipipe_spin_unlock_debug);
+
+#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */
+
+
+void ipipe_prepare_panic(void)
+{
+	ipipe_set_printk_sync(ipipe_current_domain);
+	ipipe_context_check_off();
+}
+
+EXPORT_SYMBOL_GPL(ipipe_prepare_panic);
+
+EXPORT_SYMBOL(ipipe_virtualize_irq);
+EXPORT_SYMBOL(ipipe_control_irq);
+EXPORT_SYMBOL(ipipe_suspend_domain);
+EXPORT_SYMBOL(ipipe_alloc_virq);
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain);
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray);
+EXPORT_SYMBOL(ipipe_root);
+EXPORT_SYMBOL(ipipe_stall_pipeline_from);
+EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from);
+EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from);
+EXPORT_SYMBOL(ipipe_restore_pipeline_from);
+EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
+EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
+EXPORT_SYMBOL(__ipipe_unstall_root);
+EXPORT_SYMBOL(__ipipe_restore_root);
+EXPORT_SYMBOL(__ipipe_spin_lock_irq);
+EXPORT_SYMBOL(__ipipe_spin_unlock_irq);
+EXPORT_SYMBOL(__ipipe_spin_lock_irqsave);
+EXPORT_SYMBOL(__ipipe_spin_trylock_irq);
+EXPORT_SYMBOL(__ipipe_spin_trylock_irqsave);
+EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore);
+EXPORT_SYMBOL(__ipipe_pipeline);
+EXPORT_SYMBOL(__ipipe_lock_irq);
+EXPORT_SYMBOL(__ipipe_unlock_irq);
+EXPORT_SYMBOL(ipipe_register_domain);
+EXPORT_SYMBOL(ipipe_unregister_domain);
+EXPORT_SYMBOL(ipipe_free_virq);
+EXPORT_SYMBOL(ipipe_init_attr);
+EXPORT_SYMBOL(ipipe_catch_event);
+EXPORT_SYMBOL(ipipe_alloc_ptdkey);
+EXPORT_SYMBOL(ipipe_free_ptdkey);
+EXPORT_SYMBOL(ipipe_set_ptd);
+EXPORT_SYMBOL(ipipe_get_ptd);
+EXPORT_SYMBOL(ipipe_set_irq_affinity);
+EXPORT_SYMBOL(ipipe_send_ipi);
+EXPORT_SYMBOL(__ipipe_pend_irq);
+EXPORT_SYMBOL(__ipipe_set_irq_pending);
+EXPORT_SYMBOL(__ipipe_event_monitors);
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+EXPORT_SYMBOL(__ipipe_check_percpu_access);
+#endif
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+EXPORT_SYMBOL(ipipe_request_tickdev);
+EXPORT_SYMBOL(ipipe_release_tickdev);
+#endif
+
+EXPORT_SYMBOL(ipipe_critical_enter);
+EXPORT_SYMBOL(ipipe_critical_exit);
+EXPORT_SYMBOL(ipipe_trigger_irq);
+EXPORT_SYMBOL(ipipe_get_sysinfo);
diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c
new file mode 100644
index 0000000..f013ef4
--- /dev/null
+++ b/kernel/ipipe/tracer.c
@@ -0,0 +1,1442 @@
+/* -*- linux-c -*-
+ * kernel/ipipe/tracer.c
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2008 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/vmalloc.h>
+#include <linux/pid.h>
+#include <linux/vermagic.h>
+#include <linux/sched.h>
+#include <linux/ipipe.h>
+#include <linux/ftrace.h>
+#include <asm/uaccess.h>
+
+#define IPIPE_TRACE_PATHS           4 /* <!> Do not lower below 3 */
+#define IPIPE_DEFAULT_ACTIVE        0
+#define IPIPE_DEFAULT_MAX           1
+#define IPIPE_DEFAULT_FROZEN        2
+
+#define IPIPE_TRACE_POINTS          (1 << CONFIG_IPIPE_TRACE_SHIFT)
+#define WRAP_POINT_NO(point)        ((point) & (IPIPE_TRACE_POINTS-1))
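+/*
+ * For instance, assuming CONFIG_IPIPE_TRACE_SHIFT is 14 (the value is
+ * a Kconfig choice), the ring holds 16384 points and
+ * WRAP_POINT_NO(16385) == 1, i.e. indices simply wrap around the
+ * power-of-two buffer size.
+ */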
+
+#define IPIPE_DEFAULT_PRE_TRACE     10
+#define IPIPE_DEFAULT_POST_TRACE    10
+#define IPIPE_DEFAULT_BACK_TRACE    100
+
+#define IPIPE_DELAY_NOTE            1000  /* in nanoseconds */
+#define IPIPE_DELAY_WARN            10000 /* in nanoseconds */
+
+#define IPIPE_TFLG_NMI_LOCK         0x0001
+#define IPIPE_TFLG_NMI_HIT          0x0002
+#define IPIPE_TFLG_NMI_FREEZE_REQ   0x0004
+
+#define IPIPE_TFLG_HWIRQ_OFF        0x0100
+#define IPIPE_TFLG_FREEZING         0x0200
+#define IPIPE_TFLG_CURRDOM_SHIFT    10   /* bits 10..11: current domain */
+#define IPIPE_TFLG_CURRDOM_MASK     0x0C00
+#define IPIPE_TFLG_DOMSTATE_SHIFT   12   /* bits 12..15: domain stalled? */
+#define IPIPE_TFLG_DOMSTATE_BITS    3
+
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
+	((point)->flags & (1 << ((n) + IPIPE_TFLG_DOMSTATE_SHIFT)))
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
+	(((point)->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
+
+struct ipipe_trace_point {
+	short type;
+	short flags;
+	unsigned long eip;
+	unsigned long parent_eip;
+	unsigned long v;
+	unsigned long long timestamp;
+};
+
+struct ipipe_trace_path {
+	volatile int flags;
+	int dump_lock; /* separated from flags due to cross-cpu access */
+	int trace_pos; /* next point to fill */
+	int begin, end; /* finalised path begin and end */
+	int post_trace; /* non-zero when in post-trace phase */
+	unsigned long long length; /* max path length in cycles */
+	unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
+	unsigned long nmi_saved_parent_eip;
+	unsigned long nmi_saved_v;
+	struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
+} ____cacheline_aligned_in_smp;
+
+enum ipipe_trace_type
+{
+	IPIPE_TRACE_FUNC = 0,
+	IPIPE_TRACE_BEGIN,
+	IPIPE_TRACE_END,
+	IPIPE_TRACE_FREEZE,
+	IPIPE_TRACE_SPECIAL,
+	IPIPE_TRACE_PID,
+	IPIPE_TRACE_EVENT,
+};
+
+#define IPIPE_TYPE_MASK             0x0007
+#define IPIPE_TYPE_BITS             3
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path);
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
+static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) =
+	{ [0 ... IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } };
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+int ipipe_trace_enable = 0;
+
+static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE };
+static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX };
+static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN };
+static IPIPE_DEFINE_SPINLOCK(global_path_lock);
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
+static int verbose_trace = 1;
+static unsigned long trace_overhead;
+
+static unsigned long trigger_begin;
+static unsigned long trigger_end;
+
+static DEFINE_MUTEX(out_mutex);
+static struct ipipe_trace_path *print_path;
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+static struct ipipe_trace_path *panic_path;
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+static int print_pre_trace;
+static int print_post_trace;
+
+
+static long __ipipe_signed_tsc2us(long long tsc);
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
+
+
+static notrace void
+__ipipe_store_domain_states(struct ipipe_trace_point *point)
+{
+	struct ipipe_domain *ipd;
+	struct list_head *pos;
+	int i = 0;
+
+	list_for_each_prev(pos, &__ipipe_pipeline) {
+		ipd = list_entry(pos, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)))
+			point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT);
+
+		if (ipd == __ipipe_current_domain)
+			point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT;
+
+		if (++i > IPIPE_TFLG_DOMSTATE_BITS)
+			break;
+	}
+}
+
+static notrace int __ipipe_get_free_trace_path(int old, int cpu)
+{
+	int new_active = old;
+	struct ipipe_trace_path *tp;
+
+	do {
+		if (++new_active == IPIPE_TRACE_PATHS)
+			new_active = 0;
+		tp = &per_cpu(trace_path, cpu)[new_active];
+	} while (new_active == per_cpu(max_path, cpu) ||
+	         new_active == per_cpu(frozen_path, cpu) ||
+	         tp->dump_lock);
+
+	return new_active;
+}
+
+static notrace void
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
+                          struct ipipe_trace_path *old_tp, int old_pos)
+{
+	int i;
+
+	new_tp->trace_pos = pre_trace+1;
+
+	for (i = new_tp->trace_pos; i > 0; i--)
+		memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
+		       &old_tp->point[WRAP_POINT_NO(old_pos-i)],
+		       sizeof(struct ipipe_trace_point));
+
+	/* mark the end (i.e. the point before point[0]) invalid */
+	new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = per_cpu(active_path, cpu);
+	unsigned long long length;
+
+	/* do we have a new worst case? */
+	length = tp->point[tp->end].timestamp -
+	         tp->point[tp->begin].timestamp;
+	if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) {
+		/* we need protection here against other cpus trying
+		   to start a proc dump */
+		spin_lock(&global_path_lock);
+
+		/* active path holds new worst case */
+		tp->length = length;
+		per_cpu(max_path, cpu) = active;
+
+		/* find next unused trace path */
+		active = __ipipe_get_free_trace_path(active, cpu);
+
+		spin_unlock(&global_path_lock);
+
+		tp = &per_cpu(trace_path, cpu)[active];
+
+		/* migrate last entries for pre-tracing */
+		__ipipe_migrate_pre_trace(tp, old_tp, pos);
+	}
+
+	return tp;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = per_cpu(active_path, cpu);
+	int n;
+
+	/* frozen paths have no core (begin=end) */
+	tp->begin = tp->end;
+
+	/* we need protection here against other cpus trying
+	 * to set their frozen path or to start a proc dump */
+	spin_lock(&global_path_lock);
+
+	per_cpu(frozen_path, cpu) = active;
+
+	/* find next unused trace path */
+	active = __ipipe_get_free_trace_path(active, cpu);
+
+	/* check if this is the first frozen path */
+	for_each_possible_cpu(n) {
+		if (n != cpu &&
+		    per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0)
+			tp->end = -1;
+	}
+
+	spin_unlock(&global_path_lock);
+
+	tp = &per_cpu(trace_path, cpu)[active];
+
+	/* migrate last entries for pre-tracing */
+	__ipipe_migrate_pre_trace(tp, old_tp, pos);
+
+	return tp;
+}
+
+void notrace
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
+              unsigned long parent_eip, unsigned long v)
+{
+	struct ipipe_trace_path *tp, *old_tp;
+	int pos, next_pos, begin;
+	struct ipipe_trace_point *point;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save_hw_notrace(flags);
+
+	cpu = ipipe_processor_id();
+ restart:
+	tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	/* here starts a race window with NMIs - caught below */
+
+	/* check for NMI recursion */
+	if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
+		tp->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* first freeze request from NMI context? */
+		if ((type == IPIPE_TRACE_FREEZE) &&
+		    !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
+			/* save arguments and mark deferred freezing */
+			tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
+			tp->nmi_saved_eip = eip;
+			tp->nmi_saved_parent_eip = parent_eip;
+			tp->nmi_saved_v = v;
+		}
+		return; /* no need for restoring flags inside IRQ */
+	}
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (unlikely(tp !=
+		     &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	/* get the point buffer */
+	pos = tp->trace_pos;
+	point = &tp->point[pos];
+
+	/* store all trace point data */
+	point->type = type;
+	point->flags = raw_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
+	point->eip = eip;
+	point->parent_eip = parent_eip;
+	point->v = v;
+	ipipe_read_tsc(point->timestamp);
+
+	__ipipe_store_domain_states(point);
+
+	/* forward to next point buffer */
+	next_pos = WRAP_POINT_NO(pos+1);
+	tp->trace_pos = next_pos;
+
+	/* only mark beginning if we haven't started yet */
+	begin = tp->begin;
+	if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
+		tp->begin = pos;
+
+	/* end of critical path, start post-trace if not already started */
+	if (unlikely(type == IPIPE_TRACE_END) &&
+	    (begin >= 0) && !tp->post_trace)
+		tp->post_trace = post_trace + 1;
+
+	/* freeze only if the slot is free and we are not already freezing */
+	if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
+	     (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
+	     type == IPIPE_TRACE_FUNC)) &&
+	    per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 &&
+	    !(tp->flags & IPIPE_TFLG_FREEZING)) {
+		tp->post_trace = post_trace + 1;
+		tp->flags |= IPIPE_TFLG_FREEZING;
+	}
+
+	/* enforce end of trace in case of overflow */
+	if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
+		tp->end = pos;
+		goto enforce_end;
+	}
+
+	/* stop tracing this path if we are in post-trace and
+	 *  a) that phase is over now or
+	 *  b) a new TRACE_BEGIN came in but we are not freezing this path */
+	if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
+	             ((type == IPIPE_TRACE_BEGIN) &&
+	              !(tp->flags & IPIPE_TFLG_FREEZING))))) {
+		/* store the path's end (i.e. excluding post-trace) */
+		tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
+
+ enforce_end:
+		if (tp->flags & IPIPE_TFLG_FREEZING)
+			tp = __ipipe_trace_freeze(cpu, tp, pos);
+		else
+			tp = __ipipe_trace_end(cpu, tp, pos);
+
+		/* reset the active path, maybe already start a new one */
+		tp->begin = (type == IPIPE_TRACE_BEGIN) ?
+			WRAP_POINT_NO(tp->trace_pos - 1) : -1;
+		tp->end = -1;
+		tp->post_trace = 0;
+		tp->flags = 0;
+
+		/* update active_path not earlier to avoid races with NMIs */
+		per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu);
+	}
+
+	/* we still have old_tp and point,
+	 * let's reset NMI lock and check for catches */
+	old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+	if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
+		/* well, this late tagging may not immediately be visible to
+		 * other cpus already dumping this path - a minor issue */
+		point->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* handle deferred freezing from NMI context */
+		if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+			__ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
+			              old_tp->nmi_saved_parent_eip,
+			              old_tp->nmi_saved_v);
+	}
+
+	local_irq_restore_hw_notrace(flags);
+}
+
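+/*
+ * Stop path switching globally: grab the spinlock against other CPUs and
+ * proc dumps, and additionally set the NMI lock flag on the local active
+ * path so that NMIs firing meanwhile only leave marks, which get replayed
+ * in __ipipe_global_path_unlock().
+ */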
+static unsigned long __ipipe_global_path_lock(void)
+{
+	unsigned long flags;
+	int cpu;
+	struct ipipe_trace_path *tp;
+
+	spin_lock_irqsave(&global_path_lock, flags);
+
+	cpu = ipipe_processor_id();
+ restart:
+	tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	/* here is a small race window with NMIs - caught below */
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	return flags;
+}
+
+static void __ipipe_global_path_unlock(unsigned long flags)
+{
+	int cpu;
+	struct ipipe_trace_path *tp;
+
+	/* release spinlock first - it's not involved in the NMI issue */
+	__ipipe_spin_unlock_irqbegin(&global_path_lock);
+
+	cpu = ipipe_processor_id();
+	tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+	/* handle deferred freezing from NMI context */
+	if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+		__ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+		              tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+	/* See __ipipe_spin_lock_irqsave() and friends. */
+	__ipipe_spin_unlock_irqcomplete(flags);
+}
+
+void notrace ipipe_trace_begin(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_end);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_freeze);
+
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_special);
+
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL(ipipe_trace_pid);
+
+void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, delay_tsc);
+}
+EXPORT_SYMBOL(ipipe_trace_event);
+
+int ipipe_trace_max_reset(void)
+{
+	int cpu;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_possible_cpu(cpu) {
+		path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin     = -1;
+		path->end       = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_max_reset);
+
+int ipipe_trace_frozen_reset(void)
+{
+	int cpu;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_online_cpu(cpu) {
+		path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin = -1;
+		path->end = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_frozen_reset);
+
+static void
+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
+                      int trylock)
+{
+	struct task_struct *task = NULL;
+	char buf[8];
+	int i;
+	int locked = 1;
+
+	if (trylock) {
+		if (!read_trylock(&tasklist_lock))
+			locked = 0;
+	} else
+		read_lock(&tasklist_lock);
+
+	if (locked)
+		task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns);
+
+	if (task)
+		strncpy(task_info, task->comm, 11);
+	else
+		strcpy(task_info, "-<?>-");
+
+	if (locked)
+		read_unlock(&tasklist_lock);
+
+	for (i = strlen(task_info); i < 11; i++)
+		task_info[i] = ' ';
+
+	sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
+	strcpy(task_info + (11 - strlen(buf)), buf);
+}
+
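+/*
+ * For event points, point->v holds the delay_tsc value passed to
+ * ipipe_trace_event(), so the printed date is the expected event time
+ * relative to the path begin rather than the time the point was logged.
+ */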
+static void
+__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path,
+		       struct ipipe_trace_point *point)
+{
+	long time;
+	int type;
+
+	time = __ipipe_signed_tsc2us(point->timestamp -
+				     path->point[path->begin].timestamp + point->v);
+	type = point->type >> IPIPE_TYPE_BITS;
+
+	if (type == 0)
+		/*
+		 * Event type #0 is predefined; it stands for the next
+		 * timer tick.
+		 */
+		sprintf(buf, "tick@%-6ld", time);
+	else
+		sprintf(buf, "%3d@%-7ld", type, time);
+}
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
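+/*
+ * Panic support: oops_enter() freezes the active path via
+ * ipipe_trace_panic_freeze(), and bust_spinlocks() later prints it with
+ * ipipe_trace_panic_dump(), using plain printk since the /proc interface
+ * is not usable at that point.
+ */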
+void ipipe_trace_panic_freeze(void)
+{
+	unsigned long flags;
+	int cpu;
+
+	if (!ipipe_trace_enable)
+		return;
+
+	ipipe_trace_enable = 0;
+	local_irq_save_hw_notrace(flags);
+
+	cpu = ipipe_processor_id();
+
+	panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	local_irq_restore_hw(flags);
+}
+EXPORT_SYMBOL(ipipe_trace_panic_freeze);
+
+void ipipe_trace_panic_dump(void)
+{
+	int cnt = back_trace;
+	int start, pos;
+	char buf[16];
+
+	if (!panic_path)
+		return;
+
+	ipipe_context_check_off();
+
+	printk("I-pipe tracer log (%d points):\n", cnt);
+
+	start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
+
+	while (cnt-- > 0) {
+		struct ipipe_trace_point *point = &panic_path->point[pos];
+		long time;
+		char info[16];
+		int i;
+
+		printk(" %c",
+		       (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+		for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+			printk("%c",
+			       (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'#' : '+') :
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'*' : ' '));
+
+		if (!point->eip)
+			printk("-<invalid>-\n");
+		else {
+			__ipipe_trace_point_type(buf, point);
+			printk("%s", buf);
+
+			switch (point->type & IPIPE_TYPE_MASK) {
+				case IPIPE_TRACE_FUNC:
+					printk("           ");
+					break;
+
+				case IPIPE_TRACE_PID:
+					__ipipe_get_task_info(info,
+							      point, 1);
+					printk("%s", info);
+					break;
+
+				case IPIPE_TRACE_EVENT:
+					__ipipe_get_event_date(info,
+							       panic_path, point);
+					printk("%s", info);
+					break;
+
+				default:
+					printk("0x%08lx ", point->v);
+			}
+
+			time = __ipipe_signed_tsc2us(point->timestamp -
+				panic_path->point[start].timestamp);
+			printk(" %5ld ", time);
+
+			__ipipe_print_symname(NULL, point->eip);
+			printk(" (");
+			__ipipe_print_symname(NULL, point->parent_eip);
+			printk(")\n");
+		}
+		pos = WRAP_POINT_NO(pos - 1);
+	}
+
+	panic_path = NULL;
+}
+EXPORT_SYMBOL(ipipe_trace_panic_dump);
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+
+
+/* --- /proc output --- */
+
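+/*
+ * A point belongs to the critical section if it lies between begin and
+ * end of the printed path (modulo buffer wrap-around); for frozen paths,
+ * where begin == end, everything outside the post-trace window counts.
+ */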
+static notrace int __ipipe_in_critical_trpath(long point_no)
+{
+	return ((WRAP_POINT_NO(point_no-print_path->begin) <
+	         WRAP_POINT_NO(print_path->end-print_path->begin)) ||
+	        ((print_path->end == print_path->begin) &&
+	         (WRAP_POINT_NO(point_no-print_path->end) >
+	          print_post_trace)));
+}
+
+static long __ipipe_signed_tsc2us(long long tsc)
+{
+	unsigned long long abs_tsc;
+	long us;
+
+	/* ipipe_tsc2us works on unsigned => handle sign separately */
+	abs_tsc = (tsc >= 0) ? tsc : -tsc;
+	us = ipipe_tsc2us(abs_tsc);
+	if (tsc < 0)
+		return -us;
+	else
+		return us;
+}
+
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
+{
+	switch (point->type & IPIPE_TYPE_MASK) {
+		case IPIPE_TRACE_FUNC:
+			strcpy(buf, "func    ");
+			break;
+
+		case IPIPE_TRACE_BEGIN:
+			strcpy(buf, "begin   ");
+			break;
+
+		case IPIPE_TRACE_END:
+			strcpy(buf, "end     ");
+			break;
+
+		case IPIPE_TRACE_FREEZE:
+			strcpy(buf, "freeze  ");
+			break;
+
+		case IPIPE_TRACE_SPECIAL:
+			sprintf(buf, "(0x%02x)  ",
+				point->type >> IPIPE_TYPE_BITS);
+			break;
+
+		case IPIPE_TRACE_PID:
+			sprintf(buf, "[%5d] ", (pid_t)point->v);
+			break;
+
+		case IPIPE_TRACE_EVENT:
+			sprintf(buf, "event   ");
+			break;
+	}
+}
+
+static void
+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	char mark = ' ';
+	int point_no = point - print_path->point;
+	int i;
+
+	if (print_path->end == point_no)
+		mark = '<';
+	else if (print_path->begin == point_no)
+		mark = '>';
+	else if (__ipipe_in_critical_trpath(point_no))
+		mark = ':';
+	seq_printf(m, "%c%c", mark,
+	           (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+	if (!verbose_trace)
+		return;
+
+	for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+		seq_printf(m, "%c",
+			(IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+			    (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+				'#' : '+') :
+			(IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
+}
+
+static void
+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	unsigned long delay = 0;
+	int next;
+	char *mark = "  ";
+
+	next = WRAP_POINT_NO(point+1 - print_path->point);
+
+	if (next != print_path->trace_pos)
+		delay = ipipe_tsc2ns(print_path->point[next].timestamp -
+		                     point->timestamp);
+
+	if (__ipipe_in_critical_trpath(point - print_path->point)) {
+		if (delay > IPIPE_DELAY_WARN)
+			mark = "! ";
+		else if (delay > IPIPE_DELAY_NOTE)
+			mark = "+ ";
+	}
+	seq_puts(m, mark);
+
+	if (verbose_trace)
+		seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
+		           (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
+	else
+		seq_puts(m, " ");
+}
+
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+	if (!m) {
+		/* panic dump */
+		if (sym_name) {
+			printk("%s+0x%lx", sym_name, offset);
+			if (modname)
+				printk(" [%s]", modname);
+		}
+	} else
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+	{
+		if (sym_name) {
+			if (verbose_trace) {
+				seq_printf(m, "%s+0x%lx", sym_name, offset);
+				if (modname)
+					seq_printf(m, " [%s]", modname);
+			} else
+				seq_puts(m, sym_name);
+		} else
+			seq_printf(m, "<%08lx>", eip);
+	}
+}
+
+static void __ipipe_print_headline(struct seq_file *m)
+{
+	seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
+		   "us\n\n", trace_overhead/1000, trace_overhead%1000);
+
+	if (verbose_trace) {
+		const char *name[4] = { [0 ... 3] = "<unused>" };
+		struct list_head *pos;
+		int i = 0;
+
+		list_for_each_prev(pos, &__ipipe_pipeline) {
+			struct ipipe_domain *ipd =
+				list_entry(pos, struct ipipe_domain, p_link);
+
+			name[i] = ipd->name;
+			if (++i > 3)
+				break;
+		}
+
+		seq_printf(m,
+		           " +----- Hard IRQs ('|': locked)\n"
+		           " |+---- %s\n"
+		           " ||+--- %s\n"
+		           " |||+-- %s\n"
+		           " ||||+- %s%s\n"
+		           " |||||                        +---------- "
+		               "Delay flag ('+': > %d us, '!': > %d us)\n"
+		           " |||||                        |        +- "
+		               "NMI noise ('N')\n"
+		           " |||||                        |        |\n"
+		           "      Type    User Val.   Time    Delay  Function "
+		               "(Parent)\n",
+		           name[3], name[2], name[1], name[0],
+		           name[0] ? " ('*': domain stalled, '+': current, "
+		               "'#': current+stalled)" : "",
+		           IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+	} else
+		seq_printf(m,
+		           " +--------------- Hard IRQs ('|': locked)\n"
+		           " |             +- Delay flag "
+		               "('+': > %d us, '!': > %d us)\n"
+		           " |             |\n"
+		           "  Type     Time   Function (Parent)\n",
+		           IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+}
+
+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *tp;
+		unsigned long length_usecs;
+		int points, cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the longest of all per-cpu paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
+			if ((print_path == NULL) ||
+			    (tp->length > print_path->length)) {
+				print_path = tp;
+				break;
+			}
+		}
+		print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		/* does this path actually contain data? */
+		if (print_path->end == print_path->begin)
+			return NULL;
+
+		/* number of points inside the critical path */
+		points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
+
+		/* pre- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, pre-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace  = pre_trace;
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+		                                 print_path->end - 1);
+		if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
+				print_post_trace;
+
+		length_usecs = ipipe_tsc2us(print_path->length);
+		seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n"
+			"------------------------------------------------------------\n",
+			UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
+			"%d (-%d/+%d), Length: %lu us\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			points, print_pre_trace, print_post_trace, length_usecs);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+	                       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+	                                        print_pre_trace + n)];
+}
+
+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	loff_t n = ++*pos;
+
+	/* check if we are inside the trace range with the next entry */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+	                       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+	                                        print_pre_trace + *pos)];
+}
+
+static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
+{
+	if (print_path)
+		print_path->dump_lock = 0;
+	mutex_unlock(&out_mutex);
+}
+
+static int __ipipe_prtrace_show(struct seq_file *m, void *p)
+{
+	long time;
+	struct ipipe_trace_point *point = p;
+	char buf[16];
+
+	if (!point->eip) {
+		seq_puts(m, "-<invalid>-\n");
+		return 0;
+	}
+
+	__ipipe_print_pathmark(m, point);
+	__ipipe_trace_point_type(buf, point);
+	seq_puts(m, buf);
+	if (verbose_trace)
+		switch (point->type & IPIPE_TYPE_MASK) {
+			case IPIPE_TRACE_FUNC:
+				seq_puts(m, "           ");
+				break;
+
+			case IPIPE_TRACE_PID:
+				__ipipe_get_task_info(buf, point, 0);
+				seq_puts(m, buf);
+				break;
+
+			case IPIPE_TRACE_EVENT:
+				__ipipe_get_event_date(buf, print_path, point);
+				seq_puts(m, buf);
+				break;
+
+			default:
+				seq_printf(m, "0x%08lx ", point->v);
+		}
+
+	time = __ipipe_signed_tsc2us(point->timestamp -
+		print_path->point[print_path->begin].timestamp);
+	seq_printf(m, "%5ld", time);
+
+	__ipipe_print_delay(m, point);
+	__ipipe_print_symname(m, point->eip);
+	seq_puts(m, " (");
+	__ipipe_print_symname(m, point->parent_eip);
+	seq_puts(m, ")\n");
+
+	return 0;
+}
+
+static struct seq_operations __ipipe_max_ptrace_ops = {
+	.start = __ipipe_max_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_max_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_max_reset(struct file *file, const char __user *pbuffer,
+                  size_t count, loff_t *data)
+{
+	mutex_lock(&out_mutex);
+	ipipe_trace_max_reset();
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+struct file_operations __ipipe_max_prtrace_fops = {
+	.open       = __ipipe_max_prtrace_open,
+	.read       = seq_read,
+	.write      = __ipipe_max_reset,
+	.llseek     = seq_lseek,
+	.release    = seq_release,
+};
+
+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *tp;
+		int cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the first of all per-cpu frozen paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
+			if (tp->end >= 0) {
+				print_path = tp;
+				break;
+			}
+		}
+		if (print_path)
+			print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		if (!print_path)
+			return NULL;
+
+		/* back- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, back-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace  = back_trace-1; /* subtract freeze point */
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+		                                 print_path->end - 1);
+		if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 2 -
+				print_post_trace;
+
+		seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n"
+			"------------------------------------------------------"
+			"------\n",
+			UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			print_pre_trace+1, print_post_trace);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= print_pre_trace + 1 + print_post_trace)
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin-
+	                                        print_pre_trace+n)];
+}
+
+static struct seq_operations __ipipe_frozen_ptrace_ops = {
+	.start = __ipipe_frozen_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_frozen_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
+                    size_t count, loff_t *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, pbuffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	ipipe_trace_frozen_reset();
+	if (val > 0)
+		ipipe_trace_freeze(-1);
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+struct file_operations __ipipe_frozen_prtrace_fops = {
+	.open       = __ipipe_frozen_prtrace_open,
+	.read       = seq_read,
+	.write      = __ipipe_frozen_ctrl,
+	.llseek     = seq_lseek,
+	.release    = seq_release,
+};
+
+static int __ipipe_rd_proc_val(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "%u\n", *(int *)data);
+	len -= off;
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer,
+                               unsigned long count, void *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	*(int *)data = val;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count,
+			      int *eof, void *data)
+{
+	int len;
+
+	if (!trigger_begin)
+		return 0;
+
+	len = sprint_symbol(page, trigger_begin);
+	page[len++] = '\n';
+
+	len -= off;
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
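+/*
+ * Writing a symbol name to the trigger file resolves it via kallsyms and
+ * arms [trigger_begin, trigger_end]: any traced function entry within
+ * that range makes __ipipe_trace() freeze the current path.
+ */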
+static int __ipipe_wr_trigger(struct file *file, const char __user *buffer,
+			      unsigned long count, void *data)
+{
+	char buf[KSYM_SYMBOL_LEN];
+	unsigned long begin, end;
+
+	if (count > sizeof(buf) - 1)
+		count = sizeof(buf) - 1;
+	if (copy_from_user(buf, buffer, count))
+		return -EFAULT;
+	buf[count] = 0;
+	if (buf[count-1] == '\n')
+		buf[count-1] = 0;
+
+	begin = kallsyms_lookup_name(buf);
+	if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
+		return -ENOENT;
+	end += begin - 1;
+
+	mutex_lock(&out_mutex);
+	/* invalidate the current range before setting a new one */
+	trigger_end = 0;
+	wmb();
+	ipipe_trace_frozen_reset();
+
+	/* set new range */
+	trigger_begin = begin;
+	wmb();
+	trigger_end = end;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+static void notrace
+ipipe_trace_function(unsigned long ip, unsigned long parent_ip)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0);
+}
+
+static struct ftrace_ops ipipe_trace_ops = {
+	.func = ipipe_trace_function
+};
+
+static int __ipipe_wr_enable(struct file *file, const char __user *buffer,
+			     unsigned long count, void *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+
+	if (ipipe_trace_enable) {
+		if (!val)
+			unregister_ftrace_function(&ipipe_trace_ops);
+	} else if (val)
+		register_ftrace_function(&ipipe_trace_ops);
+
+	ipipe_trace_enable = val;
+
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+
+extern struct proc_dir_entry *ipipe_proc_root;
+
+static struct proc_dir_entry * __init
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
+                              const char *name, int *value_ptr)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry(name, 0644, trace_dir);
+	if (entry) {
+		entry->data = value_ptr;
+		entry->read_proc = __ipipe_rd_proc_val;
+		entry->write_proc = __ipipe_wr_proc_val;
+	}
+	return entry;
+}
+
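+/*
+ * Set up the tracer: allocate the per-CPU path buffers (when vmalloc'ed),
+ * calibrate the per-point overhead, and create the "trace" directory
+ * below ipipe_proc_root with the max/frozen/trigger files plus the
+ * runtime tunables (*_trace_points, verbose, enable).
+ */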
+void __init __ipipe_init_tracer(void)
+{
+	struct proc_dir_entry *trace_dir;
+	struct proc_dir_entry *entry;
+	unsigned long long start, end, min = ULLONG_MAX;
+	int i;
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+	int cpu, path;
+
+	for_each_possible_cpu(cpu) {
+		struct ipipe_trace_path *tp_buf;
+
+		tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) *
+				      IPIPE_TRACE_PATHS, cpu_to_node(cpu));
+		if (!tp_buf) {
+			printk(KERN_ERR "I-pipe: "
+			       "insufficient memory for trace buffer.\n");
+			return;
+		}
+		memset(tp_buf, 0,
+		       sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+		for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
+			tp_buf[path].begin = -1;
+			tp_buf[path].end   = -1;
+		}
+		per_cpu(trace_path, cpu) = tp_buf;
+	}
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+	/* Calculate minimum overhead of __ipipe_trace() */
+	local_irq_disable_hw();
+	for (i = 0; i < 100; i++) {
+		ipipe_read_tsc(start);
+		__ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
+			      __BUILTIN_RETURN_ADDRESS1, 0);
+		ipipe_read_tsc(end);
+
+		end -= start;
+		if (end < min)
+			min = end;
+	}
+	local_irq_enable_hw();
+	trace_overhead = ipipe_tsc2ns(min);
+
+#ifdef CONFIG_IPIPE_TRACE_ENABLE
+	ipipe_trace_enable = 1;
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+	ftrace_enabled = 1;
+	register_ftrace_function(&ipipe_trace_ops);
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+#endif /* CONFIG_IPIPE_TRACE_ENABLE */
+
+	trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root);
+
+	entry = create_proc_entry("max", 0644, trace_dir);
+	if (entry)
+		entry->proc_fops = &__ipipe_max_prtrace_fops;
+
+	entry = create_proc_entry("frozen", 0644, trace_dir);
+	if (entry)
+		entry->proc_fops = &__ipipe_frozen_prtrace_fops;
+
+	entry = create_proc_entry("trigger", 0644, trace_dir);
+	if (entry) {
+		entry->read_proc = __ipipe_rd_trigger;
+		entry->write_proc = __ipipe_wr_trigger;
+	}
+
+	__ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
+	                              &pre_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
+	                              &post_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
+	                              &back_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "verbose",
+	                              &verbose_trace);
+	entry = __ipipe_create_trace_proc_val(trace_dir, "enable",
+					      &ipipe_trace_enable);
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+	if (entry)
+		entry->write_proc = __ipipe_wr_enable;
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index baa5c4a..aeb8cc1 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/ipipe.h>
 
 #include "internals.h"
 
@@ -505,7 +506,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	irqreturn_t action_ret;
 
 	raw_spin_lock(&desc->lock);
+#ifndef CONFIG_IPIPE
 	mask_ack_irq(desc);
+#endif
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -572,6 +575,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 	}
 
+	if (desc->status & IRQ_ONESHOT)
+		mask_irq(desc);
+
 	desc->status |= IRQ_INPROGRESS;
 	desc->status &= ~IRQ_PENDING;
 	raw_spin_unlock(&desc->lock);
@@ -582,8 +588,14 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 
 	raw_spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
+#ifdef CONFIG_IPIPE
+	if (!(desc->status & IRQ_MASKED))
+		desc->irq_data.chip->irq_unmask(&desc->irq_data);
+out:
+#else
 out:
 	desc->irq_data.chip->irq_eoi(&desc->irq_data);
+#endif
 
 	raw_spin_unlock(&desc->lock);
 }
@@ -625,7 +637,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
+#ifndef CONFIG_IPIPE
 	desc->irq_data.chip->irq_ack(&desc->irq_data);
+#endif
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -678,8 +692,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 
 	kstat_incr_irqs_this_cpu(irq, desc);
 
+#ifndef CONFIG_IPIPE
 	if (desc->irq_data.chip->irq_ack)
 		desc->irq_data.chip->irq_ack(&desc->irq_data);
+#endif /* CONFIG_IPIPE */
 
 	action_ret = handle_IRQ_event(irq, desc->action);
 	if (!noirqdebug)
@@ -689,6 +705,135 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 		desc->irq_data.chip->irq_eoi(&desc->irq_data);
 }
 
+#ifdef CONFIG_IPIPE
+
+void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc)
+{
+	mask_ack_irq(desc);
+}
+
+void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->irq_data.chip->irq_unmask)
+		desc->irq_data.chip->irq_unmask(&desc->irq_data);
+}
+
+void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->irq_data.chip->irq_eoi)
+		desc->irq_data.chip->irq_eoi(&desc->irq_data);
+}
+
+void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc)
+{
+	/*
+	 * Non-requestable IRQs should not be masked in EOI handler.
+	 */
+	if (!(desc->status & IRQ_NOREQUEST))
+		desc->irq_data.chip->irq_unmask(&desc->irq_data);
+}
+
+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->irq_data.chip->irq_ack(&desc->irq_data);
+}
+
+void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->irq_data.chip->irq_ack)
+		desc->irq_data.chip->irq_ack(&desc->irq_data);
+}
+
+void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->irq_data.chip->irq_eoi)
+		desc->irq_data.chip->irq_eoi(&desc->irq_data);
+}
+
+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc)
+{
+	static int done;
+
+	handle_bad_irq(irq, desc);
+
+	if (!done) {
+		printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n",
+		       __FUNCTION__, irq);
+		done = 1;
+	}
+}
+
+void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+irq_flow_handler_t
+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained)
+{
+	if (unlikely(handle == NULL)) {
+		desc->ipipe_ack = &__ipipe_ack_bad_irq;
+		desc->ipipe_end = &__ipipe_noend_irq;
+	} else {
+		if (is_chained) {
+			desc->ipipe_ack = handle;
+			desc->ipipe_end = &__ipipe_noend_irq;
+			handle = __ipipe_noack_irq;
+		} else if (handle == &handle_simple_irq) {
+			desc->ipipe_ack = &__ipipe_ack_simple_irq;
+			desc->ipipe_end = &__ipipe_end_simple_irq;
+		} else if (handle == &handle_level_irq) {
+			desc->ipipe_ack = &__ipipe_ack_level_irq;
+			desc->ipipe_end = &__ipipe_end_level_irq;
+		} else if (handle == &handle_edge_irq) {
+			desc->ipipe_ack = &__ipipe_ack_edge_irq;
+			desc->ipipe_end = &__ipipe_end_edge_irq;
+		} else if (handle == &handle_fasteoi_irq) {
+			desc->ipipe_ack = &__ipipe_ack_fasteoi_irq;
+			desc->ipipe_end = &__ipipe_end_fasteoi_irq;
+		} else if (handle == &handle_percpu_irq) {
+			desc->ipipe_ack = &__ipipe_ack_percpu_irq;
+			desc->ipipe_end = &__ipipe_end_percpu_irq;
+		} else if (get_irq_desc_chip(desc) == &no_irq_chip) {
+			desc->ipipe_ack = &__ipipe_noack_irq;
+			desc->ipipe_end = &__ipipe_noend_irq;
+		} else {
+			desc->ipipe_ack = &__ipipe_ack_bad_irq;
+			desc->ipipe_end = &__ipipe_noend_irq;
+		}
+	}
+
+	/* Suppress intermediate trampoline routine. */
+	ipipe_root_domain->irqs[desc->irq_data.irq].acknowledge = desc->ipipe_ack;
+
+	return handle;
+}
+
+#else /* !CONFIG_IPIPE */
+
+irq_flow_handler_t
+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained)
+{
+	return handle;
+}
+
+#endif /* !CONFIG_IPIPE */
+
 void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		  const char *name)
@@ -720,6 +865,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	chip_bus_lock(desc);
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
+	handle = __fixup_irq_handler(desc, handle, is_chained);
+
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
 		if (desc->irq_data.chip != &no_irq_chip)
@@ -731,7 +878,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	desc->name = name;
 
 	if (handle != handle_bad_irq && is_chained) {
-		desc->status &= ~IRQ_DISABLED;
+		desc->status &= ~(IRQ_DISABLED | IRQ_MASKED);
 		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
 		desc->depth = 0;
 		desc->irq_data.chip->irq_startup(&desc->irq_data);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2039bea..17bad02 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -269,10 +269,12 @@ int __init early_irq_init(void)
 	return arch_early_irq_init();
 }
 
+#ifndef CONFIG_IPIPE
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
+#endif /* CONFIG_IPIPE */
 
 struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9033c1c..57fdf57 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -785,7 +785,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 		if (!(desc->status & IRQ_NOAUTOEN)) {
 			desc->depth = 0;
-			desc->status &= ~IRQ_DISABLED;
+			desc->status &= ~(IRQ_DISABLED | IRQ_MASKED);
 			desc->irq_data.chip->irq_startup(&desc->irq_data);
 		} else
 			/* Undo nested disables: */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0d2058d..1ac7e20 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2318,7 +2318,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 	/* we'll do an OFF -> ON transition: */
 	curr->hardirqs_enabled = 1;
 
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
 		return;
 	if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
 		return;
@@ -2361,7 +2361,7 @@ void trace_hardirqs_off_caller(unsigned long ip)
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
 		return;
 
 	if (curr->hardirqs_enabled) {
@@ -2393,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip)
 	if (unlikely(!debug_locks))
 		return;
 
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
 		return;
 
 	if (curr->softirqs_enabled) {
@@ -2427,7 +2427,7 @@ void trace_softirqs_off(unsigned long ip)
 	if (unlikely(!debug_locks))
 		return;
 
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
 		return;
 
 	if (curr->softirqs_enabled) {
diff --git a/kernel/panic.c b/kernel/panic.c
index 991bb87..7145489 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/nmi.h>
 #include <linux/dmi.h>
+#include <linux/ipipe_trace.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -319,6 +320,8 @@ void oops_enter(void)
 {
 	tracing_off();
 	/* can't trust the integrity of the kernel anymore: */
+	ipipe_trace_panic_freeze();
+	ipipe_disable_context_check(ipipe_processor_id());
 	debug_locks_off();
 	do_oops_enter_exit();
 }
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1832bd2..9f2a552 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -270,6 +270,7 @@ static int create_image(int platform_mode)
 		goto Enable_cpus;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 
 	error = sysdev_suspend(PMSG_FREEZE);
 	if (error) {
@@ -301,6 +302,7 @@ static int create_image(int platform_mode)
 	 */
 
  Enable_irqs:
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 
  Enable_cpus:
@@ -401,6 +403,7 @@ static int resume_target_kernel(bool platform_mode)
 		goto Enable_cpus;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 
 	error = sysdev_suspend(PMSG_QUIESCE);
 	if (error)
@@ -432,6 +435,7 @@ static int resume_target_kernel(bool platform_mode)
 	sysdev_resume();
 
  Enable_irqs:
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 
  Enable_cpus:
@@ -515,6 +519,7 @@ int hibernation_platform_enter(void)
 		goto Platform_finish;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 	sysdev_suspend(PMSG_HIBERNATE);
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
diff --git a/kernel/printk.c b/kernel/printk.c
index 3623152..beb8497 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -614,6 +614,41 @@ static int have_callable_console(void)
 	return 0;
 }
 
+#ifdef CONFIG_IPIPE
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock);
+
+static int __ipipe_printk_fill;
+
+static char __ipipe_printk_buf[__LOG_BUF_LEN];
+
+void __ipipe_flush_printk (unsigned virq, void *cookie)
+{
+	char *p = __ipipe_printk_buf;
+	int len, lmax, out = 0;
+	unsigned long flags;
+
+	goto start;
+
+	do {
+		spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+ start:
+		lmax = __ipipe_printk_fill;
+		while (out < lmax) {
+			len = strlen(p) + 1;
+			printk("%s",p);
+			p += len;
+			out += len;
+		}
+		spin_lock_irqsave(&__ipipe_printk_lock, flags);
+	}
+	while (__ipipe_printk_fill != lmax);
+
+	__ipipe_printk_fill = 0;
+
+	spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+}
+
 /**
  * printk - print a kernel message
  * @fmt: format string
@@ -638,6 +673,68 @@ static int have_callable_console(void)
 
 asmlinkage int printk(const char *fmt, ...)
 {
+	int r, fbytes, oldcount;
+	unsigned long flags;
+	int sprintk = 1;
+	int cs = -1;
+	va_list args;
+
+	va_start(args, fmt);
+
+	local_irq_save_hw(flags);
+
+	if (test_bit(IPIPE_SPRINTK_FLAG, &__ipipe_current_domain->flags) ||
+	    oops_in_progress)
+		cs = ipipe_disable_context_check(ipipe_processor_id());
+	else if (__ipipe_root_domain_p) {
+		struct ipipe_domain *dom;
+
+		list_for_each_entry(dom, &__ipipe_pipeline, p_link) {
+			if (dom == ipipe_root_domain)
+				break;
+			if (test_bit(IPIPE_STALL_FLAG,
+				     &ipipe_cpudom_var(dom, status))
+			    || raw_irqs_disabled_flags(flags)) {
+				sprintk = 0;
+				break;
+			}
+		}
+	} else
+		sprintk = 0;
+
+	local_irq_restore_hw(flags);
+
+	if (sprintk) {
+		r = vprintk(fmt, args);
+		if (cs != -1)
+			ipipe_restore_context_check(ipipe_processor_id(), cs);
+		goto out;
+	}
+
+	spin_lock_irqsave(&__ipipe_printk_lock, flags);
+
+	oldcount = __ipipe_printk_fill;
+	fbytes = __LOG_BUF_LEN - oldcount;
+
+	if (fbytes > 1)	{
+		r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill,
+			       fbytes, fmt, args) + 1; /* account for the null byte */
+		__ipipe_printk_fill += r;
+	} else
+		r = 0;
+
+	spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+
+	if (oldcount == 0)
+		ipipe_trigger_irq(__ipipe_printk_virq);
+out:
+	va_end(args);
+
+	return r;
+}
+#else /* !CONFIG_IPIPE */
+asmlinkage int printk(const char *fmt, ...)
+{
 	va_list args;
 	int r;
 
@@ -655,6 +752,7 @@ asmlinkage int printk(const char *fmt, ...)
 
 	return r;
 }
+#endif /* CONFIG_IPIPE */
 
 /* cpu currently holding logbuf_lock */
 static volatile unsigned int printk_cpu = UINT_MAX;
@@ -841,7 +939,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	 * Try to acquire and then immediately release the
 	 * console semaphore. The release will do all the
 	 * actual magic (print out buffers, wake up klogd,
-	 * etc). 
+	 * etc).
 	 *
 	 * The console_trylock_for_printk() function
 	 * will release 'logbuf_lock' regardless of whether it
@@ -1412,7 +1510,7 @@ EXPORT_SYMBOL(register_console);
 
 int unregister_console(struct console *console)
 {
-        struct console *a, *b;
+	struct console *a, *b;
 	int res = 1;
 
 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
diff --git a/kernel/sched.c b/kernel/sched.c
index c164920c..3651908 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2423,7 +2423,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
-	if (!(p->state & state))
+	if (!(p->state & state) ||
+	    (p->state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH)))
 		goto out;
 
 	if (p->se.on_rq)
@@ -2898,13 +2899,15 @@ asmlinkage void schedule_tail(struct task_struct *prev)
 #endif
 	if (current->set_child_tid)
 		put_user(task_pid_vnr(current), current->set_child_tid);
+
+ 	ipipe_init_notify(current);
 }
 
 /*
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline void
+static inline int
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -2946,12 +2949,23 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	switch_to(prev, next, prev);
 
 	barrier();
+
+#ifdef CONFIG_IPIPE_DELAYED_ATOMICSW
+	current->state &= ~TASK_ATOMICSWITCH;
+#else
+	prev->state &= ~TASK_ATOMICSWITCH;
+#endif
+	if (task_hijacked(prev))
+		return 1;
+
 	/*
 	 * this_rq must be evaluated again because prev may have moved
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
 	finish_task_switch(this_rq(), prev);
+
+	return 0;
 }
 
 /*
@@ -3804,6 +3818,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
 
 void __kprobes add_preempt_count(int val)
 {
+ 	ipipe_check_context(ipipe_root_domain);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
@@ -3826,6 +3841,7 @@ EXPORT_SYMBOL(add_preempt_count);
 
 void __kprobes sub_preempt_count(int val)
 {
+ 	ipipe_check_context(ipipe_root_domain);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
@@ -3874,6 +3890,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
  */
 static inline void schedule_debug(struct task_struct *prev)
 {
+	ipipe_check_context(ipipe_root_domain);
 	/*
 	 * Test if we are atomic. Since do_exit() needs to call into
 	 * schedule() atomically, we ignore that path for now.
@@ -3931,7 +3948,7 @@ pick_next_task(struct rq *rq)
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched schedule(void)
+asmlinkage int __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -3945,6 +3962,10 @@ need_resched:
 	rcu_note_context_switch(cpu);
 	prev = rq->curr;
 
+ 	if (unlikely(prev->state & TASK_ATOMICSWITCH))
+		/* Pop one disable level -- one still remains. */
+		preempt_enable();
+
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
 
@@ -3996,7 +4017,8 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		context_switch(rq, prev, next); /* unlocks the rq */
+ 		if (context_switch(rq, prev, next)) /* unlocks the rq */
+  			return 1; /* task hijacked by higher domain */
 		/*
 		 * The context switch have flipped the stack from under us
 		 * and restored the local variables which were saved when
@@ -4005,8 +4027,10 @@ need_resched_nonpreemptible:
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else
+	} else {
+  		prev->state &= ~TASK_ATOMICSWITCH;
 		raw_spin_unlock_irq(&rq->lock);
+	}
 
 	post_schedule(rq);
 
@@ -4016,6 +4040,8 @@ need_resched_nonpreemptible:
 	preempt_enable_no_resched();
 	if (need_resched())
 		goto need_resched;
+
+	return 0;
 }
 EXPORT_SYMBOL(schedule);
 
@@ -4107,7 +4133,8 @@ asmlinkage void __sched notrace preempt_schedule(void)
 
 	do {
 		add_preempt_count_notrace(PREEMPT_ACTIVE);
-		schedule();
+		if (schedule())
+			return;
 		sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
 		/*
@@ -4900,6 +4927,7 @@ recheck:
 	oldprio = p->prio;
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, policy, param->sched_priority);
+  	ipipe_setsched_notify(p);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -5568,6 +5596,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 #else
 	task_thread_info(idle)->preempt_count = 0;
 #endif
+	ipipe_check_context(ipipe_root_domain);
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
@@ -9255,3 +9284,63 @@ struct cgroup_subsys cpuacct_subsys = {
 };
 #endif	/* CONFIG_CGROUP_CPUACCT */
 
+
+#ifdef CONFIG_IPIPE
+
+int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio)
+{
+	const struct sched_class *prev_class;
+	int oldprio, on_rq, running;
+	unsigned long flags;
+	struct rq *rq;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	rq = __task_rq_lock(p);
+	on_rq = p->se.on_rq;
+	running = task_current(rq, p);
+	if (on_rq)
+		deactivate_task(rq, p, 0);
+	if (running)
+		p->sched_class->put_prev_task(rq, p);
+
+	p->sched_reset_on_fork = 0;
+
+	oldprio = p->prio;
+	prev_class = p->sched_class;
+	__setscheduler(rq, p, policy, prio);
+	ipipe_setsched_notify(p);
+
+	if (running)
+		p->sched_class->set_curr_task(rq);
+	if (on_rq) {
+		activate_task(rq, p, 0);
+		check_class_changed(rq, p, prev_class, oldprio, running);
+	}
+	__task_rq_unlock(rq);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	rt_mutex_adjust_pi(p);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_setscheduler_root);
+
+int ipipe_reenter_root(struct task_struct *prev, int policy, int prio)
+{
+	struct rq *rq = this_rq();
+
+	finish_task_switch(rq, prev);
+
+	post_schedule(rq);
+
+	reacquire_kernel_lock(current);
+	preempt_enable_no_resched();
+
+	if (current->policy != policy || current->rt_priority != prio)
+		return ipipe_setscheduler_root(current, policy, prio);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_reenter_root);
+
+#endif /* CONFIG_IPIPE */
diff --git a/kernel/signal.c b/kernel/signal.c
index bf11d269..9e907e1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -558,6 +558,7 @@ void signal_wake_up(struct task_struct *t, int resume)
 	unsigned int mask;
 
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
+	ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */
 
 	/*
 	 * For SIGKILL, we want to wake it up in the stopped/traced/killable
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index be6517f..862aed4 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -26,7 +26,9 @@
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) ||			\
+	defined(CONFIG_DEBUG_LOCK_ALLOC) ||			\
+	defined(CONFIG_IPIPE)
 /*
  * The __lock_function inlines are taken from
  * include/linux/spinlock_api_smp.h
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ed228ef..33cf47d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -73,7 +73,7 @@ static void tick_periodic(int cpu)
 		write_sequnlock(&xtime_lock);
 	}
 
-	update_process_times(user_mode(get_irq_regs()));
+	update_root_process_times(get_irq_regs());
 	profile_tick(CPU_PROFILING);
 }
 
@@ -181,6 +181,10 @@ static void tick_setup_device(struct tick_device *td,
 
 	td->evtdev = newdev;
 
+	/* I-pipe: derive global tick IRQ from CPU 0 */
+	if (cpu == 0)
+		ipipe_update_tick_evtdev(newdev);
+
 	/*
 	 * When the device is not per cpu, pin the interrupt to the
 	 * current cpu:
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c55ea24..bd9c203 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -598,7 +598,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 		ts->idle_jiffies++;
 	}
 
-	update_process_times(user_mode(regs));
+	update_root_process_times(regs);
 	profile_tick(CPU_PROFILING);
 
 	while (tick_nohz_reprogram(ts, now)) {
@@ -757,7 +757,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 			touch_softlockup_watchdog();
 			ts->idle_jiffies++;
 		}
-		update_process_times(user_mode(regs));
+		update_root_process_times(regs);
 		profile_tick(CPU_PROFILING);
 	}
 
diff --git a/kernel/timer.c b/kernel/timer.c
index d645992..240bb42 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1273,6 +1273,25 @@ void update_process_times(int user_tick)
 	run_posix_cpu_timers(p);
 }
 
+#ifdef CONFIG_IPIPE
+
+void update_root_process_times(struct pt_regs *regs)
+{
+	int cpu, user_tick = user_mode(regs);
+
+	if (__ipipe_root_tick_p(regs)) {
+		update_process_times(user_tick);
+		return;
+	}
+
+	run_local_timers();
+	cpu = smp_processor_id();
+	rcu_check_callbacks(cpu, user_tick);
+	run_posix_cpu_timers(current);
+}
+
+#endif
+
 /*
  * This function runs timers and the timer-tq in bottom half context.
  */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9803b68..bb3ab19 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,6 +29,7 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
+#include <linux/ipipe.h>
 
 #include <trace/events/sched.h>
 
@@ -1156,6 +1157,9 @@ static int __ftrace_modify_code(void *data)
 
 static void ftrace_run_update_code(int command)
 {
+#ifdef CONFIG_IPIPE
+	unsigned long flags;
+#endif /* CONFIG_IPIPE */
 	int ret;
 
 	ret = ftrace_arch_code_modify_prepare();
@@ -1163,7 +1167,13 @@ static void ftrace_run_update_code(int command)
 	if (ret)
 		return;
 
+#ifdef CONFIG_IPIPE
+	flags = ipipe_critical_enter(NULL);
+	__ftrace_modify_code(&command);
+	ipipe_critical_exit(flags);
+#else  /* !CONFIG_IPIPE */
 	stop_machine(__ftrace_modify_code, &command, NULL);
+#endif /* !CONFIG_IPIPE */
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
@@ -2724,9 +2734,9 @@ static int ftrace_process_locs(struct module *mod,
 	}
 
 	/* disable interrupts to prevent kstop machine */
-	local_irq_save(flags);
+	local_irq_save_hw_notrace(flags);
 	ftrace_update_code(mod);
-	local_irq_restore(flags);
+	local_irq_restore_hw_notrace(flags);
 	mutex_unlock(&ftrace_lock);
 
 	return 0;
@@ -2805,9 +2815,9 @@ void __init ftrace_init(void)
 	/* Keep the ftrace pointer to the stub */
 	addr = (unsigned long)ftrace_stub;
 
-	local_irq_save(flags);
+	local_irq_save_hw_notrace(flags);
 	ftrace_dyn_arch_init(&addr);
-	local_irq_restore(flags);
+	local_irq_restore_hw_notrace(flags);
 
 	/* ftrace_dyn_arch_init places the return code in addr */
 	if (addr)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d1b5951..0cf26e4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -136,6 +136,8 @@ config DEBUG_SECTION_MISMATCH
 	  - Enable verbose reporting from modpost to help solving
 	    the section mismatches reported.
 
+source "kernel/ipipe/Kconfig.debug"
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index 9681d54..2dba50c 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -13,6 +13,7 @@
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
 #include <linux/console.h>
+#include <linux/ipipe_trace.h>
 
 
 void __attribute__((weak)) bust_spinlocks(int yes)
@@ -24,6 +25,7 @@ void __attribute__((weak)) bust_spinlocks(int yes)
 		unblank_screen();
 #endif
 		console_unblank();
+  		ipipe_trace_panic_dump();
 		if (--oops_in_progress == 0)
 			wake_up_klogd();
 	}
diff --git a/lib/ioremap.c b/lib/ioremap.c
index da4e2ad..729c995 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -86,8 +86,8 @@ int ioremap_page_range(unsigned long addr,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-
-	flush_cache_vmap(start, end);
+	__ipipe_pin_range_globally(start, end);
+ 	flush_cache_vmap(start, end);
 
 	return err;
 }
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 4689cb0..3d12764 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void)
 	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
 
+	if (!ipipe_root_domain_p)
+		goto out;
+
 	if (likely(preempt_count))
 		goto out;
 
-	if (irqs_disabled())
+	if (irqs_disabled() || irqs_disabled_hw())
 		goto out;
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index c8fff70..18a59619 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -647,6 +647,32 @@ out:
 	return pfn_to_page(pfn);
 }
 
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			clear_page(kaddr);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(dst);
+	} else
+		copy_user_highpage(dst, src, va, vma);
+}
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
@@ -655,8 +681,8 @@ out:
 
 static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+	     unsigned long addr, int *rss, struct page *uncow_page)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -699,6 +725,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags)) {
+#ifdef CONFIG_IPIPE
+		if (uncow_page) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			cow_user_page(uncow_page, old_page, addr, vma);
+			pte = mk_pte(uncow_page, vma->vm_page_prot);
+
+			if (vm_flags & VM_SHARED)
+				pte = pte_mkclean(pte);
+			pte = pte_mkold(pte);
+
+			page_add_new_anon_rmap(uncow_page, vma, addr);
+			rss[!!PageAnon(uncow_page)]++;
+			goto out_set_pte;
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -736,13 +777,27 @@ int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	int progress = 0;
 	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
-
+	struct page *uncow_page = NULL;
+#ifdef CONFIG_IPIPE
+	int do_cow_break = 0;
+again:
+	if (do_cow_break) {
+		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (uncow_page == NULL)
+			return -ENOMEM;
+		do_cow_break = 0;
+	}
+#else
 again:
+#endif
 	init_rss_vec(rss);
 
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
+	if (!dst_pte) {
+		if (uncow_page)
+			page_cache_release(uncow_page);
 		return -ENOMEM;
+	}
 	src_pte = pte_offset_map(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -765,8 +820,25 @@ again:
 			progress++;
 			continue;
 		}
+#ifdef CONFIG_IPIPE
+		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+			if (is_cow_mapping(vma->vm_flags) &&
+			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
+			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
+				arch_leave_lazy_mmu_mode();
+				spin_unlock(src_ptl);
+				pte_unmap(src_pte);
+				add_mm_rss_vec(dst_mm, rss);
+				pte_unmap_unlock(dst_pte, dst_ptl);
+				cond_resched();
+				do_cow_break = 1;
+				goto again;
+			}
+		}
+#endif
 		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-							vma, addr, rss);
+					 vma, addr, rss, uncow_page);
+		uncow_page = NULL;
 		if (entry.val)
 			break;
 		progress += 8;
@@ -2143,32 +2215,6 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 	return same;
 }
 
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
-{
-	/*
-	 * If the source page was a PFN mapping, we don't have
-	 * a "struct page" for it. We do a best-effort copy by
-	 * just copying from the original user address. If that
-	 * fails, we just zero-fill it. Live with it.
-	 */
-	if (unlikely(!src)) {
-		void *kaddr = kmap_atomic(dst, KM_USER0);
-		void __user *uaddr = (void __user *)(va & PAGE_MASK);
-
-		/*
-		 * This really shouldn't fail, because the page is there
-		 * in the page tables. But it might just be unreadable,
-		 * in which case we just give up and fill the result with
-		 * zeroes.
-		 */
-		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
-			clear_page(kaddr);
-		kunmap_atomic(kaddr, KM_USER0);
-		flush_dcache_page(dst);
-	} else
-		copy_user_highpage(dst, src, va, vma);
-}
-
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
@@ -3795,3 +3841,118 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 	}
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#ifdef CONFIG_IPIPE
+
+static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	do {
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte)
+			continue;
+
+		if (!pte_present(*pte) || pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
+			continue;
+		}
+
+		if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
+			return -ENOMEM;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		if (ipipe_pin_pte_range(mm, pmd, vma, addr, next))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		if (ipipe_pin_pmd_range(mm, pud, vma, addr, next))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	unsigned long addr, next, end;
+	pgd_t *pgd;
+
+	addr = vma->vm_start;
+	end = vma->vm_end;
+
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		if (ipipe_pin_pud_range(mm, pgd, vma, addr, next))
+			return -ENOMEM;
+	} while (pgd++, addr = next, addr != end);
+
+	return 0;
+}
+
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	int result = 0;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	down_write(&mm->mmap_sem);
+	if (test_bit(MMF_VM_PINNED, &mm->flags))
+		goto done_mm;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (is_cow_mapping(vma->vm_flags) &&
+		    (vma->vm_flags & VM_WRITE)) {
+			result = ipipe_pin_vma(mm, vma);
+			if (result < 0)
+				goto done_mm;
+		}
+	}
+	set_bit(MMF_VM_PINNED, &mm->flags);
+
+  done_mm:
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	return result;
+}
+
+EXPORT_SYMBOL(ipipe_disable_ondemand_mappings);
+
+#endif
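
ipipe_disable_ondemand_mappings() ties the mm/memory.c changes together: it walks every writable private (COW) VMA of the target task, pushes each present, still write-protected PTE through do_wp_page() so the copy-on-write is resolved on the spot, and then tags the mm with MMF_VM_PINNED. From that point on, the copy_pte_range()/copy_one_pte() hunks above give the child its own copy of locked pages at fork() time instead of write-protecting the parent's PTEs, so the pinned task never takes a COW fault again. A hypothetical caller, for instance a real-time core committing a task's memory before letting it run over the head domain; only the exported function and its error codes come from the patch, the surrounding policy is illustrative:

#include <linux/errno.h>
#include <linux/sched.h>

/* Exported above in mm/memory.c. */
int ipipe_disable_ondemand_mappings(struct task_struct *tsk);

/*
 * Hypothetical helper: pin all committed memory of @tsk.  Assumes the
 * task already locked its address space (e.g. mlockall(MCL_CURRENT |
 * MCL_FUTURE)); otherwise new on-demand mappings can still appear later.
 */
static int rt_commit_task_memory(struct task_struct *tsk)
{
	int ret = ipipe_disable_ondemand_mappings(tsk);

	if (ret == -EPERM)
		return ret;	/* no mm to pin (kernel thread) */
	if (ret)
		return ret;	/* -ENOMEM: privatizing a COW page failed */

	/* MMF_VM_PINNED is now set; fork() will copy pages eagerly. */
	return 0;
}
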
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 9e82e93..a4bd34d 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -24,15 +24,18 @@ void use_mm(struct mm_struct *mm)
 {
 	struct mm_struct *active_mm;
 	struct task_struct *tsk = current;
+	unsigned long flags;
 
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
+	ipipe_mm_switch_protect(flags);
 	if (active_mm != mm) {
 		atomic_inc(&mm->mm_count);
 		tsk->active_mm = mm;
 	}
 	tsk->mm = mm;
-	switch_mm(active_mm, mm, tsk);
+	__switch_mm(active_mm, mm, tsk);
+	ipipe_mm_switch_unprotect(flags);
 	task_unlock(tsk);
 
 	if (active_mm != mm)
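
use_mm() now performs the actual MMU switch through __switch_mm() inside an ipipe_mm_switch_protect()/unprotect() pair, so a head-domain IRQ cannot preempt or observe a half-done address-space switch on the root domain. With the pipeline disabled these should degenerate to the plain primitives; a sketch of the assumed fallbacks (the authoritative definitions ship with the I-pipe headers added by this patch):

/* Assumed !CONFIG_IPIPE fallbacks. */
#ifndef CONFIG_IPIPE
#define ipipe_mm_switch_protect(flags)		\
	do { (void)(flags); } while (0)
#define ipipe_mm_switch_unprotect(flags)	\
	do { (void)(flags); } while (0)
#define __switch_mm(prev, next, tsk)	switch_mm(prev, next, tsk)
#endif

The (void) cast also keeps the now otherwise unused flags variable in use_mm() from warning on non-pipeline builds.
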
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 5a688a2..af5da94 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -147,6 +147,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
+	unsigned long protflags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
 	unsigned long charged = 0;
 	pgoff_t pgoff;
@@ -205,8 +206,17 @@ success:
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
+	protflags = newflags;
+#ifdef CONFIG_IPIPE
+	/*
+	 * Enforce non-COW vm_page_prot by faking VM_SHARED on locked regions.
+	 */
+	if (test_bit(MMF_VM_PINNED, &mm->flags) &&
+	    ((vma->vm_flags | mm->def_flags) & VM_LOCKED))
+		protflags |= VM_SHARED;
+#endif
 	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
-					  vm_get_page_prot(newflags));
+					  vm_get_page_prot(protflags));
 
 	if (vma_wants_writenotify(vma)) {
 		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
@@ -214,6 +224,24 @@ success:
 	}
 
 	mmu_notifier_invalidate_range_start(mm, start, end);
+#ifdef CONFIG_IPIPE
+	/*
+	 * Privatize potential COW pages
+	 */
+	if (test_bit(MMF_VM_PINNED, &mm->flags) &&
+	    (((vma->vm_flags | mm->def_flags) & (VM_LOCKED | VM_WRITE)) ==
+	     (VM_LOCKED | VM_WRITE))) {
+		error = ipipe_pin_vma(mm, vma);
+		if (error)
+			/*
+			 * OOM. Just revert the fake VM_SHARED so that the
+			 * zero page cannot be overwritten.
+			 */
+			vma->vm_page_prot =
+				pgprot_modify(vma->vm_page_prot,
+					      vm_get_page_prot(newflags));
+	}
+#endif
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
 	else
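
The mprotect_fixup() changes complement the pinning logic: on an mm already marked MMF_VM_PINNED, a locked and writable region gets its vm_page_prot computed as if VM_SHARED were set, so newly installed PTEs come up writable rather than COW-protected, and ipipe_pin_vma() immediately privatizes whatever COW pages already exist in the range (the faked protection is reverted on OOM so the shared zero page cannot be overwritten). Seen from user space, the sequence that exercises this path would typically look like the following, shown purely as an illustration of the intended usage:

/* Illustrative user-space sequence (not part of the patch): once the
 * real-time core has pinned the process (MMF_VM_PINNED), re-protecting
 * a locked region to read/write no longer leaves COW traps behind. */
#include <sys/mman.h>

static int make_region_writable(void *addr, size_t len)
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		return -1;

	/* With the hunk above, this also un-COWs the pages in the range. */
	return mprotect(addr, len, PROT_READ | PROT_WRITE);
}
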
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f9b1667..c2a5ed2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -171,6 +171,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 			return err;
 	} while (pgd++, addr = next, addr != end);
 
+	__ipipe_pin_range_globally(start, end);
+
 	return nr;
 }
 
