diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9beee7f364ad..bf8daf92bde0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -9,6 +9,7 @@ config 64BIT
 config X86_32
 	def_bool y
 	depends on !64BIT
+	select IPIPE_WANT_CLOCKSOURCE if IPIPE
 
 config X86_64
 	def_bool y
@@ -23,6 +24,10 @@ config X86
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select IPIPE_HAVE_HOSTRT if IPIPE
+	select IPIPE_HAVE_VM_NOTIFIER if IPIPE
+	select IPIPE_HAVE_SAFE_THREAD_INFO if X86_64
+	select IPIPE_WANT_PTE_PINNING if IPIPE
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
@@ -78,7 +83,7 @@ config X86
 	select HAVE_AOUT			if X86_32
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMAP		if X86_64 || X86_PAE
-	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_JUMP_LABEL		if !IPIPE
 	select HAVE_ARCH_KASAN			if X86_64 && SPARSEMEM_VMEMMAP
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KMEMCHECK
@@ -91,7 +96,7 @@ config X86
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
-	select HAVE_CONTEXT_TRACKING		if X86_64
+	select HAVE_CONTEXT_TRACKING		if X86_64 && !IPIPE
 	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
@@ -680,6 +685,7 @@ if HYPERVISOR_GUEST
 
 config PARAVIRT
 	bool "Enable paravirtualization code"
+	depends on !IPIPE
 	---help---
 	  This changes the kernel so it can modify itself when it is run
 	  under a hypervisor, potentially improving performance significantly
@@ -912,6 +918,8 @@ config SCHED_MC
 
 source "kernel/Kconfig.preempt"
 
+source "kernel/ipipe/Kconfig"
+
 config UP_LATE_INIT
        def_bool y
        depends on !SMP && X86_LOCAL_APIC
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 071582a3b5c0..0857250aeb11 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -16,6 +16,7 @@
 #include <linux/tracehook.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
+#include <linux/unistd.h>
 #include <linux/signal.h>
 #include <linux/export.h>
 #include <linux/context_tracking.h>
@@ -32,6 +33,22 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+#ifdef CONFIG_IPIPE
+#define disable_local_irqs()	do {	\
+	hard_local_irq_disable();	\
+	trace_hardirqs_off();		\
+} while (0)
+#define enable_local_irqs()	do {	\
+	trace_hardirqs_on();		\
+	hard_local_irq_enable();	\
+} while (0)
+#define check_irqs_disabled()	hard_irqs_disabled()
+#else
+#define disable_local_irqs()	local_irq_disable()
+#define enable_local_irqs()	local_irq_enable()
+#define check_irqs_disabled()	irqs_disabled()
+#endif
+
 static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 {
 	unsigned long top_of_stack =
@@ -236,7 +253,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 	 */
 	while (true) {
 		/* We have work to do. */
-		local_irq_enable();
+		enable_local_irqs();
 
 		if (cached_flags & _TIF_NEED_RESCHED)
 			schedule();
@@ -257,13 +274,12 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 			fire_user_return_notifiers();
 
 		/* Disable IRQs and retry */
-		local_irq_disable();
+		disable_local_irqs();
 
 		cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
 
 		if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 			break;
-
 	}
 }
 
@@ -273,8 +289,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	u32 cached_flags;
 
-	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
-		local_irq_disable();
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!check_irqs_disabled()))
+		disable_local_irqs();
 
 	lockdep_sys_exit();
 
@@ -335,21 +351,57 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
-	    WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
-		local_irq_enable();
+	    WARN(check_irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
+		enable_local_irqs();
 
 	/*
 	 * First do one-time work.  If these work items are enabled, we
 	 * want to run them exactly once per syscall exit with IRQs on.
 	 */
-	if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
-		syscall_slow_exit_work(regs, cached_flags);
+	if (unlikely((!IS_ENABLED(CONFIG_IPIPE) ||
+		      syscall_get_nr(current, regs) < NR_syscalls) &&
+		     (cached_flags & SYSCALL_EXIT_WORK_FLAGS)))
+		syscall_slow_exit_work(regs, cached_flags);
 
-	local_irq_disable();
+	disable_local_irqs();
 	prepare_exit_to_usermode(regs);
 }
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+
+#ifdef CONFIG_IPIPE
+#ifdef CONFIG_X86_32
+static inline int pipeline_syscall(struct thread_info *ti,
+				   unsigned long nr, struct pt_regs *regs)
+{
+	return ipipe_handle_syscall(ti, nr, regs);
+}
+#else
+static inline int pipeline_syscall(struct thread_info *ti,
+				   unsigned long nr, struct pt_regs *regs)
+{
+	struct pt_regs regs64 = *regs;
+	int ret;
+
+	regs64.di = (unsigned int)regs->bx;
+	regs64.si = (unsigned int)regs->cx;
+	regs64.r10 = (unsigned int)regs->si;
+	regs64.r8 = (unsigned int)regs->di;
+	regs64.r9 = (unsigned int)regs->bp;
+	ret = ipipe_handle_syscall(ti, nr, &regs64);
+	regs->ax = (unsigned int)regs64.ax;
+
+	return ret;
+}
+#endif /* CONFIG_X86_32 */
+#else  /* CONFIG_IPIPE */
+static inline int pipeline_syscall(struct thread_info *ti,
+				   unsigned long nr, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif /* CONFIG_IPIPE */
+
 /*
  * Does a 32-bit syscall.  Called with IRQs on and does all entry and
  * exit work and returns with IRQs off.  This function is extremely hot
@@ -367,11 +419,20 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
 	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	unsigned int nr = (unsigned int)regs->orig_ax;
+	int ret;
 
 #ifdef CONFIG_IA32_EMULATION
 	ti->status |= TS_COMPAT;
 #endif
 
+	ret = pipeline_syscall(ti, nr, regs);
+	if (ret > 0) {
+		disable_local_irqs();
+		return;
+	}
+	if (ret < 0)
+		goto done;
+
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
 		/*
 		 * Subtlety here: if ptrace pokes something larger than
@@ -395,7 +456,7 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 			(unsigned int)regs->dx, (unsigned int)regs->si,
 			(unsigned int)regs->di, (unsigned int)regs->bp);
 	}
-
+done:
 	syscall_return_slowpath(regs);
 }
 
@@ -403,7 +464,7 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 /* Handles INT80 on 64-bit kernels */
 __visible void do_syscall_32_irqs_off(struct pt_regs *regs)
 {
-	local_irq_enable();
+	enable_local_irqs();
 	do_syscall_32_irqs_on(regs);
 }
 #endif
@@ -431,7 +492,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 	 *
 	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
 	 */
-	local_irq_enable();
+	enable_local_irqs();
 	if (
 #ifdef CONFIG_X86_64
 		/*
@@ -447,7 +508,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 		) {
 
 		/* User code screwed up. */
-		local_irq_disable();
+		disable_local_irqs();
 		regs->ax = -EFAULT;
 #ifdef CONFIG_CONTEXT_TRACKING
 		enter_from_user_mode();
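
Under CONFIG_IPIPE, the stock `local_irq_*()` helpers used elsewhere in this file only virtualize interrupt masking for the root domain (the irqflags.h hunks below turn `arch_local_irq_disable()` into `ipipe_stall_root()`), while the `hard_*` variants wrapped by `disable_local_irqs()`/`enable_local_irqs()` drive the real IF flag. A minimal user-space model of that stall-and-replay scheme, with purely illustrative names:

```c
#include <stdio.h>
#include <stdbool.h>

static bool root_stalled;	/* models the root domain stall bit */
static unsigned long pending;	/* models the per-CPU IRQ log       */

static void root_handler(int irq) { printf("root handles IRQ %d\n", irq); }

static void virtual_irq_disable(void) { root_stalled = true; }

/* Hardware never masks here: a "stalled" root domain just logs IRQs. */
static void hard_irq_arrives(int irq)
{
	if (root_stalled)
		pending |= 1UL << irq;	/* defer until unstalled */
	else
		root_handler(irq);
}

/* Unstalling replays the log, the way ipipe_unstall_root() would. */
static void virtual_irq_enable(void)
{
	root_stalled = false;
	for (int irq = 0; irq < 64; irq++) {
		if (pending & (1UL << irq)) {
			pending &= ~(1UL << irq);
			root_handler(irq);
		}
	}
}

int main(void)
{
	virtual_irq_disable();
	hard_irq_arrives(3);	/* deferred, not delivered */
	virtual_irq_enable();	/* IRQ 3 replayed here     */
	return 0;
}
```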
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 49a8c9f7a379..a36d74620baa 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -32,6 +32,7 @@
 #include <linux/err.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
+#include <asm/ipipe_base.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
@@ -61,6 +62,12 @@
  * enough to patch inline, increasing performance.
  */
 
+#ifdef CONFIG_IPIPE
+#define PREEMPT_SCHEDULE_IRQ	call __ipipe_preempt_schedule_irq
+#else /* !CONFIG_IPIPE */
+#define PREEMPT_SCHEDULE_IRQ	call preempt_schedule_irq
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_PREEMPT
 # define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -206,6 +213,7 @@
 .endm
 
 ENTRY(ret_from_fork)
+	HARD_COND_ENABLE_INTERRUPTS
 	pushl	%eax
 	call	schedule_tail
 	GET_THREAD_INFO(%ebp)
@@ -252,7 +260,7 @@ ENDPROC(ret_from_kernel_thread)
 	ALIGN
 ret_from_exception:
 	preempt_stop(CLBR_ANY)
-ret_from_intr:
+ENTRY(ret_from_intr)
 	GET_THREAD_INFO(%ebp)
 #ifdef CONFIG_VM86
 	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
@@ -284,7 +292,7 @@ need_resched:
 	jnz	restore_all
 	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz	restore_all
-	call	preempt_schedule_irq
+	PREEMPT_SCHEDULE_IRQ
 	jmp	need_resched
 END(resume_kernel)
 #endif
@@ -481,6 +489,75 @@ ENTRY(irq_entries_start)
     .endr
 END(irq_entries_start)
 
+#ifdef CONFIG_IPIPE
+
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
+
+.Lunwind_root_irq:
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	ret	/* back to __ipipe_do_IRQ() */
+
+ENTRY(do_root_irq)
+	pushfl
+	orl	$X86_EFLAGS_IF, (%esp)
+	pushl	%cs
+	pushl	$.Lunwind_root_irq
+	pushl	PT_ORIG_EAX(%eax)
+	mov	%edx, %ecx
+	SAVE_ALL
+	pushl	%eax
+	call	*%ecx
+	addl	$4, %esp
+	jmp	ret_from_intr
+END(do_root_irq)
+
+common_interrupt:
+	ASM_CLAC
+	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
+	SAVE_ALL
+	movl %esp, %eax
+	call __ipipe_handle_irq
+	testl %eax,%eax
+	jnz  ret_from_intr
+	jmp restore_nocheck
+ENDPROC(common_interrupt)
+
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
+
+	.pushsection .kprobes.text, "ax"
+#define BUILD_INTERRUPT2(name, nr)	\
+ENTRY(name)				\
+	ASM_CLAC;			\
+	pushl $~(nr);			\
+	SAVE_ALL;			\
+	movl %esp, %eax;		\
+	call __ipipe_handle_irq;	\
+	testl %eax,%eax;		\
+	jnz  ret_from_intr;		\
+	jmp restore_nocheck;		\
+ENDPROC(name)
+
+/*
+ * We never call the handler directly, we have to go through the
+ * pipeline instead, which sets the proper routing in
+ * __ipipe_enable_pipeline(), so we may ignore the function arg.
+ */
+#define BUILD_INTERRUPT3(name, nr, fn)	\
+	BUILD_INTERRUPT2(name, nr)
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * The pipeline-specific vectors are not traced by the regular tracing
+	 * infrastructure (CONFIG_IPIPE_TRACE allows this though).
+	 */
+	BUILD_INTERRUPT2(ipipe_hrtimer_interrupt, IPIPE_HRTIMER_VECTOR)
+#ifdef CONFIG_SMP
+	BUILD_INTERRUPT2(ipipe_reschedule_interrupt, IPIPE_RESCHEDULE_VECTOR)
+	BUILD_INTERRUPT2(ipipe_critical_interrupt, IPIPE_CRITICAL_VECTOR)
+#endif
+#endif
+	.popsection
+
+#else /* !CONFIG_IPIPE */
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -507,6 +584,7 @@ ENTRY(name)				\
 	jmp	ret_from_intr;		\
 ENDPROC(name)
 
+#endif /* !CONFIG_IPIPE */
 
 #ifdef CONFIG_TRACING
 # define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
@@ -939,9 +1017,15 @@ error_code:
 	movl	$(__USER_DS), %ecx
 	movl	%ecx, %ds
 	movl	%ecx, %es
+#ifndef CONFIG_IPIPE
 	TRACE_IRQS_OFF
+#endif
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
+#ifdef CONFIG_IPIPE
+	testl %eax,%eax
+	jnz restore_nocheck
+#endif
 	jmp	ret_from_exception
 END(page_fault)
 
@@ -976,7 +1060,9 @@ ENTRY(debug)
 debug_stack_correct:
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+#ifndef CONFIG_IPIPE
 	TRACE_IRQS_OFF
+#endif
 	xorl	%edx, %edx			# error code 0
 	movl	%esp, %eax			# pt_regs pointer
 	call	do_debug
@@ -1063,7 +1149,9 @@ ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+#ifndef CONFIG_IPIPE
 	TRACE_IRQS_OFF
+#endif
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
 	call	do_int3
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 375ed605c83d..aea4e782ecd7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -28,6 +28,7 @@
 #include <asm/unistd.h>
 #include <asm/thread_info.h>
 #include <asm/hw_irq.h>
+#include <asm/ipipe_base.h>
 #include <asm/page_types.h>
 #include <asm/irqflags.h>
 #include <asm/paravirt.h>
@@ -45,6 +46,22 @@
 #define __AUDIT_ARCH_64BIT			0x80000000
 #define __AUDIT_ARCH_LE				0x40000000
 
+#ifdef CONFIG_IPIPE
+#define PREEMPT_SCHEDULE_IRQ	call __ipipe_preempt_schedule_irq
+#define machine_check_vector	__ipipe_machine_check_vector
+#else /* !CONFIG_IPIPE */
+#define PREEMPT_SCHEDULE_IRQ	call preempt_schedule_irq
+#endif /* !CONFIG_IPIPE */
+
+.macro test_syscall_nr, reg
+#if __SYSCALL_MASK == ~0
+	cmpq $NR_syscalls,%r\reg
+#else
+	andl $__SYSCALL_MASK,%e\reg
+	cmpl $NR_syscalls,%e\reg
+#endif
+.endm
+
 .code64
 .section .entry.text, "ax"
 
@@ -75,7 +92,8 @@ ENDPROC(native_usergs_sysret64)
  * make sure the stack pointer does not get reset back to the top
  * of the debug stack, and instead just reuses the current stack.
  */
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) \
+	&& !defined(CONFIG_IPIPE)
 
 .macro TRACE_IRQS_OFF_DEBUG
 	call	debug_stack_set_zero
@@ -173,19 +191,65 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r10				/* pt_regs->r10 */
 	pushq	%r11				/* pt_regs->r11 */
 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
+#ifdef CONFIG_IPIPE
+#ifndef CONFIG_IPIPE_LEGACY
+	test_syscall_nr	ax
+	jb	slow_path
+	testl	$_TIP_HEAD, ASM_THREAD_INFO(TI_ipipe, %rsp, SIZEOF_PTREGS)
+	jz	slow_path
+	leaq	(%rsp),%rdi
+	call	ipipe_fastcall_hook
+	testl	%eax, %eax
+	js	no_fastcall
+	testl	$_TIP_HEAD, ASM_THREAD_INFO(TI_ipipe, %rsp, SIZEOF_PTREGS)
+	jz	root_fastexit
+	testl	$_TIP_MAYDAY, ASM_THREAD_INFO(TI_ipipe, %rsp, SIZEOF_PTREGS)
+	jz	pipeline_leave
+	leaq	(%rsp),%rdi
+	call	__ipipe_call_mayday
+	jmp	pipeline_leave
+root_fastexit:
+	call	__ipipe_root_sync
+	jmp	ret_from_sys_call
+no_fastcall:
+	/* fastcall handler not implemented */
+	movq	ORIG_RAX(%rsp), %rax
+slow_path:
+#endif /* !CONFIG_IPIPE_LEGACY */
+	testl	$_TIP_NOTIFY, ASM_THREAD_INFO(TI_ipipe, %rsp, SIZEOF_PTREGS)
+	jnz	pipeline_syscall
+	test_syscall_nr	ax
+	jb	root_syscall
+pipeline_syscall:
+	leaq	(%rsp),%rdi	# regs for handler
+	call	__ipipe_notify_syscall
+	testl	$_TIP_HEAD, ASM_THREAD_INFO(TI_ipipe, %rsp, SIZEOF_PTREGS)
+	jz	root_domain
+pipeline_leave:
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	jmp	sysret_fastexit
+root_domain:
+	testl	%eax, %eax
+	jnz	ret_from_sys_call
+	movq	ORIG_RAX(%rsp), %rax
+root_syscall:
+	/* Reload all the syscall registers. */
+	movq	RDI(%rsp), %rdi
+	movq	RSI(%rsp), %rsi
+	movq	RDX(%rsp), %rdx
+	movq	R10(%rsp), %r10
+	movq	R9(%rsp), %r9
+	movq	R8(%rsp), %r8
+#endif /* CONFIG_IPIPE */
 
 	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz	tracesys
 entry_SYSCALL_64_fastpath:
-#if __SYSCALL_MASK == ~0
-	cmpq	$NR_syscalls, %rax
-#else
-	andl	$__SYSCALL_MASK, %eax
-	cmpl	$NR_syscalls, %eax
-#endif
-	jae	1f				/* return -ENOSYS (already in pt_regs->ax) */
-	sbb	%rcx, %rcx			/* array_index_mask_nospec() */
-	and	%rcx, %rax
+	test_syscall_nr ax
+	jae	ret_from_sys_call		/* return -ENOSYS (already in pt_regs->ax) */
+	sbb	%rcx, %rcx			/* array_index_mask_nospec() */
+	and	%rcx, %rax
 	movq	%r10, %rcx
 #ifdef CONFIG_RETPOLINE
 	movq	sys_call_table(, %rax, 8), %rax
@@ -195,7 +259,7 @@ entry_SYSCALL_64_fastpath:
 #endif
 
 	movq	%rax, RAX(%rsp)
-1:
+ret_from_sys_call:
 /*
  * Syscall return path ending with SYSRET (fast path).
  * Has incompletely filled pt_regs.
@@ -218,6 +282,7 @@ entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz	int_ret_from_sys_call_irqs_off	/* Go to the slow path */
 
+sysret_fastexit:
 	movq	RIP(%rsp), %rcx
 	movq	EFLAGS(%rsp), %r11
 	RESTORE_C_REGS_EXCEPT_RCX_R11
@@ -247,6 +312,12 @@ entry_SYSCALL_64_fastpath:
 	 */
 	USERGS_SYSRET64
 
+GLOBAL(int_fast_exit)
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	GET_THREAD_INFO(%rcx)
+	andl	$~TS_COMPAT, TI_status(%rcx)
+	jmp syscall_return
+
 GLOBAL(int_ret_from_sys_call_irqs_off)
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -277,12 +348,7 @@ tracesys_phase2:
 	 */
 	RESTORE_C_REGS_EXCEPT_RAX
 	RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
-	cmpq	$NR_syscalls, %rax
-#else
-	andl	$__SYSCALL_MASK, %eax
-	cmpl	$NR_syscalls, %eax
-#endif
+	test_syscall_nr	ax
 	jae	1f				/* return -ENOSYS (already in pt_regs->ax) */
 	sbb	%rcx, %rcx			/* array_index_mask_nospec() */
 	and	%rcx, %rax
@@ -306,6 +372,7 @@ GLOBAL(int_ret_from_sys_call)
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	RESTORE_EXTRA_REGS
+syscall_return:
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
 
 	/*
@@ -394,6 +461,7 @@ opportunistic_sysret_failed:
 	 */
 	SWITCH_USER_CR3
 	SWAPGS
+retint_noswapgs:
 	jmp	restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
@@ -486,6 +554,7 @@ ENTRY(ret_from_fork)
 
 	pushq	$0x0002
 	popfq					/* reset kernel eflags */
+	HARD_COND_ENABLE_INTERRUPTS
 
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
@@ -577,7 +646,9 @@ END(irq_entries_start)
 	 */
 	movq	%rsp, %rdi
 	incl	PER_CPU_VAR(irq_count)
+#ifndef CONFIG_IPIPE
 	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+#endif
 	pushq	%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
@@ -593,7 +664,18 @@ END(irq_entries_start)
 common_interrupt:
 	ASM_CLAC
 	addq	$-0x80, (%rsp)			/* Adjust vector to [-256, -1] range */
+#ifdef CONFIG_IPIPE
+	interrupt __ipipe_handle_irq
+	testl	%eax, %eax
+	jnz	ret_from_intr
+	decl	PER_CPU_VAR(irq_count)
+	popq	%rsp
+	testl	$3,CS(%rsp)
+	jz	restore_regs_and_iret
+	jmp	retint_swapgs_notrace
+#else /* !CONFIG_IPIPE */
 	interrupt do_IRQ
+#endif /* !CONFIG_IPIPE */
 	/* 0(%rsp): old RSP */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -611,10 +693,33 @@ GLOBAL(retint_user)
 	mov	%rsp,%rdi
 	call	prepare_exit_to_usermode
 	TRACE_IRQS_IRETQ
+retint_swapgs_notrace:
 	SWITCH_USER_CR3
 	SWAPGS
 	jmp	restore_regs_and_iret
 
+#ifdef CONFIG_IPIPE
+.Lunwind_root_irq:
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	retq	/* back to __ipipe_do_IRQ() */
+
+ENTRY(do_root_irq)
+	movq	%rsp, %rax
+	pushq	$__KERNEL_DS
+	pushq	%rax
+	pushfq
+	orq	$X86_EFLAGS_IF, (%rsp)
+	pushq	$__KERNEL_CS
+	pushq	$.Lunwind_root_irq
+	pushq	ORIG_RAX(%rdi)
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
+	callq	*RSI(%rsp)
+	DISABLE_INTERRUPTS(CLBR_NONE)
+END(do_root_irq)
+#endif
+
 /* Returning to kernel space */
 retint_kernel:
 #ifdef CONFIG_PREEMPT
@@ -624,7 +729,15 @@ retint_kernel:
 	jnc	1f
 0:	cmpl	$0, PER_CPU_VAR(__preempt_count)
 	jnz	1f
-	call	preempt_schedule_irq
+#ifdef CONFIG_IPIPE
+	/*
+	 * We may have preempted call_softirq before __do_softirq raised or
+	 * after it lowered the preemption counter.
+	 */
+	cmpl	$0,PER_CPU_VAR(irq_count)
+	jge	1f
+#endif
+	PREEMPT_SCHEDULE_IRQ
 	jmp	0b
 1:
 #endif
@@ -696,6 +809,26 @@ END(common_interrupt)
 /*
  * APIC interrupts.
  */
+#ifdef CONFIG_IPIPE
+.macro apicinterrupt2 num sym
+ENTRY(\sym)
+	ASM_CLAC
+	pushq	$~(\num)
+.Lcommon_\sym:
+	interrupt __ipipe_handle_irq
+	testl	%eax, %eax
+	jnz	ret_from_intr
+	decl	PER_CPU_VAR(irq_count)
+	popq	%rsp
+	testl	$3,CS(%rsp)
+	jz	restore_regs_and_iret
+	jmp	retint_swapgs_notrace
+END(\sym)
+.endm
+.macro apicinterrupt3 num sym do_sym
+apicinterrupt2 \num \sym
+.endm
+#else /* !CONFIG_IPIPE */
 .macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
 	ASM_CLAC
@@ -705,6 +838,7 @@ ENTRY(\sym)
 	jmp	ret_from_intr
 END(\sym)
 .endm
+#endif /* !CONFIG_IPIPE */
 
 #ifdef CONFIG_TRACING
 #define trace(sym) trace_##sym
@@ -756,6 +890,14 @@ apicinterrupt THERMAL_APIC_VECTOR		thermal_interrupt		smp_thermal_interrupt
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR	call_function_single_interrupt	smp_call_function_single_interrupt
 apicinterrupt CALL_FUNCTION_VECTOR		call_function_interrupt		smp_call_function_interrupt
 apicinterrupt RESCHEDULE_VECTOR			reschedule_interrupt		smp_reschedule_interrupt
+#ifdef CONFIG_IPIPE
+apicinterrupt2 IPIPE_RESCHEDULE_VECTOR		ipipe_reschedule_interrupt
+apicinterrupt2 IPIPE_CRITICAL_VECTOR		ipipe_critical_interrupt
+#endif
+#endif
+
+#ifdef CONFIG_IPIPE
+apicinterrupt2 IPIPE_HRTIMER_VECTOR		ipipe_hrtimer_interrupt
 #endif
 
 apicinterrupt ERROR_APIC_VECTOR			error_interrupt			smp_error_interrupt
@@ -818,7 +960,23 @@ ENTRY(\sym)
 	subq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
+	TRACE_IRQS_OFF
 	call	\do_sym
+#ifdef CONFIG_IPIPE
+	.if \paranoid
+	xorl	%eax, %eax	/* force paranoid_exit to propagate the exception */
+	.else
+	testl	%eax, %eax
+	jz	99f
+	movl	%ebx,%eax	/* %ebx: no swapgs flag */
+	RESTORE_EXTRA_REGS
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	testl	%eax,%eax
+	jnz	restore_regs_and_iret
+	jmp	retint_swapgs_notrace
+	.endif
+99:
+#endif
 
 	.if \shift_ist != -1
 	addq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
@@ -917,12 +1075,16 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
+	HARD_COND_DISABLE_INTERRUPTS
 	incl	PER_CPU_VAR(irq_count)
 	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
+	HARD_COND_ENABLE_INTERRUPTS
 	push	%rbp				/* frame pointer backlink */
 	call	__do_softirq
+	HARD_COND_DISABLE_INTERRUPTS
 	leaveq
 	decl	PER_CPU_VAR(irq_count)
+	HARD_COND_ENABLE_INTERRUPTS
 	ret
 END(do_softirq_own_stack)
 
@@ -1017,7 +1179,11 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
+#ifdef CONFIG_IPIPE
+idtentry debug			do_debug		has_error_code=0	paranoid=1
+#else
 idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
+#endif
 idtentry int3			do_int3			has_error_code=0
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
@@ -1153,7 +1319,9 @@ ENTRY(error_entry)
 	ret
 
 .Lerror_entry_done:
+#ifndef CONFIG_IPIPE
 	TRACE_IRQS_OFF
+#endif
 	ret
 
 	/*
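
The syscall dispatch added to entry_SYSCALL_64 above boils down to a small decision tree: out-of-range syscall numbers issued from the head domain try `ipipe_fastcall_hook()` first, anything carrying `_TIP_NOTIFY` (or still out of range) goes through `__ipipe_notify_syscall()`, and everything else takes the unmodified Linux path. A toy C rendering of that flow, with assumed constant values:

```c
#include <stdio.h>

#define NR_syscalls	400		/* assumed value, demo only    */
#define _TIP_NOTIFY	(1 << 1)	/* mirrors thread_info.h below */
#define _TIP_HEAD	(1 << 2)

static int fastcall_ret = -1;	/* pretend ipipe_fastcall_hook() result    */
static int notify_ret;		/* pretend __ipipe_notify_syscall() result */

static const char *dispatch(unsigned long nr, unsigned int tip)
{
	if (nr >= NR_syscalls && (tip & _TIP_HEAD)) {
		if (fastcall_ret >= 0)
			/* the real code re-reads _TIP_HEAD here, since the
			   hook may have migrated the caller to root */
			return "fast exit via head domain";
		/* no fastcall handler: reload nr, fall to the slow path */
	}
	if ((tip & _TIP_NOTIFY) || nr >= NR_syscalls)
		return notify_ret ? "consumed by the pipeline"
				  : "handed to the Linux syscall table";
	return "handed to the Linux syscall table";	/* plain root path */
}

int main(void)
{
	printf("nr=1, root:            %s\n", dispatch(1, 0));
	printf("nr=407, head, no hook: %s\n", dispatch(407, _TIP_HEAD));
	fastcall_ret = 0;
	printf("nr=407, head, hook ok: %s\n", dispatch(407, _TIP_HEAD));
	return 0;
}
```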
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 2d359991a273..13ec61d9e77b 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -29,6 +29,7 @@
 #include <linux/timer.h>
 #include <linux/syscalls.h>
 #include <linux/ratelimit.h>
+#include <linux/ipipe_tickdev.h>
 
 #include <asm/vsyscall.h>
 #include <asm/unistd.h>
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e330416995..e7de3723564c 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/timekeeper_internal.h>
+#include <linux/ipipe_tickdev.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
 
@@ -67,4 +68,7 @@ void update_vsyscall(struct timekeeper *tk)
 	}
 
 	gtod_write_end(vdata);
+
+	if (tk->tkr_mono.clock == &clocksource_tsc)
+		ipipe_update_hostrt(tk);
 }
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index fd810a57ab1b..e3e42120b34d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -431,7 +431,18 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {}
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+struct irq_data;
+void move_xxapic_irq(struct irq_data *data);
+#endif /* CONFIG_IPIPE && CONFIG_SMP */
+
+#ifdef CONFIG_IPIPE
+#define ack_APIC_irq() do { } while(0)
+static inline void __ack_APIC_irq(void)
+#else /* !CONFIG_IPIPE */
+#define __ack_APIC_irq() ack_APIC_irq()
 static inline void ack_APIC_irq(void)
+#endif /* CONFIG_IPIPE */
 {
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index c46bb99d5fb2..569f594ce9e7 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -155,6 +155,7 @@
 # define MAX_LOCAL_APIC 32768
 #endif
 
+#ifndef __ASSEMBLY__
 /*
  * All x86-64 systems are xAPIC compatible.
  * In the following, "apicid" is a physical APIC ID.
@@ -442,4 +443,6 @@ enum ioapic_irq_destination_types {
 	dest_ExtINT		= 7
 };
 
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 12cb66f6d3a5..63b015b34fb0 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -93,7 +93,7 @@ extern void aout_dump_debugregs(struct user *dump);
 
 extern void hw_breakpoint_restore(void);
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_IPIPE)
 DECLARE_PER_CPU(int, debug_stack_usage);
 static inline void debug_stack_usage_inc(void)
 {
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 880db91d9457..d04f39a8ced3 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -390,6 +390,12 @@ static inline void alloc_system_vector(int vector)
 	}
 }
 
+#define alloc_intr_gate_notrace(n, addr)			\
+	do {							\
+		alloc_system_vector(n);				\
+		set_intr_gate_notrace(n, addr);			\
+	} while (0)
+
 #define alloc_intr_gate(n, addr)				\
 	do {							\
 		alloc_system_vector(n);				\
@@ -435,7 +441,7 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_IPIPE)
 DECLARE_PER_CPU(u32, debug_idt_ctr);
 static inline bool is_debug_idt_enabled(void)
 {
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 16825dda18dc..489e0a0775a9 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -618,11 +618,12 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc
 static inline void user_fpu_begin(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
+	unsigned long flags;
 
-	preempt_disable();
+	flags = hard_preempt_disable();
 	if (!fpregs_active())
 		fpregs_activate(fpu);
-	preempt_enable();
+	hard_preempt_enable(flags);
 }
 
 /*
@@ -652,4 +653,23 @@ static inline void xsetbv(u32 index, u64 value)
 		     : : "a" (eax), "d" (edx), "c" (index));
 }
 
+DECLARE_PER_CPU(bool, in_kernel_fpu);
+
+static inline void kernel_fpu_disable(void)
+{
+	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
+	this_cpu_write(in_kernel_fpu, true);
+}
+
+static inline void kernel_fpu_enable(void)
+{
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+	this_cpu_write(in_kernel_fpu, false);
+}
+
+static inline bool kernel_fpu_disabled(void)
+{
+	return this_cpu_read(in_kernel_fpu);
+}
+
 #endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index ee52ff858699..1c91304f156f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,9 @@ extern asmlinkage void deferred_error_interrupt(void);
 extern asmlinkage void call_function_interrupt(void);
 extern asmlinkage void call_function_single_interrupt(void);
 
+extern void smp_kvm_posted_intr_ipi(struct pt_regs *regs);
+extern void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs);
+
 #ifdef CONFIG_TRACING
 /* Interrupt handlers registered during init_IRQ */
 extern void trace_apic_timer_interrupt(void);
@@ -169,6 +172,7 @@ extern __visible void smp_apic_timer_interrupt(struct pt_regs *);
 extern __visible void smp_spurious_interrupt(struct pt_regs *);
 extern __visible void smp_x86_platform_ipi(struct pt_regs *);
 extern __visible void smp_error_interrupt(struct pt_regs *);
+extern __visible void smp_irq_work_interrupt(struct pt_regs *);
 #ifdef CONFIG_X86_IO_APIC
 extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
 #endif
@@ -177,6 +181,7 @@ extern __visible void smp_reschedule_interrupt(struct pt_regs *);
 extern __visible void smp_call_function_interrupt(struct pt_regs *);
 extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 #endif
+extern asmlinkage __visible void smp_reboot_interrupt(void);
 
 extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 39bcefc20de7..354fb24c697d 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask;
 #define SLAVE_ICW4_DEFAULT	0x01
 #define PIC_ICW4_AEOI		2
 
-extern raw_spinlock_t i8259A_lock;
+IPIPE_DECLARE_RAW_SPINLOCK(i8259A_lock);
 
 /* the PIC may need a careful delay on some platforms, hence specific calls */
 static inline unsigned char inb_pic(unsigned int port)
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 615fa9061b57..e0a62ab833d8 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -64,10 +64,12 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
 	 * Subtle. In the case of the 'never do double writes' workaround
 	 * we have to lock out interrupts to be safe.  As we don't care
 	 * of the value read we use an atomic rmw access to avoid costly
-	 * cli/sti.  Otherwise we use an even cheaper single atomic write
-	 * to the APIC.
+	 * cli/sti (except if running the interrupt pipeline).  Otherwise
+	 * we use an even cheaper single atomic write to the APIC.
 	 */
-	unsigned int cfg;
+	unsigned long cfg, flags;
+
+	flags = hard_cond_local_irq_save();
 
 	/*
 	 * Wait for idle.
@@ -83,6 +85,8 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	native_apic_mem_write(APIC_ICR, cfg);
+
+	hard_cond_local_irq_restore(flags);
 }
 
 /*
@@ -92,7 +96,9 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest
 static inline void
  __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
 {
-	unsigned long cfg;
+	unsigned long cfg, flags;
+
+	flags = hard_cond_local_irq_save();
 
 	/*
 	 * Wait for idle.
@@ -117,6 +123,8 @@ static inline void
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	native_apic_mem_write(APIC_ICR, cfg);
+
+	hard_cond_local_irq_restore(flags);
 }
 
 extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h
new file mode 100644
index 000000000000..a4eaff2c2a14
--- /dev/null
+++ b/arch/x86/include/asm/ipipe.h
@@ -0,0 +1,125 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe.h
+ *
+ *   Copyright (C) 2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_H
+#define __X86_IPIPE_H
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_CORE_RELEASE	12
+
+struct ipipe_domain;
+struct pt_regs;
+
+struct ipipe_arch_sysinfo {
+};
+
+#define ipipe_processor_id()	raw_smp_processor_id()
+
+#define ipipe_mm_switch_protect(flags)		\
+	do { (flags) = hard_cond_local_irq_save(); } while (0)
+#define ipipe_mm_switch_unprotect(flags)	\
+	hard_cond_local_irq_restore(flags)
+
+/* Private interface -- Internal use only */
+
+#define __ipipe_early_core_setup()	do { } while(0)
+
+#define __ipipe_enable_irq(irq)		irq_to_desc(irq)->chip->enable(irq)
+#define __ipipe_disable_irq(irq)	irq_to_desc(irq)->chip->disable(irq)
+#define __ipipe_enable_irqdesc(ipd, irq)	do { } while(0)
+#define __ipipe_disable_irqdesc(ipd, irq)	do { } while(0)
+
+#ifdef CONFIG_SMP
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
+#else
+#define __ipipe_hook_critical_ipi(ipd) do { } while(0)
+#endif
+
+void __ipipe_enable_pipeline(void);
+
+int __ipipe_trap_prologue(struct pt_regs *regs, int trapnr,
+			  unsigned long *flags);
+
+static inline void __ipipe_fixup_if(int s, struct pt_regs *regs)
+{
+	/*
+	 * Have the saved hw state look like the domain stall bit, so
+	 * that __ipipe_unstall_iret_root() restores the proper
+	 * pipeline state for the root stage upon exit.
+	 */
+	if (s)
+		regs->flags &= ~X86_EFLAGS_IF;
+	else
+		regs->flags |= X86_EFLAGS_IF;
+}
+
+#define IPIPE_DO_TRAP(__handler, __trapnr, __regs, __args...)			\
+	({									\
+		unsigned long __flags, __regs_flags = __regs->flags;		\
+		int __ret = __ipipe_trap_prologue(__regs, __trapnr, &__flags);	\
+		if (__ret <= 0) {						\
+			__handler(__regs, ##__args);				\
+			ipipe_restore_root(raw_irqs_disabled_flags(__flags));	\
+			__ipipe_fixup_if(raw_irqs_disabled_flags(__regs_flags),	\
+					 __regs);				\
+		}								\
+		__ret > 0;							\
+	})
+
+#define __ipipe_root_tick_p(regs)	((regs)->flags & X86_EFLAGS_IF)
+
+static inline void ipipe_mute_pic(void) { }
+
+static inline void ipipe_unmute_pic(void) { }
+
+static inline void ipipe_notify_root_preemption(void)
+{
+	__ipipe_notify_vm_preemption();
+}
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_mm_switch_protect(flags)		do { (void)(flags); } while(0)
+#define ipipe_mm_switch_unprotect(flags)	do { (void)(flags); } while(0)
+
+#define IPIPE_DO_TRAP(__handler, __trapnr, __regs, __args...)	\
+	({							\
+		__handler(__regs, ##__args);			\
+		0;						\
+	})
+
+#endif /* CONFIG_IPIPE */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE)
+#define __ipipe_move_root_irq(__desc)					\
+	do {								\
+		if (!IS_ERR_OR_NULL(__desc)) {				\
+			struct irq_chip *__chip = irq_desc_get_chip(__desc); \
+			if (__chip->irq_move)				\
+				__chip->irq_move(irq_desc_get_irq_data(__desc)); \
+		}							\
+	} while (0)
+#else /* !(CONFIG_SMP && CONFIG_IPIPE) */
+#define __ipipe_move_root_irq(irq)	do { } while (0)
+#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */
+
+#endif	/* !__X86_IPIPE_H */
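
The traps.h hunks at the end of this patch retype the fault handlers to return int, which is exactly what IPIPE_DO_TRAP produces: nonzero when `__ipipe_trap_prologue()` reports that the head domain absorbed the trap, in which case the original handler body is skipped entirely. A hypothetical call site, with `__do_divide_error` standing in for the original handler body:

```c
/* Sketch only: a nonzero return tells the entry code (see the
 * error_code changes in entry_32.S earlier in this patch) to
 * bypass the normal kernel exit path. */
dotraplinkage int do_divide_error(struct pt_regs *regs, long error_code)
{
	return IPIPE_DO_TRAP(__do_divide_error, X86_TRAP_DE, regs, error_code);
}
```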
diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h
new file mode 100644
index 000000000000..4412011e1f6d
--- /dev/null
+++ b/arch/x86/include/asm/ipipe_32.h
@@ -0,0 +1,65 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe_32.h
+ *
+ *   Copyright (C) 2002-2012 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_32_H
+#define __X86_IPIPE_32_H
+
+#define ipipe_read_tsc(t)						\
+	__asm__ __volatile__(ALTERNATIVE("call __ipipe_get_cs_tsc",	\
+					 "rdtsc",			\
+					 X86_FEATURE_TSC) : "=A"(t))
+
+#define ipipe_tsc2ns(t)					\
+({							\
+	unsigned long long delta = (t) * 1000000ULL;	\
+	unsigned long long freq = __ipipe_hrclock_freq;	\
+	do_div(freq, 1000);				\
+	do_div(delta, (unsigned)freq + 1);		\
+	(unsigned long)delta;				\
+})
+
+#define ipipe_tsc2us(t)					\
+({							\
+	unsigned long long delta = (t) * 1000ULL;	\
+	unsigned long long freq = __ipipe_hrclock_freq;	\
+	do_div(freq, 1000);				\
+	do_div(delta, (unsigned)freq + 1);		\
+	(unsigned long)delta;				\
+})
+
+/* Private interface -- Internal use only */
+
+extern unsigned int cpu_khz;
+#define __ipipe_cpu_freq	({ unsigned long long __freq = 1000ULL * cpu_khz; __freq; })
+
+#define ipipe_clock_name() \
+	(cpu_has_tsc ? "tsc" : __ipipe_cs->name)
+
+#define __ipipe_hrclock_freq \
+	(cpu_has_tsc ? __ipipe_cpu_freq : __ipipe_cs_freq)
+
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
+{
+	__asm__("bsrl %1, %0":"=r"(ul) : "r"(ul));
+	return ul;
+}
+
+#endif	/* !__X86_IPIPE_32_H */
diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h
new file mode 100644
index 000000000000..be183518ca38
--- /dev/null
+++ b/arch/x86/include/asm/ipipe_64.h
@@ -0,0 +1,60 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe_64.h
+ *
+ *   Copyright (C) 2007-2012 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_64_H
+#define __X86_IPIPE_64_H
+
+#define ipipe_read_tsc(t)  do {		\
+	unsigned int __a,__d;			\
+	asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
+	(t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
+} while(0)
+
+extern unsigned int cpu_khz;
+#define __ipipe_cpu_freq	({ unsigned long long __freq = (1000ULL * cpu_khz); __freq; })
+#define __ipipe_hrclock_freq	__ipipe_cpu_freq
+
+#define ipipe_tsc2ns(t)	(((t) * 1000UL) / (__ipipe_hrclock_freq / 1000000UL))
+#define ipipe_tsc2us(t)	((t) / (__ipipe_hrclock_freq / 1000000UL))
+
+static inline const char *ipipe_clock_name(void)
+{
+	return "tsc";
+}
+
+/* Private interface -- Internal use only */
+
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
+{
+	__asm__("bsrq %1, %0" : "=r"(ul)
+		: "rm"(ul));
+	return ul;
+}
+
+#ifdef CONFIG_PREEMPT
+#define __ipipe_check_root_resched()			\
+	(preempt_count() == 0 && need_resched() &&	\
+	 per_cpu(irq_count, ipipe_processor_id()) < 0)
+#else
+#define __ipipe_check_root_resched()	0
+#endif
+
+#endif	/* !__X86_IPIPE_64_H */
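
A worked example of the 64-bit conversions above: the divisor `__ipipe_hrclock_freq / 1000000UL` is simply the TSC rate in MHz, so with an assumed 2.4 GHz clock (`cpu_khz = 2400000`), 2400 ticks come out as 1000 ns:

```c
#include <stdio.h>

static unsigned long cpu_khz = 2400000;	/* assumed 2.4 GHz TSC */

#define hrclock_freq	(1000UL * cpu_khz)
#define tsc2ns(t)	(((t) * 1000UL) / (hrclock_freq / 1000000UL))
#define tsc2us(t)	((t) / (hrclock_freq / 1000000UL))

int main(void)
{
	unsigned long t = 2400;	/* one microsecond worth of cycles */
	printf("%lu ticks = %lu ns = %lu us\n", t, tsc2ns(t), tsc2us(t));
	return 0;	/* prints: 2400 ticks = 1000 ns = 1 us */
}
```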
diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h
new file mode 100644
index 000000000000..ebb24d5f3718
--- /dev/null
+++ b/arch/x86/include/asm/ipipe_base.h
@@ -0,0 +1,206 @@
+/*   -*- linux-c -*-
+ *   arch/x86/include/asm/ipipe_base.h
+ *
+ *   Copyright (C) 2007-2012 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __X86_IPIPE_BASE_H
+#define __X86_IPIPE_BASE_H
+
+#include <asm/irq_vectors.h>
+#include <asm/bitsperlong.h>
+
+#ifdef CONFIG_X86_32
+/* 32 from IDT + iret_error + mayday trap */
+#define IPIPE_TRAP_MAYDAY	33	/* Internal recovery trap */
+#define IPIPE_NR_FAULTS		34
+#else
+/* 32 from IDT + mayday trap */
+#define IPIPE_TRAP_MAYDAY	32	/* Internal recovery trap */
+#define IPIPE_NR_FAULTS		33
+#endif
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+/*
+ * Special APIC interrupts are mapped above the last defined external
+ * IRQ number.
+ */
+#define nr_apic_vectors		(NR_VECTORS - FIRST_SYSTEM_VECTOR)
+#define IPIPE_FIRST_APIC_IRQ	NR_IRQS
+#define IPIPE_HRTIMER_IPI	ipipe_apic_vector_irq(IPIPE_HRTIMER_VECTOR)
+#ifdef CONFIG_SMP
+#define IPIPE_RESCHEDULE_IPI	ipipe_apic_vector_irq(IPIPE_RESCHEDULE_VECTOR)
+#define IPIPE_CRITICAL_IPI	ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
+#endif /* CONFIG_SMP */
+#define IPIPE_NR_XIRQS		(NR_IRQS + nr_apic_vectors)
+#define ipipe_apic_irq_vector(irq)  ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
+#define ipipe_apic_vector_irq(vec)  ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
+#else /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */
+#define IPIPE_NR_XIRQS		NR_IRQS
+#endif /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */
+
+#ifndef __ASSEMBLY__
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_X86_32
+# include "ipipe_32.h"
+#else
+# include "ipipe_64.h"
+#endif
+
+struct pt_regs;
+struct irq_desc;
+struct ipipe_vm_notifier;
+
+static inline unsigned __ipipe_get_irq_vector(int irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+	unsigned int __ipipe_get_ioapic_irq_vector(int irq);
+	return __ipipe_get_ioapic_irq_vector(irq);
+#elif defined(CONFIG_X86_LOCAL_APIC)
+	return irq >= IPIPE_FIRST_APIC_IRQ ?
+		ipipe_apic_irq_vector(irq) : ISA_IRQ_VECTOR(irq);
+#else
+	return ISA_IRQ_VECTOR(irq);
+#endif
+}
+
+void __ipipe_halt_root(int use_mwait);
+
+void ipipe_hrtimer_interrupt(void);
+
+void ipipe_reschedule_interrupt(void);
+
+void ipipe_critical_interrupt(void);
+
+int __ipipe_handle_irq(struct pt_regs *regs);
+
+void __ipipe_handle_vm_preemption(struct ipipe_vm_notifier *nfy);
+
+extern int __ipipe_hrtimer_irq;
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_X86_32
+#define GET_ROOT_STATUS_ADDR			\
+	"pushfl; cli;"				\
+	"movl %%fs:this_cpu_off, %%eax;"	\
+	"lea ipipe_percpu(%%eax), %%eax;"
+#define PUT_ROOT_STATUS_ADDR	"popfl;"
+#define TEST_AND_SET_ROOT_STATUS		\
+	"btsl $0,(%%eax);"
+#define TEST_ROOT_STATUS			\
+	"btl $0,(%%eax);"
+#define ROOT_TEST_CLOBBER_LIST  "eax"
+#else /* CONFIG_X86_64 */
+#define GET_ROOT_STATUS_ADDR			\
+	"pushfq; cli;"				\
+	"movq %%gs:this_cpu_off, %%rax;"	\
+	"lea ipipe_percpu(%%rax), %%rax;"
+#define PUT_ROOT_STATUS_ADDR	"popfq;"
+#define TEST_AND_SET_ROOT_STATUS		\
+	"btsl $0,(%%rax);"
+#define TEST_ROOT_STATUS			\
+	"btl $0,(%%rax);"
+#define ROOT_TEST_CLOBBER_LIST  "rax"
+#endif /* CONFIG_X86_64 */
+
+static inline void ipipe_stall_root(void)
+{
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     TEST_AND_SET_ROOT_STATUS
+			     PUT_ROOT_STATUS_ADDR
+			     : : : ROOT_TEST_CLOBBER_LIST, "memory");
+}
+
+static inline unsigned long ipipe_test_and_stall_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     TEST_AND_SET_ROOT_STATUS
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : ROOT_TEST_CLOBBER_LIST, "memory");
+	return oldbit;
+}
+
+static inline unsigned long ipipe_test_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     TEST_ROOT_STATUS
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : ROOT_TEST_CLOBBER_LIST);
+	return oldbit;
+}
+
+#else /* !CONFIG_SMP */
+
+extern unsigned long __ipipe_root_status;
+
+static inline void ipipe_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	__asm__ __volatile__("btsl $0,%0;"
+			     :"+m" (*p) : : "memory");
+}
+
+static inline unsigned long ipipe_test_and_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btsl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit), "+m" (*p)
+			     : : "memory");
+	return oldbit;
+}
+
+static inline unsigned long ipipe_test_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit)
+			     :"m" (*p));
+	return oldbit;
+}
+
+#endif /* !CONFIG_SMP */
+
+#ifdef CONFIG_IPIPE_LEGACY
+#define __ipipe_tick_irq	__ipipe_hrtimer_irq
+/*
+ * The current Linux task is read from the PDA on x86, so this is
+ * always safe, regardless of the active stack.
+ */
+#define ipipe_safe_current()	current
+#endif
+
+#endif	/* !__ASSEMBLY__ */
+
+#endif	/* !__X86_IPIPE_BASE_H */
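
The vector/IRQ mapping can be checked in isolation. The constants below mirror the irq_vectors.h hunk that follows; `NR_IRQS` (the value behind `IPIPE_FIRST_APIC_IRQ`) is config-dependent and assumed here:

```c
#include <stdio.h>

#define NR_IRQS			256	/* assumed, config-dependent   */
#define FIRST_SYSTEM_VECTOR	0xed	/* IPIPE_CRITICAL_VECTOR below */
#define IPIPE_HRTIMER_VECTOR	0xf0

/* IPIPE_FIRST_APIC_IRQ == NR_IRQS, so the macros reduce to: */
#define apic_irq_vector(irq)	((irq) - NR_IRQS + FIRST_SYSTEM_VECTOR)
#define apic_vector_irq(vec)	((vec) - FIRST_SYSTEM_VECTOR + NR_IRQS)

int main(void)
{
	int irq = apic_vector_irq(IPIPE_HRTIMER_VECTOR);

	printf("hrtimer IPI: vector 0x%x <-> irq %d\n",
	       apic_irq_vector(irq), irq);	/* 0xf0 <-> 259 */
	return 0;
}
```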
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6ca9fd6234e1..434c4e2f245e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -100,6 +100,11 @@
 #define POSTED_INTR_VECTOR		0xf2
 #endif
 
+/* Interrupt pipeline IPIs */
+#define IPIPE_HRTIMER_VECTOR		0xf0
+#define IPIPE_RESCHEDULE_VECTOR		0xee
+#define IPIPE_CRITICAL_VECTOR		0xed
+
 /*
  * Local APIC timer IRQ vector is on a different priority level,
  * to work around the 'lost local interrupt if more than 2 IRQ
@@ -107,13 +112,13 @@
  */
 #define LOCAL_TIMER_VECTOR		0xef
 
-#define NR_VECTORS			 256
+/*
+ * I-pipe: Lowest vector number which may be assigned to a special
+ * APIC IRQ. We must know this at build time.
+ */
+#define FIRST_SYSTEM_VECTOR		IPIPE_CRITICAL_VECTOR
 
-#ifdef CONFIG_X86_LOCAL_APIC
-#define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
-#else
-#define FIRST_SYSTEM_VECTOR		NR_VECTORS
-#endif
+#define NR_VECTORS			 256
 
 #define FPU_IRQ				  13
 
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 8afbdcd3032b..f3ad617bfff3 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -4,6 +4,11 @@
 #include <asm/processor-flags.h>
 
 #ifndef __ASSEMBLY__
+
+#include <linux/ipipe_base.h>
+#include <linux/ipipe_trace.h>
+#include <linux/compiler.h>
+
 /*
  * Interrupt control:
  */
@@ -57,32 +62,64 @@ static inline void native_halt(void)
 	asm volatile("hlt": : :"memory");
 }
 
+static inline int native_irqs_disabled(void)
+{
+	unsigned long flags = native_save_fl();
+
+	return !(flags & X86_EFLAGS_IF);
+}
+
 #endif
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
+#define HARD_COND_ENABLE_INTERRUPTS
+#define HARD_COND_DISABLE_INTERRUPTS
 #else
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 
 static inline notrace unsigned long arch_local_save_flags(void)
 {
+#ifdef CONFIG_IPIPE
+	unsigned long flags;
+
+	flags = (!ipipe_test_root()) << 9;
+	barrier();
+	return flags;
+#else
 	return native_save_fl();
+#endif
 }
 
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	ipipe_restore_root(!(flags & X86_EFLAGS_IF));
+#else
 	native_restore_fl(flags);
+#endif
 }
 
 static inline notrace void arch_local_irq_disable(void)
 {
+#ifdef CONFIG_IPIPE
+	ipipe_stall_root();
+	barrier();
+#else
 	native_irq_disable();
+#endif
 }
 
 static inline notrace void arch_local_irq_enable(void)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	ipipe_unstall_root();
+#else
 	native_irq_enable();
+#endif
 }
 
 /*
@@ -91,7 +128,12 @@ static inline notrace void arch_local_irq_enable(void)
  */
 static inline void arch_safe_halt(void)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_halt_root(0);
+#else
 	native_safe_halt();
+#endif
 }
 
 /*
@@ -103,6 +145,20 @@ static inline void halt(void)
 	native_halt();
 }
 
+/* Merge virtual+real interrupt mask bits into a single word. */
+static inline unsigned long arch_mangle_irq_bits(int virt, unsigned long real)
+{
+	return (real & ~(1L << 31)) | ((unsigned long)(virt != 0) << 31);
+}
+
+/* Converse operation of arch_mangle_irq_bits() */
+static inline int arch_demangle_irq_bits(unsigned long *x)
+{
+	int virt = (*x & (1L << 31)) != 0;
+	*x &= ~(1L << 31);
+	return virt;
+}
+
 /*
  * For spinlocks, etc:
  */
@@ -117,6 +173,14 @@ static inline notrace unsigned long arch_local_irq_save(void)
 #define ENABLE_INTERRUPTS(x)	sti
 #define DISABLE_INTERRUPTS(x)	cli
 
+#ifdef CONFIG_IPIPE
+#define HARD_COND_ENABLE_INTERRUPTS	sti
+#define HARD_COND_DISABLE_INTERRUPTS	cli
+#else /* !CONFIG_IPIPE */
+#define HARD_COND_ENABLE_INTERRUPTS
+#define HARD_COND_DISABLE_INTERRUPTS
+#endif /* !CONFIG_IPIPE */
+
 #ifdef CONFIG_X86_64
 #define SWAPGS	swapgs
 /*
@@ -162,6 +226,122 @@ static inline int arch_irqs_disabled(void)
 
 	return arch_irqs_disabled_flags(flags);
 }
+
+static inline unsigned long hard_local_irq_save_notrace(void)
+{
+	unsigned long flags;
+
+	flags = native_save_fl();
+	native_irq_disable();
+
+	return flags;
+}
+
+static inline void hard_local_irq_restore_notrace(unsigned long flags)
+{
+	native_restore_fl(flags);
+}
+
+static inline void hard_local_irq_disable_notrace(void)
+{
+	native_irq_disable();
+}
+
+static inline void hard_local_irq_enable_notrace(void)
+{
+	native_irq_enable();
+}
+
+static inline int hard_irqs_disabled(void)
+{
+	return native_irqs_disabled();
+}
+
+#define hard_irqs_disabled_flags(flags)	arch_irqs_disabled_flags(flags)
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+
+static inline void hard_local_irq_disable(void)
+{
+	if (!native_irqs_disabled()) {
+		native_irq_disable();
+		ipipe_trace_begin(0x80000000);
+	}
+}
+
+static inline void hard_local_irq_enable(void)
+{
+	if (native_irqs_disabled()) {
+		ipipe_trace_end(0x80000000);
+		native_irq_enable();
+	}
+}
+
+static inline unsigned long hard_local_irq_save(void)
+{
+	unsigned long flags;
+
+	flags = native_save_fl();
+	if (flags & X86_EFLAGS_IF) {
+		native_irq_disable();
+		ipipe_trace_begin(0x80000001);
+	}
+
+	return flags;
+}
+
+static inline void hard_local_irq_restore(unsigned long flags)
+{
+	if (flags & X86_EFLAGS_IF)
+		ipipe_trace_end(0x80000001);
+
+	native_restore_fl(flags);
+}
+
+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+
+static inline unsigned long hard_local_irq_save(void)
+{
+	return hard_local_irq_save_notrace();
+}
+
+static inline void hard_local_irq_restore(unsigned long flags)
+{
+	hard_local_irq_restore_notrace(flags);
+}
+
+static inline void hard_local_irq_enable(void)
+{
+	hard_local_irq_enable_notrace();
+}
+
+static inline void hard_local_irq_disable(void)
+{
+	hard_local_irq_disable_notrace();
+}
+
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+
+static inline unsigned long hard_local_save_flags(void)
+{
+	return native_save_fl();
+}
+
+#ifndef CONFIG_IPIPE
+#define hard_cond_local_irq_enable()		do { } while(0)
+#define hard_cond_local_irq_disable()		do { } while(0)
+#define hard_cond_local_irq_save()		0
+#define hard_cond_local_irq_restore(flags)	do { (void)(flags); } while(0)
+#endif
+
+#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE)
+#define hard_smp_local_irq_save()		hard_local_irq_save()
+#define hard_smp_local_irq_restore(flags)	hard_local_irq_restore(flags)
+#else /* !CONFIG_SMP */
+#define hard_smp_local_irq_save()		0
+#define hard_smp_local_irq_restore(flags)	do { (void)(flags); } while(0)
+#endif /* CONFIG_SMP */
+
 #endif /* !__ASSEMBLY__ */
 
 #ifdef __ASSEMBLY__
@@ -174,28 +354,33 @@ static inline int arch_irqs_disabled(void)
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #  ifdef CONFIG_X86_64
-#    define LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
+#    define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
 #    define LOCKDEP_SYS_EXIT_IRQ \
 	TRACE_IRQS_ON; \
 	sti; \
 	call lockdep_sys_exit_thunk; \
 	cli; \
 	TRACE_IRQS_OFF;
+
 #  else
-#    define LOCKDEP_SYS_EXIT \
+#    define LOCKDEP_SYS_EXIT			\
 	pushl %eax;				\
 	pushl %ecx;				\
 	pushl %edx;				\
+	pushfl;					\
+	sti;					\
 	call lockdep_sys_exit;			\
+	popfl;					\
 	popl %edx;				\
 	popl %ecx;				\
 	popl %eax;
+
 #    define LOCKDEP_SYS_EXIT_IRQ
 #  endif
 #else
 #  define LOCKDEP_SYS_EXIT
 #  define LOCKDEP_SYS_EXIT_IRQ
 #endif
-#endif /* __ASSEMBLY__ */
 
+#endif /* __ASSEMBLY__ */
 #endif
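
A quick user-space round-trip of the mangling helpers added above: bit 31 of the saved flags word carries the virtual (stall) state, and the remaining bits keep the real EFLAGS content (0x200 being X86_EFLAGS_IF):

```c
#include <stdio.h>

static unsigned long mangle(int virt, unsigned long real)
{
	return (real & ~(1UL << 31)) | ((unsigned long)(virt != 0) << 31);
}

static int demangle(unsigned long *x)
{
	int virt = (*x & (1UL << 31)) != 0;

	*x &= ~(1UL << 31);
	return virt;
}

int main(void)
{
	unsigned long flags = mangle(1, 0x200);	/* stalled, hw IF set */
	int virt = demangle(&flags);

	printf("virt=%d real=%#lx\n", virt, flags);	/* virt=1 real=0x200 */
	return 0;
}
```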
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index d8d19fe99e45..9b577230c2b7 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -96,7 +96,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
 	clear_LDT();
 #endif
 
-	DEBUG_LOCKS_WARN_ON(preemptible());
+	DEBUG_LOCKS_WARN_ON(preemptible() &&
+			(!IS_ENABLED(CONFIG_IPIPE) || !hard_irqs_disabled()));
 }
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
@@ -119,14 +120,29 @@ static inline void destroy_context(struct mm_struct *mm)
 extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		      struct task_struct *tsk);
 
-extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-			       struct task_struct *tsk);
+extern void __switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+				 struct task_struct *tsk);
 #define switch_mm_irqs_off switch_mm_irqs_off
 
-#define activate_mm(prev, next)			\
-do {						\
-	paravirt_activate_mm((prev), (next));	\
-	switch_mm((prev), (next), NULL);	\
+static inline void switch_mm_irqs_off(struct mm_struct *prev,
+				      struct mm_struct *next,
+				      struct task_struct *tsk)
+{
+	unsigned long flags;
+	flags = hard_cond_local_irq_save();
+	__switch_mm_irqs_off(prev, next, tsk);
+	hard_cond_local_irq_restore(flags);
+}
+
+#define __switch_mm	__switch_mm_irqs_off
+
+#define ipipe_switch_mm_head(prev, next, tsk) \
+	__switch_mm_irqs_off(prev, next, tsk)
+
+#define activate_mm(prev, next)				\
+do {							\
+	paravirt_activate_mm((prev), (next));		\
+	__switch_mm_irqs_off((prev), (next), NULL);	\
 } while (0);
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 01bcde84d3e4..add5e0534259 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -66,11 +66,13 @@ static __always_inline bool test_preempt_need_resched(void)
 
 static __always_inline void __preempt_count_add(int val)
 {
+	ipipe_preempt_root_only();
 	raw_cpu_add_4(__preempt_count, val);
 }
 
 static __always_inline void __preempt_count_sub(int val)
 {
+	ipipe_preempt_root_only();
 	raw_cpu_add_4(__preempt_count, -val);
 }
 
@@ -81,6 +83,7 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
+	ipipe_preempt_root_only();
 	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
 }
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 440a948c4feb..35b32f2844ce 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -387,6 +387,7 @@ struct thread_struct {
 	unsigned short		ds;
 	unsigned short		fsindex;
 	unsigned short		gsindex;
+	unsigned long		rip;
 #endif
 #ifdef CONFIG_X86_32
 	unsigned long		ip;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 2270e41b32fd..ad31f4e12376 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -5,6 +5,9 @@
 #ifdef __KERNEL__
 
 #include <asm/nops.h>
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU(unsigned long, __ipipe_cr2);
 
 static inline void native_clts(void)
 {
@@ -119,15 +122,23 @@ static inline void write_cr0(unsigned long x)
 	native_write_cr0(x);
 }
 
+#ifdef CONFIG_IPIPE
+#define read_cr2()     __this_cpu_read(__ipipe_cr2)
+#else
 static inline unsigned long read_cr2(void)
 {
 	return native_read_cr2();
 }
+#endif
 
+#ifdef CONFIG_IPIPE
+#define write_cr2(x)   __this_cpu_write(__ipipe_cr2, x)
+#else
 static inline void write_cr2(unsigned long x)
 {
 	native_write_cr2(x);
 }
+#endif
 
 static inline unsigned long read_cr3(void)
 {
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 025ecfaba9c9..e96967a76004 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -146,7 +146,11 @@ do {									\
 	asm volatile(SAVE_CONTEXT					  \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
 	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
-	     "call __switch_to\n\t"					  \
+	     "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */	  \
+	     "pushq %P[threadrip](%[next])\n\t" /* restore RIP */	  \
+	     "jmp __switch_to\n\t"					  \
+	     ".globl thread_return\n\t"					  \
+	     "thread_return:\n\t"					  \
 	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
 	     __switch_canary						  \
 	     __retpoline_fill_return_buffer				  \
@@ -159,6 +163,7 @@ do {									\
 	       __switch_canary_oparam					  \
 	     : [next] "S" (next), [prev] "D" (prev),			  \
 	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+	       [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [_tif_fork] "i" (_TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a96e88b243ef..0e7d6ca777ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -51,6 +51,7 @@
 struct task_struct;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
+#include <ipipe/thread_info.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
@@ -58,6 +59,10 @@ struct thread_info {
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
 	mm_segment_t		addr_limit;
+#ifdef CONFIG_IPIPE
+	unsigned long		ipipe_flags;
+#endif
+	struct ipipe_threadinfo ipipe_data;
 	unsigned int		sig_on_uaccess_error:1;
 	unsigned int		uaccess_err:1;	/* uaccess failed */
 };
@@ -154,6 +159,15 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
+/* ti->ipipe_flags */
+#define TIP_MAYDAY	0	/* MAYDAY call is pending */
+#define TIP_NOTIFY	1	/* Notify head domain about kernel events */
+#define TIP_HEAD	2	/* Runs in head domain */
+
+#define _TIP_MAYDAY	(1 << TIP_MAYDAY)
+#define _TIP_NOTIFY	(1 << TIP_NOTIFY)
+#define _TIP_HEAD	(1 << TIP_HEAD)
+
 #define STACK_WARN		(THREAD_SIZE/8)
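The TIP_*/_TIP_* pairs follow the familiar TIF convention: a bit number for the bitops API plus a shifted mask for open-coded tests of the flags word. TIP_MAYDAY is checked on the IRQ return path (see __ipipe_handle_irq() in arch/x86/kernel/ipipe.c below). A trivial standalone demonstration of the convention, reusing the values from this hunk:

	#include <stdio.h>

	#define TIP_MAYDAY	0		/* MAYDAY call is pending */
	#define TIP_HEAD	2		/* runs in head domain */

	#define _TIP_MAYDAY	(1 << TIP_MAYDAY)
	#define _TIP_HEAD	(1 << TIP_HEAD)

	int main(void)
	{
		unsigned long ipipe_flags = _TIP_HEAD;

		ipipe_flags |= _TIP_MAYDAY;	/* e.g. the head domain needs help */
		if (ipipe_flags & _TIP_MAYDAY)	/* tested on the IRQ return path */
			printf("diverting to the mayday handler\n");
		return 0;
	}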
 
 /*
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 156959ca49ce..044151b4df66 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -56,41 +56,41 @@ asmlinkage void trace_page_fault(void);
 #define trace_async_page_fault async_page_fault
 #endif
 
-dotraplinkage void do_divide_error(struct pt_regs *, long);
-dotraplinkage void do_debug(struct pt_regs *, long);
+dotraplinkage int do_divide_error(struct pt_regs *, long);
+dotraplinkage int do_debug(struct pt_regs *, long);
 dotraplinkage void do_nmi(struct pt_regs *, long);
-dotraplinkage void do_int3(struct pt_regs *, long);
-dotraplinkage void do_overflow(struct pt_regs *, long);
+dotraplinkage int do_int3(struct pt_regs *, long);
+dotraplinkage int do_overflow(struct pt_regs *, long);
 dotraplinkage void do_bounds(struct pt_regs *, long);
-dotraplinkage void do_invalid_op(struct pt_regs *, long);
-dotraplinkage void do_device_not_available(struct pt_regs *, long);
-dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
-dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
-dotraplinkage void do_segment_not_present(struct pt_regs *, long);
-dotraplinkage void do_stack_segment(struct pt_regs *, long);
+dotraplinkage int do_invalid_op(struct pt_regs *, long);
+dotraplinkage int do_device_not_available(struct pt_regs *, long);
+dotraplinkage int do_coprocessor_segment_overrun(struct pt_regs *, long);
+dotraplinkage int do_invalid_TSS(struct pt_regs *, long);
+dotraplinkage int do_segment_not_present(struct pt_regs *, long);
+dotraplinkage int do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *, long);
+dotraplinkage int do_double_fault(struct pt_regs *, long);
 asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
-dotraplinkage void do_general_protection(struct pt_regs *, long);
-dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
+dotraplinkage int do_general_protection(struct pt_regs *, long);
+dotraplinkage int do_page_fault(struct pt_regs *, unsigned long);
 #ifdef CONFIG_TRACING
-dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
+dotraplinkage int trace_do_page_fault(struct pt_regs *, unsigned long);
 #else
-static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
+static inline int trace_do_page_fault(struct pt_regs *regs, unsigned long error)
 {
-	do_page_fault(regs, error);
+	return do_page_fault(regs, error);
 }
 #endif
-dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
-dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
-dotraplinkage void do_alignment_check(struct pt_regs *, long);
+dotraplinkage int do_spurious_interrupt_bug(struct pt_regs *, long);
+dotraplinkage int do_coprocessor_error(struct pt_regs *, long);
+dotraplinkage int do_alignment_check(struct pt_regs *, long);
 #ifdef CONFIG_X86_MCE
 dotraplinkage void do_machine_check(struct pt_regs *, long);
 #endif
-dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
+dotraplinkage int do_simd_coprocessor_error(struct pt_regs *, long);
 #ifdef CONFIG_X86_32
-dotraplinkage void do_iret_error(struct pt_regs *, long);
+dotraplinkage int do_iret_error(struct pt_regs *, long);
 #endif
 dotraplinkage void do_mce(struct pt_regs *, long);
 
@@ -107,9 +107,9 @@ static inline int get_si_code(unsigned long condition)
 extern int panic_on_unrecovered_nmi;
 
 void math_emulate(struct math_emu_info *);
-#ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void smp_threshold_interrupt(void);
+#ifndef CONFIG_X86_32
 asmlinkage void smp_deferred_error_interrupt(void);
 #endif
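Switching the handlers from void to int gives each trap a way to report that the fault was fully absorbed before reaching Linux proper (by the head domain, a kgdb fixup, and so on), so the low-level entry code can skip the usual exit work. A standalone sketch of the assumed convention; pipeline_absorbs_trap() merely stands in for the __ipipe_trap_prologue() logic added elsewhere in this patch:

	#include <stdio.h>

	struct pt_regs { unsigned long ip; };

	/* Stand-in for the __ipipe_trap_prologue() decision. */
	static int pipeline_absorbs_trap(struct pt_regs *regs)
	{
		return regs->ip == 0;	/* arbitrary toy condition */
	}

	/* Assumed convention: nonzero = pipeline consumed the trap, so
	 * the low-level entry code skips the usual Linux exit path. */
	static int do_example_trap(struct pt_regs *regs, long error_code)
	{
		if (pipeline_absorbs_trap(regs))
			return 1;
		printf("root domain handles trap, error=%ld\n", error_code);
		return 0;
	}

	int main(void)
	{
		struct pt_regs regs = { .ip = 0x1000 };

		return do_example_trap(&regs, 14) ? 1 : 0;
	}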
 
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 6d7c5479bcea..70040ca64f35 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -14,6 +14,7 @@
  */
 typedef unsigned long long cycles_t;
 
+extern struct clocksource clocksource_tsc;
 extern unsigned int cpu_khz;
 extern unsigned int tsc_khz;
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index d788b0cdc0ad..1d0371b4184f 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -67,7 +67,7 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
 	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
 })
 
-#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) && !defined(CONFIG_IPIPE)
 # define WARN_ON_IN_IRQ()	WARN_ON_ONCE(!in_task())
 #else
 # define WARN_ON_IN_IRQ()
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7947cee61f61..6bd278af3008 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,6 +14,9 @@ CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
+CFLAGS_REMOVE_microcode_core_early.o = -pg
+CFLAGS_REMOVE_microcode_intel_early.o = -pg
+CFLAGS_REMOVE_microcode_amd_early.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o := n
@@ -58,6 +61,7 @@ obj-y				+= reboot.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
+obj-$(CONFIG_IPIPE)		+= ipipe.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index deddc9b93299..f4b4ee29c47d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -34,6 +34,7 @@
 #include <linux/dmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/ipipe_tickdev.h>
 
 #include <asm/trace/irq_vectors.h>
 #include <asm/irq_remapping.h>
@@ -258,10 +259,10 @@ void native_apic_icr_write(u32 low, u32 id)
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 	apic_write(APIC_ICR, low);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 u64 native_apic_icr_read(void)
@@ -471,16 +472,20 @@ static int lapic_next_deadline(unsigned long delta,
 
 static int lapic_timer_shutdown(struct clock_event_device *evt)
 {
+	unsigned long flags;
 	unsigned int v;
 
 	/* Lapic used as dummy for broadcast ? */
 	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 		return 0;
 
+	flags = hard_local_irq_save();
 	v = apic_read(APIC_LVTT);
 	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 	apic_write(APIC_LVTT, v);
 	apic_write(APIC_TMICT, 0);
+	hard_local_irq_restore(flags);
+
 	return 0;
 }
 
@@ -515,6 +520,17 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
 #endif
 }
 
+#ifdef CONFIG_IPIPE
+static void lapic_itimer_ack(void)
+{
+	__ack_APIC_irq();
+}
+
+static DEFINE_PER_CPU(struct ipipe_timer, lapic_itimer) = {
+	.irq = ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
+	.ack = lapic_itimer_ack,
+};
+#endif /* CONFIG_IPIPE */
 
 /*
  * The local apic timer can be used for any function which is CPU local.
@@ -551,6 +567,16 @@ static void setup_APIC_timer(void)
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
 	levt->cpumask = cpumask_of(smp_processor_id());
+#ifdef CONFIG_IPIPE
+	if (!(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
+		levt->ipipe_timer = this_cpu_ptr(&lapic_itimer);
+	else {
+		static atomic_t once = ATOMIC_INIT(-1);
+		if (atomic_inc_and_test(&once))
+			printk(KERN_INFO
+			       "I-pipe: cannot use LAPIC as a tick device\n");
+	}
+#endif /* CONFIG_IPIPE */
 
 	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
 		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
@@ -1082,7 +1108,7 @@ void lapic_shutdown(void)
 	if (!cpu_has_apic && !apic_from_smp_config())
 		return;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 
 #ifdef CONFIG_X86_32
 	if (!enabled_via_apicbase)
@@ -1092,7 +1118,7 @@ void lapic_shutdown(void)
 		disable_local_APIC();
 
 
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 /**
@@ -1292,7 +1318,7 @@ void setup_local_APIC(void)
 			value = apic_read(APIC_ISR + i*0x10);
 			for (j = 31; j >= 0; j--) {
 				if (value & (1<<j)) {
-					ack_APIC_irq();
+					__ack_APIC_irq();
 					acked++;
 				}
 			}
@@ -1821,7 +1847,7 @@ static void __smp_spurious_interrupt(u8 vector)
 	 */
 	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
 	if (v & (1 << (vector & 0x1f)))
-		ack_APIC_irq();
+		__ack_APIC_irq();
 
 	inc_irq_stat(irq_spurious_count);
 
@@ -2303,12 +2329,12 @@ static int lapic_suspend(void)
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	disable_local_APIC();
 
 	irq_remapping_disable();
 
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 	return 0;
 }
 
@@ -2321,7 +2347,7 @@ static void lapic_resume(void)
 	if (!apic_pm_state.active)
 		return;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 
 	/*
 	 * IO-APIC and PIC have their own resume routines.
@@ -2375,7 +2401,7 @@ static void lapic_resume(void)
 
 	irq_remapping_reenable(x2apic_mode);
 
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
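From this file on, code that programs APIC registers trades local_irq_save() for hard_local_irq_save(). Under the pipeline, local_irq_save() only sets the root domain's virtual stall bit and leaves the CPU's IF flag alone, so a head-domain IRQ could still preempt the two-step ICR write; the hard_ variant performs a real cli. A toy model of the two masking levels (trailing-underscore names are stand-ins):

	#include <stdbool.h>
	#include <stdio.h>

	static bool hw_irqs_off;	/* the CPU's IF flag, inverted */
	static bool root_stalled;	/* the per-CPU virtual stall bit */

	static unsigned long hard_local_irq_save_(void)
	{
		bool was_off = hw_irqs_off;

		hw_irqs_off = true;	/* real cli: nothing preempts us now */
		return was_off;
	}

	static void hard_local_irq_restore_(unsigned long f) { hw_irqs_off = f; }

	static void local_irq_save_(unsigned long *f)
	{
		*f = root_stalled;
		root_stalled = true;	/* root IRQs deferred; head IRQs still fire */
	}

	static void local_irq_restore_(unsigned long f) { root_stalled = f; }

	int main(void)
	{
		unsigned long flags;

		local_irq_save_(&flags);	/* NOT enough for APIC sequences */
		local_irq_restore_(flags);

		flags = hard_local_irq_save_();
		/* apic_write(APIC_ICR2, ...); apic_write(APIC_ICR, ...);
		 * atomic w.r.t. all domains only because IF is really clear. */
		hard_local_irq_restore_(flags);
		printf("hw_irqs_off=%d root_stalled=%d\n", hw_irqs_off, root_stalled);
		return 0;
	}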
 
 /*
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f92ab36979a2..b2d77e1775ae 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -57,9 +57,9 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index ae50d3454d78..2e39774c39d5 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -57,6 +57,9 @@ static struct irq_chip ht_irq_chip = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_set_affinity	= ht_set_affinity,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move		= move_xxapic_irq,
+#endif
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fd945099fc95..930f33365432 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -76,7 +76,7 @@
 #define for_each_irq_pin(entry, head) \
 	list_for_each_entry(entry, &head, list)
 
-static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static IPIPE_DEFINE_RAW_SPINLOCK(ioapic_lock);
 static DEFINE_MUTEX(ioapic_mutex);
 static unsigned int ioapic_dynirq_base;
 static int ioapic_initialized;
@@ -464,13 +464,19 @@ static void io_apic_sync(struct irq_pin_list *entry)
 	readl(&io_apic->data);
 }
 
+static inline void __mask_ioapic(struct mp_chip_data *data)
+{
+	io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+
 static void mask_ioapic_irq(struct irq_data *irq_data)
 {
 	struct mp_chip_data *data = irq_data->chip_data;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	ipipe_lock_irq(irq_data->irq);
+	__mask_ioapic(data);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -486,6 +492,7 @@ static void unmask_ioapic_irq(struct irq_data *irq_data)
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	__unmask_ioapic(data);
+	ipipe_unlock_irq(irq_data->irq);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -529,14 +536,20 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector)
 	}
 }
 
-static void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
+static void _eoi_ioapic_pin(int vector, struct mp_chip_data *data)
 {
-	unsigned long flags;
 	struct irq_pin_list *entry;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	for_each_irq_pin(entry, data->irq_2_pin)
 		__eoi_ioapic_pin(entry->apic, entry->pin, vector);
+}
+
+void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	_eoi_ioapic_pin(vector, data);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -1222,6 +1235,19 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 #endif
 
+#ifdef CONFIG_IPIPE
+static void startup_legacy_irq(unsigned irq)
+{
+	unsigned long flags;
+	legacy_pic->mask(irq);
+	flags = hard_local_irq_save();
+	__ipipe_unlock_irq(irq);
+	hard_local_irq_restore(flags);
+}
+#else /* !CONFIG_IPIPE */
+#define startup_legacy_irq(irq) legacy_pic->mask(irq)
+#endif /* !CONFIG_IPIPE */
+
 static void __init setup_IO_APIC_irqs(void)
 {
 	unsigned int ioapic, pin;
@@ -1672,11 +1698,12 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	if (irq < nr_legacy_irqs()) {
-		legacy_pic->mask(irq);
+		startup_legacy_irq(irq);
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
 	__unmask_ioapic(data->chip_data);
+	ipipe_unlock_irq(irq);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -1684,7 +1711,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
 
 atomic_t irq_mis_count;
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
+#if defined(CONFIG_GENERIC_PENDING_IRQ) || (defined(CONFIG_IPIPE) && defined(CONFIG_SMP))
 static bool io_apic_level_ack_pending(struct mp_chip_data *data)
 {
 	struct irq_pin_list *entry;
@@ -1766,9 +1793,9 @@ static void ioapic_ack_level(struct irq_data *irq_data)
 {
 	struct irq_cfg *cfg = irqd_cfg(irq_data);
 	unsigned long v;
-	bool masked;
 	int i;
-
+#ifndef CONFIG_IPIPE
+	bool masked;
 	irq_complete_move(cfg);
 	masked = ioapic_irqd_mask(irq_data);
 
@@ -1826,6 +1853,24 @@ static void ioapic_ack_level(struct irq_data *irq_data)
 	}
 
 	ioapic_irqd_unmask(irq_data, masked);
+#else /* CONFIG_IPIPE */
+	/*
+	 * Prevent low priority IRQs grabbed by high priority domains
+	 * from being delayed, waiting for a high priority interrupt
+	 * handler running in a low priority domain to complete.
+	 * This code assumes hw interrupts off.
+	 */
+	i = cfg->vector;
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+	if (unlikely(!(v & (1 << (i & 0x1f))))) {
+		/* IO-APIC erratum: see comment above. */
+		atomic_inc(&irq_mis_count);
+		raw_spin_lock(&ioapic_lock);
+		_eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
+		raw_spin_unlock(&ioapic_lock);
+	}
+	__ack_APIC_irq();
+#endif /* CONFIG_IPIPE */
 }
 
 static void ioapic_ir_ack_level(struct irq_data *irq_data)
@@ -1838,7 +1883,7 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data)
 	 * intr-remapping table entry. Hence for the io-apic
 	 * EOI we use the pin number.
 	 */
-	ack_APIC_irq();
+	__ack_APIC_irq();
 	eoi_ioapic_pin(data->entry.vector, data);
 }
 
@@ -1867,6 +1912,69 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
 	return ret;
 }
 
+#ifdef CONFIG_IPIPE
+
+#ifdef CONFIG_SMP
+
+void move_xxapic_irq(struct irq_data *irq_data)
+{
+	unsigned int irq = irq_data->irq;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct mp_chip_data *data = irq_data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(irq_data);
+
+	if (desc->handle_irq == &handle_edge_irq) {
+		raw_spin_lock(&desc->lock);
+		irq_complete_move(cfg);
+		irq_move_irq(irq_data);
+		raw_spin_unlock(&desc->lock);
+	} else if (desc->handle_irq == &handle_fasteoi_irq) {
+		raw_spin_lock(&desc->lock);
+		irq_complete_move(cfg);
+		if (unlikely(irqd_is_setaffinity_pending(irq_data))) {
+			if (!io_apic_level_ack_pending(data))
+				irq_move_masked_irq(irq_data);
+			unmask_ioapic_irq(irq_data);
+		}
+		raw_spin_unlock(&desc->lock);
+	} else
+		WARN_ON_ONCE(1);
+}
+
+#endif  /* CONFIG_SMP */
+
+static void hold_ioapic_irq(struct irq_data *irq_data)
+{
+	struct mp_chip_data *data = irq_data->chip_data;
+
+	raw_spin_lock(&ioapic_lock);
+	__mask_ioapic(data);
+	raw_spin_unlock(&ioapic_lock);
+	ioapic_ack_level(irq_data);
+}
+
+static void hold_ioapic_ir_irq(struct irq_data *irq_data)
+{
+	struct mp_chip_data *data = irq_data->chip_data;
+
+	raw_spin_lock(&ioapic_lock);
+	__mask_ioapic(data);
+	raw_spin_unlock(&ioapic_lock);
+	ioapic_ir_ack_level(irq_data);
+}
+
+static void release_ioapic_irq(struct irq_data *irq_data)
+{
+	struct mp_chip_data *data = irq_data->chip_data;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	__unmask_ioapic(data);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#endif	/* CONFIG_IPIPE */
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name			= "IO-APIC",
 	.irq_startup		= startup_ioapic_irq,
@@ -1876,6 +1984,13 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_eoi		= ioapic_ack_level,
 	.irq_set_affinity	= ioapic_set_affinity,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+#ifdef CONFIG_IPIPE
+#ifdef CONFIG_SMP
+	.irq_move		= move_xxapic_irq,
+#endif
+	.irq_hold		= hold_ioapic_irq,
+	.irq_release		= release_ioapic_irq,
+#endif
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -1888,6 +2003,13 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
 	.irq_eoi		= ioapic_ir_ack_level,
 	.irq_set_affinity	= ioapic_set_affinity,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+#ifdef CONFIG_IPIPE
+#ifdef CONFIG_SMP
+	.irq_move		= move_xxapic_irq,
+#endif
+	.irq_hold		= hold_ioapic_ir_irq,
+	.irq_release		= release_ioapic_irq,
+#endif
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -1919,23 +2041,29 @@ static inline void init_IO_APIC_traps(void)
 
 static void mask_lapic_irq(struct irq_data *data)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	flags = hard_cond_local_irq_save();
+	ipipe_lock_irq(data->irq);
 	v = apic_read(APIC_LVT0);
 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	hard_cond_local_irq_restore(flags);
 }
 
 static void unmask_lapic_irq(struct irq_data *data)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	flags = hard_cond_local_irq_save();
 	v = apic_read(APIC_LVT0);
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	ipipe_unlock_irq(data->irq);
+	hard_cond_local_irq_restore(flags);
 }
 
 static void ack_lapic_irq(struct irq_data *data)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 static struct irq_chip lapic_chip __read_mostly = {
@@ -1943,6 +2071,9 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.irq_mask	= mask_lapic_irq,
 	.irq_unmask	= unmask_lapic_irq,
 	.irq_ack	= ack_lapic_irq,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move	= move_xxapic_irq,
+#endif
 };
 
 static void lapic_register_intr(int irq)
@@ -2062,7 +2193,7 @@ static inline void __init check_timer(void)
 	/*
 	 * get/set the timer IRQ vector:
 	 */
-	legacy_pic->mask(0);
+	startup_legacy_irq(0);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2159,6 +2290,10 @@ static inline void __init check_timer(void)
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
 	lapic_register_intr(0);
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64)
+	irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq;
+	irq_to_desc(0)->ipipe_end = __ipipe_nop_irq;
+#endif
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	legacy_pic->unmask(0);
 
@@ -2167,7 +2302,7 @@ static inline void __init check_timer(void)
 		goto out;
 	}
 	local_irq_disable();
-	legacy_pic->mask(0);
+	startup_legacy_irq(0);
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
 
@@ -2515,6 +2650,21 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
 	return 0;
 }
 
+#ifdef CONFIG_IPIPE
+unsigned int __ipipe_get_ioapic_irq_vector(int irq)
+{
+	if (irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS)
+		return ipipe_apic_irq_vector(irq);
+	else if (irq == IRQ_MOVE_CLEANUP_VECTOR)
+		return irq;
+	else {
+		if (irq_cfg(irq) == NULL)
+			return ISA_IRQ_VECTOR(irq); /* Assume ISA. */
+		return irq_cfg(irq)->vector;
+	}
+}
+#endif /* CONFIG_IPIPE */
+
 /*
  * This function currently is only a helper for the i386 smp boot process where
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
@@ -2956,7 +3106,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 		mp_setup_entry(cfg, data, info->ioapic_entry);
 	mp_register_handler(virq, data->trigger);
 	if (virq < nr_legacy_irqs())
-		legacy_pic->mask(virq);
+		startup_legacy_irq(virq);
 	local_irq_restore(flags);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG
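The new .irq_hold/.irq_release chip callbacks back the pipeline's deferral of level-triggered lines: when the root domain is stalled, the line is masked and EOIed ("held") so it stops reasserting, then unmasked ("released") once the Linux handler has actually run. A toy model of the pairing; mask_hw()/eoi_hw()/unmask_hw() stand in for __mask_ioapic(), ioapic_ack_level() and __unmask_ioapic():

	#include <stdbool.h>
	#include <stdio.h>

	struct line { bool masked; };

	static void mask_hw(struct line *l)   { l->masked = true; }
	static void unmask_hw(struct line *l) { l->masked = false; }
	static void eoi_hw(struct line *l)    { (void)l; /* EOI at the APIC */ }

	/* "Hold": run instead of a plain ack when the root domain cannot
	 * take the handler yet; masking before the EOI keeps the level-
	 * triggered line from re-raising immediately. */
	static void hold_line(struct line *l)
	{
		mask_hw(l);
		eoi_hw(l);
	}

	/* "Release": run once the deferred handler has finally executed. */
	static void release_line(struct line *l)
	{
		unmask_hw(l);
	}

	int main(void)
	{
		struct line irq9 = { .masked = false };

		hold_line(&irq9);	/* root stalled: park the line */
		/* ...the root domain unstalls and runs the handler... */
		release_line(&irq9);
		printf("masked=%d\n", irq9.masked);
		return 0;
	}
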
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 62071569bd50..7aed768eb166 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -28,12 +28,12 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
 	 * to an arbitrary mask, so I do a unicast to each CPU instead.
 	 * - mbligh
 	 */
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	for_each_cpu(query_cpu, mask) {
 		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
 				query_cpu), vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
@@ -45,14 +45,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 
 	/* See Hack comment above */
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
 			continue;
 		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
 				 query_cpu), vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 #ifdef CONFIG_X86_32
@@ -69,12 +69,12 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 	 * should be modified to do 1 message per cluster ID - mbligh
 	 */
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	for_each_cpu(query_cpu, mask)
 		__default_send_IPI_dest_field(
 			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 			vector, apic->dest_logical);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
@@ -86,7 +86,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 
 	/* See Hack comment above */
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
 			continue;
@@ -94,7 +94,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 			vector, apic->dest_logical);
 		}
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 /*
@@ -108,10 +108,10 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
 	if (!mask)
 		return;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
 	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 void default_send_IPI_allbutself(int vector)
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 5f1feb6854af..4f8c4ed48d3c 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -64,6 +64,9 @@ static struct irq_chip pci_msi_controller = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_compose_msi_msg	= irq_msi_compose_msg,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move		= move_xxapic_irq,
+#endif
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -153,6 +156,9 @@ static struct irq_chip pci_msi_ir_controller = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move		= move_xxapic_irq,
+#endif
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -186,6 +192,9 @@ static struct irq_chip dmar_msi_controller = {
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_compose_msi_msg	= irq_msi_compose_msg,
 	.irq_write_msi_msg	= dmar_msi_write_msg,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move		= move_xxapic_irq,
+#endif
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -276,6 +285,9 @@ static struct irq_chip hpet_msi_controller = {
 	.irq_retrigger = irq_chip_retrigger_hierarchy,
 	.irq_compose_msi_msg = irq_msi_compose_msg,
 	.irq_write_msi_msg = hpet_msi_write_msg,
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
+	.irq_move = move_xxapic_irq,
+#endif
 	.flags = IRQCHIP_SKIP_SET_WAKE,
 };
 
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 592e260ba05b..dd9c1b2f91bd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -29,7 +29,7 @@ struct apic_chip_data {
 };
 
 struct irq_domain *x86_vector_domain;
-static DEFINE_RAW_SPINLOCK(vector_lock);
+static IPIPE_DEFINE_RAW_SPINLOCK(vector_lock);
 static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
@@ -464,8 +464,13 @@ static void __setup_vector_irq(int cpu)
 		vector = data->cfg.vector;
 		per_cpu(vector_irq, cpu)[vector] = desc;
 	}
+
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		/* I-pipe requires initialized vector_irq for system vectors */
+		if (test_bit(vector, used_vectors))
+			continue;
+
 		desc = per_cpu(vector_irq, cpu)[vector];
 		if (IS_ERR_OR_NULL(desc))
 			continue;
@@ -483,7 +488,9 @@ void setup_vector_irq(int cpu)
 {
 	int irq;
 
+#ifndef CONFIG_IPIPE
 	lockdep_assert_held(&vector_lock);
+#endif
 	/*
 	 * On most of the platforms, legacy PIC delivers the interrupts on the
 	 * boot cpu. But there are certain platforms where PIC interrupts are
@@ -513,9 +520,11 @@ static int apic_retrigger_irq(struct irq_data *irq_data)
 
 void apic_ack_edge(struct irq_data *data)
 {
+#ifndef CONFIG_IPIPE
 	irq_complete_move(irqd_cfg(data));
 	irq_move_irq(data);
-	ack_APIC_irq();
+#endif /* !CONFIG_IPIPE */
+	__ack_APIC_irq();
 }
 
 static int apic_set_affinity(struct irq_data *irq_data,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cc8311c4d298..3f66e2e69787 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -34,7 +34,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 
 	this_cpu = smp_processor_id();
 
@@ -71,7 +71,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 		cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
 	}
 
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 662e9150ea6f..fe7cfcedc884 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -45,7 +45,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 
 	x2apic_wrmsr_fence();
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 
 	this_cpu = smp_processor_id();
 	for_each_cpu(query_cpu, mask) {
@@ -54,7 +54,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 439df975bc7a..38443fd6fc9a 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,6 +32,9 @@ void common(void) {
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
+#ifdef CONFIG_IPIPE
+	OFFSET(TI_ipipe, thread_info, ipipe_flags);
+#endif
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b12c0287d6cf..eb2ee1d1a7de 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1448,6 +1448,7 @@ void syscall_init(void)
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
 static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+#ifndef CONFIG_IPIPE
 DEFINE_PER_CPU(int, debug_stack_usage);
 
 int is_debug_stack(unsigned long addr)
@@ -1475,6 +1476,7 @@ void debug_stack_reset(void)
 		load_current_idt();
 }
 NOKPROBE_SYMBOL(debug_stack_reset);
+#endif /* !CONFIG_IPIPE */
 
 #else	/* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7b8c8c838191..85bba8bc710d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -47,6 +47,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/traps.h>
 
 #include "mce-internal.h"
 
@@ -1692,6 +1693,16 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 void (*machine_check_vector)(struct pt_regs *, long error_code) =
 						unexpected_machine_check;
 
+#ifdef CONFIG_IPIPE
+static int mce_trampoline(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(machine_check_vector, X86_TRAP_MC, regs, error_code);
+}
+
+int (*__ipipe_machine_check_vector)(struct pt_regs *, long error_code) =
+	mce_trampoline;
+#endif
+
 dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
 {
 	machine_check_vector(regs, error_code);
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index f8c81ba0b465..9be0d9cf619c 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -18,7 +18,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
 
 	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
 	rcr = getCx86(CX86_RCR_BASE + reg);
 	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
 
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 
 	shift = ((unsigned char *) base)[1] & 0x0f;
 	*base >>= PAGE_SHIFT;
@@ -178,6 +178,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
 			  unsigned long size, mtrr_type type)
 {
 	unsigned char arr, arr_type, arr_size;
+	unsigned long flags;
 
 	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
 
@@ -221,6 +222,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
 		}
 	}
 
+	flags = hard_local_irq_save();
+
 	prepare_set();
 
 	base <<= PAGE_SHIFT;
@@ -230,6 +233,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
 	setCx86(CX86_RCR_BASE + reg, arr_type);
 
 	post_set();
+
+	hard_local_irq_restore(flags);
 }
 
 typedef struct {
@@ -247,8 +252,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
 
 static void cyrix_set_all(void)
 {
+	unsigned long flags;
 	int i;
 
+	flags = hard_local_irq_save();
 	prepare_set();
 
 	/* the CCRs are not contiguous */
@@ -263,6 +270,7 @@ static void cyrix_set_all(void)
 	}
 
 	post_set();
+	hard_local_irq_restore(flags);
 }
 
 static const struct mtrr_ops cyrix_mtrr_ops = {
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 136ae86f4f5f..57e2303ae059 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -786,7 +786,7 @@ static void generic_set_all(void)
 	unsigned long mask, count;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	prepare_set();
 
 	/* Actually set the state */
@@ -796,7 +796,7 @@ static void generic_set_all(void)
 	pat_init();
 
 	post_set();
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 
 	/* Use the atomic bitops to update the global mask */
 	for (count = 0; count < sizeof mask * 8; ++count) {
@@ -820,12 +820,13 @@ static void generic_set_all(void)
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
 			     unsigned long size, mtrr_type type)
 {
-	unsigned long flags;
+	unsigned long rflags, vflags;
 	struct mtrr_var_range *vr;
 
 	vr = &mtrr_state.var_ranges[reg];
 
-	local_irq_save(flags);
+	local_irq_save(vflags);
+	rflags = hard_local_irq_save();
 	prepare_set();
 
 	if (size == 0) {
@@ -846,7 +847,8 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 	}
 
 	post_set();
-	local_irq_restore(flags);
+	hard_local_irq_restore(rflags);
+	local_irq_restore(vflags);
 }
 
 int generic_validate_add_page(unsigned long base, unsigned long size,
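generic_set_mtrr() now saves the virtual root state (vflags) before the hardware state (rflags) and restores them in reverse, presumably so that any root IRQ logged while the MTRRs are being rewritten is replayed only after both levels are back. A toy model of that nesting order (stand-in names):

	#include <stdbool.h>
	#include <stdio.h>

	static bool root_stalled, hw_irqs_off;

	static void local_irq_save_(unsigned long *f)
	{
		*f = root_stalled;
		root_stalled = true;
	}
	static void local_irq_restore_(unsigned long f) { root_stalled = f; }

	static unsigned long hard_local_irq_save_(void)
	{
		bool was = hw_irqs_off;

		hw_irqs_off = true;
		return was;
	}
	static void hard_local_irq_restore_(unsigned long f) { hw_irqs_off = f; }

	int main(void)
	{
		unsigned long vflags, rflags;

		local_irq_save_(&vflags);		/* 1. root sees IRQs as off */
		rflags = hard_local_irq_save_();	/* 2. the CPU really masks */
		/* prepare_set(); ...rewrite the MTRR...; post_set(); */
		hard_local_irq_restore_(rflags);	/* 3. head IRQs may fire again */
		local_irq_restore_(vflags);		/* 4. root replay happens last */
		printf("stalled=%d hw_off=%d\n", root_stalled, hw_irqs_off);
		return 0;
	}
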
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b322325424bc..7461138c5cf2 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -29,30 +29,13 @@ union fpregs_state init_fpstate __read_mostly;
  *
  *   - to debug kernel_fpu_begin()/end() correctness
  */
-static DEFINE_PER_CPU(bool, in_kernel_fpu);
+DEFINE_PER_CPU(bool, in_kernel_fpu);
 
 /*
  * Track which context is using the FPU on the CPU:
  */
 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
-static void kernel_fpu_disable(void)
-{
-	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
-	this_cpu_write(in_kernel_fpu, true);
-}
-
-static void kernel_fpu_enable(void)
-{
-	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
-	this_cpu_write(in_kernel_fpu, false);
-}
-
-static bool kernel_fpu_disabled(void)
-{
-	return this_cpu_read(in_kernel_fpu);
-}
-
 static bool interrupted_kernel_fpu_idle(void)
 {
 	return !kernel_fpu_disabled();
@@ -90,9 +73,11 @@ EXPORT_SYMBOL(irq_fpu_usable);
 void __kernel_fpu_begin(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
+	unsigned long flags;
 
 	WARN_ON_FPU(!irq_fpu_usable());
 
+	flags = hard_cond_local_irq_save();
 	kernel_fpu_disable();
 
 	if (fpu->fpregs_active) {
@@ -104,17 +89,21 @@ void __kernel_fpu_begin(void)
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	}
+	hard_cond_local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
 void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
+	unsigned long flags;
 
+	flags = hard_cond_local_irq_save();
 	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
+	hard_cond_local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
 
@@ -168,15 +157,17 @@ EXPORT_SYMBOL_GPL(irq_ts_restore);
  */
 void fpu__save(struct fpu *fpu)
 {
+	unsigned long flags;
+
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	preempt_disable();
+	flags = hard_preempt_disable();
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
-	preempt_enable();
+	hard_preempt_enable(flags);
 }
 EXPORT_SYMBOL_GPL(fpu__save);
 
@@ -215,6 +206,8 @@ EXPORT_SYMBOL_GPL(fpstate_init);
  */
 static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
+	unsigned long flags;
+
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
 
 	/*
@@ -239,13 +232,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * It shouldn't be an issue as even FNSAVE is plenty
 	 * fast in terms of critical section length.
 	 */
-	preempt_disable();
+	flags = hard_preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
 		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
 
 		copy_kernel_to_fpregs(&src_fpu->state);
 	}
-	preempt_enable();
+	hard_preempt_enable(flags);
 }
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
@@ -346,6 +339,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
  */
 void fpu__restore(struct fpu *fpu)
 {
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
 	fpu__activate_curr(fpu);
 
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
@@ -353,6 +349,7 @@ void fpu__restore(struct fpu *fpu)
 	fpregs_activate(fpu);
 	copy_kernel_to_fpregs(&fpu->state);
 	kernel_fpu_enable();
+	hard_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(fpu__restore);
 
@@ -365,21 +362,34 @@ EXPORT_SYMBOL_GPL(fpu__restore);
  * a state-restore is coming: either an explicit one,
  * or a reschedule.
  */
+
+#ifdef CONFIG_IPIPE
+#define FWAIT_PROLOGUE "sti\n"
+#define FWAIT_EPILOGUE "cli\n"
+#else
+#define FWAIT_PROLOGUE
+#define FWAIT_EPILOGUE
+#endif
+
 void fpu__drop(struct fpu *fpu)
 {
-	preempt_disable();
+	unsigned long flags;
+
+	flags = hard_preempt_disable();
 
 	if (fpu->fpregs_active) {
 		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
+		asm volatile(FWAIT_PROLOGUE
+			     "1: fwait\n"
 			     "2:\n"
+			     FWAIT_EPILOGUE
 			     _ASM_EXTABLE(1b, 2b));
 		fpregs_deactivate(fpu);
 	}
 
 	fpu->fpstate_active = 0;
 
-	preempt_enable();
+	hard_preempt_enable(flags);
 }
 
 /*
@@ -404,17 +414,21 @@ static inline void copy_init_fpstate_to_fpregs(void)
  */
 void fpu__clear(struct fpu *fpu)
 {
+	unsigned long flags;
+
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
 	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		/* FPU state will be reallocated lazily at the first use. */
 		fpu__drop(fpu);
 	} else {
+		flags = hard_local_irq_save();
 		if (!fpu->fpstate_active) {
 			fpu__activate_curr(fpu);
 			user_fpu_begin();
 		}
 		copy_init_fpstate_to_fpregs();
+		hard_local_irq_restore(flags);
 	}
 }
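hard_preempt_disable() is presumably the combined primitive: it hard-disables interrupts and returns the state to restore, which on a pipelined kernel subsumes preempt_disable(), since with hardware interrupts off neither Linux nor the head domain can reschedule the CPU while the FPU registers are in flux. A sketch of the assumed pairing:

	#include <stdbool.h>
	#include <stdio.h>

	static bool hw_irqs_off;

	/* Assumed semantics: a real cli plus saved state; no interrupt
	 * means no preemption from either domain. */
	static unsigned long hard_preempt_disable_(void)
	{
		bool was_off = hw_irqs_off;

		hw_irqs_off = true;
		return was_off;
	}

	static void hard_preempt_enable_(unsigned long flags)
	{
		hw_irqs_off = flags;	/* may re-enable and reschedule */
	}

	int main(void)
	{
		unsigned long flags = hard_preempt_disable_();

		/* copy_fpregs_to_fpstate(fpu); copy_kernel_to_fpregs(&fpu->state); */
		hard_preempt_enable_(flags);
		printf("hw_irqs_off=%d\n", hw_irqs_off);
		return 0;
	}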
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3fdc1e53aaac..c7f080f28fa1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -11,6 +11,7 @@
 #include <linux/cpu.h>
 #include <linux/pm.h>
 #include <linux/io.h>
+#include <linux/ipipe_tickdev.h>
 
 #include <asm/cpufeature.h>
 #include <asm/irqdomain.h>
@@ -136,6 +137,10 @@ int is_hpet_enabled(void)
 }
 EXPORT_SYMBOL_GPL(is_hpet_enabled);
 
+#ifdef CONFIG_IPIPE
+static DEFINE_PER_CPU(struct ipipe_timer, hpet_itimer);
+#endif
+
 static void _hpet_print_config(const char *function, int line)
 {
 	u32 i, timers, l, h;
@@ -285,6 +290,10 @@ static void hpet_legacy_clockevent_register(void)
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
+#if defined(CONFIG_IPIPE) && !defined(CONFIG_SMP)
+	hpet_clockevent.ipipe_timer = this_cpu_ptr(&hpet_itimer);
+	hpet_clockevent.ipipe_timer->irq = hpet_clockevent.irq;
+#endif
 	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
 	clockevents_config_and_register(&hpet_clockevent, hpet_freq,
 					HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
@@ -581,6 +590,9 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 	evt->tick_resume = hpet_msi_resume;
 	evt->set_next_event = hpet_msi_next_event;
 	evt->cpumask = cpumask_of(hdev->cpu);
+#ifdef CONFIG_IPIPE
+	evt->ipipe_timer = &per_cpu(hpet_itimer, cpu);
+#endif
 
 	clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
 					0x7FFFFFFF);
@@ -647,6 +659,21 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
 			break;
 	}
 
+#ifdef CONFIG_IPIPE
+	/*
+	 * Only register the ipipe_timers if there is one for each CPU.
+	 */
+	if (num_timers_used == num_possible_cpus()) {
+		for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+			struct hpet_dev *hdev = &hpet_devs[i];
+			if (hdev->flags & HPET_DEV_VALID) {
+				hdev->evt.ipipe_timer = &per_cpu(hpet_itimer, hdev->cpu);
+				hdev->evt.ipipe_timer->irq = hdev->irq;
+			}
+		}
+	}
+#endif /* CONFIG_IPIPE */
+
 	printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
 		num_timers, num_timers_used);
 }
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index efb82f07b29c..c8fa6b63439f 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -26,6 +26,10 @@ void __init setup_pit_timer(void)
 #ifndef CONFIG_X86_64
 static int __init init_pit_clocksource(void)
 {
+#ifdef CONFIG_IPIPE
+	if (!cpu_has_tsc && !is_hpet_enabled())
+		return clocksource_i8253_init();
+#endif /* CONFIG_IPIPE */
 	 /*
 	  * Several reasons not to register PIT as a clocksource:
 	  *
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4e3b8a587c88..cf1fb2d3f351 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -31,7 +31,7 @@
 static void init_8259A(int auto_eoi);
 
 static int i8259A_auto_eoi;
-DEFINE_RAW_SPINLOCK(i8259A_lock);
+IPIPE_DEFINE_RAW_SPINLOCK(i8259A_lock);
 
 /*
  * 8259A PIC functions to handle ISA devices:
@@ -59,6 +59,7 @@ static void mask_8259A_irq(unsigned int irq)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
+	ipipe_lock_irq(irq);
 	cached_irq_mask |= mask;
 	if (irq & 8)
 		outb(cached_slave_mask, PIC_SLAVE_IMR);
@@ -74,15 +75,18 @@ static void disable_8259A_irq(struct irq_data *data)
 
 static void unmask_8259A_irq(unsigned int irq)
 {
-	unsigned int mask = ~(1 << irq);
+	unsigned int mask = (1 << irq);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
+	if (cached_irq_mask & mask) {
+		cached_irq_mask &= ~mask;
+		if (irq & 8)
+			outb(cached_slave_mask, PIC_SLAVE_IMR);
+		else
+			outb(cached_master_mask, PIC_MASTER_IMR);
+		ipipe_unlock_irq(irq);
+	}
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
@@ -168,6 +172,18 @@ static void mask_and_ack_8259A(struct irq_data *data)
 	 */
 	if (cached_irq_mask & irqmask)
 		goto spurious_8259A_irq;
+#ifdef CONFIG_IPIPE
+	if (irq == 0) {
+		/*
+		 * Fast timer ack -- don't mask (unless supposedly
+		 * spurious). We trace outb's in order to detect
+		 * broken hardware inducing large delays.
+		 */
+		outb(0x60, PIC_MASTER_CMD);	/* Specific EOI to master. */
+		raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+		return;
+	}
+#endif /* CONFIG_IPIPE */
 	cached_irq_mask |= irqmask;
 
 handle_real_irq:
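The rewritten unmask path above only writes the IMR, and only calls ipipe_unlock_irq(), on a real masked-to-unmasked transition of the cached mask, so redundant unmask calls cannot unbalance the pipeline's lock/unlock pairing. A toy model of the idempotent unmask:

	#include <stdio.h>

	static unsigned int cached_mask = ~0u;	/* all lines start masked */

	static void unmask_line(unsigned int irq)
	{
		unsigned int bit = 1u << irq;

		if (cached_mask & bit) {	/* act only on a real transition */
			cached_mask &= ~bit;
			/* outb(cached_mask, IMR); ipipe_unlock_irq(irq); */
			printf("irq %u unmasked\n", irq);
		}				/* redundant unmasks are no-ops */
	}

	int main(void)
	{
		unmask_line(3);
		unmask_line(3);			/* second call does nothing */
		return 0;
	}
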
diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c
new file mode 100644
index 000000000000..0b27f1f1d95d
--- /dev/null
+++ b/arch/x86/kernel/ipipe.c
@@ -0,0 +1,517 @@
+/*   -*- linux-c -*-
+ *   linux/arch/x86/kernel/ipipe.c
+ *
+ *   Copyright (C) 2002-2012 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Architecture-dependent I-PIPE support for x86.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/clockchips.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/kgdb.h>
+#include <linux/ipipe_tickdev.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#include <asm/io.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif	/* CONFIG_X86_IO_APIC */
+#include <asm/apic.h>
+#endif	/* CONFIG_X86_LOCAL_APIC */
+#include <asm/fpu/internal.h>
+#include <asm/traps.h>
+#include <asm/tsc.h>
+#include <asm/mce.h>
+#include <asm/mmu_context.h>
+
+DEFINE_PER_CPU(unsigned long, __ipipe_cr2);
+EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2);
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *info)
+{
+	info->sys_nr_cpus = num_online_cpus();
+	info->sys_cpu_freq = __ipipe_cpu_freq;
+	info->sys_hrtimer_irq = per_cpu(ipipe_percpu.hrtimer_irq, 0);
+	info->sys_hrtimer_freq = __ipipe_hrtimer_freq;
+	info->sys_hrclock_freq = __ipipe_hrclock_freq;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_get_sysinfo);
+
+#ifdef CONFIG_X86_UV
+asmlinkage void uv_bau_message_interrupt(struct pt_regs *regs);
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
+asmlinkage void smp_threshold_interrupt(void);
+#endif
+
+void __ipipe_do_IRQ(unsigned int irq, void *cookie)
+{
+	void do_root_irq(struct pt_regs *regs,
+			 void (*handler)(struct pt_regs *regs));
+	void (*handler)(struct pt_regs *regs);
+	struct pt_regs *regs;
+
+	regs = raw_cpu_ptr(&ipipe_percpu.tick_regs);
+	regs->orig_ax = ~__ipipe_get_irq_vector(irq);
+	handler = (typeof(handler))cookie;
+	do_root_irq(regs, handler);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static void __ipipe_noack_apic(struct irq_desc *desc)
+{
+}
+
+static void __ipipe_ack_apic(struct irq_desc *desc)
+{
+	__ack_APIC_irq();
+}
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+/*
+ * __ipipe_enable_pipeline() -- We are running on the boot CPU, hw
+ * interrupts are off, and secondary CPUs are still lost in space.
+ */
+void __init __ipipe_enable_pipeline(void)
+{
+	unsigned int irq;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+	/* Map the APIC system vectors. */
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
+			  __ipipe_do_IRQ, smp_apic_timer_interrupt,
+			  __ipipe_ack_apic);
+
+#ifdef CONFIG_HAVE_KVM
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(POSTED_INTR_WAKEUP_VECTOR),
+			  __ipipe_do_IRQ, smp_kvm_posted_intr_wakeup_ipi,
+			  __ipipe_ack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(POSTED_INTR_VECTOR),
+			  __ipipe_do_IRQ, smp_kvm_posted_intr_ipi,
+			  __ipipe_ack_apic);
+#endif
+
+#if defined(CONFIG_X86_MCE_AMD) && defined(CONFIG_X86_64)
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(DEFERRED_ERROR_VECTOR),
+			  __ipipe_do_IRQ, smp_deferred_error_interrupt,
+			  __ipipe_ack_apic);
+#endif
+
+#ifdef CONFIG_X86_UV
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(UV_BAU_MESSAGE),
+			  __ipipe_do_IRQ, uv_bau_message_interrupt,
+			  __ipipe_ack_apic);
+#endif /* CONFIG_X86_UV */
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR),
+			  __ipipe_do_IRQ, smp_spurious_interrupt,
+			  __ipipe_noack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(ERROR_APIC_VECTOR),
+			  __ipipe_do_IRQ, smp_error_interrupt,
+			  __ipipe_ack_apic);
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(THERMAL_APIC_VECTOR),
+			  __ipipe_do_IRQ, smp_thermal_interrupt,
+			  __ipipe_ack_apic);
+#endif /* CONFIG_X86_THERMAL_VECTOR */
+
+#ifdef CONFIG_X86_MCE_THRESHOLD
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR),
+			  __ipipe_do_IRQ, smp_threshold_interrupt,
+			  __ipipe_ack_apic);
+#endif /* CONFIG_X86_MCE_THRESHOLD */
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(X86_PLATFORM_IPI_VECTOR),
+			  __ipipe_do_IRQ, smp_x86_platform_ipi,
+			  __ipipe_ack_apic);
+
+	/*
+	 * We expose two high priority APIC vectors the head domain
+	 * may use respectively for hires timing and SMP rescheduling.
+	 * We should never receive them in the root domain.
+	 */
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(IPIPE_HRTIMER_VECTOR),
+			  __ipipe_do_IRQ, smp_spurious_interrupt,
+			  __ipipe_ack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(IPIPE_RESCHEDULE_VECTOR),
+			  __ipipe_do_IRQ, smp_spurious_interrupt,
+			  __ipipe_ack_apic);
+
+#ifdef CONFIG_IRQ_WORK
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(IRQ_WORK_VECTOR),
+			  __ipipe_do_IRQ, smp_irq_work_interrupt,
+			  __ipipe_ack_apic);
+#endif /* CONFIG_IRQ_WORK */
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_SMP
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(RESCHEDULE_VECTOR),
+			  __ipipe_do_IRQ, smp_reschedule_interrupt,
+			  __ipipe_ack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR),
+			  __ipipe_do_IRQ, smp_call_function_interrupt,
+			  __ipipe_ack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR),
+			  __ipipe_do_IRQ, smp_call_function_single_interrupt,
+			  __ipipe_ack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  IRQ_MOVE_CLEANUP_VECTOR,
+			  __ipipe_do_IRQ, smp_irq_move_cleanup_interrupt,
+			  __ipipe_ack_apic);
+
+	ipipe_request_irq(ipipe_root_domain,
+			  ipipe_apic_vector_irq(REBOOT_VECTOR),
+			  __ipipe_do_IRQ, smp_reboot_interrupt,
+			  __ipipe_ack_apic);
+#endif	/* CONFIG_SMP */
+
+	/*
+	 * Finally, request the remaining ISA and IO-APIC
+	 * will simply return a silent -EBUSY, which is fine.
+	 * will just beget a silent -EBUSY error, that's ok.
+	 */
+	for (irq = 0; irq < NR_IRQS; irq++)
+		ipipe_request_irq(ipipe_root_domain, irq,
+				  __ipipe_do_IRQ, do_IRQ,
+				  NULL);
+}
+
+#ifdef CONFIG_SMP
+
+void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask)
+{
+	if (ipipe_virtual_irq_p(irq) ||
+	    irq_get_chip(irq)->irq_set_affinity == NULL)
+		return;
+
+	cpumask_and(&cpumask, &cpumask, cpu_online_mask);
+	if (WARN_ON_ONCE(cpumask_empty(&cpumask)))
+		return;
+
+	irq_get_chip(irq)->irq_set_affinity(irq_get_irq_data(irq), &cpumask, true);
+}
+EXPORT_SYMBOL_GPL(ipipe_set_irq_affinity);
+
+void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask)
+{
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
+
+	cpumask_clear_cpu(ipipe_processor_id(), &cpumask);
+	if (likely(!cpumask_empty(&cpumask)))
+		apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi));
+
+	hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_send_ipi);
+
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd)
+{
+	unsigned int ipi = IPIPE_CRITICAL_IPI;
+
+	ipd->irqs[ipi].ackfn = __ipipe_ack_apic;
+	ipd->irqs[ipi].handler = __ipipe_do_critical_sync;
+	ipd->irqs[ipi].cookie = NULL;
+	ipd->irqs[ipi].control = IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK;
+}
+
+#endif	/* CONFIG_SMP */
+
+void __ipipe_halt_root(int use_mwait)
+{
+	struct ipipe_percpu_domain_data *p;
+
+	/* Emulate sti+hlt sequence over the root domain. */
+
+	hard_local_irq_disable();
+
+	p = ipipe_this_cpu_root_context();
+
+	trace_hardirqs_on();
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (unlikely(__ipipe_ipending_p(p))) {
+		__ipipe_sync_stage();
+		hard_local_irq_enable();
+	} else {
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+		ipipe_trace_end(0x8000000E);
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+		if (use_mwait)
+			asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+				     :: "a" (0), "c" (0));
+		else
+			asm volatile("sti; hlt": : :"memory");
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_halt_root);
+
+int __ipipe_trap_prologue(struct pt_regs *regs, int trapnr, unsigned long *flags)
+{
+	bool hard_irqs_off = hard_irqs_disabled();
+	struct ipipe_domain *ipd;
+	unsigned long cr2;
+
+#ifdef CONFIG_KGDB
+	/* Fixup kgdb-own faults immediately. */
+	if (__ipipe_probe_access) {
+		const struct exception_table_entry *fixup =
+			search_exception_tables(regs->ip);
+		BUG_ON(!fixup);
+		regs->ip = (unsigned long)&fixup->fixup + fixup->fixup;
+		return 1;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (trapnr == X86_TRAP_PF)
+		cr2 = native_read_cr2();
+
+#ifdef CONFIG_KGDB
+	/*
+	 * Catch int1 and int3 for kgdb here. They may trigger over
+	 * inconsistent states even when the root domain is active.
+	 */
+	if (kgdb_io_module_registered &&
+	    (trapnr == X86_TRAP_DB || trapnr == X86_TRAP_BP)) {
+		unsigned int condition = 0;
+
+		if (trapnr == X86_TRAP_DB) {
+			if (atomic_read(&kgdb_cpu_doing_single_step) == -1 &&
+			    test_thread_flag(TIF_SINGLESTEP))
+				goto skip_kgdb;
+			get_debugreg(condition, 6);
+		}
+		if (!user_mode(regs) &&
+		    !kgdb_handle_exception(trapnr, SIGTRAP, condition, regs))
+			return 1;
+	}
+skip_kgdb:
+#endif /* CONFIG_KGDB */
+
+	if (unlikely(__ipipe_notify_trap(trapnr, regs)))
+		return 1;
+
+	if (likely(ipipe_root_p)) {
+		/*
+		 * If no head domain is installed, or in case we faulted in
+		 * the iret path of x86-32, regs->flags does not match the root
+		 * domain state. The fault handler may evaluate it, so fix it
+		 * up with the current state.
+		 */
+		local_save_flags(*flags);
+		__ipipe_fixup_if(raw_irqs_disabled_flags(*flags), regs);
+
+		/*
+		 * Sync Linux interrupt state with hardware state on
+		 * entry.
+		 */
+		if (hard_irqs_off)
+			local_irq_disable();
+	} else {
+		/*
+		 * Use the flags of the faulting context when restoring later.
+		 */
+		*flags = regs->flags;
+
+		/*
+		 * Detect unhandled faults over the head domain,
+		 * switching to root so that it can handle the fault
+		 * cleanly.
+		 */
+		hard_local_irq_disable();
+		ipd = __ipipe_current_domain;
+		__ipipe_set_current_domain(ipipe_root_domain);
+
+		/* Sync Linux interrupt state with hardware state on entry. */
+		if (hard_irqs_off)
+			local_irq_disable();
+
+		ipipe_trace_panic_freeze();
+
+		/* Always warn about user land and unfixable faults. */
+		if (user_mode(regs) ||
+		    !search_exception_tables(instruction_pointer(regs))) {
+			printk(KERN_ERR "BUG: Unhandled exception over domain"
+			       " %s at 0x%lx - switching to ROOT\n",
+			       ipd->name, instruction_pointer(regs));
+			dump_stack();
+			ipipe_trace_panic_dump();
+#ifdef CONFIG_IPIPE_DEBUG
+		/* Also report fixable ones when debugging is enabled. */
+		} else {
+			printk(KERN_WARNING "WARNING: Fixable exception over "
+			       "domain %s at 0x%lx - switching to ROOT\n",
+			       ipd->name, instruction_pointer(regs));
+			dump_stack();
+			ipipe_trace_panic_dump();
+#endif /* CONFIG_IPIPE_DEBUG */
+		}
+	}
+
+	if (trapnr == X86_TRAP_PF)
+		write_cr2(cr2);
+
+	return 0;
+}
+
+int __ipipe_handle_irq(struct pt_regs *regs)
+{
+	struct ipipe_percpu_data *p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+	int irq, vector = regs->orig_ax, flags = 0;
+	struct pt_regs *tick_regs;
+	struct irq_desc *desc;
+
+	if (likely(vector < 0)) {
+		vector = ~vector;
+		if (vector >= FIRST_SYSTEM_VECTOR)
+			irq = ipipe_apic_vector_irq(vector);
+		else {
+			desc = __this_cpu_read(vector_irq[vector]);
+			BUG_ON(IS_ERR_OR_NULL(desc));
+			irq = irq_desc_get_irq(desc);
+		}
+	} else { /* Software-generated. */
+		irq = vector;
+		flags = IPIPE_IRQF_NOACK;
+	}
+
+	ipipe_trace_irqbegin(irq, regs);
+
+	/*
+	 * Given our deferred dispatching model for regular IRQs, we
+	 * only record CPU regs for the last timer interrupt, so that
+	 * the timer handler charges CPU time properly. It is assumed
+	 * that no other interrupt handler cares about such information.
+	 */
+	if (irq == p->hrtimer_irq || p->hrtimer_irq == -1) {
+		tick_regs = &p->tick_regs;
+		tick_regs->flags = regs->flags;
+		tick_regs->cs = regs->cs;
+		tick_regs->ip = regs->ip;
+		tick_regs->bp = regs->bp;
+#ifdef CONFIG_X86_64
+		tick_regs->ss = regs->ss;
+		tick_regs->sp = regs->sp;
+#endif
+		if (!__ipipe_root_p)
+			tick_regs->flags &= ~X86_EFLAGS_IF;
+	}
+
+	__ipipe_dispatch_irq(irq, flags);
+
+	if (user_mode(regs) && ipipe_test_thread_flag(TIP_MAYDAY))
+		__ipipe_call_mayday(regs);
+
+	ipipe_trace_irqend(irq, regs);
+
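+	/*
+	 * Tell the entry code whether the root stage may run its
+	 * regular IRQ epilogue (non-zero), or must return right away
+	 * because we preempted a non-root domain or a stalled root
+	 * stage.
+	 */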
+	if (!__ipipe_root_p ||
+	    test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status))
+		return 0;
+
+	return 1;
+}
+
+void __ipipe_arch_share_current(int flags)
+{
+	struct task_struct *p = current;
+
+	/*
+	 * Setup a clean extended FPU state for kernel threads.
+	 */
+	if (p->mm == NULL)
+		memcpy(&p->thread.fpu.state, &init_fpstate, xstate_size);
+}
+
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE
+u64 __ipipe_get_cs_tsc(void);
+EXPORT_SYMBOL_GPL(__ipipe_get_cs_tsc);
+#endif
+#endif /* CONFIG_X86_32 */
+
+struct task_struct *__switch_to(struct task_struct *prev_p,
+				struct task_struct *next_p);
+EXPORT_SYMBOL_GPL(__switch_to);
+EXPORT_SYMBOL_GPL(do_munmap);
+EXPORT_SYMBOL_GPL(show_stack);
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+EXPORT_SYMBOL(tasklist_lock);
+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */
+
+#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64)
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
+#endif
+
+#ifdef CONFIG_IPIPE_LEGACY
+#ifdef CONFIG_TRACEPOINTS
+EXPORT_TRACEPOINT_SYMBOL_GPL(tlb_flush);
+#endif
+#ifdef CONFIG_PERF_EVENTS
+EXPORT_SYMBOL_GPL(rdpmc_always_available);
+#endif
+#endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9f669fdd2010..36bc5b876e9e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -51,7 +51,7 @@ void ack_bad_irq(unsigned int irq)
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
@@ -229,12 +229,12 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	 * IRQs.
 	 */
 
+	desc = __this_cpu_read(vector_irq[vector]);
+	__ipipe_move_root_irq(desc);
 	entering_irq();
 
 	/* entering_irq() tells RCU that we're not quiescent.  Check it. */
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
 
-	desc = __this_cpu_read(vector_irq[vector]);
-
 	if (!handle_irq(desc, regs)) {
 		ack_APIC_irq();
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 206d0b90a3ab..e9f82bfecad7 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -37,29 +37,35 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 	u64 irq_stack_top, irq_stack_bottom;
 	u64 estack_top, estack_bottom;
 	u64 curbase = (u64)task_stack_page(current);
+	unsigned long sp;
 
 	if (user_mode(regs))
 		return;
 
-	if (regs->sp >= curbase + sizeof(struct thread_info) +
+	if (IS_ENABLED(CONFIG_IPIPE))
+		sp = current_stack_pointer();
+	else
+		sp = regs->sp;
+
+	if (sp >= curbase + sizeof(struct thread_info) +
 				  sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
-	    regs->sp <= curbase + THREAD_SIZE)
+	    sp <= curbase + THREAD_SIZE)
 		return;
 
 	irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
 			STACK_TOP_MARGIN;
 	irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
-	if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
+	if (sp >= irq_stack_top && sp <= irq_stack_bottom)
 		return;
 
 	oist = this_cpu_ptr(&orig_ist);
 	estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
 	estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
-	if (regs->sp >= estack_top && regs->sp <= estack_bottom)
+	if (sp >= estack_top && sp <= estack_bottom)
 		return;
 
 	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
-		current->comm, curbase, regs->sp,
+		current->comm, curbase, sp,
 		irq_stack_top, irq_stack_bottom,
 		estack_top, estack_bottom);
 
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f480b38a03c3..c77d2eadd0f0 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -102,6 +102,9 @@ void __init init_IRQ(void)
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
+	unsigned __maybe_unused cpu;
+	int __maybe_unused ret;
+
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -118,9 +121,20 @@ static void __init smp_intr_init(void)
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+#ifdef CONFIG_IPIPE
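+	/*
+	 * The pipeline dispatches by IRQ descriptor: give the cleanup
+	 * vector a valid descriptor, mapped on each possible CPU.
+	 */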
+	ret = irq_alloc_descs(IRQ_MOVE_CLEANUP_VECTOR, 0, 1, 0);
+	BUG_ON(IRQ_MOVE_CLEANUP_VECTOR != ret);
+	for_each_possible_cpu(cpu)
+		per_cpu(vector_irq, cpu)[IRQ_MOVE_CLEANUP_VECTOR] =
+			irq_to_desc(IRQ_MOVE_CLEANUP_VECTOR);
+#endif
 
 	/* IPI used for rebooting/stopping */
 	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
+#ifdef CONFIG_IPIPE
+	alloc_intr_gate_notrace(IPIPE_RESCHEDULE_VECTOR, ipipe_reschedule_interrupt);
+	alloc_intr_gate_notrace(IPIPE_CRITICAL_VECTOR, ipipe_critical_interrupt);
+#endif
 #endif /* CONFIG_SMP */
 }
 
@@ -155,6 +169,9 @@ static void __init apic_intr_init(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#ifdef CONFIG_IPIPE
+	alloc_intr_gate_notrace(IPIPE_HRTIMER_VECTOR, ipipe_hrtimer_interrupt);
+#endif
 
 	/* IRQ work interrupts: */
 # ifdef CONFIG_IRQ_WORK
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 44256a62702b..14b17395fa9a 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -597,9 +597,9 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
 	unsigned long flags;
 	int ret;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	ret = __kgdb_notify(ptr, cmd);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 
 	return ret;
 }
@@ -755,12 +755,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 #endif /* CONFIG_DEBUG_RODATA */
 
 	bpt->type = BP_BREAKPOINT;
-	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
-				BREAK_INSTR_SIZE);
+	err = ipipe_probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				      BREAK_INSTR_SIZE);
 	if (err)
 		return err;
-	err = probe_kernel_write((char *)bpt->bpt_addr,
-				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+	err = ipipe_probe_kernel_write((char *)bpt->bpt_addr,
+				       arch_kgdb_ops.gdb_bpt_instr,
+				       BREAK_INSTR_SIZE);
 #ifdef CONFIG_DEBUG_RODATA
 	if (!err)
 		return err;
@@ -772,7 +773,8 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 		return -EBUSY;
 	text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
 		  BREAK_INSTR_SIZE);
-	err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+	err = ipipe_probe_kernel_read(opc, (char *)bpt->bpt_addr,
+				      BREAK_INSTR_SIZE);
 	if (err)
 		return err;
 	if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
@@ -797,14 +799,16 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 	if (mutex_is_locked(&text_mutex))
 		goto knl_write;
 	text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
-	err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+	err = ipipe_probe_kernel_read(opc, (char *)bpt->bpt_addr,
+				      BREAK_INSTR_SIZE);
 	if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
 		goto knl_write;
 	return err;
 knl_write:
 #endif /* CONFIG_DEBUG_RODATA */
-	return probe_kernel_write((char *)bpt->bpt_addr,
-				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+	return ipipe_probe_kernel_write((char *)bpt->bpt_addr,
+					(char *)bpt->saved_instr,
+					BREAK_INSTR_SIZE);
 }
 
 struct kgdb_arch arch_kgdb_ops = {
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index a311ffcaad16..482b42fa799e 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -6,6 +6,13 @@ static __init int add_pcspkr(void)
 {
 	struct platform_device *pd;
 
+#ifdef CONFIG_IPIPE
+	if (!cpu_has_tsc) {
+		printk(KERN_INFO
+		       "I-pipe: disabling PC speaker for TSC emulation.\n");
+		return -EBUSY;
+	}
+#endif /* CONFIG_IPIPE */
+
 	pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
 
 	return IS_ERR(pd) ? PTR_ERR(pd) : 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e18c8798c3a2..6f43b1a1a4b4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -105,8 +105,16 @@ void exit_thread(void)
 	if (bp) {
 		struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
 
-		t->io_bitmap_ptr = NULL;
+		/*
+		 * The caller may be preempted via I-pipe: to make
+		 * sure TIF_IO_BITMAP always denotes a valid I/O
+		 * bitmap when set, we clear it _before_ the I/O
+		 * bitmap pointer. No cache coherence issue ahead as
+		 * migration is currently locked (the primary domain
+		 * may never migrate either).
+		 */
 		clear_thread_flag(TIF_IO_BITMAP);
+		t->io_bitmap_ptr = NULL;
 		/*
 		 * Careful, clear this in the TSS too:
 		 */
@@ -462,7 +470,7 @@ bool xen_set_default_idle(void)
 #endif
 void stop_this_cpu(void *dummy)
 {
-	local_irq_disable();
+	hard_local_irq_disable();
 	/*
 	 * Remove this CPU:
 	 */
@@ -502,6 +510,10 @@ static void amd_e400_idle(void)
 			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
 			pr_info("System has AMD C1E enabled\n");
+#ifdef CONFIG_IPIPE
+			pr_info("I-pipe: will not be able to use LAPIC as a tick device\n"
+				"I-pipe: disable C1E power state in your BIOS\n");
+#endif
 		}
 	}
 
@@ -567,7 +579,11 @@ static void mwait_idle(void)
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		if (!need_resched())
+#ifdef CONFIG_IPIPE
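+			/*
+			 * Let the pipeline core do the equivalent of
+			 * sti;mwait, so that pending root IRQs are
+			 * synced before sleeping.
+			 */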
+			__ipipe_halt_root(1);
+#else
 			__sti_mwait(0, 0);
+#endif
 		else
 			local_irq_enable();
 		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f950917528b..8d496aef3037 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -245,7 +245,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 			     *next = &next_p->thread;
 	struct fpu *prev_fpu = &prev->fpu;
 	struct fpu *next_fpu = &next->fpu;
-	int cpu = smp_processor_id();
+	int cpu = raw_smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	fpu_switch_t fpu_switch;
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c7cc81e9bb84..a7bef64cbcc5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
 #include <asm/xen/hypervisor.h>
 
 asmlinkage extern void ret_from_fork(void);
+asmlinkage extern void thread_return(void);
 
 __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
@@ -163,6 +164,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
+	p->thread.rip = (unsigned long) thread_return;
 	set_tsk_thread_flag(p, TIF_FORK);
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -279,7 +281,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct thread_struct *next = &next_p->thread;
 	struct fpu *prev_fpu = &prev->fpu;
 	struct fpu *next_fpu = &next->fpu;
-	int cpu = smp_processor_id();
+	int cpu = raw_smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu_switch;
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 12c8286206ce..25b65105dc30 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -242,10 +242,10 @@ static void native_stop_other_cpus(int wait)
 	}
 
 finish:
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c017f1c71560..32c06a462b25 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -946,7 +946,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
-	unsigned long flags;
+	unsigned long vflags, rflags;
 	int err;
 
 	WARN_ON(irqs_disabled());
@@ -1003,9 +1003,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 	 * Check TSC synchronization with the AP (keep irqs disabled
 	 * while doing so):
 	 */
-	local_irq_save(flags);
+	local_irq_save(vflags);
+	rflags = hard_local_irq_save();
 	check_tsc_sync_source(cpu);
-	local_irq_restore(flags);
+	hard_local_irq_restore(rflags);
+	local_irq_restore(vflags);
 
 	while (!cpu_online(cpu)) {
 		cpu_relax();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8c73bf1492b8..b279dbaaebb8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -105,7 +105,16 @@ static inline void conditional_cli(struct pt_regs *regs)
 static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_disable();
+		/*
+		 * I-pipe doesn't virtualize the IRQ flags in the entry code.
+		 * Therefore we cannot call the original local_irq_disable here
+		 * because there will be no pairing IRQ enable for the root
+		 * domain. So just disable interrupts physically.
+		 *
+		 * There is also no I-pipe hard-irq tracing on return from the
+		 * exception, so do not trace here either.
+		 */
+		hard_local_irq_disable_notrace();
 	preempt_count_dec();
 }
 
@@ -293,9 +302,9 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
 }
 
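+/*
+ * IPIPE_DO_TRAP() runs the I-pipe trap prologue and epilogue around
+ * the original handler, returning non-zero when the event was fully
+ * handled out of band (head domain or debugger), in which case the
+ * entry code skips the regular exit path.
+ */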
 #define DO_ERROR(trapnr, signr, str, name)				\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
+dotraplinkage int do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	do_error_trap(regs, error_code, str, trapnr, signr);		\
+	return IPIPE_DO_TRAP(do_error_trap, trapnr, regs, error_code, str, trapnr, signr); \
 }
 
 DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error)
@@ -309,7 +318,7 @@ DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+static void __do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
@@ -357,6 +366,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	for (;;)
 		die(str, regs, error_code);
 }
+
+dotraplinkage int do_double_fault(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_double_fault, X86_TRAP_DF, regs, error_code);
+}
+
 #endif
 
 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
@@ -437,8 +452,8 @@ exit_trap:
 	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
 }
 
-dotraplinkage void
-do_general_protection(struct pt_regs *regs, long error_code)
+static void
+__do_general_protection(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk;
 
@@ -478,9 +493,15 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 }
+
+dotraplinkage int
+do_general_protection(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_general_protection, X86_TRAP_GP, regs, error_code);
+}
 NOKPROBE_SYMBOL(do_general_protection);
 
-dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
+static void notrace __do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
 	/*
@@ -525,6 +546,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 exit:
 	ist_exit(regs);
 }
+
+dotraplinkage int notrace do_int3(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_int3, X86_TRAP_BP, regs, error_code);
+}
 NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
@@ -596,7 +622,7 @@ NOKPROBE_SYMBOL(fixup_bad_iret);
  *
  * May run on IST stack.
  */
-dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
+static void __do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
 	int user_icebp = 0;
@@ -680,6 +706,11 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 exit:
 	ist_exit(regs);
 }
+
+dotraplinkage int do_debug(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_debug, X86_TRAP_DB, regs, error_code);
+}
 NOKPROBE_SYMBOL(do_debug);
 
 /*
@@ -728,27 +759,44 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	force_sig_info(SIGFPE, &info, task);
 }
 
-dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
+static void __do_coprocessor_error(struct pt_regs *regs, long error_code)
 {
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	math_error(regs, error_code, X86_TRAP_MF);
 }
 
-dotraplinkage void
-do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
+dotraplinkage int do_coprocessor_error(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_coprocessor_error, X86_TRAP_MF, regs, error_code);
+}
+
+static void
+__do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	math_error(regs, error_code, X86_TRAP_XF);
 }
 
-dotraplinkage void
-do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
+dotraplinkage int
+do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_simd_coprocessor_error, X86_TRAP_XF, regs, error_code);
+}
+
+static void
+__do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 {
 	conditional_sti(regs);
 }
 
+dotraplinkage int
+do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_spurious_interrupt_bug, X86_TRAP_SPURIOUS, regs, error_code);
+}
+
-dotraplinkage void
-do_device_not_available(struct pt_regs *regs, long error_code)
+static void
+__do_device_not_available(struct pt_regs *regs, long error_code)
 {
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 
@@ -768,10 +816,16 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 	conditional_sti(regs);
 #endif
 }
+
+dotraplinkage int
+do_device_not_available(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_device_not_available, X86_TRAP_NM, regs, error_code);
+}
 NOKPROBE_SYMBOL(do_device_not_available);
 
 #ifdef CONFIG_X86_32
-dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
+static void __do_iret_error(struct pt_regs *regs, long error_code)
 {
 	siginfo_t info;
 
@@ -788,6 +842,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 			&info);
 	}
 }
+
+dotraplinkage int do_iret_error(struct pt_regs *regs, long error_code)
+{
+	return IPIPE_DO_TRAP(__do_iret_error, X86_TRAP_IRET, regs, error_code);
+}
 #endif
 
 /* Set of traps needed for early debugging. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c42d4a3d9494..67e252b65d04 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -672,15 +672,15 @@ unsigned long native_calibrate_tsc(void)
 	int hpet = is_hpet_enabled(), i, loopmin;
 
 	/* Calibrate TSC using MSR for Intel Atom SoCs */
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	fast_calibrate = try_msr_calibrate_tsc();
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 	if (fast_calibrate)
 		return fast_calibrate;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	fast_calibrate = quick_pit_calibrate();
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 	if (fast_calibrate)
 		return fast_calibrate;
 
@@ -723,11 +723,11 @@ unsigned long native_calibrate_tsc(void)
 		 * calibration, which will take at least 50ms, and
 		 * read the end value.
 		 */
-		local_irq_save(flags);
+		flags = hard_local_irq_save();
 		tsc1 = tsc_read_refs(&ref1, hpet);
 		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
 		tsc2 = tsc_read_refs(&ref2, hpet);
-		local_irq_restore(flags);
+		hard_local_irq_restore(flags);
 
 		/* Pick the lowest PIT TSC calibration so far */
 		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
@@ -968,7 +968,7 @@ core_initcall(cpufreq_tsc);
 
 /* clocksource code */
 
-static struct clocksource clocksource_tsc;
+struct clocksource clocksource_tsc;
 
 /*
  * We used to compare the TSC to the cycle_last value in the clocksource
@@ -994,7 +994,7 @@ static cycle_t read_tsc(struct clocksource *cs)
 /*
  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
  */
-static struct clocksource clocksource_tsc = {
+struct clocksource clocksource_tsc = {
 	.name                   = "tsc",
 	.rating                 = 300,
 	.read                   = read_tsc,
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 7f4839ef3608..02c29e06500f 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -144,12 +144,14 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 		do_exit(SIGSEGV);
 	}
 
+	hard_cond_local_irq_disable();
 	tss = &per_cpu(cpu_tss, get_cpu());
 	tsk->thread.sp0 = vm86->saved_sp0;
 	tsk->thread.sysenter_cs = __KERNEL_CS;
 	load_sp0(tss, &tsk->thread);
 	vm86->saved_sp0 = 0;
 	put_cpu();
+	hard_cond_local_irq_enable();
 
 	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
@@ -354,6 +356,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	vm86->saved_sp0 = tsk->thread.sp0;
 	lazy_save_gs(vm86->regs32.gs);
 
+	hard_cond_local_irq_disable();
 	tss = &per_cpu(cpu_tss, get_cpu());
 	/* make room for real-mode segments */
 	tsk->thread.sp0 += 16;
@@ -363,6 +366,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 
 	load_sp0(tss, &tsk->thread);
 	put_cpu();
+	hard_cond_local_irq_enable();
 
 	if (vm86->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index df7827a981dd..0068d0fbfcdc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3809,7 +3809,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	clgi();
 
-	local_irq_enable();
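+	/*
+	 * Enable interrupts physically only: the virtual interrupt
+	 * state of the root stage must not change across the guest
+	 * switch.
+	 */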
+	hard_local_irq_enable();
 
 	asm volatile (
 		"push %%" _ASM_BP "; \n\t"
@@ -3917,7 +3917,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	reload_tss(vcpu);
 
-	local_irq_disable();
+	hard_local_irq_disable();
 
 	vcpu->arch.cr2 = svm->vmcb->save.cr2;
 	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
@@ -4283,6 +4283,7 @@ out:
 
 static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
 {
+	hard_cond_local_irq_enable();
 	local_irq_enable();
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5a4b1978cbf..342b2e8528fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2006,9 +2006,11 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
 {
-	preempt_disable();
+	unsigned long flags;
+
+	flags = hard_preempt_disable();
 	__vmx_load_host_state(vmx);
-	preempt_enable();
+	hard_preempt_enable(flags);
 }
 
 static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
@@ -2383,6 +2385,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs, index;
 
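+	/*
+	 * Guard against head domain preemption while the shared MSR
+	 * table is being rebuilt.
+	 */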
+	hard_cond_local_irq_disable();
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(&vmx->vcpu)) {
@@ -2412,6 +2415,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		move_msr_up(vmx, index, save_nmsrs++);
 
 	vmx->save_nmsrs = save_nmsrs;
+	hard_cond_local_irq_enable();
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_set_msr_bitmap(&vmx->vcpu);
@@ -8409,8 +8413,10 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
-	} else
+	} else {
+		hard_cond_local_irq_enable();
 		local_irq_enable();
+	}
 }
 
 static bool vmx_has_high_real_mode_segbase(void)
@@ -8870,7 +8876,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	vmx->vcpu.cpu = cpu;
 	err = vmx_vcpu_setup(vmx);
+	hard_cond_local_irq_disable();
 	vmx_vcpu_put(&vmx->vcpu);
+	hard_cond_local_irq_enable();
 	put_cpu();
 	if (err)
 		goto free_vmcs;
@@ -8895,6 +8903,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
+#ifdef CONFIG_IPIPE
+	vmx->vcpu.ipipe_notifier.handler = __ipipe_handle_vm_preemption;
+#endif
 
 	return &vmx->vcpu;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e6ab034f0bc7..3fabff0efb77 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <linux/ipipe.h>
 #include <linux/user-return-notifier.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
@@ -135,6 +136,7 @@ struct kvm_shared_msrs_global {
 struct kvm_shared_msrs {
 	struct user_return_notifier urn;
 	bool registered;
+	bool dirty;
 	struct kvm_shared_msr_values {
 		u64 host;
 		u64 curr;
@@ -193,12 +195,31 @@ static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 		vcpu->arch.apf.gfns[i] = ~0;
 }
 
+static void kvm_restore_shared_msrs(struct kvm_shared_msrs *locals)
+{
+	struct kvm_shared_msr_values *values;
+	unsigned long flags;
+	unsigned int slot;
+
+	flags = hard_cond_local_irq_save();
+	if (locals->dirty) {
+		for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
+			values = &locals->values[slot];
+			if (values->host != values->curr) {
+				wrmsrl(shared_msrs_global.msrs[slot],
+				       values->host);
+				values->curr = values->host;
+			}
+		}
+		locals->dirty = false;
+	}
+	hard_cond_local_irq_restore(flags);
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
-	unsigned slot;
 	struct kvm_shared_msrs *locals
 		= container_of(urn, struct kvm_shared_msrs, urn);
-	struct kvm_shared_msr_values *values;
 	unsigned long flags;
 
 	/*
@@ -211,13 +232,8 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 		user_return_notifier_unregister(urn);
 	}
 	local_irq_restore(flags);
-	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
-		values = &locals->values[slot];
-		if (values->host != values->curr) {
-			wrmsrl(shared_msrs_global.msrs[slot], values->host);
-			values->curr = values->host;
-		}
-	}
+	kvm_restore_shared_msrs(locals);
+	__ipipe_exit_vm();
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
@@ -267,6 +283,7 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 	if (err)
 		return 1;
 
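+	/* Host and guest MSR values now differ: flag for restore. */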
+	smsr->dirty = true;
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
@@ -2752,6 +2769,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	unsigned int cpu = smp_processor_id();
+	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	unsigned long flags;
+
+	flags = hard_cond_local_irq_save();
+
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
@@ -2761,8 +2784,30 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	 * guest. do_debug expects dr6 to be cleared after it runs, do the same.
 	 */
 	set_debugreg(0, 6);
+
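+	/*
+	 * With dirty shared MSRs, leaving VM mode is deferred until
+	 * kvm_on_user_return() has restored the host values.
+	 */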
+	if (!smsr->dirty)
+		__ipipe_exit_vm();
+
+	hard_cond_local_irq_restore(flags);
 }
 
+#ifdef CONFIG_IPIPE
+
+void __ipipe_handle_vm_preemption(struct ipipe_vm_notifier *nfy)
+{
+	unsigned int cpu = raw_smp_processor_id();
+	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	struct kvm_vcpu *vcpu;
+
+	vcpu = container_of(nfy, struct kvm_vcpu, ipipe_notifier);
+	kvm_arch_vcpu_put(vcpu);
+	kvm_restore_shared_msrs(smsr);
+	__ipipe_exit_vm();
+}
+EXPORT_SYMBOL_GPL(__ipipe_handle_vm_preemption);
+
+#endif
+
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
@@ -6570,6 +6615,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	preempt_disable();
+	local_irq_disable();
+	hard_cond_local_irq_disable();
+
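+	/*
+	 * Arm the VM notifier: if the head domain preempts us from
+	 * this point on, __ipipe_handle_vm_preemption() will restore
+	 * the host state.
+	 */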
+	__ipipe_enter_vm(&vcpu->ipipe_notifier);
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
@@ -6583,12 +6632,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 */
 	smp_mb__after_srcu_read_unlock();
 
-	local_irq_disable();
-
 	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
 	    || need_resched() || signal_pending(current)) {
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		smp_wmb();
+		hard_cond_local_irq_enable();
 		local_irq_enable();
 		preempt_enable();
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index c0c8b0a49bb8..15f49a26af11 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,6 +2,10 @@
 # Makefile for x86 specific library files.
 #
 
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_string_32.o = -pg
+endif
+
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
 quiet_cmd_inat_tables = GEN     $@
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index e5e3ed8dc079..7d34e3cd0b16 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
 	void *p;
 	int i;
 
-	if (unlikely(in_interrupt()))
+	if (unlikely(!ipipe_root_p || in_interrupt()))
 		return __memcpy(to, from, len);
 
 	p = to;
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index e342586db6e4..d21f071a5977 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -19,7 +19,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long ret;
 
-	if (__range_not_ok(from, n, TASK_SIZE))
+	if (!ipipe_root_p || __range_not_ok(from, n, TASK_SIZE))
 		return n;
 
 	/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c4dffae5d939..ff25da38c1d2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -380,7 +380,7 @@ static noinline int vmalloc_fault(unsigned long address)
 	 * happen within a race in page table update. In the later
 	 * case just flush:
 	 */
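+	/*
+	 * I-pipe: fetch the pgd from CR3 rather than active_mm, which
+	 * may be out of sync when faulting over the head domain.
+	 */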
-	pgd = pgd_offset(current->active_mm, address);
+	pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
@@ -1072,6 +1072,11 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	tsk = current;
 	mm = tsk->mm;
 
+#ifdef CONFIG_IPIPE
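+	/*
+	 * The trap prologue leaves hard IRQs off when a head domain
+	 * is present; turn them back on, we may sleep down this path.
+	 */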
+	if (ipipe_root_domain != ipipe_head_domain)
+		hard_cond_local_irq_enable();
+#endif
+
 	/*
 	 * Detect and handle instructions that would cause a page fault for
 	 * both a tracked kernel page and a userspace page.
@@ -1288,8 +1293,8 @@ good_area:
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void notrace
+___do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
@@ -1306,8 +1311,61 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+
+dotraplinkage int notrace
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	return IPIPE_DO_TRAP(___do_page_fault, X86_TRAP_PF, regs, error_code);
+}
 NOKPROBE_SYMBOL(do_page_fault);
 
+#ifdef CONFIG_IPIPE
+
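+/*
+ * Pre-sync kernel mappings into all page directories, so that the
+ * head domain never takes a deferred vmalloc fault on memory it may
+ * touch from non-root context.
+ */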
+void __ipipe_pin_mapping_globally(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_X86_32
+	unsigned long next, addr = start;
+
+	do {
+		unsigned long flags;
+		struct page *page;
+
+		next = pgd_addr_end(addr, end);
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru)
+			vmalloc_sync_one(page_address(page), addr);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	} while (addr = next, addr != end);
+#else
+	unsigned long next, addr = start;
+	pgd_t *pgd, *pgd_ref;
+	struct page *page;
+
+	if (!(start >= VMALLOC_START && start < VMALLOC_END))
+		return;
+
+	do {
+		next = pgd_addr_end(addr, end);
+		pgd_ref = pgd_offset_k(addr);
+		if (pgd_none(*pgd_ref))
+			continue;
+		spin_lock(&pgd_lock);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd = page_address(page) + pgd_index(addr);
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+		}
+		spin_unlock(&pgd_lock);
+		addr = next;
+	} while (addr != end);
+
+	arch_flush_lazy_mmu_mode();
+#endif
+}
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_TRACING
 static nokprobe_inline void
 trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
@@ -1319,8 +1377,8 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void notrace
-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void notrace
+__trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	/*
 	 * The exception_enter and tracepoint processing could
@@ -1336,5 +1394,11 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+
+dotraplinkage int notrace
+trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	return IPIPE_DO_TRAP(__trace_do_page_fault, X86_TRAP_PF, regs, error_code);
+}
 NOKPROBE_SYMBOL(trace_do_page_fault);
 #endif /* CONFIG_TRACING */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6d683bbb3502..c3eede4c0075 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -74,9 +74,12 @@ static void load_new_mm_cr3(pgd_t *pgdir)
  */
 void leave_mm(int cpu)
 {
+	unsigned long flags;
 	struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		BUG();
+	flags = hard_cond_local_irq_save();
 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
 		load_new_mm_cr3(swapper_pg_dir);
@@ -88,6 +91,7 @@ void leave_mm(int cpu)
 		 */
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	}
+	hard_cond_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
 
@@ -96,13 +100,13 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
-	switch_mm_irqs_off(prev, next, tsk);
-	local_irq_restore(flags);
+	flags = hard_local_irq_save();
+	__switch_mm_irqs_off(prev, next, tsk);
+	hard_local_irq_restore(flags);
 }
 
-void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-			struct task_struct *tsk)
+void __switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			  struct task_struct *tsk)
 {
 	unsigned cpu = smp_processor_id();
 
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 56bd16e77ae3..cad0fb6bf4a7 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -10,6 +10,9 @@ obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
 obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
 obj-$(CONFIG_EM_TIMER_STI)	+= em_sti.o
 obj-$(CONFIG_CLKBLD_I8253)	+= i8253.o
+ifdef CONFIG_X86_32
+obj-$(CONFIG_IPIPE_WANT_CLOCKSOURCE) += ipipe_i486_tsc_emu.o
+endif
 obj-$(CONFIG_CLKSRC_MMIO)	+= mmio.o
 obj-$(CONFIG_DIGICOLOR_TIMER)	+= timer-digicolor.o
 obj-$(CONFIG_DW_APB_TIMER)	+= dw_apb_timer.o
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 60c8a9bd562d..9f0d28160e78 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -9,6 +9,8 @@
 #include <linux/module.h>
 #include <linux/i8253.h>
 #include <linux/smp.h>
+#include <linux/ipipe.h>
+#include <linux/ipipe_tickdev.h>
 
 /*
  * Protects access to I/O ports
@@ -16,8 +18,9 @@
  * 0040-0043 : timer0, i8253 / i8254
  * 0061-0061 : NMI Control Register which contains two speaker control bits.
  */
-DEFINE_RAW_SPINLOCK(i8253_lock);
+IPIPE_DEFINE_RAW_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
+static unsigned periodic_pit_ch0;
 
 /*
  * Handle PIT quirk in pit_shutdown() where zeroing the counter register
@@ -40,6 +43,10 @@ static cycle_t i8253_read(struct clocksource *cs)
 	int count;
 	u32 jifs;
 
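+	/*
+	 * In oneshot mode, channel 0 no longer counts jiffy-long
+	 * periods: extrapolate the readout from the jiffies count
+	 * instead of latching the counter.
+	 */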
+	if (periodic_pit_ch0 == 0)
+		/* The PIT is not running in periodic mode. */
+		return jiffies * PIT_LATCH + (PIT_LATCH - 1) - old_count;
+
 	raw_spin_lock_irqsave(&i8253_lock, flags);
 	/*
 	 * Although our caller may have the read side of jiffies_lock,
@@ -100,8 +107,37 @@ static struct clocksource i8253_cs = {
 	.mask		= CLOCKSOURCE_MASK(32),
 };
 
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_PIT_COUNT2LATCH 0xfffe
+
+extern cycle_t __ipipe_get_8253_tsc(struct clocksource *cs);
+
+int __ipipe_last_8253_counter2;
+
+void ipipe_setup_8253_tsc(void)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&i8253_lock, flags);
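+	/* Channel 2, LSB/MSB, mode 2 (rate generator), binary count. */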
+	outb_p(0xb4, PIT_MODE);
+	outb_p(IPIPE_PIT_COUNT2LATCH & 0xff, PIT_CH2);
+	outb_p(IPIPE_PIT_COUNT2LATCH >> 8, PIT_CH2);
+	/* Gate high, disable speaker */
+	outb_p((inb_p(0x61) & ~0x2) | 1, 0x61);
+
+	raw_spin_unlock_irqrestore(&i8253_lock, flags);
+
+	i8253_cs.ipipe_read = __ipipe_get_8253_tsc;
+}
+#else /* !CONFIG_IPIPE */
+#define ipipe_setup_8253_tsc()	do { } while (0)
+#endif /* !CONFIG_IPIPE */
+
 int __init clocksource_i8253_init(void)
 {
+	if (!cpu_has_tsc)
+		ipipe_setup_8253_tsc();
 	return clocksource_register_hz(&i8253_cs, PIT_TICK_RATE);
 }
 #endif
@@ -127,6 +163,7 @@ static int pit_shutdown(struct clock_event_device *evt)
 
 static int pit_set_oneshot(struct clock_event_device *evt)
 {
+	periodic_pit_ch0 = 0;
 	raw_spin_lock(&i8253_lock);
 	outb_p(0x38, PIT_MODE);
 	raw_spin_unlock(&i8253_lock);
@@ -135,6 +172,7 @@ static int pit_set_oneshot(struct clock_event_device *evt)
 
 static int pit_set_periodic(struct clock_event_device *evt)
 {
+	periodic_pit_ch0 = 1;
 	raw_spin_lock(&i8253_lock);
 
 	/* binary, mode 2, LSB/MSB, ch 0 */
@@ -154,13 +192,25 @@ static int pit_set_periodic(struct clock_event_device *evt)
 static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
 {
 	raw_spin_lock(&i8253_lock);
+#ifndef CONFIG_IPIPE
 	outb_p(delta & 0xff , PIT_CH0);	/* LSB */
 	outb_p(delta >> 8 , PIT_CH0);		/* MSB */
+#else /* CONFIG_IPIPE */
+	outb(delta & 0xff , PIT_CH0);	/* LSB */
+	outb(delta >> 8 , PIT_CH0);		/* MSB */
+#endif /* CONFIG_IPIPE */
 	raw_spin_unlock(&i8253_lock);
 
 	return 0;
 }
 
+#ifdef CONFIG_IPIPE
+static struct ipipe_timer i8253_itimer = {
+	.irq = 0,
+	.min_delay_ticks = 1,
+};
+#endif /* CONFIG_IPIPE */
+
 /*
  * On UP the PIT can serve all of the possible timer functions. On SMP systems
  * it can be solely used for the global tick.
@@ -171,6 +221,9 @@ struct clock_event_device i8253_clockevent = {
 	.set_state_shutdown	= pit_shutdown,
 	.set_state_periodic	= pit_set_periodic,
 	.set_next_event		= pit_next_event,
+#ifdef CONFIG_IPIPE
+	.ipipe_timer    = &i8253_itimer,
+#endif /* CONFIG_IPIPE */
 };
 
 /*
diff --git a/drivers/clocksource/ipipe_i486_tsc_emu.S b/drivers/clocksource/ipipe_i486_tsc_emu.S
new file mode 100644
index 000000000000..f07d61459ab2
--- /dev/null
+++ b/drivers/clocksource/ipipe_i486_tsc_emu.S
@@ -0,0 +1,97 @@
+#include <linux/linkage.h>
+
+#define PIT_MODE	0x43
+#define PIT_CH2		0x42
+#define PIT_COUNT2LATCH 0xfffe
+
+.macro SAVE reg
+	pushl %\reg
+.endm
+
+.macro RESTORE reg
+	popl %\reg
+.endm
+
+ENTRY(__ipipe_get_8253_tsc)
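+	/* Latch the channel 2 count (read-back command, count only). */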
+	mov	$0xd8, %al
+	out	%al, $(PIT_MODE)
+	in	$(PIT_CH2), %al
+	xor	%ecx, %ecx
+	mov	%al, %cl
+	in	$(PIT_CH2), %al
+	mov	%al, %ch
+
+	mov	__ipipe_last_8253_counter2, %eax
+	mov	__ipipe_cs_last_tsc + 4, %edx
+	sub	%ecx, %eax
+	mov	%ecx, __ipipe_last_8253_counter2
+	test	%eax, %eax
+	mov	__ipipe_cs_last_tsc, %ecx
+	jg	1f
+	add	$(PIT_COUNT2LATCH), %eax
+1:	add	%ecx, %eax
+	adc	$0, %edx
+	mov	%eax, __ipipe_cs_last_tsc
+	mov	%edx, __ipipe_cs_last_tsc + 4
+
+	ret
+
+ENDPROC(__ipipe_get_8253_tsc)
+
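+/*
+ * Extend the registered clocksource readout to 64 bits. The counter
+ * may be 64, 32 or fewer bits wide; interrupts are kept off across
+ * the readout so that __ipipe_cs_last_tsc stays consistent.
+ */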
+ENTRY(__ipipe_get_cs_tsc)
+	SAVE	ecx
+
+	pushfl
+	cli
+
+	mov	__ipipe_cs_mask + 4, %ecx
+	mov	__ipipe_cs_mask, %edx
+	cmp	$0xffffffff, %ecx
+	mov	__ipipe_cs, %eax
+	jne	1f
+
+	/* 64 bits clocksource */
+	call	*__ipipe_cs_read
+	jmp	4f
+
+1:	cmp	$0xffffffff, %edx
+	jne	2f
+
+	/* 32 bits clocksource */
+	call	*__ipipe_cs_read
+
+	mov	__ipipe_cs_last_tsc + 4, %edx
+	cmp	__ipipe_cs_last_tsc, %eax
+	adc	$0, %edx
+
+	jmp	4f
+
+	/* n bits (< 32) clocksource */
+2:	SAVE	ebx
+
+	mov	%edx, %ebx
+	call	*__ipipe_cs_read
+
+	mov	__ipipe_cs_last_tsc, %ecx
+	and	%ebx, %eax
+	mov	%ebx, %edx
+	and	%ecx, %ebx
+	not	%edx
+	cmp	%ebx, %eax
+	jae	3f
+	sub	%edx, %eax
+3:	and	%edx, %ecx
+	mov	__ipipe_cs_last_tsc + 4, %edx
+	add	%ecx, %eax
+	adc	$0, %edx
+
+	RESTORE	ebx
+
+4:	mov	%eax, __ipipe_cs_last_tsc
+	mov	%edx, __ipipe_cs_last_tsc + 4
+	popfl
+	RESTORE	ecx
+	ret
+
+	/* n bits clocksource with 32 < n < 64, not supported. */
+ENDPROC(__ipipe_get_cs_tsc)
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 7eb4109a3df4..09b3d540174a 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -21,7 +21,7 @@
  * With multiple simultaneous hypertransport irq devices it might pay
  * to make this more fine grained.  But start with simple, stupid, and correct.
  */
-static DEFINE_SPINLOCK(ht_irq_lock);
+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock);
 
 void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
 {
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 049ccc070ce5..778796066fb4 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2222,6 +2222,17 @@ EXPORT_SYMBOL(dev_printk_emit);
 static void __dev_printk(const char *level, const struct device *dev,
 			struct va_format *vaf)
 {
+#ifdef CONFIG_IPIPE
+	/*
+	 * Divert to the deferred I-pipe log when hard IRQs are off or
+	 * when running over a non-root stage: the regular console
+	 * output path is unsafe in these contexts.
+	 */
+	if (hard_irqs_disabled() || !ipipe_root_p) {
+		__ipipe_log_printk(vaf->fmt, *vaf->va);
+		return;
+	}
+#endif
+
 	if (dev)
 		dev_printk_emit(level[1] - '0', dev, "%s %s: %pV",
 				dev_driver_string(dev), dev_name(dev), vaf);
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 8c7930b5a65f..c7f7df063786 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -3,6 +3,7 @@ menu "CPU Idle"
 config CPU_IDLE
 	bool "CPU idle PM support"
 	default y if ACPI || PPC_PSERIES
+	depends on !(ARCH_OMAP4 && IPIPE)
 	select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
 	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
 	help
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 8adaaeae3268..8f87daea2fa0 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -158,7 +158,7 @@ void panic_if_irq_remap(const char *msg)
 
 void ir_ack_apic_edge(struct irq_data *data)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 /**
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 39126460c1f5..aebc7f9cdcce 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -594,6 +594,48 @@ static void univ8250_console_write(struct console *co, const char *s,
 	serial8250_console_write(up, s, count);
 }
 
+#ifdef CONFIG_RAW_PRINTK
+
+static void raw_write_char(struct uart_8250_port *up, int c)
+{
+	unsigned int status, tmout = 10000;
+
+	for (;;) {
+		status = serial_in(up, UART_LSR);
+		up->lsr_saved_flags |= status & LSR_SAVE_FLAGS;
+		if ((status & UART_LSR_THRE) == UART_LSR_THRE)
+			break;
+		if (--tmout == 0)
+			break;
+		cpu_relax();
+	}
+	serial_port_out(&up->port, UART_TX, c);
+}
+
+static void univ8250_console_write_raw(struct console *co, const char *s,
+				       unsigned int count)
+{
+	struct uart_8250_port *up = &serial8250_ports[co->index];
+	unsigned int ier;
+
+	ier = serial_in(up, UART_IER);
+
+	if (up->capabilities & UART_CAP_UUE)
+		serial_out(up, UART_IER, UART_IER_UUE);
+	else
+		serial_out(up, UART_IER, 0);
+
+	while (count-- > 0) {
+		if (*s == '\n')
+			raw_write_char(up, '\r');
+		raw_write_char(up, *s++);
+	}
+
+	serial_out(up, UART_IER, ier);
+}
+
+#endif
+
 static int univ8250_console_setup(struct console *co, char *options)
 {
 	struct uart_port *port;
@@ -670,7 +712,12 @@ static struct console univ8250_console = {
 	.device		= uart_console_device,
 	.setup		= univ8250_console_setup,
 	.match		= univ8250_console_match,
+#ifdef CONFIG_RAW_PRINTK
+	.write_raw	= univ8250_console_write_raw,
+	.flags		= CON_PRINTBUFFER | CON_ANYTIME | CON_RAW,
+#else
 	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
+#endif
 	.index		= -1,
 	.data		= &serial8250_reg,
 };
diff --git a/fs/exec.c b/fs/exec.c
index 9c5ee2a880aa..f08cafadf205 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -866,6 +866,7 @@ static int exec_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk;
 	struct mm_struct *old_mm, *active_mm;
+	unsigned long flags;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -889,8 +890,10 @@ static int exec_mmap(struct mm_struct *mm)
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
 	tsk->mm = mm;
+	ipipe_mm_switch_protect(flags);
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
+	ipipe_mm_switch_unprotect(flags);
 	tsk->mm->vmacache_seqnum = 0;
 	vmacache_flush(tsk);
 	task_unlock(tsk);
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 74f1a3704d7a..90f62b6bf609 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -70,9 +70,9 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long flags;						\
 									\
-	raw_local_irq_save(flags);					\
+	flags = hard_local_irq_save();					\
 	v->counter = v->counter c_op i;					\
-	raw_local_irq_restore(flags);					\
+	hard_local_irq_restore(flags);					\
 }
 
 #define ATOMIC_OP_RETURN(op, c_op)					\
@@ -81,9 +81,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	unsigned long flags;						\
 	int ret;							\
 									\
-	raw_local_irq_save(flags);					\
+	flags = hard_local_irq_save();					\
 	ret = (v->counter = v->counter c_op i);				\
-	raw_local_irq_restore(flags);					\
+	hard_local_irq_restore(flags);					\
 									\
 	return ret;							\
 }
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 49673510b484..ba3d54d7ae22 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -21,20 +21,20 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
  * this is the substitute */
 #define _atomic_spin_lock_irqsave(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);	\
-	local_irq_save(f);			\
+	(f) = hard_local_irq_save();		\
 	arch_spin_lock(s);			\
 } while(0)
 
 #define _atomic_spin_unlock_irqrestore(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);		\
 	arch_spin_unlock(s);				\
-	local_irq_restore(f);				\
+	hard_local_irq_restore(f);			\
 } while(0)
 
 
 #else
-#  define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
-#  define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
+#  define _atomic_spin_lock_irqsave(l,f) do { (f) = hard_local_irq_save(); } while (0)
+#  define _atomic_spin_unlock_irqrestore(l,f) do { hard_local_irq_restore(f); } while (0)
 #endif
 
 /*
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index 70bef78912b7..145e07116636 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -22,7 +22,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 	if (size == 8 && sizeof(unsigned long) != 8)
 		wrong_size_cmpxchg(ptr);
 
-	raw_local_irq_save(flags);
+	flags = hard_local_irq_save();
 	switch (size) {
 	case 1: prev = *(u8 *)ptr;
 		if (prev == old)
@@ -43,7 +43,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 	default:
 		wrong_size_cmpxchg(ptr);
 	}
-	raw_local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 	return prev;
 }
 
@@ -56,11 +56,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
 	u64 prev;
 	unsigned long flags;
 
-	raw_local_irq_save(flags);
+	flags = hard_local_irq_save();
 	prev = *(u64 *)ptr;
 	if (prev == old)
 		*(u64 *)ptr = new;
-	raw_local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 	return prev;
 }
 
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 7d58ffdacd62..03b7f0deb1aa 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -43,11 +43,33 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
 #endif
 
+#ifdef CONFIG_IPIPE
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+extern int __ipipe_check_percpu_access(void);
+#define __ipipe_cpu_offset					\
+	({							\
+		WARN_ON_ONCE(__ipipe_check_percpu_access());	\
+		__my_cpu_offset;				\
+	})
+#else
+#define __ipipe_cpu_offset  __my_cpu_offset
+#endif
+#ifndef __ipipe_raw_cpu_ptr
+#define __ipipe_raw_cpu_ptr(ptr)  SHIFT_PERCPU_PTR(ptr, __ipipe_cpu_offset)
+#endif
+#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var)))
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
 #endif
 
-#endif	/* SMP */
+#else /* !SMP */
+
+#define __ipipe_raw_cpu_ptr(ptr)  VERIFY_PERCPU_PTR(ptr)
+#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var)))
+
+#endif	/* !SMP */
 
 #ifndef PER_CPU_BASE_SECTION
 #ifdef CONFIG_SMP
@@ -137,19 +159,19 @@ do {									\
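+/*
+ * The generic this_cpu ops must be atomic against head domain
+ * preemption as well, hence the hard interrupt masking below.
+ */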
 #define this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
 	unsigned long __flags;						\
-	raw_local_irq_save(__flags);					\
+	__flags = hard_local_irq_save();				\
 	*raw_cpu_ptr(&(pcp)) op val;					\
-	raw_local_irq_restore(__flags);					\
+	hard_local_irq_restore(__flags);				\
 } while (0)
 
 #define this_cpu_generic_add_return(pcp, val)				\
 ({									\
 	typeof(pcp) __ret;						\
 	unsigned long __flags;						\
-	raw_local_irq_save(__flags);					\
+	__flags = hard_local_irq_save();				\
 	raw_cpu_add(pcp, val);						\
 	__ret = raw_cpu_read(pcp);					\
-	raw_local_irq_restore(__flags);					\
+	hard_local_irq_restore(__flags);				\
 	__ret;								\
 })
 
@@ -157,10 +179,10 @@ do {									\
 ({									\
 	typeof(pcp) __ret;						\
 	unsigned long __flags;						\
-	raw_local_irq_save(__flags);					\
+	__flags = hard_local_irq_save();				\
 	__ret = raw_cpu_read(pcp);					\
 	raw_cpu_write(pcp, nval);					\
-	raw_local_irq_restore(__flags);					\
+	hard_local_irq_restore(__flags);				\
 	__ret;								\
 })
 
@@ -168,11 +190,11 @@ do {									\
 ({									\
 	typeof(pcp) __ret;						\
 	unsigned long __flags;						\
-	raw_local_irq_save(__flags);					\
+	__flags = hard_local_irq_save();				\
 	__ret = raw_cpu_read(pcp);					\
 	if (__ret == (oval))						\
 		raw_cpu_write(pcp, nval);				\
-	raw_local_irq_restore(__flags);					\
+	hard_local_irq_restore(__flags);				\
 	__ret;								\
 })
 
@@ -180,10 +202,10 @@ do {									\
 ({									\
 	int __ret;							\
 	unsigned long __flags;						\
-	raw_local_irq_save(__flags);					\
+	__flags = hard_local_irq_save();				\
 	__ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2,		\
 			oval1, oval2, nval1, nval2);			\
-	raw_local_irq_restore(__flags);					\
+	hard_local_irq_restore(__flags);				\
 	__ret;								\
 })
 
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 5d8ffa3e6f8c..24cfa8bfdda0 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -50,16 +50,19 @@ static __always_inline bool test_preempt_need_resched(void)
 
 static __always_inline void __preempt_count_add(int val)
 {
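+	/* Preemption counts are only tracked by the root stage. */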
+	ipipe_preempt_root_only();
 	*preempt_count_ptr() += val;
 }
 
 static __always_inline void __preempt_count_sub(int val)
 {
+	ipipe_preempt_root_only();
 	*preempt_count_ptr() -= val;
 }
 
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
+	ipipe_preempt_root_only();
 	/*
 	 * Because of load-store architectures cannot do per-cpu atomic
 	 * operations; we cannot use PREEMPT_NEED_RESCHED because it might get
diff --git a/include/asm-generic/switch_to.h b/include/asm-generic/switch_to.h
index 052c4ac04fd5..6472b800848c 100644
--- a/include/asm-generic/switch_to.h
+++ b/include/asm-generic/switch_to.h
@@ -21,10 +21,17 @@
  */
 extern struct task_struct *__switch_to(struct task_struct *,
 				       struct task_struct *);
-
+#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH
 #define switch_to(prev, next, last)					\
 	do {								\
+		hard_cond_local_irq_disable();				\
 		((last) = __switch_to((prev), (next)));			\
+		hard_cond_local_irq_enable();				\
 	} while (0)
-
+#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
+#define switch_to(prev, next, last)					\
+	do {								\
+		((last) = __switch_to((prev), (next)));			\
+	} while (0)
+#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
 #endif /* __ASM_GENERIC_SWITCH_TO_H */
diff --git a/include/clocksource/timer-sp804.h b/include/clocksource/timer-sp804.h
index 1f8a1caa7cb4..312eddcebda1 100644
--- a/include/clocksource/timer-sp804.h
+++ b/include/clocksource/timer-sp804.h
@@ -4,20 +4,23 @@
 struct clk;
 
 void __sp804_clocksource_and_sched_clock_init(void __iomem *,
+					      unsigned long phys,
 					      const char *, struct clk *, int);
 void __sp804_clockevents_init(void __iomem *, unsigned int,
 			      struct clk *, const char *);
 void sp804_timer_disable(void __iomem *);
 
-static inline void sp804_clocksource_init(void __iomem *base, const char *name)
+static inline void sp804_clocksource_init(void __iomem *base,
+					  unsigned long phys, const char *name)
 {
-	__sp804_clocksource_and_sched_clock_init(base, name, NULL, 0);
+	__sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 0);
 }
 
 static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base,
+							  unsigned long phys,
 							  const char *name)
 {
-	__sp804_clocksource_and_sched_clock_init(base, name, NULL, 1);
+	__sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 1);
 }
 
 static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name)
diff --git a/include/ipipe/setup.h b/include/ipipe/setup.h
new file mode 100644
index 000000000000..c2bc5218cf65
--- /dev/null
+++ b/include/ipipe/setup.h
@@ -0,0 +1,10 @@
+#ifndef _IPIPE_SETUP_H
+#define _IPIPE_SETUP_H
+
+/*
+ * Placeholders for setup hooks defined by client domains.
+ */
+
+static inline void __ipipe_early_client_setup(void) { }
+
+#endif /* !_IPIPE_SETUP_H */
diff --git a/include/ipipe/thread_info.h b/include/ipipe/thread_info.h
new file mode 100644
index 000000000000..1f6e9c359cec
--- /dev/null
+++ b/include/ipipe/thread_info.h
@@ -0,0 +1,14 @@
+#ifndef _IPIPE_THREAD_INFO_H
+#define _IPIPE_THREAD_INFO_H
+
+/*
+ * Placeholder for private thread information defined by client
+ * domains.
+ */
+
+struct ipipe_threadinfo {
+};
+
+static inline void __ipipe_init_threadinfo(struct ipipe_threadinfo *p) { }
+
+#endif /* !_IPIPE_THREAD_INFO_H */
diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h
index ed3768f4ecc7..b1d61aed0d19 100644
--- a/include/linux/basic_mmio_gpio.h
+++ b/include/linux/basic_mmio_gpio.h
@@ -50,7 +50,7 @@ struct bgpio_chip {
 	 * Used to lock bgpio_chip->data. Also, this is needed to keep
 	 * shadowed and real data registers writes together.
 	 */
-	spinlock_t lock;
+	ipipe_spinlock_t lock;
 
 	/* Shadowed data register to clear/set bits safely. */
 	unsigned long data;
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index bdcf358dfce2..598efe11fd56 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -128,6 +128,15 @@ struct clock_event_device {
 	const struct cpumask	*cpumask;
 	struct list_head	list;
 	struct module		*owner;
+
+#ifdef CONFIG_IPIPE
+	struct ipipe_timer      *ipipe_timer;
+	unsigned                ipipe_stolen;
+
+#define clockevent_ipipe_stolen(evt) ((evt)->ipipe_stolen)
+#else
+#define clockevent_ipipe_stolen(evt) (0)
+#endif /* !CONFIG_IPIPE */
 } ____cacheline_aligned;
 
 /* Helpers to verify state of a clockevent device */
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7784b597e959..eda54d0cba9b 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -95,6 +95,10 @@ struct clocksource {
 	cycle_t wd_last;
 #endif
 	struct module *owner;
+#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE
+	cycle_t (*ipipe_read)(struct clocksource *cs);
+#endif /* CONFIG_IPIPE_WANT_CLOCKSOURCE */
+
 } ____cacheline_aligned;
 
 /*
diff --git a/include/linux/console.h b/include/linux/console.h
index ea731af2451e..5f5ebf43839a 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -116,10 +116,12 @@ static inline int con_debug_leave(void)
 #define CON_ANYTIME	(16) /* Safe to call when cpu is offline */
 #define CON_BRL		(32) /* Used for a braille device */
 #define CON_EXTENDED	(64) /* Use the extended output format a la /dev/kmsg */
+#define CON_RAW		(128) /* Supports raw write mode */
 
 struct console {
 	char	name[16];
 	void	(*write)(struct console *, const char *, unsigned);
+	void	(*write_raw)(struct console *, const char *, unsigned);
 	int	(*read)(struct console *, char *, unsigned);
 	struct tty_driver *(*device)(struct console *, int *);
 	void	(*unblank)(void);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 60048c50404e..644a8f4bc606 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -134,6 +134,7 @@ enum {
 	FTRACE_OPS_FL_ALLOC_TRAMP		= 1 << 12,
 	FTRACE_OPS_FL_IPMODIFY			= 1 << 13,
 	FTRACE_OPS_FL_PID			= 1 << 14,
+	FTRACE_OPS_FL_IPIPE_EXCLUSIVE		= 1 << 15,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index dfd59d6bc6f0..2d4da567c651 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -61,6 +61,7 @@ extern void irq_exit(void);
 
 #define nmi_enter()						\
 	do {							\
+		__ipipe_nmi_enter();				\
 		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
@@ -77,6 +78,7 @@ extern void irq_exit(void);
 		preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
 		lockdep_on();					\
+		__ipipe_nmi_exit();				\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index 8336b2f6f834..c1c5c11c8ff1 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -12,6 +12,7 @@
 #include <linux/param.h>
 #include <linux/spinlock.h>
 #include <linux/timex.h>
+#include <linux/ipipe_lock.h>
 
 /* i8253A PIT registers */
 #define PIT_MODE	0x43
@@ -20,7 +21,7 @@
 
 #define PIT_LATCH	((PIT_TICK_RATE + HZ/2) / HZ)
 
-extern raw_spinlock_t i8253_lock;
+IPIPE_DECLARE_RAW_SPINLOCK(i8253_lock);
 extern bool i8253_clear_counter_on_shutdown;
 extern struct clock_event_device i8253_clockevent;
 extern void clockevent_i8253_init(bool oneshot);
diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h
new file mode 100644
index 000000000000..85af7fd82467
--- /dev/null
+++ b/include/linux/ipipe.h
@@ -0,0 +1,470 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe.h
+ *
+ * Copyright (C) 2002-2014 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_H
+#define __LINUX_IPIPE_H
+
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/irq.h>
+#include <linux/thread_info.h>
+#include <linux/ipipe_base.h>
+#include <linux/ipipe_debug.h>
+#include <asm/ptrace.h>
+#include <asm/ipipe.h>
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/ipipe_domain.h>
+
+/* ipipe_set_hooks(..., enables) */
+#define IPIPE_SYSCALL	__IPIPE_SYSCALL_E
+#define IPIPE_TRAP	__IPIPE_TRAP_E
+#define IPIPE_KEVENT	__IPIPE_KEVENT_E
+
+struct ipipe_sysinfo {
+	int sys_nr_cpus;	/* Number of CPUs on board */
+	int sys_hrtimer_irq;	/* hrtimer device IRQ */
+	u64 sys_hrtimer_freq;	/* hrtimer device frequency */
+	u64 sys_hrclock_freq;	/* hrclock device frequency */
+	u64 sys_cpu_freq;	/* CPU frequency (Hz) */
+	struct ipipe_arch_sysinfo arch;
+};
+
+struct ipipe_work_header {
+	size_t size;
+	void (*handler)(struct ipipe_work_header *work);
+};
+
+extern unsigned int __ipipe_printk_virq;
+
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq);
+
+void __ipipe_complete_domain_migration(void);
+
+int __ipipe_switch_tail(void);
+
+void __ipipe_share_current(int flags);
+
+void __ipipe_arch_share_current(int flags);
+
+int __ipipe_migrate_head(void);
+
+void __ipipe_reenter_root(void);
+
+int __ipipe_disable_ondemand_mappings(struct task_struct *p);
+
+int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma);
+
+#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH
+
+#define prepare_arch_switch(next)			\
+	do {						\
+		hard_local_irq_enable();		\
+		__ipipe_report_schedule(current, next);	\
+	} while(0)
+
+#ifndef ipipe_get_active_mm
+static inline struct mm_struct *ipipe_get_active_mm(void)
+{
+	return __this_cpu_read(ipipe_percpu.active_mm);
+}
+#define ipipe_get_active_mm ipipe_get_active_mm
+#endif
+
+#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
+
+#define prepare_arch_switch(next)			\
+	do {						\
+		__ipipe_report_schedule(current, next);	\
+		hard_local_irq_disable();		\
+	} while(0)
+
+#ifndef ipipe_get_active_mm
+#define ipipe_get_active_mm()	(current->active_mm)
+#endif
+
+#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
+
+#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE
+
+extern unsigned long long __ipipe_cs_freq;
+
+extern struct clocksource *__ipipe_cs;
+
+#endif /* CONFIG_IPIPE_WANT_CLOCKSOURCE */
+
+static inline bool __ipipe_hrclock_ok(void)
+{
+	return __ipipe_hrclock_freq != 0;
+}
+
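+/*
+ * NMI entry/exit protocol: an NMI may preempt the kernel while the
+ * virtualized interrupt state is being updated, so we snapshot the
+ * root stall bit on entry and force it on, then restore the saved
+ * state on exit. This keeps the virtual interrupt flag consistent
+ * across the NMI, whatever it interrupted.
+ */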
+static inline void __ipipe_nmi_enter(void)
+{
+	__this_cpu_write(ipipe_percpu.nmi_state, __ipipe_root_status);
+	__set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status);
+	ipipe_save_context_nmi();
+}
+
+static inline void __ipipe_nmi_exit(void)
+{
+	ipipe_restore_context_nmi();
+	if (!test_bit(IPIPE_STALL_FLAG, raw_cpu_ptr(&ipipe_percpu.nmi_state)))
+		__clear_bit(IPIPE_STALL_FLAG, &__ipipe_root_status);
+}
+
+/* KVM-side calls, hw IRQs off. */
+static inline void __ipipe_enter_vm(struct ipipe_vm_notifier *vmf)
+{
+	struct ipipe_percpu_data *p;
+
+	p = raw_cpu_ptr(&ipipe_percpu);
+	p->vm_notifier = vmf;
+	barrier();
+}
+
+static inline void __ipipe_exit_vm(void)
+{
+	struct ipipe_percpu_data *p;
+
+	p = raw_cpu_ptr(&ipipe_percpu);
+	p->vm_notifier = NULL;
+	barrier();
+}
+
+/* Client-side call, hw IRQs off. */
+void __ipipe_notify_vm_preemption(void);
+
+static inline void __ipipe_sync_pipeline(struct ipipe_domain *top)
+{
+	if (__ipipe_current_domain != top) {
+		__ipipe_do_sync_pipeline(top);
+		return;
+	}
+	if (!test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(top)->status))
+		__ipipe_sync_stage();
+}
+
+void ipipe_register_head(struct ipipe_domain *ipd,
+			 const char *name);
+
+void ipipe_unregister_head(struct ipipe_domain *ipd);
+
+int ipipe_request_irq(struct ipipe_domain *ipd,
+		      unsigned int irq,
+		      ipipe_irq_handler_t handler,
+		      void *cookie,
+		      ipipe_irq_ackfn_t ackfn);
+
+void ipipe_free_irq(struct ipipe_domain *ipd,
+		    unsigned int irq);
+
+void ipipe_raise_irq(unsigned int irq);
+
+int ipipe_handle_syscall(struct thread_info *ti,
+			 unsigned long nr, struct pt_regs *regs);
+
+void ipipe_set_hooks(struct ipipe_domain *ipd,
+		     int enables);
+
+unsigned int ipipe_alloc_virq(void);
+
+void ipipe_free_virq(unsigned int virq);
+
+static inline void ipipe_post_irq_head(unsigned int irq)
+{
+	__ipipe_set_irq_pending(ipipe_head_domain, irq);
+}
+
+static inline void ipipe_post_irq_root(unsigned int irq)
+{
+	__ipipe_set_irq_pending(&ipipe_root, irq);
+}
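+
+/*
+ * A minimal sketch of the virtual IRQ pattern, e.g. for deferring
+ * work from the head domain to the root domain. The handler name and
+ * call sites below are illustrative only:
+ *
+ *	static void my_virq_handler(unsigned int irq, void *cookie)
+ *	{
+ *		... runs when the root domain replays its IRQ log ...
+ *	}
+ *
+ *	unsigned int virq = ipipe_alloc_virq();
+ *	ipipe_request_irq(ipipe_root_domain, virq,
+ *			  my_virq_handler, NULL, NULL);
+ *
+ * Then, from head domain context with hw IRQs off:
+ *
+ *	ipipe_post_irq_root(virq);
+ */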
+
+static inline void ipipe_stall_head(void)
+{
+	hard_local_irq_disable();
+	__set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status);
+}
+
+static inline unsigned long ipipe_test_and_stall_head(void)
+{
+	hard_local_irq_disable();
+	return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status);
+}
+
+static inline unsigned long ipipe_test_head(void)
+{
+	unsigned long flags, ret;
+
+	flags = hard_smp_local_irq_save();
+	ret = test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status);
+	hard_smp_local_irq_restore(flags);
+
+	return ret;
+}
+
+void ipipe_unstall_head(void);
+
+void __ipipe_restore_head(unsigned long x);
+
+static inline void ipipe_restore_head(unsigned long x)
+{
+	ipipe_check_irqoff();
+	if ((x ^ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)) & 1)
+		__ipipe_restore_head(x);
+}
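+
+/*
+ * Typical interrupt-free section over the head domain, mirroring the
+ * local_irq_save/restore pattern (sketch):
+ *
+ *	unsigned long flags;
+ *
+ *	flags = ipipe_test_and_stall_head();
+ *	... head domain critical section ...
+ *	ipipe_restore_head(flags);
+ */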
+
+void __ipipe_post_work_root(struct ipipe_work_header *work);
+
+#define ipipe_post_work_root(p, header)			\
+	do {						\
+		void header_not_at_start(void);		\
+		if (offsetof(typeof(*(p)), header)) {	\
+			header_not_at_start();		\
+		}					\
+		__ipipe_post_work_root(&(p)->header);	\
+	} while (0)
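+
+/*
+ * The dummy header_not_at_start() declaration above turns a misplaced
+ * work header into a link-time error: the call is emitted only when
+ * offsetof(typeof(*(p)), header) is a non-zero constant, i.e. when
+ * the header is not the first member of the work descriptor.
+ *
+ * Minimal usage sketch (names are illustrative only):
+ *
+ *	struct my_work {
+ *		struct ipipe_work_header work;	(must come first)
+ *		int arg;
+ *	};
+ *
+ *	static void my_handler(struct ipipe_work_header *work)
+ *	{
+ *		struct my_work *p =
+ *			container_of(work, struct my_work, work);
+ *		...
+ *	}
+ *
+ *	struct my_work w = {
+ *		.work = {
+ *			.size = sizeof(w),
+ *			.handler = my_handler,
+ *		},
+ *		.arg = 42,
+ *	};
+ *	ipipe_post_work_root(&w, work);
+ */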
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
+
+unsigned long ipipe_critical_enter(void (*syncfn)(void));
+
+void ipipe_critical_exit(unsigned long flags);
+
+void ipipe_prepare_panic(void);
+
+#ifdef CONFIG_SMP
+#ifndef ipipe_smp_p
+#define ipipe_smp_p (1)
+#endif
+void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask);
+void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask);
+#else  /* !CONFIG_SMP */
+#define ipipe_smp_p (0)
+static inline
+void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) { }
+static inline void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) { }
+static inline void ipipe_disable_smp(void) { }
+#endif	/* CONFIG_SMP */
+
+static inline void ipipe_restore_root_nosync(unsigned long x)
+{
+	unsigned long flags;
+
+	flags = hard_smp_local_irq_save();
+	__ipipe_restore_root_nosync(x);
+	hard_smp_local_irq_restore(flags);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_lock_irq(unsigned int irq)
+{
+	struct ipipe_domain *ipd = __ipipe_current_domain;
+	if (ipd == ipipe_root_domain)
+		__ipipe_lock_irq(irq);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_unlock_irq(unsigned int irq)
+{
+	struct ipipe_domain *ipd = __ipipe_current_domain;
+	if (ipd == ipipe_root_domain)
+		__ipipe_unlock_irq(irq);
+}
+
+static inline struct ipipe_threadinfo *ipipe_current_threadinfo(void)
+{
+	return &current_thread_info()->ipipe_data;
+}
+
+#define ipipe_task_threadinfo(p) (&task_thread_info(p)->ipipe_data)
+
+void ipipe_enable_irq(unsigned int irq);
+
+static inline void ipipe_disable_irq(unsigned int irq)
+{
+	struct irq_desc *desc;
+	struct irq_chip *chip;
+
+	desc = irq_to_desc(irq);
+	if (desc == NULL)
+		return;
+
+	chip = irq_desc_get_chip(desc);
+
+	if (WARN_ON_ONCE(chip->irq_disable == NULL && chip->irq_mask == NULL))
+		return;
+
+	if (chip->irq_disable)
+		chip->irq_disable(&desc->irq_data);
+	else
+		chip->irq_mask(&desc->irq_data);
+}
+
+static inline void ipipe_end_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc)
+		desc->ipipe_end(desc);
+}
+
+static inline int ipipe_chained_irq_p(struct irq_desc *desc)
+{
+	void __ipipe_chained_irq(struct irq_desc *desc);
+
+	return desc->handle_irq == __ipipe_chained_irq;
+}
+
+static inline void ipipe_handle_demuxed_irq(unsigned int cascade_irq)
+{
+	ipipe_trace_irq_entry(cascade_irq);
+	__ipipe_dispatch_irq(cascade_irq, IPIPE_IRQF_NOSYNC);
+	ipipe_trace_irq_exit(cascade_irq);
+}
+
+static inline void __ipipe_init_threadflags(struct thread_info *ti)
+{
+	ti->ipipe_flags = 0;
+}
+
+static inline
+void ipipe_set_ti_thread_flag(struct thread_info *ti, int flag)
+{
+	set_bit(flag, &ti->ipipe_flags);
+}
+
+static inline
+void ipipe_clear_ti_thread_flag(struct thread_info *ti, int flag)
+{
+	clear_bit(flag, &ti->ipipe_flags);
+}
+
+static inline
+int ipipe_test_and_clear_ti_thread_flag(struct thread_info *ti, int flag)
+{
+	return test_and_clear_bit(flag, &ti->ipipe_flags);
+}
+
+static inline
+int ipipe_test_ti_thread_flag(struct thread_info *ti, int flag)
+{
+	return test_bit(flag, &ti->ipipe_flags);
+}
+
+#define ipipe_set_thread_flag(flag) \
+	ipipe_set_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_clear_thread_flag(flag) \
+	ipipe_clear_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_test_and_clear_thread_flag(flag) \
+	ipipe_test_and_clear_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_test_thread_flag(flag) \
+	ipipe_test_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_enable_notifier(p)					\
+	ipipe_set_ti_thread_flag(task_thread_info(p), TIP_NOTIFY)
+
+#define ipipe_disable_notifier(p)					\
+	do {								\
+		struct thread_info *ti = task_thread_info(p);		\
+		ipipe_clear_ti_thread_flag(ti, TIP_NOTIFY);		\
+		ipipe_clear_ti_thread_flag(ti, TIP_MAYDAY);		\
+	} while (0)
+
+#define ipipe_notifier_enabled_p(p)					\
+	ipipe_test_ti_thread_flag(task_thread_info(p), TIP_NOTIFY)
+
+#define ipipe_raise_mayday(p)						\
+	do {								\
+		struct thread_info *ti = task_thread_info(p);		\
+		ipipe_check_irqoff();					\
+		if (ipipe_test_ti_thread_flag(ti, TIP_NOTIFY))		\
+			ipipe_set_ti_thread_flag(ti, TIP_MAYDAY);	\
+	} while (0)
+
+extern bool __ipipe_probe_access;
+
+long ipipe_probe_kernel_read(void *dst, void *src, size_t size);
+long ipipe_probe_kernel_write(void *dst, void *src, size_t size);
+
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \
+	defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT)
+extern void __ipipe_uaccess_might_fault(void);
+#else
+#define __ipipe_uaccess_might_fault() might_fault()
+#endif
+
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_tracer_hrclock_initialized(void);
+#else /* !CONFIG_IPIPE_TRACE */
+#define __ipipe_tracer_hrclock_initialized()	do { } while(0)
+#endif /* !CONFIG_IPIPE_TRACE */
+
+#include <linux/ipipe_compat.h>
+
+#else	/* !CONFIG_IPIPE */
+
+#define __ipipe_root_p		1
+#define ipipe_root_p		1
+
+static inline void __ipipe_init_threadflags(struct thread_info *ti) { }
+
+static inline void __ipipe_complete_domain_migration(void) { }
+
+static inline int __ipipe_switch_tail(void)
+{
+	return 0;
+}
+
+static inline void __ipipe_nmi_enter(void) { }
+
+static inline void __ipipe_nmi_exit(void) { }
+
+#define ipipe_safe_current()	current
+#define ipipe_processor_id()	smp_processor_id()
+
+static inline int ipipe_test_foreign_stack(void)
+{
+	return 0;
+}
+
+static inline void ipipe_lock_irq(unsigned int irq) { }
+
+static inline void ipipe_unlock_irq(unsigned int irq) { }
+
+#define ipipe_probe_kernel_read(d, s, sz)	probe_kernel_read(d, s, sz)
+#define ipipe_probe_kernel_write(d, s, sz)	probe_kernel_write(d, s, sz)
+#define __ipipe_uaccess_might_fault()		might_fault()
+
+static inline int ipipe_handle_syscall(struct thread_info *ti,
+				       unsigned long nr, struct pt_regs *regs)
+{
+	return 0;
+}
+
+#endif	/* !CONFIG_IPIPE */
+
+#endif	/* !__LINUX_IPIPE_H */
diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h
new file mode 100644
index 000000000000..e056d27a2fd6
--- /dev/null
+++ b/include/linux/ipipe_base.h
@@ -0,0 +1,358 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_base.h
+ *
+ * Copyright (C) 2002-2014 Philippe Gerum.
+ *               2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_BASE_H
+#define __LINUX_IPIPE_BASE_H
+
+struct kvm_vcpu;
+struct ipipe_vm_notifier;
+struct irq_desc;
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_CORE_APIREV  CONFIG_IPIPE_CORE_APIREV
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+void ipipe_root_only(void);
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+static inline void ipipe_root_only(void) { }
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+typedef void (*ipipe_irq_handler_t)(unsigned int irq,
+				    void *cookie);
+
+void ipipe_unstall_root(void);
+
+void ipipe_restore_root(unsigned long x);
+
+#include <asm/ipipe_base.h>
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <stdarg.h>
+
+#ifndef IPIPE_NR_ROOT_IRQS
+#define IPIPE_NR_ROOT_IRQS	NR_IRQS
+#endif /* !IPIPE_NR_ROOT_IRQS */
+
+#define __bpl_up(x)		(((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1))
+/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */
+#define IPIPE_NR_VIRQS		BITS_PER_LONG
+/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */
+#define IPIPE_VIRQ_BASE		__bpl_up(IPIPE_NR_XIRQS)
+/* Total number of IRQ slots */
+#define IPIPE_NR_IRQS		(IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS)
+
+static inline int ipipe_virtual_irq_p(unsigned int irq)
+{
+	return irq >= IPIPE_VIRQ_BASE && irq < IPIPE_NR_IRQS;
+}
+
+#define IPIPE_IRQ_LOMAPSZ	(IPIPE_NR_IRQS / BITS_PER_LONG)
+#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG
+/*
+ * We need a 3-level mapping. This allows us to handle up to 32k IRQ
+ * vectors on 32bit machines, 256k on 64bit ones.
+ */
+#define __IPIPE_3LEVEL_IRQMAP	1
+#define IPIPE_IRQ_MDMAPSZ	(__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG)
+#else
+/*
+ * 2-level mapping is enough. This allows us to handle up to 1024 IRQ
+ * vectors on 32bit machines, 4096 on 64bit ones.
+ */
+#define __IPIPE_2LEVEL_IRQMAP	1
+#endif
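+
+/*
+ * Worked example, assuming a 64bit machine with IPIPE_NR_XIRQS=1024:
+ * IPIPE_VIRQ_BASE=1024, IPIPE_NR_IRQS=1088, IPIPE_IRQ_LOMAPSZ=17,
+ * which fits within BITS_PER_LONG, so the 2-level mapping applies.
+ */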
+
+/* Per-cpu pipeline status */
+#define IPIPE_STALL_FLAG	0 /* interrupts (virtually) disabled. */
+#define IPIPE_STALL_MASK	(1L << IPIPE_STALL_FLAG)
+
+/* Interrupt control bits */
+#define IPIPE_HANDLE_FLAG	0
+#define IPIPE_STICKY_FLAG	1
+#define IPIPE_LOCK_FLAG		2
+#define IPIPE_HANDLE_MASK	(1 << IPIPE_HANDLE_FLAG)
+#define IPIPE_STICKY_MASK	(1 << IPIPE_STICKY_FLAG)
+#define IPIPE_LOCK_MASK		(1 << IPIPE_LOCK_FLAG)
+
+struct pt_regs;
+struct ipipe_domain;
+
+struct ipipe_trap_data {
+	int exception;
+	struct pt_regs *regs;
+};
+
+#define IPIPE_KEVT_SCHEDULE	0
+#define IPIPE_KEVT_SIGWAKE	1
+#define IPIPE_KEVT_SETSCHED	2
+#define IPIPE_KEVT_SETAFFINITY	3
+#define IPIPE_KEVT_EXIT		4
+#define IPIPE_KEVT_CLEANUP	5
+#define IPIPE_KEVT_HOSTRT	6
+#define IPIPE_KEVT_CLOCKFREQ	7
+
+struct ipipe_vm_notifier {
+	void (*handler)(struct ipipe_vm_notifier *nfy);
+};
+
+void __ipipe_init_early(void);
+
+void __ipipe_init(void);
+
+#ifdef CONFIG_PROC_FS
+void __ipipe_init_proc(void);
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_init_tracer(void);
+#else /* !CONFIG_IPIPE_TRACE */
+static inline void __ipipe_init_tracer(void) { }
+#endif /* CONFIG_IPIPE_TRACE */
+#else	/* !CONFIG_PROC_FS */
+static inline void __ipipe_init_proc(void) { }
+#endif	/* CONFIG_PROC_FS */
+
+void __ipipe_restore_root_nosync(unsigned long x);
+
+#define IPIPE_IRQF_NOACK    0x1
+#define IPIPE_IRQF_NOSYNC   0x2
+
+void __ipipe_dispatch_irq(unsigned int irq, int flags);
+
+void __ipipe_do_sync_stage(void);
+
+void __ipipe_do_sync_pipeline(struct ipipe_domain *top);
+
+void __ipipe_lock_irq(unsigned int irq);
+
+void __ipipe_unlock_irq(unsigned int irq);
+
+void __ipipe_do_critical_sync(unsigned int irq, void *cookie);
+
+void __ipipe_ack_edge_irq(struct irq_desc *desc);
+
+void __ipipe_nop_irq(struct irq_desc *desc);
+
+static inline void __ipipe_idle(void)
+{
+	ipipe_unstall_root();
+}
+
+#ifndef __ipipe_sync_check
+#define __ipipe_sync_check	1
+#endif
+
+static inline void __ipipe_sync_stage(void)
+{
+	if (likely(__ipipe_sync_check))
+		__ipipe_do_sync_stage();
+}
+
+#ifndef __ipipe_run_irqtail
+#define __ipipe_run_irqtail(irq) do { } while(0)
+#endif
+
+int __ipipe_log_printk(const char *fmt, va_list args);
+void __ipipe_flush_printk(unsigned int irq, void *cookie);
+
+#define __ipipe_get_cpu(flags)	({ (flags) = hard_preempt_disable(); ipipe_processor_id(); })
+#define __ipipe_put_cpu(flags)	hard_preempt_enable(flags)
+
+int __ipipe_notify_syscall(struct pt_regs *regs);
+
+int __ipipe_notify_trap(int exception, struct pt_regs *regs);
+
+int __ipipe_notify_kevent(int event, void *data);
+
+#define __ipipe_report_trap(exception, regs)				\
+	__ipipe_notify_trap(exception, regs)
+
+#define __ipipe_report_sigwake(p)					\
+	do {								\
+		if (ipipe_notifier_enabled_p(p))			\
+			__ipipe_notify_kevent(IPIPE_KEVT_SIGWAKE, p);	\
+	} while (0)
+
+struct ipipe_cpu_migration_data {
+	struct task_struct *task;
+	int dest_cpu;
+};
+
+#define __ipipe_report_setaffinity(__p, __dest_cpu)			\
+	do {								\
+		struct ipipe_cpu_migration_data d = {			\
+			.task = (__p),					\
+			.dest_cpu = (__dest_cpu),			\
+		};							\
+		if (ipipe_notifier_enabled_p(__p))			\
+			__ipipe_notify_kevent(IPIPE_KEVT_SETAFFINITY, &d); \
+	} while (0)
+
+#define __ipipe_report_exit(p)						\
+	do {								\
+		if (ipipe_notifier_enabled_p(p))			\
+			__ipipe_notify_kevent(IPIPE_KEVT_EXIT, p);	\
+	} while (0)
+
+#define __ipipe_report_setsched(p)					\
+	do {								\
+		if (ipipe_notifier_enabled_p(p))			\
+			__ipipe_notify_kevent(IPIPE_KEVT_SETSCHED, p); \
+	} while (0)
+
+#define __ipipe_report_schedule(prev, next)				\
+do {									\
+	if (ipipe_notifier_enabled_p(next) ||				\
+	    ipipe_notifier_enabled_p(prev)) {				\
+		__this_cpu_write(ipipe_percpu.rqlock_owner, prev);	\
+		__ipipe_notify_kevent(IPIPE_KEVT_SCHEDULE, next);	\
+	}								\
+} while (0)
+
+#define __ipipe_report_cleanup(mm)					\
+	__ipipe_notify_kevent(IPIPE_KEVT_CLEANUP, mm)
+
+#define __ipipe_report_clockfreq_update(freq)				\
+	__ipipe_notify_kevent(IPIPE_KEVT_CLOCKFREQ, &(freq))
+
+void __ipipe_notify_vm_preemption(void);
+
+void __ipipe_call_mayday(struct pt_regs *regs);
+
+#define hard_cond_local_irq_enable()		hard_local_irq_enable()
+#define hard_cond_local_irq_disable()		hard_local_irq_disable()
+#define hard_cond_local_irq_save()		hard_local_irq_save()
+#define hard_cond_local_irq_restore(flags)	hard_local_irq_restore(flags)
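+
+/*
+ * The hard_cond_ variants are presumably meant for code shared with
+ * non-pipelined configurations, which should ask for hw interrupt
+ * protection only when a pipeline is present; with CONFIG_IPIPE
+ * enabled they map 1:1 to the unconditional hard_local_irq_* ops.
+ */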
+
+#ifdef CONFIG_IPIPE_LEGACY
+
+#define IPIPE_FIRST_EVENT	IPIPE_NR_FAULTS
+#define IPIPE_EVENT_SCHEDULE	IPIPE_FIRST_EVENT
+#define IPIPE_EVENT_SIGWAKE	(IPIPE_FIRST_EVENT + 1)
+#define IPIPE_EVENT_SETSCHED	(IPIPE_FIRST_EVENT + 2)
+#define IPIPE_EVENT_SETAFFINITY	(IPIPE_FIRST_EVENT + 3)
+#define IPIPE_EVENT_EXIT	(IPIPE_FIRST_EVENT + 4)
+#define IPIPE_EVENT_CLEANUP	(IPIPE_FIRST_EVENT + 5)
+#define IPIPE_EVENT_HOSTRT	(IPIPE_FIRST_EVENT + 6)
+#define IPIPE_EVENT_CLOCKFREQ	(IPIPE_FIRST_EVENT + 7)
+#define IPIPE_EVENT_SYSCALL	(IPIPE_FIRST_EVENT + 8)
+#define IPIPE_LAST_EVENT	IPIPE_EVENT_SYSCALL
+#define IPIPE_NR_EVENTS		(IPIPE_LAST_EVENT + 1)
+
+typedef int (*ipipe_event_handler_t)(unsigned int event,
+				     struct ipipe_domain *from,
+				     void *data);
+struct ipipe_legacy_context {
+	unsigned int domid;
+	int priority;
+	void *pdd;
+	ipipe_event_handler_t handlers[IPIPE_NR_EVENTS];
+};
+
+#define __ipipe_init_taskinfo(p)			\
+	do {						\
+		memset(p->ptd, 0, sizeof(p->ptd));	\
+	} while (0)
+
+#else /* !CONFIG_IPIPE_LEGACY */
+
+struct ipipe_legacy_context {
+};
+
+static inline void __ipipe_init_taskinfo(struct task_struct *p) { }
+
+#endif /* !CONFIG_IPIPE_LEGACY */
+
+#define __ipipe_serial_debug(__fmt, __args...)	raw_printk(__fmt, ##__args)
+
+#else /* !CONFIG_IPIPE */
+
+struct task_struct;
+struct mm_struct;
+
+static inline void __ipipe_init_early(void) { }
+
+static inline void __ipipe_init(void) { }
+
+static inline void __ipipe_init_proc(void) { }
+
+static inline void __ipipe_idle(void) { }
+
+static inline void __ipipe_report_sigwake(struct task_struct *p) { }
+
+static inline void __ipipe_report_setaffinity(struct task_struct *p,
+					      int dest_cpu) { }
+
+static inline void __ipipe_report_setsched(struct task_struct *p) { }
+
+static inline void __ipipe_report_exit(struct task_struct *p) { }
+
+static inline void __ipipe_report_cleanup(struct mm_struct *mm) { }
+
+#define __ipipe_report_trap(exception, regs)  0
+
+static inline void __ipipe_init_taskinfo(struct task_struct *p) { }
+
+#define hard_preempt_disable()		({ preempt_disable(); 0; })
+#define hard_preempt_enable(flags)	({ preempt_enable(); (void)(flags); })
+
+#define __ipipe_get_cpu(flags)		({ (void)(flags); get_cpu(); })
+#define __ipipe_put_cpu(flags)		\
+	do {				\
+		(void)(flags);		\
+		put_cpu();		\
+	} while (0)
+
+#define __ipipe_root_tick_p(regs)	1
+
+#define ipipe_handle_demuxed_irq(irq)		generic_handle_irq(irq)
+
+#define __ipipe_enter_vm(vmf)	do { } while (0)
+
+static inline void __ipipe_exit_vm(void) { }
+
+static inline void __ipipe_notify_vm_preemption(void) { }
+
+static inline void ipipe_root_only(void) { }
+
+#define __ipipe_serial_debug(__fmt, __args...)	do { } while (0)
+
+#endif	/* !CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_WANT_PTE_PINNING
+void __ipipe_pin_mapping_globally(unsigned long start,
+				  unsigned long end);
+#else
+static inline void __ipipe_pin_mapping_globally(unsigned long start,
+						unsigned long end)
+{ }
+#endif
+
+static inline void ipipe_preempt_root_only(void)
+{
+#if defined(CONFIG_IPIPE_DEBUG_CONTEXT) && \
+    defined(CONFIG_IPIPE_LEGACY) && \
+    !defined(CONFIG_IPIPE_HAVE_SAFE_THREAD_INFO)
+	ipipe_root_only();
+#endif
+}
+
+#endif	/* !__LINUX_IPIPE_BASE_H */
diff --git a/include/linux/ipipe_compat.h b/include/linux/ipipe_compat.h
new file mode 100644
index 000000000000..6521852d9a9d
--- /dev/null
+++ b/include/linux/ipipe_compat.h
@@ -0,0 +1,319 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_compat.h
+ *
+ * Copyright (C) 2012 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_COMPAT_H
+#define __LINUX_IPIPE_COMPAT_H
+
+#ifndef __LINUX_IPIPE_H
+#error "Do not include this file directly, use linux/ipipe.h instead"
+#endif
+
+#ifdef CONFIG_IPIPE_LEGACY
+
+#define IPIPE_HEAD_PRIORITY	(-1)
+#define IPIPE_ROOT_PRIO		100
+#define IPIPE_ROOT_ID		0
+#define IPIPE_ROOT_NPTDKEYS	4
+
+/* Legacy pipeline status bit */
+#define IPIPE_NOSTACK_FLAG	1 /* running on foreign stack. */
+#define IPIPE_NOSTACK_MASK	(1L << IPIPE_NOSTACK_FLAG)
+
+/* Legacy interrupt control bits */
+#define IPIPE_DUMMY_FLAG	31
+#define IPIPE_WIRED_FLAG	IPIPE_HANDLE_FLAG
+#define IPIPE_WIRED_MASK	(1 << IPIPE_WIRED_FLAG)
+#define IPIPE_PASS_FLAG		IPIPE_DUMMY_FLAG
+#define IPIPE_PASS_MASK		(1 << IPIPE_PASS_FLAG)
+#define IPIPE_DYNAMIC_FLAG	IPIPE_HANDLE_FLAG
+#define IPIPE_DYNAMIC_MASK	(1 << IPIPE_DYNAMIC_FLAG)
+#define IPIPE_SYSTEM_FLAG	IPIPE_DUMMY_FLAG
+#define IPIPE_SYSTEM_MASK	(1 << IPIPE_SYSTEM_FLAG)
+#define IPIPE_EXCLUSIVE_FLAG	IPIPE_DUMMY_FLAG
+#define IPIPE_EXCLUSIVE_MASK	(1 << IPIPE_EXCLUSIVE_FLAG)
+
+#define IPIPE_NR_CPUS		NR_CPUS
+
+#define IPIPE_EVENT_SELF        0x80000000
+#define IPIPE_EVENT_RETURN	IPIPE_TRAP_MAYDAY
+
+#define TASK_ATOMICSWITCH	TASK_HARDENING
+
+struct ipipe_domain_attr {
+	unsigned int domid;
+	const char *name;
+	int priority;
+	void (*entry) (void);
+	void *pdd;
+};
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr);
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr);
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd);
+
+int ipipe_alloc_ptdkey(void);
+
+int ipipe_free_ptdkey(int key);
+
+int ipipe_set_ptd(int key, void *value);
+
+void *ipipe_get_ptd(int key);
+
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned int irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t ackfn,
+			 unsigned int modemask);
+
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned int event,
+					ipipe_event_handler_t handler);
+
+int ipipe_setscheduler_root(struct task_struct *p,
+			    int policy,
+			    int prio);
+
+static inline void ipipe_check_context(struct ipipe_domain *border_ipd)
+{
+	ipipe_root_only();
+}
+
+static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd)
+{
+	ipipe_prepare_panic();
+}
+
+static inline void __ipipe_propagate_irq(unsigned int irq)
+{
+	ipipe_post_irq_root(irq);
+}
+
+static inline void __ipipe_schedule_irq_head(unsigned int irq)
+{
+	ipipe_post_irq_head(irq);
+}
+
+static inline void __ipipe_schedule_irq_root(unsigned int irq)
+{
+	ipipe_post_irq_root(irq);
+}
+
+static inline int ipipe_trigger_irq(unsigned int irq)
+{
+	ipipe_raise_irq(irq);
+	return 1;
+}
+
+static inline void ipipe_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	if (ipd != ipipe_root_domain)
+		ipipe_stall_head();
+	else
+		ipipe_stall_root();
+}
+
+static inline
+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	if (ipd != ipipe_root_domain)
+		return ipipe_test_and_stall_head();
+
+	return ipipe_test_and_stall_root();
+}
+
+static inline
+void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	if (ipd != ipipe_root_domain)
+		ipipe_unstall_head();
+	else
+		ipipe_unstall_root();
+}
+
+static inline
+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+				 unsigned long x)
+{
+	if (ipd != ipipe_root_domain)
+		ipipe_restore_head(x);
+	else
+		ipipe_restore_root(x);
+}
+
+static inline
+unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd)
+{
+	return test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status);
+}
+
+static inline void ipipe_stall_pipeline_head(void)
+{
+	ipipe_stall_head();
+}
+
+static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
+{
+	return ipipe_test_and_stall_head();
+}
+
+static inline void ipipe_unstall_pipeline_head(void)
+{
+	ipipe_unstall_head();
+}
+
+static inline void ipipe_restore_pipeline_head(unsigned long x)
+{
+	ipipe_restore_head(x);
+}
+
+static inline int ipipe_disable_ondemand_mappings(struct task_struct *p)
+{
+	return __ipipe_disable_ondemand_mappings(p);
+}
+
+static inline int ipipe_reenter_root(struct task_struct *prev,
+				     int policy,
+				     int prio)
+{
+	__ipipe_reenter_root();
+	return 0;
+}
+
+static inline void ipipe_root_preempt_notify(void)
+{
+	ipipe_notify_root_preemption();
+}
+
+#define ipipe_return_notify(p)	ipipe_raise_mayday(p)
+
+/*
+ * Keep the following as a macro, so that client code can check for
+ * support of the invariant pipeline head optimization.
+ */
+#define __ipipe_pipeline_head() ipipe_head_domain
+
+static inline int irqs_disabled_hw(void)
+{
+	return hard_irqs_disabled();
+}
+
+static inline void local_irq_disable_hw(void)
+{
+	hard_local_irq_disable();
+}
+
+static inline void local_irq_enable_hw(void)
+{
+	hard_local_irq_enable();
+}
+
+#define local_irq_save_hw(flags)			\
+	do {						\
+		(flags) = hard_local_irq_save();	\
+	} while (0)
+
+static inline void local_irq_restore_hw(unsigned long flags)
+{
+	hard_local_irq_restore(flags);
+}
+
+#define local_save_flags_hw(flags)			\
+	do {						\
+		(flags) = hard_local_save_flags();	\
+	} while (0)
+
+#define local_irq_save_hw_smp(flags)			\
+	do {						\
+		(flags) = hard_smp_local_irq_save();	\
+	} while (0)
+#define local_irq_restore_hw_smp(flags)   hard_smp_local_irq_restore(flags)
+
+#define local_irq_save_hw_cond(flags)			\
+	do {						\
+		(flags) = hard_cond_local_irq_save();	\
+	} while (0)
+#define local_irq_restore_hw_cond(flags)  hard_cond_local_irq_restore(flags)
+
+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__set_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpu_context(ipd)->status);
+}
+
+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpu_context(ipd)->status);
+}
+
+static inline int ipipe_test_foreign_stack(void)
+{
+	/* Must be called hw interrupts off. */
+	return test_bit(IPIPE_NOSTACK_FLAG, &__ipipe_current_context->status);
+}
+
+#ifndef ipipe_safe_current
+#define ipipe_safe_current()						\
+	({								\
+		struct task_struct *__p__;				\
+		unsigned long __flags__;				\
+		__flags__ = hard_smp_local_irq_save();			\
+		__p__ = ipipe_test_foreign_stack() ? &init_task : current; \
+		hard_smp_local_irq_restore(__flags__);			\
+		__p__;							\
+	})
+#endif
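+
+/*
+ * Rationale: when the CPU runs over a foreign (non-Linux) stack,
+ * "current" may be meaningless on architectures which derive it from
+ * the stack pointer, so we conservatively hand back &init_task.
+ */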
+
+void __ipipe_legacy_init_stage(struct ipipe_domain *ipd);
+
+/*
+ * These values have no real meaning from a versioning POV; however,
+ * they are guaranteed to look more recent than any legacy patch
+ * release ever published in the past.
+ */
+#define IPIPE_MAJOR_NUMBER  3
+#define IPIPE_MINOR_NUMBER  0
+#define IPIPE_PATCH_NUMBER  0
+
+#define __IPIPE_FEATURE_REQUEST_TICKDEV		1
+#define __IPIPE_FEATURE_FASTPEND_IRQ		1
+#define __IPIPE_FEATURE_TRACE_EVENT		1
+#define __IPIPE_FEATURE_ENABLE_NOTIFIER		1
+#define __IPIPE_FEATURE_PREPARE_PANIC		1
+#define __IPIPE_FEATURE_SYSINFO_V2		1
+#define __IPIPE_FEATURE_PIC_MUTE		1
+#ifdef CONFIG_IPIPE_HAVE_VM_NOTIFIER
+#define __IPIPE_FEATURE_ROOTPREEMPT_NOTIFIER	1
+#endif
+
+#else  /* !CONFIG_IPIPE_LEGACY */
+
+static inline void __ipipe_legacy_init_stage(struct ipipe_domain *ipd)
+{
+}
+
+#endif /* !CONFIG_IPIPE_LEGACY */
+
+#endif	/* !__LINUX_IPIPE_COMPAT_H */
diff --git a/include/linux/ipipe_debug.h b/include/linux/ipipe_debug.h
new file mode 100644
index 000000000000..5d7efefbdddf
--- /dev/null
+++ b/include/linux/ipipe_debug.h
@@ -0,0 +1,100 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_debug.h
+ *
+ * Copyright (C) 2012 Philippe Gerum <rpm@xenomai.org>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_DEBUG_H
+#define __LINUX_IPIPE_DEBUG_H
+
+#include <linux/ipipe_domain.h>
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+#include <asm/bug.h>
+
+static inline int ipipe_disable_context_check(void)
+{
+	return xchg(raw_cpu_ptr(&ipipe_percpu.context_check), 0);
+}
+
+static inline void ipipe_restore_context_check(int old_state)
+{
+	__this_cpu_write(ipipe_percpu.context_check, old_state);
+}
+
+static inline void ipipe_context_check_off(void)
+{
+	int cpu;
+	for_each_online_cpu(cpu)
+		per_cpu(ipipe_percpu, cpu).context_check = 0;
+}
+
+static inline void ipipe_save_context_nmi(void)
+{
+	int state = ipipe_disable_context_check();
+	__this_cpu_write(ipipe_percpu.context_check_saved, state);
+}
+
+static inline void ipipe_restore_context_nmi(void)
+{
+	ipipe_restore_context_check(__this_cpu_read(ipipe_percpu.context_check_saved));
+}
+
+#else	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+static inline int ipipe_disable_context_check(void)
+{
+	return 0;
+}
+
+static inline void ipipe_restore_context_check(int old_state) { }
+
+static inline void ipipe_context_check_off(void) { }
+
+static inline void ipipe_save_context_nmi(void) { }
+
+static inline void ipipe_restore_context_nmi(void) { }
+
+#endif	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#ifdef CONFIG_IPIPE_DEBUG
+
+#define ipipe_check_irqoff()					\
+	do {							\
+		if (WARN_ON_ONCE(!hard_irqs_disabled()))	\
+			hard_local_irq_disable();		\
+	} while (0)
+
+#else /* !CONFIG_IPIPE_DEBUG */
+
+static inline void ipipe_check_irqoff(void) { }
+
+#endif /* !CONFIG_IPIPE_DEBUG */
+
+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL
+#define IPIPE_WARN(c)		WARN_ON(c)
+#define IPIPE_WARN_ONCE(c)	WARN_ON_ONCE(c)
+#define IPIPE_BUG_ON(c)		BUG_ON(c)
+#else
+#define IPIPE_WARN(c)		do { (void)(c); } while (0)
+#define IPIPE_WARN_ONCE(c)	do { (void)(c); } while (0)
+#define IPIPE_BUG_ON(c)		do { (void)(c); } while (0)
+#endif
+
+#endif /* !__LINUX_IPIPE_DEBUG_H */
diff --git a/include/linux/ipipe_domain.h b/include/linux/ipipe_domain.h
new file mode 100644
index 000000000000..df6e7a9fb52c
--- /dev/null
+++ b/include/linux/ipipe_domain.h
@@ -0,0 +1,309 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_domain.h
+ *
+ *   Copyright (C) 2007-2012 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_DOMAIN_H
+#define __LINUX_IPIPE_DOMAIN_H
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <asm/ptrace.h>
+
+struct task_struct;
+struct mm_struct;
+struct irq_desc;
+struct ipipe_vm_notifier;
+
+#define __IPIPE_SYSCALL_P  0
+#define __IPIPE_TRAP_P     1
+#define __IPIPE_KEVENT_P   2
+#define __IPIPE_SYSCALL_E (1 << __IPIPE_SYSCALL_P)
+#define __IPIPE_TRAP_E	  (1 << __IPIPE_TRAP_P)
+#define __IPIPE_KEVENT_E  (1 << __IPIPE_KEVENT_P)
+#define __IPIPE_ALL_E	   0x7
+#define __IPIPE_SYSCALL_R (8 << __IPIPE_SYSCALL_P)
+#define __IPIPE_TRAP_R	  (8 << __IPIPE_TRAP_P)
+#define __IPIPE_KEVENT_R  (8 << __IPIPE_KEVENT_P)
+#define __IPIPE_SHIFT_R	   3
+#define __IPIPE_ALL_R	  (__IPIPE_ALL_E << __IPIPE_SHIFT_R)
+
+typedef void (*ipipe_irq_ackfn_t)(struct irq_desc *desc);
+
+struct ipipe_domain {
+	int context_offset;
+	struct ipipe_irqdesc {
+		unsigned long control;
+		ipipe_irq_ackfn_t ackfn;
+		ipipe_irq_handler_t handler;
+		void *cookie;
+	} ____cacheline_aligned irqs[IPIPE_NR_IRQS];
+	const char *name;
+	struct mutex mutex;
+	struct ipipe_legacy_context legacy;
+};
+
+static inline void *
+__ipipe_irq_cookie(struct ipipe_domain *ipd, unsigned int irq)
+{
+	return ipd->irqs[irq].cookie;
+}
+
+static inline ipipe_irq_handler_t
+__ipipe_irq_handler(struct ipipe_domain *ipd, unsigned int irq)
+{
+	return ipd->irqs[irq].handler;
+}
+
+extern struct ipipe_domain ipipe_root;
+
+#define ipipe_root_domain (&ipipe_root)
+
+extern struct ipipe_domain *ipipe_head_domain;
+
+struct ipipe_percpu_domain_data {
+	unsigned long status;	/* <= Must be first in struct. */
+	unsigned long irqpend_himap;
+#ifdef __IPIPE_3LEVEL_IRQMAP
+	unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ];
+#endif
+	unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ];
+	unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ];
+	unsigned long irqall[IPIPE_NR_IRQS];
+	struct ipipe_domain *domain;
+	int coflags;
+};
+
+struct ipipe_percpu_data {
+	struct ipipe_percpu_domain_data root;
+	struct ipipe_percpu_domain_data head;
+	struct ipipe_percpu_domain_data *curr;
+	struct pt_regs tick_regs;
+	int hrtimer_irq;
+	struct task_struct *task_hijacked;
+	struct task_struct *rqlock_owner;
+	struct ipipe_vm_notifier *vm_notifier;
+	unsigned long nmi_state;
+	struct mm_struct *active_mm;
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+	int context_check;
+	int context_check_saved;
+#endif
+};
+
+/*
+ * CAREFUL: all accessors based on __ipipe_raw_cpu_ptr() you may find
+ * in this file should be used only while hw interrupts are off, to
+ * prevent CPU migration, regardless of the running domain.
+ */
+DECLARE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu);
+
+static inline struct ipipe_percpu_domain_data *
+__context_of(struct ipipe_percpu_data *p, struct ipipe_domain *ipd)
+{
+	return (void *)p + ipd->context_offset;
+}
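+
+/*
+ * The root and head context areas live at fixed offsets within
+ * struct ipipe_percpu_data: ipd->context_offset is meant to hold
+ * offsetof(struct ipipe_percpu_data, root) or offsetof(struct
+ * ipipe_percpu_data, head), so that __context_of() resolves to
+ * &p->root or &p->head by plain pointer arithmetic.
+ */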
+
+/**
+ * ipipe_percpu_context - return the address of the pipeline context
+ * data for a domain on a given CPU.
+ *
+ * NOTE: this is the slowest accessor; use it with care. Prefer
+ * ipipe_this_cpu_context() for requests targeted at the current
+ * CPU. Additionally, if the target domain is known at build time,
+ * consider ipipe_this_cpu_{root, head}_context().
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_percpu_context(struct ipipe_domain *ipd, int cpu)
+{
+	return __context_of(&per_cpu(ipipe_percpu, cpu), ipd);
+}
+
+/**
+ * ipipe_this_cpu_context - return the address of the pipeline context
+ * data for a domain on the current CPU. hw IRQs must be off.
+ *
+ * NOTE: this accessor is a bit faster, but since we don't know which
+ * one of "root" or "head" ipd refers to, we still need to compute the
+ * context address from its offset.
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_context(struct ipipe_domain *ipd)
+{
+	return __context_of(__ipipe_raw_cpu_ptr(&ipipe_percpu), ipd);
+}
+
+/**
+ * ipipe_this_cpu_root_context - return the address of the pipeline
+ * context data for the root domain on the current CPU. hw IRQs must
+ * be off.
+ *
+ * NOTE: this accessor is recommended when the domain we refer to is
+ * known at build time to be the root one.
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_root_context(void)
+{
+	return __ipipe_raw_cpu_ptr(&ipipe_percpu.root);
+}
+
+/**
+ * ipipe_this_cpu_head_context - return the address of the pipeline
+ * context data for the registered head domain on the current CPU. hw
+ * IRQs must be off.
+ *
+ * NOTE: this accessor is recommended when the domain we refer to is
+ * known at build time to be the registered head domain. This address
+ * is always different from the root domain's context data, even when
+ * no head domain is registered. To get the address of the context
+ * data for the domain leading the pipeline at the time of the call
+ * (which may be root if no head domain is registered), use
+ * ipipe_this_cpu_leading_context() instead.
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_head_context(void)
+{
+	return __ipipe_raw_cpu_ptr(&ipipe_percpu.head);
+}
+
+/**
+ * ipipe_this_cpu_leading_context - return the address of the pipeline
+ * context data for the domain leading the pipeline on the current
+ * CPU. hw IRQs must be off.
+ *
+ * NOTE: this accessor is required when either root or a registered
+ * head domain may be the final target of this call, depending on
+ * whether the high priority domain was installed via
+ * ipipe_register_head().
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_leading_context(void)
+{
+	return ipipe_this_cpu_context(ipipe_head_domain);
+}
+
+/**
+ * __ipipe_get_current_context() - return the address of the pipeline
+ * context data of the domain running on the current CPU. hw IRQs must
+ * be off.
+ */
+static inline struct ipipe_percpu_domain_data *__ipipe_get_current_context(void)
+{
+	return __ipipe_raw_cpu_read(ipipe_percpu.curr);
+}
+
+#define __ipipe_current_context __ipipe_get_current_context()
+
+/**
+ * __ipipe_set_current_context() - switch the current CPU to the
+ * specified domain context.  hw IRQs must be off.
+ *
+ * NOTE: this is the only way to change the current domain for the
+ * current CPU. Don't bypass.
+ */
+static inline
+void __ipipe_set_current_context(struct ipipe_percpu_domain_data *pd)
+{
+	struct ipipe_percpu_data *p;
+	p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+	p->curr = pd;
+}
+
+/**
+ * __ipipe_set_current_domain() - switch the current CPU to the
+ * specified domain. This is equivalent to calling
+ * __ipipe_set_current_context() with the context data of that
+ * domain. hw IRQs must be off.
+ */
+static inline void __ipipe_set_current_domain(struct ipipe_domain *ipd)
+{
+	struct ipipe_percpu_data *p;
+	p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+	p->curr = __context_of(p, ipd);
+}
+
+static inline struct ipipe_percpu_domain_data *ipipe_current_context(void)
+{
+	struct ipipe_percpu_domain_data *pd;
+	unsigned long flags;
+
+	flags = hard_smp_local_irq_save();
+	pd = __ipipe_get_current_context();
+	hard_smp_local_irq_restore(flags);
+
+	return pd;
+}
+
+static inline struct ipipe_domain *__ipipe_get_current_domain(void)
+{
+	return __ipipe_get_current_context()->domain;
+}
+
+#define __ipipe_current_domain	__ipipe_get_current_domain()
+
+/**
+ * ipipe_get_current_domain() - return the address of the pipeline
+ * domain running on the current CPU. Unlike __ipipe_get_current_domain(),
+ * this accessor saves and restores the hw interrupt state, so it may
+ * be called with hw IRQs on.
+ */
+static inline struct ipipe_domain *ipipe_get_current_domain(void)
+{
+	struct ipipe_domain *ipd;
+	unsigned long flags;
+
+	flags = hard_smp_local_irq_save();
+	ipd = __ipipe_get_current_domain();
+	hard_smp_local_irq_restore(flags);
+
+	return ipd;
+}
+
+#define ipipe_current_domain	ipipe_get_current_domain()
+
+#define __ipipe_root_p	(__ipipe_current_domain == ipipe_root_domain)
+#define ipipe_root_p	(ipipe_current_domain == ipipe_root_domain)
+
+#ifdef CONFIG_SMP
+#define __ipipe_root_status	(ipipe_this_cpu_root_context()->status)
+#else
+extern unsigned long __ipipe_root_status;
+#endif
+
+#define __ipipe_head_status	(ipipe_this_cpu_head_context()->status)
+
+/**
+ * __ipipe_ipending_p() - Whether we have interrupts pending
+ * (i.e. logged) for the given domain context on the current CPU. hw
+ * IRQs must be off.
+ */
+static inline int __ipipe_ipending_p(struct ipipe_percpu_domain_data *pd)
+{
+	return pd->irqpend_himap != 0;
+}
+
+static inline unsigned long
+__ipipe_cpudata_irq_hits(struct ipipe_domain *ipd, int cpu, unsigned int irq)
+{
+	return ipipe_percpu_context(ipd, cpu)->irqall[irq];
+}
+
+#endif /* CONFIG_IPIPE */
+
+#endif	/* !__LINUX_IPIPE_DOMAIN_H */
diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h
new file mode 100644
index 000000000000..a108278b7f1c
--- /dev/null
+++ b/include/linux/ipipe_lock.h
@@ -0,0 +1,327 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_lock.h
+ *
+ *   Copyright (C) 2009 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_LOCK_H
+#define __LINUX_IPIPE_LOCK_H
+
+typedef struct {
+	arch_spinlock_t arch_lock;
+} __ipipe_spinlock_t;
+
+#define ipipe_spinlock(lock)	((__ipipe_spinlock_t *)(lock))
+#define ipipe_spinlock_p(lock)							\
+	__builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) ||	\
+	__builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t [])
+
+#define std_spinlock_raw(lock)	((raw_spinlock_t *)(lock))
+#define std_spinlock_raw_p(lock)					\
+	__builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) ||	\
+	__builtin_types_compatible_p(typeof(lock), raw_spinlock_t [])
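+
+/*
+ * The predicates above (plus std_spinlock_p() in the
+ * !CONFIG_PREEMPT_RT_FULL case below) are compile-time constants, so
+ * the PICK_* helpers dispatch on the lock type at build time: dead
+ * branches are folded away, and an unsupported lock type leaves a
+ * call to the undefined __bad_lock_type(), which fails the build at
+ * link time.
+ */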
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			(flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \
+		else __bad_lock_type();					\
+	} while (0)
+
+#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags)				\
+	({								\
+		int __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \
+		else __bad_lock_type();					\
+		__ret__;						\
+	 })
+
+#define PICK_SPINTRYLOCK_IRQ(lock)					\
+	({								\
+		int __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \
+		else __bad_lock_type();					\
+		__ret__;						\
+	 })
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)				\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			__ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \
+		else if (std_spinlock_raw_p(lock)) {			\
+			__ipipe_spin_unlock_debug(flags);		\
+			__real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \
+		} else __bad_lock_type();				\
+	} while (0)
+
+#define PICK_SPINOP(op, lock)						\
+	({								\
+		if (ipipe_spinlock_p(lock))				\
+			arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+		else if (std_spinlock_raw_p(lock))			\
+			__real_raw_spin##op(std_spinlock_raw(lock));	\
+		else __bad_lock_type();					\
+		(void)0;						\
+	})
+
+#define PICK_SPINOP_RET(op, lock, type)					\
+	({								\
+		type __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+		else if (std_spinlock_raw_p(lock))			\
+			__ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \
+		else { __ret__ = -1; __bad_lock_type(); }		\
+		__ret__;						\
+	})
+
+#else /* !CONFIG_PREEMPT_RT_FULL */
+
+#define std_spinlock(lock)	((spinlock_t *)(lock))
+#define std_spinlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), spinlock_t *) ||	\
+	__builtin_types_compatible_p(typeof(lock), spinlock_t [])
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			(flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \
+		else if (std_spinlock_p(lock))				\
+			__real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \
+		else __bad_lock_type();					\
+	} while (0)
+
+#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags)				\
+	({								\
+		int __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \
+		else if (std_spinlock_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \
+		else __bad_lock_type();					\
+		__ret__;						\
+	 })
+
+#define PICK_SPINTRYLOCK_IRQ(lock)					\
+	({								\
+		int __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \
+		else if (std_spinlock_raw_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \
+		else if (std_spinlock_p(lock))				\
+			__ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \
+		else __bad_lock_type();					\
+		__ret__;						\
+	 })
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)				\
+	do {								\
+		if (ipipe_spinlock_p(lock))				\
+			__ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \
+		else {							\
+			__ipipe_spin_unlock_debug(flags);		\
+			if (std_spinlock_raw_p(lock))			\
+				__real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \
+			else if (std_spinlock_p(lock))			\
+				__real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \
+		}							\
+	} while (0)
+
+#define PICK_SPINOP(op, lock)						\
+	({								\
+		if (ipipe_spinlock_p(lock))				\
+			arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+		else if (std_spinlock_raw_p(lock))			\
+			__real_raw_spin##op(std_spinlock_raw(lock));	\
+		else if (std_spinlock_p(lock))				\
+			__real_raw_spin##op(&std_spinlock(lock)->rlock); \
+		else __bad_lock_type();					\
+		(void)0;						\
+	})
+
+#define PICK_SPINOP_RET(op, lock, type)					\
+	({								\
+		type __ret__;						\
+		if (ipipe_spinlock_p(lock))				\
+			__ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+		else if (std_spinlock_raw_p(lock))			\
+			__ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \
+		else if (std_spinlock_p(lock))				\
+			__ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \
+		else { __ret__ = -1; __bad_lock_type(); }		\
+		__ret__;						\
+	})
+
+#endif /* !CONFIG_PREEMPT_RT_FULL */
+
+#define arch_spin_lock_init(lock)					\
+	do {								\
+		IPIPE_DEFINE_SPINLOCK(__lock__);			\
+		*((ipipe_spinlock_t *)lock) = __lock__;			\
+	} while (0)
+
+#define arch_spin_lock_irq(lock)					\
+	do {								\
+		hard_local_irq_disable();				\
+		arch_spin_lock(lock);					\
+	} while (0)
+
+#define arch_spin_unlock_irq(lock)					\
+	do {								\
+		arch_spin_unlock(lock);					\
+		hard_local_irq_enable();				\
+	} while (0)
+
+typedef struct {
+	arch_rwlock_t arch_lock;
+} __ipipe_rwlock_t;
+
+#define ipipe_rwlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *)
+
+#define std_rwlock_p(lock)						\
+	__builtin_types_compatible_p(typeof(lock), rwlock_t *)
+
+#define ipipe_rwlock(lock)	((__ipipe_rwlock_t *)(lock))
+#define std_rwlock(lock)	((rwlock_t *)(lock))
+
+#define PICK_RWOP(op, lock)						\
+	do {								\
+		if (ipipe_rwlock_p(lock))				\
+			arch##op(&ipipe_rwlock(lock)->arch_lock);	\
+		else if (std_rwlock_p(lock))				\
+			_raw##op(std_rwlock(lock));			\
+		else __bad_lock_type();					\
+	} while (0)
+
+extern int __bad_lock_type(void);
+
+#ifdef CONFIG_IPIPE
+
+#define ipipe_spinlock_t		__ipipe_spinlock_t
+#define IPIPE_DEFINE_RAW_SPINLOCK(x)	ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
+#define IPIPE_DECLARE_RAW_SPINLOCK(x)	extern ipipe_spinlock_t x
+#define IPIPE_DEFINE_SPINLOCK(x)	IPIPE_DEFINE_RAW_SPINLOCK(x)
+#define IPIPE_DECLARE_SPINLOCK(x)	IPIPE_DECLARE_RAW_SPINLOCK(x)
+
+#define IPIPE_SPIN_LOCK_UNLOCKED					\
+	(__ipipe_spinlock_t) {	.arch_lock = __ARCH_SPIN_LOCK_UNLOCKED }
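+
+/*
+ * A pipeline-safe lock may be shared between the root and head
+ * domains: it spins on the raw architecture lock with hw IRQs
+ * disabled, so no pipeline stage can preempt the critical section.
+ * Minimal sketch, assuming the companion spinlock.h changes route
+ * the regular locking API through the PICK_* helpers:
+ *
+ *	static IPIPE_DEFINE_SPINLOCK(my_lock);
+ *	unsigned long flags;
+ *
+ *	raw_spin_lock_irqsave(&my_lock, flags);
+ *	... shared state, safe against head domain preemption ...
+ *	raw_spin_unlock_irqrestore(&my_lock, flags);
+ */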
+
+#define spin_lock_irqsave_cond(lock, flags) \
+	spin_lock_irqsave(lock, flags)
+
+#define spin_unlock_irqrestore_cond(lock, flags) \
+	spin_unlock_irqrestore(lock, flags)
+
+#define raw_spin_lock_irqsave_cond(lock, flags) \
+	raw_spin_lock_irqsave(lock, flags)
+
+#define raw_spin_unlock_irqrestore_cond(lock, flags) \
+	raw_spin_unlock_irqrestore(lock, flags)
+
+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock);
+
+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock);
+
+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock);
+
+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock);
+
+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
+				 unsigned long *x);
+
+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
+				    unsigned long x);
+
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
+
+void __ipipe_spin_unlock_irqcomplete(unsigned long x);
+
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+void __ipipe_spin_unlock_debug(unsigned long flags);
+#else
+#define __ipipe_spin_unlock_debug(flags)  do { } while (0)
+#endif
+
+#define ipipe_rwlock_t			__ipipe_rwlock_t
+#define IPIPE_DEFINE_RWLOCK(x)		ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED
+#define IPIPE_DECLARE_RWLOCK(x)		extern ipipe_rwlock_t x
+
+#define IPIPE_RW_LOCK_UNLOCKED	\
+	(__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED }
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_spinlock_t		spinlock_t
+#define IPIPE_DEFINE_SPINLOCK(x)	DEFINE_SPINLOCK(x)
+#define IPIPE_DECLARE_SPINLOCK(x)	extern spinlock_t x
+#define IPIPE_SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(unknown)
+#define IPIPE_DEFINE_RAW_SPINLOCK(x)	DEFINE_RAW_SPINLOCK(x)
+#define IPIPE_DECLARE_RAW_SPINLOCK(x)	extern raw_spinlock_t x
+
+#define spin_lock_irqsave_cond(lock, flags)		\
+	do {						\
+		(void)(flags);				\
+		spin_lock(lock);			\
+	} while(0)
+
+#define spin_unlock_irqrestore_cond(lock, flags)	\
+	spin_unlock(lock)
+
+#define raw_spin_lock_irqsave_cond(lock, flags) \
+	do {					\
+		(void)(flags);			\
+		raw_spin_lock(lock);		\
+	} while (0)
+
+#define raw_spin_unlock_irqrestore_cond(lock, flags) \
+	raw_spin_unlock(lock)
+
+#define __ipipe_spin_lock_irq(lock)		do { } while (0)
+#define __ipipe_spin_unlock_irq(lock)		do { } while (0)
+#define __ipipe_spin_lock_irqsave(lock)		0
+#define __ipipe_spin_trylock_irq(lock)		1
+#define __ipipe_spin_trylock_irqsave(lock, x)	({ (void)(x); 1; })
+#define __ipipe_spin_unlock_irqrestore(lock, x)	do { (void)(x); } while (0)
+#define __ipipe_spin_unlock_irqbegin(lock)	spin_unlock(lock)
+#define __ipipe_spin_unlock_irqcomplete(x)	do { (void)(x); } while (0)
+#define __ipipe_spin_unlock_debug(flags)	do { } while (0)
+
+#define ipipe_rwlock_t			rwlock_t
+#define IPIPE_DEFINE_RWLOCK(x)		DEFINE_RWLOCK(x)
+#define IPIPE_DECLARE_RWLOCK(x)		extern rwlock_t x
+#define IPIPE_RW_LOCK_UNLOCKED		RW_LOCK_UNLOCKED
+
+#endif /* !CONFIG_IPIPE */
+
+#endif /* !__LINUX_IPIPE_LOCK_H */
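A minimal usage sketch of the PICK_RWOP() dispatch above, for illustration
only (the demo_* names are made up): combined with the read_lock()/
write_lock() redefinitions in the rwlock.h hunks further down, one spelling
serves both lock types, with __builtin_types_compatible_p() selecting the
branch at compile time.

static IPIPE_DEFINE_RWLOCK(demo_hard_lock);	/* resolves to arch_* ops */
static DEFINE_RWLOCK(demo_soft_lock);		/* resolves to _raw_* ops */

static void demo_readers(void)
{
	read_lock(&demo_hard_lock);		/* arch_read_lock() path */
	read_unlock(&demo_hard_lock);

	read_lock(&demo_soft_lock);		/* regular rwlock path */
	read_unlock(&demo_soft_lock);
}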
diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h
new file mode 100644
index 000000000000..dfe645b0f816
--- /dev/null
+++ b/include/linux/ipipe_tickdev.h
@@ -0,0 +1,159 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_tickdev.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ * Copyright (C) 2012 Gilles Chanteperdrix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_TICKDEV_H
+#define __LINUX_IPIPE_TICKDEV_H
+
+#include <linux/list.h>
+#include <linux/cpumask.h>
+#include <linux/clockchips.h>
+#include <linux/ipipe_domain.h>
+#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
+
+#ifdef CONFIG_IPIPE
+
+struct clock_event_device;
+
+struct ipipe_hostrt_data {
+	short live;
+	seqcount_t seqcount;
+	time_t wall_time_sec;
+	u32 wall_time_nsec;
+	struct timespec wall_to_monotonic;
+	cycle_t cycle_last;
+	cycle_t mask;
+	u32 mult;
+	u32 shift;
+};
+
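The snapshot above mirrors just enough timekeeper state for a consistent
wall-clock read from out-of-band code. A hedged consumer-side sketch,
assuming the usual seqcount retry pattern (demo_read_hostrt() is
illustrative only; tv_nsec overflow normalization is omitted for brevity):

static int demo_read_hostrt(struct ipipe_hostrt_data *hd,
			    struct timespec *ts, cycle_t now)
{
	unsigned int seq;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&hd->seqcount);
		if (!hd->live)
			return -EAGAIN;	/* no live timekeeper data */
		nsecs = (((now - hd->cycle_last) & hd->mask)
			 * hd->mult) >> hd->shift;
		ts->tv_sec = hd->wall_time_sec;
		ts->tv_nsec = hd->wall_time_nsec + nsecs;
	} while (read_seqcount_retry(&hd->seqcount, seq));

	return 0;
}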
+enum clock_event_mode {
+	CLOCK_EVT_MODE_PERIODIC,
+	CLOCK_EVT_MODE_ONESHOT,
+	CLOCK_EVT_MODE_UNUSED,
+	CLOCK_EVT_MODE_SHUTDOWN,
+};
+
+struct ipipe_timer {
+	int irq;
+	void (*request)(struct ipipe_timer *timer, int steal);
+	int (*set)(unsigned long ticks, void *timer);
+	void (*ack)(void);
+	void (*release)(struct ipipe_timer *timer);
+
+	/* Only if registering a timer directly */
+	const char *name;
+	unsigned rating;
+	unsigned long freq;
+	unsigned min_delay_ticks;
+	const struct cpumask *cpumask;
+
+	/* For internal use */
+	void *timer_set;	/* pointer passed to ->set() callback */
+	struct clock_event_device *host_timer;
+	struct list_head link;
+
+	/* Conversions between clock frequency and timer frequency */
+	unsigned c2t_integ;
+	unsigned c2t_frac;
+
+	/* For clockevent interception */
+	u32 real_mult;
+	u32 real_shift;
+	void (*mode_handler)(enum clock_event_mode mode,
+			     struct clock_event_device *);
+	int orig_mode;
+	int (*orig_set_state_periodic)(struct clock_event_device *);
+	int (*orig_set_state_oneshot)(struct clock_event_device *);
+	int (*orig_set_state_oneshot_stopped)(struct clock_event_device *);
+	int (*orig_set_state_shutdown)(struct clock_event_device *);
+	int (*orig_set_next_event)(unsigned long evt,
+				   struct clock_event_device *cdev);
+	unsigned int (*refresh_freq)(void);
+};
+
+#define __ipipe_hrtimer_irq __ipipe_raw_cpu_read(ipipe_percpu.hrtimer_irq)
+
+extern unsigned long __ipipe_hrtimer_freq;
+
+/*
+ * Called by clockevents_register_device() to register a piggybacked
+ * I-pipe timer, if there is one.
+ */
+void ipipe_host_timer_register(struct clock_event_device *clkevt);
+
+/*
+ * Register a standalone I-pipe timer.
+ */
+void ipipe_timer_register(struct ipipe_timer *timer);
+
+/*
+ * Choose the best timer for each CPU and take over its handling.
+ */
+int ipipe_select_timers(const struct cpumask *mask);
+
+/*
+ * Release the per-cpu timers
+ */
+void ipipe_timers_release(void);
+
+/*
+ * Start handling the per-cpu timer IRQ and intercept the Linux
+ * clockevent device callbacks.
+ */
+int ipipe_timer_start(void (*tick_handler)(void),
+		      void (*emumode)(enum clock_event_mode mode,
+				      struct clock_event_device *cdev),
+		      int (*emutick)(unsigned long evt,
+				     struct clock_event_device *cdev),
+		      unsigned cpu);
+
+/*
+ * Stop handling a per-cpu timer
+ */
+void ipipe_timer_stop(unsigned cpu);
+
+/*
+ * Program the timer
+ */
+void ipipe_timer_set(unsigned long delay);
+
+const char *ipipe_timer_name(void);
+
+unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns);
+
+void __ipipe_timer_refresh_freq(unsigned int hrclock_freq);
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_host_timer_register(clkevt) do { } while (0)
+
+#endif /* !CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_HAVE_HOSTRT
+void ipipe_update_hostrt(struct timekeeper *tk);
+#else
+static inline void
+ipipe_update_hostrt(struct timekeeper *tk) {}
+#endif
+
+#endif /* __LINUX_IPIPE_TICKDEV_H */
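For illustration only, a sketch of registering a standalone I-pipe timer
with the API above; DEMO_TIMER_IRQ and demo_timer_program() are made-up
stand-ins for a real device:

#include <linux/ipipe_tickdev.h>

static int demo_set(unsigned long ticks, void *timer)
{
	return demo_timer_program(ticks);	/* one-shot event in 'ticks' */
}

static struct ipipe_timer demo_ipipe_timer = {
	.irq		 = DEMO_TIMER_IRQ,
	.set		 = demo_set,
	.name		 = "demo-timer",
	.rating		 = 200,
	.freq		 = 1000000,		/* 1 MHz timer clock */
	.min_delay_ticks = 2,
};

static void __init demo_timer_init(void)
{
	demo_ipipe_timer.cpumask = cpumask_of(0);
	ipipe_timer_register(&demo_ipipe_timer);
}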
diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h
new file mode 100644
index 000000000000..379c5e3f8b58
--- /dev/null
+++ b/include/linux/ipipe_trace.h
@@ -0,0 +1,83 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_trace.h
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _LINUX_IPIPE_TRACE_H
+#define _LINUX_IPIPE_TRACE_H
+
+#ifdef CONFIG_IPIPE_TRACE
+
+#include <linux/types.h>
+
+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
+#endif /* !BROKEN_BUILTIN_RETURN_ADDRESS */
+
+struct pt_regs;
+
+void ipipe_trace_begin(unsigned long v);
+void ipipe_trace_end(unsigned long v);
+void ipipe_trace_freeze(unsigned long v);
+void ipipe_trace_special(unsigned char special_id, unsigned long v);
+void ipipe_trace_pid(pid_t pid, short prio);
+void ipipe_trace_event(unsigned char id, unsigned long delay_tsc);
+int ipipe_trace_max_reset(void);
+int ipipe_trace_frozen_reset(void);
+void ipipe_trace_irqbegin(int irq, struct pt_regs *regs);
+void ipipe_trace_irqend(int irq, struct pt_regs *regs);
+
+#else /* !CONFIG_IPIPE_TRACE */
+
+#define ipipe_trace_begin(v)			do { (void)(v); } while (0)
+#define ipipe_trace_end(v)			do { (void)(v); } while (0)
+#define ipipe_trace_freeze(v)			do { (void)(v); } while (0)
+#define ipipe_trace_special(id, v)		do { (void)(id); (void)(v); } while (0)
+#define ipipe_trace_pid(pid, prio)		do { (void)(pid); (void)(prio); } while (0)
+#define ipipe_trace_event(id, delay_tsc)	do { (void)(id); (void)(delay_tsc); } while (0)
+#define ipipe_trace_max_reset()			({ 0; })
+#define ipipe_trace_frozen_reset()		({ 0; })
+#define ipipe_trace_irqbegin(irq, regs)		do { (void)(irq); (void)(regs); } while (0)
+#define ipipe_trace_irqend(irq, regs)		do { (void)(irq); (void)(regs); } while (0)
+
+#endif /* !CONFIG_IPIPE_TRACE */
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+void ipipe_trace_panic_freeze(void);
+void ipipe_trace_panic_dump(void);
+#else
+static inline void ipipe_trace_panic_freeze(void) { }
+static inline void ipipe_trace_panic_dump(void) { }
+#endif
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define ipipe_trace_irq_entry(irq)	ipipe_trace_begin(irq)
+#define ipipe_trace_irq_exit(irq)	ipipe_trace_end(irq)
+#define ipipe_trace_irqsoff()		ipipe_trace_begin(0x80000000UL)
+#define ipipe_trace_irqson()		ipipe_trace_end(0x80000000UL)
+#else
+#define ipipe_trace_irq_entry(irq)	do { (void)(irq); } while (0)
+#define ipipe_trace_irq_exit(irq)	do { (void)(irq); } while (0)
+#define ipipe_trace_irqsoff()		do { } while (0)
+#define ipipe_trace_irqson()		do { } while (0)
+#endif
+
+#endif	/* !_LINUX_IPIPE_TRACE_H */
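Typical use of the hooks above, sketched for illustration (the demo_*
functions are made up): bracket a path with begin/end points, then freeze
the buffer for post-mortem inspection when something looks wrong:

#include <linux/ipipe_trace.h>

extern void demo_do_work(void);		/* made-up workload */
extern int demo_latency_too_high(void);	/* made-up predicate */

static void demo_critical_path(void)
{
	ipipe_trace_begin(0xdead0001);		/* arbitrary marker value */
	demo_do_work();
	ipipe_trace_end(0xdead0001);

	if (demo_latency_too_high())
		ipipe_trace_freeze(0xdead0001);	/* snapshot for /proc/ipipe/trace */
}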
diff --git a/include/linux/irq.h b/include/linux/irq.h
index f7cade00c525..88299c5c2f74 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -363,6 +363,11 @@ struct irq_chip {
 
 	void		(*irq_bus_lock)(struct irq_data *data);
 	void		(*irq_bus_sync_unlock)(struct irq_data *data);
+#ifdef CONFIG_IPIPE
+	void		(*irq_move)(struct irq_data *data);
+	void		(*irq_hold)(struct irq_data *data);
+	void		(*irq_release)(struct irq_data *data);
+#endif /* CONFIG_IPIPE */
 
 	void		(*irq_cpu_online)(struct irq_data *data);
 	void		(*irq_cpu_offline)(struct irq_data *data);
@@ -483,6 +488,11 @@ extern int irq_chip_retrigger_hierarchy(struct irq_data *data);
 extern void irq_chip_mask_parent(struct irq_data *data);
 extern void irq_chip_unmask_parent(struct irq_data *data);
 extern void irq_chip_eoi_parent(struct irq_data *data);
+#ifdef CONFIG_IPIPE
+extern void irq_chip_hold_parent(struct irq_data *data);
+extern void irq_chip_release_parent(struct irq_data *data);
+#endif
+
 extern int irq_chip_set_affinity_parent(struct irq_data *data,
 					const struct cpumask *dest,
 					bool force);
@@ -603,7 +613,14 @@ extern int irq_set_irq_type(unsigned int irq, unsigned int type);
 extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
 extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
 				struct msi_desc *entry);
-extern struct irq_data *irq_get_irq_data(unsigned int irq);
+
+static inline __attribute__((const)) struct irq_data *
+irq_get_irq_data(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc ? &desc->irq_data : NULL;
+}
 
 static inline struct irq_chip *irq_get_chip(unsigned int irq)
 {
@@ -803,7 +820,11 @@ struct irq_chip_type {
  * different flow mechanisms (level/edge) for it.
  */
 struct irq_chip_generic {
+#ifdef CONFIG_IPIPE
+	ipipe_spinlock_t	lock;
+#else
 	raw_spinlock_t		lock;
+#endif
 	void __iomem		*reg_base;
 	u32			(*reg_readl)(void __iomem *addr);
 	void			(*reg_writel)(u32 val, void __iomem *addr);
@@ -902,18 +923,28 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX)
 
 #ifdef CONFIG_SMP
-static inline void irq_gc_lock(struct irq_chip_generic *gc)
+static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc)
 {
-	raw_spin_lock(&gc->lock);
+	unsigned long flags = 0;
+	raw_spin_lock_irqsave_cond(&gc->lock, flags);
+	return flags;
 }
 
-static inline void irq_gc_unlock(struct irq_chip_generic *gc)
+static inline void
+irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags)
 {
-	raw_spin_unlock(&gc->lock);
+	raw_spin_unlock_irqrestore_cond(&gc->lock, flags);
 }
 #else
-static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
-static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
+static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc)
+{
+	return hard_cond_local_irq_save();
+}
+static inline void
+irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags)
+{
+	hard_cond_local_irq_restore(flags);
+}
 #endif
 
 /*
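Since irq_gc_lock() now returns the flags word that irq_gc_unlock() takes
back, chip callbacks built on the generic chip must follow this pattern; a
simplified sketch modeled on irq_gc_mask_disable_reg(), with the mask-cache
update omitted (demo_irq_mask() is illustrative only):

static void demo_irq_mask(struct irq_data *d)
{
	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
	struct irq_chip_type *ct = irq_data_get_chip_type(d);
	unsigned long flags;

	flags = irq_gc_lock(gc);
	irq_reg_writel(gc, d->mask, ct->regs.disable);
	irq_gc_unlock(gc, flags);
}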
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index bae69e5d693c..380cbeaaf33e 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -53,7 +53,11 @@
 #define GICD_INT_EN_CLR_X32		0xffffffff
 #define GICD_INT_EN_SET_SGI		0x0000ffff
 #define GICD_INT_EN_CLR_PPI		0xffff0000
+#ifndef CONFIG_IPIPE
 #define GICD_INT_DEF_PRI		0xa0
+#else
+#define GICD_INT_DEF_PRI		0x10
+#endif
 #define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
 					(GICD_INT_DEF_PRI << 16) |\
 					(GICD_INT_DEF_PRI << 8) |\
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index a587a33363c7..74eb4a5e0351 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -47,6 +47,10 @@ struct irq_desc {
 	struct irq_common_data	irq_common_data;
 	struct irq_data		irq_data;
 	unsigned int __percpu	*kstat_irqs;
+#ifdef CONFIG_IPIPE
+	void			(*ipipe_ack)(struct irq_desc *desc);
+	void			(*ipipe_end)(struct irq_desc *desc);
+#endif /* CONFIG_IPIPE */
 	irq_flow_handler_t	handle_irq;
 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI
 	irq_preflow_handler_t	preflow_handler;
@@ -165,6 +169,10 @@ static inline int irq_desc_has_action(struct irq_desc *desc)
 	return desc->action != NULL;
 }
 
+irq_flow_handler_t
+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle,
+		    int is_chained);
+
 static inline int irq_has_action(unsigned int irq)
 {
 	return irq_desc_has_action(irq_to_desc(irq));
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 9669bf9d4f48..de003ed52bde 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -5,7 +5,11 @@
 
 
 extern int nr_irqs;
+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SPARSE_IRQ)
 extern struct irq_desc *irq_to_desc(unsigned int irq);
+#else
+#define irq_to_desc(irq)	({ ipipe_virtual_irq_p(irq) ? NULL : &irq_desc[irq]; })
+#endif
 unsigned int irq_get_next_irq(unsigned int offset);
 
 # define for_each_irq_desc(irq, desc)					\
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 50220cab738c..b685af847950 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -9,6 +9,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/ipipe_base.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
 #include <linux/dynamic_debug.h>
@@ -168,9 +169,12 @@ struct user;
 
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+# define might_resched() do { \
+		ipipe_root_only(); \
+		_cond_resched(); \
+	} while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() ipipe_root_only()
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d7ce4e3280db..aea0005fff85 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -227,6 +227,9 @@ struct kvm_vcpu {
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	struct preempt_notifier preempt_notifier;
 #endif
+#ifdef CONFIG_IPIPE
+	struct ipipe_vm_notifier ipipe_notifier;
+#endif
 	int cpu;
 	int vcpu_id;
 	int srcu_idx;
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 7eeceac52dea..3a85b00a537e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/linkage.h>
 #include <linux/list.h>
+#include <linux/ipipe_base.h>
 
 /*
  * We put the hardirq and softirq counter into the preemption
@@ -233,7 +234,28 @@ do { \
 
 #endif /* CONFIG_PREEMPT_COUNT */
 
-#ifdef MODULE
+#ifdef CONFIG_IPIPE
+#define hard_preempt_disable()				\
+	({						\
+		unsigned long __flags__;		\
+		__flags__ = hard_local_irq_save();	\
+		if (__ipipe_root_p)			\
+			preempt_disable();		\
+		__flags__;				\
+	})
+
+#define hard_preempt_enable(__flags__)					\
+	do {								\
+		if (__ipipe_root_p) {					\
+			preempt_enable_no_resched();			\
+			hard_local_irq_restore(__flags__);		\
+			if (!hard_irqs_disabled_flags(__flags__))	\
+				preempt_check_resched();		\
+		} else							\
+			hard_local_irq_restore(__flags__);		\
+	} while (0)
+
+#elif defined(MODULE)
 /*
  * Modules have no business playing preemption tricks.
  */
@@ -241,7 +263,7 @@ do { \
 #undef preempt_enable_no_resched
 #undef preempt_enable_no_resched_notrace
 #undef preempt_check_resched
-#endif
+#endif	/* !IPIPE && MODULE */
 
 #define preempt_set_need_resched() \
 do { \
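A usage sketch of the pair above, for illustration: hard IRQs are masked in
every case, while the preemption count is only touched when the root domain
is current, so the same section is safe from either domain (demo name made
up):

static void demo_touch_shared_state(void)
{
	unsigned long flags;

	flags = hard_preempt_disable();
	/* ...non-preemptible section, hard IRQs off... */
	hard_preempt_enable(flags);
}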
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9729565c25ff..a2c32bf32509 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -124,6 +124,17 @@ void early_printk(const char *s, ...) { }
 
 typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
 
+#ifdef CONFIG_RAW_PRINTK
+void raw_vprintk(const char *fmt, va_list ap);
+asmlinkage __printf(1, 2)
+void raw_printk(const char *fmt, ...);
+#else
+static inline __cold
+void raw_vprintk(const char *s, va_list ap) { }
+static inline __printf(1, 2) __cold
+void raw_printk(const char *s, ...) { }
+#endif
+
 #ifdef CONFIG_PRINTK
 asmlinkage __printf(5, 0)
 int vprintk_emit(int facility, int level,
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index bc2994ed66e1..5e2da8d9a60b 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -61,8 +61,8 @@ do {								\
 #define read_trylock(lock)	__cond_lock(lock, _raw_read_trylock(lock))
 #define write_trylock(lock)	__cond_lock(lock, _raw_write_trylock(lock))
 
-#define write_lock(lock)	_raw_write_lock(lock)
-#define read_lock(lock)		_raw_read_lock(lock)
+#define write_lock(lock)	PICK_RWOP(_write_lock, lock)
+#define read_lock(lock)		PICK_RWOP(_read_lock, lock)
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
@@ -96,8 +96,8 @@ do {								\
 #define read_lock_bh(lock)		_raw_read_lock_bh(lock)
 #define write_lock_irq(lock)		_raw_write_lock_irq(lock)
 #define write_lock_bh(lock)		_raw_write_lock_bh(lock)
-#define read_unlock(lock)		_raw_read_unlock(lock)
-#define write_unlock(lock)		_raw_write_unlock(lock)
+#define read_unlock(lock)		PICK_RWOP(_read_unlock, lock)
+#define write_unlock(lock)		PICK_RWOP(_write_unlock, lock)
 #define read_unlock_irq(lock)		_raw_read_unlock_irq(lock)
 #define write_unlock_irq(lock)		_raw_write_unlock_irq(lock)
 
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index 5b9b84b20407..6c8bb4dd73e6 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -141,7 +141,9 @@ static inline int __raw_write_trylock(rwlock_t *lock)
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) ||	\
+	defined(CONFIG_DEBUG_LOCK_ALLOC) ||	\
+	defined(CONFIG_IPIPE)
 
 static inline void __raw_read_lock(rwlock_t *lock)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b30540d6d125..cfd8e2ba39cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -26,6 +26,7 @@ struct sched_param {
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
 #include <linux/preempt.h>
+#include <linux/ipipe.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -219,9 +220,17 @@ extern void proc_sched_set_task(struct task_struct *p);
 #define TASK_WAKING		256
 #define TASK_PARKED		512
 #define TASK_NOLOAD		1024
+#ifdef CONFIG_IPIPE
+#define TASK_HARDENING		2048
+#define TASK_NOWAKEUP		4096
+#define TASK_STATE_MAX		8192
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNHU"
+#else  /* !CONFIG_IPIPE */
+#define TASK_HARDENING		0
+#define TASK_NOWAKEUP		0
 #define TASK_STATE_MAX		2048
-
 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
+#endif /* CONFIG_IPIPE */
 
 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -374,6 +383,15 @@ extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
+#ifdef CONFIG_IPIPE
+void update_root_process_times(struct pt_regs *regs);
+#else  /* !CONFIG_IPIPE */
+static inline void update_root_process_times(struct pt_regs *regs)
+{
+	update_process_times(user_mode(regs));
+}
+#endif /* CONFIG_IPIPE */
+
 extern void sched_show_task(struct task_struct *p);
 
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -504,6 +522,9 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
 #define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
+#ifdef CONFIG_IPIPE
+#define MMF_VM_PINNED		31	/* ondemand load up and COW disabled */
+#endif
 
 #define MMF_HAS_UPROBES		19	/* has uprobes */
 #define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
@@ -1755,6 +1776,9 @@ struct task_struct {
 #endif
 
 	struct rcu_head rcu;
+#ifdef CONFIG_IPIPE_LEGACY
+	void *ptd[IPIPE_ROOT_NPTDKEYS];
+#endif
 
 	/*
 	 * cache last used pipe for splice
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 47dd0cebd204..938b93e7a24d 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -89,10 +89,12 @@
 # include <linux/spinlock_up.h>
 #endif
 
+#include <linux/ipipe_lock.h>
+
 #ifdef CONFIG_DEBUG_SPINLOCK
   extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
 				   struct lock_class_key *key);
-# define raw_spin_lock_init(lock)				\
+# define __real_raw_spin_lock_init(lock)			\
 do {								\
 	static struct lock_class_key __key;			\
 								\
@@ -100,11 +102,14 @@ do {								\
 } while (0)
 
 #else
-# define raw_spin_lock_init(lock)				\
+# define __real_raw_spin_lock_init(lock)			\
 	do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
+#define raw_spin_lock_init(lock)	PICK_SPINOP(_lock_init, lock)
 
-#define raw_spin_is_locked(lock)	arch_spin_is_locked(&(lock)->raw_lock)
+#define __real_raw_spin_is_locked(lock)				\
+	arch_spin_is_locked(&(lock)->raw_lock)
+#define raw_spin_is_locked(lock)	PICK_SPINOP_RET(_is_locked, lock, int)
 
 #ifdef CONFIG_GENERIC_LOCKBREAK
 #define raw_spin_is_contended(lock) ((lock)->break_lock)
@@ -173,9 +178,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
  * various methods are defined as nops in the case they are not
  * required.
  */
-#define raw_spin_trylock(lock)	__cond_lock(lock, _raw_spin_trylock(lock))
+#define __real_raw_spin_trylock(lock)	__cond_lock(lock, _raw_spin_trylock(lock))
+#define raw_spin_trylock(lock)		PICK_SPINOP_RET(_trylock, lock, int)
 
-#define raw_spin_lock(lock)	_raw_spin_lock(lock)
+#define __real_raw_spin_lock(lock)	_raw_spin_lock(lock)
+#define raw_spin_lock(lock)		PICK_SPINOP(_lock, lock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define raw_spin_lock_nested(lock, subclass) \
@@ -202,7 +209,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define raw_spin_lock_irqsave(lock, flags)			\
+#define __real_raw_spin_lock_irqsave(lock, flags)	\
 	do {						\
 		typecheck(unsigned long, flags);	\
 		flags = _raw_spin_lock_irqsave(lock);	\
@@ -224,7 +231,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 #else
 
-#define raw_spin_lock_irqsave(lock, flags)		\
+#define __real_raw_spin_lock_irqsave(lock, flags)	\
 	do {						\
 		typecheck(unsigned long, flags);	\
 		_raw_spin_lock_irqsave(lock, flags);	\
@@ -235,34 +242,46 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 #endif
 
-#define raw_spin_lock_irq(lock)		_raw_spin_lock_irq(lock)
+#define raw_spin_lock_irqsave(lock, flags)  \
+	PICK_SPINLOCK_IRQSAVE(lock, flags)
+
+#define __real_raw_spin_lock_irq(lock)	_raw_spin_lock_irq(lock)
+#define raw_spin_lock_irq(lock)		PICK_SPINOP(_lock_irq, lock)
 #define raw_spin_lock_bh(lock)		_raw_spin_lock_bh(lock)
-#define raw_spin_unlock(lock)		_raw_spin_unlock(lock)
-#define raw_spin_unlock_irq(lock)	_raw_spin_unlock_irq(lock)
+#define __real_raw_spin_unlock(lock)	_raw_spin_unlock(lock)
+#define raw_spin_unlock(lock)		PICK_SPINOP(_unlock, lock)
+#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
+#define raw_spin_unlock_irq(lock)	PICK_SPINOP(_unlock_irq, lock)
 
-#define raw_spin_unlock_irqrestore(lock, flags)		\
+#define __real_raw_spin_unlock_irqrestore(lock, flags)		\
 	do {							\
 		typecheck(unsigned long, flags);		\
 		_raw_spin_unlock_irqrestore(lock, flags);	\
 	} while (0)
+#define raw_spin_unlock_irqrestore(lock, flags)	\
+	PICK_SPINUNLOCK_IRQRESTORE(lock, flags)
+
 #define raw_spin_unlock_bh(lock)	_raw_spin_unlock_bh(lock)
 
 #define raw_spin_trylock_bh(lock) \
 	__cond_lock(lock, _raw_spin_trylock_bh(lock))
 
-#define raw_spin_trylock_irq(lock) \
+#define __real_raw_spin_trylock_irq(lock) \
 ({ \
 	local_irq_disable(); \
-	raw_spin_trylock(lock) ? \
+	__real_raw_spin_trylock(lock) ? \
 	1 : ({ local_irq_enable(); 0;  }); \
 })
+#define raw_spin_trylock_irq(lock)	PICK_SPINTRYLOCK_IRQ(lock)
 
-#define raw_spin_trylock_irqsave(lock, flags) \
+#define __real_raw_spin_trylock_irqsave(lock, flags) \
 ({ \
 	local_irq_save(flags); \
 	raw_spin_trylock(lock) ? \
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
+#define raw_spin_trylock_irqsave(lock, flags)	\
+	PICK_SPINTRYLOCK_IRQSAVE(lock, flags)
 
 /**
  * raw_spin_can_lock - would raw_spin_trylock() succeed?
@@ -293,24 +312,17 @@ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
 
 #define spin_lock_init(_lock)				\
 do {							\
-	spinlock_check(_lock);				\
-	raw_spin_lock_init(&(_lock)->rlock);		\
+	raw_spin_lock_init(_lock);			\
 } while (0)
 
-static __always_inline void spin_lock(spinlock_t *lock)
-{
-	raw_spin_lock(&lock->rlock);
-}
+#define spin_lock(lock)		raw_spin_lock(lock)
 
 static __always_inline void spin_lock_bh(spinlock_t *lock)
 {
 	raw_spin_lock_bh(&lock->rlock);
 }
 
-static __always_inline int spin_trylock(spinlock_t *lock)
-{
-	return raw_spin_trylock(&lock->rlock);
-}
+#define spin_trylock(lock)	raw_spin_trylock(lock)
 
 #define spin_lock_nested(lock, subclass)			\
 do {								\
@@ -327,14 +339,11 @@ do {									\
 	raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock);	\
 } while (0)
 
-static __always_inline void spin_lock_irq(spinlock_t *lock)
-{
-	raw_spin_lock_irq(&lock->rlock);
-}
+#define spin_lock_irq(lock)	raw_spin_lock_irq(lock)
 
 #define spin_lock_irqsave(lock, flags)				\
 do {								\
-	raw_spin_lock_irqsave(spinlock_check(lock), flags);	\
+	raw_spin_lock_irqsave(lock, flags);			\
 } while (0)
 
 #define spin_lock_irqsave_nested(lock, flags, subclass)			\
@@ -342,39 +351,28 @@ do {									\
 	raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \
 } while (0)
 
-static __always_inline void spin_unlock(spinlock_t *lock)
-{
-	raw_spin_unlock(&lock->rlock);
-}
+#define spin_unlock(lock)	raw_spin_unlock(lock)
 
 static __always_inline void spin_unlock_bh(spinlock_t *lock)
 {
 	raw_spin_unlock_bh(&lock->rlock);
 }
 
-static __always_inline void spin_unlock_irq(spinlock_t *lock)
-{
-	raw_spin_unlock_irq(&lock->rlock);
-}
+#define spin_unlock_irq(lock)	raw_spin_unlock_irq(lock)
 
-static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
-{
-	raw_spin_unlock_irqrestore(&lock->rlock, flags);
-}
+#define spin_unlock_irqrestore(lock, flags)	\
+	raw_spin_unlock_irqrestore(lock, flags)
 
 static __always_inline int spin_trylock_bh(spinlock_t *lock)
 {
 	return raw_spin_trylock_bh(&lock->rlock);
 }
 
-static __always_inline int spin_trylock_irq(spinlock_t *lock)
-{
-	return raw_spin_trylock_irq(&lock->rlock);
-}
+#define spin_trylock_irq(lock)	raw_spin_trylock_irq(lock)
 
 #define spin_trylock_irqsave(lock, flags)			\
 ({								\
-	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
+	raw_spin_trylock_irqsave(lock, flags);			\
 })
 
 static __always_inline void spin_unlock_wait(spinlock_t *lock)
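Net effect of the PICK_SPINOP()/PICK_SPINLOCK_IRQSAVE() rewiring above,
sketched for illustration (demo names made up): locks defined through the
IPIPE_* macros keep the familiar spelling but resolve to the __ipipe_spin_*
entry points, which operate on the raw arch lock with hard IRQs masked,
while plain spinlock_t users keep the vanilla, virtually-masked path:

static IPIPE_DEFINE_SPINLOCK(demo_pipeline_lock);	/* hard-IRQ safe */
static DEFINE_SPINLOCK(demo_kernel_lock);		/* regular lock */

static void demo_locking(void)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_pipeline_lock, flags);	/* __ipipe_spin_* path */
	spin_unlock_irqrestore(&demo_pipeline_lock, flags);

	spin_lock_irqsave(&demo_kernel_lock, flags);	/* _raw_spin_* path */
	spin_unlock_irqrestore(&demo_kernel_lock, flags);
}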
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 5344268e6e62..5026fea7120d 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -101,7 +101,9 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) ||	\
+	defined(CONFIG_DEBUG_LOCK_ALLOC) ||	\
+	defined(CONFIG_IPIPE)
 
 static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
 {
@@ -115,7 +117,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
 	 * do_raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 #else
 	do_raw_spin_lock_flags(lock, &flags);
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 8b3ac0d718eb..cf1414027c4b 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -55,16 +55,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 1;
 }
 
-/*
- * Read-write spinlocks. No debug version.
- */
-#define arch_read_lock(lock)		do { barrier(); (void)(lock); } while (0)
-#define arch_write_lock(lock)		do { barrier(); (void)(lock); } while (0)
-#define arch_read_trylock(lock)	({ barrier(); (void)(lock); 1; })
-#define arch_write_trylock(lock)	({ barrier(); (void)(lock); 1; })
-#define arch_read_unlock(lock)		do { barrier(); (void)(lock); } while (0)
-#define arch_write_unlock(lock)	do { barrier(); (void)(lock); } while (0)
-
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched/core.c and kernel_lock.c: */
@@ -74,6 +64,13 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 # define arch_spin_trylock(lock)	({ barrier(); (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
 
+#define arch_read_lock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_write_lock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_read_trylock(lock)	({ barrier(); (void)(lock); 1; })
+#define arch_write_trylock(lock)	({ barrier(); (void)(lock); 1; })
+#define arch_read_unlock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_write_unlock(lock)	do { barrier(); (void)(lock); } while (0)
+
 #define arch_spin_is_contended(lock)	(((void)(lock), 0))
 
 #define arch_read_can_lock(lock)	(((void)(lock), 1))
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 115216ec7cfe..04b31b32d925 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -132,7 +132,7 @@ extern void update_vsyscall_tz(void);
 #elif defined(CONFIG_GENERIC_TIME_VSYSCALL_OLD)
 
 extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
-				struct clocksource *c, u32 mult,
+				struct clocksource *c, u32 mult, u32 shift,
 				cycle_t cycle_last);
 extern void update_vsyscall_tz(void);
 
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index a74dd84bbb6d..88e579d4b23d 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -19,6 +19,9 @@
 #define MAP_TYPE	0x0f		/* Mask for type of mapping */
 #define MAP_FIXED	0x10		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x20		/* don't use a file */
+#ifndef MAP_BRK
+# define MAP_BRK	0
+#endif
 #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
 # define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be uninitialized */
 #else
diff --git a/include/uapi/asm-generic/resource.h b/include/uapi/asm-generic/resource.h
index c6d10af50123..83cd38b33f88 100644
--- a/include/uapi/asm-generic/resource.h
+++ b/include/uapi/asm-generic/resource.h
@@ -57,5 +57,13 @@
 # define RLIM_INFINITY		(~0UL)
 #endif
 
+/*
+ * Limit the stack to some sane default: root can always
+ * increase this limit if needed.  8MB seems reasonable.
+ */
+#ifndef _STK_LIM
+# define _STK_LIM		(8*1024*1024)
+#endif
+
 
 #endif /* _UAPI_ASM_GENERIC_RESOURCE_H */
diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h
index 36fb3b5fb181..3b5e2dc6e9ed 100644
--- a/include/uapi/linux/resource.h
+++ b/include/uapi/linux/resource.h
@@ -59,12 +59,6 @@ struct rlimit64 {
 #define	PRIO_USER	2
 
 /*
- * Limit the stack by to some sane default: root can always
- * increase this limit if needed..  8MB seems reasonable.
- */
-#define _STK_LIM	(8*1024*1024)
-
-/*
  * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
  * and other sensitive information are never written to disk.
  */
diff --git a/init/Kconfig b/init/Kconfig
index 47b0bdcf33c2..ce79a62da758 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -69,6 +69,7 @@ config COMPILE_TEST
 
 config LOCALVERSION
 	string "Local version - append to kernel release"
+	default "-ipipe"
 	help
 	  Append an extra string to the end of your kernel version.
 	  This will show up when you type uname, for example.
@@ -1469,6 +1470,19 @@ config PRINTK
 	  very difficult to diagnose system problems, saying N here is
 	  strongly discouraged.
 
+config RAW_PRINTK
+       bool "Enable support for raw printk"
+       default n
+       select DEBUG_LL if ARM
+       help
+         This option enables a printk variant called raw_printk() for
+         writing all output unmodified to a raw console channel
+         immediately, without any header or preparation whatsoever,
+         usable from any context.
+
+         Unlike early_printk() console devices, raw_printk() devices
+         can live past the boot sequence.
+
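An illustrative sketch of the intended use (the demo name is made up):
since raw_printk() bypasses the printk machinery entirely, it stays usable
where vanilla printk() could deadlock, e.g. with hard IRQs off or from head
domain context:

static void demo_report_stall(unsigned int irq)
{
	raw_printk("demo: IRQ %u stalled on the head domain\n", irq);
}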
 config BUG
 	bool "BUG() support" if EXPERT
 	default y
diff --git a/init/main.c b/init/main.c
index 49926d95442f..206b13b8ccec 100644
--- a/init/main.c
+++ b/init/main.c
@@ -517,7 +517,7 @@ asmlinkage __visible void __init start_kernel(void)
 
 	cgroup_init_early();
 
-	local_irq_disable();
+	hard_local_irq_disable();
 	early_boot_irqs_disabled = true;
 
 /*
@@ -557,6 +557,7 @@ asmlinkage __visible void __init start_kernel(void)
 	pidhash_init();
 	vfs_caches_init_early();
 	sort_main_extable();
+	__ipipe_init_early();
 	trap_init();
 	mm_init();
 
@@ -592,6 +593,11 @@ asmlinkage __visible void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	/*
+	 * We need to wait for the interrupt and time subsystems to be
+	 * initialized before enabling the pipeline.
+	 */
+	__ipipe_init();
 	sched_clock_postinit();
 	perf_event_init();
 	profile_init();
@@ -882,6 +888,7 @@ static void __init do_basic_setup(void)
 	shmem_init();
 	driver_init();
 	init_irq_proc();
+	__ipipe_init_proc();
 	do_ctors();
 	usermodehelper_enable();
 	do_initcalls();
diff --git a/kernel/Makefile b/kernel/Makefile
index 53abf008ecb3..30db940ed917 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_IPIPE) += ipipe/
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index d8560ee3bab7..ae41cfb264d5 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -113,7 +113,7 @@ void context_tracking_enter(enum ctx_state state)
 	 * helpers are enough to protect RCU uses inside the exception. So
 	 * just return immediately if we detect we are in an IRQ.
 	 */
-	if (in_interrupt())
+	if (!ipipe_root_p || in_interrupt())
 		return;
 
 	local_irq_save(flags);
@@ -169,7 +169,7 @@ void context_tracking_exit(enum ctx_state state)
 {
 	unsigned long flags;
 
-	if (in_interrupt())
+	if (!ipipe_root_p || in_interrupt())
 		return;
 
 	local_irq_save(flags);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 79517e5549f1..8cecd5aa5e12 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -118,8 +118,8 @@ static struct kgdb_bkpt		kgdb_break[KGDB_MAX_BREAKPOINTS] = {
  */
 atomic_t			kgdb_active = ATOMIC_INIT(-1);
 EXPORT_SYMBOL_GPL(kgdb_active);
-static DEFINE_RAW_SPINLOCK(dbg_master_lock);
-static DEFINE_RAW_SPINLOCK(dbg_slave_lock);
+static IPIPE_DEFINE_RAW_SPINLOCK(dbg_master_lock);
+static IPIPE_DEFINE_RAW_SPINLOCK(dbg_slave_lock);
 
 /*
  * We use NR_CPUs not PERCPU, in case kgdb is used to debug early
@@ -169,19 +169,21 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
 
-	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
-				BREAK_INSTR_SIZE);
+	err = ipipe_probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				      BREAK_INSTR_SIZE);
 	if (err)
 		return err;
-	err = probe_kernel_write((char *)bpt->bpt_addr,
-				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+	err = ipipe_probe_kernel_write((char *)bpt->bpt_addr,
+				       arch_kgdb_ops.gdb_bpt_instr,
+				       BREAK_INSTR_SIZE);
 	return err;
 }
 
 int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-	return probe_kernel_write((char *)bpt->bpt_addr,
-				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+	return ipipe_probe_kernel_write((char *)bpt->bpt_addr,
+					(char *)bpt->saved_instr,
+					BREAK_INSTR_SIZE);
 }
 
 int __weak kgdb_validate_break_address(unsigned long addr)
@@ -460,7 +462,9 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
 static void dbg_touch_watchdogs(void)
 {
 	touch_softlockup_watchdog_sync();
+#ifndef CONFIG_IPIPE
 	clocksource_touch_watchdog();
+#endif
 	rcu_cpu_stall_reset();
 }
 
@@ -491,7 +495,7 @@ acquirelock:
 	 * Interrupts will be restored by the 'trap return' code, except when
 	 * single stepping.
 	 */
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 
 	cpu = ks->cpu;
 	kgdb_info[cpu].debuggerinfo = regs;
@@ -540,7 +544,7 @@ return_normal:
 			smp_mb__before_atomic();
 			atomic_dec(&slaves_in_kgdb);
 			dbg_touch_watchdogs();
-			local_irq_restore(flags);
+			hard_local_irq_restore(flags);
 			return 0;
 		}
 		cpu_relax();
@@ -558,7 +562,7 @@ return_normal:
 		atomic_set(&kgdb_active, -1);
 		raw_spin_unlock(&dbg_master_lock);
 		dbg_touch_watchdogs();
-		local_irq_restore(flags);
+		hard_local_irq_restore(flags);
 
 		goto acquirelock;
 	}
@@ -675,7 +679,7 @@ kgdb_restore:
 	atomic_set(&kgdb_active, -1);
 	raw_spin_unlock(&dbg_master_lock);
 	dbg_touch_watchdogs();
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 
 	return kgdb_info[cpu].ret_state;
 }
@@ -794,9 +798,9 @@ static void kgdb_console_write(struct console *co, const char *s,
 	if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode)
 		return;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	gdbstub_msg_write(s, count);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 static struct console kgdbcons = {
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 19d9a578c753..4e990851c645 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -246,7 +246,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count)
 	 */
 	tmp = buf + count;
 
-	err = probe_kernel_read(tmp, mem, count);
+	err = ipipe_probe_kernel_read(tmp, mem, count);
 	if (err)
 		return NULL;
 	while (count > 0) {
@@ -282,7 +282,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
 		*tmp_raw |= hex_to_bin(*tmp_hex--) << 4;
 	}
 
-	return probe_kernel_write(mem, tmp_raw, count);
+	return ipipe_probe_kernel_write(mem, tmp_raw, count);
 }
 
 /*
@@ -334,7 +334,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count)
 		size++;
 	}
 
-	return probe_kernel_write(mem, c, size);
+	return ipipe_probe_kernel_write(mem, c, size);
 }
 
 #if DBG_MAX_REG_NUM > 0
diff --git a/kernel/exit.c b/kernel/exit.c
index f20e6339761b..1af7ba8bafc4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -705,6 +705,7 @@ void do_exit(long code)
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
+	__ipipe_report_exit(tsk);
 
 	if (unlikely(in_atomic())) {
 		pr_info("note: %s[%d] exited with preempt_count %d\n",
diff --git a/kernel/fork.c b/kernel/fork.c
index dd2f79ac0771..fc2a945c1d49 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -369,6 +369,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #endif
 
 	setup_thread_stack(tsk, orig);
+	__ipipe_init_threadflags(ti);
+	__ipipe_init_threadinfo(&ti->ipipe_data);
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
 	set_task_stack_end_magic(tsk);
@@ -711,6 +713,7 @@ void mmput(struct mm_struct *mm)
 		exit_aio(mm);
 		ksm_exit(mm);
 		khugepaged_exit(mm); /* must run before exit_mmap */
+		__ipipe_report_cleanup(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
@@ -1654,6 +1657,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	proc_fork_connector(p);
 	cgroup_post_fork(p, cgrp_ss_priv);
 	threadgroup_change_end(current);
+	__ipipe_init_taskinfo(p);
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig
new file mode 100644
index 000000000000..218f51da9ea5
--- /dev/null
+++ b/kernel/ipipe/Kconfig
@@ -0,0 +1,65 @@
+config IPIPE
+	bool "Interrupt pipeline"
+	default y
+	---help---
+	  Activate this option if you want the interrupt pipeline to be
+	  compiled in.
+
+config IPIPE_LEGACY
+	bool "I-pipe legacy interface"
+	depends on IPIPE
+	---help---
+	  Activate this option if you want to control the interrupt
+	  pipeline via the legacy interface.
+
+config IPIPE_CORE
+	def_bool y if IPIPE
+
+config IPIPE_WANT_CLOCKSOURCE
+       bool
+
+config IPIPE_WANT_PTE_PINNING
+       bool
+
+config IPIPE_CORE_APIREV
+       int
+       depends on IPIPE
+       default 2
+	---help---
+	  The API revision level we implement.
+
+config IPIPE_WANT_APIREV_1
+       bool
+
+config IPIPE_WANT_APIREV_2
+       bool
+
+config IPIPE_TARGET_APIREV
+       int
+       depends on IPIPE
+       default 1 if IPIPE_WANT_APIREV_1
+       default 2 if IPIPE_WANT_APIREV_2
+       default 1 if IPIPE_LEGACY
+       default IPIPE_CORE_APIREV
+	---help---
+	  The API revision level we want (must be <=
+	  IPIPE_CORE_APIREV).
+
+config IPIPE_HAVE_HOSTRT
+       bool
+
+config IPIPE_HAVE_PIC_MUTE
+       bool
+
+config HAVE_IPIPE_HOSTRT
+       depends on IPIPE_LEGACY
+       bool
+
+config IPIPE_DELAYED_ATOMICSW
+       def_bool y if IPIPE_LEGACY
+
+config IPIPE_HAVE_SAFE_THREAD_INFO
+	bool
+
+config IPIPE_HAVE_VM_NOTIFIER
+	bool
diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug
new file mode 100644
index 000000000000..cee7fab0ee07
--- /dev/null
+++ b/kernel/ipipe/Kconfig.debug
@@ -0,0 +1,96 @@
+config IPIPE_DEBUG
+	bool "I-pipe debugging"
+	depends on IPIPE
+	select RAW_PRINTK
+
+config IPIPE_DEBUG_CONTEXT
+	bool "Check for illicit cross-domain calls"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  Enable this feature to arm checkpoints in the kernel that
+	  verify the correct invocation context. On entry of critical
+	  verify the correct invocation context. On entry to critical
+	  Linux services, a warning is issued if the caller is not
+
+config IPIPE_DEBUG_INTERNAL
+	bool "Enable internal debug checks"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  When this feature is enabled, I-pipe will perform internal
+	  consistency checks of its subsystems, e.g. on per-cpu variable
+	  access.
+
+config IPIPE_TRACE
+	bool "Latency tracing"
+	depends on IPIPE_DEBUG
+	select FTRACE
+	select FUNCTION_TRACER
+	select KALLSYMS
+	select PROC_FS
+	---help---
+	  Activate this option if you want to use per-function tracing of
+	  the kernel. The tracer will collect data via instrumentation
+	  features like the one below or with the help of explicit calls
+	  to ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+	  in-kernel tracing API. The collected data and runtime controls
+	  are available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+	bool "Enable tracing on boot"
+	default y
+	---help---
+	  Disable this option if you want to arm the tracer manually after
+	  booting ("echo 1 > /proc/ipipe/trace/enable"). This can reduce
+	  boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+	bool "Instrument function entries"
+	default y
+	select FTRACE
+	select FUNCTION_TRACER
+	---help---
+	  When enabled, this option records every kernel function entry
+	  in the tracer log. While this slows down the system noticeably,
+	  it provides the highest level of information about the flow of
+	  events. However, it can be switched off in order to record only
+	  explicit I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+	bool "Trace IRQs-off times"
+	default y
+	---help---
+	  Activate this option if the I-pipe should trace the longest
+	  path with hard IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+	int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+	range 10 18
+	default 14
+	---help---
+	  The number of trace points to hold tracing data for each
+	  trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+	bool "Use vmalloc'ed trace buffer"
+	default y if EMBEDDED
+	---help---
+	  Instead of reserving static kernel data, the required buffer
+	  is allocated via vmalloc during boot-up when this option is
+	  enabled. This can help to start systems that are low on memory,
+	  but it slightly degrades overall performance. Try this option
+	  when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+	bool "Enable panic back traces"
+	default y
+	---help---
+	  Provides services to freeze and dump a back trace in panic
+	  situations. This is used for IPIPE_DEBUG_CONTEXT exceptions
+	  as well as ordinary kernel oopses. You can control the number
+	  of printed back trace points via /proc/ipipe/trace.
+
+endif
diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile
new file mode 100644
index 000000000000..c3ffe63e1e6f
--- /dev/null
+++ b/kernel/ipipe/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_IPIPE)	+= core.o timer.o
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
+obj-$(CONFIG_IPIPE_LEGACY) += compat.o
diff --git a/kernel/ipipe/compat.c b/kernel/ipipe/compat.c
new file mode 100644
index 000000000000..797a84974d42
--- /dev/null
+++ b/kernel/ipipe/compat.c
@@ -0,0 +1,273 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/compat.c
+ *
+ * Copyright (C) 2012 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * I-pipe legacy interface.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/ipipe.h>
+
+static int ptd_key_count;
+
+static unsigned long ptd_key_map;
+
+IPIPE_DECLARE_SPINLOCK(__ipipe_lock);
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr)
+{
+	attr->name = "anon";
+	attr->domid = 1;
+	attr->entry = NULL;
+	attr->priority = IPIPE_ROOT_PRIO;
+	attr->pdd = NULL;
+}
+EXPORT_SYMBOL_GPL(ipipe_init_attr);
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+
+	BUG_ON(attr->priority != IPIPE_HEAD_PRIORITY);
+
+	ipipe_register_head(ipd, attr->name);
+	ipd->legacy.domid = attr->domid;
+	ipd->legacy.pdd = attr->pdd;
+	ipd->legacy.priority = INT_MAX;
+
+	if (attr->entry == NULL)
+		return 0;
+
+	flags = hard_smp_local_irq_save();
+	__ipipe_set_current_domain(ipd);
+	hard_smp_local_irq_restore(flags);
+
+	attr->entry();
+
+	flags = hard_local_irq_save();
+	__ipipe_set_current_domain(ipipe_root_domain);
+	p = ipipe_this_cpu_root_context();
+	if (__ipipe_ipending_p(p) &&
+	    !test_bit(IPIPE_STALL_FLAG, &p->status))
+		__ipipe_sync_stage();
+	hard_local_irq_restore(flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_register_domain);
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd)
+{
+	ipipe_unregister_head(ipd);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_unregister_domain);
+
+int ipipe_alloc_ptdkey(void)
+{
+	unsigned long flags;
+	int key = -1;
+
+	spin_lock_irqsave(&__ipipe_lock, flags);
+
+	if (ptd_key_count < IPIPE_ROOT_NPTDKEYS) {
+		key = ffz(ptd_key_map);
+		set_bit(key, &ptd_key_map);
+		ptd_key_count++;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_lock, flags);
+
+	return key;
+}
+EXPORT_SYMBOL_GPL(ipipe_alloc_ptdkey);
+
+int ipipe_free_ptdkey(int key)
+{
+	unsigned long flags;
+
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&__ipipe_lock, flags);
+
+	if (test_and_clear_bit(key, &ptd_key_map))
+		ptd_key_count--;
+
+	spin_unlock_irqrestore(&__ipipe_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_free_ptdkey);
+
+int ipipe_set_ptd(int key, void *value)
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	current->ptd[key] = value;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipipe_set_ptd);
+
+void *ipipe_get_ptd(int key)
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return NULL;
+
+	return current->ptd[key];
+}
+EXPORT_SYMBOL_GPL(ipipe_get_ptd);
+
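For illustration, how a legacy client would typically use the key allocator
above to attach private data to tasks (demo names made up):

static int demo_key = -1;

static int demo_attach(void *priv)
{
	if (demo_key < 0) {
		demo_key = ipipe_alloc_ptdkey();
		if (demo_key < 0)
			return demo_key;
	}
	return ipipe_set_ptd(demo_key, priv);	/* lands in current->ptd[] */
}

static void *demo_retrieve(void)
{
	return demo_key < 0 ? NULL : ipipe_get_ptd(demo_key);
}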
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned int irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t ackfn,
+			 unsigned int modemask)
+{
+	if (handler == NULL) {
+		ipipe_free_irq(ipd, irq);
+		return 0;
+	}
+
+	return ipipe_request_irq(ipd, irq, handler, cookie, ackfn);
+}
+EXPORT_SYMBOL_GPL(ipipe_virtualize_irq);
+
+static int null_handler(unsigned int event,
+			struct ipipe_domain *from, void *data)
+{
+	/*
+	 * Legacy mode users will trap all events, or at least the
+	 * most frequent ones. Therefore it is actually faster to run a
+	 * dummy handler once in a while rather than testing for a
+	 * null handler pointer each time an event is fired.
+	 */
+	return 0;
+}
+
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned int event,
+					ipipe_event_handler_t handler)
+{
+	ipipe_event_handler_t oldhandler;
+	int n, enables = 0;
+
+	if (event & IPIPE_EVENT_SELF) {
+		event &= ~IPIPE_EVENT_SELF;
+		IPIPE_WARN(event >= IPIPE_NR_FAULTS);
+	}
+
+	if (event >= IPIPE_NR_EVENTS)
+		return NULL;
+
+	/*
+	 * It makes no sense to run a SETSCHED notification handler
+	 * over the head domain; this would introduce a useless domain
+	 * switch for doing work which ought to be root-specific.
+	 * Unfortunately, some client domains using the legacy
+	 * interface still ask for this, so we silently fix their
+	 * request. This prevents ipipe_set_hooks() from yelling at us
+	 * because of an attempt to enable kernel event notifications
+	 * for the head domain.
+	 */
+	if (event == IPIPE_EVENT_SETSCHED)
+		ipd = ipipe_root_domain;
+
+	oldhandler = ipd->legacy.handlers[event];
+	ipd->legacy.handlers[event] = handler ?: null_handler;
+
+	for (n = 0; n < IPIPE_NR_FAULTS; n++) {
+		if (ipd->legacy.handlers[n] != null_handler) {
+			enables |= __IPIPE_TRAP_E;
+			break;
+		}
+	}
+
+	for (n = IPIPE_FIRST_EVENT; n < IPIPE_LAST_EVENT; n++) {
+		if (ipd->legacy.handlers[n] != null_handler) {
+			enables |= __IPIPE_KEVENT_E;
+			break;
+		}
+	}
+
+	if (ipd->legacy.handlers[IPIPE_EVENT_SYSCALL] != null_handler)
+		enables |= __IPIPE_SYSCALL_E;
+
+	ipipe_set_hooks(ipd, enables);
+
+	return oldhandler == null_handler ? NULL : oldhandler;
+}
+EXPORT_SYMBOL_GPL(ipipe_catch_event);
+
+int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio)
+{
+	struct sched_param param = { .sched_priority = prio };
+	return sched_setscheduler_nocheck(p, policy, &param);
+}
+EXPORT_SYMBOL_GPL(ipipe_setscheduler_root);
+
+int ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs)
+{
+	const int event = IPIPE_EVENT_SYSCALL;
+	return ipipe_current_domain->legacy.handlers[event](event, ipd, regs);
+}
+
+int ipipe_trap_hook(struct ipipe_trap_data *data)
+{
+	struct ipipe_domain *ipd = ipipe_head_domain;
+	struct pt_regs *regs = data->regs;
+	int ex = data->exception;
+
+	return ipd->legacy.handlers[ex](ex, ipd, regs);
+}
+
+int ipipe_kevent_hook(int kevent, void *data)
+{
+	unsigned int event = IPIPE_FIRST_EVENT + kevent;
+	struct ipipe_domain *ipd = ipipe_root_domain;
+
+	return ipd->legacy.handlers[event](event, ipd, data);
+}
+
+void __ipipe_legacy_init_stage(struct ipipe_domain *ipd)
+{
+	int n;
+
+	for (n = 0; n < IPIPE_NR_EVENTS; n++)
+		ipd->legacy.handlers[n] = null_handler;
+
+	if (ipd == &ipipe_root) {
+		ipd->legacy.domid = IPIPE_ROOT_ID;
+		ipd->legacy.priority = IPIPE_ROOT_PRIO;
+	}
+}
+
+notrace asmlinkage int __ipipe_check_root(void) /* hw IRQs off */
+{
+	return __ipipe_root_p;
+}
diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c
new file mode 100644
index 000000000000..a1a48a58b9ef
--- /dev/null
+++ b/kernel/ipipe/core.c
@@ -0,0 +1,1991 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/core.c
+ *
+ * Copyright (C) 2002-2012 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Architecture-independent I-PIPE core support.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/bitops.h>
+#include <linux/tick.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif	/* CONFIG_PROC_FS */
+#include <linux/ipipe_trace.h>
+#include <linux/ipipe.h>
+#include <ipipe/setup.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
+
+struct ipipe_domain ipipe_root;
+EXPORT_SYMBOL_GPL(ipipe_root);
+
+struct ipipe_domain *ipipe_head_domain = &ipipe_root;
+EXPORT_SYMBOL_GPL(ipipe_head_domain);
+
+#ifdef CONFIG_SMP
+static __initdata struct ipipe_percpu_domain_data bootup_context = {
+	.status = IPIPE_STALL_MASK,
+	.domain = &ipipe_root,
+};
+#else
+#define bootup_context ipipe_percpu.root
+#endif	/* !CONFIG_SMP */
+
+DEFINE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu) = {
+	.root = {
+		.status = IPIPE_STALL_MASK,
+		.domain = &ipipe_root,
+	},
+	.curr = &bootup_context,
+	.hrtimer_irq = -1,
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+	.context_check = 1,
+#endif
+};
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu);
+
+/* Up to 2k of pending work data per CPU. */
+#define WORKBUF_SIZE 2048
+static DEFINE_PER_CPU_ALIGNED(unsigned char[WORKBUF_SIZE], work_buf);
+static DEFINE_PER_CPU(void *, work_tail);
+static unsigned int __ipipe_work_virq;
+
+static void __ipipe_do_work(unsigned int virq, void *cookie);
+
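The per-CPU work_buf ring above backs the deferred-work mechanism. Assuming
the ipipe_post_work_root()/struct ipipe_work_header interface from
include/linux/ipipe.h, a head domain caller would hand work off to the root
domain along these lines (demo names made up; the request is copied into
work_buf, so stack storage is fine):

struct demo_request {
	struct ipipe_work_header work;	/* must come first */
	int cookie;
};

static void demo_handle_request(struct ipipe_work_header *work)
{
	struct demo_request *rq = container_of(work, struct demo_request, work);

	printk("demo: deferred cookie %d\n", rq->cookie);	/* root context */
}

static void demo_defer(int cookie)	/* callable from the head domain */
{
	struct demo_request rq = {
		.work = {
			.size	 = sizeof(rq),
			.handler = demo_handle_request,
		},
		.cookie = cookie,
	};

	ipipe_post_work_root(&rq, work);
}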
+#ifdef CONFIG_SMP
+
+#define IPIPE_CRITICAL_TIMEOUT	1000000
+static cpumask_t __ipipe_cpu_sync_map;
+static cpumask_t __ipipe_cpu_lock_map;
+static cpumask_t __ipipe_cpu_pass_map;
+static unsigned long __ipipe_critical_lock;
+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
+static void (*__ipipe_cpu_sync) (void);
+
+#else /* !CONFIG_SMP */
+/*
+ * Create an alias to the unique root status, so that arch-dep code
+ * may get fast access to this percpu variable including from
+ * assembly.  A hard-coded assumption is that root.status appears at
+ * offset #0 of the ipipe_percpu struct.
+ */
+extern unsigned long __ipipe_root_status
+__attribute__((alias(__stringify(ipipe_percpu))));
+EXPORT_SYMBOL(__ipipe_root_status);
+
+#endif /* !CONFIG_SMP */
+
+IPIPE_DEFINE_SPINLOCK(__ipipe_lock);
+
+static unsigned long __ipipe_virtual_irq_map;
+
+#ifdef CONFIG_PRINTK
+unsigned int __ipipe_printk_virq;
+int __ipipe_printk_bypass;
+#endif /* CONFIG_PRINTK */
+
+#ifdef CONFIG_PROC_FS
+
+struct proc_dir_entry *ipipe_proc_root;
+
+static int __ipipe_version_info_show(struct seq_file *p, void *data)
+{
+	seq_printf(p, "%d\n", IPIPE_CORE_RELEASE);
+	return 0;
+}
+
+static int __ipipe_version_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_version_info_show, NULL);
+}
+
+static const struct file_operations __ipipe_version_proc_ops = {
+	.open		= __ipipe_version_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __ipipe_common_info_show(struct seq_file *p, void *data)
+{
+	struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
+	char handling, lockbit, virtuality;
+	unsigned long ctlbits;
+	unsigned int irq;
+
+	seq_printf(p, "        +--- Handled\n");
+	seq_printf(p, "        |+-- Locked\n");
+	seq_printf(p, "        ||+- Virtual\n");
+	seq_printf(p, " [IRQ]  |||  Handler\n");
+
+	mutex_lock(&ipd->mutex);
+
+	for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+		ctlbits = ipd->irqs[irq].control;
+		/*
+		 * There might be a hole between the last external IRQ
+		 * and the first virtual one; skip it.
+		 */
+		if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
+			continue;
+
+		if (ipipe_virtual_irq_p(irq)
+		    && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
+			/* Non-allocated virtual IRQ; skip it. */
+			continue;
+
+		if (ctlbits & IPIPE_HANDLE_MASK)
+			handling = 'H';
+		else
+			handling = '.';
+
+		if (ctlbits & IPIPE_LOCK_MASK)
+			lockbit = 'L';
+		else
+			lockbit = '.';
+
+		if (ipipe_virtual_irq_p(irq))
+			virtuality = 'V';
+		else
+			virtuality = '.';
+
+		if (ctlbits & IPIPE_HANDLE_MASK)
+			seq_printf(p, " %4u:  %c%c%c  %pf\n",
+				   irq, handling, lockbit, virtuality,
+				   ipd->irqs[irq].handler);
+		else
+			seq_printf(p, " %4u:  %c%c%c\n",
+				   irq, handling, lockbit, virtuality);
+	}
+
+	mutex_unlock(&ipd->mutex);
+
+	return 0;
+}
+
+static int __ipipe_common_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_common_info_show, PDE_DATA(inode));
+}
+
+static const struct file_operations __ipipe_info_proc_ops = {
+	.owner		= THIS_MODULE,
+	.open		= __ipipe_common_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void add_domain_proc(struct ipipe_domain *ipd)
+{
+	proc_create_data(ipd->name, 0444, ipipe_proc_root,
+			 &__ipipe_info_proc_ops, ipd);
+}
+
+void remove_domain_proc(struct ipipe_domain *ipd)
+{
+	remove_proc_entry(ipd->name, ipipe_proc_root);
+}
+
+void __init __ipipe_init_proc(void)
+{
+	ipipe_proc_root = proc_mkdir("ipipe", NULL);
+	proc_create("version", 0444, ipipe_proc_root,
+		    &__ipipe_version_proc_ops);
+	add_domain_proc(ipipe_root_domain);
+
+	__ipipe_init_tracer();
+}
+
+#else
+
+static inline void add_domain_proc(struct ipipe_domain *ipd)
+{
+}
+
+static inline void remove_domain_proc(struct ipipe_domain *ipd)
+{
+}
+
+#endif	/* CONFIG_PROC_FS */
+
+static void init_stage(struct ipipe_domain *ipd)
+{
+	memset(&ipd->irqs, 0, sizeof(ipd->irqs));
+	mutex_init(&ipd->mutex);
+	__ipipe_legacy_init_stage(ipd);
+	__ipipe_hook_critical_ipi(ipd);
+}
+
+static inline int root_context_offset(void)
+{
+	void root_context_not_at_start_of_ipipe_percpu(void);
+
+	/* ipipe_percpu.root must be found at offset #0. */
+
+	if (offsetof(struct ipipe_percpu_data, root))
+		root_context_not_at_start_of_ipipe_percpu();
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static inline void fixup_percpu_data(void)
+{
+	struct ipipe_percpu_data *p;
+	int cpu;
+
+	/*
+	 * ipipe_percpu.curr cannot be assigned statically to
+	 * &ipipe_percpu.root, due to the dynamic nature of percpu
+	 * data. So we make ipipe_percpu.curr refer to a temporary
+	 * boot-up context in static memory, until we can fix up all
+	 * context pointers in this routine, once per-cpu areas have
+	 * been set up. The temporary context data is
+	 * copied to per_cpu(ipipe_percpu, 0).root in the same move.
+	 *
+	 * Obviously, this code must run over the boot CPU, before SMP
+	 * operations start.
+	 */
+	BUG_ON(smp_processor_id() || !irqs_disabled());
+
+	per_cpu(ipipe_percpu, 0).root = bootup_context;
+
+	for_each_possible_cpu(cpu) {
+		p = &per_cpu(ipipe_percpu, cpu);
+		p->curr = &p->root;
+	}
+}
+
+#else /* !CONFIG_SMP */
+
+static inline void fixup_percpu_data(void) { }
+
+#endif /* CONFIG_SMP */
+
+void __init __ipipe_init_early(void)
+{
+	struct ipipe_domain *ipd = &ipipe_root;
+	int cpu;
+
+	fixup_percpu_data();
+
+	/*
+	 * A lightweight registration code for the root domain. We are
+	 * running on the boot CPU, hw interrupts are off, and
+	 * secondary CPUs are still lost in space.
+	 */
+	ipd->name = "Linux";
+	ipd->context_offset = root_context_offset();
+	init_stage(ipd);
+
+	/*
+	 * Do the early init stuff. First we do the per-arch pipeline
+	 * core setup, then we run the per-client setup code. At this
+	 * point, the kernel does not provide much services yet: be
+	 * careful.
+	 */
+	__ipipe_early_core_setup();
+	__ipipe_early_client_setup();
+
+#ifdef CONFIG_PRINTK
+	__ipipe_printk_virq = ipipe_alloc_virq();
+	ipd->irqs[__ipipe_printk_virq].handler = __ipipe_flush_printk;
+	ipd->irqs[__ipipe_printk_virq].cookie = NULL;
+	ipd->irqs[__ipipe_printk_virq].ackfn = NULL;
+	ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
+#endif /* CONFIG_PRINTK */
+
+	__ipipe_work_virq = ipipe_alloc_virq();
+	ipd->irqs[__ipipe_work_virq].handler = __ipipe_do_work;
+	ipd->irqs[__ipipe_work_virq].cookie = NULL;
+	ipd->irqs[__ipipe_work_virq].ackfn = NULL;
+	ipd->irqs[__ipipe_work_virq].control = IPIPE_HANDLE_MASK;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu);
+}
+
+void __init __ipipe_init(void)
+{
+	/* Now we may engage the pipeline. */
+	__ipipe_enable_pipeline();
+
+	pr_info("Interrupt pipeline (release #%d)\n", IPIPE_CORE_RELEASE);
+}
+
+static inline void init_head_stage(struct ipipe_domain *ipd)
+{
+	struct ipipe_percpu_domain_data *p;
+	int cpu;
+
+	/* Must be set first, used in ipipe_percpu_context(). */
+	ipd->context_offset = offsetof(struct ipipe_percpu_data, head);
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpu_context(ipd, cpu);
+		memset(p, 0, sizeof(*p));
+		p->domain = ipd;
+	}
+
+	init_stage(ipd);
+}
+
+void ipipe_register_head(struct ipipe_domain *ipd, const char *name)
+{
+	BUG_ON(!ipipe_root_p || ipd == &ipipe_root);
+
+	ipd->name = name;
+	init_head_stage(ipd);
+	barrier();
+	ipipe_head_domain = ipd;
+	add_domain_proc(ipd);
+
+	pr_info("I-pipe: head domain %s registered.\n", name);
+}
+EXPORT_SYMBOL_GPL(ipipe_register_head);
+
+void ipipe_unregister_head(struct ipipe_domain *ipd)
+{
+	BUG_ON(!ipipe_root_p || ipd != ipipe_head_domain);
+
+	ipipe_head_domain = &ipipe_root;
+	smp_mb();
+	mutex_lock(&ipd->mutex);
+	remove_domain_proc(ipd);
+	mutex_unlock(&ipd->mutex);
+
+	pr_info("I-pipe: head domain %s unregistered.\n", ipd->name);
+}
+EXPORT_SYMBOL_GPL(ipipe_unregister_head);
+
+void ipipe_unstall_root(void)
+{
+	struct ipipe_percpu_domain_data *p;
+
+	hard_local_irq_disable();
+
+	/* This helps catching bad usage from assembly call sites. */
+	ipipe_root_only();
+
+	p = ipipe_this_cpu_root_context();
+
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (unlikely(__ipipe_ipending_p(p)))
+		__ipipe_sync_stage();
+
+	hard_local_irq_enable();
+}
+EXPORT_SYMBOL(ipipe_unstall_root);
+
+void ipipe_restore_root(unsigned long x)
+{
+	ipipe_root_only();
+
+	if (x)
+		ipipe_stall_root();
+	else
+		ipipe_unstall_root();
+}
+EXPORT_SYMBOL(ipipe_restore_root);
+
+void __ipipe_restore_root_nosync(unsigned long x)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_this_cpu_root_context();
+
+	if (raw_irqs_disabled_flags(x)) {
+		__set_bit(IPIPE_STALL_FLAG, &p->status);
+		trace_hardirqs_off();
+	} else {
+		trace_hardirqs_on();
+		__clear_bit(IPIPE_STALL_FLAG, &p->status);
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_restore_root_nosync);
+
+void ipipe_unstall_head(void)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context();
+
+	hard_local_irq_disable();
+
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	if (unlikely(__ipipe_ipending_p(p)))
+		__ipipe_sync_pipeline(ipipe_head_domain);
+
+	hard_local_irq_enable();
+}
+EXPORT_SYMBOL_GPL(ipipe_unstall_head);
+
+void __ipipe_restore_head(unsigned long x) /* hw interrupt off */
+{
+	struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context();
+
+	if (x) {
+#ifdef CONFIG_DEBUG_KERNEL
+		static int warned;
+		if (!warned &&
+		    __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) {
+			/*
+			 * Already stalled although ipipe_restore_head()
+			 * should have detected it? Warn once.
+			 */
+			hard_local_irq_enable();
+			warned = 1;
+			pr_warn("I-pipe: ipipe_restore_head() optimization failed.\n");
+			dump_stack();
+			hard_local_irq_disable();
+		}
+#else /* !CONFIG_DEBUG_KERNEL */
+		__set_bit(IPIPE_STALL_FLAG, &p->status);
+#endif /* CONFIG_DEBUG_KERNEL */
+	} else {
+		__clear_bit(IPIPE_STALL_FLAG, &p->status);
+		if (unlikely(__ipipe_ipending_p(p)))
+			__ipipe_sync_pipeline(ipipe_head_domain);
+		hard_local_irq_enable();
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_restore_head);
+
+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock)
+{
+	hard_local_irq_disable();
+	if (ipipe_smp_p)
+		arch_spin_lock(&lock->arch_lock);
+	__set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irq);
+
+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock)
+{
+	if (ipipe_smp_p)
+		arch_spin_unlock(&lock->arch_lock);
+	__clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+	hard_local_irq_enable();
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irq);
+
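+/*
+ * The irqsave variants fold the previous stall bit of the current
+ * context into the hw flags word via arch_mangle_irq_bits(), so that
+ * a single value later restores both the hw interrupt state and the
+ * virtual stall state on unlock.
+ */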
+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock)
+{
+	unsigned long flags;
+	int s;
+
+	flags = hard_local_irq_save();
+	if (ipipe_smp_p)
+		arch_spin_lock(&lock->arch_lock);
+	s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+
+	return arch_mangle_irq_bits(s, flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irqsave);
+
+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
+				 unsigned long *x)
+{
+	unsigned long flags;
+	int s;
+
+	flags = hard_local_irq_save();
+	if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) {
+		hard_local_irq_restore(flags);
+		return 0;
+	}
+	s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+	*x = arch_mangle_irq_bits(s, flags);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irqsave);
+
+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
+				    unsigned long x)
+{
+	if (ipipe_smp_p)
+		arch_spin_unlock(&lock->arch_lock);
+	if (!arch_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+	hard_local_irq_restore(x);
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irqrestore);
+
+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock)
+{
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
+	if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) {
+		hard_local_irq_restore(flags);
+		return 0;
+	}
+	__set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irq);
+
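+/*
+ * Two-phase unlock: _irqbegin releases the arch lock only, so the
+ * caller may still do fixups while covered by hw IRQs off;
+ * _irqcomplete then restores the stall bit and the hw flags.
+ */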
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
+{
+	if (ipipe_smp_p)
+		arch_spin_unlock(&lock->arch_lock);
+}
+
+void __ipipe_spin_unlock_irqcomplete(unsigned long x)
+{
+	if (!arch_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+	hard_local_irq_restore(x);
+}
+
+#ifdef __IPIPE_3LEVEL_IRQMAP
+
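+/*
+ * The pending-IRQ log is a three-level radix bitmap (assuming
+ * BITS_PER_LONG == 64 in this example): irqpend_himap carries one
+ * bit per group of 64 * 64 = 4096 IRQs, irqpend_mdmap one bit per
+ * group of 64 IRQs, and irqpend_lomap one bit per IRQ. E.g. for
+ * irq 70, l0b = 70 / 4096 = 0 and l1b = 70 / 64 = 1: bit 70 is set
+ * in the low map, bit 1 in the middle map, bit 0 in the high map.
+ */
+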
+/* Must be called hw IRQs off. */
+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
+					unsigned int irq)
+{
+	__set_bit(irq, p->irqheld_map);
+	p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd);
+	int l0b, l1b;
+
+	IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+	l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+	l1b = irq / BITS_PER_LONG;
+
+	if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		__set_bit(irq, p->irqpend_lomap);
+		__set_bit(l1b, p->irqpend_mdmap);
+		__set_bit(l0b, &p->irqpend_himap);
+	} else
+		__set_bit(irq, p->irqheld_map);
+
+	p->irqall[irq]++;
+}
+EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending);
+
+/* Must be called hw IRQs off. */
+void __ipipe_lock_irq(unsigned int irq)
+{
+	struct ipipe_domain *ipd = ipipe_root_domain;
+	struct ipipe_percpu_domain_data *p;
+	int l0b, l1b;
+
+	IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+	/*
+	 * Interrupts requested by a registered head domain cannot be
+	 * locked, since this would make no sense: interrupts are
+	 * globally masked at CPU level when the head domain is
+	 * stalled, so the situation that IRQ locks are meant to
+	 * handle can never arise.
+	 */
+	if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+		return;
+
+	l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+	l1b = irq / BITS_PER_LONG;
+
+	p = ipipe_this_cpu_context(ipd);
+	if (__test_and_clear_bit(irq, p->irqpend_lomap)) {
+		__set_bit(irq, p->irqheld_map);
+		if (p->irqpend_lomap[l1b] == 0) {
+			__clear_bit(l1b, p->irqpend_mdmap);
+			if (p->irqpend_mdmap[l0b] == 0)
+				__clear_bit(l0b, &p->irqpend_himap);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_lock_irq);
+
+/* Must be called hw IRQs off. */
+void __ipipe_unlock_irq(unsigned int irq)
+{
+	struct ipipe_domain *ipd = ipipe_root_domain;
+	struct ipipe_percpu_domain_data *p;
+	int l0b, l1b, cpu;
+
+	IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+	if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+		return;
+
+	l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+	l1b = irq / BITS_PER_LONG;
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpu_context(ipd, cpu);
+		if (test_and_clear_bit(irq, p->irqheld_map)) {
+			/* We need atomic ops here: */
+			set_bit(irq, p->irqpend_lomap);
+			set_bit(l1b, p->irqpend_mdmap);
+			set_bit(l0b, &p->irqpend_himap);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_unlock_irq);
+
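+/*
+ * Pick the next pending IRQ by walking the radix map top-down:
+ * __ipipe_ffnz() selects the first non-zero bit at each level, then
+ * the low-map bit is consumed and emptiness is propagated upward, so
+ * the scan cost does not depend on IPIPE_NR_IRQS.
+ */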
+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
+{
+	int l0b, l1b, l2b;
+	unsigned long l0m, l1m, l2m;
+	unsigned int irq;
+
+	l0m = p->irqpend_himap;
+	if (unlikely(l0m == 0))
+		return -1;
+
+	l0b = __ipipe_ffnz(l0m);
+	l1m = p->irqpend_mdmap[l0b];
+	if (unlikely(l1m == 0))
+		return -1;
+
+	l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG;
+	l2m = p->irqpend_lomap[l1b];
+	if (unlikely(l2m == 0))
+		return -1;
+
+	l2b = __ipipe_ffnz(l2m);
+	irq = l1b * BITS_PER_LONG + l2b;
+
+	__clear_bit(irq, p->irqpend_lomap);
+	if (p->irqpend_lomap[l1b] == 0) {
+		__clear_bit(l1b, p->irqpend_mdmap);
+		if (p->irqpend_mdmap[l0b] == 0)
+			__clear_bit(l0b, &p->irqpend_himap);
+	}
+
+	return irq;
+}
+
+#else /* __IPIPE_2LEVEL_IRQMAP */
+
+/* Must be called hw IRQs off. */
+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
+					unsigned int irq)
+{
+	__set_bit(irq, p->irqheld_map);
+	p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq)
+{
+	struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd);
+	int l0b = irq / BITS_PER_LONG;
+
+	IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+	if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		__set_bit(irq, p->irqpend_lomap);
+		__set_bit(l0b, &p->irqpend_himap);
+	} else
+		__set_bit(irq, p->irqheld_map);
+
+	p->irqall[irq]++;
+}
+EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending);
+
+/* Must be called hw IRQs off. */
+void __ipipe_lock_irq(unsigned int irq)
+{
+	struct ipipe_percpu_domain_data *p;
+	int l0b = irq / BITS_PER_LONG;
+
+	IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+	if (test_and_set_bit(IPIPE_LOCK_FLAG,
+			     &ipipe_root_domain->irqs[irq].control))
+		return;
+
+	p = ipipe_this_cpu_root_context();
+	if (__test_and_clear_bit(irq, p->irqpend_lomap)) {
+		__set_bit(irq, p->irqheld_map);
+		if (p->irqpend_lomap[l0b] == 0)
+			__clear_bit(l0b, &p->irqpend_himap);
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_lock_irq);
+
+/* Must be called hw IRQs off. */
+void __ipipe_unlock_irq(unsigned int irq)
+{
+	struct ipipe_domain *ipd = ipipe_root_domain;
+	struct ipipe_percpu_domain_data *p;
+	int l0b = irq / BITS_PER_LONG, cpu;
+
+	IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+	if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+		return;
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpu_context(ipd, cpu);
+		if (test_and_clear_bit(irq, p->irqheld_map)) {
+			/* We need atomic ops here: */
+			set_bit(irq, p->irqpend_lomap);
+			set_bit(l0b, &p->irqpend_himap);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_unlock_irq);
+
+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
+{
+	unsigned long l0m, l1m;
+	int l0b, l1b;
+
+	l0m = p->irqpend_himap;
+	if (unlikely(l0m == 0))
+		return -1;
+
+	l0b = __ipipe_ffnz(l0m);
+	l1m = p->irqpend_lomap[l0b];
+	if (unlikely(l1m == 0))
+		return -1;
+
+	l1b = __ipipe_ffnz(l1m);
+	__clear_bit(l1b, &p->irqpend_lomap[l0b]);
+	if (p->irqpend_lomap[l0b] == 0)
+		__clear_bit(l0b, &p->irqpend_himap);
+
+	return l0b * BITS_PER_LONG + l1b;
+}
+
+#endif /* __IPIPE_2LEVEL_IRQMAP */
+
+void __ipipe_do_sync_pipeline(struct ipipe_domain *top)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_domain *ipd;
+
+	/* We must enter over the root domain. */
+	IPIPE_WARN_ONCE(__ipipe_current_domain != ipipe_root_domain);
+	ipd = top;
+next:
+	p = ipipe_this_cpu_context(ipd);
+	if (test_bit(IPIPE_STALL_FLAG, &p->status))
+		return;
+
+	if (__ipipe_ipending_p(p)) {
+		if (ipd == ipipe_root_domain)
+			__ipipe_sync_stage();
+		else {
+			/* Switching to head. */
+			p->coflags &= ~__IPIPE_ALL_R;
+			__ipipe_set_current_context(p);
+			__ipipe_sync_stage();
+			__ipipe_set_current_domain(ipipe_root_domain);
+		}
+	}
+
+	if (ipd != ipipe_root_domain) {
+		ipd = ipipe_root_domain;
+		goto next;
+	}
+}
+EXPORT_SYMBOL_GPL(__ipipe_do_sync_pipeline);
+
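+/*
+ * Allocate a virtual IRQ number. The allocation map is a single
+ * unsigned long, so at most BITS_PER_LONG virtual IRQs exist,
+ * numbered from IPIPE_VIRQ_BASE; 0 is returned on exhaustion.
+ */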
+unsigned int ipipe_alloc_virq(void)
+{
+	unsigned long flags, irq = 0;
+	int ipos;
+
+	raw_spin_lock_irqsave(&__ipipe_lock, flags);
+
+	if (__ipipe_virtual_irq_map != ~0) {
+		ipos = ffz(__ipipe_virtual_irq_map);
+		set_bit(ipos, &__ipipe_virtual_irq_map);
+		irq = ipos + IPIPE_VIRQ_BASE;
+	}
+
+	raw_spin_unlock_irqrestore(&__ipipe_lock, flags);
+
+	return irq;
+}
+EXPORT_SYMBOL_GPL(ipipe_alloc_virq);
+
+void ipipe_free_virq(unsigned int virq)
+{
+	clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
+	smp_mb__after_atomic();
+}
+EXPORT_SYMBOL_GPL(ipipe_free_virq);
+
+int ipipe_request_irq(struct ipipe_domain *ipd,
+		      unsigned int irq,
+		      ipipe_irq_handler_t handler,
+		      void *cookie,
+		      ipipe_irq_ackfn_t ackfn)
+{
+	unsigned long flags;
+	int ret = 0;
+
+#ifndef CONFIG_IPIPE_LEGACY
+	ipipe_root_only();
+#endif /* CONFIG_IPIPE_LEGACY */
+
+	if (handler == NULL ||
+	    (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)))
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&__ipipe_lock, flags);
+
+	if (ipd->irqs[irq].handler) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (ackfn == NULL)
+		ackfn = ipipe_root_domain->irqs[irq].ackfn;
+
+	ipd->irqs[irq].handler = handler;
+	ipd->irqs[irq].cookie = cookie;
+	ipd->irqs[irq].ackfn = ackfn;
+	ipd->irqs[irq].control = IPIPE_HANDLE_MASK;
+
+	if (irq < IPIPE_NR_ROOT_IRQS)
+		__ipipe_enable_irqdesc(ipd, irq);
+out:
+	raw_spin_unlock_irqrestore(&__ipipe_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ipipe_request_irq);
+
+void ipipe_free_irq(struct ipipe_domain *ipd,
+		    unsigned int irq)
+{
+	unsigned long flags;
+
+#ifndef CONFIG_IPIPE_LEGACY
+	ipipe_root_only();
+#endif /* CONFIG_IPIPE_LEGACY */
+
+	raw_spin_lock_irqsave(&__ipipe_lock, flags);
+
+	if (ipd->irqs[irq].handler == NULL)
+		goto out;
+
+	ipd->irqs[irq].handler = NULL;
+	ipd->irqs[irq].cookie = NULL;
+	ipd->irqs[irq].ackfn = NULL;
+	ipd->irqs[irq].control = 0;
+
+	if (irq < IPIPE_NR_ROOT_IRQS)
+		__ipipe_disable_irqdesc(ipd, irq);
+out:
+	raw_spin_unlock_irqrestore(&__ipipe_lock, flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_free_irq);
+
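+/*
+ * Enable/disable event hooks for a domain. coflags pairs each enable
+ * (_E) bit with a running (_R) bit shifted by __IPIPE_SHIFT_R; the
+ * __ipipe_notify_*() helpers raise the _R bit around a hook call,
+ * which is what the completion wait below synchronizes on.
+ */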
+void ipipe_set_hooks(struct ipipe_domain *ipd, int enables)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+	int cpu, wait;
+
+	if (ipd == ipipe_root_domain) {
+		IPIPE_WARN(enables & __IPIPE_TRAP_E);
+		enables &= ~__IPIPE_TRAP_E;
+	} else {
+		IPIPE_WARN(enables & __IPIPE_KEVENT_E);
+		enables &= ~__IPIPE_KEVENT_E;
+	}
+
+	flags = ipipe_critical_enter(NULL);
+
+	for_each_online_cpu(cpu) {
+		p = ipipe_percpu_context(ipd, cpu);
+		p->coflags &= ~__IPIPE_ALL_E;
+		p->coflags |= enables;
+	}
+
+	wait = (enables ^ __IPIPE_ALL_E) << __IPIPE_SHIFT_R;
+	if (wait == 0 || !__ipipe_root_p) {
+		ipipe_critical_exit(flags);
+		return;
+	}
+
+	ipipe_this_cpu_context(ipd)->coflags &= ~wait;
+
+	ipipe_critical_exit(flags);
+
+	/*
+	 * In case we cleared some hooks over the root domain, we have
+	 * to wait for any ongoing execution to finish, since our
+	 * caller might subsequently unmap the target domain code.
+	 *
+	 * We synchronize with the relevant __ipipe_notify_*()
+	 * helpers, disabling all hooks before we start waiting for
+	 * completion on all CPUs.
+	 */
+	for_each_online_cpu(cpu) {
+		while (ipipe_percpu_context(ipd, cpu)->coflags & wait)
+			schedule_timeout_interruptible(HZ / 50);
+	}
+}
+EXPORT_SYMBOL_GPL(ipipe_set_hooks);
+
+int __weak ipipe_fastcall_hook(struct pt_regs *regs)
+{
+	return -1;	/* i.e. fall back to slow path. */
+}
+
+int __weak ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs)
+{
+	return 0;
+}
+
+void __ipipe_root_sync(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
+
+	p = ipipe_this_cpu_root_context();
+	if (__ipipe_ipending_p(p))
+		__ipipe_sync_stage();
+
+	hard_local_irq_restore(flags);
+}
+
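+/*
+ * Propagate a syscall event down the pipeline: the head domain gets
+ * it first, then the root domain, unless the head hook returned a
+ * non-zero value or the task migrated to the head stage meanwhile.
+ */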
+int __ipipe_notify_syscall(struct pt_regs *regs)
+{
+	struct ipipe_domain *caller_domain, *this_domain, *ipd;
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+	int ret = 0;
+
+	/*
+	 * We should definitely not pipeline a syscall with IRQs off.
+	 */
+	IPIPE_WARN_ONCE(hard_irqs_disabled());
+
+	flags = hard_local_irq_save();
+	caller_domain = this_domain = __ipipe_current_domain;
+	ipd = ipipe_head_domain;
+next:
+	p = ipipe_this_cpu_context(ipd);
+	if (likely(p->coflags & __IPIPE_SYSCALL_E)) {
+		__ipipe_set_current_context(p);
+		p->coflags |= __IPIPE_SYSCALL_R;
+		hard_local_irq_restore(flags);
+		ret = ipipe_syscall_hook(caller_domain, regs);
+		flags = hard_local_irq_save();
+		p->coflags &= ~__IPIPE_SYSCALL_R;
+		if (__ipipe_current_domain != ipd)
+			/* Account for domain migration. */
+			this_domain = __ipipe_current_domain;
+		else
+			__ipipe_set_current_domain(this_domain);
+	}
+
+	if (this_domain == ipipe_root_domain) {
+		if (ipd != ipipe_root_domain && ret == 0) {
+			ipd = ipipe_root_domain;
+			goto next;
+		}
+		/*
+		 * Careful: we may have migrated from head->root, so p
+		 * would be ipipe_this_cpu_context(head).
+		 */
+		p = ipipe_this_cpu_root_context();
+		if (__ipipe_ipending_p(p))
+			__ipipe_sync_stage();
+	} else if (ipipe_test_thread_flag(TIP_MAYDAY))
+		__ipipe_call_mayday(regs);
+
+	hard_local_irq_restore(flags);
+
+	return ret;
+}
+
+int __weak ipipe_trap_hook(struct ipipe_trap_data *data)
+{
+	return 0;
+}
+
+int __ipipe_notify_trap(int exception, struct pt_regs *regs)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_trap_data data;
+	unsigned long flags;
+	int ret = 0;
+
+	flags = hard_local_irq_save();
+
+	/*
+	 * We send a notification about all traps raised over a
+	 * registered head domain only.
+	 */
+	if (__ipipe_root_p)
+		goto out;
+
+	p = ipipe_this_cpu_head_context();
+	if (likely(p->coflags & __IPIPE_TRAP_E)) {
+		p->coflags |= __IPIPE_TRAP_R;
+		hard_local_irq_restore(flags);
+		data.exception = exception;
+		data.regs = regs;
+		ret = ipipe_trap_hook(&data);
+		flags = hard_local_irq_save();
+		p->coflags &= ~__IPIPE_TRAP_R;
+	}
+out:
+	hard_local_irq_restore(flags);
+
+	return ret;
+}
+
+int __weak ipipe_kevent_hook(int kevent, void *data)
+{
+	return 0;
+}
+
+int __ipipe_notify_kevent(int kevent, void *data)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+	int ret = 0;
+
+	ipipe_root_only();
+
+	flags = hard_local_irq_save();
+
+	p = ipipe_this_cpu_root_context();
+	if (likely(p->coflags & __IPIPE_KEVENT_E)) {
+		p->coflags |= __IPIPE_KEVENT_R;
+		hard_local_irq_restore(flags);
+		ret = ipipe_kevent_hook(kevent, data);
+		flags = hard_local_irq_save();
+		p->coflags &= ~__IPIPE_KEVENT_R;
+	}
+
+	hard_local_irq_restore(flags);
+
+	return ret;
+}
+
+void __weak ipipe_migration_hook(struct task_struct *p)
+{
+}
+
+#ifdef CONFIG_IPIPE_LEGACY
+
+static inline void complete_domain_migration(void) /* hw IRQs off */
+{
+	if (current->state & TASK_HARDENING) {
+		current->state &= ~TASK_HARDENING;
+		ipipe_set_thread_flag(TIP_HEAD);
+	}
+}
+
+#else /* !CONFIG_IPIPE_LEGACY */
+
+static void complete_domain_migration(void) /* hw IRQs off */
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_percpu_data *pd;
+	struct task_struct *t;
+
+	ipipe_root_only();
+	pd = raw_cpu_ptr(&ipipe_percpu);
+	t = pd->task_hijacked;
+	if (t == NULL)
+		return;
+
+	pd->task_hijacked = NULL;
+	t->state &= ~TASK_HARDENING;
+	if (t->state != TASK_INTERRUPTIBLE)
+		/* Migration aborted (by signal). */
+		return;
+
+	ipipe_set_ti_thread_flag(task_thread_info(t), TIP_HEAD);
+	p = ipipe_this_cpu_head_context();
+	IPIPE_WARN_ONCE(test_bit(IPIPE_STALL_FLAG, &p->status));
+	/*
+	 * hw IRQs are disabled, but the completion hook assumes the
+	 * head domain is logically stalled: fix it up.
+	 */
+	__set_bit(IPIPE_STALL_FLAG, &p->status);
+	ipipe_migration_hook(t);
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+	if (__ipipe_ipending_p(p))
+		__ipipe_sync_pipeline(p->domain);
+}
+
+#endif /* !CONFIG_IPIPE_LEGACY */
+
+void __ipipe_complete_domain_migration(void)
+{
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
+	complete_domain_migration();
+	hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_complete_domain_migration);
+
+int __ipipe_switch_tail(void)
+{
+	int x;
+
+#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH
+	hard_local_irq_disable();
+#endif
+	x = __ipipe_root_p;
+#ifndef CONFIG_IPIPE_LEGACY
+	if (x)
+#endif
+		complete_domain_migration();
+
+#ifndef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH
+	if (x)
+#endif
+		hard_local_irq_enable();
+
+	return !x;
+}
+
+#ifdef CONFIG_IPIPE_HAVE_VM_NOTIFIER
+void __ipipe_notify_vm_preemption(void)
+{
+	struct ipipe_vm_notifier *vmf;
+	struct ipipe_percpu_data *p;
+
+	ipipe_check_irqoff();
+	p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+	vmf = p->vm_notifier;
+	if (unlikely(vmf))
+		vmf->handler(vmf);
+}
+EXPORT_SYMBOL_GPL(__ipipe_notify_vm_preemption);
+#endif /* CONFIG_IPIPE_HAVE_VM_NOTIFIER */
+
+static void dispatch_irq_head(unsigned int irq) /* hw interrupts off */
+{
+	struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(), *old;
+	struct ipipe_domain *head = p->domain;
+
+	if (unlikely(test_bit(IPIPE_STALL_FLAG, &p->status))) {
+		__ipipe_set_irq_pending(head, irq);
+		return;
+	}
+
+	/* Switch to the head domain if not current. */
+	old = __ipipe_current_context;
+	if (old != p)
+		__ipipe_set_current_context(p);
+
+	p->irqall[irq]++;
+	__set_bit(IPIPE_STALL_FLAG, &p->status);
+	barrier();
+	head->irqs[irq].handler(irq, head->irqs[irq].cookie);
+	__ipipe_run_irqtail(irq);
+	hard_local_irq_disable();
+	p = ipipe_this_cpu_head_context();
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+	/* Are we still running in the head domain? */
+	if (likely(__ipipe_current_context == p)) {
+		/* Did we enter this code over the head domain? */
+		if (old->domain == head) {
+			/* Yes, do immediate synchronization. */
+			if (__ipipe_ipending_p(p))
+				__ipipe_sync_stage();
+			return;
+		}
+		__ipipe_set_current_context(ipipe_this_cpu_root_context());
+	}
+
+	/*
+	 * We must be running over the root domain, synchronize
+	 * the pipeline for high priority IRQs (slow path).
+	 */
+	__ipipe_do_sync_pipeline(head);
+}
+
+void __ipipe_dispatch_irq(unsigned int irq, int flags) /* hw interrupts off */
+{
+	struct ipipe_domain *ipd;
+	struct irq_desc *desc;
+	unsigned long control;
+	int chained_irq;
+
+	/*
+	 * Survival kit when reading this code:
+	 *
+	 * - we have two main situations, leading to three cases for
+	 *   handling interrupts:
+	 *
+	 *   a) the root domain is alone, no registered head domain
+	 *      => all interrupts go through the interrupt log
+	 *   b) a head domain is registered
+	 *      => head domain IRQs go through the fast dispatcher
+	 *      => root domain IRQs go through the interrupt log
+	 *
+	 * - when no head domain is registered, ipipe_head_domain ==
+	 *   ipipe_root_domain == &ipipe_root.
+	 *
+	 * - the caller tells us whether we should acknowledge this
+	 *   IRQ. Even virtual IRQs may require acknowledge on some
+	 *   platforms (e.g. arm/SMP).
+	 *
+	 * - the caller tells us whether we may try to run the IRQ log
+	 *   syncer. Typically, demuxed IRQs won't be synced
+	 *   immediately.
+	 *
+	 * - multiplex IRQs most likely have a valid acknowledge
+	 *   handler and we may not be called with IPIPE_IRQF_NOACK
+	 *   for them. The ack handler for the multiplex IRQ actually
+	 *   decodes the demuxed interrupts.
+	 */
+
+#ifdef CONFIG_IPIPE_DEBUG
+	if (unlikely(irq >= IPIPE_NR_IRQS) ||
+	    (irq < IPIPE_NR_ROOT_IRQS && irq_to_desc(irq) == NULL)) {
+		pr_err("I-pipe: spurious interrupt %u\n", irq);
+		return;
+	}
+#endif
+	/*
+	 * CAUTION: on some archs, virtual IRQs may have acknowledge
+	 * handlers. Multiplex IRQs should have one too.
+	 */
+	if (unlikely(irq >= IPIPE_NR_ROOT_IRQS)) {
+		desc = NULL;
+		chained_irq = 0;
+	} else {
+		desc = irq_to_desc(irq);
+		chained_irq = desc ? ipipe_chained_irq_p(desc) : 0;
+	}
+	if (flags & IPIPE_IRQF_NOACK)
+		IPIPE_WARN_ONCE(chained_irq);
+	else {
+		ipd = ipipe_head_domain;
+		control = ipd->irqs[irq].control;
+		if ((control & IPIPE_HANDLE_MASK) == 0)
+			ipd = ipipe_root_domain;
+		if (ipd->irqs[irq].ackfn)
+			ipd->irqs[irq].ackfn(desc);
+		if (chained_irq) {
+			if ((flags & IPIPE_IRQF_NOSYNC) == 0)
+				/* Run demuxed IRQ handlers. */
+				goto sync;
+			return;
+		}
+	}
+
+	/*
+	 * Sticky interrupts must be handled early and separately, so
+	 * that we always process them on the current domain.
+	 */
+	ipd = __ipipe_current_domain;
+	control = ipd->irqs[irq].control;
+	if (control & IPIPE_STICKY_MASK)
+		goto log;
+
+	/*
+	 * In case we have no registered head domain
+	 * (i.e. ipipe_head_domain == &ipipe_root), we always go
+	 * through the interrupt log, and leave the dispatching work
+	 * ultimately to __ipipe_sync_pipeline().
+	 */
+	ipd = ipipe_head_domain;
+	control = ipd->irqs[irq].control;
+	if (ipd == ipipe_root_domain)
+		/*
+		 * The root domain must handle all interrupts, so
+		 * testing the HANDLE bit would be pointless.
+		 */
+		goto log;
+
+	if (control & IPIPE_HANDLE_MASK) {
+		if (unlikely(flags & IPIPE_IRQF_NOSYNC))
+			__ipipe_set_irq_pending(ipd, irq);
+		else
+			dispatch_irq_head(irq);
+		return;
+	}
+
+	ipd = ipipe_root_domain;
+log:
+	__ipipe_set_irq_pending(ipd, irq);
+
+	if (flags & IPIPE_IRQF_NOSYNC)
+		return;
+
+	/*
+	 * Optimize if we preempted a registered high priority head
+	 * domain: we don't need to synchronize the pipeline unless
+	 * there is a pending interrupt for it.
+	 */
+	if (!__ipipe_root_p &&
+	    !__ipipe_ipending_p(ipipe_this_cpu_head_context()))
+		return;
+sync:
+	__ipipe_sync_pipeline(ipipe_head_domain);
+}
+
+void ipipe_raise_irq(unsigned int irq)
+{
+	struct ipipe_domain *ipd = ipipe_head_domain;
+	unsigned long flags, control;
+
+	flags = hard_local_irq_save();
+
+	/*
+	 * Fast path: raising a virtual IRQ handled by the head
+	 * domain.
+	 */
+	if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) {
+		control = ipd->irqs[irq].control;
+		if (likely(control & IPIPE_HANDLE_MASK)) {
+			dispatch_irq_head(irq);
+			goto out;
+		}
+	}
+
+	/* Emulate regular device IRQ receipt. */
+	__ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK);
+out:
+	hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_raise_irq);
+
+static void sync_root_irqs(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
+
+	p = ipipe_this_cpu_root_context();
+	if (unlikely(__ipipe_ipending_p(p)))
+		__ipipe_sync_stage();
+
+	hard_local_irq_restore(flags);
+}
+
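+/*
+ * Return convention: 1 means the syscall was fully handled by the
+ * co-kernel (do not pass it down, skip the exit work), -1 means it
+ * was handled but the root exit work must still run, 0 hands the
+ * syscall over to the regular Linux path.
+ */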
+int ipipe_handle_syscall(struct thread_info *ti,
+			 unsigned long nr, struct pt_regs *regs)
+{
+	unsigned long local_flags = READ_ONCE(ti->ipipe_flags);
+	int ret;
+
+	/*
+	 * NOTE: This is a backport from the DOVETAIL syscall
+	 * redirector to the older pipeline implementation.
+	 *
+	 * ==
+	 *
+	 * If the syscall # is out of bounds and the current IRQ stage
+	 * is not the root one, this has to be a non-native system
+	 * call handled by some co-kernel on the head stage. Hand it
+	 * over to the head stage via the fast syscall handler.
+	 *
+	 * Otherwise, if the system call is out of bounds or the
+	 * current thread is shared with a co-kernel, hand the syscall
+	 * over to the latter through the pipeline stages. This
+	 * allows:
+	 *
+	 * - the co-kernel to receive the initial - foreign - syscall
+	 * a thread should send for enabling syscall handling by the
+	 * co-kernel.
+	 *
+	 * - the co-kernel to manipulate the current execution stage
+	 * for handling the request, which includes switching the
+	 * current thread back to the root stage if the syscall is a
+	 * native one, or promoting it to the head stage if handling
+	 * the foreign syscall requires this.
+	 *
+	 * Native syscalls from regular (non-pipeline) threads are
+	 * ignored by this routine, and flow down to the regular
+	 * system call handler.
+	 */
+
+	if (nr >= NR_syscalls && (local_flags & _TIP_HEAD)) {
+		ipipe_fastcall_hook(regs);
+		local_flags = READ_ONCE(ti->ipipe_flags);
+		if (local_flags & _TIP_HEAD) {
+			if (local_flags & _TIP_MAYDAY)
+				__ipipe_call_mayday(regs);
+			return 1; /* don't pass down, no tail work. */
+		} else {
+			sync_root_irqs();
+			return -1; /* don't pass down, do tail work. */
+		}
+	}
+
+	if ((local_flags & _TIP_NOTIFY) || nr >= NR_syscalls) {
+		ret = __ipipe_notify_syscall(regs);
+		local_flags = READ_ONCE(ti->ipipe_flags);
+		if (local_flags & _TIP_HEAD)
+			return 1; /* don't pass down, no tail work. */
+		if (ret)
+			return -1; /* don't pass down, do tail work. */
+	}
+
+	return 0; /* pass syscall down to the host. */
+}
+
+#ifdef CONFIG_PREEMPT
+
+void preempt_schedule_irq(void);
+
+void __sched __ipipe_preempt_schedule_irq(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	unsigned long flags;
+
+	if (WARN_ON_ONCE(!hard_irqs_disabled()))
+		hard_local_irq_disable();
+
+	local_irq_save(flags);
+	hard_local_irq_enable();
+	preempt_schedule_irq(); /* Ok, may reschedule now. */
+	hard_local_irq_disable();
+
+	/*
+	 * Flush any pending interrupt that may have been logged after
+	 * preempt_schedule_irq() stalled the root stage before
+	 * returning to us, and now.
+	 */
+	p = ipipe_this_cpu_root_context();
+	if (unlikely(__ipipe_ipending_p(p))) {
+		trace_hardirqs_on();
+		__clear_bit(IPIPE_STALL_FLAG, &p->status);
+		__ipipe_sync_stage();
+	}
+
+	__ipipe_restore_root_nosync(flags);
+}
+
+#else /* !CONFIG_PREEMPT */
+
+#define __ipipe_preempt_schedule_irq()	do { } while (0)
+
+#endif	/* !CONFIG_PREEMPT */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define root_stall_after_handler()	local_irq_disable()
+#else
+#define root_stall_after_handler()	do { } while (0)
+#endif
+
+/*
+ * __ipipe_do_sync_stage() -- Flush the pending IRQs for the current
+ * domain (and processor). This routine flushes the interrupt log (see
+ * "Optimistic interrupt protection" from D. Stodolsky et al. for more
+ * on the deferred interrupt scheme). Every interrupt that occurred
+ * while the pipeline was stalled gets played.
+ *
+ * WARNING: CPU migration may occur over this routine.
+ */
+void __ipipe_do_sync_stage(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_domain *ipd;
+	int irq;
+
+	p = __ipipe_current_context;
+respin:
+	ipd = p->domain;
+
+	__set_bit(IPIPE_STALL_FLAG, &p->status);
+	smp_wmb();
+
+	if (ipd == ipipe_root_domain)
+		trace_hardirqs_off();
+
+	for (;;) {
+		irq = __ipipe_next_irq(p);
+		if (irq < 0)
+			break;
+		/*
+		 * Make sure the compiler does not reorder wrongly, so
+		 * that all updates to maps are done before the
+		 * handler gets called.
+		 */
+		barrier();
+
+		if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+			continue;
+
+		if (ipd != ipipe_head_domain)
+			hard_local_irq_enable();
+
+		if (likely(ipd != ipipe_root_domain)) {
+			ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+			__ipipe_run_irqtail(irq);
+			hard_local_irq_disable();
+		} else if (ipipe_virtual_irq_p(irq)) {
+			irq_enter();
+			ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+			irq_exit();
+			root_stall_after_handler();
+			hard_local_irq_disable();
+		} else {
+			ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+			root_stall_after_handler();
+			hard_local_irq_disable();
+		}
+
+		/*
+		 * We may have migrated to a different CPU (1) upon
+		 * return from the handler, or downgraded from the
+		 * head domain to the root one (2), the opposite way
+		 * is NOT allowed though.
+		 *
+		 * (1) reload the current per-cpu context pointer, so
+		 * that we further pull pending interrupts from the
+		 * proper per-cpu log.
+		 *
+		 * (2) check the stall bit to know whether we may
+		 * dispatch any interrupt pending for the root domain,
+		 * and respin the entire dispatch loop if
+		 * so. Otherwise, immediately return to the caller,
+		 * _without_ affecting the stall state for the root
+		 * domain, since we do not own it at this stage.  This
+		 * case is basically reflecting what may happen in
+		 * dispatch_irq_head() for the fast path.
+		 */
+		p = __ipipe_current_context;
+		if (p->domain != ipd) {
+			IPIPE_BUG_ON(ipd == ipipe_root_domain);
+			if (test_bit(IPIPE_STALL_FLAG, &p->status))
+				return;
+			goto respin;
+		}
+	}
+
+	if (ipd == ipipe_root_domain)
+		trace_hardirqs_on();
+
+	__clear_bit(IPIPE_STALL_FLAG, &p->status);
+}
+
+void __ipipe_call_mayday(struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	ipipe_clear_thread_flag(TIP_MAYDAY);
+	flags = hard_local_irq_save();
+	__ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs);
+	hard_local_irq_restore(flags);
+}
+
+#ifdef CONFIG_SMP
+
+/* Always called with hw interrupts off. */
+void __ipipe_do_critical_sync(unsigned int irq, void *cookie)
+{
+	int cpu = ipipe_processor_id();
+
+	cpumask_set_cpu(cpu, &__ipipe_cpu_sync_map);
+
+	/*
+	 * Now we are in sync with the lock requestor running on
+	 * another CPU. Enter a spinning wait until it releases the
+	 * global lock.
+	 */
+	raw_spin_lock(&__ipipe_cpu_barrier);
+
+	/* Got it. Now get out. */
+
+	/* Call the sync routine if any. */
+	if (__ipipe_cpu_sync)
+		__ipipe_cpu_sync();
+
+	cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map);
+
+	raw_spin_unlock(&__ipipe_cpu_barrier);
+
+	cpumask_clear_cpu(cpu, &__ipipe_cpu_sync_map);
+}
+#endif	/* CONFIG_SMP */
+
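+/*
+ * Grab the pipeline-wide critical lock: on return, every other
+ * online CPU is spinning on the barrier in __ipipe_do_critical_sync()
+ * with hw IRQs off. Each of them runs syncfn (if given) once
+ * ipipe_critical_exit() releases the barrier.
+ */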
+unsigned long ipipe_critical_enter(void (*syncfn)(void))
+{
+	cpumask_t allbutself __maybe_unused, online __maybe_unused;
+	int cpu __maybe_unused, n __maybe_unused;
+	unsigned long flags, loops __maybe_unused;
+
+	flags = hard_local_irq_save();
+
+	if (num_online_cpus() == 1)
+		return flags;
+
+#ifdef CONFIG_SMP
+
+	cpu = ipipe_processor_id();
+	if (!cpumask_test_and_set_cpu(cpu, &__ipipe_cpu_lock_map)) {
+		while (test_and_set_bit(0, &__ipipe_critical_lock)) {
+			n = 0;
+			hard_local_irq_enable();
+
+			do
+				cpu_relax();
+			while (++n < cpu);
+
+			hard_local_irq_disable();
+		}
+restart:
+		online = *cpu_online_mask;
+		raw_spin_lock(&__ipipe_cpu_barrier);
+
+		__ipipe_cpu_sync = syncfn;
+
+		cpumask_clear(&__ipipe_cpu_pass_map);
+		cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map);
+
+		/*
+		 * Send the sync IPI to all processors but the current
+		 * one.
+		 */
+		cpumask_andnot(&allbutself, &online, &__ipipe_cpu_pass_map);
+		ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself);
+		loops = IPIPE_CRITICAL_TIMEOUT;
+
+		while (!cpumask_equal(&__ipipe_cpu_sync_map, &allbutself)) {
+			if (--loops > 0) {
+				cpu_relax();
+				continue;
+			}
+			/*
+			 * We ran into a deadlock due to a contended
+			 * rwlock. Cancel this round and retry.
+			 */
+			__ipipe_cpu_sync = NULL;
+
+			raw_spin_unlock(&__ipipe_cpu_barrier);
+			/*
+			 * Ensure all CPUs consumed the IPI to avoid
+			 * running __ipipe_cpu_sync prematurely. This
+			 * usually clears the cause of the deadlock too.
+			 */
+			while (!cpumask_equal(&online, &__ipipe_cpu_pass_map))
+				cpu_relax();
+
+			goto restart;
+		}
+	}
+
+	atomic_inc(&__ipipe_critical_count);
+
+#endif	/* CONFIG_SMP */
+
+	return flags;
+}
+EXPORT_SYMBOL_GPL(ipipe_critical_enter);
+
+void ipipe_critical_exit(unsigned long flags)
+{
+	if (num_online_cpus() == 1) {
+		hard_local_irq_restore(flags);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	if (atomic_dec_and_test(&__ipipe_critical_count)) {
+		raw_spin_unlock(&__ipipe_cpu_barrier);
+		while (!cpumask_empty(&__ipipe_cpu_sync_map))
+			cpu_relax();
+		cpumask_clear_cpu(ipipe_processor_id(), &__ipipe_cpu_lock_map);
+		clear_bit(0, &__ipipe_critical_lock);
+		smp_mb__after_atomic();
+	}
+#endif /* CONFIG_SMP */
+
+	hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_critical_exit);
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+void ipipe_root_only(void)
+{
+	struct ipipe_domain *this_domain;
+	unsigned long flags;
+
+	flags = hard_smp_local_irq_save();
+
+	this_domain = __ipipe_current_domain;
+	if (likely(this_domain == ipipe_root_domain &&
+		   !test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) {
+		hard_smp_local_irq_restore(flags);
+		return;
+	}
+
+	if (!__this_cpu_read(ipipe_percpu.context_check)) {
+		hard_smp_local_irq_restore(flags);
+		return;
+	}
+
+	hard_smp_local_irq_restore(flags);
+
+	ipipe_prepare_panic();
+	ipipe_trace_panic_freeze();
+
+	if (this_domain != ipipe_root_domain)
+		pr_err("I-pipe: Detected illicit call from head domain '%s'\n"
+		       "        into a regular Linux service\n",
+		       this_domain->name);
+	else
+		pr_err("I-pipe: Detected stalled head domain, "
+			"probably caused by a bug.\n"
+			"        A critical section may have been "
+			"left unterminated.\n");
+	dump_stack();
+	ipipe_trace_panic_dump();
+}
+EXPORT_SYMBOL(ipipe_root_only);
+
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+
+int notrace __ipipe_check_percpu_access(void)
+{
+	struct ipipe_percpu_domain_data *p;
+	struct ipipe_domain *this_domain;
+	unsigned long flags;
+	int ret = 0;
+
+	flags = hard_local_irq_save_notrace();
+
+	/*
+	 * Don't use __ipipe_current_domain here, this would recurse
+	 * indefinitely.
+	 */
+	this_domain = raw_cpu_read(ipipe_percpu.curr)->domain;
+
+	/*
+	 * Only the root domain may implement preemptive CPU migration
+	 * of tasks, so anything above in the pipeline should be fine.
+	 */
+	if (this_domain != ipipe_root_domain)
+		goto out;
+
+	if (raw_irqs_disabled_flags(flags))
+		goto out;
+
+	/*
+	 * Last chance: hw interrupts were enabled on entry while
+	 * running over the root domain, but the root stage might be
+	 * currently stalled, in which case preemption would be
+	 * disabled, and no migration could occur.
+	 */
+
+	p = raw_cpu_ptr(&ipipe_percpu.root);
+	if (!preemptible())
+		goto out;
+	/*
+	 * Our caller may end up accessing the wrong per-cpu variable
+	 * instance due to CPU migration; tell it to complain about
+	 * this.
+	 */
+	ret = 1;
+out:
+	hard_local_irq_restore_notrace(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__ipipe_check_percpu_access);
+
+void __ipipe_spin_unlock_debug(unsigned long flags)
+{
+	/*
+	 * We catch a nasty issue where spin_unlock_irqrestore() on a
+	 * regular kernel spinlock is about to re-enable hw interrupts
+	 * in a section entered with hw irqs off. This is clearly the
+	 * sign of a massive breakage coming. Usual suspect is a
+	 * regular spinlock which was overlooked, used within a
+	 * section which must run with hw irqs disabled.
+	 */
+	IPIPE_WARN_ONCE(!raw_irqs_disabled_flags(flags) && hard_irqs_disabled());
+}
+EXPORT_SYMBOL(__ipipe_spin_unlock_debug);
+
+#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */
+
+void ipipe_prepare_panic(void)
+{
+#ifdef CONFIG_PRINTK
+	__ipipe_printk_bypass = 1;
+#endif
+	ipipe_context_check_off();
+}
+EXPORT_SYMBOL_GPL(ipipe_prepare_panic);
+
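+/*
+ * Deferred work machinery: __ipipe_post_work_root() copies a work
+ * descriptor into the per-CPU work_buf and posts __ipipe_work_virq;
+ * this handler, running over the root stage, then executes each
+ * descriptor in enqueuing order and rewinds work_tail once the
+ * buffer is drained.
+ */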
+static void __ipipe_do_work(unsigned int virq, void *cookie)
+{
+	struct ipipe_work_header *work;
+	unsigned long flags;
+	void *curr, *tail;
+	int cpu;
+
+	/*
+	 * Work is dispatched in enqueuing order. This interrupt
+	 * context can't migrate to another CPU.
+	 */
+	cpu = smp_processor_id();
+	curr = per_cpu(work_buf, cpu);
+
+	for (;;) {
+		flags = hard_local_irq_save();
+		tail = per_cpu(work_tail, cpu);
+		if (curr == tail) {
+			per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu);
+			hard_local_irq_restore(flags);
+			return;
+		}
+		work = curr;
+		curr += work->size;
+		hard_local_irq_restore(flags);
+		work->handler(work);
+	}
+}
+
+void __ipipe_post_work_root(struct ipipe_work_header *work)
+{
+	unsigned long flags;
+	void *tail;
+	int cpu;
+
+	/*
+	 * Subtle: we want to use the head stall/unstall operators,
+	 * not the hard_* routines to protect against races. This way,
+	 * we ensure that a root-based caller will trigger the virq
+	 * handling immediately when unstalling the head stage, as a
+	 * result of calling __ipipe_sync_pipeline() under the hood.
+	 */
+	flags = ipipe_test_and_stall_head();
+	cpu = ipipe_processor_id();
+	tail = per_cpu(work_tail, cpu);
+
+	if (WARN_ON_ONCE((unsigned char *)tail + work->size >=
+			 per_cpu(work_buf, cpu) + WORKBUF_SIZE))
+		goto out;
+
+	/* Work handling is deferred, so data has to be copied. */
+	memcpy(tail, work, work->size);
+	per_cpu(work_tail, cpu) = tail + work->size;
+	ipipe_post_irq_root(__ipipe_work_virq);
+out:
+	ipipe_restore_head(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_post_work_root);
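+
+/*
+ * Typical usage (sketch, assuming a caller-defined descriptor type
+ * embedding struct ipipe_work_header as its first member and a
+ * caller-provided wakeup_handler()):
+ *
+ *	struct wakeup_work {
+ *		struct ipipe_work_header work;
+ *		struct task_struct *task;
+ *	} w = {
+ *		.work = {
+ *			.size = sizeof(w),
+ *			.handler = wakeup_handler,
+ *		},
+ *		.task = p,
+ *	};
+ *	__ipipe_post_work_root(&w.work);
+ */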
+
+void __weak __ipipe_arch_share_current(int flags)
+{
+}
+
+void __ipipe_share_current(int flags)
+{
+	ipipe_root_only();
+
+	__ipipe_arch_share_current(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_share_current);
+
+#ifdef CONFIG_KGDB
+bool __ipipe_probe_access;
+
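+/*
+ * Fault-safe kernel memory probes for KGDB. The __ipipe_probe_access
+ * flag is raised around the access so that the arch fault-handling
+ * code can fix up a faulting probe instead of following the normal
+ * pipeline notification path.
+ */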
+long ipipe_probe_kernel_read(void *dst, void *src, size_t size)
+{
+	long ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	__ipipe_probe_access = true;
+	barrier();
+	ret = __copy_from_user_inatomic(dst,
+			(__force const void __user *)src, size);
+	barrier();
+	__ipipe_probe_access = false;
+	set_fs(old_fs);
+
+	return ret ? -EFAULT : 0;
+}
+
+long ipipe_probe_kernel_write(void *dst, void *src, size_t size)
+{
+	long ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	__ipipe_probe_access = true;
+	barrier();
+	ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+	barrier();
+	__ipipe_probe_access = false;
+	set_fs(old_fs);
+
+	return ret ? -EFAULT : 0;
+}
+#endif /* CONFIG_KGDB */
+
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \
+	defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT)
+void __ipipe_uaccess_might_fault(void)
+{
+	struct ipipe_percpu_domain_data *pdd;
+	struct ipipe_domain *ipd;
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
+	ipd = __ipipe_current_domain;
+	if (ipd == ipipe_root_domain) {
+		hard_local_irq_restore(flags);
+		might_fault();
+		return;
+	}
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+	pdd = ipipe_this_cpu_context(ipd);
+	WARN_ON_ONCE(hard_irqs_disabled_flags(flags) ||
+		     test_bit(IPIPE_STALL_FLAG, &pdd->status));
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+	(void)pdd;
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+	hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_uaccess_might_fault);
+#endif
diff --git a/kernel/ipipe/timer.c b/kernel/ipipe/timer.c
new file mode 100644
index 000000000000..0da956413890
--- /dev/null
+++ b/kernel/ipipe/timer.c
@@ -0,0 +1,588 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/timer.c
+ *
+ * Copyright (C) 2012 Gilles Chanteperdrix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * I-pipe timer request interface.
+ */
+#include <linux/ipipe.h>
+#include <linux/percpu.h>
+#include <linux/irqdesc.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/ipipe_tickdev.h>
+#include <linux/interrupt.h>
+#include <linux/export.h>
+
+unsigned long __ipipe_hrtimer_freq;
+
+static LIST_HEAD(timers);
+static IPIPE_DEFINE_SPINLOCK(lock);
+
+static DEFINE_PER_CPU(struct ipipe_timer *, percpu_timer);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+/*
+ * Default request method: switch to oneshot mode if supported.
+ */
+static void ipipe_timer_default_request(struct ipipe_timer *timer, int steal)
+{
+	struct clock_event_device *evtdev = timer->host_timer;
+
+	if (!(evtdev->features & CLOCK_EVT_FEAT_ONESHOT))
+		return;
+
+	if (clockevent_state_oneshot(evtdev) ||
+		clockevent_state_oneshot_stopped(evtdev))
+		timer->orig_mode = CLOCK_EVT_MODE_ONESHOT;
+	else {
+		if (clockevent_state_periodic(evtdev))
+			timer->orig_mode = CLOCK_EVT_MODE_PERIODIC;
+		else if (clockevent_state_shutdown(evtdev))
+			timer->orig_mode = CLOCK_EVT_MODE_SHUTDOWN;
+		else
+			timer->orig_mode = CLOCK_EVT_MODE_UNUSED;
+		evtdev->set_state_oneshot(evtdev);
+		evtdev->set_next_event(timer->freq / HZ, evtdev);
+	}
+}
+
+/*
+ * Default release method: return the timer to the mode it had when
+ * starting.
+ */
+static void ipipe_timer_default_release(struct ipipe_timer *timer)
+{
+	struct clock_event_device *evtdev = timer->host_timer;
+
+	switch (timer->orig_mode) {
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		evtdev->set_state_shutdown(evtdev);
+		break;
+	case CLOCK_EVT_MODE_PERIODIC:
+		evtdev->set_state_periodic(evtdev);
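+		/* fall through */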
+	case CLOCK_EVT_MODE_ONESHOT:
+		evtdev->set_next_event(timer->freq / HZ, evtdev);
+		break;
+	}
+}
+
+static int get_dev_mode(struct clock_event_device *evtdev)
+{
+	if (clockevent_state_oneshot(evtdev) ||
+		clockevent_state_oneshot_stopped(evtdev))
+		return CLOCK_EVT_MODE_ONESHOT;
+
+	if (clockevent_state_periodic(evtdev))
+		return CLOCK_EVT_MODE_PERIODIC;
+
+	if (clockevent_state_shutdown(evtdev))
+		return CLOCK_EVT_MODE_SHUTDOWN;
+
+	return CLOCK_EVT_MODE_UNUSED;
+}
+
+void ipipe_host_timer_register(struct clock_event_device *evtdev)
+{
+	struct ipipe_timer *timer = evtdev->ipipe_timer;
+
+	if (timer == NULL)
+		return;
+
+	timer->orig_mode = CLOCK_EVT_MODE_UNUSED;
+
+	if (timer->request == NULL)
+		timer->request = ipipe_timer_default_request;
+
+	/*
+	 * By default, use the same method as the Linux timer; on ARM
+	 * at least, most set_next_event methods are safe to call
+	 * from the Xenomai domain anyway.
+	 */
+	if (timer->set == NULL) {
+		timer->timer_set = evtdev;
+		timer->set = (typeof(timer->set))evtdev->set_next_event;
+	}
+
+	if (timer->release == NULL)
+		timer->release = ipipe_timer_default_release;
+
+	if (timer->name == NULL)
+		timer->name = evtdev->name;
+
+	if (timer->rating == 0)
+		timer->rating = evtdev->rating;
+
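+	/*
+	 * evtdev->mult/shift convert nanoseconds to device ticks, so
+	 * scaling one second (1e9 ns) yields the tick rate in Hz.
+	 */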
+	timer->freq = (1000000000ULL * evtdev->mult) >> evtdev->shift;
+
+	if (timer->min_delay_ticks == 0)
+		timer->min_delay_ticks =
+			(evtdev->min_delta_ns * evtdev->mult) >> evtdev->shift;
+
+	if (timer->cpumask == NULL)
+		timer->cpumask = evtdev->cpumask;
+
+	timer->host_timer = evtdev;
+
+	ipipe_timer_register(timer);
+}
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+/*
+ * Register a timer: timers are kept in a list sorted by decreasing rating.
+ */
+void ipipe_timer_register(struct ipipe_timer *timer)
+{
+	struct ipipe_timer *t;
+	unsigned long flags;
+
+	if (timer->timer_set == NULL)
+		timer->timer_set = timer;
+
+	if (timer->cpumask == NULL)
+		timer->cpumask = cpumask_of(smp_processor_id());
+
+	raw_spin_lock_irqsave(&lock, flags);
+
+	list_for_each_entry(t, &timers, link) {
+		if (t->rating <= timer->rating) {
+			__list_add(&timer->link, t->link.prev, &t->link);
+			goto done;
+		}
+	}
+	list_add_tail(&timer->link, &timers);
+  done:
+	raw_spin_unlock_irqrestore(&lock, flags);
+}
+
+static void ipipe_timer_request_sync(void)
+{
+	struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+	struct clock_event_device *evtdev;
+	int steal;
+
+	if (!timer)
+		return;
+
+	evtdev = timer->host_timer;
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+	steal = evtdev != NULL && !clockevent_state_detached(evtdev);
+#else /* !CONFIG_GENERIC_CLOCKEVENTS */
+	steal = 1;
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
+
+	timer->request(timer, steal);
+}
+
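+/*
+ * Precompute a 32.32 fixed-point clock-to-timer conversion factor:
+ * c2t_integ holds the integer part of hrtimer_freq / hrclock_freq,
+ * c2t_frac the fractional part scaled by 2^32, rounded up. E.g. a
+ * 25 MHz timer over a 10 MHz clock gives c2t_integ = 2 and c2t_frac
+ * = 0x80000000, i.e. a ratio of 2.5.
+ */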
+static void config_pcpu_timer(struct ipipe_timer *t, unsigned hrclock_freq)
+{
+	unsigned long long tmp;
+	unsigned hrtimer_freq;
+
+	if (__ipipe_hrtimer_freq != t->freq)
+		__ipipe_hrtimer_freq = t->freq;
+
+	hrtimer_freq = t->freq;
+	if (__ipipe_hrclock_freq > UINT_MAX)
+		hrtimer_freq /= 1000;
+
+	t->c2t_integ = hrtimer_freq / hrclock_freq;
+	tmp = (((unsigned long long)
+		(hrtimer_freq % hrclock_freq)) << 32)
+		+ hrclock_freq - 1;
+	do_div(tmp, hrclock_freq);
+	t->c2t_frac = tmp;
+}
+
+/* Set up a timer as per-cpu timer for ipipe */
+static void install_pcpu_timer(unsigned cpu, unsigned hrclock_freq,
+			      struct ipipe_timer *t)
+{
+	per_cpu(ipipe_percpu.hrtimer_irq, cpu) = t->irq;
+	per_cpu(percpu_timer, cpu) = t;
+	config_pcpu_timer(t, hrclock_freq);
+}
+
+static void select_root_only_timer(unsigned cpu, unsigned hrclock_khz,
+				   const struct cpumask *mask,
+				   struct ipipe_timer *t)
+{
+	unsigned icpu;
+	struct clock_event_device *evtdev;
+
+	/*
+	 * If no ipipe-supported CPU shares an interrupt with the
+	 * timer, we do not need to care about it.
+	 */
+	for_each_cpu(icpu, mask) {
+		if (t->irq == per_cpu(ipipe_percpu.hrtimer_irq, icpu)) {
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+			evtdev = t->host_timer;
+			if (evtdev && clockevent_state_shutdown(evtdev))
+				continue;
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+			goto found;
+		}
+	}
+
+	return;
+
+found:
+	install_pcpu_timer(cpu, hrclock_khz, t);
+}
+
+/*
+ * Choose per-cpu timers with the highest rating by traversing the
+ * rating-sorted list for each CPU.
+ */
+int ipipe_select_timers(const struct cpumask *mask)
+{
+	unsigned hrclock_freq;
+	unsigned long long tmp;
+	struct ipipe_timer *t;
+	struct clock_event_device *evtdev;
+	unsigned long flags;
+	unsigned cpu;
+	cpumask_t fixup;
+
+	if (!__ipipe_hrclock_ok()) {
+		printk("I-pipe: high-resolution clock not working\n");
+		return -ENODEV;
+	}
+
+	if (__ipipe_hrclock_freq > UINT_MAX) {
+		tmp = __ipipe_hrclock_freq;
+		do_div(tmp, 1000);
+		hrclock_freq = tmp;
+	} else
+		hrclock_freq = __ipipe_hrclock_freq;
+
+	raw_spin_lock_irqsave(&lock, flags);
+
+	/* First, choose timers for the CPUs handled by ipipe */
+	for_each_cpu(cpu, mask) {
+		list_for_each_entry(t, &timers, link) {
+			if (!cpumask_test_cpu(cpu, t->cpumask))
+				continue;
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+			evtdev = t->host_timer;
+			if (evtdev && clockevent_state_shutdown(evtdev))
+				continue;
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+			goto found;
+		}
+
+		printk("I-pipe: could not find timer for cpu #%d\n",
+		       cpu);
+		goto err_remove_all;
+found:
+		install_pcpu_timer(cpu, hrclock_freq, t);
+	}
+
+	/*
+	 * Second, fix up any CPUs handled by Linux but not by ipipe,
+	 * whose timer interrupt may need to be forwarded because it
+	 * is shared with an ipipe-enabled timer.
+	 */
+	cpumask_andnot(&fixup, cpu_online_mask, mask);
+
+	for_each_cpu(cpu, &fixup) {
+		list_for_each_entry(t, &timers, link) {
+			if (!cpumask_test_cpu(cpu, t->cpumask))
+				continue;
+
+			select_root_only_timer(cpu, hrclock_freq, mask, t);
+		}
+	}
+
+	raw_spin_unlock_irqrestore(&lock, flags);
+
+	flags = ipipe_critical_enter(ipipe_timer_request_sync);
+	ipipe_timer_request_sync();
+	ipipe_critical_exit(flags);
+
+	return 0;
+
+err_remove_all:
+	raw_spin_unlock_irqrestore(&lock, flags);
+
+	for_each_cpu(cpu, mask) {
+		per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1;
+		per_cpu(percpu_timer, cpu) = NULL;
+	}
+	__ipipe_hrtimer_freq = 0;
+
+	return -ENODEV;
+}
+
+static void ipipe_timer_release_sync(void)
+{
+	struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+	if (timer)
+		timer->release(timer);
+}
+
+void ipipe_timers_release(void)
+{
+	unsigned long flags;
+	unsigned cpu;
+
+	flags = ipipe_critical_enter(ipipe_timer_release_sync);
+	ipipe_timer_release_sync();
+	ipipe_critical_exit(flags);
+
+	for_each_online_cpu(cpu) {
+		per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1;
+		per_cpu(percpu_timer, cpu) = NULL;
+		__ipipe_hrtimer_freq = 0;
+	}
+}
+
+static void __ipipe_ack_hrtimer_irq(struct irq_desc *desc)
+{
+	struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+	if (desc)
+		desc->ipipe_ack(desc);
+	if (timer->ack)
+		timer->ack();
+	if (desc)
+		desc->ipipe_end(desc);
+}
+
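+/*
+ * While a clockevent device is stolen (see ipipe_timer_start() below),
+ * Linux-initiated mode switches are redirected to the handlers below,
+ * which hand them over to the co-kernel supplied emulation callback.
+ */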
+static int do_set_oneshot(struct clock_event_device *cdev)
+{
+	struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+	timer->mode_handler(CLOCK_EVT_MODE_ONESHOT, cdev);
+
+	return 0;
+}
+
+static int do_set_periodic(struct clock_event_device *cdev)
+{
+	struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+	timer->mode_handler(CLOCK_EVT_MODE_PERIODIC, cdev);
+
+	return 0;
+}
+
+static int do_set_shutdown(struct clock_event_device *cdev)
+{
+	struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+	timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev);
+
+	return 0;
+}
+
+int ipipe_timer_start(void (*tick_handler)(void),
+		      void (*emumode)(enum clock_event_mode mode,
+				      struct clock_event_device *cdev),
+		      int (*emutick)(unsigned long evt,
+				     struct clock_event_device *cdev),
+		      unsigned cpu)
+{
+	struct clock_event_device *evtdev;
+	struct ipipe_timer *timer;
+	struct irq_desc *desc;
+	unsigned long flags;
+	int steal, ret;
+
+	timer = per_cpu(percpu_timer, cpu);
+	evtdev = timer->host_timer;
+
+	flags = ipipe_critical_enter(NULL);
+
+	ret = ipipe_request_irq(ipipe_head_domain, timer->irq,
+				(ipipe_irq_handler_t)tick_handler,
+				NULL, __ipipe_ack_hrtimer_irq);
+	if (ret < 0 && ret != -EBUSY) {
+		ipipe_critical_exit(flags);
+		return ret;
+	}
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+	steal = evtdev != NULL && !clockevent_state_detached(evtdev);
+	if (steal && evtdev->ipipe_stolen == 0) {
+		timer->real_mult = evtdev->mult;
+		timer->real_shift = evtdev->shift;
+		timer->orig_set_state_periodic = evtdev->set_state_periodic;
+		timer->orig_set_state_oneshot = evtdev->set_state_oneshot;
+		timer->orig_set_state_oneshot_stopped = evtdev->set_state_oneshot_stopped;
+		timer->orig_set_state_shutdown = evtdev->set_state_shutdown;
+		timer->orig_set_next_event = evtdev->set_next_event;
+		timer->mode_handler = emumode;
+		evtdev->mult = 1;
+		evtdev->shift = 0;
+		evtdev->max_delta_ns = UINT_MAX;
+		evtdev->set_state_periodic = do_set_periodic;
+		evtdev->set_state_oneshot = do_set_oneshot;
+		evtdev->set_state_oneshot_stopped = do_set_oneshot;
+		evtdev->set_state_shutdown = do_set_shutdown;
+		evtdev->set_next_event = emutick;
+		evtdev->ipipe_stolen = 1;
+	}
+
+	ret = get_dev_mode(evtdev);
+#else /* CONFIG_GENERIC_CLOCKEVENTS */
+	steal = 1;
+	ret = 0;
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+	ipipe_critical_exit(flags);
+
+	desc = irq_to_desc(timer->irq);
+	if (desc && irqd_irq_disabled(&desc->irq_data))
+		ipipe_enable_irq(timer->irq);
+
+	return ret;
+}
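+
+/*
+ * Purely illustrative sequence of a co-kernel taking over the per-cpu
+ * hardware timers; the tick/emulation callbacks are placeholders:
+ *
+ *	ipipe_select_timers(cpu_online_mask);
+ *	for_each_online_cpu(cpu)
+ *		ipipe_timer_start(my_tick, my_emumode, my_emutick, cpu);
+ *	...
+ *	for_each_online_cpu(cpu)
+ *		ipipe_timer_stop(cpu);
+ *	ipipe_timers_release();
+ */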
+
+void ipipe_timer_stop(unsigned cpu)
+{
+	unsigned long __maybe_unused flags;
+	struct clock_event_device *evtdev;
+	struct ipipe_timer *timer;
+	struct irq_desc *desc;
+
+	timer = per_cpu(percpu_timer, cpu);
+	evtdev = timer->host_timer;
+
+	desc = irq_to_desc(timer->irq);
+	if (desc && irqd_irq_disabled(&desc->irq_data))
+		ipipe_disable_irq(timer->irq);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+	if (evtdev) {
+		flags = ipipe_critical_enter(NULL);
+
+		if (evtdev->ipipe_stolen) {
+			evtdev->mult = timer->real_mult;
+			evtdev->shift = timer->real_shift;
+			evtdev->set_state_periodic = timer->orig_set_state_periodic;
+			evtdev->set_state_oneshot = timer->orig_set_state_oneshot;
+			evtdev->set_state_oneshot_stopped = timer->orig_set_state_oneshot_stopped;
+			evtdev->set_state_shutdown = timer->orig_set_state_shutdown;
+			evtdev->set_next_event = timer->orig_set_next_event;
+			evtdev->ipipe_stolen = 0;
+		}
+
+		ipipe_critical_exit(flags);
+	}
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+	ipipe_free_irq(ipipe_head_domain, timer->irq);
+}
+
+void ipipe_timer_set(unsigned long cdelay)
+{
+	unsigned long tdelay;
+	struct ipipe_timer *t;
+
+	t = __ipipe_raw_cpu_read(percpu_timer);
+
+	/*
+	 * Even though some architectures may use a 64-bit delay here,
+	 * we deliberately limit it to 32 bits: 4 billion ticks should
+	 * be enough for now. Should a timer need more, an extra call
+	 * to the tick handler will simply occur after 4 billion ticks.
+	 */
+	if (cdelay > UINT_MAX)
+		cdelay = UINT_MAX;
+
+	tdelay = cdelay;
+	if (t->c2t_integ != 1)
+		tdelay *= t->c2t_integ;
+	if (t->c2t_frac)
+		tdelay += ((unsigned long long)cdelay * t->c2t_frac) >> 32;
+	if (tdelay < t->min_delay_ticks)
+		tdelay = t->min_delay_ticks;
+
+	if (t->set(tdelay, t->timer_set) < 0)
+		ipipe_raise_irq(t->irq);
+}
+EXPORT_SYMBOL_GPL(ipipe_timer_set);
+
+const char *ipipe_timer_name(void)
+{
+	return per_cpu(percpu_timer, 0)->name;
+}
+EXPORT_SYMBOL_GPL(ipipe_timer_name);
+
+unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns)
+{
+	unsigned long long tmp;
+	BUG_ON(!timer->freq);
+	tmp = (unsigned long long)ns * timer->freq;
+	do_div(tmp, 1000000000);
+	return tmp;
+}
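+
+/*
+ * e.g. for a (hypothetical) 25 MHz timer, ipipe_timer_ns2ticks()
+ * turns 1000 ns into 25000000ULL * 1000 / 1000000000 = 25 ticks.
+ */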
+
+#ifdef CONFIG_IPIPE_HAVE_HOSTRT
+/*
+ * NOTE: The architecture specific code must only call this function
+ * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled.
+ * The event receiver is responsible for providing proper locking.
+ */
+void ipipe_update_hostrt(struct timekeeper *tk)
+{
+	struct tk_read_base *tkr = &tk->tkr_mono;
+	struct clocksource *clock = tkr->clock;
+	struct ipipe_hostrt_data data;
+	struct timespec xt;
+
+	xt.tv_sec = tk->xtime_sec;
+	xt.tv_nsec = (long)(tkr->xtime_nsec >> tkr->shift);
+	ipipe_root_only();
+	data.live = 1;
+	data.cycle_last = tkr->cycle_last;
+	data.mask = clock->mask;
+	data.mult = tkr->mult;
+	data.shift = tkr->shift;
+	data.wall_time_sec = xt.tv_sec;
+	data.wall_time_nsec = xt.tv_nsec;
+	data.wall_to_monotonic.tv_sec = tk->wall_to_monotonic.tv_sec;
+	data.wall_to_monotonic.tv_nsec = tk->wall_to_monotonic.tv_nsec;
+	__ipipe_notify_kevent(IPIPE_KEVT_HOSTRT, &data);
+}
+
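+/*
+ * Consumer-side note (a sketch, not part of this interface): the
+ * co-kernel receiving IPIPE_KEVT_HOSTRT can resolve host real time
+ * clocksource-style, roughly
+ * wall_time + (((cycles_now - cycle_last) & mask) * mult >> shift).
+ */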
+#endif /* CONFIG_IPIPE_HAVE_HOSTRT */
+
+int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
+			      bool force);
+
+void __ipipe_timer_refresh_freq(unsigned int hrclock_freq)
+{
+	struct ipipe_timer *t = __ipipe_raw_cpu_read(percpu_timer);
+	unsigned long flags;
+
+	if (t && t->refresh_freq) {
+		t->freq = t->refresh_freq();
+		flags = hard_local_irq_save();
+		config_pcpu_timer(t, hrclock_freq);
+		hard_local_irq_restore(flags);
+		clockevents_program_event(t->host_timer,
+					  t->host_timer->next_event, false);
+	}
+}
diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c
new file mode 100644
index 000000000000..ed4e73ae164a
--- /dev/null
+++ b/kernel/ipipe/tracer.c
@@ -0,0 +1,1486 @@
+/* -*- linux-c -*-
+ * kernel/ipipe/tracer.c
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *		 2005-2008 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/vmalloc.h>
+#include <linux/pid.h>
+#include <linux/vermagic.h>
+#include <linux/sched.h>
+#include <linux/ipipe.h>
+#include <linux/ftrace.h>
+#include <asm/uaccess.h>
+
+#define IPIPE_TRACE_PATHS	    4 /* <!> Do not lower below 3 */
+#define IPIPE_DEFAULT_ACTIVE	    0
+#define IPIPE_DEFAULT_MAX	    1
+#define IPIPE_DEFAULT_FROZEN	    2
+
+#define IPIPE_TRACE_POINTS	    (1 << CONFIG_IPIPE_TRACE_SHIFT)
+#define WRAP_POINT_NO(point)	    ((point) & (IPIPE_TRACE_POINTS-1))
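+
+/*
+ * Example, assuming CONFIG_IPIPE_TRACE_SHIFT=14: IPIPE_TRACE_POINTS is
+ * 16384 and WRAP_POINT_NO() masks any (possibly negative) position into
+ * the ring range 0..16383, e.g. WRAP_POINT_NO(-1) == 16383.
+ */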
+
+#define IPIPE_DEFAULT_PRE_TRACE	    10
+#define IPIPE_DEFAULT_POST_TRACE    10
+#define IPIPE_DEFAULT_BACK_TRACE    100
+
+#define IPIPE_DELAY_NOTE	    1000  /* in nanoseconds */
+#define IPIPE_DELAY_WARN	    10000 /* in nanoseconds */
+
+#define IPIPE_TFLG_NMI_LOCK	    0x0001
+#define IPIPE_TFLG_NMI_HIT	    0x0002
+#define IPIPE_TFLG_NMI_FREEZE_REQ   0x0004
+
+#define IPIPE_TFLG_HWIRQ_OFF	    0x0100
+#define IPIPE_TFLG_FREEZING	    0x0200
+#define IPIPE_TFLG_CURRDOM_SHIFT    10	 /* bits 10..11: current domain */
+#define IPIPE_TFLG_CURRDOM_MASK	    0x0C00
+#define IPIPE_TFLG_DOMSTATE_SHIFT   12	 /* bits 12..15: domain stalled? */
+#define IPIPE_TFLG_DOMSTATE_BITS    1
+
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
+	(point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
+	((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
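+
+/*
+ * Putting the above together, the per-point flags word reads: bits 0-2
+ * carry the NMI handshake (LOCK, HIT, deferred FREEZE request), bit 8
+ * records that hard IRQs were off, bit 9 flags a freeze in progress,
+ * bits 10-11 hold the current domain index, and bits 12 and up the
+ * per-domain stall state sampled at the trace point.
+ */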
+
+struct ipipe_trace_point {
+	short type;
+	short flags;
+	unsigned long eip;
+	unsigned long parent_eip;
+	unsigned long v;
+	unsigned long long timestamp;
+};
+
+struct ipipe_trace_path {
+	volatile int flags;
+	int dump_lock; /* separated from flags due to cross-cpu access */
+	int trace_pos; /* next point to fill */
+	int begin, end; /* finalised path begin and end */
+	int post_trace; /* non-zero when in post-trace phase */
+	unsigned long long length; /* max path length in cycles */
+	unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
+	unsigned long nmi_saved_parent_eip;
+	unsigned long nmi_saved_v;
+	struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
+} ____cacheline_aligned_in_smp;
+
+enum ipipe_trace_type
+{
+	IPIPE_TRACE_FUNC = 0,
+	IPIPE_TRACE_BEGIN,
+	IPIPE_TRACE_END,
+	IPIPE_TRACE_FREEZE,
+	IPIPE_TRACE_SPECIAL,
+	IPIPE_TRACE_PID,
+	IPIPE_TRACE_EVENT,
+};
+
+#define IPIPE_TYPE_MASK		    0x0007
+#define IPIPE_TYPE_BITS		    3
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path);
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
+static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) =
+	{ [0 ... IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } };
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+int ipipe_trace_enable = 0;
+
+static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE };
+static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX };
+static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN };
+static IPIPE_DEFINE_SPINLOCK(global_path_lock);
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
+static int verbose_trace = 1;
+static unsigned long trace_overhead;
+
+static unsigned long trigger_begin;
+static unsigned long trigger_end;
+
+static DEFINE_MUTEX(out_mutex);
+static struct ipipe_trace_path *print_path;
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+static struct ipipe_trace_path *panic_path;
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+static int print_pre_trace;
+static int print_post_trace;
+
+
+static long __ipipe_signed_tsc2us(long long tsc);
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
+
+static inline void store_states(struct ipipe_domain *ipd,
+				struct ipipe_trace_point *point, int pos)
+{
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status))
+		point->flags |= 1 << (pos + IPIPE_TFLG_DOMSTATE_SHIFT);
+
+	if (ipd == __ipipe_current_domain)
+		point->flags |= pos << IPIPE_TFLG_CURRDOM_SHIFT;
+}
+
+static notrace void
+__ipipe_store_domain_states(struct ipipe_trace_point *point)
+{
+	store_states(ipipe_root_domain, point, 0);
+	if (ipipe_head_domain != ipipe_root_domain)
+		store_states(ipipe_head_domain, point, 1);
+}
+
+static notrace int __ipipe_get_free_trace_path(int old, int cpu)
+{
+	int new_active = old;
+	struct ipipe_trace_path *tp;
+
+	do {
+		if (++new_active == IPIPE_TRACE_PATHS)
+			new_active = 0;
+		tp = &per_cpu(trace_path, cpu)[new_active];
+	} while (new_active == per_cpu(max_path, cpu) ||
+		 new_active == per_cpu(frozen_path, cpu) ||
+		 tp->dump_lock);
+
+	return new_active;
+}
+
+static notrace void
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
+			  struct ipipe_trace_path *old_tp, int old_pos)
+{
+	int i;
+
+	new_tp->trace_pos = pre_trace+1;
+
+	for (i = new_tp->trace_pos; i > 0; i--)
+		memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
+		       &old_tp->point[WRAP_POINT_NO(old_pos-i)],
+		       sizeof(struct ipipe_trace_point));
+
+	/* mark the end (i.e. the point before point[0]) invalid */
+	new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = per_cpu(active_path, cpu);
+	unsigned long long length;
+
+	/* do we have a new worst case? */
+	length = tp->point[tp->end].timestamp -
+		 tp->point[tp->begin].timestamp;
+	if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) {
+		/* we need protection here against other cpus trying
+		   to start a proc dump */
+		raw_spin_lock(&global_path_lock);
+
+		/* active path holds new worst case */
+		tp->length = length;
+		per_cpu(max_path, cpu) = active;
+
+		/* find next unused trace path */
+		active = __ipipe_get_free_trace_path(active, cpu);
+
+		raw_spin_unlock(&global_path_lock);
+
+		tp = &per_cpu(trace_path, cpu)[active];
+
+		/* migrate last entries for pre-tracing */
+		__ipipe_migrate_pre_trace(tp, old_tp, pos);
+	}
+
+	return tp;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = per_cpu(active_path, cpu);
+	int n;
+
+	/* frozen paths have no core (begin=end) */
+	tp->begin = tp->end;
+
+	/* we need protection here against other cpus trying
+	 * to set their frozen path or to start a proc dump */
+	raw_spin_lock(&global_path_lock);
+
+	per_cpu(frozen_path, cpu) = active;
+
+	/* find next unused trace path */
+	active = __ipipe_get_free_trace_path(active, cpu);
+
+	/* check if this is the first frozen path */
+	for_each_possible_cpu(n) {
+		if (n != cpu &&
+		    per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0)
+			tp->end = -1;
+	}
+
+	raw_spin_unlock(&global_path_lock);
+
+	tp = &per_cpu(trace_path, cpu)[active];
+
+	/* migrate last entries for pre-tracing */
+	__ipipe_migrate_pre_trace(tp, old_tp, pos);
+
+	return tp;
+}
+
+void notrace
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
+	      unsigned long parent_eip, unsigned long v)
+{
+	struct ipipe_trace_path *tp, *old_tp;
+	int pos, next_pos, begin;
+	struct ipipe_trace_point *point;
+	unsigned long flags;
+	int cpu;
+
+	flags = hard_local_irq_save_notrace();
+
+	cpu = ipipe_processor_id();
+ restart:
+	tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	/* here starts a race window with NMIs - caught below */
+
+	/* check for NMI recursion */
+	if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
+		tp->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* first freeze request from NMI context? */
+		if ((type == IPIPE_TRACE_FREEZE) &&
+		    !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
+			/* save arguments and mark deferred freezing */
+			tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
+			tp->nmi_saved_eip = eip;
+			tp->nmi_saved_parent_eip = parent_eip;
+			tp->nmi_saved_v = v;
+		}
+		return; /* no need for restoring flags inside IRQ */
+	}
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+				   IPIPE_TFLG_NMI_FREEZE_REQ))
+			       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it in the meantime */
+	if (unlikely(tp !=
+		     &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	/* get the point buffer */
+	pos = tp->trace_pos;
+	point = &tp->point[pos];
+
+	/* store all trace point data */
+	point->type = type;
+	point->flags = hard_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
+	point->eip = eip;
+	point->parent_eip = parent_eip;
+	point->v = v;
+	ipipe_read_tsc(point->timestamp);
+
+	__ipipe_store_domain_states(point);
+
+	/* forward to next point buffer */
+	next_pos = WRAP_POINT_NO(pos+1);
+	tp->trace_pos = next_pos;
+
+	/* only mark beginning if we haven't started yet */
+	begin = tp->begin;
+	if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
+		tp->begin = pos;
+
+	/* end of critical path, start post-trace if not already started */
+	if (unlikely(type == IPIPE_TRACE_END) &&
+	    (begin >= 0) && !tp->post_trace)
+		tp->post_trace = post_trace + 1;
+
+	/* freeze only if the slot is free and we are not already freezing */
+	if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
+	     (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
+	     type == IPIPE_TRACE_FUNC)) &&
+	    per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 &&
+	    !(tp->flags & IPIPE_TFLG_FREEZING)) {
+		tp->post_trace = post_trace + 1;
+		tp->flags |= IPIPE_TFLG_FREEZING;
+	}
+
+	/* enforce end of trace in case of overflow */
+	if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
+		tp->end = pos;
+		goto enforce_end;
+	}
+
+	/* stop tracing this path if we are in post-trace and
+	 *  a) that phase is over now or
+	 *  b) a new TRACE_BEGIN came in but we are not freezing this path */
+	if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
+		     ((type == IPIPE_TRACE_BEGIN) &&
+		      !(tp->flags & IPIPE_TFLG_FREEZING))))) {
+		/* store the path's end (i.e. excluding post-trace) */
+		tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
+
+ enforce_end:
+		if (tp->flags & IPIPE_TFLG_FREEZING)
+			tp = __ipipe_trace_freeze(cpu, tp, pos);
+		else
+			tp = __ipipe_trace_end(cpu, tp, pos);
+
+		/* reset the active path, maybe already start a new one */
+		tp->begin = (type == IPIPE_TRACE_BEGIN) ?
+			WRAP_POINT_NO(tp->trace_pos - 1) : -1;
+		tp->end = -1;
+		tp->post_trace = 0;
+		tp->flags = 0;
+
+		/* update active_path not earlier to avoid races with NMIs */
+		per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu);
+	}
+
+	/* we still have old_tp and point,
+	 * let's reset the NMI lock and check for NMI hits */
+	old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+	if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
+		/* well, this late tagging may not immediately be visible to
+		 * other cpus already dumping this path - a minor issue */
+		point->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* handle deferred freezing from NMI context */
+		if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+			__ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
+				      old_tp->nmi_saved_parent_eip,
+				      old_tp->nmi_saved_v);
+	}
+
+	hard_local_irq_restore_notrace(flags);
+}
+
+static unsigned long __ipipe_global_path_lock(void)
+{
+	unsigned long flags;
+	int cpu;
+	struct ipipe_trace_path *tp;
+
+	raw_spin_lock_irqsave(&global_path_lock, flags);
+
+	cpu = ipipe_processor_id();
+ restart:
+	tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	/* here is a small race window with NMIs - caught below */
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+				   IPIPE_TFLG_NMI_FREEZE_REQ))
+			       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it in the meantime */
+	if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	return flags;
+}
+
+static void __ipipe_global_path_unlock(unsigned long flags)
+{
+	int cpu;
+	struct ipipe_trace_path *tp;
+
+	/* release spinlock first - it's not involved in the NMI issue */
+	__ipipe_spin_unlock_irqbegin(&global_path_lock);
+
+	cpu = ipipe_processor_id();
+	tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+	/* handle deferred freezing from NMI context */
+	if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+		__ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+			      tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+	/* See __ipipe_spin_lock_irqsave() and friends. */
+	__ipipe_spin_unlock_irqcomplete(flags);
+}
+
+void notrace asmlinkage
+ipipe_trace_asm(enum ipipe_trace_type type, unsigned long eip,
+		unsigned long parent_eip, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(type, eip, parent_eip, v);
+}
+
+void notrace ipipe_trace_begin(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_end);
+
+void notrace ipipe_trace_irqbegin(int irq, struct pt_regs *regs)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, instruction_pointer(regs),
+		      __BUILTIN_RETURN_ADDRESS1, irq);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_irqbegin);
+
+void notrace ipipe_trace_irqend(int irq, struct pt_regs *regs)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, instruction_pointer(regs),
+		      __BUILTIN_RETURN_ADDRESS1, irq);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_irqend);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_freeze);
+
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+		      __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_special);
+
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+		      __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_pid);
+
+void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS),
+		      __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, delay_tsc);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_event);
+
+int ipipe_trace_max_reset(void)
+{
+	int cpu;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_possible_cpu(cpu) {
+		path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin	= -1;
+		path->end	= -1;
+		path->trace_pos = 0;
+		path->length	= 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_max_reset);
+
+int ipipe_trace_frozen_reset(void)
+{
+	int cpu;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_online_cpu(cpu) {
+		path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin = -1;
+		path->end = -1;
+		path->trace_pos = 0;
+		path->length	= 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_frozen_reset);
+
+static void
+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
+		      int trylock)
+{
+	struct task_struct *task = NULL;
+	char buf[8];
+	int i;
+	int locked = 1;
+
+	if (trylock) {
+		if (!read_trylock(&tasklist_lock))
+			locked = 0;
+	} else
+		read_lock(&tasklist_lock);
+
+	if (locked)
+		task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns);
+
+	if (task)
+		strncpy(task_info, task->comm, 11);
+	else
+		strcpy(task_info, "-<?>-");
+
+	if (locked)
+		read_unlock(&tasklist_lock);
+
+	for (i = strlen(task_info); i < 11; i++)
+		task_info[i] = ' ';
+
+	sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
+	strcpy(task_info + (11 - strlen(buf)), buf);
+}
+
+static void
+__ipipe_get_event_date(char *buf, struct ipipe_trace_path *path,
+		       struct ipipe_trace_point *point)
+{
+	long time;
+	int type;
+
+	time = __ipipe_signed_tsc2us(point->timestamp -
+				     path->point[path->begin].timestamp + point->v);
+	type = point->type >> IPIPE_TYPE_BITS;
+
+	if (type == 0)
+		/*
+		 * Event type #0 is predefined and stands for the next
+		 * timer tick.
+		 */
+		sprintf(buf, "tick@%-6ld", time);
+	else
+		sprintf(buf, "%3d@%-7ld", type, time);
+}
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+
+void ipipe_trace_panic_freeze(void)
+{
+	unsigned long flags;
+	int cpu;
+
+	if (!ipipe_trace_enable)
+		return;
+
+	ipipe_trace_enable = 0;
+	flags = hard_local_irq_save_notrace();
+
+	cpu = ipipe_processor_id();
+
+	panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+	hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_panic_freeze);
+
+void ipipe_trace_panic_dump(void)
+{
+	int cnt = back_trace;
+	int start, pos;
+	char buf[16];
+
+	if (!panic_path)
+		return;
+
+	ipipe_context_check_off();
+
+	printk("I-pipe tracer log (%d points):\n", cnt);
+
+	start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
+
+	while (cnt-- > 0) {
+		struct ipipe_trace_point *point = &panic_path->point[pos];
+		long time;
+		char info[16];
+		int i;
+
+		printk(" %c",
+		       (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+		for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+			printk("%c",
+			       (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'#' : '+') :
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'*' : ' '));
+
+		if (!point->eip)
+			printk("-<invalid>-\n");
+		else {
+			__ipipe_trace_point_type(buf, point);
+			printk("%s", buf);
+
+			switch (point->type & IPIPE_TYPE_MASK) {
+				case IPIPE_TRACE_FUNC:
+					printk("           ");
+					break;
+
+				case IPIPE_TRACE_PID:
+					__ipipe_get_task_info(info,
+							      point, 1);
+					printk("%s", info);
+					break;
+
+				case IPIPE_TRACE_EVENT:
+					__ipipe_get_event_date(info,
+							       panic_path, point);
+					printk("%s", info);
+					break;
+
+				default:
+					printk("0x%08lx ", point->v);
+			}
+
+			time = __ipipe_signed_tsc2us(point->timestamp -
+				panic_path->point[start].timestamp);
+			printk(" %5ld ", time);
+
+			__ipipe_print_symname(NULL, point->eip);
+			printk(" (");
+			__ipipe_print_symname(NULL, point->parent_eip);
+			printk(")\n");
+		}
+		pos = WRAP_POINT_NO(pos - 1);
+	}
+
+	panic_path = NULL;
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_panic_dump);
+
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+
+
+/* --- /proc output --- */
+
+static notrace int __ipipe_in_critical_trpath(long point_no)
+{
+	return ((WRAP_POINT_NO(point_no-print_path->begin) <
+		 WRAP_POINT_NO(print_path->end-print_path->begin)) ||
+		((print_path->end == print_path->begin) &&
+		 (WRAP_POINT_NO(point_no-print_path->end) >
+		  print_post_trace)));
+}
+
+static long __ipipe_signed_tsc2us(long long tsc)
+{
+	unsigned long long abs_tsc;
+	long us;
+
+	if (!__ipipe_hrclock_ok())
+		return 0;
+
+	/* ipipe_tsc2us works on unsigned => handle sign separately */
+	abs_tsc = (tsc >= 0) ? tsc : -tsc;
+	us = ipipe_tsc2us(abs_tsc);
+	if (tsc < 0)
+		return -us;
+	else
+		return us;
+}
+
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
+{
+	switch (point->type & IPIPE_TYPE_MASK) {
+		case IPIPE_TRACE_FUNC:
+			strcpy(buf, "func    ");
+			break;
+
+		case IPIPE_TRACE_BEGIN:
+			strcpy(buf, "begin   ");
+			break;
+
+		case IPIPE_TRACE_END:
+			strcpy(buf, "end     ");
+			break;
+
+		case IPIPE_TRACE_FREEZE:
+			strcpy(buf, "freeze  ");
+			break;
+
+		case IPIPE_TRACE_SPECIAL:
+			sprintf(buf, "(0x%02x)  ",
+				point->type >> IPIPE_TYPE_BITS);
+			break;
+
+		case IPIPE_TRACE_PID:
+			sprintf(buf, "[%5d] ", (pid_t)point->v);
+			break;
+
+		case IPIPE_TRACE_EVENT:
+			sprintf(buf, "event   ");
+			break;
+	}
+}
+
+static void
+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	char mark = ' ';
+	int point_no = point - print_path->point;
+	int i;
+
+	if (print_path->end == point_no)
+		mark = '<';
+	else if (print_path->begin == point_no)
+		mark = '>';
+	else if (__ipipe_in_critical_trpath(point_no))
+		mark = ':';
+	seq_printf(m, "%c%c", mark,
+		   (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+	if (!verbose_trace)
+		return;
+
+	for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+		seq_printf(m, "%c",
+			(IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+			    (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+				'#' : '+') :
+			(IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
+}
+
+static void
+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	unsigned long delay = 0;
+	int next;
+	char *mark = "	";
+
+	next = WRAP_POINT_NO(point+1 - print_path->point);
+
+	if (next != print_path->trace_pos)
+		delay = ipipe_tsc2ns(print_path->point[next].timestamp -
+				     point->timestamp);
+
+	if (__ipipe_in_critical_trpath(point - print_path->point)) {
+		if (delay > IPIPE_DELAY_WARN)
+			mark = "! ";
+		else if (delay > IPIPE_DELAY_NOTE)
+			mark = "+ ";
+	}
+	seq_puts(m, mark);
+
+	if (verbose_trace)
+		seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
+			   (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
+	else
+		seq_puts(m, " ");
+}
+
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+	if (!m) {
+		/* panic dump */
+		if (sym_name) {
+			printk("%s+0x%lx", sym_name, offset);
+			if (modname)
+				printk(" [%s]", modname);
+		} else
+			printk("<%08lx>", eip);
+	} else
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+	{
+		if (sym_name) {
+			if (verbose_trace) {
+				seq_printf(m, "%s+0x%lx", sym_name, offset);
+				if (modname)
+					seq_printf(m, " [%s]", modname);
+			} else
+				seq_puts(m, sym_name);
+		} else
+			seq_printf(m, "<%08lx>", eip);
+	}
+}
+
+static void __ipipe_print_headline(struct seq_file *m)
+{
+	const char *name[2];
+
+	seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
+		   "us\n\n", trace_overhead/1000, trace_overhead%1000);
+
+	if (verbose_trace) {
+		name[0] = ipipe_root_domain->name;
+		if (ipipe_head_domain != ipipe_root_domain)
+			name[1] = ipipe_head_domain->name;
+		else
+			name[1] = "<unused>";
+
+		seq_printf(m,
+			   " +----- Hard IRQs ('|': locked)\n"
+			   " |+-- %s\n"
+			   " ||+- %s%s\n"
+			   " |||			  +---------- "
+			       "Delay flag ('+': > %d us, '!': > %d us)\n"
+			   " |||			  |	   +- "
+			       "NMI noise ('N')\n"
+			   " |||			  |	   |\n"
+			   "	  Type	  User Val.   Time    Delay  Function "
+			       "(Parent)\n",
+			   name[1], name[0],
+			   " ('*': domain stalled, '+': current, "
+			   "'#': current+stalled)",
+			   IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+	} else
+		seq_printf(m,
+			   " +--------------- Hard IRQs ('|': locked)\n"
+			   " |		   +- Delay flag "
+			       "('+': > %d us, '!': > %d us)\n"
+			   " |		   |\n"
+			   "  Type     Time   Function (Parent)\n",
+			   IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+}
+
+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *tp;
+		unsigned long length_usecs;
+		int points, cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the longest of all per-cpu paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
+			if ((print_path == NULL) ||
+			    (tp->length > print_path->length))
+				print_path = tp;
+		}
+		print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		if (!__ipipe_hrclock_ok()) {
+			seq_printf(m, "No hrclock available, dumping traces disabled\n");
+			return NULL;
+		}
+
+		/* does this path actually contain data? */
+		if (print_path->end == print_path->begin)
+			return NULL;
+
+		/* number of points inside the critical path */
+		points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
+
+		/* pre- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, pre-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace	 = pre_trace;
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+						 print_path->end - 1);
+		if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
+				print_post_trace;
+
+		length_usecs = ipipe_tsc2us(print_path->length);
+		seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe release #%d\n"
+			   "-------------------------------------------------------------\n",
+			UTS_RELEASE, IPIPE_CORE_RELEASE);
+		seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
+			"%d (-%d/+%d), Length: %lu us\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			points, print_pre_trace, print_post_trace, length_usecs);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+			       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+						print_pre_trace + n)];
+}
+
+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	loff_t n = ++*pos;
+
+	/* check if we are inside the trace range with the next entry */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+			       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+						print_pre_trace + *pos)];
+}
+
+static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
+{
+	if (print_path)
+		print_path->dump_lock = 0;
+	mutex_unlock(&out_mutex);
+}
+
+static int __ipipe_prtrace_show(struct seq_file *m, void *p)
+{
+	long time;
+	struct ipipe_trace_point *point = p;
+	char buf[16];
+
+	if (!point->eip) {
+		seq_puts(m, "-<invalid>-\n");
+		return 0;
+	}
+
+	__ipipe_print_pathmark(m, point);
+	__ipipe_trace_point_type(buf, point);
+	seq_puts(m, buf);
+	if (verbose_trace)
+		switch (point->type & IPIPE_TYPE_MASK) {
+			case IPIPE_TRACE_FUNC:
+				seq_puts(m, "           ");
+				break;
+
+			case IPIPE_TRACE_PID:
+				__ipipe_get_task_info(buf, point, 0);
+				seq_puts(m, buf);
+				break;
+
+			case IPIPE_TRACE_EVENT:
+				__ipipe_get_event_date(buf, print_path, point);
+				seq_puts(m, buf);
+				break;
+
+			default:
+				seq_printf(m, "0x%08lx ", point->v);
+		}
+
+	time = __ipipe_signed_tsc2us(point->timestamp -
+		print_path->point[print_path->begin].timestamp);
+	seq_printf(m, "%5ld", time);
+
+	__ipipe_print_delay(m, point);
+	__ipipe_print_symname(m, point->eip);
+	seq_puts(m, " (");
+	__ipipe_print_symname(m, point->parent_eip);
+	seq_puts(m, ")\n");
+
+	return 0;
+}
+
+static struct seq_operations __ipipe_max_ptrace_ops = {
+	.start = __ipipe_max_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_max_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_max_reset(struct file *file, const char __user *pbuffer,
+		  size_t count, loff_t *data)
+{
+	mutex_lock(&out_mutex);
+	ipipe_trace_max_reset();
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static const struct file_operations __ipipe_max_prtrace_fops = {
+	.open	    = __ipipe_max_prtrace_open,
+	.read	    = seq_read,
+	.write	    = __ipipe_max_reset,
+	.llseek	    = seq_lseek,
+	.release    = seq_release,
+};
+
+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *tp;
+		int cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the first of all per-cpu frozen paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
+			if (tp->end >= 0) {
+				print_path = tp;
+				break;
+			}
+		}
+		if (print_path)
+			print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		if (!print_path)
+			return NULL;
+
+		if (!__ipipe_hrclock_ok()) {
+			seq_printf(m, "No hrclock available, dumping traces disabled\n");
+			return NULL;
+		}
+
+		/* back- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, back-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace	 = back_trace-1; /* subtract freeze point */
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+						 print_path->end - 1);
+		if (1+print_pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 2 -
+				print_post_trace;
+
+		seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe release #%d\n"
+			      "------------------------------------------------------------\n",
+			   UTS_RELEASE, IPIPE_CORE_RELEASE);
+		seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			print_pre_trace+1, print_post_trace);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= print_pre_trace + 1 + print_post_trace)
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin-
+						print_pre_trace+n)];
+}
+
+static struct seq_operations __ipipe_frozen_ptrace_ops = {
+	.start = __ipipe_frozen_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_frozen_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
+		    size_t count, loff_t *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, pbuffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	ipipe_trace_frozen_reset();
+	if (val > 0)
+		ipipe_trace_freeze(-1);
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static const struct file_operations __ipipe_frozen_prtrace_fops = {
+	.open	    = __ipipe_frozen_prtrace_open,
+	.read	    = seq_read,
+	.write	    = __ipipe_frozen_ctrl,
+	.llseek	    = seq_lseek,
+	.release    = seq_release,
+};
+
+static int __ipipe_rd_proc_val(struct seq_file *p, void *data)
+{
+	seq_printf(p, "%u\n", *(int *)p->private);
+	return 0;
+}
+
+static ssize_t
+__ipipe_wr_proc_val(struct file *file, const char __user *buffer,
+		    size_t count, loff_t *data)
+{
+	struct seq_file *p = file->private_data;
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	*(int *)p->private = val;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static int __ipipe_rw_proc_val_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_rd_proc_val, PDE_DATA(inode));
+}
+
+static const struct file_operations __ipipe_rw_proc_val_ops = {
+	.open		= __ipipe_rw_proc_val_open,
+	.read		= seq_read,
+	.write		= __ipipe_wr_proc_val,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void __init
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
+			      const char *name, int *value_ptr)
+{
+	proc_create_data(name, 0644, trace_dir, &__ipipe_rw_proc_val_ops,
+			 value_ptr);
+}
+
+static int __ipipe_rd_trigger(struct seq_file *p, void *data)
+{
+	char str[KSYM_SYMBOL_LEN];
+
+	if (trigger_begin) {
+		sprint_symbol(str, trigger_begin);
+		seq_printf(p, "%s\n", str);
+	}
+	return 0;
+}
+
+static ssize_t
+__ipipe_wr_trigger(struct file *file, const char __user *buffer,
+		   size_t count, loff_t *data)
+{
+	char buf[KSYM_SYMBOL_LEN];
+	unsigned long begin, end;
+
+	if (count > sizeof(buf) - 1)
+		count = sizeof(buf) - 1;
+	if (copy_from_user(buf, buffer, count))
+		return -EFAULT;
+	buf[count] = 0;
+	if (buf[count-1] == '\n')
+		buf[count-1] = 0;
+
+	begin = kallsyms_lookup_name(buf);
+	if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
+		return -ENOENT;
+	end += begin - 1;
+
+	mutex_lock(&out_mutex);
+	/* invalidate the current range before setting a new one */
+	trigger_end = 0;
+	wmb();
+	ipipe_trace_frozen_reset();
+
+	/* set new range */
+	trigger_begin = begin;
+	wmb();
+	trigger_end = end;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static int __ipipe_rw_trigger_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_rd_trigger, NULL);
+}
+
+static const struct file_operations __ipipe_rw_trigger_ops = {
+	.open		= __ipipe_rw_trigger_open,
+	.read		= seq_read,
+	.write		= __ipipe_wr_trigger,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+static void notrace
+ipipe_trace_function(unsigned long ip, unsigned long parent_ip,
+		     struct ftrace_ops *op, struct pt_regs *regs)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0);
+}
+
+static struct ftrace_ops ipipe_trace_ops = {
+	.func = ipipe_trace_function,
+	.flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE,
+};
+
+static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer,
+				 size_t count, loff_t *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+
+	if (ipipe_trace_enable) {
+		if (!val)
+			unregister_ftrace_function(&ipipe_trace_ops);
+	} else if (val)
+		register_ftrace_function(&ipipe_trace_ops);
+
+	ipipe_trace_enable = val;
+
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static const struct file_operations __ipipe_rw_enable_ops = {
+	.open		= __ipipe_rw_proc_val_open,
+	.read		= seq_read,
+	.write		= __ipipe_wr_enable,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+
+extern struct proc_dir_entry *ipipe_proc_root;
+
+void __init __ipipe_tracer_hrclock_initialized(void)
+{
+	unsigned long long start, end, min = ULLONG_MAX;
+	int i;
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+	if (!per_cpu(trace_path, 0))
+		return;
+#endif
+	/* Calculate minimum overhead of __ipipe_trace() */
+	hard_local_irq_disable();
+	for (i = 0; i < 100; i++) {
+		ipipe_read_tsc(start);
+		__ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
+			      __BUILTIN_RETURN_ADDRESS1, 0);
+		ipipe_read_tsc(end);
+
+		end -= start;
+		if (end < min)
+			min = end;
+	}
+	hard_local_irq_enable();
+	trace_overhead = ipipe_tsc2ns(min);
+}
+
+void __init __ipipe_init_tracer(void)
+{
+	struct proc_dir_entry *trace_dir;
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+	int cpu, path;
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+	for_each_possible_cpu(cpu) {
+		struct ipipe_trace_path *tp_buf;
+
+		tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) *
+				      IPIPE_TRACE_PATHS, cpu_to_node(cpu));
+		if (!tp_buf) {
+			pr_err("I-pipe: "
+			       "insufficient memory for trace buffer.\n");
+			return;
+		}
+		memset(tp_buf, 0,
+		       sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+		for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
+			tp_buf[path].begin = -1;
+			tp_buf[path].end   = -1;
+		}
+		per_cpu(trace_path, cpu) = tp_buf;
+	}
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+	if (__ipipe_hrclock_ok() && !trace_overhead)
+		__ipipe_tracer_hrclock_initialized();
+
+#ifdef CONFIG_IPIPE_TRACE_ENABLE
+	ipipe_trace_enable = 1;
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+	ftrace_enabled = 1;
+	register_ftrace_function(&ipipe_trace_ops);
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+#endif /* CONFIG_IPIPE_TRACE_ENABLE */
+
+	trace_dir = proc_mkdir("trace", ipipe_proc_root);
+
+	proc_create("max", 0644, trace_dir, &__ipipe_max_prtrace_fops);
+	proc_create("frozen", 0644, trace_dir, &__ipipe_frozen_prtrace_fops);
+
+	proc_create("trigger", 0644, trace_dir, &__ipipe_rw_trigger_ops);
+
+	__ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
+				      &pre_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
+				      &post_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
+				      &back_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "verbose",
+				      &verbose_trace);
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+	proc_create_data("enable", 0644, trace_dir, &__ipipe_rw_enable_ops,
+			 &ipipe_trace_enable);
+#else /* !CONFIG_IPIPE_TRACE_MCOUNT */
+	__ipipe_create_trace_proc_val(trace_dir, "enable",
+				      &ipipe_trace_enable);
+#endif /* !CONFIG_IPIPE_TRACE_MCOUNT */
+}
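+
+/*
+ * Hypothetical usage sketch of the /proc interface created above,
+ * assuming ipipe_proc_root maps to /proc/ipipe:
+ *
+ *   echo 1 > /proc/ipipe/trace/enable	 # arm the tracer
+ *   cat /proc/ipipe/trace/max		 # dump the worst-case path
+ *   echo 0 > /proc/ipipe/trace/max	 # reset the worst-case path
+ *   echo 1 > /proc/ipipe/trace/frozen	 # reset + take a freeze snapshot
+ *   cat /proc/ipipe/trace/frozen	 # dump the frozen path
+ */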
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3c74e13a95dc..478ab2810317 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/irqdomain.h>
+#include <linux/ipipe.h>
 
 #include <trace/events/irq.h>
 
@@ -158,14 +159,6 @@ int irq_set_chip_data(unsigned int irq, void *data)
 }
 EXPORT_SYMBOL(irq_set_chip_data);
 
-struct irq_data *irq_get_irq_data(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	return desc ? &desc->irq_data : NULL;
-}
-EXPORT_SYMBOL_GPL(irq_get_irq_data);
-
 static void irq_state_clr_disabled(struct irq_desc *desc)
 {
 	irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED);
@@ -195,8 +188,13 @@ int irq_startup(struct irq_desc *desc, bool resend)
 
 	irq_domain_activate_irq(&desc->irq_data);
 	if (desc->irq_data.chip->irq_startup) {
+		unsigned long flags = hard_cond_local_irq_save();
 		ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
 		irq_state_clr_masked(desc);
+		hard_cond_local_irq_restore(flags);
+#ifdef CONFIG_IPIPE
+		desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP;
+#endif
 	} else {
 		irq_enable(desc);
 	}
@@ -209,9 +207,12 @@ void irq_shutdown(struct irq_desc *desc)
 {
 	irq_state_set_disabled(desc);
 	desc->depth = 1;
-	if (desc->irq_data.chip->irq_shutdown)
+	if (desc->irq_data.chip->irq_shutdown) {
 		desc->irq_data.chip->irq_shutdown(&desc->irq_data);
-	else if (desc->irq_data.chip->irq_disable)
+#ifdef CONFIG_IPIPE
+		desc->istate |= IPIPE_IRQS_NEEDS_STARTUP;
+#endif
+	} else if (desc->irq_data.chip->irq_disable)
 		desc->irq_data.chip->irq_disable(&desc->irq_data);
 	else
 		desc->irq_data.chip->irq_mask(&desc->irq_data);
@@ -221,12 +222,14 @@ void irq_shutdown(struct irq_desc *desc)
 
 void irq_enable(struct irq_desc *desc)
 {
+	unsigned long flags = hard_cond_local_irq_save();
 	irq_state_clr_disabled(desc);
 	if (desc->irq_data.chip->irq_enable)
 		desc->irq_data.chip->irq_enable(&desc->irq_data);
 	else
 		desc->irq_data.chip->irq_unmask(&desc->irq_data);
 	irq_state_clr_masked(desc);
+	hard_cond_local_irq_restore(flags);
 }
 
 /**
@@ -262,11 +265,13 @@ void irq_disable(struct irq_desc *desc)
 
 void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu)
 {
+	unsigned long flags = hard_cond_local_irq_save();
 	if (desc->irq_data.chip->irq_enable)
 		desc->irq_data.chip->irq_enable(&desc->irq_data);
 	else
 		desc->irq_data.chip->irq_unmask(&desc->irq_data);
 	cpumask_set_cpu(cpu, desc->percpu_enabled);
+	hard_cond_local_irq_restore(flags);
 }
 
 void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu)
@@ -300,9 +305,13 @@ void mask_irq(struct irq_desc *desc)
 
 void unmask_irq(struct irq_desc *desc)
 {
+	unsigned long flags;
+
 	if (desc->irq_data.chip->irq_unmask) {
+		flags = hard_cond_local_irq_save();
 		desc->irq_data.chip->irq_unmask(&desc->irq_data);
 		irq_state_clr_masked(desc);
+		hard_cond_local_irq_restore(flags);
 	}
 }
 
@@ -456,7 +465,9 @@ static void cond_unmask_irq(struct irq_desc *desc)
 void handle_level_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
+#ifndef CONFIG_IPIPE
 	mask_ack_irq(desc);
+#endif
 
 	if (!irq_may_run(desc))
 		goto out_unlock;
@@ -492,7 +503,17 @@ static inline void preflow_handler(struct irq_desc *desc)
 static inline void preflow_handler(struct irq_desc *desc) { }
 #endif
 
-static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
+#ifdef CONFIG_IPIPE
+static void cond_release_fasteoi_irq(struct irq_desc *desc,
+				     struct irq_chip *chip)
+{
+	if (chip->irq_release && 
+	    !irqd_irq_disabled(&desc->irq_data) && !desc->threads_oneshot)
+		chip->irq_release(&desc->irq_data);
+}
+#endif /* CONFIG_IPIPE */
+
+static inline void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
 {
 	if (!(desc->istate & IRQS_ONESHOT)) {
 		chip->irq_eoi(&desc->irq_data);
@@ -544,14 +565,23 @@ void handle_fasteoi_irq(struct irq_desc *desc)
 	}
 
 	kstat_incr_irqs_this_cpu(desc);
+#ifndef CONFIG_IPIPE
 	if (desc->istate & IRQS_ONESHOT)
 		mask_irq(desc);
+#endif
 
 	preflow_handler(desc);
 	handle_irq_event(desc);
 
+#ifdef CONFIG_IPIPE
+	/*
+	 * IRQCHIP_EOI_IF_HANDLED is ignored as the I-pipe always
+	 * sends EOI.
+	 */
+	cond_release_fasteoi_irq(desc, chip);
+#else  /* !CONFIG_IPIPE */
 	cond_unmask_eoi_irq(desc, chip);
-
+#endif	/* !CONFIG_IPIPE */
 	raw_spin_unlock(&desc->lock);
 	return;
 out:
@@ -601,7 +631,9 @@ void handle_edge_irq(struct irq_desc *desc)
 	kstat_incr_irqs_this_cpu(desc);
 
 	/* Start handling the irq */
+#ifndef CONFIG_IPIPE
 	desc->irq_data.chip->irq_ack(&desc->irq_data);
+#endif
 
 	do {
 		if (unlikely(!desc->action)) {
@@ -689,6 +721,18 @@ void handle_percpu_irq(struct irq_desc *desc)
 
 	kstat_incr_irqs_this_cpu(desc);
 
+#ifdef CONFIG_IPIPE
+	(void)chip;
+
+	handle_irq_event_percpu(desc);
+
+	if ((desc->percpu_enabled == NULL ||
+	     cpumask_test_cpu(smp_processor_id(), desc->percpu_enabled)) &&
+	    !irqd_irq_masked(&desc->irq_data) &&
+	    !desc->threads_oneshot &&
+	    desc->ipipe_end)
+		desc->ipipe_end(desc);
+#else
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
 
@@ -696,6 +740,7 @@ void handle_percpu_irq(struct irq_desc *desc)
 
 	if (chip->irq_eoi)
 		chip->irq_eoi(&desc->irq_data);
+#endif
 }
 
 /**
@@ -719,17 +764,180 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
 
 	kstat_incr_irqs_this_cpu(desc);
 
+#ifndef CONFIG_IPIPE
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
+#else
+	(void)chip;
+#endif
 
 	trace_irq_handler_entry(irq, action);
 	res = action->handler(irq, dev_id);
 	trace_irq_handler_exit(irq, action, res);
 
+#ifndef CONFIG_IPIPE
 	if (chip->irq_eoi)
 		chip->irq_eoi(&desc->irq_data);
+#else
+	if ((desc->percpu_enabled == NULL ||
+	     cpumask_test_cpu(smp_processor_id(), desc->percpu_enabled)) &&
+	    !irqd_irq_masked(&desc->irq_data) &&
+	    !desc->threads_oneshot &&
+	    desc->ipipe_end)
+		desc->ipipe_end(desc);
+#endif
+}
+
+#ifdef CONFIG_IPIPE
+
+void __ipipe_ack_level_irq(struct irq_desc *desc)
+{
+	mask_ack_irq(desc);
+}
+
+void __ipipe_end_level_irq(struct irq_desc *desc)
+{
+	desc->irq_data.chip->irq_unmask(&desc->irq_data);
+}
+
+void __ipipe_ack_fasteoi_irq(struct irq_desc *desc)
+{
+	desc->irq_data.chip->irq_hold(&desc->irq_data);
+}
+
+void __ipipe_end_fasteoi_irq(struct irq_desc *desc)
+{
+	if (desc->irq_data.chip->irq_release)
+		desc->irq_data.chip->irq_release(&desc->irq_data);
+}
+
+void __ipipe_ack_edge_irq(struct irq_desc *desc)
+{
+	desc->irq_data.chip->irq_ack(&desc->irq_data);
+}
+
+void __ipipe_ack_percpu_irq(struct irq_desc *desc)
+{
+	if (desc->irq_data.chip->irq_ack)
+		desc->irq_data.chip->irq_ack(&desc->irq_data);
+
+	if (desc->irq_data.chip->irq_eoi)
+		desc->irq_data.chip->irq_eoi(&desc->irq_data);
+}
+
+void __ipipe_nop_irq(struct irq_desc *desc)
+{
+}
+
+void __ipipe_chained_irq(struct irq_desc *desc)
+{
+	/*
+	 * XXX: Do NOT fold this into __ipipe_nop_irq(), see
+	 * ipipe_chained_irq_p().
+	 */
+}
+
+static void __ipipe_ack_bad_irq(struct irq_desc *desc)
+{
+	handle_bad_irq(desc);
+	WARN_ON_ONCE(1);
 }
 
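+/*
+ * Map a Linux flow handler to the ipipe_ack/ipipe_end pair the
+ * pipeline needs: the ack routine runs early from the head stage to
+ * silence the line, while the end routine re-opens it once the root
+ * domain has handled the event.
+ */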
+irq_flow_handler_t
+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained)
+{
+	if (unlikely(handle == NULL)) {
+		desc->ipipe_ack = __ipipe_ack_bad_irq;
+		desc->ipipe_end = __ipipe_nop_irq;
+	} else {
+		if (is_chained) {
+			desc->ipipe_ack = handle;
+			desc->ipipe_end = __ipipe_nop_irq;
+			handle = __ipipe_chained_irq;
+		} else if (handle == handle_simple_irq) {
+			desc->ipipe_ack = __ipipe_nop_irq;
+			desc->ipipe_end = __ipipe_nop_irq;
+		} else if (handle == handle_level_irq) {
+			desc->ipipe_ack = __ipipe_ack_level_irq;
+			desc->ipipe_end = __ipipe_end_level_irq;
+		} else if (handle == handle_edge_irq) {
+			desc->ipipe_ack = __ipipe_ack_edge_irq;
+			desc->ipipe_end = __ipipe_nop_irq;
+		} else if (handle == handle_fasteoi_irq) {
+			desc->ipipe_ack = __ipipe_ack_fasteoi_irq;
+			desc->ipipe_end = __ipipe_end_fasteoi_irq;
+		} else if (handle == handle_percpu_irq ||
+			   handle == handle_percpu_devid_irq) {
+			if (irq_desc_get_chip(desc) &&
+			    irq_desc_get_chip(desc)->irq_hold) {
+				desc->ipipe_ack = __ipipe_ack_fasteoi_irq;
+				desc->ipipe_end = __ipipe_end_fasteoi_irq;
+			} else {
+				desc->ipipe_ack = __ipipe_ack_percpu_irq;
+				desc->ipipe_end = __ipipe_nop_irq;
+			}
+		} else if (irq_desc_get_chip(desc) == &no_irq_chip) {
+			desc->ipipe_ack = __ipipe_nop_irq;
+			desc->ipipe_end = __ipipe_nop_irq;
+		} else {
+			desc->ipipe_ack = __ipipe_ack_bad_irq;
+			desc->ipipe_end = __ipipe_nop_irq;
+		}
+	}
+
+	/* Suppress intermediate trampoline routine. */
+	ipipe_root_domain->irqs[desc->irq_data.irq].ackfn = desc->ipipe_ack;
+
+	return handle;
+}
+
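+/*
+ * Re-enable an IRQ from ipipe context. If the chip was shut down
+ * since irq_shutdown() (IPIPE_IRQS_NEEDS_STARTUP set there), a full
+ * chip->irq_startup() is replayed under desc->lock; otherwise a plain
+ * enable/unmask is enough.
+ */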
+void ipipe_enable_irq(unsigned int irq)
+{
+	struct irq_desc *desc;
+	struct irq_chip *chip;
+	unsigned long flags;
+
+	desc = irq_to_desc(irq);
+	if (desc == NULL)
+		return;
+
+	chip = irq_desc_get_chip(desc);
+
+	if (chip->irq_startup && (desc->istate & IPIPE_IRQS_NEEDS_STARTUP)) {
+
+		ipipe_root_only();
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+		if (desc->istate & IPIPE_IRQS_NEEDS_STARTUP) {
+			desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP;
+			chip->irq_startup(&desc->irq_data);
+		}
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+		return;
+	}
+
+	if (WARN_ON_ONCE(chip->irq_enable == NULL && chip->irq_unmask == NULL))
+		return;
+
+	if (chip->irq_enable)
+		chip->irq_enable(&desc->irq_data);
+	else
+		chip->irq_unmask(&desc->irq_data);
+}
+EXPORT_SYMBOL_GPL(ipipe_enable_irq);
+
+#else /* !CONFIG_IPIPE */
+
+irq_flow_handler_t
+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained)
+{
+	return handle;
+}
+
+#endif /* !CONFIG_IPIPE */
+EXPORT_SYMBOL_GPL(__fixup_irq_handler);
+
 void
 __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 		     int is_chained, const char *name)
@@ -764,6 +972,8 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 			return;
 	}
 
+	handle = __fixup_irq_handler(desc, handle, is_chained);
+
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
 		if (desc->irq_data.chip != &no_irq_chip)
@@ -981,6 +1191,20 @@ void irq_chip_eoi_parent(struct irq_data *data)
 	data->chip->irq_eoi(data);
 }
 
+#ifdef CONFIG_IPIPE
+void irq_chip_hold_parent(struct irq_data *data)
+{
+	data = data->parent_data;
+	data->chip->irq_hold(data);
+}
+
+void irq_chip_release_parent(struct irq_data *data)
+{
+	data = data->parent_data;
+	data->chip->irq_release(data);
+}
+#endif
+
 /**
  * irq_chip_set_affinity_parent - Set affinity on the parent interrupt
  * @data:	Pointer to interrupt specific data
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index a4775f3451b9..2536e2187cd5 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -36,12 +36,13 @@ void irq_gc_mask_disable_reg(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	irq_reg_writel(gc, mask, ct->regs.disable);
 	*ct->mask_cache &= ~mask;
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 
 /**
@@ -55,12 +56,13 @@ void irq_gc_mask_set_bit(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	*ct->mask_cache |= mask;
 	irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit);
 
@@ -75,12 +77,13 @@ void irq_gc_mask_clr_bit(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	*ct->mask_cache &= ~mask;
 	irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit);
 
@@ -95,12 +98,13 @@ void irq_gc_unmask_enable_reg(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	irq_reg_writel(gc, mask, ct->regs.enable);
 	*ct->mask_cache |= mask;
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 
 /**
@@ -111,11 +115,12 @@ void irq_gc_ack_set_bit(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	irq_reg_writel(gc, mask, ct->regs.ack);
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit);
 
@@ -127,11 +132,12 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = ~d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	irq_reg_writel(gc, mask, ct->regs.ack);
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 
 /**
@@ -142,12 +148,13 @@ void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	irq_reg_writel(gc, mask, ct->regs.mask);
 	irq_reg_writel(gc, mask, ct->regs.ack);
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 
 /**
@@ -158,11 +165,12 @@ void irq_gc_eoi(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	irq_reg_writel(gc, mask, ct->regs.eoi);
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 }
 
 /**
@@ -177,17 +185,18 @@ void irq_gc_eoi(struct irq_data *d)
 int irq_gc_set_wake(struct irq_data *d, unsigned int on)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	unsigned long flags;
 	u32 mask = d->mask;
 
 	if (!(mask & gc->wake_enabled))
 		return -EINVAL;
 
-	irq_gc_lock(gc);
+	flags = irq_gc_lock(gc);
 	if (on)
 		gc->wake_active |= mask;
 	else
 		gc->wake_active &= ~mask;
-	irq_gc_unlock(gc);
+	irq_gc_unlock(gc, flags);
 	return 0;
 }
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index fcab63c66905..9dea951a57b1 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -56,6 +56,9 @@ enum {
 	IRQS_WAITING		= 0x00000080,
 	IRQS_PENDING		= 0x00000200,
 	IRQS_SUSPENDED		= 0x00000800,
+#ifdef CONFIG_IPIPE
+	IPIPE_IRQS_NEEDS_STARTUP = 0x80000000,
+#endif
 };
 
 #include "debug.h"
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 239e2ae2c947..6a102d6d5896 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -91,6 +91,9 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 	for_each_possible_cpu(cpu)
 		*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
 	desc_smp_init(desc, node);
+#ifdef CONFIG_IPIPE
+	desc->istate |= IPIPE_IRQS_NEEDS_STARTUP;
+#endif
 }
 
 int nr_irqs = NR_IRQS;
@@ -286,11 +289,13 @@ int __init early_irq_init(void)
 	return arch_early_irq_init();
 }
 
+#ifndef CONFIG_IPIPE
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 EXPORT_SYMBOL(irq_to_desc);
+#endif /* CONFIG_IPIPE */
 
 static void free_desc(unsigned int irq)
 {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 83cea913983c..d02d96879a28 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -800,9 +800,14 @@ again:
 
 	desc->threads_oneshot &= ~action->thread_mask;
 
+#ifndef CONFIG_IPIPE
 	if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) &&
 	    irqd_irq_masked(&desc->irq_data))
 		unmask_threaded_irq(desc);
+#else /* CONFIG_IPIPE */
+	if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data))
+		desc->ipipe_end(desc);
+#endif /* CONFIG_IPIPE */
 
 out_unlock:
 	raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 774ab79d3ec7..3ea0630d5086 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2572,6 +2572,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 
 __visible void trace_hardirqs_on_caller(unsigned long ip)
 {
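+	/* IRQ state tracing only makes sense for the root domain. */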
+	if (!ipipe_root_p)
+		return;
+
 	time_hardirqs_on(CALLER_ADDR0, ip);
 
 	if (unlikely(!debug_locks || current->lockdep_recursion))
@@ -2592,7 +2595,7 @@ __visible void trace_hardirqs_on_caller(unsigned long ip)
 	 * already enabled, yet we find the hardware thinks they are in fact
 	 * enabled.. someone messed up their IRQ state tracing.
 	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled()))
 		return;
 
 	/*
@@ -2625,7 +2628,12 @@ EXPORT_SYMBOL(trace_hardirqs_on);
  */
 __visible void trace_hardirqs_off_caller(unsigned long ip)
 {
-	struct task_struct *curr = current;
+	struct task_struct *curr;
+
+	if (!ipipe_root_p)
+		return;
+
+	curr = current;
 
 	time_hardirqs_off(CALLER_ADDR0, ip);
 
@@ -2636,7 +2644,7 @@ __visible void trace_hardirqs_off_caller(unsigned long ip)
 	 * So we're supposed to get called after you mask local IRQs, but for
 	 * some reason the hardware doesn't quite think you did a proper job.
 	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled()))
 		return;
 
 	if (curr->hardirqs_enabled) {
@@ -2672,7 +2680,7 @@ void trace_softirqs_on(unsigned long ip)
 	 * We fancy IRQs being disabled here, see softirq.c, avoids
 	 * funny state and nesting things.
 	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled()))
 		return;
 
 	if (curr->softirqs_enabled) {
@@ -2711,7 +2719,7 @@ void trace_softirqs_off(unsigned long ip)
 	/*
 	 * We fancy IRQs being disabled here, see softirq.c
 	 */
-	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled()))
 		return;
 
 	if (curr->softirqs_enabled) {
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index db3ccb1dd614..da9b4d0e6c9b 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -26,7 +26,9 @@
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) ||			\
+	defined(CONFIG_DEBUG_LOCK_ALLOC) ||			\
+	defined(CONFIG_IPIPE)
 /*
  * The __lock_function inlines are taken from
  * include/linux/spinlock_api_smp.h
diff --git a/kernel/module.c b/kernel/module.c
index bcc78f4c15e9..1b8210ea47c6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1103,7 +1103,7 @@ bool try_module_get(struct module *module)
 	bool ret = true;
 
 	if (module) {
-		preempt_disable();
+		unsigned long flags = hard_preempt_disable();
 		/* Note: here, we can fail to get a reference */
 		if (likely(module_is_live(module) &&
 			   atomic_inc_not_zero(&module->refcnt) != 0))
@@ -1111,7 +1111,7 @@ bool try_module_get(struct module *module)
 		else
 			ret = false;
 
-		preempt_enable();
+		hard_preempt_enable(flags);
 	}
 	return ret;
 }
@@ -1122,11 +1122,11 @@ void module_put(struct module *module)
 	int ret;
 
 	if (module) {
-		preempt_disable();
+		unsigned long flags = hard_preempt_disable();
 		ret = atomic_dec_if_positive(&module->refcnt);
 		WARN_ON(ret < 0);	/* Failed to put refcount */
 		trace_module_put(module, _RET_IP_);
-		preempt_enable();
+		hard_preempt_enable(flags);
 	}
 }
 EXPORT_SYMBOL(module_put);
diff --git a/kernel/panic.c b/kernel/panic.c
index 1d07cf9af849..68794fa2390a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/nmi.h>
 #include <linux/console.h>
+#include <linux/ipipe_trace.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -389,6 +390,8 @@ void oops_enter(void)
 {
 	tracing_off();
 	/* can't trust the integrity of the kernel anymore: */
+	ipipe_trace_panic_freeze();
+	ipipe_disable_context_check();
 	debug_locks_off();
 	do_oops_enter_exit();
 }
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 3124cebaec31..fe8214323e4f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -284,6 +284,7 @@ static int create_image(int platform_mode)
 		goto Enable_cpus;
 
 	local_irq_disable();
+	hard_cond_local_irq_disable();
 
 	error = syscore_suspend();
 	if (error) {
@@ -438,6 +439,7 @@ static int resume_target_kernel(bool platform_mode)
 		goto Enable_cpus;
 
 	local_irq_disable();
+	hard_cond_local_irq_disable();
 
 	error = syscore_suspend();
 	if (error)
@@ -556,6 +558,7 @@ int hibernation_platform_enter(void)
 		goto Enable_cpus;
 
 	local_irq_disable();
+	hard_cond_local_irq_disable();
 	syscore_suspend();
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index dd689ab22806..0c68af81e039 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -62,6 +62,9 @@ int console_printk[4] = {
 	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
 };
 
+/* Deferred messages from sched code are marked by this special level */
+#define SCHED_MESSAGE_LOGLEVEL -2
+
 /*
  * Low level drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
@@ -1302,7 +1305,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 int do_syslog(int type, char __user *buf, int len, int source)
 {
 	bool clear = false;
-	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
+	static int saved_console_loglevel = -1;
 	int error;
 
 	error = check_syslog_permissions(type, source);
@@ -1355,15 +1358,15 @@ int do_syslog(int type, char __user *buf, int len, int source)
 		break;
 	/* Disable logging to console */
 	case SYSLOG_ACTION_CONSOLE_OFF:
-		if (saved_console_loglevel == LOGLEVEL_DEFAULT)
+		if (saved_console_loglevel == -1)
 			saved_console_loglevel = console_loglevel;
 		console_loglevel = minimum_console_loglevel;
 		break;
 	/* Enable logging to console */
 	case SYSLOG_ACTION_CONSOLE_ON:
-		if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
+		if (saved_console_loglevel != -1) {
 			console_loglevel = saved_console_loglevel;
-			saved_console_loglevel = LOGLEVEL_DEFAULT;
+			saved_console_loglevel = -1;
 		}
 		break;
 	/* Set level of messages printed to console */
@@ -1375,7 +1378,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
 			len = minimum_console_loglevel;
 		console_loglevel = len;
 		/* Implicitly re-enable logging to console */
-		saved_console_loglevel = LOGLEVEL_DEFAULT;
+		saved_console_loglevel = -1;
 		error = 0;
 		break;
 	/* Number of chars in the log buffer */
@@ -1675,10 +1678,10 @@ asmlinkage int vprintk_emit(int facility, int level,
 	int printed_len = 0;
 	bool in_sched = false;
 	/* cpu currently holding logbuf_lock in this function */
-	static unsigned int logbuf_cpu = UINT_MAX;
+	static volatile unsigned int logbuf_cpu = UINT_MAX;
 
-	if (level == LOGLEVEL_SCHED) {
-		level = LOGLEVEL_DEFAULT;
+	if (level == SCHED_MESSAGE_LOGLEVEL) {
+		level = -1;
 		in_sched = true;
 	}
 
@@ -1743,9 +1746,8 @@ asmlinkage int vprintk_emit(int facility, int level,
 			const char *end_of_header = printk_skip_level(text);
 			switch (kern_level) {
 			case '0' ... '7':
-				if (level == LOGLEVEL_DEFAULT)
+				if (level == -1)
 					level = kern_level - '0';
-				/* fallthrough */
 			case 'd':	/* KERN_DEFAULT */
 				lflags |= LOG_PREFIX;
 			}
@@ -1759,7 +1761,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		}
 	}
 
-	if (level == LOGLEVEL_DEFAULT)
+	if (level == -1)
 		level = default_message_loglevel;
 
 	if (dict)
@@ -1837,7 +1839,7 @@ EXPORT_SYMBOL(vprintk_emit);
 
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
-	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+	return vprintk_emit(0, -1, NULL, 0, fmt, args);
 }
 EXPORT_SYMBOL(vprintk);
 
@@ -1880,6 +1882,66 @@ EXPORT_SYMBOL_GPL(vprintk_default);
  */
 DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
 
+#ifdef CONFIG_IPIPE
+
+extern int __ipipe_printk_bypass;
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock);
+
+static int __ipipe_printk_fill;
+
+static char __ipipe_printk_buf[__LOG_BUF_LEN];
+
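+/*
+ * When printk() is unsafe to call directly - e.g. from the head
+ * domain, or with the root stage stalled - the message is queued in
+ * this staging buffer, then flushed later from a root domain virtual
+ * IRQ handler.
+ */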
+int __ipipe_log_printk(const char *fmt, va_list args)
+{
+	int ret = 0, fbytes, oldcount;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&__ipipe_printk_lock, flags);
+
+	oldcount = __ipipe_printk_fill;
+	fbytes = __LOG_BUF_LEN - oldcount;
+	if (fbytes > 1) {
+		ret = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill,
+				 fbytes, fmt, args) + 1;
+		__ipipe_printk_fill += ret;
+	}
+
+	raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+
+	if (oldcount == 0)
+		ipipe_raise_irq(__ipipe_printk_virq);
+
+	return ret;
+}
+
+void __ipipe_flush_printk(unsigned virq, void *cookie)
+{
+	char *p = __ipipe_printk_buf;
+	int len, lmax, out = 0;
+	unsigned long flags;
+
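+	/*
+	 * Open-coded loop: hold the lock while sampling the fill
+	 * count, but drop it while feeding the buffered messages to
+	 * printk(), which may take its own locks.
+	 */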
+	goto start;
+
+	do {
+		raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+ start:
+		lmax = __ipipe_printk_fill;
+		while (out < lmax) {
+			len = strlen(p) + 1;
+			printk("%s", p);
+			p += len;
+			out += len;
+		}
+		raw_spin_lock_irqsave(&__ipipe_printk_lock, flags);
+	} while (__ipipe_printk_fill != lmax);
+
+	__ipipe_printk_fill = 0;
+
+	raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+}
+
 /**
  * printk - print a kernel message
  * @fmt: format string
@@ -1901,6 +1963,46 @@ DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
  *
  * See the vsnprintf() documentation for format string extensions over C99.
  */
+
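+/*
+ * I-pipe variant: print synchronously only when this is safe, i.e.
+ * from the unstalled root domain with the head stage quiescent;
+ * otherwise defer the output to __ipipe_log_printk().
+ */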
+asmlinkage __visible int printk(const char *fmt, ...)
+{
+	printk_func_t vprintk_func;
+	int sprintk = 1, cs = -1;
+	unsigned long flags;
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+
+	flags = hard_local_irq_save();
+
+	if (__ipipe_printk_bypass || oops_in_progress)
+		cs = ipipe_disable_context_check();
+	else if (__ipipe_current_domain == ipipe_root_domain) {
+		if (ipipe_head_domain != ipipe_root_domain &&
+		    (raw_irqs_disabled_flags(flags) ||
+		     test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)))
+			sprintk = 0;
+	} else
+		sprintk = 0;
+
+	hard_local_irq_restore(flags);
+
+	if (sprintk) {
+		vprintk_func = this_cpu_read(printk_func);
+		ret = vprintk_func(fmt, args);
+		if (cs != -1)
+			ipipe_restore_context_check(cs);
+	} else
+		ret = __ipipe_log_printk(fmt, args);
+
+	va_end(args);
+
+	return ret;
+}
+
+#else /* !CONFIG_IPIPE */
+
 asmlinkage __visible int printk(const char *fmt, ...)
 {
 	printk_func_t vprintk_func;
@@ -1922,6 +2024,8 @@ asmlinkage __visible int printk(const char *fmt, ...)
 
 	return r;
 }
+#endif /* CONFIG_IPIPE */
+
 EXPORT_SYMBOL(printk);
 
 #else /* CONFIG_PRINTK */
@@ -1969,21 +2073,80 @@ DEFINE_PER_CPU(printk_func_t, printk_func);
 #ifdef CONFIG_EARLY_PRINTK
 struct console *early_console;
 
+void early_vprintk(const char *fmt, va_list ap)
+{
+	if (early_console) {
+		char buf[512];
+		int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+
+		early_console->write(early_console, buf, n);
+	}
+}
+
 asmlinkage __visible void early_printk(const char *fmt, ...)
 {
 	va_list ap;
-	char buf[512];
-	int n;
 
-	if (!early_console)
+	va_start(ap, fmt);
+	early_vprintk(fmt, ap);
+	va_end(ap);
+}
+#endif
+
+#ifdef CONFIG_RAW_PRINTK
+static struct console *raw_console;
+static IPIPE_DEFINE_RAW_SPINLOCK(raw_console_lock);
+
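+/*
+ * Raw output bypasses the printk machinery entirely, writing to the
+ * raw console under a hard spinlock, so that it can be used from any
+ * domain and any context.
+ */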
+void raw_vprintk(const char *fmt, va_list ap)
+{
+	unsigned long flags;
+	char buf[256];
+	int n;
+
+	if (raw_console == NULL || console_suspended)
 		return;
 
-	va_start(ap, fmt);
 	n = vscnprintf(buf, sizeof(buf), fmt, ap);
+	touch_nmi_watchdog();
+	raw_spin_lock_irqsave(&raw_console_lock, flags);
+	if (raw_console)
+		raw_console->write_raw(raw_console, buf, n);
+	raw_spin_unlock_irqrestore(&raw_console_lock, flags);
+}
+
+asmlinkage __visible void raw_printk(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	raw_vprintk(fmt, ap);
 	va_end(ap);
+}
 
-	early_console->write(early_console, buf, n);
+static inline void register_raw_console(struct console *newcon)
+{
+	if ((newcon->flags & CON_RAW) != 0 && newcon->write_raw)
+		raw_console = newcon;
 }
+
+static inline void unregister_raw_console(struct console *oldcon)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&raw_console_lock, flags);
+	if (oldcon == raw_console)
+		raw_console = NULL;
+	raw_spin_unlock_irqrestore(&raw_console_lock, flags);
+}
+
+#else
+
+static inline void register_raw_console(struct console *newcon)
+{ }
+
+static inline void unregister_raw_console(struct console *oldcon)
+{ }
+
 #endif
 
 static int __add_preferred_console(char *name, int idx, char *options,
@@ -2603,6 +2766,9 @@ void register_console(struct console *newcon)
 		console_drivers->next = newcon;
 	}
 
+	/* The latest raw console to register is current. */
+	register_raw_console(newcon);
+
 	if (newcon->flags & CON_EXTENDED)
 		if (!nr_ext_console_drivers++)
 			pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n");
@@ -2659,6 +2825,8 @@ int unregister_console(struct console *console)
 		(console->flags & CON_BOOT) ? "boot" : "" ,
 		console->name, console->index);
 
+	unregister_raw_console(console);
+
 	res = _braille_unregister_console(console);
 	if (res)
 		return res;
@@ -2755,7 +2923,7 @@ int printk_deferred(const char *fmt, ...)
 
 	preempt_disable();
 	va_start(args, fmt);
-	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
+	r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args);
 	va_end(args);
 
 	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
@@ -3158,6 +3326,9 @@ void dump_stack_print_info(const char *log_lvl)
 	if (dump_stack_arch_desc_str[0] != '\0')
 		printk("%sHardware name: %s\n",
 		       log_lvl, dump_stack_arch_desc_str);
+#ifdef CONFIG_IPIPE
+	printk("I-pipe domain: %s\n", ipipe_current_domain->name);
+#endif
 
 	print_worker_info(log_lvl, current);
 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d0618951014b..c34664c13448 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1235,10 +1235,13 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	do_set_cpus_allowed(p, new_mask);
 
 	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), new_mask))
+	if (cpumask_test_cpu(task_cpu(p), new_mask)) {
+		__ipipe_report_setaffinity(p, task_cpu(p));
 		goto out;
+	}
 
 	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	__ipipe_report_setaffinity(p, dest_cpu);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1834,7 +1837,9 @@ void scheduler_ipi(void)
 	 * however a fair share of IPIs are still resched only so this would
 	 * somewhat pessimize the simple resched case.
 	 */
+#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI
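+	/*
+	 * When the arch relays this IPI through a pipeline virtual
+	 * IRQ, interrupt entry is presumably accounted for upstream,
+	 * so irq_enter()/irq_exit() must be skipped here.
+	 */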
 	irq_enter();
+#endif
 	sched_ttwu_pending();
 
 	/*
@@ -1844,7 +1849,9 @@ void scheduler_ipi(void)
 		this_rq()->idle_balance = 1;
 		raise_softirq_irqoff(SCHED_SOFTIRQ);
 	}
+#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI
 	irq_exit();
+#endif
 }
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
@@ -1937,7 +1944,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 */
 	smp_mb__before_spinlock();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	if (!(p->state & state))
+	if (!(p->state & state) ||
+	    (p->state & (TASK_NOWAKEUP|TASK_HARDENING)))
 		goto out;
 
 	trace_sched_waking(p);
@@ -2677,6 +2685,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	 * PREEMPT_COUNT kernels).
 	 */
 
+	__ipipe_complete_domain_migration();
 	rq = finish_task_switch(prev);
 	balance_callback(rq);
 	preempt_enable();
@@ -2729,6 +2738,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	switch_to(prev, next, prev);
 	barrier();
 
+	if (unlikely(__ipipe_switch_tail()))
+		return NULL;
+
 	return finish_task_switch(prev);
 }
 
@@ -2951,6 +2963,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
 
 void preempt_count_add(int val)
 {
+	ipipe_preempt_root_only();
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
@@ -2979,6 +2992,7 @@ NOKPROBE_SYMBOL(preempt_count_add);
 
 void preempt_count_sub(int val)
 {
+	ipipe_preempt_root_only();
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
@@ -3033,6 +3047,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
  */
 static inline void schedule_debug(struct task_struct *prev)
 {
+	ipipe_root_only();
 #ifdef CONFIG_SCHED_STACK_END_CHECK
 	if (task_stack_end_corrupted(prev))
 		panic("corrupted stack end detected inside scheduler\n");
@@ -3127,7 +3142,7 @@ again:
  *
  * WARNING: must be called with preemption disabled!
  */
-static void __sched notrace __schedule(bool preempt)
+static int __sched notrace __schedule(bool preempt)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -3205,13 +3220,18 @@ static void __sched notrace __schedule(bool preempt)
 
 		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
+		if (rq == NULL)
+			return 1; /* task hijacked by head domain */
 		cpu = cpu_of(rq);
 	} else {
+		prev->state &= ~TASK_HARDENING;
 		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock_irq(&rq->lock);
 	}
 
 	balance_callback(rq);
+
+	return 0;
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -3233,7 +3253,8 @@ asmlinkage __visible void __sched schedule(void)
 	sched_submit_work(tsk);
 	do {
 		preempt_disable();
-		__schedule(false);
+		if (__schedule(false))
+			return;
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
 }
@@ -3274,7 +3295,8 @@ static void __sched notrace preempt_schedule_common(void)
 {
 	do {
 		preempt_disable_notrace();
-		__schedule(true);
+		if (__schedule(true))
+			return;
 		preempt_enable_no_resched_notrace();
 
 		/*
@@ -3296,7 +3318,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task. Just return..
 	 */
-	if (likely(!preemptible()))
+	if (likely(!preemptible() || !ipipe_root_p))
 		return;
 
 	preempt_schedule_common();
@@ -3322,7 +3344,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 {
 	enum ctx_state prev_ctx;
 
-	if (likely(!preemptible()))
+	if (likely(!preemptible() || !ipipe_root_p || hard_irqs_disabled()))
 		return;
 
 	do {
@@ -4019,6 +4041,7 @@ change:
 
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, attr, pi);
+	__ipipe_report_setsched(p);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -8675,3 +8698,42 @@ void dump_cpu_task(int cpu)
 	pr_info("Task dump for CPU %d:\n", cpu);
 	sched_show_task(cpu_curr(cpu));
 }
+
+#ifdef CONFIG_IPIPE
+
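+/*
+ * Hand the current task over to the head domain: mark it as
+ * hijacked, then schedule out. A zero return from __schedule()
+ * means a pending signal spoiled the migration before completion.
+ */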
+int __ipipe_migrate_head(void)
+{
+	struct task_struct *p = current;
+
+	preempt_disable();
+
+	IPIPE_WARN_ONCE(__this_cpu_read(ipipe_percpu.task_hijacked) != NULL);
+
+	__this_cpu_write(ipipe_percpu.task_hijacked, p);
+	set_current_state(TASK_INTERRUPTIBLE | TASK_HARDENING);
+	sched_submit_work(p);
+	if (likely(__schedule(false)))
+		return 0;
+
+	BUG_ON(!signal_pending(p));
+
+	preempt_enable();
+	return -ERESTARTSYS;
+}
+EXPORT_SYMBOL_GPL(__ipipe_migrate_head);
+
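+/*
+ * Called when a hijacked task resumes execution over the root
+ * domain: finish the task switch left pending by the head domain,
+ * then drop the scheduler protection.
+ */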
+void __ipipe_reenter_root(void)
+{
+	struct rq *rq;
+	struct task_struct *p;
+
+	p = __this_cpu_read(ipipe_percpu.rqlock_owner);
+	BUG_ON(p == NULL);
+	ipipe_clear_thread_flag(TIP_HEAD);
+	rq = finish_task_switch(p);
+	balance_callback(rq);
+	preempt_enable_no_resched_notrace();
+}
+EXPORT_SYMBOL_GPL(__ipipe_reenter_root);
+
+#endif /* CONFIG_IPIPE */
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f15d6b6a538a..32ffbe389cbc 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -67,6 +67,8 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 {
 	wait_queue_t *curr, *next;
 
+	ipipe_root_only();
+
 	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
 		unsigned flags = curr->flags;
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 5b1313309356..273b26111aa9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -649,6 +649,10 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 void signal_wake_up_state(struct task_struct *t, unsigned int state)
 {
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
+
+	/* TIF_SIGPENDING must be prior to reporting. */
+	__ipipe_report_sigwake(t);
+
 	/*
 	 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
 	 * case. We don't check t->state here because there is a race with it
@@ -872,8 +876,11 @@ static inline int wants_signal(int sig, struct task_struct *p)
 		return 0;
 	if (sig == SIGKILL)
 		return 1;
-	if (task_is_stopped_or_traced(p))
+	if (task_is_stopped_or_traced(p)) {
+		if (!signal_pending(p))
+			__ipipe_report_sigwake(p);
 		return 0;
+	}
 	return task_curr(p) || !signal_pending(p);
 }
 
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index a9b76a40319e..1a0e615dd00b 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/smp.h>
 #include <linux/device.h>
+#include <linux/ipipe_tickdev.h>
 
 #include "tick-internal.h"
 
@@ -453,6 +454,8 @@ void clockevents_register_device(struct clock_event_device *dev)
 	/* Initialize state to DETACHED */
 	clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
 
+	ipipe_host_timer_register(dev);
+
 	if (!dev->cpumask) {
 		WARN_ON(num_possible_cpus() > 1);
 		dev->cpumask = cpumask_of(smp_processor_id());
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b98810d2f3b4..a5928e4fb37b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
 #include <linux/kthread.h>
+#include <linux/kallsyms.h>
 
 #include "tick-internal.h"
 #include "timekeeping_internal.h"
@@ -172,6 +173,9 @@ static void clocksource_watchdog(unsigned long data)
 	cycle_t csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
+#ifdef CONFIG_IPIPE
+	cycle_t wdref;
+#endif
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
@@ -188,11 +192,24 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
+#ifdef CONFIG_IPIPE
+retry:
+#endif
 		local_irq_disable();
+#ifdef CONFIG_IPIPE
+		wdref = watchdog->read(watchdog);
+#endif
 		csnow = cs->read(cs);
 		wdnow = watchdog->read(watchdog);
 		local_irq_enable();
 
+#ifdef CONFIG_IPIPE
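+		/*
+		 * A head domain interrupt may have delayed us between
+		 * the two watchdog reads; retry whenever the sampling
+		 * window itself exceeds the threshold, so that the
+		 * delay is not charged to the clocksource under test.
+		 */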
+		wd_nsec = clocksource_cyc2ns((wdnow - wdref) & watchdog->mask,
+					     watchdog->mult, watchdog->shift);
+		if (wd_nsec > WATCHDOG_THRESHOLD)
+			goto retry;
+#endif
+
 		/* Clocksource initialized ? */
 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
 		    atomic_read(&watchdog_reset_pending)) {
@@ -661,6 +678,95 @@ static int __init clocksource_done_booting(void)
 }
 fs_initcall(clocksource_done_booting);
 
+#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE
+unsigned long long __ipipe_cs_freq;
+EXPORT_SYMBOL_GPL(__ipipe_cs_freq);
+
+struct clocksource *__ipipe_cs;
+EXPORT_SYMBOL_GPL(__ipipe_cs);
+
+cycle_t (*__ipipe_cs_read)(struct clocksource *cs);
+cycle_t __ipipe_cs_last_tsc;
+cycle_t __ipipe_cs_mask;
+unsigned __ipipe_cs_lat = 0xffffffff;
+
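+/*
+ * Evaluate each registered clocksource as a candidate for feeding
+ * the pipeline's high-resolution timestamping: it must be
+ * continuous, count at 1MHz or better, and show the lowest read
+ * latency among the candidates.
+ */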
+static void ipipe_check_clocksource(struct clocksource *cs)
+{
+	cycle_t (*cread)(struct clocksource *cs);
+	cycle_t lat, mask, saved;
+	unsigned long long freq;
+	unsigned long flags;
+	unsigned i;
+
+	if (cs->ipipe_read) {
+		mask = CLOCKSOURCE_MASK(64);
+		cread = cs->ipipe_read;
+	} else {
+		mask = cs->mask;
+		cread = cs->read;
+
+		if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) == 0)
+			return;
+
+		/*
+		 * We only support masks such that cs->mask + 1 is a power of
+		 * 2: either full 64-bit masks, or masks narrower than 32 bits.
+		 */
+		if (mask != CLOCKSOURCE_MASK(64)
+		    && ((mask & (mask + 1)) != 0 || mask > 0xffffffff))
+			return;
+	}
+
+	/*
+	 * We prefer a clocksource with a better resolution than 1us
+	 */
+	if (cs->shift <= 34) {
+		freq = 1000000000ULL << cs->shift;
+		do_div(freq, cs->mult);
+	} else {
+		freq = 1000000ULL << cs->shift;
+		do_div(freq, cs->mult);
+		freq *= 1000;
+	}
+	if (freq < 1000000)
+		return;
+
+	/* Measure the clocksource latency */
+	flags = hard_local_irq_save();
+	saved = __ipipe_cs_last_tsc;
+	lat = cread(cs);
+	for (i = 0; i < 10; i++)
+		cread(cs);
+	lat = cread(cs) - lat;
+	__ipipe_cs_last_tsc = saved;
+	hard_local_irq_restore(flags);
+	lat = (lat * cs->mult) >> cs->shift;
+	do_div(lat, i + 1);
+
+	if (!strcmp(cs->name, override_name))
+		goto skip_tests;
+
+	if (lat > __ipipe_cs_lat)
+		return;
+
+	if (__ipipe_cs && !strcmp(__ipipe_cs->name, override_name))
+		return;
+
+  skip_tests:
+	flags = hard_local_irq_save();
+	if (__ipipe_cs_last_tsc == 0) {
+		__ipipe_cs_lat = lat;
+		__ipipe_cs_freq = freq;
+		__ipipe_cs = cs;
+		__ipipe_cs_read = cread;
+		__ipipe_cs_mask = mask;
+	}
+	hard_local_irq_restore(flags);
+}
+#else /* !CONFIG_IPIPE_WANT_CLOCKSOURCE */
+#define ipipe_check_clocksource(cs)	do { } while (0)
+#endif /* !CONFIG_IPIPE_WANT_CLOCKSOURCE */
+
 /*
  * Enqueue the clocksource sorted by rating
  */
@@ -674,6 +780,8 @@ static void clocksource_enqueue(struct clocksource *cs)
 		if (tmp->rating >= cs->rating)
 			entry = &tmp->list;
 	list_add(&cs->list, entry);
+
+	ipipe_check_clocksource(cs);
 }
 
 /**
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 4fcd99e12aa0..6c4d3cae7dd1 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -89,7 +89,7 @@ static void tick_periodic(int cpu)
 		update_wall_time();
 	}
 
-	update_process_times(user_mode(get_irq_regs()));
+	update_root_process_times(get_irq_regs());
 	profile_tick(CPU_PROFILING);
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5ad2e852e9f6..6d2866addc2b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -148,7 +148,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 			ts->idle_jiffies++;
 	}
 #endif
-	update_process_times(user_mode(regs));
+	update_root_process_times(regs);
 	profile_tick(CPU_PROFILING);
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fed86b2dfc89..cedc2f25a7b5 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -488,7 +488,7 @@ static inline void update_vsyscall(struct timekeeper *tk)
 	xt = timespec64_to_timespec(tk_xtime(tk));
 	wm = timespec64_to_timespec(tk->wall_to_monotonic);
 	update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
-			    tk->tkr_mono.cycle_last);
+			    tk->tkr_mono.shift, tk->tkr_mono.cycle_last);
 }
 
 static inline void old_vsyscall_fixup(struct timekeeper *tk)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 3d7588a2e97c..a4691fd908db 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1433,6 +1433,24 @@ void update_process_times(int user_tick)
 	run_posix_cpu_timers(p);
 }
 
+#ifdef CONFIG_IPIPE
+
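+/*
+ * If the root domain was the intended recipient of this tick, run
+ * the regular machinery; otherwise only carry out the duties which
+ * are safe to perform over a preempted root stage.
+ */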
+void update_root_process_times(struct pt_regs *regs)
+{
+	int user_tick = user_mode(regs);
+
+	if (__ipipe_root_tick_p(regs)) {
+		update_process_times(user_tick);
+		return;
+	}
+
+	run_local_timers();
+	rcu_check_callbacks(user_tick);
+	run_posix_cpu_timers(current);
+}
+
+#endif
+
 /*
  * This function runs timers and the timer-tq in bottom half context.
  */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e45db6b0d878..293de2459026 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -447,6 +447,7 @@ config DYNAMIC_FTRACE
 	bool "enable/disable function tracing dynamically"
 	depends on FUNCTION_TRACER
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on !IPIPE
 	default y
 	help
 	  This option will modify all the calls to function tracing
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ac758a53fcea..f16c5bf16df3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
+#include <linux/ipipe.h>
 
 #include <trace/events/sched.h>
 
@@ -267,8 +268,17 @@ static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
 
 static void update_ftrace_function(void)
 {
+	struct ftrace_ops *ops;
 	ftrace_func_t func;
 
+	for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
+		if (ops->flags & FTRACE_OPS_FL_IPIPE_EXCLUSIVE) {
+			set_function_trace_op = ops;
+			func = ops->func;
+			goto set_pointers;
+		}
+	}
+
 	/*
 	 * Prepare the ftrace_ops that the arch callback will use.
 	 * If there's only one ftrace_ops registered, the ftrace_ops_list
@@ -296,6 +306,7 @@ static void update_ftrace_function(void)
 
 	update_function_graph_func();
 
+  set_pointers:
 	/* If there's no change, then do nothing more here */
 	if (ftrace_trace_function == func)
 		return;
@@ -2528,6 +2539,9 @@ void __weak arch_ftrace_update_code(int command)
 
 static void ftrace_run_update_code(int command)
 {
+#ifdef CONFIG_IPIPE
+	unsigned long flags;
+#endif /* CONFIG_IPIPE */
 	int ret;
 
 	ret = ftrace_arch_code_modify_prepare();
@@ -2541,7 +2555,13 @@ static void ftrace_run_update_code(int command)
 	 * is safe. The stop_machine() is the safest, but also
 	 * produces the most overhead.
 	 */
+#ifdef CONFIG_IPIPE
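+	/*
+	 * stop_machine() is unaware of out-of-band CPU activity, so
+	 * synchronize all CPUs through the pipeline's critical section
+	 * instead while the kernel text is being patched.
+	 */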
+	flags = ipipe_critical_enter(NULL);
+	__ftrace_modify_code(&command);
+	ipipe_critical_exit(flags);
+#else  /* !CONFIG_IPIPE */
 	arch_ftrace_update_code(command);
+#endif /* !CONFIG_IPIPE */
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
@@ -4893,10 +4913,10 @@ static int ftrace_process_locs(struct module *mod,
 	 * reason to cause large interrupt latencies while we do it.
 	 */
 	if (!mod)
-		local_irq_save(flags);
+		flags = hard_local_irq_save();
 	ftrace_update_code(mod, start_pg);
 	if (!mod)
-		local_irq_restore(flags);
+		hard_local_irq_restore(flags);
 	ret = 0;
  out:
 	mutex_unlock(&ftrace_lock);
@@ -4995,9 +5015,11 @@ void __init ftrace_init(void)
 	unsigned long count, flags;
 	int ret;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save_notrace();
 	ret = ftrace_dyn_arch_init();
-	local_irq_restore(flags);
+	hard_local_irq_restore_notrace(flags);
+
+	/* A non-zero return from ftrace_dyn_arch_init() means failure. */
 	if (ret)
 		goto failed;
 
@@ -5190,7 +5212,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		}
 	} while_for_each_ftrace_op(op);
 out:
-	preempt_enable_notrace();
+#ifdef CONFIG_IPIPE
+	if (hard_irqs_disabled() || !__ipipe_root_p)
+		/*
+		 * Nothing urgent to schedule here. At latest the timer tick
+		 * will pick up whatever the tracing functions kicked off.
+		 */
+		preempt_enable_no_resched_notrace();
+	else
+#endif
+		preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 74b20e3ab8c6..fcce701018e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2616,7 +2616,8 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline int
 trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	unsigned int val = cpu_buffer->current_context;
+	unsigned long flags;
+	unsigned int val;
 	int bit;
 
 	if (in_interrupt()) {
@@ -2629,19 +2630,30 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 	} else
 		bit = RB_CTX_NORMAL;
 
-	if (unlikely(val & (1 << bit)))
+	flags = hard_local_irq_save();
+
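+	/*
+	 * current_context may also be updated over the head domain,
+	 * e.g. when tracing out-of-band code, so the read-modify-write
+	 * sequence below has to be protected by hard IRQ masking.
+	 */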
+	val = cpu_buffer->current_context;
+	if (unlikely(val & (1 << bit))) {
+		hard_local_irq_restore(flags);
 		return 1;
+	}
 
 	val |= (1 << bit);
 	cpu_buffer->current_context = val;
 
+	hard_local_irq_restore(flags);
+
 	return 0;
 }
 
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
+	unsigned long flags;
+
+	flags = hard_local_irq_save();
 	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+	hard_local_irq_restore(flags);
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1a47a64d623f..b62c0dd3d340 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2164,8 +2164,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	/* Don't pollute graph traces with trace_vprintk internals */
 	pause_graph_tracing();
 
+	flags = hard_local_irq_save();
+
 	pc = preempt_count();
-	preempt_disable_notrace();
 
 	tbuffer = get_trace_buf();
 	if (!tbuffer) {
@@ -2178,7 +2179,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
 		goto out;
 
-	local_save_flags(flags);
 	size = sizeof(*entry) + sizeof(u32) * len;
 	buffer = tr->trace_buffer.buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
@@ -2196,7 +2196,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	}
 
 out:
-	preempt_enable_notrace();
+	hard_local_irq_restore(flags);
 	unpause_graph_tracing();
 
 	return len;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 0f06532a755b..51e15d61d333 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -95,7 +95,7 @@ u64 notrace trace_clock_global(void)
 	int this_cpu;
 	u64 now;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save_notrace();
 
 	this_cpu = raw_smp_processor_id();
 	now = sched_clock_cpu(this_cpu);
@@ -121,7 +121,7 @@ u64 notrace trace_clock_global(void)
 	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-	local_irq_restore(flags);
+	hard_local_irq_restore_notrace(flags);
 
 	return now;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index fcd41a166405..2c44ecdff9c5 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -171,7 +171,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	 * Need to use raw, since this must be called before the
 	 * recursive protection is performed.
 	 */
-	local_irq_save(flags);
+	flags = hard_local_irq_save();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);
@@ -191,7 +191,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	}
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	hard_local_irq_restore(flags);
 }
 
 static struct tracer_opt func_opts[] = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e212ec4cfb4e..92f167b205c0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -337,7 +337,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	if (ftrace_graph_notrace_addr(trace->func))
 		return 1;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save_notrace();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);
@@ -349,7 +349,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	}
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	hard_local_irq_restore_notrace(flags);
 
 	return ret;
 }
@@ -419,7 +419,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 	int cpu;
 	int pc;
 
-	local_irq_save(flags);
+	flags = hard_local_irq_save_notrace();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);
@@ -428,7 +428,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 		__trace_graph_return(tr, trace, flags, pc);
 	}
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	hard_local_irq_restore_notrace(flags);
 }
 
 void set_graph_array(struct trace_array *tr)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index be3222b7d72e..3b831d83db8f 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -471,28 +471,28 @@ inline void print_irqtrace_events(struct task_struct *curr)
  */
 void trace_hardirqs_on(void)
 {
-	if (!preempt_trace() && irq_trace())
+	if (ipipe_root_p && !preempt_trace() && irq_trace())
 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 EXPORT_SYMBOL(trace_hardirqs_on);
 
 void trace_hardirqs_off(void)
 {
-	if (!preempt_trace() && irq_trace())
+	if (ipipe_root_p && !preempt_trace() && irq_trace())
 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 EXPORT_SYMBOL(trace_hardirqs_off);
 
 __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
 {
-	if (!preempt_trace() && irq_trace())
+	if (ipipe_root_p && !preempt_trace() && irq_trace())
 		stop_critical_timing(CALLER_ADDR0, caller_addr);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
 __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
 {
-	if (!preempt_trace() && irq_trace())
+	if (ipipe_root_p && !preempt_trace() && irq_trace())
 		start_critical_timing(CALLER_ADDR0, caller_addr);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f0602beeba26..66d8a63b998f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -382,6 +382,8 @@ config MAGIC_SYSRQ_DEFAULT_ENABLE
 	  This may be set to 1 or 0 to enable or disable them all, or
 	  to a bitmask as described in Documentation/sysrq.txt.
 
+source "kernel/ipipe/Kconfig.debug"
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
@@ -654,7 +657,7 @@ config HAVE_DEBUG_STACKOVERFLOW
 
 config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
-	depends on DEBUG_KERNEL && HAVE_DEBUG_STACKOVERFLOW
+	depends on DEBUG_KERNEL && HAVE_DEBUG_STACKOVERFLOW && !IPIPE_LEGACY
 	---help---
 	  Say Y here if you want to check for overflows of kernel, IRQ
 	  and exception stacks (if your architecture uses them). This
@@ -1202,7 +1205,7 @@ config DEBUG_CREDENTIALS
 menu "RCU Debugging"
 
 config PROVE_RCU
-	def_bool PROVE_LOCKING
+	def_bool PROVE_LOCKING && !IPIPE
 
 config PROVE_RCU_REPEATEDLY
 	bool "RCU debugging: don't disable PROVE_RCU on first splat"
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 2886ebac6567..0d2c4dd484f6 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -29,15 +29,15 @@
  * Ensure each lock is in a separate cacheline.
  */
 static union {
-	raw_spinlock_t lock;
+	ipipe_spinlock_t lock;
 	char pad[L1_CACHE_BYTES];
 } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
 	[0 ... (NR_LOCKS - 1)] = {
-		.lock =  __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+		.lock =  IPIPE_SPIN_LOCK_UNLOCKED,
 	},
 };
 
-static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
+static inline ipipe_spinlock_t *lock_addr(const atomic64_t *v)
 {
 	unsigned long addr = (unsigned long) v;
 
@@ -49,7 +49,7 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
 long long atomic64_read(const atomic64_t *v)
 {
 	unsigned long flags;
-	raw_spinlock_t *lock = lock_addr(v);
+	ipipe_spinlock_t *lock = lock_addr(v);
 	long long val;
 
 	raw_spin_lock_irqsave(lock, flags);
@@ -62,7 +62,7 @@ EXPORT_SYMBOL(atomic64_read);
 void atomic64_set(atomic64_t *v, long long i)
 {
 	unsigned long flags;
-	raw_spinlock_t *lock = lock_addr(v);
+	ipipe_spinlock_t *lock = lock_addr(v);
 
 	raw_spin_lock_irqsave(lock, flags);
 	v->counter = i;
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(atomic64_set);
 void atomic64_##op(long long a, atomic64_t *v)				\
 {									\
 	unsigned long flags;						\
-	raw_spinlock_t *lock = lock_addr(v);				\
+	ipipe_spinlock_t *lock = lock_addr(v);				\
 									\
 	raw_spin_lock_irqsave(lock, flags);				\
 	v->counter c_op a;						\
@@ -86,7 +86,7 @@ EXPORT_SYMBOL(atomic64_##op);
 long long atomic64_##op##_return(long long a, atomic64_t *v)		\
 {									\
 	unsigned long flags;						\
-	raw_spinlock_t *lock = lock_addr(v);				\
+	ipipe_spinlock_t *lock = lock_addr(v);				\
 	long long val;							\
 									\
 	raw_spin_lock_irqsave(lock, flags);				\
@@ -113,7 +113,7 @@ ATOMIC64_OP(xor, ^=)
 long long atomic64_dec_if_positive(atomic64_t *v)
 {
 	unsigned long flags;
-	raw_spinlock_t *lock = lock_addr(v);
+	ipipe_spinlock_t *lock = lock_addr(v);
 	long long val;
 
 	raw_spin_lock_irqsave(lock, flags);
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(atomic64_dec_if_positive);
 long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
 {
 	unsigned long flags;
-	raw_spinlock_t *lock = lock_addr(v);
+	ipipe_spinlock_t *lock = lock_addr(v);
 	long long val;
 
 	raw_spin_lock_irqsave(lock, flags);
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(atomic64_cmpxchg);
 long long atomic64_xchg(atomic64_t *v, long long new)
 {
 	unsigned long flags;
-	raw_spinlock_t *lock = lock_addr(v);
+	ipipe_spinlock_t *lock = lock_addr(v);
 	long long val;
 
 	raw_spin_lock_irqsave(lock, flags);
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(atomic64_xchg);
 int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
-	raw_spinlock_t *lock = lock_addr(v);
+	ipipe_spinlock_t *lock = lock_addr(v);
 	int ret = 0;
 
 	raw_spin_lock_irqsave(lock, flags);
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index f8e0e5367398..02175aa39f08 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -14,6 +14,7 @@
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
 #include <linux/console.h>
+#include <linux/ipipe_trace.h>
 
 
 void __attribute__((weak)) bust_spinlocks(int yes)
@@ -25,6 +26,7 @@ void __attribute__((weak)) bust_spinlocks(int yes)
 		unblank_screen();
 #endif
 		console_unblank();
+		ipipe_trace_panic_dump();
 		if (--oops_in_progress == 0)
 			wake_up_klogd();
 	}
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index c30d07e99dba..6757350a4ecb 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/atomic.h>
+#include <linux/ipipe.h>
 
 static void __dump_stack(void)
 {
@@ -23,6 +24,29 @@ static void __dump_stack(void)
 #ifdef CONFIG_SMP
 static atomic_t dump_lock = ATOMIC_INIT(-1);
 
+static unsigned long disable_local_irqs(void)
+{
+	unsigned long flags = 0; /* zero-init only to silence UMR detection */
+
+	/*
+	 * We neither need nor want to disable root stage IRQs over
+	 * the head stage, where CPU migration can't
+	 * happen. Conversely, we neither need nor want to disable
+	 * hard IRQs from the head stage, so that latency won't
+	 * skyrocket as a result of dumping the stack backtrace.
+	 */
+	if (ipipe_root_p)
+		local_irq_save(flags);
+
+	return flags;
+}
+
+static void restore_local_irqs(unsigned long flags)
+{
+	if (ipipe_root_p)
+		local_irq_restore(flags);
+}
+
 asmlinkage __visible void dump_stack(void)
 {
 	unsigned long flags;
@@ -35,7 +59,7 @@ asmlinkage __visible void dump_stack(void)
 	 * against other CPUs
 	 */
 retry:
-	local_irq_save(flags);
+	flags = disable_local_irqs();
 	cpu = smp_processor_id();
 	old = atomic_cmpxchg(&dump_lock, -1, cpu);
 	if (old == -1) {
@@ -43,7 +67,7 @@ retry:
 	} else if (old == cpu) {
 		was_locked = 1;
 	} else {
-		local_irq_restore(flags);
+		restore_local_irqs(flags);
 		cpu_relax();
 		goto retry;
 	}
@@ -53,7 +77,7 @@ retry:
 	if (!was_locked)
 		atomic_set(&dump_lock, -1);
 
-	local_irq_restore(flags);
+	restore_local_irqs(flags);
 }
 #else
 asmlinkage __visible void dump_stack(void)
diff --git a/lib/ioremap.c b/lib/ioremap.c
index b9462037868d..ba5503bb2ff6 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -10,6 +10,7 @@
 #include <linux/sched.h>
 #include <linux/io.h>
 #include <linux/export.h>
+#include <linux/hardirq.h>
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
@@ -142,7 +143,12 @@ int ioremap_page_range(unsigned long addr,
 			break;
 	} while (pgd++, addr = next, addr != end);
 
-	flush_cache_vmap(start, end);
+	/*
+	 * APEI may invoke this for temporarily remapping pages in interrupt
+	 * context - nothing we can or need to propagate globally.
+	 */
+	if (!in_interrupt()) {
+		__ipipe_pin_mapping_globally(start, end);
+		flush_cache_vmap(start, end);
+	}
 
 	return err;
 }
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 1afec32de6f2..f7c1a2ac57fe 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -12,6 +12,12 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
 {
 	int this_cpu = raw_smp_processor_id();
 
+	if (hard_irqs_disabled())
+		goto out;
+
+	if (!ipipe_root_p)
+		goto out;
+
 	if (likely(preempt_count()))
 		goto out;
 
diff --git a/mm/memory.c b/mm/memory.c
index 5aee9ec8b8c6..2f4f404ee6f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -121,6 +121,11 @@ unsigned long highest_memmap_pfn __read_mostly;
 
 EXPORT_SYMBOL(zero_pfn);
 
+static inline void cow_user_page(struct page *dst,
+				 struct page *src,
+				 unsigned long va,
+				 struct vm_area_struct *vma);
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -836,8 +841,8 @@ out:
 
 static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+	     unsigned long addr, int *rss, struct page *uncow_page)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -889,6 +894,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags)) {
+#ifdef CONFIG_IPIPE
+		if (uncow_page) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			cow_user_page(uncow_page, old_page, addr, vma);
+			pte = mk_pte(uncow_page, vma->vm_page_prot);
+
+			if (vm_flags & VM_SHARED)
+				pte = pte_mkclean(pte);
+			pte = pte_mkold(pte);
+
+			page_add_new_anon_rmap(uncow_page, vma, addr);
+			rss[!!PageAnon(uncow_page)]++;
+			goto out_set_pte;
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -926,13 +946,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	int progress = 0;
 	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
-
+	struct page *uncow_page = NULL;
+#ifdef CONFIG_IPIPE
+	int do_cow_break = 0;
 again:
+	if (do_cow_break) {
+		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (uncow_page == NULL)
+			return -ENOMEM;
+		do_cow_break = 0;
+	}
+#else
+again:
+#endif
 	init_rss_vec(rss);
 
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
+	if (!dst_pte) {
+		if (uncow_page)
+			page_cache_release(uncow_page);
 		return -ENOMEM;
+	}
 	src_pte = pte_offset_map(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -955,8 +989,25 @@ again:
 			progress++;
 			continue;
 		}
+#ifdef CONFIG_IPIPE
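+		/*
+		 * For COW mappings of a pinned, locked mm, break COW
+		 * eagerly at fork time by copying the page now, so the
+		 * parent never takes a late copy-on-write fault, e.g.
+		 * from the head domain.
+		 */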
+		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+			if (is_cow_mapping(vma->vm_flags) &&
+			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
+			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
+				arch_leave_lazy_mmu_mode();
+				spin_unlock(src_ptl);
+				pte_unmap(src_pte);
+				add_mm_rss_vec(dst_mm, rss);
+				pte_unmap_unlock(dst_pte, dst_ptl);
+				cond_resched();
+				do_cow_break = 1;
+				goto again;
+			}
+		}
+#endif
 		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-							vma, addr, rss);
+					 vma, addr, rss, uncow_page);
+		uncow_page = NULL;
 		if (entry.val)
 			break;
 		progress += 8;
@@ -3935,6 +3986,41 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+#ifdef CONFIG_IPIPE
+
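+/*
+ * Commit all demand mappings of @tsk to memory and mark its mm as
+ * pinned, so that no minor fault may be taken later, typically from
+ * the head domain where fault handling is not an option.
+ */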
+int __ipipe_disable_ondemand_mappings(struct task_struct *tsk)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	int result = 0;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	down_write(&mm->mmap_sem);
+	if (test_bit(MMF_VM_PINNED, &mm->flags))
+		goto done_mm;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (is_cow_mapping(vma->vm_flags) &&
+		    (vma->vm_flags & VM_WRITE)) {
+			result = __ipipe_pin_vma(mm, vma);
+			if (result < 0)
+				goto done_mm;
+		}
+	}
+	set_bit(MMF_VM_PINNED, &mm->flags);
+
+  done_mm:
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	return result;
+}
+EXPORT_SYMBOL_GPL(__ipipe_disable_ondemand_mappings);
+
+#endif /* CONFIG_IPIPE */
+
 #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS
 
 static struct kmem_cache *page_ptl_cachep;
diff --git a/mm/mlock.c b/mm/mlock.c
index 9d2e773f3a95..54a1cdb18fd1 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -794,3 +794,28 @@ void user_shm_unlock(size_t size, struct user_struct *user)
 	spin_unlock(&shmlock_user_lock);
 	free_uid(user);
 }
+
+#ifdef CONFIG_IPIPE
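+/* Fault in and pin down every page frame covered by @vma. */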
+int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	int ret, write, len;
+
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+		return 0;
+
+	if (!((vma->vm_flags & VM_DONTEXPAND) ||
+	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(mm))) {
+		ret = populate_vma_page_range(vma, vma->vm_start, vma->vm_end,
+					      NULL);
+		return ret < 0 ? ret : 0;
+	}
+
+	write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
+	len = DIV_ROUND_UP(vma->vm_end, PAGE_SIZE) - vma->vm_start / PAGE_SIZE;
+	ret = get_user_pages(current, mm, vma->vm_start,
+			     len, write, 0, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	return ret == len ? 0 : -EFAULT;
+}
+#endif
diff --git a/mm/mmap.c b/mm/mmap.c
index 3074dbcd9621..09fbc80300f8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -50,6 +50,10 @@
 
 #include "internal.h"
 
+#ifndef MAP_BRK
+#define MAP_BRK 0
+#endif
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -2819,7 +2823,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
-	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED | MAP_BRK);
 	if (offset_in_page(error))
 		return error;
 
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 6f4d27c5bb32..7ee7ddfb8afc 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -21,15 +21,18 @@ void use_mm(struct mm_struct *mm)
 {
 	struct mm_struct *active_mm;
 	struct task_struct *tsk = current;
+	unsigned long flags;
 
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
+	ipipe_mm_switch_protect(flags);
 	if (active_mm != mm) {
 		atomic_inc(&mm->mm_count);
 		tsk->active_mm = mm;
 	}
 	tsk->mm = mm;
-	switch_mm(active_mm, mm, tsk);
+	__switch_mm(active_mm, mm, tsk);
+	ipipe_mm_switch_unprotect(flags);
 	task_unlock(tsk);
 #ifdef finish_arch_post_lock_switch
 	finish_arch_post_lock_switch();
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a277f3412a5d..48abacccb3b0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -66,7 +66,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
-	unsigned long pages = 0;
+	unsigned long pages = 0, flags;
 
 	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
 	if (!pte)
@@ -96,6 +96,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					continue;
 			}
 
+			flags = hard_local_irq_save();
 			ptent = ptep_modify_prot_start(mm, addr, pte);
 			ptent = pte_modify(ptent, newprot);
 			if (preserve_write)
@@ -108,6 +109,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				ptent = pte_mkwrite(ptent);
 			}
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
+			hard_local_irq_restore(flags);
 			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -251,6 +253,12 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 		pages = hugetlb_change_protection(vma, start, end, newprot);
 	else
 		pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
+#ifdef CONFIG_IPIPE
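+	/*
+	 * Changing the protections may require new page frames to be
+	 * faulted in; for pinned, locked memory, do this eagerly now
+	 * so that no fault is taken later from the head domain.
+	 */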
+	if (test_bit(MMF_VM_PINNED, &vma->vm_mm->flags) &&
+	    ((vma->vm_flags | vma->vm_mm->def_flags) & VM_LOCKED) &&
+	    (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+		__ipipe_pin_vma(vma->vm_mm, vma);
+#endif
 
 	return pages;
 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index de8e372ece04..8063f3f62381 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -200,6 +200,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 			return err;
 	} while (pgd++, addr = next, addr != end);
 
+	__ipipe_pin_mapping_globally(start, end);
+
 	return nr;
 }
 
