/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/processor.h>


22
static DEFINE_PER_CPU(struct llist_head, irq_work_list);
23
static DEFINE_PER_CPU(int, irq_work_raised);
24 25 26 27

/*
 * Claim the entry so that no one else will poke at it.
 */
28
static bool irq_work_claim(struct irq_work *work)
29
{
30
	unsigned long flags, oflags, nflags;
31

32 33 34 35 36
	/*
	 * Start with our best wish as a premise but only trust any
	 * flag value after cmpxchg() result.
	 */
	flags = work->flags & ~IRQ_WORK_PENDING;
37 38
	for (;;) {
		nflags = flags | IRQ_WORK_FLAGS;
39 40
		oflags = cmpxchg(&work->flags, flags, nflags);
		if (oflags == flags)
41
			break;
42 43 44
		if (oflags & IRQ_WORK_PENDING)
			return false;
		flags = oflags;
45 46
		cpu_relax();
	}
47 48 49 50 51 52 53 54 55 56 57 58

	return true;
}

/*
 * Weak default for the hook that irq_work_queue() calls to raise the
 * irq_work interrupt. An architecture that supports such an interrupt
 * overrides this symbol; this fallback deliberately does nothing.
 */
void __weak arch_irq_work_raise(void)
{
	/*
	 * Nothing to raise here: architectures without an override get
	 * their queued work handled from the timer tick callback instead.
	 */
}

/*
59 60 61 62
 * Enqueue the irq_work @entry unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
63
 */
64
void irq_work_queue(struct irq_work *work)
65
{
66 67 68 69 70
	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return;

	/* Queue the entry and raise the IPI if needed. */
71
	preempt_disable();
72

73 74 75 76 77 78 79 80 81 82 83
	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));

	/*
	 * If the work is not "lazy" or the tick is stopped, raise the irq
	 * work interrupt (if supported by the arch), otherwise, just wait
	 * for the next tick.
	 */
	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
			arch_irq_work_raise();
	}
84

85
	preempt_enable();
86 87 88
}
EXPORT_SYMBOL_GPL(irq_work_queue);

89 90 91 92 93 94 95 96
bool irq_work_needs_cpu(void)
{
	struct llist_head *this_list;

	this_list = &__get_cpu_var(irq_work_list);
	if (llist_empty(this_list))
		return false;

97 98 99
	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

100 101 102
	return true;
}

103
static void __irq_work_run(void)
104
{
105
	unsigned long flags;
106 107 108
	struct irq_work *work;
	struct llist_head *this_list;
	struct llist_node *llnode;
109

110 111 112 113 114 115 116 117

	/*
	 * Reset the "raised" state right before we check the list because
	 * an NMI may enqueue after we find the list empty from the runner.
	 */
	__this_cpu_write(irq_work_raised, 0);
	barrier();

118 119
	this_list = &__get_cpu_var(irq_work_list);
	if (llist_empty(this_list))
120 121 122 123
		return;

	BUG_ON(!irqs_disabled());

124 125 126
	llnode = llist_del_all(this_list);
	while (llnode != NULL) {
		work = llist_entry(llnode, struct irq_work, llnode);
127

Peter Zijlstra's avatar
Peter Zijlstra committed
128
		llnode = llist_next(llnode);
129 130

		/*
131
		 * Clear the PENDING bit, after this point the @work
132
		 * can be re-used.
133 134 135
		 * Make it immediately visible so that other CPUs trying
		 * to claim that work don't rely on us to handle their data
		 * while we are in the middle of the func.
136
		 */
137 138 139
		flags = work->flags & ~IRQ_WORK_PENDING;
		xchg(&work->flags, flags);

140
		work->func(work);
141 142 143 144
		/*
		 * Clear the BUSY bit and return to the free state if
		 * no-one else claimed it meanwhile.
		 */
145
		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
146 147
	}
}
148 149 150 151 152 153 154 155 156 157

/*
 * Run the irq_work entries queued on this CPU. Must be called from
 * hardirq context with local IRQs disabled; calling it outside hardirq
 * context is a BUG.
 */
void irq_work_run(void)
{
	BUG_ON(!in_irq());
	__irq_work_run();
}
EXPORT_SYMBOL_GPL(irq_work_run);

/*
 * Synchronize against the irq_work @entry, ensures the entry is not
 * currently in use.
 */
164
void irq_work_sync(struct irq_work *work)
165 166 167
{
	WARN_ON_ONCE(irqs_disabled());

168
	while (work->flags & IRQ_WORK_BUSY)
169 170 171
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
172 173 174 175 176 177 178 179 180

#ifdef CONFIG_HOTPLUG_CPU
static int irq_work_cpu_notify(struct notifier_block *self,
			       unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_DYING:
Mauro Ribeiro's avatar
Mauro Ribeiro committed
181
	case CPU_DYING_FROZEN:
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
		/* Called from stop_machine */
		if (WARN_ON_ONCE(cpu != smp_processor_id()))
			break;
		__irq_work_run();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block cpu_notify;

static __init int irq_work_init_cpu_notifier(void)
{
	cpu_notify.notifier_call = irq_work_cpu_notify;
	cpu_notify.priority = 0;
	register_cpu_notifier(&cpu_notify);
	return 0;
}
device_initcall(irq_work_init_cpu_notifier);

#endif /* CONFIG_HOTPLUG_CPU */