hv: Change sched_event structure to resolve data race in event handling
Currently the sched event handling may encounter data race problem, and as a result some vcpus might be stalled forever. One example can be wbinvd handling where more than 1 vcpus are doing wbinvd concurrently. The following is a possible execution of 3 vcpus: ------- 0 1 2 req [Note: 0] req bit0 set [Note: 1] IPI -> 0 req bit2 set IPI -> 2 VMExit req bit2 cleared wait vcpu2 descheduled VMExit req bit0 cleared wait vcpu0 descheduled signal 0 event0->set=true wake 0 signal 2 event2->set=true [Note: 3] wake 2 vcpu2 scheduled event2->set=false resume req req bit0 set IPI -> 0 req bit1 set IPI -> 1 (doesn't matter) vcpu0 scheduled [Note: 4] signal 0 event0->set=true (no wake) [Note: 2] event0->set=false (the rest doesn't matter) resume Any VMExit req bit0 cleared wait idle running (blocked forever) Notes: 0: req: vcpu_make_request(vcpu, ACRN_REQUEST_WAIT_WBINVD). 1: req bit: Bit in pending_req_bits. Bit0 stands for bit for vcpu0. 2: In function signal_event, At this time the event->waiting_thread is not NULL, so wake_thread will not execute 3: eventX: struct sched_event of vcpuX. 4: In function wait_event, the lock does not strictly cover the execution between schedule() and event->set=false, so other threads may kick in. ----- As shown in above example, before the last random VMExit, vcpu0 ended up with request bit set but event->set==false, so blocked forever. This patch proposes to change event->set from a boolean variable to an integer. The semantic is very similar to a semaphore. The wait_event will add 1 to this value, and block when this value is > 0, whereas signal_event will decrease this value by 1. It may happen that this value was decreased to a negative number but that is OK. As long as the wait_event and signal_event are paired and program order is observed (that is, wait_event always happens-before signal_event on a single vcpu), this value will eventually be 0. Tracked-On: #6405 Signed-off-by: Yifan Liu <yifan1.liu@intel.com>
This commit is contained in:
parent
bbc92361bc
commit
d575edf79a
|
@ -11,7 +11,7 @@
|
|||
void init_event(struct sched_event *event)
|
||||
{
|
||||
spinlock_init(&event->lock);
|
||||
event->set = false;
|
||||
event->nqueued = 0;
|
||||
event->waiting_thread = NULL;
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ void reset_event(struct sched_event *event)
|
|||
uint64_t rflag;
|
||||
|
||||
spinlock_irqsave_obtain(&event->lock, &rflag);
|
||||
event->set = false;
|
||||
event->nqueued = 0;
|
||||
event->waiting_thread = NULL;
|
||||
spinlock_irqrestore_release(&event->lock, rflag);
|
||||
}
|
||||
|
@ -33,13 +33,13 @@ void wait_event(struct sched_event *event)
|
|||
spinlock_irqsave_obtain(&event->lock, &rflag);
|
||||
ASSERT((event->waiting_thread == NULL), "only support exclusive waiting");
|
||||
event->waiting_thread = sched_get_current(get_pcpu_id());
|
||||
while (!event->set && (event->waiting_thread != NULL)) {
|
||||
event->nqueued++;
|
||||
while ((event->nqueued > 0) && (event->waiting_thread != NULL)) {
|
||||
sleep_thread(event->waiting_thread);
|
||||
spinlock_irqrestore_release(&event->lock, rflag);
|
||||
schedule();
|
||||
spinlock_irqsave_obtain(&event->lock, &rflag);
|
||||
}
|
||||
event->set = false;
|
||||
event->waiting_thread = NULL;
|
||||
spinlock_irqrestore_release(&event->lock, rflag);
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ void signal_event(struct sched_event *event)
|
|||
uint64_t rflag;
|
||||
|
||||
spinlock_irqsave_obtain(&event->lock, &rflag);
|
||||
event->set = true;
|
||||
event->nqueued--;
|
||||
if (event->waiting_thread != NULL) {
|
||||
wake_thread(event->waiting_thread);
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
struct sched_event {
|
||||
spinlock_t lock;
|
||||
bool set;
|
||||
int8_t nqueued;
|
||||
struct thread_object* waiting_thread;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue