hv: Change sched_event structure to resolve data race in event handling

Currently the sched event handling may encounter a data race problem, and
as a result some vcpus might be stalled forever.

One example can be wbinvd handling, where more than one vcpu is doing
wbinvd concurrently. The following is a possible execution of 3 vcpus:
-------
0                            1                           2
                             req [Note: 0]
                             req bit0 set [Note: 1]
                             IPI -> 0
                             req bit2 set
                             IPI -> 2
                                                         VMExit
                                                         req bit2 cleared
                                                         wait
                                                         vcpu2 descheduled

VMExit
req bit0 cleared
wait
vcpu0 descheduled
                             signal 0
                             event0->set=true
                             wake 0

                             signal 2
                             event2->set=true [Note: 3]
                             wake 2
                                                         vcpu2 scheduled
                                                         event2->set=false
                                                         resume

                                                         req
                                                         req bit0 set
                                                         IPI -> 0
                                                         req bit1 set
                                                         IPI -> 1
                             (doesn't matter)
vcpu0 scheduled [Note: 4]
                                                         signal 0
                                                         event0->set=true
                                                         (no wake) [Note: 2]
event0->set=false                                        (the rest doesn't matter)
resume

Any VMExit
req bit0 cleared
wait
idle running

(blocked forever)

Notes:
0: req: vcpu_make_request(vcpu, ACRN_REQUEST_WAIT_WBINVD).
1: req bit: Bit in pending_req_bits. Bit0 stands for bit for vcpu0.
2: In function signal_event, at this time the event->waiting_thread
    is not NULL, so wake_thread will not execute
3: eventX: struct sched_event of vcpuX.
4: In function wait_event, the lock does not strictly cover the execution between
    schedule() and event->set=false, so other threads may kick in.
-----

As shown in above example, before the last random VMExit, vcpu0 ended up
with request bit set but event->set==false, so blocked forever.

This patch proposes to change event->set from a boolean variable to an
integer. The semantic is very similar to a semaphore. The wait_event
will add 1 to this value, and block when this value is > 0, whereas signal_event
will decrease this value by 1.

It may happen that this value is decreased to a negative number, but that
is OK. As long as wait_event and signal_event are paired and
program order is observed (that is, wait_event always happens-before signal_event
on a single vcpu), this value will eventually return to 0.

Tracked-On: #6405
Signed-off-by: Yifan Liu <yifan1.liu@intel.com>
This commit is contained in:
Yifan Liu 2021-08-10 10:05:17 +08:00 committed by wenlingz
parent bbc92361bc
commit d575edf79a
2 changed files with 6 additions and 6 deletions

View File

@@ -11,7 +11,7 @@
void init_event(struct sched_event *event) void init_event(struct sched_event *event)
{ {
spinlock_init(&event->lock); spinlock_init(&event->lock);
event->set = false; event->nqueued = 0;
event->waiting_thread = NULL; event->waiting_thread = NULL;
} }
@@ -20,7 +20,7 @@ void reset_event(struct sched_event *event)
uint64_t rflag; uint64_t rflag;
spinlock_irqsave_obtain(&event->lock, &rflag); spinlock_irqsave_obtain(&event->lock, &rflag);
event->set = false; event->nqueued = 0;
event->waiting_thread = NULL; event->waiting_thread = NULL;
spinlock_irqrestore_release(&event->lock, rflag); spinlock_irqrestore_release(&event->lock, rflag);
} }
@@ -33,13 +33,13 @@ void wait_event(struct sched_event *event)
spinlock_irqsave_obtain(&event->lock, &rflag); spinlock_irqsave_obtain(&event->lock, &rflag);
ASSERT((event->waiting_thread == NULL), "only support exclusive waiting"); ASSERT((event->waiting_thread == NULL), "only support exclusive waiting");
event->waiting_thread = sched_get_current(get_pcpu_id()); event->waiting_thread = sched_get_current(get_pcpu_id());
while (!event->set && (event->waiting_thread != NULL)) { event->nqueued++;
while ((event->nqueued > 0) && (event->waiting_thread != NULL)) {
sleep_thread(event->waiting_thread); sleep_thread(event->waiting_thread);
spinlock_irqrestore_release(&event->lock, rflag); spinlock_irqrestore_release(&event->lock, rflag);
schedule(); schedule();
spinlock_irqsave_obtain(&event->lock, &rflag); spinlock_irqsave_obtain(&event->lock, &rflag);
} }
event->set = false;
event->waiting_thread = NULL; event->waiting_thread = NULL;
spinlock_irqrestore_release(&event->lock, rflag); spinlock_irqrestore_release(&event->lock, rflag);
} }
@@ -49,7 +49,7 @@ void signal_event(struct sched_event *event)
uint64_t rflag; uint64_t rflag;
spinlock_irqsave_obtain(&event->lock, &rflag); spinlock_irqsave_obtain(&event->lock, &rflag);
event->set = true; event->nqueued--;
if (event->waiting_thread != NULL) { if (event->waiting_thread != NULL) {
wake_thread(event->waiting_thread); wake_thread(event->waiting_thread);
} }

View File

@ -4,7 +4,7 @@
struct sched_event { struct sched_event {
spinlock_t lock; spinlock_t lock;
bool set; int8_t nqueued;
struct thread_object* waiting_thread; struct thread_object* waiting_thread;
}; };