From 102a6357ca31898207767d917bff4b7346e570da Mon Sep 17 00:00:00 2001 From: "chao.an" Date: Thu, 9 Dec 2021 15:08:01 +0800 Subject: [PATCH] Revert "sched: Remove a redundant critical section" There is a potential problem that can lead to deadlock at condition wait, if the timeout of watchdog is a very small value (1 tick ?), the timer interrupt will come before the nxsem_wait() Revert "sched: pthread: Remove a redundant critical section in pthread_condclockwsait.c" Revert "sched: semaphore: Remove a redundant critical section in nxsem_clockwait()" Revert "sched: semaphore: Remove a redundant critical section in nxsem_tickwait()" This reverts commit 7758f3dcb1dcf61e9280aaba6cc98668e6c3bc65. This reverts commit 2976bb212e7e49d8c9c303f85fc3b83e07cd0010. This reverts commit 65dec5d10ac57abc9a15aadf076d693ab5208f69. Signed-off-by: chao.an --- sched/pthread/pthread_condclockwait.c | 50 ++++++++++++++++++++++----- sched/semaphore/sem_clockwait.c | 26 +++++++++----- sched/semaphore/sem_tickwait.c | 33 ++++++++++++++---- 3 files changed, 85 insertions(+), 24 deletions(-) diff --git a/sched/pthread/pthread_condclockwait.c b/sched/pthread/pthread_condclockwait.c index 865a40a1ac..8bc046ef0e 100644 --- a/sched/pthread/pthread_condclockwait.c +++ b/sched/pthread/pthread_condclockwait.c @@ -154,6 +154,7 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, FAR const struct timespec *abstime) { FAR struct tcb_s *rtcb = this_task(); + irqstate_t flags; sclock_t ticks; int mypid = getpid(); int ret = OK; @@ -192,15 +193,14 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, { sinfo("Give up mutex...\n"); - /* NOTE: We do not need a critical section here, - * because nxsem_wait() uses a critical section and - * pthread_condtimedout() is called from wd_timer() - * which uses a critical section for SMP case + /* We must disable pre-emption and interrupts here so that + * the time stays valid until the wait begins. This adds + * complexity because we assure that interrupts and + * pre-emption are re-enabled correctly. */ - /* REVISIT: Do we need to disable pre-emption? */ - sched_lock(); + flags = enter_critical_section(); /* Convert the timespec to clock ticks. We must disable pre- * emption here so that this time stays valid until the wait @@ -208,7 +208,15 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, */ ret = clock_abstime2ticks(clockid, abstime, &ticks); - if (ret == 0) + if (ret) + { + /* Restore interrupts (pre-emption will be enabled when + * we fall through the if/then/else) + */ + + leave_critical_section(flags); + } + else { /* Check the absolute time to wait. If it is now or in the * past, then just return with the timedout condition. @@ -216,6 +224,12 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, if (ticks <= 0) { + /* Restore interrupts and indicate that we have already + * timed out. (pre-emption will be enabled when we fall + * through the if/then/else + */ + + leave_critical_section(flags); ret = ETIMEDOUT; } else @@ -238,7 +252,15 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, nlocks = mutex->nlocks; #endif ret = pthread_mutex_give(mutex); - if (ret == 0) + if (ret != 0) + { + /* Restore interrupts (pre-emption will be enabled + * when we fall through the if/then/else) + */ + + leave_critical_section(flags); + } + else { /* Start the watchdog */ @@ -277,6 +299,14 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, ret = status; } } + + /* The interrupts stay disabled until after we sample + * the errno. This is because when debug is enabled + * and the console is used for debug output, then the + * errno can be altered by interrupt handling! (bad) + */ + + leave_critical_section(flags); } /* Reacquire the mutex (retaining the ret). */ @@ -302,7 +332,9 @@ int pthread_cond_clockwait(FAR pthread_cond_t *cond, } } - /* Re-enable pre-emption */ + /* Re-enable pre-emption (It is expected that interrupts + * have already been re-enabled in the above logic) + */ sched_unlock(); } diff --git a/sched/semaphore/sem_clockwait.c b/sched/semaphore/sem_clockwait.c index c27c110b87..0b0db1cb3a 100644 --- a/sched/semaphore/sem_clockwait.c +++ b/sched/semaphore/sem_clockwait.c @@ -92,6 +92,7 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, FAR const struct timespec *abstime) { FAR struct tcb_s *rtcb = this_task(); + irqstate_t flags; sclock_t ticks; int status; int ret = ERROR; @@ -109,11 +110,16 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, } #endif - /* NOTE: We do not need a critical section here, because - * nxsem_wait() and nxsem_timeout() use a critical section - * in the functions. + /* We will disable interrupts until we have completed the semaphore + * wait. We need to do this (as opposed to just disabling pre-emption) + * because there could be interrupt handlers that are asynchronously + * posting semaphores and to prevent race conditions with watchdog + * timeout. This is not too bad because interrupts will be re- + * enabled while we are blocked waiting for the semaphore. */ + flags = enter_critical_section(); + /* Try to take the semaphore without waiting. */ ret = nxsem_trywait(sem); @@ -121,7 +127,7 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, { /* We got it! */ - goto out; + goto success_with_irqdisabled; } /* We will have to wait for the semaphore. Make sure that we were provided @@ -131,7 +137,7 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { ret = -EINVAL; - goto out; + goto errout_with_irqdisabled; } /* Convert the timespec to clock ticks. We must have interrupts @@ -148,7 +154,7 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, if (status == OK && ticks <= 0) { ret = -ETIMEDOUT; - goto out; + goto errout_with_irqdisabled; } /* Handle any time-related errors */ @@ -156,7 +162,7 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, if (status != OK) { ret = -status; - goto out; + goto errout_with_irqdisabled; } /* Start the watchdog */ @@ -173,7 +179,11 @@ int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid, wd_cancel(&rtcb->waitdog); -out: + /* We can now restore interrupts and delete the watchdog */ + +success_with_irqdisabled: +errout_with_irqdisabled: + leave_critical_section(flags); return ret; } diff --git a/sched/semaphore/sem_tickwait.c b/sched/semaphore/sem_tickwait.c index 7cc4e04287..9b0e1c1997 100644 --- a/sched/semaphore/sem_tickwait.c +++ b/sched/semaphore/sem_tickwait.c @@ -71,16 +71,22 @@ int nxsem_tickwait(FAR sem_t *sem, clock_t start, uint32_t delay) { FAR struct tcb_s *rtcb = this_task(); + irqstate_t flags; clock_t elapsed; int ret; DEBUGASSERT(sem != NULL && up_interrupt_context() == false); - /* NOTE: We do not need a critical section here, because - * nxsem_wait() and nxsem_timeout() use a critical section - * in the functions. + /* We will disable interrupts until we have completed the semaphore + * wait. We need to do this (as opposed to just disabling pre-emption) + * because there could be interrupt handlers that are asynchronously + * posting semaphores and to prevent race conditions with watchdog + * timeout. This is not too bad because interrupts will be re- + * enabled while we are blocked waiting for the semaphore. */ + flags = enter_critical_section(); + /* Try to take the semaphore without waiting. */ ret = nxsem_trywait(sem); @@ -88,7 +94,7 @@ int nxsem_tickwait(FAR sem_t *sem, clock_t start, uint32_t delay) { /* We got it! */ - goto out; + goto success_with_irqdisabled; } /* We will have to wait for the semaphore. Make sure that we were provided @@ -99,7 +105,7 @@ int nxsem_tickwait(FAR sem_t *sem, clock_t start, uint32_t delay) { /* Return the errno from nxsem_trywait() */ - goto out; + goto errout_with_irqdisabled; } /* Adjust the delay for any time since the delay was calculated */ @@ -108,7 +114,7 @@ int nxsem_tickwait(FAR sem_t *sem, clock_t start, uint32_t delay) if (/* elapsed >= (UINT32_MAX / 2) || */ elapsed >= delay) { ret = -ETIMEDOUT; - goto out; + goto errout_with_irqdisabled; } delay -= elapsed; @@ -125,8 +131,21 @@ int nxsem_tickwait(FAR sem_t *sem, clock_t start, uint32_t delay) wd_cancel(&rtcb->waitdog); -out: + if (ret < 0) + { + goto errout_with_irqdisabled; + } + /* We can now restore interrupts */ + + /* Success exits */ + +success_with_irqdisabled: + + /* Error exits */ + +errout_with_irqdisabled: + leave_critical_section(flags); return ret; }