From f29250c671727c43ccfbf8d0afe6b60231e6ce82 Mon Sep 17 00:00:00 2001 From: patacongo Date: Fri, 30 Mar 2007 00:49:11 +0000 Subject: [PATCH] Correct a race condition in the pthread join logic. Sometimes the join structure was being deallocated while it was still needed. git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@180 42af7a65-404d-4744-a932-0658087f49c3 --- ChangeLog | 3 + Documentation/NuttX.html | 3 + configs/ntosd-dm320/doc/test-result.txt | 140 ++++++++++++++---------- sched/pthread_completejoin.c | 114 +++++++++++-------- sched/pthread_detach.c | 4 +- sched/pthread_internal.h | 2 + sched/pthread_join.c | 12 +- 7 files changed, 165 insertions(+), 113 deletions(-) diff --git a/ChangeLog b/ChangeLog index 002a40b9d6..f2f36734c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -107,5 +107,8 @@ * Fixed a bug in the wait-for-message-queue-not-empty logic. * Added a test of timed mqueue operations; detected and corrected some mqueue errors. + * Identified and corrected a race condition associated with + pthread_join. In the failure condition, memory was being + deallocated while still in use. * Started m68322 diff --git a/Documentation/NuttX.html b/Documentation/NuttX.html index 3df09d3c41..b2e8bef41e 100644 --- a/Documentation/NuttX.html +++ b/Documentation/NuttX.html @@ -468,6 +468,9 @@ Other memory: * Fixed a bug in the wait-for-message-queue-not-empty logic. * Added a test of timed mqueue operations; detected and corrected some mqueue errors. + * Identified and corrected a race condition associated with + pthread_join. In the failure condition, memory was being + deallocated while still in use. * Started m68322 diff --git a/configs/ntosd-dm320/doc/test-result.txt b/configs/ntosd-dm320/doc/test-result.txt index 56fac5064a..9179df8c89 100644 --- a/configs/ntosd-dm320/doc/test-result.txt +++ b/configs/ntosd-dm320/doc/test-result.txt @@ -8,9 +8,9 @@ DM9000 work in 16 bus width TFTP from server 10.0.0.1; our IP address is 10.0.0.2 Filename 'nuttx.dm320'. Load address: 0x10 -Loading: ############################ +Loading: ############################# done -Bytes transferred = 141981 (22a9d hex) +Bytes transferred = 146570 (23c8a hex) Neuros Devboard > go 1008000 ## Starting application at 0x01008000 ... stdio_test: write fd=1 @@ -30,11 +30,11 @@ user_main: argv[4]="Arg4" End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: /dev/null test dev_null: Read 0 bytes from /dev/null @@ -43,11 +43,11 @@ dev_null: Wrote 1024 bytes to /dev/null End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: mutex test Initializing mutex @@ -60,11 +60,11 @@ Starting thread 2 End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: cancel test cancel_test: Test 1: Normal Cancelation @@ -115,11 +115,11 @@ cancel_test: PASS thread terminated with PTHREAD_CANCELED End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: semaphore test sem_test: Initializing semaphore to 0 @@ -153,11 +153,11 @@ poster_func: Thread 3 done End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: condition variable test cond_test: Initializing mutex @@ -181,11 +181,11 @@ cond_test: 0 times, the waiter was in an unexpected state when the signaler ran End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: timed wait test thread_waiter: Initializing mutex @@ -203,11 +203,11 @@ timedwait_test: waiter exited with result=12345678 End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: message queue test mqueue_test: Starting receiver @@ -247,29 +247,62 @@ mqueue_test: receiver has already terminated End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 + +user_main: timed message queue test +timedmqueue_test: Starting sender +sender_thread: Starting +sender_thread: mq_timedsend succeeded on msg 0 +sender_thread: mq_timedsend succeeded on msg 1 +sender_thread: mq_timedsend succeeded on msg 2 +sender_thread: mq_timedsend succeeded on msg 3 +sender_thread: mq_timedsend succeeded on msg 4 +sender_thread: mq_timedsend succeeded on msg 5 +sender_thread: mq_timedsend succeeded on msg 6 +sender_thread: mq_timedsend succeeded on msg 7 +sender_thread: mq_timedsend succeeded on msg 8 +timedmqueue_test: Waiting for sender to complete +sender_thread: mq_timedsend 9 timed out as expected +sender_thread: returning nerrors=0 +timedmqueue_test: Starting receiver +receiver_thread: Starting +receiver_thread: mq_timedreceive succeeded on msg 0 +receiver_thread: mq_timedreceive succeeded on msg 1 +receiver_thread: mq_timedreceive succeeded on msg 2 +receiver_thread: mq_timedreceive succeeded on msg 3 +receiver_thread: mq_timedreceive succeeded on msg 4 +receiver_thread: mq_timedreceive succeeded on msg 5 +receiver_thread: mq_timedreceive succeeded on msg 6 +receiver_thread: mq_timedreceive succeeded on msg 7 +receiver_thread: mq_timedreceive succeeded on msg 8 +timedmqueue_test: Waiting for sender to complete +receiver_thread: Receive 9 timed out as expected +receiver_thread: returning nerrors=0 +timedmqueue_test: Test complete + +End of test memory usage: +VARIABLE BEFORE AFTER +======== ======== ======== +arena fe0f10 fe0f10 +ordblks 2 2 +mxordblk fd99b0 fd99b0 +uordblks 53f0 53f0 +fordblks fdbb20 fdbb20 user_main: signal handler test sighand_test: Initializing semaphore to 0 sighand_test: Starting waiter task -sighand_test: Started waiter_main pid=16 +sighand_test: Started waiter_main pid=18 waiter_main: Waiter started waiter_main: Unmasking signal 17 waiter_main: Registering signal handler waiter_main: oact.sigaction=0 oact.sa_flags=0 oact.sa_mask=0 waiter_main: Waiting on semaphore -sighand_test: Signaling pid=16 with signo=17 sigvalue=42 -MALLOC(272)->207f0 -MALLOC(400)->20900 -MALLOC(656)->20a90 -MALLOC(1040)->20d20 -MALLOC(1040)->21130 -MALLOC(1040)->21540 -MALLOC(4112)->21950 +sighand_test: Signaling pid=18 with signo=17 sigvalue=42 wakeup_action: Received signal 17 wakeup_action: sival_int=42 wakeup_action: si_code=1 @@ -277,22 +310,15 @@ wakeup_action: ucontext=0 waiter_main: sem_wait() successfully interrupted by signal waiter_main: done sighand_test: done -FREE(21950) -FREE(20900) -FREE(20d20) -FREE(21130) -FREE(21540) -FREE(20a90) -FREE(207f0) End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: POSIX timer test timer_test: Initializing semaphore to 0 @@ -342,11 +368,11 @@ timer_test: done End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: round-robin scheduler test rr_test: Starting sieve1 thread @@ -365,11 +391,11 @@ rr_test: Done End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: barrier test barrier_test: Initializing barrier @@ -425,18 +451,18 @@ barrier_test: Thread 7 completed with result=0 End of test memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 Final memory usage: VARIABLE BEFORE AFTER ======== ======== ======== -arena fe1a50 fe1a50 +arena fe0f10 fe0f10 ordblks 2 2 -mxordblk fda4f0 fda4f0 +mxordblk fd99b0 fd99b0 uordblks 53f0 53f0 -fordblks fdc660 fdc660 +fordblks fdbb20 fdbb20 user_main: Exitting diff --git a/sched/pthread_completejoin.c b/sched/pthread_completejoin.c index 571bb68624..76b2e9f80a 100644 --- a/sched/pthread_completejoin.c +++ b/sched/pthread_completejoin.c @@ -65,16 +65,16 @@ ************************************************************/ /************************************************************ - * Function: pthread_destroyjoininfo + * Function: pthread_notifywaiters * * Description: - * Destroy a join_t structure. This must - * be done by the child thread at child thread destruction - * time. + * Notify all other threads waiting in phread join for this + * thread's exit data. This must be done by the child + * at child thread destruction time. * ************************************************************/ -static void pthread_destroyjoininfo(FAR join_t *pjoin) +static boolean pthread_notifywaiters(FAR join_t *pjoin) { int ntasks_waiting; int status; @@ -111,14 +111,9 @@ static void pthread_destroyjoininfo(FAR join_t *pjoin) */ (void)pthread_takesemaphore(&pjoin->data_sem); - (void)sem_destroy(&pjoin->data_sem); + return TRUE; } - - /* All of the joined threads have had received the exit value. - * Now we can destroy this thread's exit semaphore - */ - - (void)sem_destroy(&pjoin->exit_sem); + return FALSE; } /************************************************************ @@ -148,7 +143,6 @@ static void pthread_destroyjoininfo(FAR join_t *pjoin) int pthread_completejoin(pid_t pid, FAR void *exit_value) { FAR join_t *pjoin; - boolean detached = FALSE; dbg("process_id=%d exit_value=%p\n", pid, exit_value); @@ -164,48 +158,72 @@ int pthread_completejoin(pid_t pid, FAR void *exit_value) } else { - /* Has the thread been marked as detached? */ + boolean waiters; + + /* Save the return exit value in the thread structure. */ pjoin->terminated = TRUE; - detached = pjoin->detached; - if (detached) + pjoin->exit_value = exit_value; + + /* Notify waiters of the availability of the exit value */ + + waiters = pthread_notifywaiters(pjoin); + + /* If there are no waiters and if the thread is marked as detached. + * then discard the join information now. Otherwise, the pthread + * join logic will call pthread_destroyjoin() when all of the threads + * have sampled the exit value. + */ + + if (!waiters && pjoin->detached) { - dbg("Detaching\n"); - - /* If so, then remove the thread's structure from the private - * data set. After this point, no other thread can perform a join - * operation. - */ - - (void)pthread_removejoininfo(pid); - (void)pthread_givesemaphore(&g_join_semaphore); - - /* Destroy this thread data structure. */ - - pthread_destroyjoininfo(pjoin); - - /* Deallocate the join entry if it was detached. */ - - sched_free((FAR void*)pjoin); + pthread_destroyjoin(pjoin); } - /* No, then we can assume that some other thread is waiting for the join info */ + /* Giving the following semaphore will allow the waiters + * to call pthread_destroyjoin. + */ - else - { - /* Save the return exit value in the thread structure. */ - - pjoin->exit_value = exit_value; - - /* Destroy this thread data structure. */ - - pthread_destroyjoininfo(pjoin); - - /* pthread_join may now access the thread entry structure. */ - - (void)pthread_givesemaphore(&g_join_semaphore); - } + (void)pthread_givesemaphore(&g_join_semaphore); } return OK; } + +/************************************************************ + * Function: pthread_destroyjoin + * + * Description: + * This is called from pthread_completejoin if the join + * info was detached or from pthread_join when the last + * waiting thread has received the thread exit info. + * + * Or it may never be called if the join info was never + * detached or if no thread ever calls pthread_join. In + * case, there is a memory leak! + * + * Assumptions: + * The caller holds g_join_semaphore + * + ************************************************************/ + +void pthread_destroyjoin(FAR join_t *pjoin) +{ + int status; + + dbg("pjoin=0x%p\n", pjoin); + + /* Remove the join info from the set of joins */ + + (void)pthread_removejoininfo((pid_t)pjoin->thread); + + /* Destroy its semaphores */ + + (void)sem_destroy(&pjoin->data_sem); + (void)sem_destroy(&pjoin->exit_sem); + + /* And deallocate the pjoin structure */ + + sched_free(pjoin); +} + diff --git a/sched/pthread_detach.c b/sched/pthread_detach.c index ca3f732f40..a1a60acf53 100644 --- a/sched/pthread_detach.c +++ b/sched/pthread_detach.c @@ -109,9 +109,7 @@ int pthread_detach(pthread_t thread) { /* YES.. just remove the thread entry. */ - (void)pthread_removejoininfo((pid_t)thread); - sched_free(pjoin); - pjoin = NULL; + pthread_destroyjoin(pjoin); } else { diff --git a/sched/pthread_internal.h b/sched/pthread_internal.h index 34a2f518d1..ec7c5ed717 100644 --- a/sched/pthread_internal.h +++ b/sched/pthread_internal.h @@ -64,6 +64,7 @@ struct join_s { FAR struct join_s *next; /* Implements link list */ + ubyte crefs; /* Reference count */ boolean started; /* TRUE: pthread started. */ boolean detached; /* TRUE: pthread_detached'ed */ boolean terminated; /* TRUE: detach'ed+exit'ed */ @@ -115,6 +116,7 @@ extern "C" { EXTERN void weak_function pthread_initialize(void); EXTERN int pthread_completejoin(pid_t pid, FAR void *exit_value); +EXTERN void pthread_destroyjoin(FAR join_t *pjoin); EXTERN FAR join_t *pthread_findjoininfo(pid_t pid); EXTERN int pthread_givesemaphore(sem_t *sem); EXTERN FAR join_t *pthread_removejoininfo(pid_t pid); diff --git a/sched/pthread_join.c b/sched/pthread_join.c index 3819c1fa37..71ea7e9f03 100644 --- a/sched/pthread_join.c +++ b/sched/pthread_join.c @@ -181,6 +181,7 @@ int pthread_join(pthread_t thread, pthread_addr_t *pexit_value) */ sched_lock(); + pjoin->crefs++; (void)pthread_givesemaphore(&g_join_semaphore); /* Take the thread's join semaphore */ @@ -195,10 +196,6 @@ int pthread_join(pthread_t thread, pthread_addr_t *pexit_value) dbg("exit_value=0x%p\n", pjoin->exit_value); } - /* Then remove the thread entry. */ - - (void)pthread_removejoininfo((pid_t)thread); - /* Post the thread's join semaphore so that exitting thread * will know that we have received the data. */ @@ -211,7 +208,12 @@ int pthread_join(pthread_t thread, pthread_addr_t *pexit_value) /* Deallocate the thread entry */ - sched_free(pjoin); + (void)pthread_takesemaphore(&g_join_semaphore); + if (--pjoin->crefs <= 0) + { + (void)pthread_destroyjoin(pjoin); + } + (void)pthread_givesemaphore(&g_join_semaphore); ret = OK; }