diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index fa05770865c6..1071bf492e42 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -3091,19 +3091,18 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, goto start_over; } } else { + /* Fill up the new registration node info */ + requested_offset_record->ts_reg_info.buf = buf; + requested_offset_record->ts_reg_info.cq_cb = cq_cb; + requested_offset_record->ts_reg_info.timestamp_kernel_addr = + (u64 *) ts_buff->user_buff_address + ts_offset; + requested_offset_record->cq_kernel_addr = + (u64 *) cq_cb->kernel_address + cq_offset; + requested_offset_record->cq_target_value = target_value; + spin_unlock_irqrestore(wait_list_lock, flags); } - /* Fill up the new registration node info */ - requested_offset_record->ts_reg_info.in_use = 1; - requested_offset_record->ts_reg_info.buf = buf; - requested_offset_record->ts_reg_info.cq_cb = cq_cb; - requested_offset_record->ts_reg_info.timestamp_kernel_addr = - (u64 *) ts_buff->user_buff_address + ts_offset; - requested_offset_record->cq_kernel_addr = - (u64 *) cq_cb->kernel_address + cq_offset; - requested_offset_record->cq_target_value = target_value; - *pend = requested_offset_record; dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", @@ -3151,7 +3150,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, goto put_cq_cb; } - /* Find first available record */ + /* get ts buffer record */ rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, cq_counters_offset, target_value, &interrupt->wait_list_lock, &pend); @@ -3199,7 +3198,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * Note that we cannot have sorted list by target value, * in order to shorten the list pass loop, since * same list could have nodes for different cq counter handle. + * Note: + * Mark ts buff offset as in use here in the spinlock protection area + * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record + * before adding the node to the list. this scenario might happen when + * multiple threads are racing on same offset and one thread could + * set the ts buff in ts_buff_get_kernel_ts_record then the other thread + * takes over and get to ts_buff_get_kernel_ts_record and then we will try + * to re-use the same ts buff offset, and will try to delete a non existing + * node from the list. */ + if (register_ts_record) + pend->ts_reg_info.in_use = 1; + list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);