Samples: Add SMP Pi
This sample showcases efficient utilization of an SMP system when processing independent, resource-hungry workloads. With no cross-dependencies between workers and no use of shared resources (during the heavy lifting itself), we can demonstrate almost linear scaling of performance: 2 cores complete the same amount of calculation twice as fast as 1 core, and 4 cores complete the same calculation twice as fast as 2 cores. Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
This commit is contained in:
parent
265b6ff59e
commit
7c3498dc43
|
@ -25,6 +25,7 @@ Samples and Demos
|
|||
posix/*
|
||||
gui/*
|
||||
video/*
|
||||
smp/*
|
||||
|
||||
.. comment
|
||||
To add a new sample document, please use the template available under
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
.. _smp-samples:
|
||||
|
||||
Various SMP Samples
|
||||
###################
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
**/*
|
|
@ -0,0 +1,8 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
cmake_minimum_required(VERSION 3.13.1)
|
||||
|
||||
include($ENV{ZEPHYR_BASE}/cmake/app/boilerplate.cmake NO_POLICY_SCOPE)
|
||||
project(smp_pi)
|
||||
|
||||
target_sources(app PRIVATE src/main.c)
|
|
@ -0,0 +1,46 @@
|
|||
.. _smp_pi:
|
||||
|
||||
SMP Pi
|
||||
###########
|
||||
|
||||
Overview
|
||||
********
|
||||
This sample application calculates Pi independently in many threads, and
|
||||
demonstrates the benefit of multiple execution units (CPU cores)
|
||||
when compute-intensive tasks can be run in parallel, with
|
||||
no cross-dependencies or shared resources.
|
||||
|
||||
By changing the value of CONFIG_MP_NUM_CPUS on SMP systems, you
|
||||
can see that using more cores takes almost linearly less time
|
||||
to complete the computational task.
|
||||
|
||||
You can also edit the sample source code to change the
|
||||
number of digits calculated (``DIGITS_NUM``), and the
|
||||
number of threads to use (``THREADS_NUM``).
|
||||
|
||||
Building and Running
|
||||
********************
|
||||
|
||||
This project outputs the Pi value calculated by each thread and, at the end, the total time
|
||||
required for all the calculations to complete. It can be built and executed
|
||||
on the ``qemu_x86_64`` board as follows:
|
||||
|
||||
.. zephyr-app-commands::
|
||||
:zephyr-app: samples/smp/pi
|
||||
:host-os: unix
|
||||
:board: qemu_x86_64
|
||||
:goals: run
|
||||
:compact:
|
||||
|
||||
Sample Output
|
||||
=============
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
Calculate first 240 digits of Pi independently by 16 threads.
|
||||
Pi value calculated by thread #0: 3141592653589793238462643383279502884197...
|
||||
Pi value calculated by thread #1: 3141592653589793238462643383279502884197...
|
||||
...
|
||||
Pi value calculated by thread #14: 314159265358979323846264338327950288419...
|
||||
Pi value calculated by thread #15: 314159265358979323846264338327950288419...
|
||||
All 16 threads executed by 4 cores in 28 msec
|
|
@ -0,0 +1,5 @@
|
|||
# Allow worker threads to capture all resources
|
||||
CONFIG_MAIN_THREAD_PRIORITY=11
|
||||
|
||||
# Enable SMP
|
||||
CONFIG_SMP=y
|
|
@ -0,0 +1,18 @@
|
|||
sample:
|
||||
description: Calculation of Pi independently in
|
||||
a number of threads
|
||||
name: SMP Pi
|
||||
common:
|
||||
tags: introduction
|
||||
harness: console
|
||||
harness_config:
|
||||
type: multi_line
|
||||
ordered: yes
|
||||
regex:
|
||||
- "Calculate first [0-9]+ digits of Pi independently by [0-9]+ threads.(.*)"
|
||||
- "Pi value calculated by thread #[0-9]+: [0-9]+(.*)"
|
||||
- "All [0-9]+ threads executed by [0-9]+ cores in [0-9]+ msec(.*)"
|
||||
tests:
|
||||
sample.smp_pi:
|
||||
tags: introduction
|
||||
platform_whitelist: nsim_hs_smp qemu_x86_64
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (c) 2019 Synopsys, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <zephyr.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Amount of execution threads to create and run */
|
||||
#define THREADS_NUM 16
|
||||
|
||||
/*
|
||||
* Amount of digits of Pi to calculate, must be a multiple of 4,
|
||||
* as used algorythm spits 4 digits on every iteration.
|
||||
*/
|
||||
#define DIGITS_NUM 240
|
||||
|
||||
#define LENGTH ((DIGITS_NUM / 4) * 14)
|
||||
#define STACK_SIZE (LENGTH * sizeof(int) + 512)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#define CORES_NUM CONFIG_MP_NUM_CPUS
|
||||
#else
|
||||
#define CORES_NUM 1
|
||||
#endif
|
||||
|
||||
static K_THREAD_STACK_ARRAY_DEFINE(tstack, THREADS_NUM, STACK_SIZE);
|
||||
static struct k_thread tthread[THREADS_NUM];
|
||||
static char buffer[THREADS_NUM][DIGITS_NUM + 1];
|
||||
static atomic_t counter = THREADS_NUM;
|
||||
|
||||
void test_thread(void *arg1, void *arg2, void *arg3)
|
||||
{
|
||||
atomic_t *counter = (atomic_t *)arg1;
|
||||
char *buffer = (char *)arg2;
|
||||
|
||||
ARG_UNUSED(arg3);
|
||||
|
||||
/*
|
||||
* Adapted and improved (for random number of digits) version of Pi
|
||||
* calculation program initially proposed by Dik T. Winter as:
|
||||
* -------------------------------->8--------------------------------
|
||||
* int a=10000,b,c=2800,d,e,f[2801],g;main(){for(;b-c;)f[b++]=a/5;
|
||||
* for(;d=0,g=c*2;c-=14,printf("%.4d",e+d/a),e=d%a)for(b=c;d+=f[b]*a,
|
||||
* f[b]=d%--g,d/=g--,--b;d*=b);}
|
||||
* -------------------------------->8--------------------------------
|
||||
*/
|
||||
#define NEW_BASE 10000
|
||||
#define ARRAY_INIT 2000
|
||||
|
||||
int array[LENGTH + 1] = {};
|
||||
int carry = 0;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < LENGTH; i++)
|
||||
array[i] = ARRAY_INIT;
|
||||
|
||||
for (i = LENGTH; i > 0; i -= 14) {
|
||||
int sum = 0, value;
|
||||
|
||||
for (j = i; j > 0; --j) {
|
||||
sum = sum * j + NEW_BASE * array[j];
|
||||
array[j] = sum % (j * 2 - 1);
|
||||
sum /= j * 2 - 1;
|
||||
}
|
||||
|
||||
value = carry + sum / NEW_BASE;
|
||||
carry = sum % NEW_BASE;
|
||||
|
||||
/* Convert 4-digit int to string */
|
||||
sprintf(buffer, "%.4d", value);
|
||||
buffer += 4;
|
||||
}
|
||||
|
||||
atomic_dec(counter);
|
||||
}
|
||||
|
||||
void main(void)
|
||||
{
|
||||
u32_t start_time, stop_time, cycles_spent, nanoseconds_spent;
|
||||
int i;
|
||||
|
||||
printk("Calculate first %d digits of Pi independently by %d threads.\n",
|
||||
DIGITS_NUM, THREADS_NUM);
|
||||
|
||||
/* Capture initial time stamp */
|
||||
start_time = k_cycle_get_32();
|
||||
|
||||
for (i = 0; i < THREADS_NUM; i++) {
|
||||
k_thread_create(&tthread[i], tstack[i], STACK_SIZE,
|
||||
(k_thread_entry_t)test_thread,
|
||||
(void *)&counter, (void *)buffer[i], NULL,
|
||||
K_PRIO_COOP(10), 0, K_NO_WAIT);
|
||||
}
|
||||
|
||||
/* Wait for all workers to finish their calculations */
|
||||
while (counter)
|
||||
k_sleep(1);
|
||||
|
||||
/* Capture final time stamp */
|
||||
stop_time = k_cycle_get_32();
|
||||
|
||||
cycles_spent = stop_time - start_time;
|
||||
nanoseconds_spent = SYS_CLOCK_HW_CYCLES_TO_NS(cycles_spent);
|
||||
|
||||
for (i = 0; i < THREADS_NUM; i++)
|
||||
printk("Pi value calculated by thread #%d: %s\n", i, buffer[i]);
|
||||
|
||||
printk("All %d threads executed by %d cores in %d msec\n", THREADS_NUM,
|
||||
CORES_NUM, nanoseconds_spent / 1000 / 1000);
|
||||
}
|
Loading…
Reference in New Issue