[Xenomai] Xenomai 3 Multi-core Semaphore latency
Singh, Raman
rasingh at mitre.org
Fri May 18 18:24:47 CEST 2018
Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May
14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ),
Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
I've been having issues with semaphore latency when threads access
semaphores while executing on different cores. When both threads accessing
a semaphore execute on the same processor core, the latency between
one thread posting a semaphore and another waking up after waiting on it
is fairly small. However, as soon as one of the threads is moved to a
different core, the latency between a semaphore post from one thread to a
waiting thread waking up in response starts to become large enough to
affect real-time performance. The latencies I've been seeing are on the order
of hundreds of milliseconds.
The code below demonstrates the issue. It creates two threads. One thread
calls sem_post() and sleeps for a millisecond for each iteration of a loop
for 1000 iterations. The other thread performs a blocking wait on the
semaphore in a loop for the same number of iterations. Both threads print
out their operation (either sem_post() or sem_wait()) along with a
timestamp and loop iteration count. The expectation is that the printouts
that result should alternate between posts and waits and this is in fact
what happens when both threads run on the same core. The timestamps show
that the latency between a post and the second thread waking up is pretty
low.
When the threads are moved to separate cores, the pattern of the printouts
changes. Instead of alternating posts followed by waits, there are
multiple posts in a row followed by an equal number of waits in a row. The
timestamps show significant latency between the first call to sem_post()
in a sequence and the corresponding first sem_wait() call that returns
after blocking.
I've also tried this test code using standard Linux primitives, and in
both cases (threads on the same core and threads on separate cores), the
printouts come out the same, with alternating posts and waits. Additionally,
this multi-core issue does not occur on an older ZC706 platform with
Xenomai 2.6.x.
With regard to the executable generated from the code below, with no
arguments provided it will run with both threads on the same core. If
any argument(s) are provided, it will run with the threads on separate
cores.
Any help with this issue would be appreciated.
Thanks,
Raman
#include <cstdlib>
#include <errno.h>
#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
static sem_t semaphore;
/*
 * Producer thread entry point.
 *
 * Posts the file-level semaphore 1000 times. After each successful post it
 * reads CLOCK_REALTIME, prints that timestamp together with the running post
 * count, and then sleeps for 1000 microseconds before the next iteration.
 *
 * arg is unused. The function always returns a null pointer; it stops early
 * if the semaphore post or the clock read fails.
 */
void* producer_thread(void* arg)
{
    (void)arg;

    int count = 0;
    struct timespec current_time;

    while(count < 1000)
    {
        /* A failed post would otherwise be logged as if it had succeeded. */
        if(sem_post(&semaphore) == -1)
        {
            printf("Failed to post semaphore. Error: %s\n", strerror(errno));
            return 0;
        }
        ++count;

        /* The timestamp is taken after the post, so it marks the return of sem_post(). */
        if(clock_gettime(CLOCK_REALTIME, &current_time) == -1)
        {
            printf("Failed to get current time.\n");
            return 0;
        }

        printf("sem_post:%lld.%.9ld, count = %d\n",
               static_cast<long long>(current_time.tv_sec), current_time.tv_nsec, count);

        usleep(1000);
    }

    return 0;
}
/*
 * Consumer thread entry point.
 *
 * Performs 1000 blocking waits on the file-level semaphore. After each
 * successful wait it reads CLOCK_REALTIME and prints that timestamp together
 * with the running wait count.
 *
 * arg is unused. The function always returns a null pointer; it stops early
 * if the wait fails for a reason other than EINTR or if the clock read fails.
 */
void* consumer_thread(void* arg)
{
    (void)arg;

    int count = 0;
    struct timespec current_time;

    while(count < 1000)
    {
        /*
         * sem_wait() can return -1 without having taken the semaphore.
         * Retry on EINTR so an interrupted call is not counted as a wake-up;
         * give up on any other error.
         */
        while(sem_wait(&semaphore) == -1)
        {
            if(errno != EINTR)
            {
                printf("Failed to wait on semaphore. Error: %s\n", strerror(errno));
                return 0;
            }
        }
        ++count;

        /* The timestamp is taken after the wait returns, i.e. after wake-up. */
        if(clock_gettime(CLOCK_REALTIME, &current_time) == -1)
        {
            printf("Failed to get current time.\n");
            return 0;
        }

        printf("sem_wait:%lld.%.9ld, count = %d\n",
               static_cast<long long>(current_time.tv_sec), current_time.tv_nsec, count);
    }

    return 0;
}
/*
 * Test driver: starts one consumer and one producer thread that share the
 * file-level semaphore, then waits for both to finish.
 *
 * With no command-line arguments both threads are pinned to CPU 0. If any
 * argument is given, the consumer is pinned to CPU 1 and the producer stays
 * on CPU 0. Both threads use SCHED_FIFO with explicit (non-inherited)
 * scheduling; the consumer runs at priority 86 and the producer at 99.
 *
 * Returns 0 after both threads have been joined, or -1 if any setup step
 * fails.
 */
int main(int argc, char *argv[])
{
    (void)argv;

    /* Any argument at all selects the separate-core configuration. */
    bool same_core = true;
    if(argc > 1) same_core = false;

    /* Not shared between processes, initial value 0: the consumer blocks until the first post. */
    if(sem_init(&semaphore, 0, 0) == -1)
    {
        printf("Failed to initialize semaphore. Error: %s\n.", strerror(errno));
        return -1;
    }

    int producer_affinity = 0;
    int consumer_affinity = 0;
    if(!same_core) consumer_affinity = 1;

    int consumer_priority = 86;
    int producer_priority = 99;

    /* One attribute object is configured for the consumer and then re-used for the producer. */
    pthread_attr_t attr;
    if(pthread_attr_init(&attr))
    {
        printf("Failed to initialize attributes.\n");
        return -1;
    }

    /* Without PTHREAD_EXPLICIT_SCHED the policy and priority set below would not come from attr. */
    if(pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED))
    {
        printf("Failed to disable scheduler inheritance.\n");
        return -1;
    }

    if(pthread_attr_setschedpolicy(&attr, SCHED_FIFO))
    {
        printf("Failed to set scheduler to SCHED_FIFO.\n");
        return -1;
    }

    /* Consumer: priority, CPU affinity, then thread creation. */
    sched_param param;
    if(!pthread_attr_getschedparam(&attr, &param))
    {
        param.sched_priority = consumer_priority;
        if(pthread_attr_setschedparam(&attr, &param))
        {
            printf("Unable to set thread priority.\n");
            return -1;
        }
    }
    else
    {
        printf("Unable to get params for setting priority.\n");
        return -1;
    }

    cpu_set_t cpu_set;
    CPU_ZERO(&cpu_set);
    CPU_SET(consumer_affinity, &cpu_set);
    if(pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpu_set))
    {
        printf("Unable to set CPU affinity.\n");
        return -1;
    }

    pthread_t consumer;
    if(pthread_create(&consumer, &attr, consumer_thread, NULL))
    {
        printf("Unable to create consumer thread.\n");
        return -1;
    }

    /* Producer: same attribute object, with priority and affinity overwritten. */
    if(!pthread_attr_getschedparam(&attr, &param))
    {
        param.sched_priority = producer_priority;
        if(pthread_attr_setschedparam(&attr, &param))
        {
            printf("Unable to set thread priority.\n");
            return -1;
        }
    }
    else
    {
        printf("Unable to get params for setting priority.\n");
        return -1;
    }

    CPU_ZERO(&cpu_set);
    CPU_SET(producer_affinity, &cpu_set);
    if(pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpu_set))
    {
        printf("Unable to set CPU affinity.\n");
        return -1;
    }

    pthread_t producer;
    if(pthread_create(&producer, &attr, producer_thread, NULL))
    {
        printf("Unable to create producer thread.\n");
        return -1;
    }

    /* The attribute object is no longer needed once both threads exist. */
    pthread_attr_destroy(&attr);

    pthread_join(producer, NULL);
    pthread_join(consumer, NULL);

    sem_destroy(&semaphore);
    return 0;
}
More information about the Xenomai
mailing list