#include "main.h"
#include "msg.h"
#include "mtime.h"
+#include "perf.h"
#include "generic.h"
printf ("Sending ping...\n");
+ int fd0 = cachemiss_init (0);
+ int fd1 = cachemiss_init (1);
+ int fd2 = cachemiss_init (2);
+ int fd3 = cachemiss_init (3);
+ int fd4 = cachemiss_init (4);
+ int fd5 = cachemiss_init (5);
for (int i = -1; i < nb_measurements; i++) {
usleep (TIMER / 10);
}
+ fprintf (stdout, "ping cache miss ratio: %.1f%%\n",
+ cachemiss_end (fd1) / (double)cachemiss_end (fd0) * 100.);
+ fprintf (stdout, "ping cache L1D miss ratio: %.1f%%\n",
+ cachemiss_end (fd3) / (double)cachemiss_end (fd2) * 100.);
+ fprintf (stdout, "ping cache L1I miss ratio: %.1f%%\n",
+ cachemiss_end (fd5) / (double)cachemiss_end (fd4) * 100.);
+
/* close communication between process */
if (current_mode == 1) {
printf ("Receiving pong...\n");
+ int fd0 = cachemiss_init (0);
+ int fd1 = cachemiss_init (1);
+ int fd2 = cachemiss_init (2);
+ int fd3 = cachemiss_init (3);
+ int fd4 = cachemiss_init (4);
+ int fd5 = cachemiss_init (5);
+
for (int i = -1; i < nb_measurements; i++) {
usleep (TIMER);
}
}
+ fprintf (stdout, "pong cache miss ratio: %.1f%%\n",
+ cachemiss_end (fd1) / (double)cachemiss_end (fd0) * 100.);
+ fprintf (stdout, "pong cache L1D miss ratio: %.1f%%\n",
+ cachemiss_end (fd3) / (double)cachemiss_end (fd2) * 100.);
+ fprintf (stdout, "pong cache L1I miss ratio: %.1f%%\n",
+ cachemiss_end (fd5) / (double)cachemiss_end (fd4) * 100.);
+
/* close communication between process */
if (current_mode == 1) {
/* depend: */
/* cflags: */
-/* linker: generic.o main.o msg.o mtime.o stat.o -lm -lpthread -lrt */
+/* linker: generic.o main.o msg.o mtime.o perf.o stat.o -lm -lpthread -lrt */
#include <errno.h>
#include <fcntl.h>
--- /dev/null
+#define _GNU_SOURCE
+#include <asm/unistd.h>
+#include <linux/perf_event.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "perf.h"
+
+/**
+ * Open and start a hardware cache event counter for the calling process.
+ *
+ * mode selects the event:
+ *   0  generic cache references     1  generic cache misses
+ *   2  L1D read accesses            3  L1D read misses
+ *   4  L1I read accesses            5  L1I read misses
+ *
+ * Only user-space activity is counted (kernel and hypervisor are excluded).
+ * When the process is pinned to exactly one CPU the counter is bound to that
+ * CPU; otherwise it follows the process on any CPU.
+ *
+ * Returns the perf event file descriptor, to be passed to cachemiss_end(),
+ * or -1 on error (a message is printed on stderr).
+ */
+int cachemiss_init (int mode)
+{
+    cpu_set_t cpu_mask;
+    CPU_ZERO (&cpu_mask);
+    if (sched_getaffinity (0, sizeof (cpu_set_t), &cpu_mask) != 0) {
+        fprintf (stderr, "error: sched_getaffinity\n");
+        return -1;
+    }
+
+    /* Bind to a CPU only when the affinity mask holds exactly one.  CPU_COUNT
+     * is the number of CPUs in the mask, not the highest index, so the whole
+     * mask must be scanned to find which CPU it is. */
+    int cpu = -1;
+    if (CPU_COUNT (&cpu_mask) == 1) {
+        for (int i = 0; i < CPU_SETSIZE; i++) {
+            if (CPU_ISSET (i, &cpu_mask)) {
+                cpu = i;
+                break;
+            }
+        }
+    }
+
+    struct perf_event_attr pe = {0};
+    pe.size = sizeof (struct perf_event_attr);
+    switch (mode) {
+    case 0:
+        pe.type = PERF_TYPE_HARDWARE;
+        pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
+        break;
+    case 1:
+        pe.type = PERF_TYPE_HARDWARE;
+        pe.config = PERF_COUNT_HW_CACHE_MISSES;
+        break;
+    /* PERF_TYPE_HW_CACHE config layout: cache id | (op << 8) | (result << 16) */
+    case 2:
+        pe.type = PERF_TYPE_HW_CACHE;
+        pe.config = PERF_COUNT_HW_CACHE_L1D |
+                    (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                    (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);
+        break;
+    case 3:
+        pe.type = PERF_TYPE_HW_CACHE;
+        pe.config = PERF_COUNT_HW_CACHE_L1D |
+                    (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                    (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
+        break;
+    case 4:
+        pe.type = PERF_TYPE_HW_CACHE;
+        pe.config = PERF_COUNT_HW_CACHE_L1I |
+                    (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                    (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);
+        break;
+    case 5:
+        pe.type = PERF_TYPE_HW_CACHE;
+        pe.config = PERF_COUNT_HW_CACHE_L1I |
+                    (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                    (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
+        break;
+    default:
+        /* Without this, an unknown mode would silently open event 0. */
+        fprintf (stderr, "error: unknown cache counter mode %d\n", mode);
+        return -1;
+    }
+    pe.disabled = 1;
+    pe.exclude_kernel = 1;
+    pe.exclude_hv = 1;
+
+    /* pid 0: calling process; group_fd -1: no event group; no flags. */
+    int fd = (int) syscall (__NR_perf_event_open, &pe, 0, cpu, -1, 0UL);
+    if (fd == -1) {
+        fprintf (stderr, "Error opening leader %llx\n",
+                 (unsigned long long) pe.config);
+        return -1;
+    }
+
+    if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) == -1 ||
+        ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
+        fprintf (stderr, "Error starting counter %llx\n",
+                 (unsigned long long) pe.config);
+        close (fd);
+        return -1;
+    }
+
+    return fd;
+}
+
+/**
+ * Stop the counter behind fd, read its value and release the descriptor.
+ *
+ * fd is a descriptor returned by cachemiss_init().  A valid descriptor is
+ * always closed, so it must not be used again afterwards.
+ *
+ * Returns the event count, or 0 when fd is negative or the counter could not
+ * be read.
+ */
+long long int cachemiss_end (int fd)
+{
+    /* A negative fd means cachemiss_init() failed (and already reported it):
+     * there is nothing to stop, read or close. */
+    if (fd < 0) {
+        return 0;
+    }
+
+    /* A failed disable does not prevent reading the counter; just warn. */
+    if (ioctl (fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
+        fprintf (stderr, "error: cannot disable cache counter\n");
+    }
+
+    long long int count = 0;
+    ssize_t got = read (fd, &count, sizeof count);
+    if (got != (ssize_t) sizeof count) {
+        /* Never hand back a partially written value. */
+        fprintf (stderr, "error: cannot read cache counter\n");
+        count = 0;
+    }
+
+    close (fd);
+
+    return count;
+}
--- /dev/null
+#ifndef __PERF_H__
+#define __PERF_H__
+
+/*
+ * Open and enable a hardware cache event counter.  mode selects the event:
+ * 0/1 generic cache references/misses, 2/3 L1D read accesses/misses,
+ * 4/5 L1I read accesses/misses.  Returns a file descriptor for the counter,
+ * or -1 on error.
+ */
+int cachemiss_init (int mode);
+/*
+ * Disable the counter behind fd, read its value, close fd and return the
+ * value read.
+ */
+long long int cachemiss_end (int fd);
+
+#endif /* __PERF_H__ */
/* depend: */
/* cflags: */
-/* linker: generic.o main.o msg.o mtime.o stat.o -lm -lpthread -lrt */
+/* linker: generic.o main.o msg.o mtime.o perf.o stat.o -lm -lpthread -lrt */
+#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
+/*
+ * Write len bytes of buffer to fdout.
+ * Returns 1 when write() reports an error (-1), 0 otherwise; a short write
+ * is not treated as an error.
+ */
int ping_test (char *buffer, int len)
{
+/*
+ * Disabled (#if 0) busy-work executed before the write when enabled.
+ * INST0 is a single "add" incrementing the counter; each INSTn repeats
+ * INSTn-1 sixteen times, so INST4 expands to 16^4 = 65536 adds.  Each loop
+ * iteration runs two such batches and the loop ends once the counter
+ * reaches 10000000.
+ */
+#if 0
+#ifdef __aarch64__ /* 64 bits */
+ int64_t instructions = 0;
+#else /* 32 bits */
+ int32_t instructions = 0;
+#endif
+ while (instructions < 10000000) {
+#define INST0 "add %[i], %[i], #1\n\t"
+#define INST1 INST0 INST0 INST0 INST0 INST0 INST0 INST0 INST0 \
+ INST0 INST0 INST0 INST0 INST0 INST0 INST0 INST0
+#define INST2 INST1 INST1 INST1 INST1 INST1 INST1 INST1 INST1 \
+ INST1 INST1 INST1 INST1 INST1 INST1 INST1 INST1
+#define INST3 INST2 INST2 INST2 INST2 INST2 INST2 INST2 INST2 \
+ INST2 INST2 INST2 INST2 INST2 INST2 INST2 INST2
+#define INST4 INST3 INST3 INST3 INST3 INST3 INST3 INST3 INST3 \
+ INST3 INST3 INST3 INST3 INST3 INST3 INST3 INST3
+ asm volatile (INST4 : [i] "+r" (instructions) :: "cc");
+ asm volatile (INST4 : [i] "+r" (instructions) :: "cc");
+ }
+#endif
return (write (fdout, buffer, len) == -1);
}
/* depend: */
/* cflags: */
-/* linker: generic.o main.o msg.o mtime.o stat.o -lm -lpthread -lrt */
+/* linker: generic.o main.o msg.o mtime.o perf.o stat.o -lm -lpthread -lrt */
#include <arpa/inet.h>
#include <netdb.h>
/* depend: */
/* cflags: */
-/* linker: main.o msg.o mtime.o stat.o -lm -lpthread -lrt */
+/* linker: main.o msg.o mtime.o stat.o perf.o -lm -lpthread -lrt */
#define __TESTNAME__
char *message = "UDP socket latency (1k)";
/* depend: */
/* cflags: */
-/* linker: main.o msg.o mtime.o stat.o -lm -lpthread -lrt */
+/* linker: main.o msg.o mtime.o perf.o stat.o -lm -lpthread -lrt */
#define __TESTNAME__
char *message = "UDP socket latency (8k)";