summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrédéric Bonnard <frediz@debian.org>2021-10-21 12:37:09 +0200
committerFrédéric Bonnard <frediz@debian.org>2021-10-21 12:37:09 +0200
commit0e7a0e557588a919ed154bae52c5d35e6424cb36 (patch)
tree6f899beb5ea0cb5fd7be2e4a4b6d3331be32af61
parent1441481020f90911c7f59010e23f67676da6abb7 (diff)
parentca8679d9f8af343f03e464c58df42acb0de93638 (diff)
Update to upstream 1.2.1
[git-debrebase anchor: new upstream 1.2.1, merge]
-rw-r--r--.gitignore1
-rw-r--r--Changelog.md13
-rw-r--r--Doxyfile-html28
-rw-r--r--Doxyfile-man28
-rw-r--r--Makefile46
-rw-r--r--Makefile.rules6
-rw-r--r--Makefile.vars7
-rw-r--r--README.md24
-rw-r--r--afutests/afp/README.md59
-rw-r--r--afutests/afp/ocxl_afp3.c544
-rw-r--r--afutests/afp/ocxl_afp3.h48
-rw-r--r--afutests/afp/ocxl_afp3_latency.c530
-rw-r--r--afutests/memcpy/README.md34
-rw-r--r--afutests/memcpy/ocxl_memcpy.c847
-rw-r--r--afutests/reset/README.md25
-rwxr-xr-xafutests/reset/ocxl_reset_tests.sh150
-rw-r--r--src/afu.c4
-rw-r--r--src/internal.c5
-rw-r--r--src/irq.c4
-rw-r--r--src/mmio.c3
-rw-r--r--unittests/unittests.c6
-rw-r--r--unittests/virtocxl.c5
22 files changed, 2335 insertions, 82 deletions
diff --git a/.gitignore b/.gitignore
index 87667b3..cd346ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ src/libocxl_info.h
obj/*
testobj/*
sampleobj/*
+afuobj/
docs
cppcheck.xml
kernel/include/misc/ocxl.h
diff --git a/Changelog.md b/Changelog.md
index 85a28eb..43ae128 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -1,3 +1,16 @@
+# 1.2.1
+ - Set library version correctly
+ - Fix test build
+
+# 1.2.0
+This is mostly a bug fix release, there's no major new feature:
+ - Fix support for device names with a hexadecimal domain name
+ - Allow to override path to read driver info (LIBOCXL_SYSPATH)
+ - Documentation enhancements
+ - Makefile/build enhancements
+ - Add tests for debug AFUs (memcpy, afp)
+
+
# 1.1.0
## User facing changes
- Requires Linux headers >= 4.18 to compile
diff --git a/Doxyfile-html b/Doxyfile-html
index 6a82d7e..fd23764 100644
--- a/Doxyfile-html
+++ b/Doxyfile-html
@@ -240,12 +240,6 @@ TAB_SIZE = 4
ALIASES =
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST =
-
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
# only. Doxygen will then generate output that is more tailored for C. For
# instance, some of the names that are used will be different. The list of all
@@ -1082,13 +1076,6 @@ VERBATIM_HEADERS = YES
ALPHABETICAL_INDEX = YES
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX = 5
-
# In case all classes in a project start with a common prefix, all classes will
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
# can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -2148,12 +2135,6 @@ EXTERNAL_GROUPS = YES
EXTERNAL_PAGES = YES
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH = /usr/bin/perl
-
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
@@ -2167,15 +2148,6 @@ PERL_PATH = /usr/bin/perl
CLASS_DIAGRAMS = YES
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH =
-
# You can include diagrams made with dia in doxygen documentation. Doxygen will
# then run dia to produce the diagram and insert it in the documentation. The
# DIA_PATH tag allows you to specify the directory where the dia binary resides.
diff --git a/Doxyfile-man b/Doxyfile-man
index 56d9c06..f3c33c8 100644
--- a/Doxyfile-man
+++ b/Doxyfile-man
@@ -240,12 +240,6 @@ TAB_SIZE = 4
ALIASES =
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST =
-
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
# only. Doxygen will then generate output that is more tailored for C. For
# instance, some of the names that are used will be different. The list of all
@@ -1081,13 +1075,6 @@ VERBATIM_HEADERS = YES
ALPHABETICAL_INDEX = YES
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX = 5
-
# In case all classes in a project start with a common prefix, all classes will
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
# can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -2147,12 +2134,6 @@ EXTERNAL_GROUPS = YES
EXTERNAL_PAGES = YES
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH = /usr/bin/perl
-
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
@@ -2166,15 +2147,6 @@ PERL_PATH = /usr/bin/perl
CLASS_DIAGRAMS = YES
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH =
-
# You can include diagrams made with dia in doxygen documentation. Doxygen will
# then run dia to produce the diagram and insert it in the documentation. The
# DIA_PATH tag allows you to specify the directory where the dia binary resides.
diff --git a/Makefile b/Makefile
index 48d3cae..352d4fc 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ include Makefile.vars
OBJS = obj/afu.o obj/internal.o obj/irq.o obj/mmio.o obj/setup.o
TEST_OBJS = testobj/afu.o testobj/internal.o testobj/irq.o testobj/mmio.o testobj/setup.o
-CFLAGS += -I src/include -I kernel/include -fPIC -D_FILE_OFFSET_BITS=64
+override CFLAGS += -I src/include -I kernel/include -fPIC -D_FILE_OFFSET_BITS=64
VERS_LIB = $(VERSION_MAJOR).$(VERSION_MINOR)
LIBNAME = libocxl.so.$(VERS_LIB)
@@ -12,24 +12,32 @@ SONAMEOPT = -Wl,-soname,$(LIBSONAME)
DOCDIR = docs
-all: check_ocxl_header obj/$(LIBSONAME) obj/libocxl.so obj/libocxl.a sampleobj/memcpy
+all: check_ocxl_header obj/$(LIBSONAME) obj/libocxl.so obj/libocxl.a \
+ sampleobj/memcpy afuobj/ocxl_memcpy afuobj/ocxl_afp3 \
+ afuobj/ocxl_afp3_latency afuobj/ocxl_reset_tests.sh
HAS_WGET = $(shell /bin/which wget > /dev/null 2>&1 && echo y || echo n)
HAS_CURL = $(shell /bin/which curl > /dev/null 2>&1 && echo y || echo n)
-# Update this to test a single feature from the most recent header we require:
-CHECK_OCXL_HEADER_IS_UP_TO_DATE = $(shell /bin/echo -e \\\#include $(1)\\\nvoid test\(struct ocxl_ioctl_features test\)\; | \
+# Update this to test a single feature from the most recent header we require.
+#
+# Note that a backward-incompatible change in make 4.3 modified the
+# handling of \# in a function invocation, so we define the test code in
+# a separate variable to work around it and keep consistent behavior
+# across all versions of make
+TEST_CODE = '\#include <misc/ocxl.h>\nvoid test(struct ocxl_ioctl_features test);'
+CHECK_OCXL_HEADER_IS_UP_TO_DATE = $(shell /bin/echo -e $(TEST_CODE) | \
$(CC) $(CFLAGS) -Werror -x c -S -o /dev/null - > /dev/null 2>&1 && echo y || echo n)
check_ocxl_header:
-ifeq ($(call CHECK_OCXL_HEADER_IS_UP_TO_DATE,'<misc/ocxl.h>'),n)
+ifeq (${CHECK_OCXL_HEADER_IS_UP_TO_DATE},n)
mkdir -p kernel/include/misc
ifeq (${HAS_WGET},y)
- $(call Q,WGET kernel/include/misc/ocxl.h, wget -O kernel/include/misc/ocxl.h -q http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h)
+ $(call Q,WGET kernel/include/misc/ocxl.h, wget -O kernel/include/misc/ocxl.h -q https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h)
else ifeq (${HAS_CURL},y)
- $(call Q,CURL kernel/include/misc/ocxl.h, curl -o kernel/include/misc/ocxl.h -s http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h)
+ $(call Q,CURL kernel/include/misc/ocxl.h, curl -L -o kernel/include/misc/ocxl.h -s https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h)
else
- $(error 'ocxl.h is non-existant or out of date, Download from http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h and place in ${PWD}/kernel/include/misc/ocxl.h')
+ $(error 'ocxl.h is non-existant or out of date, Download from https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h and place in ${PWD}/kernel/include/misc/ocxl.h')
endif
endif
@@ -56,12 +64,28 @@ obj/libocxl.a: $(OBJS)
sampleobj/memcpy: sampleobj/memcpy.o-memcpy
$(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o sampleobj/memcpy sampleobj/memcpy.o-memcpy obj/libocxl.a, sampleobj/memcpy)
+afuobj/ocxl_memcpy: afuobj/ocxl_memcpy.o-memcpy
+ $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o afuobj/ocxl_memcpy afuobj/ocxl_memcpy.o-memcpy obj/libocxl.a, afuobj/ocxl_memcpy)
+
+afuobj/ocxl_afp3: afuobj/ocxl_afp3.o-afp
+ $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o afuobj/ocxl_afp3 afuobj/ocxl_afp3.o-afp obj/libocxl.a, afuobj/ocxl_afp3)
+
+afuobj/ocxl_afp3_latency: afuobj/ocxl_afp3_latency.o-afp
+ $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o afuobj/ocxl_afp3_latency afuobj/ocxl_afp3_latency.o-afp obj/libocxl.a, afuobj/ocxl_afp3_latency)
+
+# Stage the reset test script next to the AFU test binaries. The order-only
+# prerequisite guarantees afuobj/ exists before the copy (e.g. under make -j,
+# or when this target is requested on its own).
+afuobj/ocxl_reset_tests.sh: afutests/reset/ocxl_reset_tests.sh | afuobj
+	cp afutests/reset/ocxl_reset_tests.sh afuobj/ocxl_reset_tests.sh
+	chmod +x afuobj/ocxl_reset_tests.sh
+
testobj:
mkdir testobj
sampleobj:
mkdir sampleobj
+afuobj:
+ mkdir afuobj
+
testobj/libocxl.a: $(TEST_OBJS)
$(call Q,AR, $(AR) rcs testobj/libocxl-temp.a $(TEST_OBJS), testobj/libocxl-temp.a)
$(call Q,STATIC_SYMS, $(NM) testobj/libocxl-temp.a | grep ' t ' | grep -v __ | cut -d ' ' -f 3 > testobj/static-syms)
@@ -85,8 +109,8 @@ cppcheck:
cppcheck-xml:
cppcheck --enable=all -j 4 -q src/*.c src/include/libocxl.h --xml-version=2 2>cppcheck.xml
-precommit: clean all docs cppcheck
- astyle --style=linux --indent=tab=8 --max-code-length=120 src/*.c src/*.h src/include/*.h samples/*/*.c
+precommit: clean all docs cppcheck afutests
+ astyle --style=linux --indent=tab=8 --max-code-length=120 src/*.c src/*.h src/include/*.h samples/*/*.c afutests/*/*.c
$(call Q, SYMVER-CHECK, nm obj/$(LIBNAME) | grep ' t ocxl' && (echo "Symbols are missing from symver.map" && exit 1) || true)
docs:
@@ -97,7 +121,7 @@ docs:
$(call Q,DOCS-HTML, doxygen Doxyfile-html,)
clean:
- rm -rf obj testobj sampleobj docs src/libocxl_info.h
+ rm -rf obj testobj sampleobj afuobj docs src/libocxl_info.h
install: all docs
mkdir -p $(DESTDIR)$(libdir)
diff --git a/Makefile.rules b/Makefile.rules
index ab04e5d..60fb683 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -31,6 +31,12 @@ testobj/%.o-test : unittests/%.c testobj/libocxl.a | testobj
sampleobj/%.o-memcpy : samples/memcpy/%.c obj/libocxl.a | sampleobj
$(call Q,CC, $(CC) $(CPPFLAGS) $(SAMPLECFLAGS) -c -o $@ $<, $@)
+afuobj/%.o-memcpy : afutests/memcpy/%.c obj/libocxl.a | afuobj
+ $(call Q,CC, $(CC) $(CPPFLAGS) $(AFUTESTCFLAGS) -c -o $@ $<, $@)
+
+afuobj/%.o-afp : afutests/afp/%.c obj/libocxl.a | afuobj
+ $(call Q,CC, $(CC) $(CPPFLAGS) $(AFUTESTCFLAGS) -c -o $@ $<, $@)
+
datadir ?= $(PREFIX)/share
includedir ?= $(PREFIX)/include
mandir ?= $(datadir)/man
diff --git a/Makefile.vars b/Makefile.vars
index b0fe46e..212ed2e 100644
--- a/Makefile.vars
+++ b/Makefile.vars
@@ -6,10 +6,10 @@ MAKEFLAGS += -rR
VERSION_MAJOR = 1
# Change VERSION_MINOR on new features
-VERSION_MINOR = 1
+VERSION_MINOR = 2
# Change VERSION_PATCH on each tag
-VERSION_PATCH = 0
+VERSION_PATCH = 1
AR = $(CROSS_COMPILE)ar
AS = $(CROSS_COMPILE)as
@@ -19,4 +19,5 @@ NM = $(CROSS_COMPILE)nm
OBJCOPY = $(CROSS_COMPILE)objcopy
CFLAGS ?= -g -Wall -Wextra -O2 -m64 -std=c11 -DLIBOCXL_SUPPRESS_INACCESSIBLE_WARNINGS
TESTCFLAGS += $(CFLAGS) -O0 -DTEST_ENVIRONMENT=1 -I src -I testobj -pthread
-SAMPLECFLAGS += $(CFLAGS) -std=gnu11 -I src -I testobj -pthread \ No newline at end of file
+SAMPLECFLAGS += $(CFLAGS) -std=gnu11 -I src -I testobj -pthread
+AFUTESTCFLAGS += $(CFLAGS) -std=gnu11 -I src -I testobj -pthread
diff --git a/README.md b/README.md
index b0f71ab..6a4d209 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,17 @@ to the IRQ.
Functions are provide to allow 32 & 64 bit access to the global and per-PASID MMIO
areas on the the AFU. Endian conversion is handled automatically.
+## Installation
+LibOCXL is available in popular Linux distributions for PPC64le. To install:
+### Redhat
+`yum install libocxl-devel`
+
+### Fedora
+`dnf install libocxl-devel`
+
+### Ubuntu/Debian
+`apt install libocxl-dev`
+
# Building
## Prerequisites
1. A GCC toolchain with libc (if cross compiling), crosstool-ng can build a suitable toolchain
@@ -70,6 +81,14 @@ A typical use of libocxl will follow this pattern:
ocxl\_mmio\_read64().
10. **Termination:** ocxl\_afu\_close() will free all resources associated with an AFU handle.
+# Documentation
+
+API documentation is generated using [Doxygen](http://www.stack.nl/~dimitri/doxygen/) in both HTML and man page format.
+
+To build the documentation, run `make docs`.
+
+An online version of the libocxl HTML documentation can be found at [opencapi.github.io](https://opencapi.github.io).
+
# Development
The following environment variables may be set (to 1 or "YES") to assist with development:
@@ -79,6 +98,11 @@ The following environment variables may be set (to 1 or "YES") to assist with de
**LIBOCXL_VERBOSE_ERRORS_ALL** Force verbose errors to be emitted for any failed LibOCXL calls, unless explicitly disabled.
+
+To assist with testing in various environments, the following environment variable can also be set:
+
+**LIBOCXL_SYSPATH** Override the default path (/sys/class/ocxl) used by the library to read driver information.
+
Patches may be submitted via Github pull requests. Please prepare your patches
by running `make precommit` before committing your work, and addressing any warnings & errors reported.
Patches must compile cleanly with the latest stable version of GCC to be accepted.
diff --git a/afutests/afp/README.md b/afutests/afp/README.md
new file mode 100644
index 0000000..1c36ca5
--- /dev/null
+++ b/afutests/afp/README.md
@@ -0,0 +1,59 @@
+ocxl_afp3, ocxl_afp3_latency
+============================
+
+`ocxl_afp3` and `ocxl_afp3_latency` are test programs for the OpenCAPI AFU IBM,AFP3 (used for
+development).
+
+Requirements
+------------
+
+The OpenCAPI card must be flashed with an IBM,AFP3 AFU image.
+
+Usage
+-----
+
+ $ ../../afuobj/ocxl_afp3
+
+```
+ Usage: ocxl_afp3 [ options ]
+ --tags_ld Number of tags for loads. Default=0
+ --tags_st Number of tags for stores. Default=7
+ 0 - 0 tags (disabled)
+ 1 - 1 tag
+ 2 - 2 tags
+ 3 - 4 tags
+ 4 - 16 tags
+ 5 - 64 tags
+ 6 - 256 tags
+ 7 - 512 tags
+ --size_ld Data size, in Bytes, for loads.
+ Supported values: 64, 128, 256. Default=128
+ --size_st Data size, in Bytes, for stores.
+ Supported values: 64, 128, 256. Default=128
+ --npu_ld Use rd_wnitc.n for loads. Default is rd_wnitc
+ --npu_st Use dma_w.n for stores. Default is dma_w
+ --num Number of times to check perf counts. Default is 3
+ --wait Amount of seconds to wait between perf count reads.
+ Default is 2
+ --prefetch Initialize buffer memory
+ --offsetmask Determines how much of buffer to use.
+ Default 4M. Valid Range: 4K-512M.
+ Format: NumberLetter, e.g. 4K, 512K, 1M, 512M
+ --device Device to open instead of first AFP AFU found
+ --verbose Verbose output
+ --help Print this message
+```
+
+
+ $ ../../afuobj/ocxl_afp3_latency
+
+```
+ Usage: ocxl_afp3_latency [ options ]
+ -i 10000 --iterations Default=10000
+ -p 64 to 512 --pong Pong size from card to host (Bytes) Default=64
+ -x --extraread Add an DMA extraread before the DMA Wr Default is no
+ -f --forever Run until CTRL+C, Default=no
+ -d --device Device to open instead of first AFP AFU found
+ -v --verbose Verbose output
+ -h --help Print this message
+```
diff --git a/afutests/afp/ocxl_afp3.c b/afutests/afp/ocxl_afp3.c
new file mode 100644
index 0000000..e418b3f
--- /dev/null
+++ b/afutests/afp/ocxl_afp3.c
@@ -0,0 +1,544 @@
+/*
+ * Copyright 2018 International Business Machines
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "libocxl.h"
+#include "ocxl_afp3.h"
+
+#define AFU_NAME "IBM,AFP3"
+
+#define CACHELINE_BYTES 128
+#define PAGE_BYTES 4096
+#define AFU_MMIO_REG_SIZE 0x4000000
+#define BUF_512MB 536870912
+#define BUF_4MB 4194304
+
+
+static int verbose;	// stored to 1 by getopt_long() through the "verbose" long option entry
+static int tags_ld = 0;	// --tags_ld: tag-count encoding for loads (table in print_help); scaled by 512 into the WED
+static int tags_st = 7;	// --tags_st: tag-count encoding for stores; scaled by 8 into the WED
+static int size_ld = 128;	// --size_ld: load size in bytes; main accepts only 64, 128 or 256
+static int size_st = 128;	// --size_st: store size in bytes; main accepts only 64, 128 or 256
+static int npu_ld = 0;	// scaled by 64 into the WED; NOTE(review): no entry in long_options sets it although the help lists --npu_ld
+static int npu_st = 0;	// added as-is into the WED; NOTE(review): no entry in long_options sets it although the help lists --npu_st
+static uint16_t numLoops = 3;	// --num: iteration count of the perf counter read loop
+static uint16_t waitTime = 2;	// --wait: argument passed to sleep(), i.e. seconds
+static uint64_t offsetmask = 0x3FF; // Default to 4MB; shifted left by 12 before being written to AFUBufmask_AFP_REGISTER
+
+static uint64_t enableAfu = 0x8000000000000000;	// value written to AFUEnable_AFP_REGISTER
+static uint64_t disableAfu = 0x0000000000000000;	// NOTE(review): not referenced in the visible part of this file
+static uint64_t resetCnt = 0x4000000000000000;	// value written to AFUControl_AFP_REGISTER before enabling
+
+/*
+ * Print the command-line usage summary on stdout.
+ *
+ * name: program name shown on the "Usage:" line (main passes argv[0]).
+ *
+ * The "Default" figures for the tag, size, loop and wait options are printed
+ * from the current values of the matching file-scope variables, so when this
+ * is called after option parsing has modified them, the value shown is the
+ * one supplied by the user rather than the built-in default.
+ */
+static void print_help(char *name)
+{
+	printf("Usage: %s [ options ]\n", name);
+	printf("\t--tags_ld \tNumber of tags for loads. Default=%d\n", tags_ld);
+	printf("\t--tags_st \tNumber of tags for stores. Default=%d\n", tags_st);
+	/* Encoding shared by --tags_ld and --tags_st */
+	printf("\t \t 0 - 0 tags (disabled)\n");
+	printf("\t \t 1 - 1 tag\n");
+	printf("\t \t 2 - 2 tags\n");
+	printf("\t \t 3 - 4 tags\n");
+	printf("\t \t 4 - 16 tags\n");
+	printf("\t \t 5 - 64 tags\n");
+	printf("\t \t 6 - 256 tags\n");
+	printf("\t \t 7 - 512 tags\n");
+	printf("\t--size_ld \tData size, in Bytes, for loads. Supported values: 64, 128, 256. Default=%d\n", size_ld);
+	printf("\t--size_st \tData size, in Bytes, for stores. Supported values: 64, 128, 256. Default=%d\n", size_st);
+	printf("\t--npu_ld \tUse rd_wnitc.n for loads. Default is rd_wnitc\n");
+	printf("\t--npu_st \tUse dma_w.n for stores. Default is dma_w\n");
+	printf("\t--num \tNumber of times to check perf counts, default is %d\n", numLoops);
+	printf("\t--wait \tAmount of seconds to wait between perf count reads, default is %d\n", waitTime);
+	printf("\t--prefetch \tInitialize buffer memory\n");
+	/* The offsetmask initializer is 0x3FF, i.e. the "4M" setting, not 512K */
+	printf("\t--offsetmask\tDetermines how much of buffer to use. Default 4M. Valid Range: 4K-512M. Format: NumberLetter, e.g. 4K, 512K, 1M, 512M\n");
+	printf("\t--device \tDevice to open instead of first AFP AFU found\n");
+	printf("\t--verbose \tVerbose output\n");
+	printf("\t--help \tPrint this message\n");
+	printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int opt;
+ int rc;
+ int size_enc_ld;
+ int size_enc_st;
+ int option_index = 0;
+ int prefetch = 0;
+ uint64_t pasid;
+ uint64_t wed_in = 0;
+ uint64_t *buffer;
+ ocxl_afu_h afu_h;
+ ocxl_err err;
+ ocxl_mmio_h global;
+ char *device = NULL;
+
+ // Parse parameters
+ static struct option long_options[] = {
+ {"tags_ld", required_argument, 0, 'a'},
+ {"tags_st", required_argument, 0, 'b'},
+ {"size_ld", required_argument, 0, 'y'},
+ {"size_st", required_argument, 0, 'z'},
+ {"num", required_argument, 0, 'n'},
+ {"wait", required_argument, 0, 'w'},
+ {"prefetch", no_argument, 0, 'p'},
+ {"offsetmask", required_argument, 0, 'o'},
+ {"verbose", no_argument, &verbose, 1 },
+ {"help", no_argument, 0, 'h'},
+ {"device", required_argument, 0, 'd'},
+ {NULL, 0, 0, 0}
+ };
+
+ while ((opt = getopt_long(argc, argv, "avhc:d:", long_options, &option_index)) >= 0) {
+ switch (opt) {
+ case 0:
+ case 'v':
+ break;
+ case 'a':
+ tags_ld = strtoul(optarg, NULL, 0);
+ break;
+ case 'b':
+ tags_st = strtoul(optarg, NULL, 0);
+ break;
+ case 'y':
+ size_ld = strtoul(optarg, NULL, 0);
+ break;
+ case 'z':
+ size_st = strtoul(optarg, NULL, 0);
+ break;
+ case 'n':
+ numLoops = (uint16_t) strtoul(optarg, NULL, 0);
+ break;
+ case 'w':
+ waitTime = (uint16_t) strtoul(optarg, NULL, 0);
+ break;
+ case 'p':
+ prefetch = 1;
+ break;
+ case 'o':
+ if (!strcasecmp(optarg, "4K")) {
+ offsetmask = 0x00;
+ } else if (!strcasecmp(optarg, "8K")) {
+ offsetmask = 0x01;
+ } else if (!strcasecmp(optarg, "16K")) {
+ offsetmask = 0x03;
+ } else if (!strcasecmp(optarg, "32K")) {
+ offsetmask = 0x07;
+ } else if (!strcasecmp(optarg, "64K")) {
+ offsetmask = 0x0F;
+ } else if (!strcasecmp(optarg, "128K")) {
+ offsetmask = 0x1F;
+ } else if (!strcasecmp(optarg, "256K")) {
+ offsetmask = 0x3F;
+ } else if (!strcasecmp(optarg, "512K")) {
+ offsetmask = 0x7F;
+ } else if (!strcasecmp(optarg, "1M")) {
+ offsetmask = 0xFF;
+ } else if (!strcasecmp(optarg, "2M")) {
+ offsetmask = 0x1FF;
+ } else if (!strcasecmp(optarg, "4M")) {
+ offsetmask = 0x3FF;
+ } else if (!strcasecmp(optarg, "8M")) {
+ offsetmask = 0x7FF;
+ } else if (!strcasecmp(optarg, "16M")) {
+ offsetmask = 0xFFF;
+ } else if (!strcasecmp(optarg, "32M")) {
+ offsetmask = 0x1FFF;
+ } else if (!strcasecmp(optarg, "64M")) {
+ offsetmask = 0x3FFF;
+ } else if (!strcasecmp(optarg, "128M")) {
+ offsetmask = 0x7FFF;
+ } else if (!strcasecmp(optarg, "256M")) {
+ offsetmask = 0xFFFF;
+ } else if (!strcasecmp(optarg, "512M")) {
+ offsetmask = 0x1FFFF;
+ } else if (!strcasecmp(optarg, "1G")) {
+ offsetmask = 0x3FFFF;
+ } else if (!strcasecmp(optarg, "2G")) {
+ offsetmask = 0x7FFFF;
+ } else if (!strcasecmp(optarg, "4G")) {
+ offsetmask = 0xFFFFF;
+ } else {
+ fprintf(stderr, "Illegal value entered for --offsetmask argument = 0x%lx Must be string: 4K-512M\n", offsetmask);
+ print_help(argv[0]);
+ return -1;
+ }
+ if (offsetmask > 0x3FF)
+ printf("Warning: offsetmask is bigger than the 4MB memory buffer allocated by this app\n");
+ break;
+ case 'd':
+ device = optarg;
+ break;
+ case 'h':
+ print_help(argv[0]);
+ return 0;
+ break;
+ default:
+ print_help(argv[0]);
+ return -1;
+ }
+ }
+
+ offsetmask <<= 12;
+
+ switch (size_ld) {
+ case 64:
+ size_enc_ld = 1;
+ break;
+ case 128:
+ size_enc_ld = 2;
+ break;
+ case 256:
+ size_enc_ld = 3;
+ break;
+ default:
+ fprintf(stderr, "Illegal value entered for --size_ld argument = %d\n", size_ld);
+ print_help(argv[0]);
+ return -1;
+ }
+
+ switch (size_st) {
+ case 64:
+ size_enc_st = 1;
+ break;
+ case 128:
+ size_enc_st = 2;
+ break;
+ case 256:
+ size_enc_st = 3;
+ break;
+ default:
+ fprintf(stderr, "Illegal value entered for --size_st argument = %d\n", size_st);
+ print_help(argv[0]);
+ return -1;
+ }
+
+ ocxl_enable_messages(OCXL_ERRORS);
+
+ if (verbose) {
+ printf("Calling ocxl_afu_open\n");
+ }
+ if (device)
+ err = ocxl_afu_open_from_dev(device, &afu_h);
+ else
+ err = ocxl_afu_open(AFU_NAME, &afu_h);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_afu_open() failed for %s, error %d\n",
+ device ? device : AFU_NAME, err);
+ return err;
+ }
+
+ // attach to afu - attach does not "start" the afu anymore
+ if (verbose) {
+ printf("Calling ocxl_afu_attach\n");
+ }
+ err = ocxl_afu_attach(afu_h, OCXL_ATTACH_FLAGS_NONE);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_afu_attach: %d", err);
+ return err;
+ }
+
+ // map the global mmio space
+ err = ocxl_mmio_map(afu_h, OCXL_GLOBAL_MMIO, &global);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "global ocxl_mmio_map: %d", err);
+ return err;
+ }
+
+
+ // Allocate a buffer for "to" memory buffer. Force alignment of address on cacheline boundary.
+ rc = posix_memalign((void **) &buffer, BUF_4MB, BUF_4MB);
+ if (rc) {
+ fprintf(stderr, "Memory alloc failed for buffer: %d", rc);
+ return rc;
+ }
+ if (verbose)
+ printf("Allocated Buffer memory @ %p\n", buffer);
+
+ if (prefetch) {
+ printf("Initializing allocated memory\n");
+ memset(buffer, 0x66, BUF_4MB);
+ }
+
+ // Get the PASID for the currently open context.
+ pasid = ocxl_afu_get_pasid(afu_h);
+ if (verbose)
+ printf("PASID = %ld\n", pasid);
+ err = ocxl_mmio_write64(global, AFUPASID_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, pasid);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64: %d", err);
+ return err;
+ }
+
+ // Initialize WED value
+ wed_in = (uint64_t) buffer + (tags_ld * 512) + (size_enc_ld * 128) + (npu_ld * 64) + (tags_st * 8) +
+ (size_enc_st * 2) + (npu_st);
+ if (verbose)
+ printf("WED = 0x%lx\n", wed_in);
+
+ err = ocxl_mmio_write64(global, AFUWED_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) wed_in);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64: %d", err);
+ return err;
+ }
+
+ if (verbose)
+ printf("BUFMASK = %lx\n", offsetmask);
+ err = ocxl_mmio_write64(global, AFUBufmask_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) offsetmask);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64: %d", err);
+ return err;
+ }
+
+ if (verbose)
+ printf("CONTROL_REG(reset) = %lx\n", resetCnt);
+ err = ocxl_mmio_write64(global, AFUControl_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) resetCnt);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64: %d", err);
+ return err;
+ }
+
+ // Set ENABLE value
+ if (verbose)
+ printf("ENABLE_REG = %lx\n", enableAfu);
+ err = ocxl_mmio_write64(global, AFUEnable_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) enableAfu);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64: %d", err);
+ return err;
+ }
+ printf("Parameters used: tags_ld=%d - size_ld=%d - tags_st=%d - size_st=%d\n",
+ tags_ld, size_ld, tags_st, size_st);
+
+ ////////////////////////////////////////////////////////////////////////
+ // Measure bandwidth
+ ////////////////////////////////////////////////////////////////////////
+ sleep(1);
+
+ uint16_t i;
+ struct timeval c0Time, c0Time_prev;
+ double c0TimeElapsed, cyclesElapsed;
+ uint64_t count0_prev, count1_prev, count2_prev, count3_prev, count4_prev, count5_prev, count6_prev, count7_prev;
+ uint64_t count0, count1, count2, count3, count4, count5, count6, count7;
+ uint64_t delta_cnt0, delta_cnt1, delta_cnt2, delta_cnt3, delta_cnt4, delta_cnt5, delta_cnt6, delta_cnt7;
+ double bw_cnt0, bw_cnt1, bw_cnt2, bw_cnt3, bw_cnt4, bw_cnt5, bw_cnt6, bw_cnt7;
+ double bpc_tb_cnt0, bpc_tb_cnt1, bpc_tb_cnt2, bpc_tb_cnt3, bpc_tb_cnt4, bpc_tb_cnt5, bpc_tb_cnt6, bpc_tb_cnt7;
+ double bw_tb_cnt0, bw_tb_cnt1, bw_tb_cnt2, bw_tb_cnt3, bw_tb_cnt4, bw_tb_cnt5, bw_tb_cnt6, bw_tb_cnt7;
+
+
+ printf("Counter Curr Count (64B) Prev Count Count Diff. BW (GB/s) using App clock\tBytes or Events per AFP cycle\t\tBW using 200MHz AFU clock (GB/s)\n");
+ printf("-----------------------------------------------------------------------------------------\n");
+
+ gettimeofday(&c0Time_prev, NULL);
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt0_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count0_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt1_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count1_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt2_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count2_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt3_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count3_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt4_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count4_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt5_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count5_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt6_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count6_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt7_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count7_prev);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ sleep(waitTime);
+ for (i=0; i<numLoops; i++) {
+
+ gettimeofday(&c0Time, NULL);
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt0_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count0);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt1_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count1);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt2_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count2);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt3_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count3);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt4_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count4);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt5_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count5);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt6_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count6);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ err = ocxl_mmio_read64(global, AFUPerfCnt7_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count7);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_read64: %d", err);
+ return err;
+ }
+
+ c0TimeElapsed = (c0Time.tv_sec - c0Time_prev.tv_sec) * 1000000 + c0Time.tv_usec - c0Time_prev.tv_usec;
+ cyclesElapsed = count0 - count0_prev;
+
+ bw_cnt0 = (double)(count0 - count0_prev) * (1 / (c0TimeElapsed / 1000000)) / 1000000000; // convert to Billion cycles
+ bw_cnt1 = (double)(count1 - count1_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s
+ bw_cnt2 = (double)(count2 - count2_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s
+ bw_cnt3 = (double)(count3 - count3_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s
+ bw_cnt4 = (double)(count4 - count4_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s
+ bw_cnt5 = (double)(count5 - count5_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s
+ bw_cnt6 = (double)(count6 - count6_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s
+ bw_cnt7 = (double)(count7 - count7_prev) * (1 / (c0TimeElapsed / 1000000)) / 1000000000; // convert to Billion events
+
+ bpc_tb_cnt0 = (double)(count0 - count0_prev) * 1 / cyclesElapsed;
+ bpc_tb_cnt1 = (double)(count1 - count1_prev) * 64 / cyclesElapsed;
+ bpc_tb_cnt2 = (double)(count2 - count2_prev) * 64 / cyclesElapsed;
+ bpc_tb_cnt3 = (double)(count3 - count3_prev) * 64 / cyclesElapsed;
+ bpc_tb_cnt4 = (double)(count4 - count4_prev) * 64 / cyclesElapsed;
+ bpc_tb_cnt5 = (double)(count5 - count5_prev) * 64 / cyclesElapsed;
+ bpc_tb_cnt6 = (double)(count6 - count6_prev) * 64 / cyclesElapsed;
+ bpc_tb_cnt7 = (double)(count7 - count7_prev) * 1 / cyclesElapsed;
+
+ bw_tb_cnt0 = (double)(count0 - count0_prev) * (1 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion cycles/s
+ bw_tb_cnt1 = (double)(count1 - count1_prev) * (64 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion GB/s
+ bw_tb_cnt2 = (double)(count2 - count2_prev) * (64 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion GB/s
+ bw_tb_cnt3 = (double)(count3 - count3_prev) * (64 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion GB/s
+ bw_tb_cnt4 = (double)(count4 - count4_prev) * (64 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion GB/s
+ bw_tb_cnt5 = (double)(count5 - count5_prev) * (64 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion GB/s
+ bw_tb_cnt6 = (double)(count6 - count6_prev) * (64 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion GB/s
+ bw_tb_cnt7 = (double)(count7 - count7_prev) * (1 / (cyclesElapsed / 200000000)) /
+ 1000000000; // convert to Billion cycles/s
+
+ delta_cnt0 = count0 - count0_prev;
+ delta_cnt1 = count1 - count1_prev;
+ delta_cnt2 = count2 - count2_prev;
+ delta_cnt3 = count3 - count3_prev;
+ delta_cnt4 = count4 - count4_prev;
+ delta_cnt5 = count5 - count5_prev;
+ delta_cnt6 = count6 - count6_prev;
+ delta_cnt7 = count7 - count7_prev;
+
+ printf("Total Cycles %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count0, count0_prev, delta_cnt0, bw_cnt0,
+ bpc_tb_cnt0, bw_tb_cnt0);
+ printf("Good Resp Total %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count1, count1_prev, delta_cnt1, bw_cnt1,
+ bpc_tb_cnt1, bw_tb_cnt1);
+ printf("Good Resp Load %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count2, count2_prev, delta_cnt2, bw_cnt2,
+ bpc_tb_cnt2, bw_tb_cnt2);
+ printf("Good Resp Store %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count3, count3_prev, delta_cnt3, bw_cnt3,
+ bpc_tb_cnt3, bw_tb_cnt3);
+ printf("Retries - Total %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count4, count4_prev, delta_cnt4, bw_cnt4,
+ bpc_tb_cnt4, bw_tb_cnt4);
+ printf("Retries - Loads %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count5, count5_prev, delta_cnt5, bw_cnt5,
+ bpc_tb_cnt5, bw_tb_cnt5);
+ printf("Retries - Store %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count6, count6_prev, delta_cnt6, bw_cnt6,
+ bpc_tb_cnt6, bw_tb_cnt6);
+ printf("No cred cycles %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count7, count7_prev, delta_cnt7, bw_cnt7,
+ bpc_tb_cnt7, bw_tb_cnt7);
+ printf("\n");
+
+ count0_prev = count0;
+ count1_prev = count1;
+ count2_prev = count2;
+ count3_prev = count3;
+ count4_prev = count4;
+ count5_prev = count5;
+ count6_prev = count6;
+ count7_prev = count7;
+ c0Time_prev = c0Time;
+
+ sleep(waitTime);
+ }
+
+ if (verbose)
+ printf("Stopping AFU\n");
+ // stop afu
+ err = ocxl_mmio_write64(global, AFUEnable_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, disableAfu);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64: %d", err);
+ return err;
+ }
+
+ if (verbose)
+ printf("Free afu\n");
+ ocxl_afu_close(afu_h);
+ return 0;
+}
diff --git a/afutests/afp/ocxl_afp3.h b/afutests/afp/ocxl_afp3.h
new file mode 100644
index 0000000..05f3f4d
--- /dev/null
+++ b/afutests/afp/ocxl_afp3.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2018 International Business Machines
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _OCXL_AFP3_H
+#define _OCXL_AFP3_H
+
+// global mmio registers
+#define AFUConfiguration_REGISTER 0x0000
+#define AFUInternalError_REGISTER 0x0010
+#define AFUInternalErrorInfo_REGISTER 0x0018
+#define AFUTraceControl_REGISTER 0x0020
+
+// global AFP3 registers
+#define AFUExtraReadEA_AFP_REGISTER 0x0038
+#define AFUWED_AFP_REGISTER 0x0040
+#define AFUBufmask_AFP_REGISTER 0x0048
+#define AFUPASID_AFP_REGISTER 0x0050
+#define AFUMisc_AFP_REGISTER 0x0058
+#define AFUEnable_AFP_REGISTER 0x0060
+#define AFUControl_AFP_REGISTER 0x0068
+#define AFULatency_AFP_REGISTER 0x0070
+#define AFUPerfCnt0_AFP_REGISTER 0x00C0
+#define AFUPerfCnt1_AFP_REGISTER 0x00C8
+#define AFUPerfCnt2_AFP_REGISTER 0x00D0
+#define AFUPerfCnt3_AFP_REGISTER 0x00D8
+#define AFUPerfCnt4_AFP_REGISTER 0x00E0
+#define AFUPerfCnt5_AFP_REGISTER 0x00E8
+#define AFUPerfCnt6_AFP_REGISTER 0x00F0
+#define AFUPerfCnt7_AFP_REGISTER 0x00F8
+#define Large_Data0_AFP_REGISTER 0x10000
+#define Large_Data1_AFP_REGISTER 0x10080
+#define Large_Data2_AFP_REGISTER 0x10100
+#define Large_Data3_AFP_REGISTER 0x10180
+
+#endif /* _OCXL_AFP3_H */
diff --git a/afutests/afp/ocxl_afp3_latency.c b/afutests/afp/ocxl_afp3_latency.c
new file mode 100644
index 0000000..695eedd
--- /dev/null
+++ b/afutests/afp/ocxl_afp3_latency.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright 2018 International Business Machines
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <getopt.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#include "libocxl.h"
+#include "ocxl_afp3.h"
+
+#define AFU_NAME "IBM,AFP3"
+#define BUF_512MB (512 * 1024 * 1024)
+#define BIT(n)	(1ull << (n))	/* argument parenthesised so BIT(a | b) shifts by the whole expression */
+
+static char *device = NULL;
+static int verbose = 0;
+static int size_ping = 8;
+static int size_ld = 64;
+static int size_st = 64;
+static int extra_read = 0;
+static uint64_t iterations = 10000;
+
+static uint64_t disableAfu = 0x0000000000000000;
+static uint64_t resetCnt = 0x4000000000000000;
+
+#define miso() asm volatile("or 26, 26, 26")
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0, %1" \
+ : "=r" (rval) : "i" (rn)); rval;})
+
+static uint64_t read_timebase(void)
+{
+ return mfspr(268);
+}
+
+static void printf_buf(uint64_t addr, uint64_t size)
+{
+ unsigned int i, j;
+ uint8_t *base_p = (uint8_t *)addr;
+ uint64_t per_line = 32;
+
+ for (i = 0; i < size/per_line; i++) {
+ printf("0x%016lx:", (uint64_t) base_p);
+ for (j = 0; j < per_line; j++) {
+ if (j % 8 == 0)
+ printf(" ");
+
+ printf("%02x", *base_p);
+ base_p++;
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+static volatile sig_atomic_t flag_stop = 0;	/* written by stop_handler(), polled by the ping loops */
+static void stop_handler(int signal)		/* installed for SIGINT/SIGTERM: request a clean stop */
+{
+	printf("Signal %d received, stopping\n", signal);	/* NOTE(review): printf() is not async-signal-safe */
+	flag_stop = 1;
+}
+
+static inline uint64_t ping_8B(uint64_t global_mmio_start,
+			       volatile uint64_t *flag, uint64_t enable_in,
+			       uint64_t *count)	/* in: iterations wanted (0 = until stopped); out: iterations done */
+{
+	volatile uint64_t *afu_enable_reg_p;	/* MMIO register: the compiler must emit every store, in order */
+	uint64_t timebase[2];
+	uint64_t j, loop_count;
+
+	afu_enable_reg_p = (volatile uint64_t *)(global_mmio_start +
+						 AFUEnable_AFP_REGISTER);
+
+	if (*count) {
+		loop_count = *count;
+	} else {
+		loop_count = ~0ull;
+		printf("Running test forever, interrupt with ctrl-c\n");
+	}
+
+	timebase[0] = read_timebase();
+	for (j = 0; j < loop_count; j++) {
+		if (flag_stop)
+			break;
+
+		*flag = 0;	/* cleared here; presumably set non-zero by the AFU's DMA write - confirm */
+		__sync_synchronize();
+		*afu_enable_reg_p = enable_in;
+		miso(); // force no gather
+
+		while (*flag == 0);	/* spin until the flag word becomes non-zero */
+	}
+	timebase[1] = read_timebase();
+
+	*count = j;
+	return (timebase[1] - timebase[0]);	/* elapsed timebase ticks for the whole loop */
+}
+
+// use '-m' option with value > 8 to use this function doing a 64/128B
+// MMIO write before the mmpp DMA write
+static inline uint64_t ping_OVER_8B(uint64_t global_mmio_start,
+ volatile uint64_t *flag,
+ uint64_t enable_in, uint64_t *count)
+{
+ uint64_t *afu_enable_reg_p, *afu_large_data0_p;
+ uint64_t timebase[2];
+ uint64_t i, j, num_dw, loop_count;
+
+ fprintf(stderr, "Use of ping data bigger than 8B requires special support in the ocxl driver for mmio write-combine. Disabled by default as it generates HMI on default setup\n");
+ *count = 1;
+ return 0;
+
+ afu_enable_reg_p = (uint64_t *)(global_mmio_start +
+ AFUEnable_AFP_REGISTER);
+ afu_large_data0_p = (uint64_t *)(global_mmio_start +
+ Large_Data0_AFP_REGISTER);
+
+ num_dw = size_st / sizeof(uint64_t);
+
+ // Write enable register.
+ // The AFU is configured to use the register data for its DMA
+ // writes, so we need to make sure it's not 0 first.
+ for (i = 0; i < num_dw; i++)
+ *(afu_large_data0_p + i) = 0xCEECEECEECEE0000 + i;
+
+ *flag = 0;
+ __sync_synchronize();
+ *afu_enable_reg_p = (uint64_t) enable_in;
+ miso(); // force no gather
+
+ while (*flag == 0);
+
+ if (*count) {
+ loop_count = *count;
+ } else {
+ loop_count = ~0ull;
+ printf("Running test forever, interrupt with ctrl-c\n");
+ }
+
+ timebase[0] = read_timebase();
+ for (j = 0; j < loop_count; j++) {
+ if (flag_stop)
+ break;
+
+ *flag = 0;
+ __sync_synchronize();
+ // Write the large_data0 128 register
+ // num_dw = 8 if m64, num_dw = 16 if m128
+ for (i = 0; i < num_dw; i++) {
+ // Write large_data0 register
+ *(afu_large_data0_p + i) = 0xDAFADAFADAFA0000 + i;
+ }
+
+ miso(); // force no gather
+
+ while (*flag == 0);
+ }
+ timebase[1] = read_timebase();
+
+ *count = j;
+ return (timebase[1] - timebase[0]);
+}
+
+//Main function called after line commands arguments processed
+int ocapi_afp3_lat(void)
+{
+ int rc, j, k;
+ int size_enc_ld, size_enc_st;
+ int tags_ld = 0, tags_st = 7;
+ int npu_ld = 0, npu_st = 0;
+ int num_dw, use_large_data;
+ uint64_t total_latency, global_mmio_start, offsetmask;
+ uint64_t wed_in, misc_in, enable_in, extra_read_ea_in;
+ int flag_location;
+ volatile uint64_t *buffer;
+ size_t size;
+ ocxl_err err;
+ ocxl_afu_h afu_h;
+ ocxl_mmio_h mmio_h;
+
+ if (size_ping == 8)
+ use_large_data = 0;
+ else
+ use_large_data = 1;
+
+ size_ld = size_st;
+
+ switch (size_st) {
+ case 64:
+ size_enc_st = 1;
+ break;
+ case 128:
+ size_enc_st = 2;
+ break;
+ case 256:
+ case 512:
+ size_enc_st = 3;
+ break;
+ default:
+ printf("\nIllegal value entered for --size_st argument = %d!!!!\n", size_st);
+ return -1;
+ }
+
+ switch (size_ld) {
+ case 64:
+ size_enc_ld = 1;
+ break;
+ case 128:
+ size_enc_ld = 2;
+ break;
+ case 256:
+ case 512:
+ size_enc_ld = 3;
+ break;
+ default:
+ printf("\nIllegal value entered for --size_ld argument = %d!!!!\n", size_ld);
+ return -1;
+ }
+
+ if ((tags_ld != 0) || (tags_st == 0))
+ printf("WARNING: For MMIO ping-pong latency mode, it is recommended to enable stores (tags_st > 0), and disable loads (tags_ld = 0)\n");
+
+ printf("Parameters used: tags_ld=%d - size_ld=%d - tags_st=%d - size_st=%d\n",
+ tags_ld, size_ld, tags_st, size_st);
+
+ // Open AFU device(s)
+ if (verbose)
+ printf("Calling ocxl_afu_open\n");
+ if (device)
+ err = ocxl_afu_open_from_dev(device, &afu_h);
+ else
+ err = ocxl_afu_open(AFU_NAME, &afu_h);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_afu_open() failed for %s, error %d\n",
+ device ? device : AFU_NAME, err);
+ return err;
+ }
+
+ // attach to afu - attach does not "start" the afu anymore
+ if (verbose)
+ printf("Calling ocxl_afu_attach\n");
+ err = ocxl_afu_attach(afu_h, OCXL_ATTACH_FLAGS_NONE);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_afu_attach: %d", err);
+ return err;
+ }
+
+ // map the mmio spaces
+ err = ocxl_mmio_map(afu_h, OCXL_GLOBAL_MMIO, &mmio_h);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_map: %d\n", err);
+ return err;
+ }
+
+ err = ocxl_mmio_get_info(mmio_h, (void **)&global_mmio_start, &size);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_get_info: %d\n", err);
+ return err;
+ }
+ printf("MMIO INFO: address 0x%016lx - size 0x%lx\n",
+ global_mmio_start, size);
+
+ // Allocate a buffer for "to" memory buffer.
+ // Force alignment of address on cacheline boundary.
+ offsetmask = 0x7F << 12; // Hardcode to 512K
+ rc = posix_memalign((void **) &buffer, BUF_512MB, BUF_512MB);
+ if (rc) {
+ perror("memalign main buffer");
+ return -1;
+ }
+ if (verbose)
+ printf("Allocated Buffer memory @ 0x%016llx\n",
+ (long long)buffer);
+
+ // Turn off MMIO latency mode
+ err = ocxl_mmio_write64(mmio_h, AFUEnable_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN, disableAfu);
+ if (err != OCXL_OK) {
+ fprintf(stderr,
+ "ocxl_mmio_write64(AFUEnable_AFP_REGISTER): %d\n", err);
+ return err;
+ }
+
+ // Initialize WED value
+ wed_in = (uint64_t) buffer +
+ (tags_ld << 9) + (size_enc_ld << 7) + (npu_ld << 6) +
+ (tags_st << 3) + (size_enc_st << 1) + (npu_st);
+ if (verbose)
+ printf("WED = %016lx\n", wed_in);
+ err = ocxl_mmio_write64(mmio_h, AFUWED_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN, wed_in);
+ if (err != OCXL_OK) {
+ fprintf(stderr,
+ "ocxl_mmio_write64(AFUWED_AFP_REGISTER): %d\n", err);
+ return err;
+ }
+
+ if (verbose)
+ printf("BUFMASK = %016lx\n", offsetmask);
+ err = ocxl_mmio_write64(mmio_h, AFUBufmask_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN, offsetmask);
+ if (err != OCXL_OK) {
+ fprintf(stderr,
+ "ocxl_mmio_write64(AFUBufmask_AFP_REGISTER): %d\n", err);
+ return err;
+ }
+
+ if (use_large_data) {
+ misc_in = 1 << 12; // 0b01: triggered by writing or
+ // reading large data 0 register
+ if (verbose)
+ printf("MISC_REG = %016lx\n", misc_in);
+
+ err = ocxl_mmio_write64(mmio_h, AFUMisc_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN, misc_in);
+ if (err != OCXL_OK) {
+ fprintf(stderr,
+ "ocxl_mmio_write64(AFUMisc_AFP_REGISTER): %d\n",
+ err);
+ return err;
+ }
+ }
+
+ if (verbose)
+ printf("CONTROL_REG (reset) = %016lx\n", resetCnt);
+ err = ocxl_mmio_write64(mmio_h, AFUControl_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN, resetCnt);
+ if (err != OCXL_OK) {
+ fprintf(stderr, "ocxl_mmio_write64(AFUControl_AFP_REGISTER): %d\n", err);
+ return err;
+ }
+
+ if (extra_read) {
+ // Set Read address to base address + 1K. This way,
+ // it does not overlap with MMIO Latency DMA Writes,
+ // and we do not need to set up more memory
+ extra_read_ea_in = (uint64_t) buffer + 1024;
+ if (verbose)
+ printf("EXTRA_READ_EA = %016lx\n", extra_read_ea_in);
+
+ err = ocxl_mmio_write64(mmio_h, AFUExtraReadEA_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN,
+ extra_read_ea_in);
+ if (err != OCXL_OK) {
+ fprintf(stderr,
+ "ocxl_mmio_write64(AFUExtraReadEA_AFP_REGISTER): %d\n", err);
+ return err;
+ }
+
+ printf("Initializing extra_read memory .....\n");
+ for (j = 0; j < 64; j++)
+ buffer[(1024/8) + j] = 0xdafa0201dafa0100 + j;
+
+ if (verbose) {
+ printf("Done initializing extra read memory\n");
+ printf_buf(extra_read_ea_in, 512);
+ }
+ }
+
+ // Set ENABLE register
+ enable_in = BIT(63) | BIT(62); // AFU enable | MMIO ping pong latency test mode
+ if (size_st == 512)
+ enable_in |= BIT(61); // use 512B stores
+ if (use_large_data)
+ enable_in |= BIT(60); // use large ping pong data register for DMA write(s)
+ if (extra_read)
+ enable_in |= BIT(59); // extra read mode
+ if (size_ld == 512)
+ enable_in |= BIT(58); // use 512B loads
+ if (verbose) {
+ printf("ENABLE_REG = %016lx", enable_in);
+ if (use_large_data)
+ printf("\t> use large data regs value\n");
+ else
+ printf("\n");
+ }
+
+ num_dw = size_st / sizeof(uint64_t);
+ for (k = 0; k < num_dw; k++)
+ buffer[k] = 0;
+
+ if (verbose) {
+ printf("Buffer before test\n");
+ printf_buf((uint64_t) buffer, 512);
+ }
+
+ asm volatile("": : :"memory");
+ asm volatile("sync");
+
+ ///////////////////////////////////////////////////////////////////////
+ // MMIO Ping-Pong Latency Test
+ ///////////////////////////////////////////////////////////////////////
+
+ if (verbose)
+ printf("Calling ping_pong test\n");
+ printf("MMIO WR %dB (host to card) -> %sDMA WR %dB (card to host)\n",
+ size_ping, (extra_read ? "DMA RD + " : ""), size_st);
+
+ // flag_location is the address where lower bytes of counter
+ // value will be set
+ flag_location = (size_st - 64) / sizeof(uint64_t);
+
+ if (size_ping == 8)
+ total_latency = ping_8B(global_mmio_start,
+ &buffer[flag_location], enable_in,
+ &iterations);
+ else
+ total_latency = ping_OVER_8B(global_mmio_start,
+ &buffer[flag_location], enable_in,
+ &iterations);
+
+ if (verbose) {
+ usleep(100000); // .1s
+ printf("\nBuffer after test\n");
+ printf_buf((uint64_t) buffer, 512);
+ }
+ printf("Completed %lu iterations. Total time measured using timebase: %10.2f ns\n",
+ iterations, total_latency*1000./512);
+ printf("Average roundtrip per iteration: %10.2f ns\n",
+ total_latency*1000./iterations/512);
+
+ // Turn off MMIO latency mode
+ err = ocxl_mmio_write64(mmio_h, AFUEnable_AFP_REGISTER,
+ OCXL_MMIO_LITTLE_ENDIAN, disableAfu);
+ if (err != OCXL_OK) {
+ fprintf(stderr,
+ "ocxl_mmio_write64(AFUEnable_AFP_REGISTER): %d\n", err);
+ return err;
+ }
+
+ if (verbose)
+ printf("Unmap afu\n");
+ ocxl_mmio_unmap(mmio_h);
+
+ if (verbose)
+ printf("Free afu\n");
+ ocxl_afu_close(afu_h);
+ return 0;
+}
+
+static void print_help(char *name)	/* print the command line usage; 'name' is the program name shown */
+{
+	printf("\nUsage: %s [ options ]\n", name);
+	printf("\t-i 10000 --iterations\tDefault=%lu\n", iterations);	/* iterations is unsigned: %lu, as in the result printout */
+	printf("\t-p 64 to 512 --pong \tPong size from card to host (Bytes) Default=%d\n", size_st);
+	printf("\t-x --extraread \tAdd an DMA extraread before the DMA Wr Default is no\n");
+	printf("\t-f --forever \tRun until CTRL+C, Default=no\n");
+	printf("\t-d --device \tDevice to open instead of first AFP AFU found\n");
+	printf("\t-v --verbose \tVerbose output\n");
+	printf("\t-h --help \tPrint this message\n");
+	printf("\n");
+}
+
+int main(int argc, char *argv[])	/* parse options into the file-scope settings, then run the latency test */
+{
+	int opt;
+	int option_index = 0;
+
+	static struct option long_options[] = {
+		{"iterations", required_argument, 0, 'i'},
+		{"ping",       required_argument, 0, 'm'},
+		{"pong",       required_argument, 0, 'p'},
+		{"extraread",  no_argument,       0, 'x'},
+		{"forever",    no_argument,       0, 'f'},
+		{"verbose",    no_argument,       0, 'v'},	/* NULL flag: a non-NULL flag makes getopt_long() return 0, which fell into 'default' */
+		{"help",       no_argument,       0, 'h'},
+		{"device",     required_argument, 0, 'd'},
+		{NULL, 0, 0, 0}
+	};
+
+	while ((opt = getopt_long(argc, argv, "vxhi:p:m:fd:", long_options,
+				  &option_index)) >= 0) {
+		switch (opt) {
+		case 'v':
+			verbose = 1;
+			break;
+		case 'i':
+			iterations = strtoul(optarg, NULL, 0);
+			break;
+		case 'm':
+			size_ping = strtoul(optarg, NULL, 0);
+			break;
+		case 'p':
+			size_st = strtoul(optarg, NULL, 0);
+			break;
+		case 'x':
+			extra_read = 1;
+			break;
+		case 'f':
+			iterations = 0;	/* 0 means "run until interrupted" in the ping loops */
+			break;
+		case 'h':
+			print_help(argv[0]);
+			return 0;
+		case 'd':
+			device = optarg;
+			break;
+		default:
+			print_help(argv[0]);
+			return -1;
+		}
+	}
+
+	// Registering signal handlers, useful for 'forever' mode
+	if (signal(SIGINT, stop_handler) == SIG_ERR)
+		printf("\ncan't catch SIGINT\n");
+	if (signal(SIGTERM, stop_handler) == SIG_ERR)
+		printf("\ncan't catch SIGTERM\n");
+
+	return ocapi_afp3_lat();
+}
diff --git a/afutests/memcpy/README.md b/afutests/memcpy/README.md
new file mode 100644
index 0000000..73d1259
--- /dev/null
+++ b/afutests/memcpy/README.md
@@ -0,0 +1,34 @@
+ocxl_memcpy
+===========
+
+`ocxl_memcpy` is a test program for the OpenCAPI AFU IBM,MEMCPY3 (used for
+development).
+
+Requirements
+------------
+
+The OpenCAPI card must be flashed with an IBM,MEMCPY3 AFU image.
+
+Usage
+-----
+
+ $ ../../afuobj/ocxl_memcpy # Test memcpy AFU memory copy
+ $ ../../afuobj/ocxl_memcpy -A # Test memcpy AFU atomic compare and swap
+ $ ../../afuobj/ocxl_memcpy -a # Test memcpy AFU increment
+
+```
+ Usage: ocxl_memcpy [ options ]
+ Options:
+ -A Run the atomic compare and swap test
+ -a Run the increment test
+ -d <device> Use this capi card
+ -I Initialize the destination buffer after each loop
+ -i Send an interrupt after copy
+ -l <loops> Run this number of memcpy loops (default 1)
+ -p <procs> Fork this number of processes (default 1)
+ -p 0 Use the maximum number of processes permitted by the AFU
+ -r Reallocate the destination buffer in between 2 loops
+ -S Operate on shared memory
+ -s <bufsize> Copy this number of bytes (default 2048)
+ -t <timeout> Seconds to wait for the AFU to signal completion
+```
diff --git a/afutests/memcpy/ocxl_memcpy.c b/afutests/memcpy/ocxl_memcpy.c
new file mode 100644
index 0000000..ed22290
--- /dev/null
+++ b/afutests/memcpy/ocxl_memcpy.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright 2018 International Business Machines
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include "libocxl.h"
+
+#define LOG_ERR(pid, fmt, x...) fprintf(stderr, "Process %d: " fmt, pid, ##x)
+#define LOG_INF(pid, fmt, x...) printf("Process %d: " fmt, pid, ##x)
+
+#define AFU_NAME "IBM,MEMCPY3"
+#define AFU_MAX_PROCESSES 512
+
+#define CACHELINESIZE	128
+/* Queue sizes other than 512kB don't seem to work (still true?) */
+#define QUEUE_SIZE	(4095*CACHELINESIZE)	/* parenthesised so x / QUEUE_SIZE and x % QUEUE_SIZE group correctly */
+
+#define MEMCPY_WED(queue, depth)				\
+	((((uint64_t)(queue)) & 0xfffffffffffff000ULL) |	\
+	(((uint64_t)(depth)) & 0xfffULL))	/* queue address with low 12 bits cleared | 12-bit depth */
+
+#define MEMCPY_WE_CMD(valid, cmd) \
+ (((valid) & 0x1) | \
+ (((cmd) & 0x3f) << 2))
+#define MEMCPY_WE_CMD_VALID (0x1 << 0)
+#define MEMCPY_WE_CMD_WRAP (0x1 << 1)
+#define MEMCPY_WE_CMD_COPY 0
+#define MEMCPY_WE_CMD_IRQ 1
+#define MEMCPY_WE_CMD_STOP 2
+#define MEMCPY_WE_CMD_WAKE_HOST_THREAD 3
+#define MEMCPY_WE_CMD_INCREMENT 4
+#define MEMCPY_WE_CMD_ATOMIC 5
+#define MEMCPY_WE_CMD_TRANSLATE_TOUCH 6
+
+/* global mmio registers */
+#define MEMCPY_AFU_GLOBAL_CFG 0
+#define MEMCPY_AFU_GLOBAL_TRACE 0x20
+
+/* per-process mmio registers */
+#define MEMCPY_AFU_PP_WED 0
+#define MEMCPY_AFU_PP_STATUS 0x10
+#define MEMCPY_AFU_PP_STATUS_Terminated 0x8
+#define MEMCPY_AFU_PP_STATUS_Stopped 0x10
+
+#define MEMCPY_AFU_PP_CTRL 0x18
+#define MEMCPY_AFU_PP_CTRL_Restart (0x1 << 0)
+#define MEMCPY_AFU_PP_CTRL_Terminate (0x1 << 1)
+#define MEMCPY_AFU_PP_IRQ 0x28
+
+
+struct memcpy_work_element {	/* one 32-byte entry of the work element queue */
+	volatile uint8_t cmd; /* valid, wrap, cmd */
+	volatile uint8_t status;
+	uint16_t length;
+	uint8_t cmd_extra;
+	uint8_t reserved[3];
+	uint64_t atomic_op;
+	uint64_t src; /* also irq EA or atomic_op2 */
+	uint64_t dst;
+} __attribute__((packed));	/* a bare '__packed' is only an attribute if a header defines it; otherwise it declares a variable */
+
+struct memcpy_weq {
+ struct memcpy_work_element *queue;
+ struct memcpy_work_element *next;
+ struct memcpy_work_element *last;
+ int wrap;
+ int count;
+};
+
+struct memcpy_test_args {
+ int loop_count;
+ int size;
+ int irq;
+ int completion_timeout;
+ int reallocate;
+ int initialize;
+ char *device;
+ int wake_host_thread;
+ int increment;
+ int atomic_cas;
+ int shared_mem;
+ /* global vars */
+ int shmid;
+ char *lock;
+ char *counter;
+};
+
+int memcpy3_queue_length(size_t queue_size)
+{
+ return queue_size/sizeof(struct memcpy_work_element);
+}
+
+void memcpy3_init_weq(struct memcpy_weq *weq, size_t queue_size)
+{
+ weq->queue = aligned_alloc(getpagesize(), queue_size);
+ memset(weq->queue, 0, queue_size);
+ weq->next = weq->queue;
+ weq->last = weq->queue + memcpy3_queue_length(queue_size) - 1;
+ weq->wrap = 0;
+ weq->count = 0;
+}
+
+/*
+ * Copies a work element into the queue, taking care to set the wrap
+ * bit correctly. Returns a pointer to the element in the queue.
+ */
+struct memcpy_work_element *memcpy3_add_we(struct memcpy_weq *weq, struct memcpy_work_element we)
+{
+ struct memcpy_work_element *new_we = weq->next;
+
+ new_we->status = we.status;
+ new_we->length = we.length;
+ new_we->cmd_extra = we.cmd_extra;
+ new_we->atomic_op = we.atomic_op;
+ new_we->src = we.src;
+ new_we->dst = we.dst;
+ __sync_synchronize();
+ new_we->cmd = (we.cmd & ~MEMCPY_WE_CMD_WRAP) | weq->wrap;
+ weq->next++;
+ if (weq->next > weq->last) {
+ weq->wrap ^= MEMCPY_WE_CMD_WRAP;
+ weq->next = weq->queue;
+ }
+
+ return new_we;
+}
+
+int global_setup(struct memcpy_test_args *args)
+{
+ ocxl_err err;
+ ocxl_afu_h afu_h;
+ uint64_t reg, cfg;
+ pid_t pid;
+ ocxl_mmio_h global_mmio;
+
+ pid = getpid();
+ if (args->device)
+ err = ocxl_afu_open_from_dev(args->device, &afu_h);
+ else
+ err = ocxl_afu_open(AFU_NAME, &afu_h);
+
+ if (err != OCXL_OK) {
+ LOG_ERR(pid, "ocxl_afu_open() failed: %d\n", err);
+ return -1;
+ }
+
+ err = ocxl_mmio_map(afu_h, OCXL_GLOBAL_MMIO, &global_mmio);
+ if (err != OCXL_OK) {
+ LOG_ERR(pid, "global ocxl_mmio_map() failed: %d\n", err);
+ return -1;
+ }
+
+ // cfg = 0;
+ // cfg |= (1ull << 3); /* disable 256B ops */
+ // cfg &= ~((0xFFFFFFFFull) << 32);
+ // cfg |= (0xFFFFFFFCull) << 32;
+ // cfg |= (1ull << 30); /* disable back-off timers */
+ // cfg |= (3ull << 17); /* xtouch enable */
+ // cfg |= (0b111111) << 8; /* all bypass */
+ ocxl_mmio_read64(global_mmio, MEMCPY_AFU_GLOBAL_CFG, OCXL_MMIO_LITTLE_ENDIAN, &cfg);
+ LOG_INF(pid, "AFU config = %#lx\n", cfg);
+
+ reg = 0x8008008000000000;
+ err = ocxl_mmio_write64(global_mmio, MEMCPY_AFU_GLOBAL_TRACE, OCXL_MMIO_LITTLE_ENDIAN, reg);
+ if (err != OCXL_OK) {
+ LOG_ERR(pid, "global ocxl_mmio_write64(trace) failed: %d\n", err);
+ return -1;
+ }
+
+ reg = 0x000000000007100B;
+ err = ocxl_mmio_write64(global_mmio, MEMCPY_AFU_GLOBAL_TRACE, OCXL_MMIO_LITTLE_ENDIAN, reg);
+ if (err != OCXL_OK) {
+ LOG_ERR(pid, "global ocxl_mmio_write64(trace) failed: %d\n", err);
+ return -1;
+ }
+ LOG_INF(pid, "traces reset and rearmed\n");
+ ocxl_afu_close(afu_h);
+ return 0;
+}
+
+int shm_create(struct memcpy_test_args *args)	/* returns 0 on success, -1 on failure (segment removed again) */
+{
+	/* Allocate shared memory for atomic lock and counter */
+	args->shmid = shmget(IPC_PRIVATE, getpagesize(), 0600);	/* owner rw: mode 0 makes shmat() fail with EACCES when unprivileged */
+	if (args->shmid == -1) {
+		perror("Error getting shared memory segment");
+		return -1;
+	}
+	args->lock = shmat(args->shmid, NULL, 0);
+	if (args->lock == (char *)-1) {
+		perror("Unable to attach shared memory segment");
+		if (shmctl(args->shmid, IPC_RMID, NULL))
+			perror("Error destroying shared memory segment");
+		return -1;
+	}
+	args->counter = args->lock + args->size;	/* NOTE(review): not checked against the one-page segment size */
+	return 0;
+}
+
+void shm_destroy(struct memcpy_test_args *args)
+{
+ if (shmdt(args->lock))
+ perror("Error detaching shared memory segment");
+ if (shmctl(args->shmid, IPC_RMID, NULL))
+ perror("Error destroying shared memory segment");
+}
+
+int wait_for_status(struct memcpy_work_element *we, int timeout, pid_t pid)
+{
+ struct timeval test_timeout, temp;
+
+ temp.tv_sec = timeout;
+ temp.tv_usec = 0;
+
+ gettimeofday(&test_timeout, NULL);
+ timeradd(&test_timeout, &temp, &test_timeout);
+
+ for (;; gettimeofday(&temp, NULL)) {
+ if (timercmp(&temp, &test_timeout, >)) {
+ LOG_ERR(pid, "timeout polling for completion\n");
+ return -1;
+ }
+ if (we->status)
+ break;
+ }
+ return 0;
+}
+
+int wait_for_irq(struct memcpy_work_element *we, int timeout, pid_t pid, ocxl_afu_h afu_h, uint64_t irq_ea,
+ uint64_t err_ea)
+{
+ ocxl_event event;
+ int nevent;
+
+ nevent = ocxl_afu_event_check(afu_h, timeout * 1000, &event, 1);
+ if (nevent != 1) {
+ if (nevent == 0)
+ LOG_ERR(pid, "timeout waiting for AFU interrupt\n");
+ else
+ LOG_ERR(pid, "unexpected return value for ocxl_afu_event_check(): %d\n", nevent);
+ return -1;
+ }
+ if (event.type != OCXL_EVENT_IRQ) {
+ LOG_ERR(pid, "unexpected event type returned by ocxl_afu_event_check(): %d\n", event.type);
+ return -1;
+
+ }
+ if (event.irq.handle != irq_ea) {
+ if (event.irq.handle == err_ea)
+ LOG_ERR(pid, "received error irq instead of AFU irq\n");
+ else
+ LOG_ERR(pid, "received unknown irq EA=0x%lx\n", event.irq.handle);
+ return -1;
+ }
+ /*
+ * It's possible to receive the AFU interrupt before the work
+ * element is marked as completed. So poll for status as
+ * well. It should be short, except in case of troubles
+ */
+ return wait_for_status(we, timeout, pid);
+}
+
+int wait_fast(struct memcpy_work_element *we, int timeout, pid_t pid, ocxl_afu_h afu_h, uint64_t irq_ea)
+{ /* Wait in ocxl_wait() until we->status is set; returns 0 on success, -1 on timeout or unexpected event */
+ struct timeval test_timeout, temp;
+ ocxl_event event;
+ int nevent;
+
+ temp.tv_sec = timeout;
+ temp.tv_usec = 0;
+
+ gettimeofday(&test_timeout, NULL);
+ timeradd(&test_timeout, &temp, &test_timeout); /* absolute deadline = now + timeout seconds */
+
+ /*
+ * Warning: the result of the test is not deterministic:
+ *
+ * - if the thread is running on a CPU when the AFU is sending
+ * the wake_host_thread command, then the command is
+ * accepted and the thread gets out of ocxl_wait().
+ *
+ * - if the thread is not running, the wake_host_thread fails
+ * and we'll receive an AFU interrupt.
+ *
+ * We don't sleep in the loop below, to maximise the chances of
+ * having the thread running.
+ */
+ for (;;) {
+ ocxl_wait();
+ if (we->status)
+ break;
+ gettimeofday(&temp, NULL);
+ if (timercmp(&temp, &test_timeout, >)) {
+ LOG_ERR(pid, "timeout waiting for wake_host_thread\n");
+ return -1;
+ }
+ }
+
+ /* if interrupt is sent, status is 0x11 (complete, fault response) */
+ if (we->status != 1) {
+ nevent = ocxl_afu_event_check(afu_h, 1000, &event, 1);
+ if (nevent == 1) {
+ if (event.type != OCXL_EVENT_IRQ || event.irq.handle != irq_ea) {
+ LOG_ERR(pid, "received unexpected event type %d while in 'wait' (handle=%#lx)\n", event.type, event.irq.handle);
+ return -1;
+ }
+ } else {
+ LOG_ERR(pid, "wake_host_thread failed with status %d\n", we->status);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int restart_afu(pid_t pid, ocxl_mmio_h pp_mmio)
+{
+ ocxl_err err;
+ uint64_t status;
+
+ err = ocxl_mmio_read64(pp_mmio, MEMCPY_AFU_PP_STATUS, OCXL_MMIO_LITTLE_ENDIAN, &status);
+ if (err != OCXL_OK) {
+ LOG_ERR(pid, "read of process status failed: %d\n", err);
+ return -1;
+ }
+
+ if (!(status & MEMCPY_AFU_PP_STATUS_Stopped))
+ return 0; /* not stopped */
+
+ err = ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_CTRL, OCXL_MMIO_LITTLE_ENDIAN, MEMCPY_AFU_PP_CTRL_Restart);
+ if (err != OCXL_OK) {
+ LOG_ERR(pid, "couldn't restart process: %d\n", err);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Run the memcpy AFU test for the calling process.
+ *
+ * Opens the AFU (args->device when set, otherwise the first AFU_NAME),
+ * prepares the work element queue, attaches, and then runs
+ * args->loop_count iterations. Each iteration queues one of:
+ *   - atomic_cas: a compare-and-swap on the lock, an increment of the
+ *     shared counter, then a copy work element that releases the lock;
+ *   - increment:  a single increment work element;
+ *   - otherwise:  a single copy of args->size bytes from src to dst;
+ * optionally followed by an interrupt or wake_host_thread work element.
+ * The function then waits for completion and checks the result.
+ *
+ * Returns 0 on success and -1 on failure (errors are logged). Exits the
+ * process with status 1 if the initial shared memory setup fails.
+ */
+int test_afu_memcpy(struct memcpy_test_args *args)
+{
+	uint64_t wed;
+	pid_t pid;
+	int i, t, rc = -1;
+	uint64_t status, afu_irq_ea = 0, err_irq_ea;
+	uint16_t tidr;
+	struct memcpy_weq weq;
+	struct memcpy_work_element memcpy_we, irq_we;
+	struct memcpy_work_element increment_we, atomic_cas_we;
+	struct memcpy_work_element *first_we, *last_we;
+	struct timeval start, end;
+	char *src, *dst;
+	int nevent;
+	ocxl_err err;
+	ocxl_afu_h afu_h;
+	ocxl_irq_h afu_irq, err_irq;
+	ocxl_event event;
+	ocxl_mmio_h pp_mmio;
+
+	pid = getpid();
+
+	/* Allocate memory areas for afu to copy to/from */
+	if (args->shared_mem) {
+		rc = shm_create(args);
+		if (rc)
+			exit(1);
+		src = args->counter;
+		/*
+		 * NOTE(review): the destination chosen below is always a
+		 * private mapping in this mode (main() rejects -S with -A), so
+		 * args->lock only becomes the destination after the first
+		 * reallocation (-r). Confirm this is intended.
+		 */
+	} else {
+		src = aligned_alloc(64, getpagesize());
+		if (src == NULL) {
+			LOG_ERR(pid, "allocation of source buffer failed\n");
+			return -1;
+		}
+	}
+	if (args->atomic_cas) {
+		dst = args->lock;
+	} else {
+		dst = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (dst == MAP_FAILED) {
+			LOG_ERR(pid, "mmap failed for destination buffer\n");
+			goto err_shm;
+		}
+	}
+
+	ocxl_enable_messages(OCXL_ERRORS);
+
+	if (args->device)
+		err = ocxl_afu_open_from_dev(args->device, &afu_h);
+	else
+		err = ocxl_afu_open(AFU_NAME, &afu_h);
+
+	if (err != OCXL_OK) {
+		LOG_ERR(pid, "ocxl_afu_open() failed: %d\n", err);
+		goto err_shm;
+	}
+
+	memcpy3_init_weq(&weq, QUEUE_SIZE);
+
+	/* Point the work element descriptor (wed) at the weq */
+	wed = MEMCPY_WED(weq.queue, QUEUE_SIZE / CACHELINESIZE);
+	LOG_INF(pid, "WED = 0x%lx src = %p dst = %p\n", wed, src, dst);
+
+	/* Setup the atomic compare and swap work element */
+	memset(&atomic_cas_we, 0, sizeof(atomic_cas_we));
+	atomic_cas_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_ATOMIC);
+	atomic_cas_we.length = htole16((uint16_t) sizeof(uint64_t));
+	atomic_cas_we.src = htole64(1);
+	atomic_cas_we.dst = htole64((uintptr_t) dst);
+	atomic_cas_we.atomic_op = htole64(0);
+	atomic_cas_we.cmd_extra = 0x19;
+
+	/* Setup the increment work element */
+	memset(&increment_we, 0, sizeof(increment_we));
+	increment_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_INCREMENT);
+	increment_we.length = htole16((uint16_t) sizeof(pid_t));
+	increment_we.src = htole64((uintptr_t) src);
+	increment_we.dst = htole64((uintptr_t) dst);
+
+	/* Setup the memcpy work element */
+	memset(&memcpy_we, 0, sizeof(memcpy_we));
+	memcpy_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_COPY);
+	memcpy_we.length = htole16((uint16_t) args->size);
+	memcpy_we.src = htole64((uintptr_t) src);
+	memcpy_we.dst = htole64((uintptr_t) dst);
+
+	/* Setup the interrupt work element */
+	if (args->irq || args->wake_host_thread) {
+		err = ocxl_irq_alloc(afu_h, NULL, &afu_irq);
+		if (err != OCXL_OK) {
+			LOG_ERR(pid, "ocxl_irq_alloc() failed: %d\n", err);
+			goto err;
+		}
+		afu_irq_ea = ocxl_irq_get_handle(afu_h, afu_irq);
+		LOG_INF(pid, "irq EA = %lx\n", afu_irq_ea);
+
+		memset(&irq_we, 0, sizeof(irq_we));
+		irq_we.src = htole64(afu_irq_ea);
+		if (args->irq)
+			irq_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_IRQ);
+		else {
+			err = ocxl_afu_get_p9_thread_id(afu_h, &tidr);
+			if (err != OCXL_OK) {
+				LOG_ERR(pid, "ocxl_afu_get_p9_thread_id() failed: %d\n", err);
+				goto err;
+			}
+			/*
+			 * tidr allocated before attaching, so it will
+			 * be in the Process Element and the default
+			 * tid value used by AFU
+			 */
+			irq_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_WAKE_HOST_THREAD);
+		}
+	}
+
+	err = ocxl_irq_alloc(afu_h, NULL, &err_irq);
+	if (err != OCXL_OK) {
+		LOG_ERR(pid, "ocxl_irq_alloc(err) failed: %d\n", err);
+		goto err;
+	}
+	err_irq_ea = ocxl_irq_get_handle(afu_h, err_irq);
+
+	err = ocxl_afu_attach(afu_h, 0);
+	if (err != OCXL_OK) {
+		LOG_ERR(pid, "ocxl_attach() failed: %d\n", err);
+		goto err;
+	}
+
+	err = ocxl_mmio_map(afu_h, OCXL_PER_PASID_MMIO, &pp_mmio);
+	if (err != OCXL_OK) {
+		LOG_ERR(pid, "pp ocxl_mmio_map() failed: %d\n", err);
+		goto err;
+	}
+
+	err = ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_IRQ, OCXL_MMIO_LITTLE_ENDIAN, err_irq_ea);
+	if (err != OCXL_OK) {
+		LOG_ERR(pid, "ocxl_mmio_write64(err irq) failed: %d\n", err);
+		goto err;
+	}
+	__sync_synchronize();
+	err = ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_WED, OCXL_MMIO_LITTLE_ENDIAN, wed);
+	if (err != OCXL_OK) {
+		LOG_ERR(pid, "ocxl_mmio_write64(wed) failed: %d\n", err);
+		goto err;
+	}
+
+	/* Initialise source buffer with unique(ish) per-process value */
+	if (args->atomic_cas) {
+		memset(src, 0, args->size);
+		increment_we.src = htole64((uintptr_t) args->counter);
+		increment_we.dst = htole64((uintptr_t) args->counter);
+	} else if (args->increment) {
+		*(pid_t *)src = htole32(pid - 1);
+	} else {
+		for (i = 0; i < args->size; i++)
+			*(src + i) = pid & 0xff;
+	}
+	rc = 0;
+	gettimeofday(&start, NULL);
+
+	for (i = 0; i < args->loop_count; i++) {
+
+		/* setup the work queue */
+		if (args->atomic_cas) {
+			/* acquire lock */
+			first_we = memcpy3_add_we(&weq, atomic_cas_we);
+			/* increment counter */
+			last_we = memcpy3_add_we(&weq, increment_we);
+			last_we->cmd |= MEMCPY_WE_CMD_VALID;
+			/* release lock */
+			last_we = memcpy3_add_we(&weq, memcpy_we);
+			last_we->cmd |= MEMCPY_WE_CMD_VALID;
+		} else if (args->increment) {
+			*(pid_t *)src = htole32(le32toh(*(pid_t *)src) + 1);
+			first_we = last_we = memcpy3_add_we(&weq, increment_we);
+		} else {
+			first_we = last_we = memcpy3_add_we(&weq, memcpy_we);
+		}
+		if (args->irq || args->wake_host_thread)
+			last_we = memcpy3_add_we(&weq, irq_we);
+		__sync_synchronize();
+
+		/* press the big red 'go' button */
+		first_we->cmd |= MEMCPY_WE_CMD_VALID;
+
+		/*
+		 * wait for the AFU to be done
+		 *
+		 * if we're using an interrupt, we can go to sleep.
+		 * Otherwise, we poll the last work element status from memory
+		 */
+		if (args->irq)
+			rc = wait_for_irq(last_we, args->completion_timeout, pid, afu_h, afu_irq_ea, err_irq_ea);
+		else if (args->wake_host_thread)
+			rc = wait_fast(last_we, args->completion_timeout, pid, afu_h, afu_irq_ea);
+		else
+			rc = wait_for_status(last_we, args->completion_timeout, pid);
+		if (rc)
+			goto err_status;
+		if (first_we->status != 1) {
+			LOG_ERR(pid, "unexpected status 0x%x for copy\n", first_we->status);
+			goto err_status;
+		}
+		if (args->irq && last_we->status != 1) {
+			LOG_ERR(pid, "unexpected status 0x%x for irq\n", last_we->status);
+			goto err_status;
+		}
+		if (args->wake_host_thread && (last_we->status != 1) &&
+		    (last_we->status != 0x11)) {
+			LOG_ERR(pid, "unexpected status 0x%x for wake_host_thread\n", last_we->status);
+			goto err_status;
+		}
+
+		/*
+		 * The memory barrier is to avoid instructions
+		 * re-ordering and make sure no output addresses are
+		 * read before the work element status is complete
+		 */
+		__sync_synchronize();
+
+		if (args->atomic_cas) {
+			; /* atomicity is checked at the end of main() */
+		} else if (args->increment) {
+			if (le32toh(*(pid_t *)dst)-le32toh(*(pid_t *)src)-1) {
+				LOG_ERR(pid, "increment error on loop %d\n", i);
+				goto err_status;
+			}
+		} else {
+			if (memcmp(dst, src, args->size)) {
+				LOG_ERR(pid, "copy error on loop %d\n", i);
+				goto err_status;
+			}
+		}
+		if (args->irq || args->wake_host_thread) {
+			/* AFU engine stops on irq, need to restart it */
+			rc = restart_afu(pid, pp_mmio);
+			if (rc)
+				goto err_status;
+		}
+
+		if (args->reallocate) {
+			/*
+			 * unmap/remap the destination buffer to force a TLBI
+			 * and extra memory translation with each loop
+			 */
+			if (args->shared_mem) {
+				shm_destroy(args);
+				if (shm_create(args)) {
+					/*
+					 * The old segment is gone and no new
+					 * one exists: skip the shm_destroy()
+					 * done on the common error path.
+					 */
+					LOG_ERR(pid, "re-creation of shared memory failed\n");
+					ocxl_afu_close(afu_h);
+					return -1;
+				}
+				src = args->counter;
+				dst = args->lock;
+				memcpy_we.src = htole64((uintptr_t) src);
+			} else {
+				munmap(dst, getpagesize());
+				dst = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+				if (dst == MAP_FAILED) {
+					LOG_ERR(pid, "reallocation of destination buffer failed\n");
+					goto err;
+				}
+			}
+			memcpy_we.dst = htole64((uintptr_t) dst);
+			if (args->initialize) {
+				/* let us fault in the destination buffer */
+				memset(dst, 0, args->size);
+			}
+		} else if (! args->atomic_cas) {
+			memset(dst, 0, args->size);
+		}
+	}
+
+	gettimeofday(&end, NULL);
+	t = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec;
+
+	/* catch any error interrupt */
+	nevent = ocxl_afu_event_check(afu_h, 0, &event, 1);
+	if (nevent != 0) {
+		if (nevent == 1) {
+			if (event.irq.handle == err_irq_ea)
+				LOG_ERR(pid, "received error interrupt at end of test\n");
+			else
+				LOG_ERR(pid, "received unexpected event at end of test, type %d\n", event.type);
+		} else {
+			LOG_ERR(pid, "invalid return value for ocxl_afu_event_check: %d\n", nevent);
+		}
+		goto err_status;
+	}
+
+	LOG_INF(pid, "%d loops in %d uS (%0.2f uS per loop)\n", args->loop_count, t, ((float) t)/args->loop_count);
+	ocxl_afu_close(afu_h);
+	if (args->shared_mem)
+		shm_destroy(args);
+	return 0;
+
+err_status:
+	/* dump the per-process status register to help diagnose the failure */
+	err = ocxl_mmio_read64(pp_mmio, MEMCPY_AFU_PP_STATUS, OCXL_MMIO_LITTLE_ENDIAN, &status);
+	if (err != OCXL_OK)
+		LOG_ERR(pid, "read of process status failed: %d\n", err);
+	else
+		LOG_ERR(pid, "process status at end of failed test=0x%lx\n", status);
+err:
+	ocxl_afu_close(afu_h);
+err_shm:
+	/* also reached by failures that happen before the AFU is opened */
+	if (args->shared_mem)
+		shm_destroy(args);
+	return -1;
+}
+
+/*
+ * Print the command line help on stderr and terminate the process with
+ * exit status 1. 'name' is the program name shown in the first line.
+ */
+void usage(char *name)
+{
+	/* one entry per help line, printed in order */
+	static const char *const option_help[] = {
+		"\t-A\t\tRun the atomic compare and swap test\n",
+		"\t-a\t\tRun the increment test\n",
+		"\t-d <device>\tUse this opencapi card\n",
+		"\t-I\t\tInitialize the destination buffer after each loop\n",
+		"\t-i\t\tSend an interrupt after copy\n",
+		"\t-w\t\tSend a wake_host_thread command after copy\n",
+		"\t-l <loops>\tRun this number of memcpy loops (default 1)\n",
+		"\t-p <procs>\tFork this number of processes (default 1)\n",
+		"\t-p 0\t\tUse the maximum number of processes permitted by the AFU\n",
+		"\t-r\t\tReallocate the destination buffer in between 2 loops\n",
+		"\t-S\t\tOperate on shared memory\n",
+		"\t-s <bufsize>\tCopy this number of bytes (default 2048)\n",
+		"\t-t <timeout>\tSeconds to wait for the AFU to signal completion\n",
+	};
+	size_t k;
+
+	fprintf(stderr, "Usage: %s [ options ]\n", name);
+	fputs("Options:\n", stderr);
+	for (k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++)
+		fputs(option_help[k], stderr);
+	exit(1);
+}
+
+/*
+ * Entry point: parse the command line, fork the requested number of test
+ * processes, each running test_afu_memcpy(), and collect their results.
+ *
+ * For the atomic compare and swap test (-A) the shared lock/counter are
+ * created here before forking, and the final counter value is compared
+ * with processes * loops once all children have finished.
+ *
+ * Returns 0 when every child succeeded, otherwise the number of failed
+ * tests (capped at 255 so that the exit status cannot wrap to 0), or -1
+ * for an invalid buffer size or an atomicity error.
+ */
+int main(int argc, char *argv[])
+{
+	struct memcpy_test_args args;
+	int rc, c, i, j, processes = 1, started = 0;
+	int count;
+	pid_t pid, failing;
+
+	args.loop_count = 1;
+	args.size = 2048;
+	args.irq = 0;
+	args.completion_timeout = -1;
+	args.reallocate = 0;
+	args.initialize = 0;
+	args.device = NULL;
+	args.wake_host_thread = 0;
+	args.increment = 0;
+	args.atomic_cas = 0;
+	args.shared_mem = 0;
+	args.shmid = -1;
+	args.lock = NULL;
+	args.counter = NULL;
+
+	while (1) {
+		c = getopt(argc, argv, "+aAhl:p:Ss:Iit:rd:w");
+		if (c < 0)
+			break;
+		switch (c) {
+		case '?':
+		case 'h':
+			usage(argv[0]);
+			break;
+		case 'l':
+			args.loop_count = atoi(optarg);
+			break;
+		case 'p':
+			processes = atoi(optarg);
+			break;
+		case 's':
+			args.size = atoi(optarg);
+			break;
+		case 'i':
+			args.irq = 1;
+			break;
+		case 't':
+			args.completion_timeout = atoi(optarg);
+			break;
+		case 'r':
+			args.reallocate = 1;
+			break;
+		case 'I':
+			args.initialize = 1;
+			break;
+		case 'd':
+			args.device = optarg;
+			break;
+		case 'w':
+			args.wake_host_thread = 1;
+			break;
+		case 'a':
+			args.increment = 1;
+			break;
+		case 'A':
+			args.atomic_cas = 1;
+			break;
+		case 'S':
+			args.shared_mem = 1;
+			break;
+		}
+	}
+
+	/* a negative count would run nothing and still report success */
+	if (processes < 0 || args.loop_count < 0) {
+		fprintf(stderr, "Error: -p and -l do not accept negative values\n");
+		usage(argv[0]);
+	}
+
+	if (processes == 0)
+		processes = AFU_MAX_PROCESSES;
+
+	if (args.completion_timeout == -1) {
+		args.completion_timeout = processes / 5;
+		if (args.completion_timeout < 10)
+			args.completion_timeout = 10;
+	}
+
+	if (argv[optind]) {
+		fprintf(stderr, "Error: Unexpected argument '%s'\n", argv[optind]);
+		usage(argv[0]);
+	}
+
+	if (args.wake_host_thread && args.irq) {
+		fprintf(stderr, "Error: -i and -w are mutually exclusive\n");
+		usage(argv[0]);
+	}
+
+	if (args.atomic_cas && args.reallocate) {
+		fprintf(stderr, "Error: -A and -r are mutually exclusive\n");
+		usage(argv[0]);
+	}
+
+	if (args.atomic_cas && args.shared_mem) {
+		fprintf(stderr, "Error: -A and -S are mutually exclusive\n");
+		usage(argv[0]);
+	}
+
+	if (args.increment && args.reallocate) {
+		fprintf(stderr, "Error: -a and -r are mutually exclusive\n");
+		usage(argv[0]);
+	}
+
+	if (args.increment && args.shared_mem) {
+		fprintf(stderr, "Error: -a and -S are mutually exclusive\n");
+		usage(argv[0]);
+	}
+
+	/*
+	 * max buffer size supported by AFU; a negative size would otherwise
+	 * pass the modulo test and later be used as a huge unsigned length
+	 */
+	if (args.size < 0 || args.size > 2048 || args.size % 64) {
+		fprintf(stderr, "invalid buffer size %d\n", args.size);
+		return -1;
+	}
+
+	rc = global_setup(&args);
+	if (rc)
+		exit(1);
+
+	if (args.atomic_cas) {
+		rc = shm_create(&args);
+		if (rc)
+			exit(1);
+
+		/* initialize lock and counter */
+		memset(args.lock, 0, args.size);
+		memset(args.counter, 0, args.size);
+		printf("Shared memory ID: %i attached at: %p\n", args.shmid, args.lock);
+	}
+
+	/*
+	 * Flush pending output before forking: each child calls exit(), which
+	 * would otherwise write its own copy of anything still buffered.
+	 */
+	fflush(NULL);
+
+	for (i = 0; i < processes; i++) {
+		pid = fork();
+		if (pid == 0)
+			/* Child process */
+			exit(test_afu_memcpy(&args));
+		if (pid < 0) {
+			perror("fork");
+			break;
+		}
+		started++;
+	}
+
+	/* every process that could not be started counts as a failed test */
+	rc = processes - started;
+	failing = -1;
+	for (i = 0; i < started; i++) {
+		j = 0;
+		pid = wait(&j);
+		if (pid < 0) {
+			/* no status was collected for this child */
+			perror("wait");
+			rc++;
+			continue;
+		}
+		if (j) {
+			rc++;
+			if (failing == -1)
+				failing = pid;
+		}
+	}
+	if (args.atomic_cas) {
+		/* read the counter before the segment is released */
+		count = *(int *)args.counter;
+		shm_destroy(&args);
+		if (count != processes * args.loop_count) {
+			fprintf(stderr,"Atomicity Error:\n");
+			fprintf(stderr,"  procs=%d\n", processes);
+			fprintf(stderr,"  loops=%d\n", args.loop_count);
+			fprintf(stderr,"  procs*loops=%d\n", processes * args.loop_count);
+			fprintf(stderr,"  count=%d (should be %d)\n", count, processes * args.loop_count);
+			return -1;
+		}
+	}
+
+	if (rc)
+		fprintf(stderr, "%d test(s) failed. Check process %d, maybe others\n", rc, failing);
+	else
+		printf("Test successful\n");
+	/* the exit status is truncated to 8 bits: keep failures non-zero */
+	return rc > 255 ? 255 : rc;
+}
diff --git a/afutests/reset/README.md b/afutests/reset/README.md
new file mode 100644
index 0000000..bc3d410
--- /dev/null
+++ b/afutests/reset/README.md
@@ -0,0 +1,25 @@
+ocxl_reset_tests.sh
+===================
+
+`ocxl_reset_tests.sh` is a script for testing the reset of an OpenCAPI card.
+
+Requirements
+------------
+
+The OpenCAPI card must be flashed with either an IBM,AFP3 or IBM,MEMCPY3 AFU
+image.
+
+This test requires the `pnv-php` kernel module, which the script loads
+automatically.
+
+Usage
+-----
+
+ $ ../../afuobj/ocxl_reset_tests.sh # Reset the first card and check AFU
+
+```
+ Usage: ocxl_reset_tests [ options ]
+ Options:
+ -d <device> Use this capi card
+ -l <loops> Run this number of resets (default 1)
+```
diff --git a/afutests/reset/ocxl_reset_tests.sh b/afutests/reset/ocxl_reset_tests.sh
new file mode 100755
index 0000000..46d2d57
--- /dev/null
+++ b/afutests/reset/ocxl_reset_tests.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+#
+# Copyright 2019 International Business Machines
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# ocxl_reset_tests.sh
+#
+# This test assumes that the user is root and that an IBM,AFP3 or
+# IBM,MEMCPY3 AFU is programmed on the card.
+
+function usage
+{
+ echo 'ocxl_reset_tests.sh [-d <device_path>] [-l <loops>]' >&2
+ exit 2
+}
+
+# Command line parsing: -d <device_path> selects the card, -l <loops> sets
+# the number of resets. Anything else prints the usage and exits.
+device=
+loops=1 # default
+
+while true
+do
+ case $1 in
+ # no argument left (or an empty one): stop parsing
+ ('') break ;;
+ # when the option value is missing, 'shift 2' fails and leaves the
+ # arguments in place, so the '$# == 0' check below reports the error
+ (-d) device=$2; shift 2 || break ;;
+ (-l) loops=$2; shift 2 || break ;;
+ (*) usage ;;
+ esac
+done
+# every argument must have been consumed by the loop above
+(( $# == 0 )) || usage
+
+# reject a device value that looks like another option (e.g. '-d -l')
+[[ $device == -* ]] && usage
+# at least one reset is required
+(( loops < 1 )) && usage
+
+if [[ $device ]]
+then
+ if ! ls "$device" >/dev/null 2>&1
+ then
+ echo ocxl_reset_tests.sh: "$device": no such device >&2
+ exit 2
+ fi
+ card=${device##*/}
+fi
+
+if [[ -z $card ]]
+then
+ # find first IBM,AFP3 or IBM,MEMCPY3 opencapi card
+ card=$(
+ set -- $(ls /dev/ocxl/ 2>/dev/null)
+ for i
+ do
+ case $i in
+ (*IBM,AFP3*) echo $i; break ;;
+ (*IBM,MEMCPY3*) echo $i; break ;;
+ esac
+ done
+ )
+fi
+
+if [[ -z $card ]]
+then
+ echo ocxl_reset_tests.sh: could not find afu IBM,AFP3 nor IBM,MEMCPY3 >&2
+ exit 3
+fi
+
+# load module pnv-php
+if ! modprobe pnv-php
+then
+	echo ocxl_reset_tests.sh: cannot load module pnv-php >&2
+	exit 1
+fi
+
+# Derive a PCI address from the card name: take the second '.'-separated
+# field of the device path and append function '.0'.
+slot=$(ls "/dev/ocxl/$card" | awk -F"." '{ print $2 }' | sed s/$/.0/)
+# Only query lspci when an address was extracted, so that an empty filter
+# can never be passed to 'lspci -s'.
+[[ -n $slot ]] && slot=$(lspci -m -v -s "$slot" | awk '/^PhySlot:/ { print $2; exit }')
+if [[ -z $slot ]]
+then
+	# pass the card name as data, not as the printf format string
+	printf '%s: No slot found. Exiting.\n' "$card" >&2
+	exit 1
+fi
+slot=/sys/bus/pci/slots/$slot
+
+# Power-cycle the slot 'loops' times. After each reset, run the test
+# program matching the AFU type to verify that the card still works.
+# Exit status: 4/5 when the slot power file cannot be written, 6/7 when
+# the AFP3/MEMCPY3 verification fails, 0 when every loop passes.
+for ((i = 0; i < loops; i++))
+do
+	((loops > 1)) && echo Loop: $((i+1))/$loops
+
+	echo ocxl_reset_tests.sh: resetting card $card in slot ${slot##*/}
+	if ! echo 0 > $slot/power
+	then
+		echo ocxl_reset_tests.sh: could not write to $slot/power
+		exit 4
+	fi
+
+	if ! echo 1 > $slot/power
+	then
+		echo ocxl_reset_tests.sh: could not write to $slot/power
+		exit 5
+	fi
+
+	echo ocxl_reset_tests.sh: card $card has been reset
+
+	case $card in
+	(*,AFP3.*)
+		# prefer a test program found in PATH, else look next to this script
+		ocxl_afp3=$(which ocxl_afp3 2>/dev/null)
+		[[ $ocxl_afp3 ]] || ocxl_afp3=${0%/*}/ocxl_afp3
+
+		if [[ ! -x $ocxl_afp3 ]]
+		then
+			echo ocxl_reset_tests.sh: could not find test program $ocxl_afp3
+			echo ocxl_reset_tests.sh: skipping IBM,AFP3 afu check
+		else
+			echo ocxl_reset_tests.sh: verifying afu IBM,AFP3
+
+			# NOTE(review): runs on the program's default card even when
+			# -d was given; confirm whether ocxl_afp3 can be told which
+			# device to use.
+			if ! "$ocxl_afp3" >/tmp/ocxl_reset_afp3.log
+			then
+				echo ocxl_reset_tests.sh: ocxl_afp3 fails after reset
+				exit 6
+			fi
+		fi ;;
+	(*,MEMCPY3.*)
+		# prefer a test program found in PATH, else look next to this script
+		ocxl_memcpy=$(which ocxl_memcpy 2>/dev/null)
+		[[ $ocxl_memcpy ]] || ocxl_memcpy=${0%/*}/ocxl_memcpy
+
+		if [[ ! -x $ocxl_memcpy ]]
+		then
+			echo ocxl_reset_tests.sh: could not find test program $ocxl_memcpy
+			echo ocxl_reset_tests.sh: skipping IBM,MEMCPY3 afu check
+		else
+			echo ocxl_reset_tests.sh: verifying afu IBM,MEMCPY3
+
+			# verify the card that was reset: forward the device given
+			# with -d instead of letting the test pick its default card
+			memcpy_dev=()
+			[[ $device ]] && memcpy_dev=(-d "$device")
+
+			if ! "$ocxl_memcpy" "${memcpy_dev[@]}" -p0 -l10000 >/tmp/ocxl_reset_memcpy.log
+			then
+				echo ocxl_reset_tests.sh: ocxl_memcpy fails after reset
+				exit 7
+			fi
+		fi ;;
+	esac
+done
+
+echo ocxl_reset_tests.sh: ocxl_reset test passes
+exit 0
diff --git a/src/afu.c b/src/afu.c
index 761b2ac..a3673ca 100644
--- a/src/afu.c
+++ b/src/afu.c
@@ -343,9 +343,9 @@ static bool populate_metadata(dev_t dev, ocxl_afu *afu)
}
physical_function++;
- uint16_t domain;
+ uint32_t domain;
uint8_t bus, device, function;
- int found = sscanf(physical_function, "%hu:%hhu:%hhu.%hhu.%hhu",
+ int found = sscanf(physical_function, "%x:%hhu:%hhu.%hhu.%hhu",
&domain, &bus, &device, &function, &afu->identifier.afu_index);
if (found != 5) {
diff --git a/src/internal.c b/src/internal.c
index 0dd8e91..07eaa9d 100644
--- a/src/internal.c
+++ b/src/internal.c
@@ -53,6 +53,7 @@ pthread_mutex_t stderr_mutex = PTHREAD_MUTEX_INITIALIZER;
* - Check the LIBOCXL_INFO environment variable and output the info string
* - Check the LIBOCXL_TRACE_ALL environment variable and enable tracing_all
* - Check the LIBOCXL_VERBOSE_ERRORS_ALL environment variable and enable verbose_errors_all
+ * - Check the LIBOCXL_SYSPATH environment variable and override sys_path
*/
void libocxl_init()
{
@@ -81,6 +82,10 @@ void libocxl_init()
verbose_errors = true;
}
+ val = getenv("LIBOCXL_SYSPATH");
+ if (val)
+ sys_path = val;
+
libocxl_inited = true;
pthread_mutex_unlock(&libocxl_inited_mutex);
diff --git a/src/irq.c b/src/irq.c
index 86b285c..603e7b9 100644
--- a/src/irq.c
+++ b/src/irq.c
@@ -112,13 +112,13 @@ static ocxl_err irq_allocate(ocxl_afu *afu, ocxl_irq *irq, void *info)
int rc = ioctl(afu->fd, OCXL_IOCTL_IRQ_ALLOC, &irq->event.irq_offset);
if (rc) {
- errmsg(afu, ret, "Could not allocate IRQ in kernel: %d", rc);
+ errmsg(afu, ret, "Could not allocate IRQ in kernel: %d: '%s'", errno, strerror(errno));
goto errend;
}
rc = ioctl(afu->fd, OCXL_IOCTL_IRQ_SET_FD, &irq->event);
if (rc) {
- errmsg(afu, ret, "Could not set event descriptor in kernel: %d", rc);
+ errmsg(afu, ret, "Could not set event descriptor in kernel: %d: '%s'", errno, strerror(errno));
goto errend;
}
diff --git a/src/mmio.c b/src/mmio.c
index 8d38192..a6f37a7 100644
--- a/src/mmio.c
+++ b/src/mmio.c
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+// Needed for le32toh() and friends when building against glibc version < 2.20
+#define _BSD_SOURCE
+
#include "libocxl_internal.h"
#include "sys/mman.h"
#include "errno.h"
diff --git a/unittests/unittests.c b/unittests/unittests.c
index 8d67358..6f45de4 100644
--- a/unittests/unittests.c
+++ b/unittests/unittests.c
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "libocxl_internal.h"
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
@@ -24,14 +25,9 @@
#include <pthread.h>
#include <signal.h>
#include <fcntl.h>
-
#include <misc/ocxl.h>
-
-#include "libocxl_internal.h"
#include "static.h"
-#include <stdlib.h>
-
static const char *ocxl_sysfs_path = "/tmp/ocxl-test";
static const char *ocxl_dev_path = "/dev/ocxl-test";
diff --git a/unittests/virtocxl.c b/unittests/virtocxl.c
index c00f1fc..a918b70 100644
--- a/unittests/virtocxl.c
+++ b/unittests/virtocxl.c
@@ -14,10 +14,10 @@
* limitations under the License.
*/
+#include "libocxl_internal.h"
#include <fuse/cuse_lowlevel.h>
#include <fuse/fuse_lowlevel.h>
#include <linux/poll.h>
-#include "libocxl_internal.h"
#include <misc/ocxl.h>
#include <errno.h>
#include <sys/types.h>
@@ -26,7 +26,6 @@
#include <string.h>
#include <unistd.h>
#include <pthread.h>
-
#include <stdlib.h>
typedef struct ocxl_kernel_event_header ocxl_kernel_event_header;
@@ -193,7 +192,7 @@ bool afu_is_attached() {
* @return the thread for the device, or 0 on error
*/
pthread_t create_ocxl_device(const char *afu_name, size_t global_mmio_size, size_t per_pasid_mmio_size) {
- char sysfs_base[PATH_MAX];
+ char sysfs_base[PATH_MAX - 20];
char tmp[PATH_MAX];
char buf[BUF_SIZE];