diff options
author | Frédéric Bonnard <frediz@debian.org> | 2021-10-21 12:37:09 +0200 |
---|---|---|
committer | Frédéric Bonnard <frediz@debian.org> | 2021-10-21 12:37:09 +0200 |
commit | 0e7a0e557588a919ed154bae52c5d35e6424cb36 (patch) | |
tree | 6f899beb5ea0cb5fd7be2e4a4b6d3331be32af61 | |
parent | 1441481020f90911c7f59010e23f67676da6abb7 (diff) | |
parent | ca8679d9f8af343f03e464c58df42acb0de93638 (diff) |
Update to upstream 1.2.1
[git-debrebase anchor: new upstream 1.2.1, merge]
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Changelog.md | 13 | ||||
-rw-r--r-- | Doxyfile-html | 28 | ||||
-rw-r--r-- | Doxyfile-man | 28 | ||||
-rw-r--r-- | Makefile | 46 | ||||
-rw-r--r-- | Makefile.rules | 6 | ||||
-rw-r--r-- | Makefile.vars | 7 | ||||
-rw-r--r-- | README.md | 24 | ||||
-rw-r--r-- | afutests/afp/README.md | 59 | ||||
-rw-r--r-- | afutests/afp/ocxl_afp3.c | 544 | ||||
-rw-r--r-- | afutests/afp/ocxl_afp3.h | 48 | ||||
-rw-r--r-- | afutests/afp/ocxl_afp3_latency.c | 530 | ||||
-rw-r--r-- | afutests/memcpy/README.md | 34 | ||||
-rw-r--r-- | afutests/memcpy/ocxl_memcpy.c | 847 | ||||
-rw-r--r-- | afutests/reset/README.md | 25 | ||||
-rwxr-xr-x | afutests/reset/ocxl_reset_tests.sh | 150 | ||||
-rw-r--r-- | src/afu.c | 4 | ||||
-rw-r--r-- | src/internal.c | 5 | ||||
-rw-r--r-- | src/irq.c | 4 | ||||
-rw-r--r-- | src/mmio.c | 3 | ||||
-rw-r--r-- | unittests/unittests.c | 6 | ||||
-rw-r--r-- | unittests/virtocxl.c | 5 |
22 files changed, 2335 insertions, 82 deletions
@@ -5,6 +5,7 @@ src/libocxl_info.h obj/* testobj/* sampleobj/* +afuobj/ docs cppcheck.xml kernel/include/misc/ocxl.h diff --git a/Changelog.md b/Changelog.md index 85a28eb..43ae128 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,16 @@ +# 1.2.1 + - Set library version correctly + - Fix test build + +# 1.2.0 +This is mostly a bug fix release, there's no major new feature: + - Fix to support devices names with a hexadecimal domain name + - Allow to override path to read driver info (LIBOCXL_SYSPATH) + - Documentation enhancements + - Makefile/build enhancements + - Add tests for debug AFUs (memcpy, afp) + + # 1.1.0 ## User facing changes - Requires Linux headers >= 4.18 to compile diff --git a/Doxyfile-html b/Doxyfile-html index 6a82d7e..fd23764 100644 --- a/Doxyfile-html +++ b/Doxyfile-html @@ -240,12 +240,6 @@ TAB_SIZE = 4 ALIASES = -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all @@ -1082,13 +1076,6 @@ VERBATIM_HEADERS = YES ALPHABETICAL_INDEX = YES -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored @@ -2148,12 +2135,6 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH = /usr/bin/perl - #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- @@ -2167,15 +2148,6 @@ PERL_PATH = /usr/bin/perl CLASS_DIAGRAMS = YES -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. diff --git a/Doxyfile-man b/Doxyfile-man index 56d9c06..f3c33c8 100644 --- a/Doxyfile-man +++ b/Doxyfile-man @@ -240,12 +240,6 @@ TAB_SIZE = 4 ALIASES = -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. 
The list of all @@ -1081,13 +1075,6 @@ VERBATIM_HEADERS = YES ALPHABETICAL_INDEX = YES -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored @@ -2147,12 +2134,6 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH = /usr/bin/perl - #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- @@ -2166,15 +2147,6 @@ PERL_PATH = /usr/bin/perl CLASS_DIAGRAMS = YES -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. 
@@ -3,7 +3,7 @@ include Makefile.vars OBJS = obj/afu.o obj/internal.o obj/irq.o obj/mmio.o obj/setup.o TEST_OBJS = testobj/afu.o testobj/internal.o testobj/irq.o testobj/mmio.o testobj/setup.o -CFLAGS += -I src/include -I kernel/include -fPIC -D_FILE_OFFSET_BITS=64 +override CFLAGS += -I src/include -I kernel/include -fPIC -D_FILE_OFFSET_BITS=64 VERS_LIB = $(VERSION_MAJOR).$(VERSION_MINOR) LIBNAME = libocxl.so.$(VERS_LIB) @@ -12,24 +12,32 @@ SONAMEOPT = -Wl,-soname,$(LIBSONAME) DOCDIR = docs -all: check_ocxl_header obj/$(LIBSONAME) obj/libocxl.so obj/libocxl.a sampleobj/memcpy +all: check_ocxl_header obj/$(LIBSONAME) obj/libocxl.so obj/libocxl.a \ + sampleobj/memcpy afuobj/ocxl_memcpy afuobj/ocxl_afp3 \ + afuobj/ocxl_afp3_latency afuobj/ocxl_reset_tests.sh HAS_WGET = $(shell /bin/which wget > /dev/null 2>&1 && echo y || echo n) HAS_CURL = $(shell /bin/which curl > /dev/null 2>&1 && echo y || echo n) -# Update this to test a single feature from the most recent header we require: -CHECK_OCXL_HEADER_IS_UP_TO_DATE = $(shell /bin/echo -e \\\#include $(1)\\\nvoid test\(struct ocxl_ioctl_features test\)\; | \ +# Update this to test a single feature from the most recent header we require. 
+# +# Note that a backward-incompatible change in make 4.3 modified the +# handling \# in a function invocation, so we define the test code in +# a separate variable to work around it and keep consistent behavior +# across all versions of make +TEST_CODE = '\#include <misc/ocxl.h>\nvoid test(struct ocxl_ioctl_features test);' +CHECK_OCXL_HEADER_IS_UP_TO_DATE = $(shell /bin/echo -e $(TEST_CODE) | \ $(CC) $(CFLAGS) -Werror -x c -S -o /dev/null - > /dev/null 2>&1 && echo y || echo n) check_ocxl_header: -ifeq ($(call CHECK_OCXL_HEADER_IS_UP_TO_DATE,'<misc/ocxl.h>'),n) +ifeq (${CHECK_OCXL_HEADER_IS_UP_TO_DATE},n) mkdir -p kernel/include/misc ifeq (${HAS_WGET},y) - $(call Q,WGET kernel/include/misc/ocxl.h, wget -O kernel/include/misc/ocxl.h -q http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h) + $(call Q,WGET kernel/include/misc/ocxl.h, wget -O kernel/include/misc/ocxl.h -q https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h) else ifeq (${HAS_CURL},y) - $(call Q,CURL kernel/include/misc/ocxl.h, curl -o kernel/include/misc/ocxl.h -s http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h) + $(call Q,CURL kernel/include/misc/ocxl.h, curl -L -o kernel/include/misc/ocxl.h -s https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h) else - $(error 'ocxl.h is non-existant or out of date, Download from http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h and place in ${PWD}/kernel/include/misc/ocxl.h') + $(error 'ocxl.h is non-existant or out of date, Download from https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/misc/ocxl.h and place in ${PWD}/kernel/include/misc/ocxl.h') endif endif @@ -56,12 +64,28 @@ obj/libocxl.a: $(OBJS) sampleobj/memcpy: sampleobj/memcpy.o-memcpy $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o sampleobj/memcpy 
sampleobj/memcpy.o-memcpy obj/libocxl.a, sampleobj/memcpy) +afuobj/ocxl_memcpy: afuobj/ocxl_memcpy.o-memcpy + $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o afuobj/ocxl_memcpy afuobj/ocxl_memcpy.o-memcpy obj/libocxl.a, afuobj/ocxl_memcpy) + +afuobj/ocxl_afp3: afuobj/ocxl_afp3.o-afp + $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o afuobj/ocxl_afp3 afuobj/ocxl_afp3.o-afp obj/libocxl.a, afuobj/ocxl_afp3) + +afuobj/ocxl_afp3_latency: afuobj/ocxl_afp3_latency.o-afp + $(call Q,CC, $(CC) $(CFLAGS) $(LDFLAGS) -o afuobj/ocxl_afp3_latency afuobj/ocxl_afp3_latency.o-afp obj/libocxl.a, afuobj/ocxl_afp3_latency) + +afuobj/ocxl_reset_tests.sh: afutests/reset/ocxl_reset_tests.sh + cp afutests/reset/ocxl_reset_tests.sh afuobj/ocxl_reset_tests.sh + chmod +x afuobj/ocxl_reset_tests.sh + testobj: mkdir testobj sampleobj: mkdir sampleobj +afuobj: + mkdir afuobj + testobj/libocxl.a: $(TEST_OBJS) $(call Q,AR, $(AR) rcs testobj/libocxl-temp.a $(TEST_OBJS), testobj/libocxl-temp.a) $(call Q,STATIC_SYMS, $(NM) testobj/libocxl-temp.a | grep ' t ' | grep -v __ | cut -d ' ' -f 3 > testobj/static-syms) @@ -85,8 +109,8 @@ cppcheck: cppcheck-xml: cppcheck --enable=all -j 4 -q src/*.c src/include/libocxl.h --xml-version=2 2>cppcheck.xml -precommit: clean all docs cppcheck - astyle --style=linux --indent=tab=8 --max-code-length=120 src/*.c src/*.h src/include/*.h samples/*/*.c +precommit: clean all docs cppcheck afutests + astyle --style=linux --indent=tab=8 --max-code-length=120 src/*.c src/*.h src/include/*.h samples/*/*.c afutests/*/*.c $(call Q, SYMVER-CHECK, nm obj/$(LIBNAME) | grep ' t ocxl' && (echo "Symbols are missing from symver.map" && exit 1) || true) docs: @@ -97,7 +121,7 @@ docs: $(call Q,DOCS-HTML, doxygen Doxyfile-html,) clean: - rm -rf obj testobj sampleobj docs src/libocxl_info.h + rm -rf obj testobj sampleobj afuobj docs src/libocxl_info.h install: all docs mkdir -p $(DESTDIR)$(libdir) diff --git a/Makefile.rules b/Makefile.rules index ab04e5d..60fb683 100644 --- a/Makefile.rules +++ 
b/Makefile.rules @@ -31,6 +31,12 @@ testobj/%.o-test : unittests/%.c testobj/libocxl.a | testobj sampleobj/%.o-memcpy : samples/memcpy/%.c obj/libocxl.a | sampleobj $(call Q,CC, $(CC) $(CPPFLAGS) $(SAMPLECFLAGS) -c -o $@ $<, $@) +afuobj/%.o-memcpy : afutests/memcpy/%.c obj/libocxl.a | afuobj + $(call Q,CC, $(CC) $(CPPFLAGS) $(AFUTESTCFLAGS) -c -o $@ $<, $@) + +afuobj/%.o-afp : afutests/afp/%.c obj/libocxl.a | afuobj + $(call Q,CC, $(CC) $(CPPFLAGS) $(AFUTESTCFLAGS) -c -o $@ $<, $@) + datadir ?= $(PREFIX)/share includedir ?= $(PREFIX)/include mandir ?= $(datadir)/man diff --git a/Makefile.vars b/Makefile.vars index b0fe46e..212ed2e 100644 --- a/Makefile.vars +++ b/Makefile.vars @@ -6,10 +6,10 @@ MAKEFLAGS += -rR VERSION_MAJOR = 1 # Change VERSION_MINOR on new features -VERSION_MINOR = 1 +VERSION_MINOR = 2 # Change VERSION_PATCH on each tag -VERSION_PATCH = 0 +VERSION_PATCH = 1 AR = $(CROSS_COMPILE)ar AS = $(CROSS_COMPILE)as @@ -19,4 +19,5 @@ NM = $(CROSS_COMPILE)nm OBJCOPY = $(CROSS_COMPILE)objcopy CFLAGS ?= -g -Wall -Wextra -O2 -m64 -std=c11 -DLIBOCXL_SUPPRESS_INACCESSIBLE_WARNINGS TESTCFLAGS += $(CFLAGS) -O0 -DTEST_ENVIRONMENT=1 -I src -I testobj -pthread -SAMPLECFLAGS += $(CFLAGS) -std=gnu11 -I src -I testobj -pthread
\ No newline at end of file +SAMPLECFLAGS += $(CFLAGS) -std=gnu11 -I src -I testobj -pthread +AFUTESTCFLAGS += $(CFLAGS) -std=gnu11 -I src -I testobj -pthread @@ -17,6 +17,17 @@ to the IRQ. Functions are provided to allow 32 & 64 bit access to the global and per-PASID MMIO areas on the AFU. Endian conversion is handled automatically. +## Installation +LibOCXL is available in popular Linux distributions for PPC64le. To install: +### Redhat +`yum install libocxl-devel` + +### Fedora +`dnf install libocxl-devel` + +### Ubuntu/Debian +`apt install libocxl-dev` + # Building ## Prerequisites 1. A GCC toolchain with libc (if cross compiling), crosstool-ng can build a suitable toolchain @@ -70,6 +81,14 @@ A typical use of libocxl will follow this pattern: ocxl\_mmio\_read64(). 10. **Termination:** ocxl\_afu\_close() will free all resources associated with an AFU handle. +# Documentation + +API documentation is generated using [Doxygen](http://www.stack.nl/~dimitri/doxygen/) in both HTML and man page format. + +To build the documentation, run `make docs`. + +An online version of the libocxl HTML documentation can be found at [opencapi.github.io](https://opencapi.github.io). + # Development The following environment variables may be set (to 1 or "YES") to assist with development: @@ -79,6 +98,11 @@ The following environment variables may be set (to 1 or "YES") to assist with de **LIBOCXL_VERBOSE_ERRORS_ALL** Force verbose errors to be emitted for any failed LibOCXL calls, unless explicitly disabled. + +To assist with testing in various environments, the following environment variable can also be set: + +**LIBOCXL_SYSPATH** Override the default path (/sys/class/ocxl) used by the library to read driver information. + Patches may be submitted via GitHub pull requests. Please prepare your patches by running `make precommit` before committing your work, and addressing any warnings & errors reported. 
Patches must compile cleanly with the latest stable version of GCC to be accepted. diff --git a/afutests/afp/README.md b/afutests/afp/README.md new file mode 100644 index 0000000..1c36ca5 --- /dev/null +++ b/afutests/afp/README.md @@ -0,0 +1,59 @@ +ocxl_afp3, ocxl_afp3_latency +============================ + +`ocxl_afp3` and 'ocxl_afp3_latency' are test programs for the OpenCAPI AFU IBM,AFP3 (used for +development). + +Requirements +------------ + +The OpenCAPI card must be flashed with an IBM,AFP3 AFU image. + +Usage +----- + + $ ../../afuobj/ocxl_afp3 + +``` + Usage: ocxl_afp3 [ options ] + --tags_ld Number of tags for loads. Default=0 + --tags_st Number of tags for stores. Default=0 + 0 - 0 tags (disabled) + 1 - 1 tag + 2 - 2 tags + 3 - 4 tags + 4 - 16 tags + 5 - 64 tags + 6 - 256 tags + 7 - 512 tags + --size_ld Data size, in Bytes, for loads. + Supported values: 64, 128, 256. Default=128 + --size_st Data size, in Bytes, for stores. + Supported values: 64, 128, 256. Default=128 + --npu_ld Use rd_wnitc.n for loads. Default is rd_wnitc + --npu_st Use dma_w.n for stores. Default is dma_w + --num Number of times to check perf counts. Default is 3 + --wait Amount of seconds to wait between perf count reads. + Default is 2 + --prefetch Initialize buffer memory + --offsetmask Determines how much of buffer to use. + Default 512kB. Valid Range: 4K-512M. + Format: NumberLetter, e.g. 
4K, 512K, 1M, 512M + --device Device to open instead of first AFP AFU found + --verbose Verbose output + --help Print this message +``` + + + $ ../../afuobj/ocxl_afp3_latency + +``` + Usage: /home/fbarrat/capi/libocxl/afuobj/ocxl_afp3_latency [ options ] + -i 10000 --iterations Default=10000 + -p 64 to 512 --pong Pong size from card to host (Bytes) Default=64 + -x --extraread Add an DMA extraread before the DMA Wr Default is no + -f --forever Run until CTRL+C, Default=no + -d --device Device to open instead of first AFP AFU found + -v --verbose Verbose output + -h --help Print this message +``` diff --git a/afutests/afp/ocxl_afp3.c b/afutests/afp/ocxl_afp3.c new file mode 100644 index 0000000..e418b3f --- /dev/null +++ b/afutests/afp/ocxl_afp3.c @@ -0,0 +1,544 @@ +/* + * Copyright 2018 International Business Machines + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <unistd.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <time.h> + +#include "libocxl.h" +#include "ocxl_afp3.h" + +#define AFU_NAME "IBM,AFP3" + +#define CACHELINE_BYTES 128 +#define PAGE_BYTES 4096 +#define AFU_MMIO_REG_SIZE 0x4000000 +#define BUF_512MB 536870912 +#define BUF_4MB 4194304 + + +static int verbose; +static int tags_ld = 0; +static int tags_st = 7; +static int size_ld = 128; +static int size_st = 128; +static int npu_ld = 0; +static int npu_st = 0; +static uint16_t numLoops = 3; +static uint16_t waitTime = 2; +static uint64_t offsetmask = 0x3FF; // Default to 4MB + +static uint64_t enableAfu = 0x8000000000000000; +static uint64_t disableAfu = 0x0000000000000000; +static uint64_t resetCnt = 0x4000000000000000; + +static void print_help(char *name) +{ + printf("Usage: %s [ options ]\n", name); + printf("\t--tags_ld \tNumber of tags for loads. Default=%d\n", tags_ld); + printf("\t--tags_st \tNumber of tags for stores. Default=%d\n", tags_st); + printf("\t \t 0 - 0 tags (disabled)\n"); + printf("\t \t 1 - 1 tag\n"); + printf("\t \t 2 - 2 tags\n"); + printf("\t \t 3 - 4 tags\n"); + printf("\t \t 4 - 16 tags\n"); + printf("\t \t 5 - 64 tags\n"); + printf("\t \t 6 - 256 tags\n"); + printf("\t \t 7 - 512 tags\n"); + printf("\t--size_ld \tData size, in Bytes, for loads. Supported values: 64, 128, 256. Default=%d\n", size_ld); + printf("\t--size_st \tData size, in Bytes, for stores. Supported values: 64, 128, 256. Default=%d\n", size_st); + printf("\t--npu_ld \tUse rd_wnitc.n for loads. Default is rd_wnitc\n"); + printf("\t--npu_st \tUse dma_w.n for stores. 
Default is dma_w\n"); + printf("\t--num \tNumber of times to check perf counts, default is %d\n", numLoops); + printf("\t--wait \tAmount of seconds to wait between perf count reads, default is %d\n", waitTime); + printf("\t--prefetch \tInitialize buffer memory\n"); + printf("\t--offsetmask\tDetermines how much of buffer to use. Default 512kB. Valid Range: 4K-512M. Format: NumberLetter, e.g. 4K, 512K, 1M, 512M\n"); + printf("\t--device \tDevice to open instead of first AFP AFU found\n"); + printf("\t--verbose \tVerbose output\n"); + printf("\t--help \tPrint this message\n"); + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int opt; + int rc; + int size_enc_ld; + int size_enc_st; + int option_index = 0; + int prefetch = 0; + uint64_t pasid; + uint64_t wed_in = 0; + uint64_t *buffer; + ocxl_afu_h afu_h; + ocxl_err err; + ocxl_mmio_h global; + char *device = NULL; + + // Parse parameters + static struct option long_options[] = { + {"tags_ld", required_argument, 0, 'a'}, + {"tags_st", required_argument, 0, 'b'}, + {"size_ld", required_argument, 0, 'y'}, + {"size_st", required_argument, 0, 'z'}, + {"num", required_argument, 0, 'n'}, + {"wait", required_argument, 0, 'w'}, + {"prefetch", no_argument, 0, 'p'}, + {"offsetmask", required_argument, 0, 'o'}, + {"verbose", no_argument, &verbose, 1 }, + {"help", no_argument, 0, 'h'}, + {"device", required_argument, 0, 'd'}, + {NULL, 0, 0, 0} + }; + + while ((opt = getopt_long(argc, argv, "avhc:d:", long_options, &option_index)) >= 0) { + switch (opt) { + case 0: + case 'v': + break; + case 'a': + tags_ld = strtoul(optarg, NULL, 0); + break; + case 'b': + tags_st = strtoul(optarg, NULL, 0); + break; + case 'y': + size_ld = strtoul(optarg, NULL, 0); + break; + case 'z': + size_st = strtoul(optarg, NULL, 0); + break; + case 'n': + numLoops = (uint16_t) strtoul(optarg, NULL, 0); + break; + case 'w': + waitTime = (uint16_t) strtoul(optarg, NULL, 0); + break; + case 'p': + prefetch = 1; + break; + case 'o': + if 
(!strcasecmp(optarg, "4K")) { + offsetmask = 0x00; + } else if (!strcasecmp(optarg, "8K")) { + offsetmask = 0x01; + } else if (!strcasecmp(optarg, "16K")) { + offsetmask = 0x03; + } else if (!strcasecmp(optarg, "32K")) { + offsetmask = 0x07; + } else if (!strcasecmp(optarg, "64K")) { + offsetmask = 0x0F; + } else if (!strcasecmp(optarg, "128K")) { + offsetmask = 0x1F; + } else if (!strcasecmp(optarg, "256K")) { + offsetmask = 0x3F; + } else if (!strcasecmp(optarg, "512K")) { + offsetmask = 0x7F; + } else if (!strcasecmp(optarg, "1M")) { + offsetmask = 0xFF; + } else if (!strcasecmp(optarg, "2M")) { + offsetmask = 0x1FF; + } else if (!strcasecmp(optarg, "4M")) { + offsetmask = 0x3FF; + } else if (!strcasecmp(optarg, "8M")) { + offsetmask = 0x7FF; + } else if (!strcasecmp(optarg, "16M")) { + offsetmask = 0xFFF; + } else if (!strcasecmp(optarg, "32M")) { + offsetmask = 0x1FFF; + } else if (!strcasecmp(optarg, "64M")) { + offsetmask = 0x3FFF; + } else if (!strcasecmp(optarg, "128M")) { + offsetmask = 0x7FFF; + } else if (!strcasecmp(optarg, "256M")) { + offsetmask = 0xFFFF; + } else if (!strcasecmp(optarg, "512M")) { + offsetmask = 0x1FFFF; + } else if (!strcasecmp(optarg, "1G")) { + offsetmask = 0x3FFFF; + } else if (!strcasecmp(optarg, "2G")) { + offsetmask = 0x7FFFF; + } else if (!strcasecmp(optarg, "4G")) { + offsetmask = 0xFFFFF; + } else { + fprintf(stderr, "Illegal value entered for --offsetmask argument = 0x%lx Must be string: 4K-512M\n", offsetmask); + print_help(argv[0]); + return -1; + } + if (offsetmask > 0x3FF) + printf("Warning: offsetmask is bigger than the 4MB memory buffer allocated by this app\n"); + break; + case 'd': + device = optarg; + break; + case 'h': + print_help(argv[0]); + return 0; + break; + default: + print_help(argv[0]); + return -1; + } + } + + offsetmask <<= 12; + + switch (size_ld) { + case 64: + size_enc_ld = 1; + break; + case 128: + size_enc_ld = 2; + break; + case 256: + size_enc_ld = 3; + break; + default: + fprintf(stderr, 
"Illegal value entered for --size_ld argument = %d\n", size_ld); + print_help(argv[0]); + return -1; + } + + switch (size_st) { + case 64: + size_enc_st = 1; + break; + case 128: + size_enc_st = 2; + break; + case 256: + size_enc_st = 3; + break; + default: + fprintf(stderr, "Illegal value entered for --size_st argument = %d\n", size_st); + print_help(argv[0]); + return -1; + } + + ocxl_enable_messages(OCXL_ERRORS); + + if (verbose) { + printf("Calling ocxl_afu_open\n"); + } + if (device) + err = ocxl_afu_open_from_dev(device, &afu_h); + else + err = ocxl_afu_open(AFU_NAME, &afu_h); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_afu_open() failed for %s, error %d\n", + device ? device : AFU_NAME, err); + return err; + } + + // attach to afu - attach does not "start" the afu anymore + if (verbose) { + printf("Calling ocxl_afu_attach\n"); + } + err = ocxl_afu_attach(afu_h, OCXL_ATTACH_FLAGS_NONE); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_afu_attach: %d", err); + return err; + } + + // map the global mmio space + err = ocxl_mmio_map(afu_h, OCXL_GLOBAL_MMIO, &global); + if (err != OCXL_OK) { + fprintf(stderr, "global ocxl_mmio_map: %d", err); + return err; + } + + + // Allocate a buffer for "to" memory buffer. Force alignment of address on cacheline boundary. + rc = posix_memalign((void **) &buffer, BUF_4MB, BUF_4MB); + if (rc) { + fprintf(stderr, "Memory alloc failed for buffer: %d", rc); + return rc; + } + if (verbose) + printf("Allocated Buffer memory @ %p\n", buffer); + + if (prefetch) { + printf("Initializing allocated memory\n"); + memset(buffer, 0x66, BUF_4MB); + } + + // Get the PASID for the currently open context. 
+ pasid = ocxl_afu_get_pasid(afu_h); + if (verbose) + printf("PASID = %ld\n", pasid); + err = ocxl_mmio_write64(global, AFUPASID_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, pasid); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64: %d", err); + return err; + } + + // Initialize WED value + wed_in = (uint64_t) buffer + (tags_ld * 512) + (size_enc_ld * 128) + (npu_ld * 64) + (tags_st * 8) + + (size_enc_st * 2) + (npu_st); + if (verbose) + printf("WED = 0x%lx\n", wed_in); + + err = ocxl_mmio_write64(global, AFUWED_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) wed_in); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64: %d", err); + return err; + } + + if (verbose) + printf("BUFMASK = %lx\n", offsetmask); + err = ocxl_mmio_write64(global, AFUBufmask_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) offsetmask); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64: %d", err); + return err; + } + + if (verbose) + printf("CONTROL_REG(reset) = %lx\n", resetCnt); + err = ocxl_mmio_write64(global, AFUControl_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) resetCnt); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64: %d", err); + return err; + } + + // Set ENABLE value + if (verbose) + printf("ENABLE_REG = %lx\n", enableAfu); + err = ocxl_mmio_write64(global, AFUEnable_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, (uint64_t) enableAfu); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64: %d", err); + return err; + } + printf("Parameters used: tags_ld=%d - size_ld=%d - tags_st=%d - size_st=%d\n", + tags_ld, size_ld, tags_st, size_st); + + //////////////////////////////////////////////////////////////////////// + // Measure bandwidth + //////////////////////////////////////////////////////////////////////// + sleep(1); + + uint16_t i; + struct timeval c0Time, c0Time_prev; + double c0TimeElapsed, cyclesElapsed; + uint64_t count0_prev, count1_prev, count2_prev, count3_prev, count4_prev, count5_prev, count6_prev, count7_prev; + uint64_t 
count0, count1, count2, count3, count4, count5, count6, count7; + uint64_t delta_cnt0, delta_cnt1, delta_cnt2, delta_cnt3, delta_cnt4, delta_cnt5, delta_cnt6, delta_cnt7; + double bw_cnt0, bw_cnt1, bw_cnt2, bw_cnt3, bw_cnt4, bw_cnt5, bw_cnt6, bw_cnt7; + double bpc_tb_cnt0, bpc_tb_cnt1, bpc_tb_cnt2, bpc_tb_cnt3, bpc_tb_cnt4, bpc_tb_cnt5, bpc_tb_cnt6, bpc_tb_cnt7; + double bw_tb_cnt0, bw_tb_cnt1, bw_tb_cnt2, bw_tb_cnt3, bw_tb_cnt4, bw_tb_cnt5, bw_tb_cnt6, bw_tb_cnt7; + + + printf("Counter Curr Count (64B) Prev Count Count Diff. BW (GB/s) using App clock\tBytes or Events per AFP cycle\t\tBW using 200MHz AFU clock (GB/s)\n"); + printf("-----------------------------------------------------------------------------------------\n"); + + gettimeofday(&c0Time_prev, NULL); + + err = ocxl_mmio_read64(global, AFUPerfCnt0_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count0_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt1_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count1_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt2_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count2_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt3_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count3_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt4_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count4_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt5_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count5_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt6_AFP_REGISTER, 
OCXL_MMIO_LITTLE_ENDIAN, &count6_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt7_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count7_prev); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + sleep(waitTime); + for (i=0; i<numLoops; i++) { + + gettimeofday(&c0Time, NULL); + + err = ocxl_mmio_read64(global, AFUPerfCnt0_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count0); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt1_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count1); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt2_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count2); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt3_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count3); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt4_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count4); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt5_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count5); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt6_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count6); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + err = ocxl_mmio_read64(global, AFUPerfCnt7_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, &count7); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_read64: %d", err); + return err; + } + + c0TimeElapsed = (c0Time.tv_sec - c0Time_prev.tv_sec) * 1000000 + c0Time.tv_usec - 
c0Time_prev.tv_usec; + cyclesElapsed = count0 - count0_prev; + + bw_cnt0 = (double)(count0 - count0_prev) * (1 / (c0TimeElapsed / 1000000)) / 1000000000; // convert to Billion cycles + bw_cnt1 = (double)(count1 - count1_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s + bw_cnt2 = (double)(count2 - count2_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s + bw_cnt3 = (double)(count3 - count3_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s + bw_cnt4 = (double)(count4 - count4_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s + bw_cnt5 = (double)(count5 - count5_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s + bw_cnt6 = (double)(count6 - count6_prev) * (64 / (c0TimeElapsed / 1000000)) / 1000000000; // convert B/s to GB/s + bw_cnt7 = (double)(count7 - count7_prev) * (1 / (c0TimeElapsed / 1000000)) / 1000000000; // convert to Billion events + + bpc_tb_cnt0 = (double)(count0 - count0_prev) * 1 / cyclesElapsed; + bpc_tb_cnt1 = (double)(count1 - count1_prev) * 64 / cyclesElapsed; + bpc_tb_cnt2 = (double)(count2 - count2_prev) * 64 / cyclesElapsed; + bpc_tb_cnt3 = (double)(count3 - count3_prev) * 64 / cyclesElapsed; + bpc_tb_cnt4 = (double)(count4 - count4_prev) * 64 / cyclesElapsed; + bpc_tb_cnt5 = (double)(count5 - count5_prev) * 64 / cyclesElapsed; + bpc_tb_cnt6 = (double)(count6 - count6_prev) * 64 / cyclesElapsed; + bpc_tb_cnt7 = (double)(count7 - count7_prev) * 1 / cyclesElapsed; + + bw_tb_cnt0 = (double)(count0 - count0_prev) * (1 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion cycles/s + bw_tb_cnt1 = (double)(count1 - count1_prev) * (64 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion GB/s + bw_tb_cnt2 = (double)(count2 - count2_prev) * (64 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion GB/s + bw_tb_cnt3 = (double)(count3 - count3_prev) * (64 / (cyclesElapsed / 
200000000)) / + 1000000000; // convert to Billion GB/s + bw_tb_cnt4 = (double)(count4 - count4_prev) * (64 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion GB/s + bw_tb_cnt5 = (double)(count5 - count5_prev) * (64 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion GB/s + bw_tb_cnt6 = (double)(count6 - count6_prev) * (64 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion GB/s + bw_tb_cnt7 = (double)(count7 - count7_prev) * (1 / (cyclesElapsed / 200000000)) / + 1000000000; // convert to Billion cycles/s + + delta_cnt0 = count0 - count0_prev; + delta_cnt1 = count1 - count1_prev; + delta_cnt2 = count2 - count2_prev; + delta_cnt3 = count3 - count3_prev; + delta_cnt4 = count4 - count4_prev; + delta_cnt5 = count5 - count5_prev; + delta_cnt6 = count6 - count6_prev; + delta_cnt7 = count7 - count7_prev; + + printf("Total Cycles %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count0, count0_prev, delta_cnt0, bw_cnt0, + bpc_tb_cnt0, bw_tb_cnt0); + printf("Good Resp Total %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count1, count1_prev, delta_cnt1, bw_cnt1, + bpc_tb_cnt1, bw_tb_cnt1); + printf("Good Resp Load %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count2, count2_prev, delta_cnt2, bw_cnt2, + bpc_tb_cnt2, bw_tb_cnt2); + printf("Good Resp Store %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count3, count3_prev, delta_cnt3, bw_cnt3, + bpc_tb_cnt3, bw_tb_cnt3); + printf("Retries - Total %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count4, count4_prev, delta_cnt4, bw_cnt4, + bpc_tb_cnt4, bw_tb_cnt4); + printf("Retries - Loads %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count5, count5_prev, delta_cnt5, bw_cnt5, + bpc_tb_cnt5, bw_tb_cnt5); + printf("Retries - Store %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count6, count6_prev, delta_cnt6, bw_cnt6, + bpc_tb_cnt6, bw_tb_cnt6); + printf("No cred cycles %016lx %016lx %016lx %#12.8f %#1.8f %#12.8f\n", count7, count7_prev, delta_cnt7, bw_cnt7, + bpc_tb_cnt7, 
bw_tb_cnt7); + printf("\n"); + + count0_prev = count0; + count1_prev = count1; + count2_prev = count2; + count3_prev = count3; + count4_prev = count4; + count5_prev = count5; + count6_prev = count6; + count7_prev = count7; + c0Time_prev = c0Time; + + sleep(waitTime); + } + + if (verbose) + printf("Stopping AFU\n"); + // stop afu + err = ocxl_mmio_write64(global, AFUEnable_AFP_REGISTER, OCXL_MMIO_LITTLE_ENDIAN, disableAfu); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64: %d", err); + return err; + } + + if (verbose) + printf("Free afu\n"); + ocxl_afu_close(afu_h); + return 0; +} diff --git a/afutests/afp/ocxl_afp3.h b/afutests/afp/ocxl_afp3.h new file mode 100644 index 0000000..05f3f4d --- /dev/null +++ b/afutests/afp/ocxl_afp3.h @@ -0,0 +1,48 @@ +/* + * Copyright 2018 International Business Machines + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _OCXL_AFP3_H +#define _OCXL_AFP3_H + +// global mmio registers +#define AFUConfiguration_REGISTER 0x0000 +#define AFUInternalError_REGISTER 0x0010 +#define AFUInternalErrorInfo_REGISTER 0x0018 +#define AFUTraceControl_REGISTER 0x0020 + +// global AFP3 registers +#define AFUExtraReadEA_AFP_REGISTER 0x0038 +#define AFUWED_AFP_REGISTER 0x0040 +#define AFUBufmask_AFP_REGISTER 0x0048 +#define AFUPASID_AFP_REGISTER 0x0050 +#define AFUMisc_AFP_REGISTER 0x0058 +#define AFUEnable_AFP_REGISTER 0x0060 +#define AFUControl_AFP_REGISTER 0x0068 +#define AFULatency_AFP_REGISTER 0x0070 +#define AFUPerfCnt0_AFP_REGISTER 0x00C0 +#define AFUPerfCnt1_AFP_REGISTER 0x00C8 +#define AFUPerfCnt2_AFP_REGISTER 0x00D0 +#define AFUPerfCnt3_AFP_REGISTER 0x00D8 +#define AFUPerfCnt4_AFP_REGISTER 0x00E0 +#define AFUPerfCnt5_AFP_REGISTER 0x00E8 +#define AFUPerfCnt6_AFP_REGISTER 0x00F0 +#define AFUPerfCnt7_AFP_REGISTER 0x00F8 +#define Large_Data0_AFP_REGISTER 0x10000 +#define Large_Data1_AFP_REGISTER 0x10080 +#define Large_Data2_AFP_REGISTER 0x10100 +#define Large_Data3_AFP_REGISTER 0x10180 + +#endif /* _OCXL_AFP3_H */ diff --git a/afutests/afp/ocxl_afp3_latency.c b/afutests/afp/ocxl_afp3_latency.c new file mode 100644 index 0000000..695eedd --- /dev/null +++ b/afutests/afp/ocxl_afp3_latency.c @@ -0,0 +1,530 @@ +/* + * Copyright 2018 International Business Machines + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <getopt.h> +#include <sys/mman.h> +#include <signal.h> + +#include "libocxl.h" +#include "ocxl_afp3.h" + +#define AFU_NAME "IBM,AFP3" +#define BUF_512MB (512 * 1024 * 1024) +#define BIT(n) (1ull << n) + +static char *device = NULL; +static int verbose = 0; +static int size_ping = 8; +static int size_ld = 64; +static int size_st = 64; +static int extra_read = 0; +static uint64_t iterations = 10000; + +static uint64_t disableAfu = 0x0000000000000000; +static uint64_t resetCnt = 0x4000000000000000; + +#define miso() asm volatile("or 26, 26, 26") + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0, %1" \ + : "=r" (rval) : "i" (rn)); rval;}) + +static uint64_t read_timebase(void) +{ + return mfspr(268); +} + +static void printf_buf(uint64_t addr, uint64_t size) +{ + unsigned int i, j; + uint8_t *base_p = (uint8_t *)addr; + uint64_t per_line = 32; + + for (i = 0; i < size/per_line; i++) { + printf("0x%016lx:", (uint64_t) base_p); + for (j = 0; j < per_line; j++) { + if (j % 8 == 0) + printf(" "); + + printf("%02x", *base_p); + base_p++; + } + printf("\n"); + } + printf("\n"); +} + +static int flag_stop = 0; +static void stop_handler(int signal) +{ + printf("Signal %d received, stopping\n", signal); + flag_stop = 1; +} + +static inline uint64_t ping_8B(uint64_t global_mmio_start, + volatile uint64_t *flag, uint64_t enable_in, + uint64_t *count) +{ + uint64_t *afu_enable_reg_p; + uint64_t timebase[2]; + uint64_t j, loop_count; + + afu_enable_reg_p = (uint64_t *)(global_mmio_start + + AFUEnable_AFP_REGISTER); + + if (*count) { + loop_count = *count; + } else { + loop_count = ~0ull; + printf("Running test forever, interrupt with ctrl-c\n"); + } + + timebase[0] = read_timebase(); + for (j = 0; j < loop_count; j++) { + if (flag_stop) + break; + + *flag = 0; + __sync_synchronize(); + *afu_enable_reg_p = enable_in; + miso(); // force no gather 
+ + while (*flag == 0); + } + timebase[1] = read_timebase(); + + *count = j; + return (timebase[1] - timebase[0]); +} + +// use '-m' option with value > 8 to use this function doing a 64/128B +// MMIO write before the mmpp DMA write +static inline uint64_t ping_OVER_8B(uint64_t global_mmio_start, + volatile uint64_t *flag, + uint64_t enable_in, uint64_t *count) +{ + uint64_t *afu_enable_reg_p, *afu_large_data0_p; + uint64_t timebase[2]; + uint64_t i, j, num_dw, loop_count; + + fprintf(stderr, "Use of ping data bigger than 8B requires special support in the ocxl driver for mmio write-combine. Disabled by default as it generates HMI on default setup\n"); + *count = 1; + return 0; + + afu_enable_reg_p = (uint64_t *)(global_mmio_start + + AFUEnable_AFP_REGISTER); + afu_large_data0_p = (uint64_t *)(global_mmio_start + + Large_Data0_AFP_REGISTER); + + num_dw = size_st / sizeof(uint64_t); + + // Write enable register. + // The AFU is configured to use the register data for its DMA + // writes, so we need to make sure it's not 0 first. 
+ for (i = 0; i < num_dw; i++) + *(afu_large_data0_p + i) = 0xCEECEECEECEE0000 + i; + + *flag = 0; + __sync_synchronize(); + *afu_enable_reg_p = (uint64_t) enable_in; + miso(); // force no gather + + while (*flag == 0); + + if (*count) { + loop_count = *count; + } else { + loop_count = ~0ull; + printf("Running test forever, interrupt with ctrl-c\n"); + } + + timebase[0] = read_timebase(); + for (j = 0; j < loop_count; j++) { + if (flag_stop) + break; + + *flag = 0; + __sync_synchronize(); + // Write the large_data0 128 register + // num_dw = 8 if m64, num_dw = 16 if m128 + for (i = 0; i < num_dw; i++) { + // Write large_data0 register + *(afu_large_data0_p + i) = 0xDAFADAFADAFA0000 + i; + } + + miso(); // force no gather + + while (*flag == 0); + } + timebase[1] = read_timebase(); + + *count = j; + return (timebase[1] - timebase[0]); +} + +// Main function, called after the command-line arguments have been processed +int ocapi_afp3_lat(void) +{ + int rc, j, k; + int size_enc_ld, size_enc_st; + int tags_ld = 0, tags_st = 7; + int npu_ld = 0, npu_st = 0; + int num_dw, use_large_data; + uint64_t total_latency, global_mmio_start, offsetmask; + uint64_t wed_in, misc_in, enable_in, extra_read_ea_in; + int flag_location; + volatile uint64_t *buffer; + size_t size; + ocxl_err err; + ocxl_afu_h afu_h; + ocxl_mmio_h mmio_h; + + if (size_ping == 8) + use_large_data = 0; + else + use_large_data = 1; + + size_ld = size_st; + + switch (size_st) { + case 64: + size_enc_st = 1; + break; + case 128: + size_enc_st = 2; + break; + case 256: + case 512: + size_enc_st = 3; + break; + default: + printf("\nIllegal value entered for --size_st argument = %d!!!!\n", size_st); + return -1; + } + + switch (size_ld) { + case 64: + size_enc_ld = 1; + break; + case 128: + size_enc_ld = 2; + break; + case 256: + case 512: + size_enc_ld = 3; + break; + default: + printf("\nIllegal value entered for --size_ld argument = %d!!!!\n", size_ld); + return -1; + } + + if ((tags_ld != 0) || (tags_st == 0)) + 
printf("WARNING: For MMIO ping-pong latency mode, it is recommended to enable stores (tags_st > 0), and disable loads (tags_ld = 0)\n"); + + printf("Parameters used: tags_ld=%d - size_ld=%d - tags_st=%d - size_st=%d\n", + tags_ld, size_ld, tags_st, size_st); + + // Open AFU device(s) + if (verbose) + printf("Calling ocxl_afu_open\n"); + if (device) + err = ocxl_afu_open_from_dev(device, &afu_h); + else + err = ocxl_afu_open(AFU_NAME, &afu_h); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_afu_open() failed for %s, error %d\n", + device ? device : AFU_NAME, err); + return err; + } + + // attach to afu - attach does not "start" the afu anymore + if (verbose) + printf("Calling ocxl_afu_attach\n"); + err = ocxl_afu_attach(afu_h, OCXL_ATTACH_FLAGS_NONE); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_afu_attach: %d", err); + return err; + } + + // map the mmio spaces + err = ocxl_mmio_map(afu_h, OCXL_GLOBAL_MMIO, &mmio_h); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_map: %d\n", err); + return err; + } + + err = ocxl_mmio_get_info(mmio_h, (void **)&global_mmio_start, &size); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_get_info: %d\n", err); + return err; + } + printf("MMIO INFO: address 0x%016lx - size 0x%lx\n", + global_mmio_start, size); + + // Allocate a buffer for "to" memory buffer. + // Force alignment of address on cacheline boundary. 
+ offsetmask = 0x7F << 12; // Hardcode to 512K + rc = posix_memalign((void **) &buffer, BUF_512MB, BUF_512MB); + if (rc) { + perror("memalign main buffer"); + return -1; + } + if (verbose) + printf("Allocated Buffer memory @ 0x%016llx\n", + (long long)buffer); + + // Turn off MMIO latency mode + err = ocxl_mmio_write64(mmio_h, AFUEnable_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, disableAfu); + if (err != OCXL_OK) { + fprintf(stderr, + "ocxl_mmio_write64(AFUEnable_AFP_REGISTER): %d\n", err); + return err; + } + + // Initialize WED value + wed_in = (uint64_t) buffer + + (tags_ld << 9) + (size_enc_ld << 7) + (npu_ld << 6) + + (tags_st << 3) + (size_enc_st << 1) + (npu_st); + if (verbose) + printf("WED = %016lx\n", wed_in); + err = ocxl_mmio_write64(mmio_h, AFUWED_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, wed_in); + if (err != OCXL_OK) { + fprintf(stderr, + "ocxl_mmio_write64(AFUWED_AFP_REGISTER): %d\n", err); + return err; + } + + if (verbose) + printf("BUFMASK = %016lx\n", offsetmask); + err = ocxl_mmio_write64(mmio_h, AFUBufmask_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, offsetmask); + if (err != OCXL_OK) { + fprintf(stderr, + "ocxl_mmio_write64(AFUBufmask_AFP_REGISTER): %d\n", err); + return err; + } + + if (use_large_data) { + misc_in = 1 << 12; // 0b01: triggered by writing or + // reading large data 0 register + if (verbose) + printf("MISC_REG = %016lx\n", misc_in); + + err = ocxl_mmio_write64(mmio_h, AFUMisc_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, misc_in); + if (err != OCXL_OK) { + fprintf(stderr, + "ocxl_mmio_write64(AFUMisc_AFP_REGISTER): %d\n", + err); + return err; + } + } + + if (verbose) + printf("CONTROL_REG (reset) = %016lx\n", resetCnt); + err = ocxl_mmio_write64(mmio_h, AFUControl_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, resetCnt); + if (err != OCXL_OK) { + fprintf(stderr, "ocxl_mmio_write64(AFUControl_AFP_REGISTER): %d\n", err); + return err; + } + + if (extra_read) { + // Set Read address to base address + 1K. 
This way, + // it does not overlap with MMIO Latency DMA Writes, + // and we do not need to set up more memory + extra_read_ea_in = (uint64_t) buffer + 1024; + if (verbose) + printf("EXTRA_READ_EA = %016lx\n", extra_read_ea_in); + + err = ocxl_mmio_write64(mmio_h, AFUExtraReadEA_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, + extra_read_ea_in); + if (err != OCXL_OK) { + fprintf(stderr, + "ocxl_mmio_write64(AFUExtraReadEA_AFP_REGISTER): %d\n", err); + return err; + } + + printf("Initializing extra_read memory .....\n"); + for (j = 0; j < 64; j++) + buffer[(1024/8) + j] = 0xdafa0201dafa0100 + j; + + if (verbose) { + printf("Done initializing extra read memory\n"); + printf_buf(extra_read_ea_in, 512); + } + } + + // Set ENABLE register + enable_in = BIT(63) | BIT(62); // AFU enable | MMIO ping pong latency test mode + if (size_st == 512) + enable_in |= BIT(61); // use 512B stores + if (use_large_data) + enable_in |= BIT(60); // use large ping pong data register for DMA write(s) + if (extra_read) + enable_in |= BIT(59); // extra read mode + if (size_ld == 512) + enable_in |= BIT(58); // use 512B loads + if (verbose) { + printf("ENABLE_REG = %016lx", enable_in); + if (use_large_data) + printf("\t> use large data regs value\n"); + else + printf("\n"); + } + + num_dw = size_st / sizeof(uint64_t); + for (k = 0; k < num_dw; k++) + buffer[k] = 0; + + if (verbose) { + printf("Buffer before test\n"); + printf_buf((uint64_t) buffer, 512); + } + + asm volatile("": : :"memory"); + asm volatile("sync"); + + /////////////////////////////////////////////////////////////////////// + // MMIO Ping-Pong Latency Test + /////////////////////////////////////////////////////////////////////// + + if (verbose) + printf("Calling ping_pong test\n"); + printf("MMIO WR %dB (host to card) -> %sDMA WR %dB (card to host)\n", + size_ping, (extra_read ? 
"DMA RD + " : ""), size_st); + + // flag_location is the address where lower bytes of counter + // value will be set + flag_location = (size_st - 64) / sizeof(uint64_t); + + if (size_ping == 8) + total_latency = ping_8B(global_mmio_start, + &buffer[flag_location], enable_in, + &iterations); + else + total_latency = ping_OVER_8B(global_mmio_start, + &buffer[flag_location], enable_in, + &iterations); + + if (verbose) { + usleep(100000); // .1s + printf("\nBuffer after test\n"); + printf_buf((uint64_t) buffer, 512); + } + printf("Completed %lu iterations. Total time measured using timebase: %10.2f ns\n", + iterations, total_latency*1000./512); + printf("Average roundtrip per iteration: %10.2f ns\n", + total_latency*1000./iterations/512); + + // Turn off MMIO latency mode + err = ocxl_mmio_write64(mmio_h, AFUEnable_AFP_REGISTER, + OCXL_MMIO_LITTLE_ENDIAN, disableAfu); + if (err != OCXL_OK) { + fprintf(stderr, + "ocxl_mmio_write64(AFUEnable_AFP_REGISTER): %d\n", err); + return err; + } + + if (verbose) + printf("Unmap afu\n"); + ocxl_mmio_unmap(mmio_h); + + if (verbose) + printf("Free afu\n"); + ocxl_afu_close(afu_h); + return 0; +} + +static void print_help(char *name) +{ + printf("\nUsage: %s [ options ]\n", name); + printf("\t-i 10000 --iterations\tDefault=%ld\n", iterations); + printf("\t-p 64 to 512 --pong \tPong size from card to host (Bytes) Default=%d\n", size_st); + printf("\t-x --extraread \tAdd an DMA extraread before the DMA Wr Default is no\n"); + printf("\t-f --forever \tRun until CTRL+C, Default=no\n"); + printf("\t-d --device \tDevice to open instead of first AFP AFU found\n"); + printf("\t-v --verbose \tVerbose output\n"); + printf("\t-h --help \tPrint this message\n"); + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int opt; + int option_index = 0; + + static struct option long_options[] = { + {"iterations", required_argument, 0 , 'i'}, + {"ping", required_argument, 0 , 'm'}, + {"pong", required_argument, 0 , 'p'}, + {"extraread", 
no_argument , 0 , 'x'}, + {"forever", no_argument , 0 , 'f'}, + {"verbose", no_argument , &verbose, 1 }, + {"help", no_argument , 0 , 'h'}, + {"device", required_argument, 0 , 'd'}, + {NULL, 0, 0, 0} + }; + + while ((opt = getopt_long(argc, argv, "vxhi:p:m:fd:", long_options, + &option_index)) >= 0) { + switch (opt) { + case 'v': + verbose = 1; + break; + case 'i': + iterations = strtoul(optarg, NULL, 0); + break; + case 'm': + size_ping = strtoul(optarg, NULL, 0); + break; + case 'p': + size_st = strtoul(optarg, NULL, 0); + break; + case 'x': + extra_read = 1; + break; + case 'f': + iterations = 0; + break; + case 'h': + print_help(argv[0]); + return 0; + case 'd': + device = optarg; + break; + default: + print_help(argv[0]); + return -1; + } + } + + // Registering signal handlers, useful for 'forever' mode + if (signal(SIGINT, stop_handler) == SIG_ERR) + printf("\ncan't catch SIGINT\n"); + if (signal(SIGTERM, stop_handler) == SIG_ERR) + printf("\ncan't catch SIGTERM\n"); + + return ocapi_afp3_lat(); +} diff --git a/afutests/memcpy/README.md b/afutests/memcpy/README.md new file mode 100644 index 0000000..73d1259 --- /dev/null +++ b/afutests/memcpy/README.md @@ -0,0 +1,34 @@ +ocxl_memcpy +=========== + +`ocxl_memcpy` is a test program for the OpenCAPI AFU IBM,MEMCPY3 (used for +development). + +Requirements +------------ + +The OpenCAPI card must be flashed with an IBM,MEMCPY3 AFU image. 
+ +Usage +----- + + $ ../../afuobj/ocxl_memcpy # Test memcpy AFU memory copy + $ ../../afuobj/ocxl_memcpy -A # Test memcpy AFU atomic compare and swap + $ ../../afuobj/ocxl_memcpy -a # Test memcpy AFU increment + +``` + Usage: ocxl_memcpy [ options ] + Options: + -A Run the atomic compare and swap test + -a Run the increment test + -d <device> Use this capi card + -I Initialize the destination buffer after each loop + -i Send an interrupt after copy + -l <loops> Run this number of memcpy loops (default 1) + -p <procs> Fork this number of processes (default 1) + -p 0 Use the maximum number of processes permitted by the AFU + -r Reallocate the destination buffer in between 2 loops + -S Operate on shared memory + -s <bufsize> Copy this number of bytes (default 2048) + -t <timeout> Seconds to wait for the AFU to signal completion +``` diff --git a/afutests/memcpy/ocxl_memcpy.c b/afutests/memcpy/ocxl_memcpy.c new file mode 100644 index 0000000..ed22290 --- /dev/null +++ b/afutests/memcpy/ocxl_memcpy.c @@ -0,0 +1,847 @@ +/* + * Copyright 2018 International Business Machines + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include "libocxl.h" + +#define LOG_ERR(pid, fmt, x...) 
fprintf(stderr, "Process %d: " fmt, pid, ##x) +#define LOG_INF(pid, fmt, x...) printf("Process %d: " fmt, pid, ##x) + +#define AFU_NAME "IBM,MEMCPY3" +#define AFU_MAX_PROCESSES 512 + +#define CACHELINESIZE 128 +/* Queue sizes other than 512kB don't seem to work (still true?) */ +#define QUEUE_SIZE 4095*CACHELINESIZE + +#define MEMCPY_WED(queue, depth) \ + ((((uint64_t)queue) & 0xfffffffffffff000ULL) | \ + (((uint64_t)depth) & 0xfffULL)) + +#define MEMCPY_WE_CMD(valid, cmd) \ + (((valid) & 0x1) | \ + (((cmd) & 0x3f) << 2)) +#define MEMCPY_WE_CMD_VALID (0x1 << 0) +#define MEMCPY_WE_CMD_WRAP (0x1 << 1) +#define MEMCPY_WE_CMD_COPY 0 +#define MEMCPY_WE_CMD_IRQ 1 +#define MEMCPY_WE_CMD_STOP 2 +#define MEMCPY_WE_CMD_WAKE_HOST_THREAD 3 +#define MEMCPY_WE_CMD_INCREMENT 4 +#define MEMCPY_WE_CMD_ATOMIC 5 +#define MEMCPY_WE_CMD_TRANSLATE_TOUCH 6 + +/* global mmio registers */ +#define MEMCPY_AFU_GLOBAL_CFG 0 +#define MEMCPY_AFU_GLOBAL_TRACE 0x20 + +/* per-process mmio registers */ +#define MEMCPY_AFU_PP_WED 0 +#define MEMCPY_AFU_PP_STATUS 0x10 +#define MEMCPY_AFU_PP_STATUS_Terminated 0x8 +#define MEMCPY_AFU_PP_STATUS_Stopped 0x10 + +#define MEMCPY_AFU_PP_CTRL 0x18 +#define MEMCPY_AFU_PP_CTRL_Restart (0x1 << 0) +#define MEMCPY_AFU_PP_CTRL_Terminate (0x1 << 1) +#define MEMCPY_AFU_PP_IRQ 0x28 + + +struct memcpy_work_element { + volatile uint8_t cmd; /* valid, wrap, cmd */ + volatile uint8_t status; + uint16_t length; + uint8_t cmd_extra; + uint8_t reserved[3]; + uint64_t atomic_op; + uint64_t src; /* also irq EA or atomic_op2 */ + uint64_t dst; +} __packed; + +struct memcpy_weq { + struct memcpy_work_element *queue; + struct memcpy_work_element *next; + struct memcpy_work_element *last; + int wrap; + int count; +}; + +struct memcpy_test_args { + int loop_count; + int size; + int irq; + int completion_timeout; + int reallocate; + int initialize; + char *device; + int wake_host_thread; + int increment; + int atomic_cas; + int shared_mem; + /* global vars */ + int shmid; + char 
*lock; + char *counter; +}; + +int memcpy3_queue_length(size_t queue_size) +{ + return queue_size/sizeof(struct memcpy_work_element); +} + +void memcpy3_init_weq(struct memcpy_weq *weq, size_t queue_size) +{ + weq->queue = aligned_alloc(getpagesize(), queue_size); + memset(weq->queue, 0, queue_size); + weq->next = weq->queue; + weq->last = weq->queue + memcpy3_queue_length(queue_size) - 1; + weq->wrap = 0; + weq->count = 0; +} + +/* + * Copies a work element into the queue, taking care to set the wrap + * bit correctly. Returns a pointer to the element in the queue. + */ +struct memcpy_work_element *memcpy3_add_we(struct memcpy_weq *weq, struct memcpy_work_element we) +{ + struct memcpy_work_element *new_we = weq->next; + + new_we->status = we.status; + new_we->length = we.length; + new_we->cmd_extra = we.cmd_extra; + new_we->atomic_op = we.atomic_op; + new_we->src = we.src; + new_we->dst = we.dst; + __sync_synchronize(); + new_we->cmd = (we.cmd & ~MEMCPY_WE_CMD_WRAP) | weq->wrap; + weq->next++; + if (weq->next > weq->last) { + weq->wrap ^= MEMCPY_WE_CMD_WRAP; + weq->next = weq->queue; + } + + return new_we; +} + +int global_setup(struct memcpy_test_args *args) +{ + ocxl_err err; + ocxl_afu_h afu_h; + uint64_t reg, cfg; + pid_t pid; + ocxl_mmio_h global_mmio; + + pid = getpid(); + if (args->device) + err = ocxl_afu_open_from_dev(args->device, &afu_h); + else + err = ocxl_afu_open(AFU_NAME, &afu_h); + + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_afu_open() failed: %d\n", err); + return -1; + } + + err = ocxl_mmio_map(afu_h, OCXL_GLOBAL_MMIO, &global_mmio); + if (err != OCXL_OK) { + LOG_ERR(pid, "global ocxl_mmio_map() failed: %d\n", err); + return -1; + } + + // cfg = 0; + // cfg |= (1ull << 3); /* disable 256B ops */ + // cfg &= ~((0xFFFFFFFFull) << 32); + // cfg |= (0xFFFFFFFCull) << 32; + // cfg |= (1ull << 30); /* disable back-off timers */ + // cfg |= (3ull << 17); /* xtouch enable */ + // cfg |= (0b111111) << 8; /* all bypass */ + ocxl_mmio_read64(global_mmio, 
MEMCPY_AFU_GLOBAL_CFG, OCXL_MMIO_LITTLE_ENDIAN, &cfg); + LOG_INF(pid, "AFU config = %#lx\n", cfg); + + reg = 0x8008008000000000; + err = ocxl_mmio_write64(global_mmio, MEMCPY_AFU_GLOBAL_TRACE, OCXL_MMIO_LITTLE_ENDIAN, reg); + if (err != OCXL_OK) { + LOG_ERR(pid, "global ocxl_mmio_write64(trace) failed: %d\n", err); + return -1; + } + + reg = 0x000000000007100B; + err = ocxl_mmio_write64(global_mmio, MEMCPY_AFU_GLOBAL_TRACE, OCXL_MMIO_LITTLE_ENDIAN, reg); + if (err != OCXL_OK) { + LOG_ERR(pid, "global ocxl_mmio_write64(trace) failed: %d\n", err); + return -1; + } + LOG_INF(pid, "traces reset and rearmed\n"); + ocxl_afu_close(afu_h); + return 0; +} + +int shm_create(struct memcpy_test_args *args) +{ + /* Allocate shared memory for atomic lock and counter */ + args->shmid = shmget(IPC_PRIVATE, getpagesize(), 0); + if (args->shmid == -1) { + perror("Error getting shared memory segment"); + return -1; + } + args->lock = shmat(args->shmid, NULL, 0); + if (args->lock == (char *)-1) { + perror("Unable to attach shared memory segment"); + if (shmctl(args->shmid, IPC_RMID, NULL)) + perror("Error destroying shared memory segment"); + return -1; + } + args->counter = args->lock + args->size; + return 0; +} + +void shm_destroy(struct memcpy_test_args *args) +{ + if (shmdt(args->lock)) + perror("Error detaching shared memory segment"); + if (shmctl(args->shmid, IPC_RMID, NULL)) + perror("Error destroying shared memory segment"); +} + +int wait_for_status(struct memcpy_work_element *we, int timeout, pid_t pid) +{ + struct timeval test_timeout, temp; + + temp.tv_sec = timeout; + temp.tv_usec = 0; + + gettimeofday(&test_timeout, NULL); + timeradd(&test_timeout, &temp, &test_timeout); + + for (;; gettimeofday(&temp, NULL)) { + if (timercmp(&temp, &test_timeout, >)) { + LOG_ERR(pid, "timeout polling for completion\n"); + return -1; + } + if (we->status) + break; + } + return 0; +} + +int wait_for_irq(struct memcpy_work_element *we, int timeout, pid_t pid, ocxl_afu_h afu_h, uint64_t 
irq_ea, + uint64_t err_ea) +{ + ocxl_event event; + int nevent; + + nevent = ocxl_afu_event_check(afu_h, timeout * 1000, &event, 1); + if (nevent != 1) { + if (nevent == 0) + LOG_ERR(pid, "timeout waiting for AFU interrupt\n"); + else + LOG_ERR(pid, "unexpected return value for ocxl_afu_event_check(): %d\n", nevent); + return -1; + } + if (event.type != OCXL_EVENT_IRQ) { + LOG_ERR(pid, "unexpected event type returned by ocxl_afu_event_check(): %d\n", event.type); + return -1; + + } + if (event.irq.handle != irq_ea) { + if (event.irq.handle == err_ea) + LOG_ERR(pid, "received error irq instead of AFU irq\n"); + else + LOG_ERR(pid, "received unknown irq EA=0x%lx\n", event.irq.handle); + return -1; + } + /* + * It's possible to receive the AFU interrupt before the work + * element is marked as completed. So poll for status as + * well. It should be short, except in case of troubles + */ + return wait_for_status(we, timeout, pid); +} + +int wait_fast(struct memcpy_work_element *we, int timeout, pid_t pid, ocxl_afu_h afu_h, uint64_t irq_ea) +{ + struct timeval test_timeout, temp; + ocxl_event event; + int nevent; + + temp.tv_sec = timeout; + temp.tv_usec = 0; + + gettimeofday(&test_timeout, NULL); + timeradd(&test_timeout, &temp, &test_timeout); + + /* + * Warning: the result of the test is not deterministic: + * + * - if the thread is running on a CPU when the AFU is sending + * the wake_host_thread command, then the command is + * accepted and the thread gets out of ocxl_wait(). + * + * - if the thread is not running, the wake_host_thread fails + * and we'll receive an AFU interrupt. + * + * We don't sleep in the below loop to maximize the chances of + * having the thread running. 
+ */ + for (;;) { + ocxl_wait(); + if (we->status) + break; + gettimeofday(&temp, NULL); + if (timercmp(&temp, &test_timeout, >)) { + LOG_ERR(pid, "timeout waiting for wake_host_thread\n"); + return -1; + } + } + + /* if interrupt is sent, status is 0x11 (complete, fault response) */ + if (we->status != 1) { + nevent = ocxl_afu_event_check(afu_h, 1000, &event, 1); + if (nevent == 1) { + if (event.type != OCXL_EVENT_IRQ || event.irq.handle != irq_ea) { + LOG_ERR(pid, "received unexpected event type %d while in 'wait' (handle=%#lx)\n", event.type, event.irq.handle); + return -1; + } + } else { + LOG_ERR(pid, "wake_host_thread failed with status %d\n", we->status); + return -1; + } + } + return 0; +} + +int restart_afu(pid_t pid, ocxl_mmio_h pp_mmio) +{ + ocxl_err err; + uint64_t status; + + err = ocxl_mmio_read64(pp_mmio, MEMCPY_AFU_PP_STATUS, OCXL_MMIO_LITTLE_ENDIAN, &status); + if (err != OCXL_OK) { + LOG_ERR(pid, "read of process status failed: %d\n", err); + return -1; + } + + if (!(status & MEMCPY_AFU_PP_STATUS_Stopped)) + return 0; /* not stopped */ + + err = ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_CTRL, OCXL_MMIO_LITTLE_ENDIAN, MEMCPY_AFU_PP_CTRL_Restart); + if (err != OCXL_OK) { + LOG_ERR(pid, "couldn't restart process: %d\n", err); + return -1; + } + return 0; +} + +int test_afu_memcpy(struct memcpy_test_args *args) +{ + uint64_t wed; + pid_t pid; + int i, t, rc = -1; + uint64_t status, afu_irq_ea = 0, err_irq_ea; + uint16_t tidr; + struct memcpy_weq weq; + struct memcpy_work_element memcpy_we, irq_we; + struct memcpy_work_element increment_we, atomic_cas_we; + struct memcpy_work_element *first_we, *last_we; + struct timeval start, end; + char *src, *dst; + int nevent; + ocxl_err err; + ocxl_afu_h afu_h; + ocxl_irq_h afu_irq, err_irq; + ocxl_event event; + ocxl_mmio_h pp_mmio; + + pid = getpid(); + + /* Allocate memory areas for afu to copy to/from */ + if (args->shared_mem) { + rc = shm_create(args); + if (rc) + exit(1); + src = args->counter; + dst = 
args->lock; + memcpy_we.src = htole64((uintptr_t) src); + memcpy_we.dst = htole64((uintptr_t) dst); + } else { + src = aligned_alloc(64, getpagesize()); + } + if (args->atomic_cas) { + dst = args->lock; + } else { + dst = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (dst == MAP_FAILED) { + LOG_ERR(pid, "mmap failed for destination buffer\n"); + return -1; + } + } + + ocxl_enable_messages(OCXL_ERRORS); + + if (args->device) + err = ocxl_afu_open_from_dev(args->device, &afu_h); + else + err = ocxl_afu_open(AFU_NAME, &afu_h); + + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_afu_open() failed: %d\n", err); + return -1; + } + + memcpy3_init_weq(&weq, QUEUE_SIZE); + + /* Point the work element descriptor (wed) at the weq */ + wed = MEMCPY_WED(weq.queue, QUEUE_SIZE / CACHELINESIZE); + LOG_INF(pid, "WED = 0x%lx src = %p dst = %p\n", wed, src, dst); + + /* Setup the atomic compare and swap work element */ + memset(&atomic_cas_we, 0, sizeof(atomic_cas_we)); + atomic_cas_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_ATOMIC); + atomic_cas_we.length = htole16((uint16_t) sizeof(uint64_t)); + atomic_cas_we.src = htole64(1); + atomic_cas_we.dst = htole64((uintptr_t) dst); + atomic_cas_we.atomic_op = htole64(0); + atomic_cas_we.cmd_extra = 0x19; + + /* Setup the increment work element */ + memset(&increment_we, 0, sizeof(increment_we)); + increment_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_INCREMENT); + increment_we.length = htole16((uint16_t) sizeof(pid_t)); + increment_we.src = htole64((uintptr_t) src); + increment_we.dst = htole64((uintptr_t) dst); + + /* Setup the memcpy work element */ + memset(&memcpy_we, 0, sizeof(memcpy_we)); + memcpy_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_COPY); + memcpy_we.length = htole16((uint16_t) args->size); + memcpy_we.src = htole64((uintptr_t) src); + memcpy_we.dst = htole64((uintptr_t) dst); + + /* Setup the interrupt work element */ + if (args->irq || args->wake_host_thread) { + err = ocxl_irq_alloc(afu_h, 
NULL, &afu_irq); + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_irq_alloc() failed: %d\n", err); + goto err; + } + afu_irq_ea = ocxl_irq_get_handle(afu_h, afu_irq); + LOG_INF(pid, "irq EA = %lx\n", afu_irq_ea); + + memset(&irq_we, 0, sizeof(irq_we)); + irq_we.src = htole64(afu_irq_ea); + if (args->irq) + irq_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_IRQ); + else { + err = ocxl_afu_get_p9_thread_id(afu_h, &tidr); + if (err < 0) { + LOG_ERR(pid, "ocxl_afu_get_p9_thread_id() failed: %d\n", err); + goto err; + } + /* + * tidr allocated before attaching, so it will + * be in the Process Element and the default + * tid value used by AFU + */ + irq_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_WAKE_HOST_THREAD); + } + } + + err = ocxl_irq_alloc(afu_h, NULL, &err_irq); + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_irq_alloc(err) failed: %d\n", err); + goto err; + } + err_irq_ea = ocxl_irq_get_handle(afu_h, err_irq); + + err = ocxl_afu_attach(afu_h, 0); + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_attach() failed: %d\n", err); + goto err; + } + + err = ocxl_mmio_map(afu_h, OCXL_PER_PASID_MMIO, &pp_mmio); + if (err != OCXL_OK) { + LOG_ERR(pid, "pp ocxl_mmio_map() failed: %d\n", err); + goto err; + } + + err = ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_IRQ, OCXL_MMIO_LITTLE_ENDIAN, err_irq_ea); + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_mmio_write64(err irq) failed: %d\n", err); + goto err; + } + __sync_synchronize(); + err = ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_WED, OCXL_MMIO_LITTLE_ENDIAN, wed); + if (err != OCXL_OK) { + LOG_ERR(pid, "ocxl_mmio_write64(wed) failed: %d\n", err); + goto err; + } + + /* Initialise source buffer with unique(ish) per-process value */ + if (args->atomic_cas) { + memset(src, 0, args->size); + increment_we.src = htole64((uintptr_t) args->counter); + increment_we.dst = htole64((uintptr_t) args->counter); + } else if (args->increment) { + *(pid_t *)src = htole32(pid - 1); + } else { + for (i = 0; i < args->size; i++) + *(src + i) = pid & 0xff; + } + rc = 0; 
+ gettimeofday(&start, NULL); + + for (i = 0; i < args->loop_count; i++) { + + /* setup the work queue */ + if (args->atomic_cas) { + /* acquire lock */ + first_we = memcpy3_add_we(&weq, atomic_cas_we); + /* increment counter */ + last_we = memcpy3_add_we(&weq, increment_we); + last_we->cmd |= MEMCPY_WE_CMD_VALID; + /* release lock */ + last_we = memcpy3_add_we(&weq, memcpy_we); + last_we->cmd |= MEMCPY_WE_CMD_VALID; + } else if (args->increment) { + *(pid_t *)src = htole32(le32toh(*(pid_t *)src) + 1); + first_we = last_we = memcpy3_add_we(&weq, increment_we); + } else { + first_we = last_we = memcpy3_add_we(&weq, memcpy_we); + } + if (args->irq || args->wake_host_thread) + last_we = memcpy3_add_we(&weq, irq_we); + __sync_synchronize(); + + /* press the big red 'go' button */ + first_we->cmd |= MEMCPY_WE_CMD_VALID; + + /* + * wait for the AFU to be done + * + * if we're using an interrupt, we can go to sleep. + * Otherwise, we poll the last work element status from memory + */ + if (args->irq) + rc = wait_for_irq(last_we, args->completion_timeout, pid, afu_h, afu_irq_ea, err_irq_ea); + else if (args->wake_host_thread) + rc = wait_fast(last_we, args->completion_timeout, pid, afu_h, afu_irq_ea); + else + rc = wait_for_status(last_we, args->completion_timeout, pid); + if (rc) + goto err_status; + if (first_we->status != 1) { + LOG_ERR(pid, "unexpected status 0x%x for copy\n", first_we->status); + goto err_status; + } + if (args->irq && last_we->status != 1) { + LOG_ERR(pid, "unexpected status 0x%x for irq\n", last_we->status); + goto err_status; + } + if (args->wake_host_thread && (last_we->status != 1) && + (last_we->status != 0x11)) { + LOG_ERR(pid, "unexpected status 0x%x for wake_host_thread\n", last_we->status); + goto err_status; + } + + /* + * The memory barrier is to avoid instructions + * re-ordering and make sure no output addresses are + * read before the work element status is complete + */ + __sync_synchronize(); + + if (args->atomic_cas) { + ; /* 
atomicity is checked at the end of main() */ + } else if (args->increment) { + if (le32toh(*(pid_t *)dst)-le32toh(*(pid_t *)src)-1) { + LOG_ERR(pid, "increment error on loop %d\n", i); + goto err_status; + } + } else { + if (memcmp(dst, src, args->size)) { + LOG_ERR(pid, "copy error on loop %d\n", i); + goto err_status; + } + } + if (args->irq || args->wake_host_thread) { + /* AFU engine stops on irq, need to restart it */ + rc = restart_afu(pid, pp_mmio); + if (rc) + goto err_status; + } + + if (args->reallocate) { + /* + * unmap/remap the destination buffer to force a TLBI + * and extra memory translation with each loop + */ + if (args->shared_mem) { + shm_destroy(args); + shm_create(args); + src = args->counter; + dst = args->lock; + memcpy_we.src = htole64((uintptr_t) src); + } else { + munmap(dst, getpagesize()); + dst = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (dst == MAP_FAILED) { + LOG_ERR(pid, "reallocation of destination buffer failed\n"); + goto err; + } + } + memcpy_we.dst = htole64((uintptr_t) dst); + if (args->initialize) { + /* let us fault in the destination buffer */ + memset(dst, 0, args->size); + } + } else if (! 
args->atomic_cas) { + memset(dst, 0, args->size); + } + } + + gettimeofday(&end, NULL); + t = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec; + + /* catch any error interrupt */ + nevent = ocxl_afu_event_check(afu_h, 0, &event, 1); + if (nevent != 0) { + if (nevent == 1) { + if (event.irq.handle == err_irq_ea) + LOG_ERR(pid, "received error interrupt at end of test\n"); + else + LOG_ERR(pid, "received unexpected event at end of test, type %d\n", event.type); + } else { + LOG_ERR(pid, "invalid return value for ocxl_afu_event_check: %d\n", nevent); + } + goto err_status; + } + + LOG_INF(pid, "%d loops in %d uS (%0.2f uS per loop)\n", args->loop_count, t, ((float) t)/args->loop_count); + ocxl_afu_close(afu_h); + if (args->shared_mem) + shm_destroy(args); + return 0; + +err_status: + err = ocxl_mmio_read64(pp_mmio, MEMCPY_AFU_PP_STATUS, OCXL_MMIO_LITTLE_ENDIAN, &status); + if (err != OCXL_OK) + LOG_ERR(pid, "read of process status failed: %d\n", err); + else + LOG_ERR(pid, "process status at end of failed test=0x%lx\n", status); +err: + ocxl_afu_close(afu_h); + if (args->shared_mem) + shm_destroy(args); + return -1; +} + +void usage(char *name) +{ + fprintf(stderr, "Usage: %s [ options ]\n", name); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t-A\t\tRun the atomic compare and swap test\n"); + fprintf(stderr, "\t-a\t\tRun the increment test\n"); + fprintf(stderr, "\t-d <device>\tUse this opencapi card\n"); + fprintf(stderr, "\t-I\t\tInitialize the destination buffer after each loop\n"); + fprintf(stderr, "\t-i\t\tSend an interrupt after copy\n"); + fprintf(stderr, "\t-w\t\tSend a wake_host_thread command after copy\n"); + fprintf(stderr, "\t-l <loops>\tRun this number of memcpy loops (default 1)\n"); + fprintf(stderr, "\t-p <procs>\tFork this number of processes (default 1)\n"); + fprintf(stderr, "\t-p 0\t\tUse the maximum number of processes permitted by the AFU\n"); + fprintf(stderr, "\t-r\t\tReallocate the destination buffer in between 2 
loops\n"); + fprintf(stderr, "\t-S\t\tOperate on shared memory\n"); + fprintf(stderr, "\t-s <bufsize>\tCopy this number of bytes (default 2048)\n"); + fprintf(stderr, "\t-t <timeout>\tSeconds to wait for the AFU to signal completion\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + struct memcpy_test_args args; + int rc, c, i, j, processes = 1; + pid_t pid, failing; + + args.loop_count = 1; + args.size = 2048; + args.irq = 0; + args.completion_timeout = -1; + args.reallocate = 0; + args.initialize = 0; + args.device = NULL; + args.wake_host_thread = 0; + args.increment = 0; + args.atomic_cas = 0; + args.shared_mem = 0; + args.shmid = -1; + args.lock = NULL; + args.counter = NULL; + + while (1) { + c = getopt(argc, argv, "+aAhl:p:Ss:Iit:rd:w"); + if (c < 0) + break; + switch (c) { + case '?': + case 'h': + usage(argv[0]); + break; + case 'l': + args.loop_count = atoi(optarg); + break; + case 'p': + processes = atoi(optarg); + break; + case 's': + args.size = atoi(optarg); + break; + case 'i': + args.irq = 1; + break; + case 't': + args.completion_timeout = atoi(optarg); + break; + case 'r': + args.reallocate = 1; + break; + case 'I': + args.initialize = 1; + break; + case 'd': + args.device = optarg; + break; + case 'w': + args.wake_host_thread = 1; + break; + case 'a': + args.increment = 1; + break; + case 'A': + args.atomic_cas = 1; + break; + case 'S': + args.shared_mem = 1; + break; + } + } + + if (processes == 0) + processes = AFU_MAX_PROCESSES; + + if (args.completion_timeout == -1) { + args.completion_timeout = processes / 5; + if (args.completion_timeout < 10) + args.completion_timeout = 10; + } + + if (argv[optind]) { + fprintf(stderr, "Error: Unexpected argument '%s'\n", argv[optind]); + usage(argv[0]); + } + + if (args.wake_host_thread && args.irq) { + fprintf(stderr, "Error: -i and -w are mutually exclusive\n"); + usage(argv[0]); + } + + if (args.atomic_cas && args.reallocate) { + fprintf(stderr, "Error: -A and -r are mutually exclusive\n"); + 
usage(argv[0]); + } + + if (args.atomic_cas && args.shared_mem) { + fprintf(stderr, "Error: -A and -S are mutually exclusive\n"); + usage(argv[0]); + } + + if (args.increment && args.reallocate) { + fprintf(stderr, "Error: -a and -r are mutually exclusive\n"); + usage(argv[0]); + } + + if (args.increment && args.shared_mem) { + fprintf(stderr, "Error: -a and -S are mutually exclusive\n"); + usage(argv[0]); + } + + /* max buffer size supported by AFU */ + if (args.size > 2048 || args.size % 64) { + fprintf(stderr, "invalid buffer size %d\n", args.size); + return -1; + } + + rc = global_setup(&args); + if (rc) + exit(1); + + if (args.atomic_cas) { + rc = shm_create(&args); + if (rc) + exit(1); + + /* initialize lock and counter */ + memset(args.lock, 0, args.size); + memset(args.counter, 0, args.size); + printf("Shared memory ID: %i attached at: %p\n", args.shmid, args.lock); + } + + for (i = 0; i < processes; i++) { + if (!fork()) + /* Child process */ + exit(test_afu_memcpy(&args)); + } + + rc = 0; + failing = -1; + for (i = 0; i < processes; i++) { + pid = wait(&j); + if (pid && j) { + rc++; + if (failing == -1) + failing = pid; + } + } + if (args.atomic_cas) { + if (*(int *)args.counter != processes * args.loop_count) { + fprintf(stderr,"Atomicity Error:\n"); + fprintf(stderr," procs=%d\n", processes); + fprintf(stderr," loops=%d\n", args.loop_count); + fprintf(stderr," procs*loops=%d\n", processes * args.loop_count); + fprintf(stderr," count=%d (should be %d)\n", *(int *)args.counter, processes * args.loop_count); + return -1; + } + shm_destroy(&args); + } + + if (rc) + fprintf(stderr, "%d test(s) failed. 
Check process %d, maybe others\n", rc, failing); + else + printf("Test successful\n"); + return rc; +} diff --git a/afutests/reset/README.md b/afutests/reset/README.md new file mode 100644 index 0000000..bc3d410 --- /dev/null +++ b/afutests/reset/README.md @@ -0,0 +1,25 @@ +ocxl_reset_tests.sh +=================== + +`ocxl_reset_tests.sh` is a script for testing the reset of an OpenCAPI card. + +Requirements +------------ + +The OpenCAPI card must be flashed with either an IBM,AFP3 or IBM,MEMCPY3 AFU +image. + +This test requires the kernel module pnv-php, that will be automatically +loaded. + +Usage +----- + + $ ../../afuobj/ocxl_reset_tests.sh # Reset the first card and check AFU + +``` + Usage: ocxl_reset_tests [ options ] + Options: + -d <device> Use this capi card + -l <loops> Run this number of resets (default 1) +``` diff --git a/afutests/reset/ocxl_reset_tests.sh b/afutests/reset/ocxl_reset_tests.sh new file mode 100755 index 0000000..46d2d57 --- /dev/null +++ b/afutests/reset/ocxl_reset_tests.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# +# Copyright 2019 International Business Machines +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# ocxl_reset_tests.sh +# +# This test assumes that user is root and memcpy afu is programmed. 
+ +function usage +{ + echo 'ocxl_reset_tests.sh [-d <device_path>] [-l <loops>]' >&2 + exit 2 +} + +device= +loops=1 # default + +while true +do + case $1 in + ('') break ;; + (-d) device=$2; shift 2 || break ;; + (-l) loops=$2; shift 2 || break ;; + (*) usage ;; + esac +done +(( $# == 0 )) || usage + +[[ $device == -* ]] && usage +(( loops < 1 )) && usage + +if [[ $device ]] +then + if ! ls "$device" >/dev/null 2>&1 + then + echo ocxl_reset_tests.sh: "$device": no such device >&2 + exit 2 + fi + card=${device##*/} +fi + +if [[ -z $card ]] +then + # find first IBM,AFP3 or IBM,MEMCPY3 opencapi card + card=$( + set -- $(ls /dev/ocxl/ 2>/dev/null) + for i + do + case $i in + (*IBM,AFP3*) echo $i; break ;; + (*IBM,MEMCPY3*) echo $i; break ;; + esac + done + ) +fi + +if [[ -z $card ]] +then + echo ocxl_reset_tests.sh: could not find afu IBM,AFP3 nor IBM,MEMCPY3 >&2 + exit 3 +fi + +# load module pnv-php +if ! modprobe pnv-php +then + echo ocxl_reset_tests.sh: cannot load module pnv-php >&2 + exit 1 +fi + +slot=$(ls /dev/ocxl/$card | awk -F"." '{ print $2 }' | sed s/$/.0/) +slot=$(lspci -m -v -s $slot | awk '/^PhySlot:/ { print $2; exit }') +if [[ -z $slot ]] +then + printf "$card: No slot found. Exiting.\n" + exit 1 +fi +slot=/sys/bus/pci/slots/$slot + +for ((i = 0; i < loops; i++)) +do + ((loops > 1)) && echo Loop: $((i+1))/$loops + + echo ocxl_reset_tests.sh: resetting card $card in slot ${slot##*/} + if ! echo 0 > $slot/power + then + echo ocxl_reset_tests.sh: could not write to $slot/power + exit 4 + fi + + if ! echo 1 > $slot/power + then + echo ocxl_reset_tests.sh: could not write to $slot/power + exit 5 + fi + + echo ocxl_reset_tests.sh: card $card has been reset + + case $card in + (*,AFP3.*) + ocxl_afp3=$(which ocxl_afp3 2>/dev/null) + [[ $ocxl_afp3 ]] || ocxl_afp3=${0%/*}/ocxl_afp3 + + if [[ ! 
-x $ocxl_afp3 ]] + then + echo ocxl_reset_tests.sh: could not find test program $ocxl_afp3 + echo ocxl_reset_tests.sh: skipping IBM,AFP3 afu check + else + echo ocxl_reset_tests.sh: verifying afu IBM,AFP3 + + if ! "$ocxl_afp3" >/tmp/ocxl_reset_afp3.log + then + echo ocxl_reset_tests.sh: ocxl_afp3 fails after reset + exit 6 + fi + fi ;; + (*,MEMCPY3.*) + ocxl_memcpy=$(which ocxl_memcpy 2>/dev/null) + [[ $ocxl_memcpy ]] || ocxl_memcpy=${0%/*}/ocxl_memcpy + + if [[ ! -x $ocxl_memcpy ]] + then + echo ocxl_reset_tests.sh: could not find test program $ocxl_memcpy + echo ocxl_reset_tests.sh: skipping IBM,MEMCPY3 afu check + else + echo ocxl_reset_tests.sh: verifying afu IBM,MEMCPY3 + + if ! "$ocxl_memcpy" -p0 -l10000 >/tmp/ocxl_reset_memcpy.log + then + echo ocxl_reset_tests.sh: ocxl_memcpy fails after reset + exit 7 + fi + fi ;; + esac +done + +echo ocxl_reset_tests.sh: ocxl_reset test passes +exit 0 @@ -343,9 +343,9 @@ static bool populate_metadata(dev_t dev, ocxl_afu *afu) } physical_function++; - uint16_t domain; + uint32_t domain; uint8_t bus, device, function; - int found = sscanf(physical_function, "%hu:%hhu:%hhu.%hhu.%hhu", + int found = sscanf(physical_function, "%x:%hhu:%hhu.%hhu.%hhu", &domain, &bus, &device, &function, &afu->identifier.afu_index); if (found != 5) { diff --git a/src/internal.c b/src/internal.c index 0dd8e91..07eaa9d 100644 --- a/src/internal.c +++ b/src/internal.c @@ -53,6 +53,7 @@ pthread_mutex_t stderr_mutex = PTHREAD_MUTEX_INITIALIZER; * - Check the LIBOCXL_INFO environment variable and output the info string * - Check the LIBOCXL_TRACE_ALL environment variable and enable tracing_all * - Check the LIBOCXL_VERBOSE_ERRORS_ALL environment variable and enable verbose_errors_all + * - Check the LIBOCXL_SYSPATH environment variable and override sys_path */ void libocxl_init() { @@ -81,6 +82,10 @@ void libocxl_init() verbose_errors = true; } + val = getenv("LIBOCXL_SYSPATH"); + if (val) + sys_path = val; + libocxl_inited = true; 
pthread_mutex_unlock(&libocxl_inited_mutex); @@ -112,13 +112,13 @@ static ocxl_err irq_allocate(ocxl_afu *afu, ocxl_irq *irq, void *info) int rc = ioctl(afu->fd, OCXL_IOCTL_IRQ_ALLOC, &irq->event.irq_offset); if (rc) { - errmsg(afu, ret, "Could not allocate IRQ in kernel: %d", rc); + errmsg(afu, ret, "Could not allocate IRQ in kernel: %d: '%s'", errno, strerror(errno)); goto errend; } rc = ioctl(afu->fd, OCXL_IOCTL_IRQ_SET_FD, &irq->event); if (rc) { - errmsg(afu, ret, "Could not set event descriptor in kernel: %d", rc); + errmsg(afu, ret, "Could not set event descriptor in kernel: %d: '%s'", errno, strerror(errno)); goto errend; } @@ -14,6 +14,9 @@ * limitations under the License. */ +// Needed for le32toh() and friends when building against glibc version < 2.20 +#define _BSD_SOURCE + #include "libocxl_internal.h" #include "sys/mman.h" #include "errno.h" diff --git a/unittests/unittests.c b/unittests/unittests.c index 8d67358..6f45de4 100644 --- a/unittests/unittests.c +++ b/unittests/unittests.c @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "libocxl_internal.h" #include <unistd.h> #include <string.h> #include <stdlib.h> @@ -24,14 +25,9 @@ #include <pthread.h> #include <signal.h> #include <fcntl.h> - #include <misc/ocxl.h> - -#include "libocxl_internal.h" #include "static.h" -#include <stdlib.h> - static const char *ocxl_sysfs_path = "/tmp/ocxl-test"; static const char *ocxl_dev_path = "/dev/ocxl-test"; diff --git a/unittests/virtocxl.c b/unittests/virtocxl.c index c00f1fc..a918b70 100644 --- a/unittests/virtocxl.c +++ b/unittests/virtocxl.c @@ -14,10 +14,10 @@ * limitations under the License. 
*/ +#include "libocxl_internal.h" #include <fuse/cuse_lowlevel.h> #include <fuse/fuse_lowlevel.h> #include <linux/poll.h> -#include "libocxl_internal.h" #include <misc/ocxl.h> #include <errno.h> #include <sys/types.h> @@ -26,7 +26,6 @@ #include <string.h> #include <unistd.h> #include <pthread.h> - #include <stdlib.h> typedef struct ocxl_kernel_event_header ocxl_kernel_event_header; @@ -193,7 +192,7 @@ bool afu_is_attached() { * @return the thread for the device, or 0 on error */ pthread_t create_ocxl_device(const char *afu_name, size_t global_mmio_size, size_t per_pasid_mmio_size) { - char sysfs_base[PATH_MAX]; + char sysfs_base[PATH_MAX - 20]; char tmp[PATH_MAX]; char buf[BUF_SIZE]; |