diff options
author | Picca Frédéric-Emmanuel <picca@debian.org> | 2024-02-05 16:30:07 +0100 |
---|---|---|
committer | Picca Frédéric-Emmanuel <picca@debian.org> | 2024-02-05 16:30:07 +0100 |
commit | 04095a69f18767d222b16fae5b40f2b712cd6f7e (patch) | |
tree | d20abd3ee2f237319443e9dfd7500ad55d29a33d /src/silx/resources | |
parent | 3427caf0e96690e56aac6231a91df8f0f7a64fc2 (diff) |
New upstream version 2.0.0+dfsg
Diffstat (limited to 'src/silx/resources')
-rw-r--r-- | src/silx/resources/__init__.py | 170 | ||||
-rw-r--r-- | src/silx/resources/gui/icons/ruler.png | bin | 0 -> 1416 bytes | |||
-rw-r--r-- | src/silx/resources/gui/icons/ruler.svg | 216 | ||||
-rw-r--r-- | src/silx/resources/opencl/codec/bitshuffle_lz4.cl | 625 | ||||
-rw-r--r-- | src/silx/resources/opencl/doubleword.cl | 7 |
5 files changed, 935 insertions, 83 deletions
diff --git a/src/silx/resources/__init__.py b/src/silx/resources/__init__.py index b53f15b..4946600 100644 --- a/src/silx/resources/__init__.py +++ b/src/silx/resources/__init__.py @@ -1,6 +1,6 @@ # /*########################################################################## # -# Copyright (c) 2016-2018 European Synchrotron Radiation Facility +# Copyright (c) 2016-2023 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,7 @@ All access to data and documentation files MUST be made through the functions of this modules to ensure access across different distribution schemes: - Installing from source or from wheel -- Installing package as a zip (through the use of pkg_resources) +- Installing package as a zip - Linux packaging willing to install data files (and doc files) in alternative folders. In this case, this file must be patched. - Frozen fat binary application using silx (frozen with cx_Freeze or py2app). @@ -52,28 +52,28 @@ of this modules to ensure access across different distribution schemes: options={'py2app': {'packages': ['silx']}} ) """ +from __future__ import annotations __authors__ = ["V.A. Sole", "Thomas Vincent", "J. Kieffer"] __license__ = "MIT" __date__ = "08/03/2019" +import atexit +import contextlib +import functools +import importlib +import importlib.resources +import logging import os import sys -import logging -import importlib - +from typing import NamedTuple, Optional -logger = logging.getLogger(__name__) +if sys.version_info < (3, 9): + import pkg_resources -# pkg_resources is useful when this package is stored in a zip -# When pkg_resources is not available, the resources dir defaults to the -# directory containing this module. 
-try: - import pkg_resources -except ImportError: - pkg_resources = None +logger = logging.getLogger(__name__) # For packaging purpose, patch this variable to use an alternative directory @@ -87,66 +87,56 @@ _RESOURCES_DIR = None # cx_Freeze frozen support # See http://cx-freeze.readthedocs.io/en/latest/faq.html#using-data-files -if getattr(sys, 'frozen', False): +if getattr(sys, "frozen", False): # Running in a frozen application: # We expect resources to be located either in a silx/resources/ dir # relative to the executable or within this package. - _dir = os.path.join(os.path.dirname(sys.executable), 'silx', 'resources') + _dir = os.path.join(os.path.dirname(sys.executable), "silx", "resources") if os.path.isdir(_dir): _RESOURCES_DIR = _dir -class _ResourceDirectory(object): +class _ResourceDirectory(NamedTuple): """Store a source of resources""" - def __init__(self, package_name, package_path=None, forced_path=None): - if forced_path is None: - if package_path is None: - if pkg_resources is None: - # In this case we have to compute the package path - # Else it will not be used - module = importlib.import_module(package_name) - package_path = os.path.abspath(os.path.dirname(module.__file__)) - self.package_name = package_name - self.package_path = package_path - self.forced_path = forced_path + package_name: str + forced_path: Optional[str] = None -_SILX_DIRECTORY = _ResourceDirectory( - package_name=__name__, - package_path=os.path.abspath(os.path.dirname(__file__)), - forced_path=_RESOURCES_DIR) +_SILX_DIRECTORY = _ResourceDirectory(package_name=__name__, forced_path=_RESOURCES_DIR) _RESOURCE_DIRECTORIES = {} _RESOURCE_DIRECTORIES["silx"] = _SILX_DIRECTORY -def register_resource_directory(name, package_name, forced_path=None): +def register_resource_directory( + name: str, package_name: str, forced_path: Optional[str] = None +): """Register another resource directory to the available list. By default only the directory "silx" is available. .. 
versionadded:: 0.6 - :param str name: Name of the resource directory. It is used on the resource + :param name: Name of the resource directory. It is used on the resource name to specify the resource directory to use. The resource "silx:foo.png" will use the "silx" resource directory. - :param str package_name: Python name of the package containing resources. + :param package_name: Python name of the package containing resources. For example "silx.resources". - :param str forced_path: Path containing the resources. If specified - `pkg_resources` nor `package_name` will be used + :param forced_path: Path containing the resources. If specified + neither `importlib` nor `package_name` will be used For example "silx.resources". :raises ValueError: If the resource directory name already exists. """ if name in _RESOURCE_DIRECTORIES: raise ValueError("Resource directory name %s already exists" % name) resource_directory = _ResourceDirectory( - package_name=package_name, - forced_path=forced_path) + package_name=package_name, forced_path=forced_path + ) _RESOURCE_DIRECTORIES[name] = resource_directory -def list_dir(resource): +def list_dir(resource: str) -> list[str]: """List the content of a resource directory. Result are not prefixed by the resource name. @@ -155,9 +145,8 @@ def list_dir(resource): example "silx:foo.png" identify the resource "foo.png" from the resource directory "silx". See also :func:`register_resource_directory`. 
- :param str resource: Name of the resource directory to list + :param resource: Name of the resource directory to list :return: list of name contained in the directory - :rtype: List """ resource_directory, resource_name = _get_package_and_resource(resource) @@ -165,50 +154,49 @@ def list_dir(resource): # if set, use this directory path = resource_filename(resource) return os.listdir(path) - elif pkg_resources is None: - # Fallback if pkg_resources is not available - path = resource_filename(resource) - return os.listdir(path) - else: - # Preferred way to get resources as it supports zipfile package - package_name = resource_directory.package_name - return pkg_resources.resource_listdir(package_name, resource_name) + if sys.version_info < (3, 9): + return pkg_resources.resource_listdir( + resource_directory.package_name, resource_name + ) -def is_dir(resource): + path = importlib.resources.files(resource_directory.package_name) / resource_name + return [entry.name for entry in path.iterdir()] + + +def is_dir(resource: str) -> bool: """True is the resource is a resource directory. The resource name can be prefixed by the name of a resource directory. For example "silx:foo.png" identify the resource "foo.png" from the resource directory "silx". See also :func:`register_resource_directory`. - :param str resource: Name of the resource - :rtype: bool + :param resource: Name of the resource """ path = resource_filename(resource) return os.path.isdir(path) -def exists(resource): +def exists(resource: str) -> bool: """True is the resource exists. 
- :param str resource: Name of the resource - :rtype: bool + :param resource: Name of the resource """ path = resource_filename(resource) return os.path.exists(path) -def _get_package_and_resource(resource, default_directory=None): +def _get_package_and_resource( + resource: str, default_directory: Optional[str] = None +) -> tuple[_ResourceDirectory, str]: """ Return the resource directory class and a cleaned resource name without prefix. - :param str: resource: Name of the resource with resource prefix. - :param str default_directory: If the resource is not prefixed, the resource + :param resource: Name of the resource with resource prefix. + :param default_directory: If the resource is not prefixed, the resource will be searched on this default directory of the silx resource directory. - :rtype: tuple(_ResourceDirectory, str) :raises ValueError: If the resource name uses an unregistred resource directory name """ @@ -217,14 +205,14 @@ def _get_package_and_resource(resource, default_directory=None): else: prefix = "silx" if default_directory is not None: - resource = os.path.join(default_directory, resource) + resource = f"{default_directory}/{resource}" if prefix not in _RESOURCE_DIRECTORIES: raise ValueError("Resource '%s' uses an unregistred prefix", resource) resource_directory = _RESOURCE_DIRECTORIES[prefix] return resource_directory, resource -def resource_filename(resource): +def resource_filename(resource: str) -> str: """Return filename corresponding to resource. The existence of the resource is not checked. @@ -233,18 +221,41 @@ def resource_filename(resource): example "silx:foo.png" identify the resource "foo.png" from the resource directory "silx". See also :func:`register_resource_directory`. - :param str resource: Resource path relative to resource directory - using '/' path separator. It can be either a file or - a directory. + :param resource: Resource path relative to resource directory + using '/' path separator. 
It can be either a file or + a directory. :raises ValueError: If the resource name uses an unregistred resource directory name :return: Absolute resource path in the file system - :rtype: str """ return _resource_filename(resource, default_directory=None) -def _resource_filename(resource, default_directory=None): +# Manage resource files life-cycle +_file_manager = contextlib.ExitStack() +atexit.register(_file_manager.close) + + +@functools.lru_cache(maxsize=None) +def _get_resource_filename(package: str, resource: str) -> str: + """Returns path to requested resource in package + + :param package: Name of the package in which to look for the resource + :param resource: Resource path relative to package using '/' path separator + :return: Abolute resource path in the file system + """ + if sys.version_info < (3, 9): + return pkg_resources.resource_filename(package, resource) + + # Caching prevents extracting the resource twice + file_context = importlib.resources.as_file( + importlib.resources.files(package) / resource + ) + path = _file_manager.enter_context(file_context) + return str(path.absolute()) + + +def _resource_filename(resource: str, default_directory: Optional[str] = None) -> str: """Return filename corresponding to resource. The existence of the resource is not checked. @@ -253,32 +264,25 @@ def _resource_filename(resource, default_directory=None): example "silx:foo.png" identify the resource "foo.png" from the resource directory "silx". See also :func:`register_resource_directory`. - :param str resource: Resource path relative to resource directory - using '/' path separator. It can be either a file or - a directory. - :param str default_directory: If the resource is not prefixed, the resource + :param resource: Resource path relative to resource directory + using '/' path separator. It can be either a file or + a directory. 
+ :param default_directory: If the resource is not prefixed, the resource will be searched on this default directory of the silx resource directory. It should only be used internally by silx. :return: Absolute resource path in the file system - :rtype: str """ - resource_directory, resource_name = _get_package_and_resource(resource, - default_directory=default_directory) + resource_directory, resource_name = _get_package_and_resource( + resource, default_directory=default_directory + ) if resource_directory.forced_path is not None: # if set, use this directory base_dir = resource_directory.forced_path - resource_path = os.path.join(base_dir, *resource_name.split('/')) + resource_path = os.path.join(base_dir, *resource_name.split("/")) return resource_path - elif pkg_resources is None: - # Fallback if pkg_resources is not available - base_dir = resource_directory.package_path - resource_path = os.path.join(base_dir, *resource_name.split('/')) - return resource_path - else: - # Preferred way to get resources as it supports zipfile package - package_name = resource_directory.package_name - return pkg_resources.resource_filename(package_name, resource_name) + + return _get_resource_filename(resource_directory.package_name, resource_name) # Expose ExternalResources for compatibility (since silx 0.11) diff --git a/src/silx/resources/gui/icons/ruler.png b/src/silx/resources/gui/icons/ruler.png Binary files differnew file mode 100644 index 0000000..0ff603f --- /dev/null +++ b/src/silx/resources/gui/icons/ruler.png diff --git a/src/silx/resources/gui/icons/ruler.svg b/src/silx/resources/gui/icons/ruler.svg new file mode 100644 index 0000000..268b1db --- /dev/null +++ b/src/silx/resources/gui/icons/ruler.svg @@ -0,0 +1,216 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + width="32" + height="32" + viewBox="0 0 8.4666657 8.4666657" + version="1.1" + id="svg5" + inkscape:version="1.2.2 (b0a8486541, 2022-12-01)" + sodipodi:docname="ruler.svg" + 
inkscape:export-filename="ruler.png" + inkscape:export-xdpi="100" + inkscape:export-ydpi="100" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/"> + <metadata + id="metadata35"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + </cc:Work> + </rdf:RDF> + </metadata> + <sodipodi:namedview + id="namedview7" + pagecolor="#ffffff" + bordercolor="#000000" + borderopacity="0.25" + inkscape:showpageshadow="2" + inkscape:pageopacity="0.0" + inkscape:pagecheckerboard="0" + inkscape:deskcolor="#d1d1d1" + inkscape:document-units="mm" + showgrid="false" + inkscape:zoom="13.455443" + inkscape:cx="-18.988598" + inkscape:cy="0.2229581" + inkscape:window-width="1920" + inkscape:window-height="1163" + inkscape:window-x="1920" + inkscape:window-y="0" + inkscape:window-maximized="1" + inkscape:current-layer="g1102" + inkscape:document-rotation="0" + showguides="true"> + <inkscape:grid + type="xygrid" + id="grid3452" + originx="0" + originy="0" /> + </sodipodi:namedview> + <defs + id="defs2" /> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <g + id="g1102" + transform="translate(-0.36925443,-7.7531893)"> + <g + id="path1743"> + <path + style="color:#000000;fill:#ffffff;stroke-width:0.517192;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="M 6.5495575,11.909879 5.7344716,11.369562" + id="path418" /> + <path + style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="M 5.8769531,11.154297 5.5917969,11.585938 6.40625,12.125 6.6933594,11.695313 Z" + id="path420" /> + <g + id="g408"> + <g + id="path410"> + <path + 
style="color:#000000;fill:#ffffff;fill-rule:evenodd;stroke-width:0.0456346pt;-inkscape-stroke:none" + d="m 5.5442899,11.243491 c 0.069591,-0.10498 0.211272,-0.133702 0.3162524,-0.06411 0.1049803,0.06959 0.133702,0.211272 0.064111,0.316253 -0.069591,0.10498 -0.2135542,0.132189 -0.3162524,0.06411 -0.1049803,-0.06959 -0.133702,-0.211272 -0.064111,-0.316253 z" + id="path414" /> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none" + d="m 5.8769531,11.154297 c -0.1187003,-0.07869 -0.278736,-0.04643 -0.3574219,0.07227 -0.078686,0.1187 -0.046435,0.28069 0.072266,0.359375 0.116723,0.07738 0.2784923,0.04485 0.3574218,-0.07422 0.078686,-0.118701 0.046435,-0.278737 -0.072266,-0.357422 z m -0.033203,0.05078 c 0.09126,0.06049 0.1151836,0.182177 0.054687,0.273438 -0.060252,0.09089 -0.1847643,0.115422 -0.2734375,0.05664 -0.09126,-0.0605 -0.1151836,-0.184129 -0.054687,-0.27539 0.060496,-0.09126 0.1821772,-0.115185 0.2734375,-0.05469 z" + id="path416" /> + </g> + </g> + </g> + <g + id="rect234" + transform="matrix(0.77265229,0.63482945,-0.6944385,0.71955206,0,0)"> + <path + style="color:#000000;fill:#ffffff;stroke-width:0.399005;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="m 10.480519,2.0891316 h 2.354846 c 0.12095,0 0.218321,0.097371 0.218321,0.2183209 v 8.0683205 c 0,0.120949 -0.09737,0.218321 -0.218321,0.218321 h -2.354846 c -0.120949,0 -0.218321,-0.09737 -0.218321,-0.218321 V 2.3074525 c 0,-0.1209498 0.09737,-0.2183209 0.218321,-0.2183209 z" + id="path330" /> + <path + style="color:#000000;fill:#000000;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="m 10.480469,1.8886719 c -0.228023,0 -0.417969,0.1899469 -0.417969,0.4179687 V 10.375 c 0,0.228024 0.189947,0.417969 0.417969,0.417969 h 2.355469 c 0.228024,0 0.417968,-0.189947 0.417968,-0.417969 V 2.3066406 c 0,-0.2280229 -0.189946,-0.4179687 -0.417968,-0.4179687 z m 0,0.4003906 h 2.355469 c 0.01388,0 
0.01758,0.0037 0.01758,0.017578 V 10.375 c 0,0.01388 -0.0037,0.01953 -0.01758,0.01953 h -2.355469 c -0.01388,0 -0.01953,-0.0057 -0.01953,-0.01953 V 2.3066406 c 0,-0.013877 0.0057,-0.017578 0.01953,-0.017578 z" + id="path332" /> + </g> + <g + id="path1743-9-3"> + <path + style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none;paint-order:stroke markers fill" + d="m 5.9492187,10.972656 -0.3945312,0.439453 0.796875,0.716797 0.3945312,-0.4375 z" + id="path404" /> + <g + id="g394"> + <g + id="path396"> + <path + style="color:#000000;fill:#ffffff;fill-rule:evenodd;stroke-width:0.0520108pt;-inkscape-stroke:none" + d="m 5.5585792,11.01876 c 0.095985,-0.10674 0.260515,-0.11547 0.3672555,-0.01949 0.1067406,0.09599 0.1154702,0.260515 0.019486,0.367256 -0.095985,0.10674 -0.2628354,0.113383 -0.3672555,0.01949 C 5.4713246,11.290036 5.462595,11.125501 5.5585792,11.01876 Z" + id="path400" /> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none" + d="m 5.9492187,10.972656 c -0.1206907,-0.108524 -0.3074864,-0.09725 -0.4160156,0.02344 -0.1085286,0.120691 -0.099207,0.307491 0.021484,0.416015 0.1186803,0.106716 0.3071504,0.09958 0.4160156,-0.02148 0.108528,-0.120691 0.099205,-0.309435 -0.021484,-0.417969 z m -0.046875,0.05274 c 0.092792,0.08345 0.1010184,0.225568 0.017578,0.318359 -0.083105,0.09242 -0.2281997,0.0967 -0.3183594,0.01563 -0.092789,-0.08343 -0.1010177,-0.225569 -0.017578,-0.318359 0.083441,-0.09279 0.2255693,-0.09906 0.3183593,-0.01563 z" + id="path402" /> + </g> + </g> + </g> + <g + id="path1741-9"> + <path + style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="M 6.4042969,9.6074219 6.0117187,10.052734 7.3417969,11.224609 7.734375,10.779297 Z" + id="path390" /> + <g + id="g380"> + <g + id="path382"> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;stroke-width:0.0524193pt;-inkscape-stroke:none" + d="m 6.0116332,9.6575594 c 0.095671,-0.1085287 0.261399,-0.1189636 
0.3699276,-0.023292 0.1085287,0.095671 0.1189636,0.261399 0.023292,0.3699276 -0.095671,0.108528 -0.2637583,0.116884 -0.3699276,0.02329 C 5.9263967,9.9318156 5.9159618,9.766088 6.0116332,9.6575594 Z" + id="path386" /> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none" + d="m 6.4042969,9.6074219 c -0.1227123,-0.108175 -0.3097944,-0.095369 -0.4179688,0.027344 -0.1081747,0.1227123 -0.097322,0.3097959 0.025391,0.4179684 0.1206682,0.106376 0.3114126,0.0977 0.4199219,-0.02539 0.108175,-0.1227125 0.095369,-0.3117477 -0.027344,-0.4199221 z m -0.044922,0.052734 c 0.094345,0.083167 0.1026993,0.2259678 0.019531,0.3203125 C 6.2960737,10.074433 6.150264,10.082765 6.0585937,10.001953 5.9642495,9.9187874 5.9539415,9.7759853 6.0371094,9.6816406 6.1202768,9.5872959 6.2650303,9.5769882 6.359375,9.6601563 Z" + id="path388" /> + </g> + </g> + </g> + <g + id="path1741-9-7"> + <path + style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="m 4.2851562,11.6875 -0.3925781,0.445313 1.3300781,1.171875 0.3925782,-0.445313 z" + id="path376" /> + <g + id="g366"> + <g + id="path368"> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;stroke-width:0.0524193pt;-inkscape-stroke:none" + d="m 3.8930299,11.736561 c 0.095671,-0.108528 0.261399,-0.118963 0.3699276,-0.02329 0.1085286,0.09567 0.1189634,0.261399 0.023292,0.369928 -0.095671,0.108528 -0.2637583,0.116883 -0.3699276,0.02329 -0.1085286,-0.09567 -0.1189634,-0.261399 -0.023292,-0.369928 z" + id="path372" /> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none" + d="m 4.2851562,11.6875 c -0.1227122,-0.108177 -0.3097943,-0.09732 -0.4179687,0.02539 -0.1081748,0.122712 -0.097322,0.311748 0.025391,0.419922 0.1206681,0.106374 0.3114125,0.09575 0.4199219,-0.02734 0.1081748,-0.122713 0.095369,-0.309795 -0.027344,-0.417969 z m -0.044922,0.05273 c 0.094344,0.08317 0.1026991,0.225968 0.019531,0.320313 -0.082832,0.09396 -0.2286422,0.100343 
-0.3203125,0.01953 -0.094344,-0.08317 -0.1026991,-0.225967 -0.019531,-0.320312 0.083167,-0.09434 0.2259677,-0.102701 0.3203125,-0.01953 z" + id="path374" /> + </g> + </g> + </g> + <g + id="path1741-9-3"> + <path + style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="m 2.4824219,13.669922 -0.3945313,0.445312 1.3300782,1.171875 0.3925781,-0.445312 z" + id="path362" /> + <g + id="g352"> + <g + id="path354"> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;stroke-width:0.0524193pt;-inkscape-stroke:none" + d="m 2.0886536,13.720045 c 0.095671,-0.108528 0.2613989,-0.118963 0.3699275,-0.02329 0.1085287,0.09567 0.1189637,0.261399 0.023292,0.369928 -0.095671,0.108528 -0.2637582,0.116883 -0.3699275,0.02329 -0.1085287,-0.09567 -0.1189637,-0.261399 -0.023292,-0.369928 z" + id="path358" /> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none" + d="M 2.4824219,13.669922 C 2.3597097,13.561745 2.1706744,13.574554 2.0625,13.697266 c -0.1081751,0.122712 -0.097322,0.309795 0.025391,0.417968 0.1206681,0.106375 0.3114126,0.0977 0.4199219,-0.02539 0.1081751,-0.122713 0.097322,-0.311749 -0.025391,-0.419922 z m -0.046875,0.05273 c 0.094344,0.08317 0.1026993,0.225968 0.019531,0.320313 -0.082832,0.09396 -0.2286422,0.102296 -0.3203125,0.02148 -0.094344,-0.08317 -0.1026993,-0.22792 -0.019531,-0.322265 0.083167,-0.09435 0.2259677,-0.102701 0.3203125,-0.01953 z" + id="path360" /> + </g> + </g> + </g> + <g + id="path1743-9-3-6"> + <path + style="color:#000000;fill:#ffffff;stroke-width:0.589456;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="M 4.4307455,14.144747 3.6331377,13.427511" + id="path346" /> + <path + style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none" + d="m 3.8300781,13.208984 -0.3945312,0.4375 0.7988281,0.716797 0.3925781,-0.4375 z" + id="path348" /> + <g + id="g336"> + <g + id="path338"> + <path + 
style="color:#000000;fill:#ffffff;fill-rule:evenodd;stroke-width:0.0520108pt;-inkscape-stroke:none" + d="m 3.4397673,13.253626 c 0.095985,-0.106741 0.2605151,-0.11547 0.3672556,-0.01949 0.1067404,0.09598 0.1154698,0.260515 0.019485,0.367255 -0.095985,0.106741 -0.2628356,0.113383 -0.3672556,0.01949 -0.1067404,-0.09598 -0.1154698,-0.260515 -0.019485,-0.367255 z" + id="path342" /> + <path + style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none" + d="m 3.8300781,13.208984 c -0.1206906,-0.108523 -0.3074864,-0.09921 -0.4160156,0.02149 -0.1085295,0.12069 -0.099207,0.307491 0.021484,0.416015 0.1186801,0.106716 0.3071504,0.09958 0.4160156,-0.02148 0.1085295,-0.12069 0.099207,-0.307491 -0.021484,-0.416016 z m -0.046875,0.05078 c 0.092789,0.08343 0.1010181,0.22557 0.017578,0.318359 -0.083105,0.09242 -0.2281998,0.09865 -0.3183594,0.01758 -0.092789,-0.08343 -0.1010181,-0.22557 -0.017578,-0.318359 0.083441,-0.09279 0.2255692,-0.101014 0.3183593,-0.01758 z" + id="path344" /> + </g> + </g> + </g> + </g> + </g> +</svg> diff --git a/src/silx/resources/opencl/codec/bitshuffle_lz4.cl b/src/silx/resources/opencl/codec/bitshuffle_lz4.cl new file mode 100644 index 0000000..71f617a --- /dev/null +++ b/src/silx/resources/opencl/codec/bitshuffle_lz4.cl @@ -0,0 +1,625 @@ +/* + * Project: SILX: Bitshuffle LZ4 decompressor + * + * Copyright (C) 2022 European Synchrotron Radiation Facility + * Grenoble, France + * + * Principal authors: J. 
Kieffer (kieffer@esrf.fr) + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* To decompress bitshuffle-LZ4 data in parallel on GPU one needs to: + * - Find all beginning of blocks, this is performed by the ... kernel. + * - Decompress each block by one workgroup. 
+ * - bitshuffle the data from one workgroup + */ + +#ifndef LZ4_BLOCK_SIZE +# define LZ4_BLOCK_SIZE 8192 +#endif +#define LZ4_BLOCK_EXTRA 400 +#ifdef __ENDIAN_LITTLE__ +#define SWAP_BE 1 +#define SWAP_LE 0 +#else +#define SWAP_BE 0 +#define SWAP_LE 1 +#endif + + +#define int8_t char +#define uint8_t uchar +#define int16_t short +#define uint16_t ushort +#define int32_t int +#define uint32_t uint +#define int64_t long +#define uint64_t ulong + +#define position_t uint +#define token_t uchar2 + +//Some function used as part of bitshuffle: + +inline token_t decode_token(uint8_t value){ + return (token_t)(value >> 4, // literals + value & 0x0f); // matches +} + +inline bool has_liter_over(token_t token) +{ + return token.s0 >= 15; +} + +inline bool has_match_over(token_t token) +{ + return token.s1 >= 15; +} + +//parse overflow, return the number of overflow and the new position +inline uint2 read_overflow(local uint8_t* buffer, + position_t buffer_size, + position_t idx){ + position_t num = 0; + uint8_t next = 0xff; + while (next == 0xff && idx < buffer_size){ + next = buffer[idx]; + idx += 1; + num += next; + } + return (uint2)(num, idx); +} + +inline void copy_no_overlap(local uint8_t* dest, + const position_t dest_position, + local uint8_t* source, + const position_t src_position, + const position_t length){ + for (position_t i=get_local_id(0); i<length; i+=get_local_size(0)) { + dest[dest_position+i] = source[src_position+i]; + } +} + +inline void copy_repeat(local uint8_t* dest, + const position_t dest_position, + local uint8_t* source, + const position_t src_position, + const position_t dist, + const position_t length){ + + // if there is overlap, it means we repeat, so we just + // need to organize our copy around that + for (position_t i=get_local_id(0); i<length; i+=get_local_size(0)) { + dest[dest_position+i] = source[src_position + i%dist]; + } +} + +inline void copy_collab(local uint8_t* dest, + const position_t dest_position, + local uint8_t* source, + 
const position_t src_position, + const position_t dist, + const position_t length){ + //Generic copy function + if (dist < length) { + copy_repeat(dest, dest_position, source, src_position, dist, length); + } + else { + copy_no_overlap(dest, dest_position, source, src_position, length); + } +} + +// Function to read larger integers at various position. Endianness is addressed as well with the swap flag +uint64_t load64_at(global uint8_t *src, + const uint64_t position, + const bool swap){ + uchar8 vector; + if (swap){ + vector = (uchar8)(src[position+7],src[position+6], + src[position+5],src[position+4], + src[position+3],src[position+2], + src[position+1],src[position+0]); + } + else{ + vector = (uchar8)(src[position+0],src[position+1], + src[position+2],src[position+3], + src[position+4],src[position+5], + src[position+6],src[position+7]); + } + return as_ulong(vector); +} + +uint32_t load32_at(global uint8_t *src, + const uint64_t position, + const bool swap){ + uchar4 vector; + if (swap){ + vector = (uchar4)( + src[position+3],src[position+2], + src[position+1],src[position+0]); + } + else{ + vector = (uchar4)(src[position+0],src[position+1], + src[position+2],src[position+3]); + } + return as_uint(vector); +} + +uint16_t load16_at(local uint8_t *src, + const uint64_t position, + const bool swap){ + uchar2 vector; + if (swap){ + vector = (uchar2)(src[position+1],src[position+0]); + } + else{ + vector = (uchar2)(src[position+0],src[position+1]); + } + return as_ushort(vector); +} + +//Calculate the begining and the end of the block corresponding to the block=gid +inline void _lz4_unblock(global uint8_t *src, + const uint64_t size, + local uint64_t *block_position){ + uint32_t gid = get_group_id(0); + uint32_t lid = get_local_id(0); + if (lid == 0){ + uint64_t block_start=16; + uint32_t block_size = load32_at(src, 12, SWAP_BE); + uint64_t block_end = block_start + block_size; + + for (uint32_t block_idx=0; block_idx<gid; block_idx++){ + // printf("gid %u idx %u 
%lu-%lu\n",gid, block_idx,block_start,block_end); + block_start = block_end + 4; + if (block_start>=size){ + printf("Read beyond end of source buffer at gid %u %lu>%lu\n",gid, block_start, size); + block_start = 0; + block_end = 0; + break; + } + block_size = load32_at(src, block_end, SWAP_BE); + block_end = block_start + block_size; + } + block_position[0] = block_start; + block_position[1] = block_end; +// if (gid>get_num_groups(0)-10) printf("Success finish unblock gid %u block: %lu - %lu\n",gid,block_start,block_end); + } + barrier(CLK_LOCAL_MEM_FENCE); +} + + +//Decompress one block in shared memory +inline uint32_t lz4_decompress_local_block( local uint8_t* local_cmp, + local uint8_t* local_dec, + const uint32_t cmp_buffer_size, + const uint32_t dec_buffer_size){ + + uint32_t gid = get_group_id(0); // One block is decompressed by one workgroup + uint32_t lid = get_local_id(0); // This is the thread position in the group... + uint32_t wg = get_local_size(0); // workgroup size + + position_t dec_idx = 0; + position_t cmp_idx = 0; + while (cmp_idx < cmp_buffer_size) { + // read header byte + token_t tok = decode_token(local_cmp[cmp_idx]); + // if (lid==0) printf("gid %u at idx %u/%u. Token is litterials: %u; matches: %u\n", gid, cmp_idx, cmp_buffer_size,tok.s0, tok.s1); + + cmp_idx+=1; + + // read the length of the literals + position_t num_literals = tok.s0; + if (has_liter_over(tok)) { + uint2 tmp = read_overflow(local_cmp, + cmp_buffer_size, + cmp_idx); + num_literals += tmp.s0; + cmp_idx = tmp.s1; + } + const position_t start_literal = cmp_idx; + + // copy the literals to the dst stream in parallel + // if (lid==0) printf("gid %u: copy literals from %u to %u <%u (len %u)\n", gid, cmp_idx,num_literals+cmp_idx,cmp_buffer_size,num_literals); + copy_no_overlap(local_dec, dec_idx, local_cmp, cmp_idx, num_literals); + cmp_idx += num_literals; + dec_idx += num_literals; + + // Note that the last sequence stops right after literals field. 
+ // There are specific parsing rules to respect to be compatible with the + // reference decoder : 1) The last 5 bytes are always literals 2) The last + // match cannot start within the last 12 bytes Consequently, a file with + // less then 13 bytes can only be represented as literals These rules are in + // place to benefit speed and ensure buffer limits are never crossed. + if (cmp_idx < cmp_buffer_size) { + + // read the offset + uint16_t offset = load16_at(local_cmp, cmp_idx, SWAP_LE); + // if (lid==0) printf("gid %u: offset is %u at %u\n",gid, offset, cmp_idx); + if (offset == 0) { + //corruped block + if (lid == 0) + printf("Corrupted block #%u\n", gid); + return 0; + } + + cmp_idx += 2; + + // read the match length + position_t match = 4 + tok.s1; + if (has_match_over(tok)) { + uint2 tmp = read_overflow(local_cmp, + cmp_buffer_size, + cmp_idx); + match += tmp.s0; + cmp_idx = tmp.s1; + } + + //syncronize threads before reading shared memory + barrier(CLK_LOCAL_MEM_FENCE); + + // copy match + copy_collab(local_dec, dec_idx, local_dec, dec_idx - offset, offset, match); + dec_idx += match; + } + } + //syncronize threads before reading shared memory + barrier(CLK_LOCAL_MEM_FENCE); + return dec_idx; +} + +//Perform the bifshuffling on 8-bits objects +inline void bitunshuffle8( local uint8_t* inp, + local uint8_t* out, + const uint32_t buffer_size){ //8k ... or less. +// uint32_t gid = get_group_id(0); + uint32_t lid = get_local_id(0); + uint32_t wg = get_local_size(0); + uint32_t u8_buffer_size = buffer_size; // /1 -> 8k + + // One thread deals with one or several output data + for (uint32_t dpos=lid; dpos<u8_buffer_size; dpos+=wg){ + uint8_t res = 0; + // read bits at several places... 
+ for (uint32_t bit=0; bit<8; bit++){ + uint32_t read_bit = bit*u8_buffer_size + dpos; + uint32_t u8_word_pos = read_bit>>3; // /8 + uint32_t u8_bit_pos = read_bit&7; // %8 + // if (lid==0) printf("dpos %u bit %u read at %u,%u\n",dpos,bit,u8_word_pos,u8_bit_pos); + res |= ((inp[u8_word_pos]>>u8_bit_pos) & 1)<<bit; + } + // if (lid==0) printf("dpos %u res %u\n",dpos,res); + out[dpos] = res; + } +} + + +//Perform the bifshuffling on 16-bits objects +inline void bitunshuffle16( local uint8_t* inp, + local uint8_t* out, + const uint32_t buffer_size){ //8k ... or less. +// uint32_t gid = get_group_id(0); + uint32_t lid = get_local_id(0); + uint32_t wg = get_local_size(0); + uint32_t u16_buffer_size = buffer_size>>1; // /2 -> 4k + + // One thread deals with one or several output data + for (uint32_t dpos=lid; dpos<u16_buffer_size; dpos+=wg){ + uint16_t res = 0; + // read bits at several places... + for (uint32_t bit=0; bit<16; bit++){ + uint32_t read_bit = bit*u16_buffer_size + dpos; + uint32_t u8_word_pos = read_bit>>3; // /8 + uint32_t u8_bit_pos = read_bit&7; // %8 + // if (lid==0) printf("dpos %u bit %u read at %u,%u\n",dpos,bit,u8_word_pos,u8_bit_pos); + res |= ((inp[u8_word_pos]>>u8_bit_pos) & 1)<<bit; + } + // if (lid==0) printf("dpos %u res %u\n",dpos,res); + uchar2 tmp = as_uchar2(res); + out[2*dpos] = tmp.s0; + out[2*dpos+1] = tmp.s1; + } +} + + +//Perform the bifshuffling on 32-bits objects +inline void bitunshuffle32( local uint8_t* inp, + local uint8_t* out, + const uint32_t buffer_size){ //8k ... or less. +// uint32_t gid = get_group_id(0); + uint32_t lid = get_local_id(0); + uint32_t wg = get_local_size(0); + uint32_t u32_buffer_size = buffer_size>>2; // /4 -> 2k + + // One thread deals with one or several output data + for (uint32_t dpos=lid; dpos<u32_buffer_size; dpos+=wg){ + uint32_t res = 0; + // read bits at several places... 
+ for (uint32_t bit=0; bit<32; bit++){ + uint32_t read_bit = bit*u32_buffer_size + dpos; + uint32_t u8_word_pos = read_bit>>3; // /8 + uint32_t u8_bit_pos = read_bit&7; // %8 + // if (lid==0) printf("dpos %u bit %u read at %u,%u\n",dpos,bit,u8_word_pos,u8_bit_pos); + res |= ((inp[u8_word_pos]>>u8_bit_pos) & 1)<<bit; + } + // if (lid==0) printf("dpos %u res %u\n",dpos,res); + uchar4 tmp = as_uchar4(res); + out[4*dpos] = tmp.s0; + out[4*dpos+1] = tmp.s1; + out[4*dpos+2] = tmp.s2; + out[4*dpos+3] = tmp.s3; + } +} + +//Perform the bifshuffling on 32-bits objects +inline void bitunshuffle64( local uint8_t* inp, + local uint8_t* out, + const uint32_t buffer_size){ //8k ... or less. +// uint32_t gid = get_group_id(0); + uint32_t lid = get_local_id(0); + uint32_t wg = get_local_size(0); + uint32_t u64_buffer_size = buffer_size>>3; // /8 -> 1k + + // One thread deals with one or several output data + for (uint32_t dpos=lid; dpos<u64_buffer_size; dpos+=wg){ + uint64_t res = 0; + // read bits at several places... 
+ for (uint32_t bit=0; bit<64; bit++){ + uint32_t read_bit = bit*u64_buffer_size + dpos; + uint32_t u8_word_pos = read_bit>>3; // /8 + uint32_t u8_bit_pos = read_bit&7; // %8 + // if (lid==0) printf("dpos %u bit %u read at %u,%u\n",dpos,bit,u8_word_pos,u8_bit_pos); + res |= ((inp[u8_word_pos]>>u8_bit_pos) & 1)<<bit; + } + // if (lid==0) printf("dpos %u res %u\n",dpos,res); + uchar8 tmp = as_uchar8(res); + out[8*dpos] = tmp.s0; + out[8*dpos+1] = tmp.s1; + out[8*dpos+2] = tmp.s2; + out[8*dpos+3] = tmp.s3; + out[8*dpos+4] = tmp.s4; + out[8*dpos+5] = tmp.s5; + out[8*dpos+6] = tmp.s6; + out[8*dpos+7] = tmp.s7; + } +} + + +/* Preprocessing kernel which performs: +- Memset arrays +- read block position stored in block_position array + +Param: +- src: input buffer in global memory +- size: input buffer size +- block_position: output buffer in local memory containing the index of the begining of each block +- max_blocks: allocated memory for block_position array (output) +- nb_blocks: output buffer with the actual number of blocks in src (output). + +Return: Nothing, this is a kernel + +Hint on workgroup size: little kernel ... wg=1, 1 wg is enough. +*/ + +kernel void lz4_unblock(global uint8_t *src, + const uint64_t size, + global uint64_t *block_start, + const uint32_t max_blocks, + global uint32_t *nb_blocks){ + + uint64_t total_nbytes = load64_at(src,0,SWAP_BE); + uint32_t block_nbytes = load32_at(src,8,SWAP_BE); + + uint32_t block_idx = 0; + uint64_t pos = 12; + uint32_t block_size; + + while ((pos+4<size) && (block_idx<max_blocks)){ + block_size = load32_at(src, pos, SWAP_BE); + block_start[block_idx] = pos + 4; + block_idx +=1; + pos += 4 + block_size; + } + nb_blocks[0] = block_idx; +} + +// decompress a frame blockwise. +// Needs the block position to be known in advance (block_start) calculated from lz4_unblock. +// one workgroup treats on block. 
+ +kernel void bslz4_decompress_block( global uint8_t* comp_src, + global uint8_t* dec_dest, + global uint64_t* block_start, + global uint32_t *nb_blocks, + const uint8_t item_size){ + + uint32_t gid = get_group_id(0); // One block is decompressed by one workgroup + uint32_t lid = get_local_id(0); // This is the thread position in the group... + uint32_t wg = get_local_size(0); // workgroup size + + //guard if the number of wg scheduled is too large + if (gid >=nb_blocks[0]) return; + + // No need to guard, the number of blocks can be calculated in advance. + uint64_t start_read = block_start[gid]; + if (start_read<12) return; + + local uint8_t local_cmp[LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA]; + local uint8_t local_dec[LZ4_BLOCK_SIZE]; + + uint32_t cmp_buffer_size = load32_at(comp_src, start_read-4, SWAP_BE); + uint64_t end_read = start_read + cmp_buffer_size; + // Copy locally the compressed buffer and memset the destination buffer + for (uint32_t i=lid; i<cmp_buffer_size; i+=wg){ + uint64_t read_pos = start_read + i; + if (read_pos<end_read) + local_cmp[i] = comp_src[read_pos]; + else + local_cmp[i] = 0; + } + for (uint32_t i=lid+cmp_buffer_size; i<LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA; i+=wg){ + local_cmp[i] = 0; + } + for (uint32_t i=lid; i<LZ4_BLOCK_SIZE; i+=wg){ + local_dec[i] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + //All the work is performed here: + uint32_t dec_size = lz4_decompress_local_block( local_cmp, local_dec, cmp_buffer_size, LZ4_BLOCK_SIZE); + + barrier(CLK_LOCAL_MEM_FENCE); + local uint8_t* local_buffer; + + //Perform bit-unshuffle + if (item_size == 1){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle8"); + bitunshuffle8(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else if (item_size == 2){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle16"); + bitunshuffle16(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else if (item_size == 4){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle32"); + 
bitunshuffle32(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else if (item_size == 8){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle64"); + bitunshuffle64(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else { + local_buffer = local_dec; + } + + + //Finally copy the destination data from local to global memory: + uint64_t start_write = LZ4_BLOCK_SIZE*gid; + barrier(CLK_LOCAL_MEM_FENCE); + for (uint32_t i=lid; i<dec_size; i+=wg){ + dec_dest[start_write + i] = local_buffer[i]; + } + + if (gid+1==get_num_groups(0)){ + uint64_t total_nbytes = load64_at(comp_src,0,SWAP_BE); + uint64_t end_write = dec_size + start_write; + int32_t remaining = total_nbytes - end_write; +// if (lid==0) printf("gid %u is last block has %u elements. Writing ends at %u/%lu, copy remaining %d\n",gid, dec_size, end_write, total_nbytes, remaining); + if ((remaining>0) && (remaining<item_size*8)){ + for (uint32_t i=lid; i<remaining; i++){ + dec_dest[end_write + i] = comp_src[end_read+i]; + } + } + } + +} + +// decompress a frame blockwise. +// block-start are searched by one thread from each workgroup ... not very efficient +// one workgroup treats on block. + +kernel void bslz4_decompress_frame( + global uint8_t* comp_src, + const uint64_t src_size, + global uint8_t* dec_dest, + const uint8_t item_size){ + + uint32_t gid = get_group_id(0); // One block is decompressed by one workgroup + uint32_t lid = get_local_id(0); // This is the thread position in the group... 
+ uint32_t wg = get_local_size(0); // workgroup size + + local uint8_t local_cmp[LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA]; + local uint8_t local_dec[LZ4_BLOCK_SIZE]; + local uint64_t block[2]; // will contain begining and end of the current block + + uint64_t start_read, end_read; + uint32_t cmp_buffer_size; + _lz4_unblock(comp_src, src_size, block); + start_read = block[0]; + end_read = block[1]; + cmp_buffer_size = end_read - start_read; + if (cmp_buffer_size == 0){ + if (lid == 0) printf("gid=%u: Empty buffer\n", gid); + return; + } + + // Copy locally the compressed buffer and memset the destination buffer + for (uint32_t i=lid; i<cmp_buffer_size; i+=wg){ + uint64_t read_pos = start_read + i; + if (read_pos<end_read) + local_cmp[i] = comp_src[read_pos]; + else + local_cmp[i] = 0; + } + for (uint32_t i=lid+cmp_buffer_size; i<LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA; i+=wg){ + local_cmp[i] = 0; + } + for (uint32_t i=lid; i<LZ4_BLOCK_SIZE; i+=wg){ + local_dec[i] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + //All the work is performed here: + uint32_t dec_size; + dec_size = lz4_decompress_local_block( local_cmp, local_dec, cmp_buffer_size, LZ4_BLOCK_SIZE); + + barrier(CLK_LOCAL_MEM_FENCE); + local uint8_t* local_buffer; + + //Perform bit-unshuffle + if (item_size == 1){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle8"); + bitunshuffle8(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else if (item_size == 2){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle16"); + bitunshuffle16(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else if (item_size == 4){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle32"); + bitunshuffle32(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else if (item_size == 8){ +// if ((gid==0) && (lid==0)) printf("bitunshuffle64"); + bitunshuffle64(local_dec, local_cmp, dec_size); + local_buffer=local_cmp; + } + else { + local_buffer = local_dec; + } + + //Finally copy the destination data from local to global 
memory: + uint64_t start_write = LZ4_BLOCK_SIZE*gid; + barrier(CLK_LOCAL_MEM_FENCE); + for (uint32_t i=lid; i<dec_size; i+=wg){ + dec_dest[start_write + i] = local_buffer[i]; + } + +} diff --git a/src/silx/resources/opencl/doubleword.cl b/src/silx/resources/opencl/doubleword.cl index a0ebfda..02a8aba 100644 --- a/src/silx/resources/opencl/doubleword.cl +++ b/src/silx/resources/opencl/doubleword.cl @@ -29,6 +29,7 @@ * * We use the trick to declare some variable "volatile" to enforce the actual * precision reduction of those variables. + * This has to be used in combination with #pragma clang fp contract(on) */ #ifndef X87_VOLATILE @@ -37,6 +38,7 @@ //Algorithm 1, p23, theorem 1.1.12. Requires e_x > e_y, valid if |x| > |y| inline fp2 fast_fp_plus_fp(fp x, fp y){ + #pragma clang fp contract(on) X87_VOLATILE fp s = x + y; X87_VOLATILE fp z = s - x; fp e = y - z; @@ -45,6 +47,7 @@ inline fp2 fast_fp_plus_fp(fp x, fp y){ //Algorithm 2, p24, same as fast_fp_plus_fp without the condition on e_x and e_y inline fp2 fp_plus_fp(fp x, fp y){ + #pragma clang fp contract(on) X87_VOLATILE fp s = x + y; X87_VOLATILE fp xp = s - y; X87_VOLATILE fp yp = s - xp; @@ -62,6 +65,7 @@ inline fp2 fp_times_fp(fp x, fp y){ //Algorithm 7, p38: Addition of a FP to a DW. 
10flop bounds:2u²+5u³ inline fp2 dw_plus_fp(fp2 x, fp y){ + #pragma clang fp contract(on) fp2 s = fp_plus_fp(x.s0, y); X87_VOLATILE fp v = x.s1 + s.s1; return fast_fp_plus_fp(s.s0, v); @@ -83,6 +87,7 @@ inline fp2 dw_times_fp(fp2 x, fp y){ //Algorithm 14, p52: Multiplication DW*DW, 8 flops bounds:6u² inline fp2 dw_times_dw(fp2 x, fp2 y){ + #pragma clang fp contract(on) fp2 c = fp_times_fp(x.s0, y.s0); X87_VOLATILE fp l = fma(x.s1, y.s0, x.s0 * y.s1); return fast_fp_plus_fp(c.s0, c.s1 + l); @@ -90,6 +95,7 @@ inline fp2 dw_times_dw(fp2 x, fp2 y){ //Algorithm 17, p55: Division DW / FP, 10flops bounds: 3.5u² inline fp2 dw_div_fp(fp2 x, fp y){ + #pragma clang fp contract(on) X87_VOLATILE fp th = x.s0 / y; fp2 pi = fp_times_fp(th, y); fp2 d = x - pi; @@ -100,6 +106,7 @@ inline fp2 dw_div_fp(fp2 x, fp y){ //Derived from algorithm 20, p64: Inversion 1/ DW, 22 flops inline fp2 inv_dw(fp2 y){ + #pragma clang fp contract(on) X87_VOLATILE fp th = one/y.s0; X87_VOLATILE fp rh = fma(-y.s0, th, one); X87_VOLATILE fp rl = -y.s1 * th; |