summaryrefslogtreecommitdiff
path: root/src/silx/resources
diff options
context:
space:
mode:
authorPicca Frédéric-Emmanuel <picca@debian.org>2024-02-05 16:30:07 +0100
committerPicca Frédéric-Emmanuel <picca@debian.org>2024-02-05 16:30:07 +0100
commit04095a69f18767d222b16fae5b40f2b712cd6f7e (patch)
treed20abd3ee2f237319443e9dfd7500ad55d29a33d /src/silx/resources
parent3427caf0e96690e56aac6231a91df8f0f7a64fc2 (diff)
New upstream version 2.0.0+dfsg
Diffstat (limited to 'src/silx/resources')
-rw-r--r--src/silx/resources/__init__.py170
-rw-r--r--src/silx/resources/gui/icons/ruler.pngbin0 -> 1416 bytes
-rw-r--r--src/silx/resources/gui/icons/ruler.svg216
-rw-r--r--src/silx/resources/opencl/codec/bitshuffle_lz4.cl625
-rw-r--r--src/silx/resources/opencl/doubleword.cl7
5 files changed, 935 insertions, 83 deletions
diff --git a/src/silx/resources/__init__.py b/src/silx/resources/__init__.py
index b53f15b..4946600 100644
--- a/src/silx/resources/__init__.py
+++ b/src/silx/resources/__init__.py
@@ -1,6 +1,6 @@
# /*##########################################################################
#
-# Copyright (c) 2016-2018 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2023 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -27,7 +27,7 @@ All access to data and documentation files MUST be made through the functions
of this modules to ensure access across different distribution schemes:
- Installing from source or from wheel
-- Installing package as a zip (through the use of pkg_resources)
+- Installing package as a zip
- Linux packaging willing to install data files (and doc files) in
alternative folders. In this case, this file must be patched.
- Frozen fat binary application using silx (frozen with cx_Freeze or py2app).
@@ -52,28 +52,28 @@ of this modules to ensure access across different distribution schemes:
options={'py2app': {'packages': ['silx']}}
)
"""
+from __future__ import annotations
__authors__ = ["V.A. Sole", "Thomas Vincent", "J. Kieffer"]
__license__ = "MIT"
__date__ = "08/03/2019"
+import atexit
+import contextlib
+import functools
+import importlib
+import importlib.resources
+import logging
import os
import sys
-import logging
-import importlib
-
+from typing import NamedTuple, Optional
-logger = logging.getLogger(__name__)
+if sys.version_info < (3, 9):
+ import pkg_resources
-# pkg_resources is useful when this package is stored in a zip
-# When pkg_resources is not available, the resources dir defaults to the
-# directory containing this module.
-try:
- import pkg_resources
-except ImportError:
- pkg_resources = None
+logger = logging.getLogger(__name__)
# For packaging purpose, patch this variable to use an alternative directory
@@ -87,66 +87,56 @@ _RESOURCES_DIR = None
# cx_Freeze frozen support
# See http://cx-freeze.readthedocs.io/en/latest/faq.html#using-data-files
-if getattr(sys, 'frozen', False):
+if getattr(sys, "frozen", False):
# Running in a frozen application:
# We expect resources to be located either in a silx/resources/ dir
# relative to the executable or within this package.
- _dir = os.path.join(os.path.dirname(sys.executable), 'silx', 'resources')
+ _dir = os.path.join(os.path.dirname(sys.executable), "silx", "resources")
if os.path.isdir(_dir):
_RESOURCES_DIR = _dir
-class _ResourceDirectory(object):
+class _ResourceDirectory(NamedTuple):
"""Store a source of resources"""
- def __init__(self, package_name, package_path=None, forced_path=None):
- if forced_path is None:
- if package_path is None:
- if pkg_resources is None:
- # In this case we have to compute the package path
- # Else it will not be used
- module = importlib.import_module(package_name)
- package_path = os.path.abspath(os.path.dirname(module.__file__))
- self.package_name = package_name
- self.package_path = package_path
- self.forced_path = forced_path
+ package_name: str
+ forced_path: Optional[str] = None
-_SILX_DIRECTORY = _ResourceDirectory(
- package_name=__name__,
- package_path=os.path.abspath(os.path.dirname(__file__)),
- forced_path=_RESOURCES_DIR)
+_SILX_DIRECTORY = _ResourceDirectory(package_name=__name__, forced_path=_RESOURCES_DIR)
_RESOURCE_DIRECTORIES = {}
_RESOURCE_DIRECTORIES["silx"] = _SILX_DIRECTORY
-def register_resource_directory(name, package_name, forced_path=None):
+def register_resource_directory(
+ name: str, package_name: str, forced_path: Optional[str] = None
+):
"""Register another resource directory to the available list.
By default only the directory "silx" is available.
.. versionadded:: 0.6
- :param str name: Name of the resource directory. It is used on the resource
+ :param name: Name of the resource directory. It is used on the resource
name to specify the resource directory to use. The resource
"silx:foo.png" will use the "silx" resource directory.
- :param str package_name: Python name of the package containing resources.
+ :param package_name: Python name of the package containing resources.
For example "silx.resources".
- :param str forced_path: Path containing the resources. If specified
- `pkg_resources` nor `package_name` will be used
+ :param forced_path: Path containing the resources. If specified
+ neither `importlib` nor `package_name` will be used
For example "silx.resources".
:raises ValueError: If the resource directory name already exists.
"""
if name in _RESOURCE_DIRECTORIES:
raise ValueError("Resource directory name %s already exists" % name)
resource_directory = _ResourceDirectory(
- package_name=package_name,
- forced_path=forced_path)
+ package_name=package_name, forced_path=forced_path
+ )
_RESOURCE_DIRECTORIES[name] = resource_directory
-def list_dir(resource):
+def list_dir(resource: str) -> list[str]:
"""List the content of a resource directory.
Result are not prefixed by the resource name.
@@ -155,9 +145,8 @@ def list_dir(resource):
example "silx:foo.png" identify the resource "foo.png" from the resource
directory "silx". See also :func:`register_resource_directory`.
- :param str resource: Name of the resource directory to list
+ :param resource: Name of the resource directory to list
:return: list of name contained in the directory
- :rtype: List
"""
resource_directory, resource_name = _get_package_and_resource(resource)
@@ -165,50 +154,49 @@ def list_dir(resource):
# if set, use this directory
path = resource_filename(resource)
return os.listdir(path)
- elif pkg_resources is None:
- # Fallback if pkg_resources is not available
- path = resource_filename(resource)
- return os.listdir(path)
- else:
- # Preferred way to get resources as it supports zipfile package
- package_name = resource_directory.package_name
- return pkg_resources.resource_listdir(package_name, resource_name)
+ if sys.version_info < (3, 9):
+ return pkg_resources.resource_listdir(
+ resource_directory.package_name, resource_name
+ )
-def is_dir(resource):
+ path = importlib.resources.files(resource_directory.package_name) / resource_name
+ return [entry.name for entry in path.iterdir()]
+
+
+def is_dir(resource: str) -> bool:
"""True is the resource is a resource directory.
The resource name can be prefixed by the name of a resource directory. For
example "silx:foo.png" identify the resource "foo.png" from the resource
directory "silx". See also :func:`register_resource_directory`.
- :param str resource: Name of the resource
- :rtype: bool
+ :param resource: Name of the resource
"""
path = resource_filename(resource)
return os.path.isdir(path)
-def exists(resource):
+def exists(resource: str) -> bool:
"""True is the resource exists.
- :param str resource: Name of the resource
- :rtype: bool
+ :param resource: Name of the resource
"""
path = resource_filename(resource)
return os.path.exists(path)
-def _get_package_and_resource(resource, default_directory=None):
+def _get_package_and_resource(
+ resource: str, default_directory: Optional[str] = None
+) -> tuple[_ResourceDirectory, str]:
"""
Return the resource directory class and a cleaned resource name without
prefix.
- :param str: resource: Name of the resource with resource prefix.
- :param str default_directory: If the resource is not prefixed, the resource
+ :param resource: Name of the resource with resource prefix.
+ :param default_directory: If the resource is not prefixed, the resource
will be searched on this default directory of the silx resource
directory.
- :rtype: tuple(_ResourceDirectory, str)
:raises ValueError: If the resource name uses an unregistred resource
directory name
"""
@@ -217,14 +205,14 @@ def _get_package_and_resource(resource, default_directory=None):
else:
prefix = "silx"
if default_directory is not None:
- resource = os.path.join(default_directory, resource)
+ resource = f"{default_directory}/{resource}"
if prefix not in _RESOURCE_DIRECTORIES:
raise ValueError("Resource '%s' uses an unregistred prefix", resource)
resource_directory = _RESOURCE_DIRECTORIES[prefix]
return resource_directory, resource
-def resource_filename(resource):
+def resource_filename(resource: str) -> str:
"""Return filename corresponding to resource.
The existence of the resource is not checked.
@@ -233,18 +221,41 @@ def resource_filename(resource):
example "silx:foo.png" identify the resource "foo.png" from the resource
directory "silx". See also :func:`register_resource_directory`.
- :param str resource: Resource path relative to resource directory
- using '/' path separator. It can be either a file or
- a directory.
+ :param resource: Resource path relative to resource directory
+ using '/' path separator. It can be either a file or
+ a directory.
:raises ValueError: If the resource name uses an unregistred resource
directory name
:return: Absolute resource path in the file system
- :rtype: str
"""
return _resource_filename(resource, default_directory=None)
-def _resource_filename(resource, default_directory=None):
+# Manage resource files life-cycle
+_file_manager = contextlib.ExitStack()
+atexit.register(_file_manager.close)
+
+
+@functools.lru_cache(maxsize=None)
+def _get_resource_filename(package: str, resource: str) -> str:
+ """Returns path to requested resource in package
+
+ :param package: Name of the package in which to look for the resource
+ :param resource: Resource path relative to package using '/' path separator
+ :return: Absolute resource path in the file system
+ """
+ if sys.version_info < (3, 9):
+ return pkg_resources.resource_filename(package, resource)
+
+ # Caching prevents extracting the resource twice
+ file_context = importlib.resources.as_file(
+ importlib.resources.files(package) / resource
+ )
+ path = _file_manager.enter_context(file_context)
+ return str(path.absolute())
+
+
+def _resource_filename(resource: str, default_directory: Optional[str] = None) -> str:
"""Return filename corresponding to resource.
The existence of the resource is not checked.
@@ -253,32 +264,25 @@ def _resource_filename(resource, default_directory=None):
example "silx:foo.png" identify the resource "foo.png" from the resource
directory "silx". See also :func:`register_resource_directory`.
- :param str resource: Resource path relative to resource directory
- using '/' path separator. It can be either a file or
- a directory.
- :param str default_directory: If the resource is not prefixed, the resource
+ :param resource: Resource path relative to resource directory
+ using '/' path separator. It can be either a file or
+ a directory.
+ :param default_directory: If the resource is not prefixed, the resource
will be searched on this default directory of the silx resource
directory. It should only be used internally by silx.
:return: Absolute resource path in the file system
- :rtype: str
"""
- resource_directory, resource_name = _get_package_and_resource(resource,
- default_directory=default_directory)
+ resource_directory, resource_name = _get_package_and_resource(
+ resource, default_directory=default_directory
+ )
if resource_directory.forced_path is not None:
# if set, use this directory
base_dir = resource_directory.forced_path
- resource_path = os.path.join(base_dir, *resource_name.split('/'))
+ resource_path = os.path.join(base_dir, *resource_name.split("/"))
return resource_path
- elif pkg_resources is None:
- # Fallback if pkg_resources is not available
- base_dir = resource_directory.package_path
- resource_path = os.path.join(base_dir, *resource_name.split('/'))
- return resource_path
- else:
- # Preferred way to get resources as it supports zipfile package
- package_name = resource_directory.package_name
- return pkg_resources.resource_filename(package_name, resource_name)
+
+ return _get_resource_filename(resource_directory.package_name, resource_name)
# Expose ExternalResources for compatibility (since silx 0.11)
diff --git a/src/silx/resources/gui/icons/ruler.png b/src/silx/resources/gui/icons/ruler.png
new file mode 100644
index 0000000..0ff603f
--- /dev/null
+++ b/src/silx/resources/gui/icons/ruler.png
Binary files differ
diff --git a/src/silx/resources/gui/icons/ruler.svg b/src/silx/resources/gui/icons/ruler.svg
new file mode 100644
index 0000000..268b1db
--- /dev/null
+++ b/src/silx/resources/gui/icons/ruler.svg
@@ -0,0 +1,216 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+ width="32"
+ height="32"
+ viewBox="0 0 8.4666657 8.4666657"
+ version="1.1"
+ id="svg5"
+ inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
+ sodipodi:docname="ruler.svg"
+ inkscape:export-filename="ruler.png"
+ inkscape:export-xdpi="100"
+ inkscape:export-ydpi="100"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <metadata
+ id="metadata35">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <sodipodi:namedview
+ id="namedview7"
+ pagecolor="#ffffff"
+ bordercolor="#000000"
+ borderopacity="0.25"
+ inkscape:showpageshadow="2"
+ inkscape:pageopacity="0.0"
+ inkscape:pagecheckerboard="0"
+ inkscape:deskcolor="#d1d1d1"
+ inkscape:document-units="mm"
+ showgrid="false"
+ inkscape:zoom="13.455443"
+ inkscape:cx="-18.988598"
+ inkscape:cy="0.2229581"
+ inkscape:window-width="1920"
+ inkscape:window-height="1163"
+ inkscape:window-x="1920"
+ inkscape:window-y="0"
+ inkscape:window-maximized="1"
+ inkscape:current-layer="g1102"
+ inkscape:document-rotation="0"
+ showguides="true">
+ <inkscape:grid
+ type="xygrid"
+ id="grid3452"
+ originx="0"
+ originy="0" />
+ </sodipodi:namedview>
+ <defs
+ id="defs2" />
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1">
+ <g
+ id="g1102"
+ transform="translate(-0.36925443,-7.7531893)">
+ <g
+ id="path1743">
+ <path
+ style="color:#000000;fill:#ffffff;stroke-width:0.517192;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="M 6.5495575,11.909879 5.7344716,11.369562"
+ id="path418" />
+ <path
+ style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="M 5.8769531,11.154297 5.5917969,11.585938 6.40625,12.125 6.6933594,11.695313 Z"
+ id="path420" />
+ <g
+ id="g408">
+ <g
+ id="path410">
+ <path
+ style="color:#000000;fill:#ffffff;fill-rule:evenodd;stroke-width:0.0456346pt;-inkscape-stroke:none"
+ d="m 5.5442899,11.243491 c 0.069591,-0.10498 0.211272,-0.133702 0.3162524,-0.06411 0.1049803,0.06959 0.133702,0.211272 0.064111,0.316253 -0.069591,0.10498 -0.2135542,0.132189 -0.3162524,0.06411 -0.1049803,-0.06959 -0.133702,-0.211272 -0.064111,-0.316253 z"
+ id="path414" />
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none"
+ d="m 5.8769531,11.154297 c -0.1187003,-0.07869 -0.278736,-0.04643 -0.3574219,0.07227 -0.078686,0.1187 -0.046435,0.28069 0.072266,0.359375 0.116723,0.07738 0.2784923,0.04485 0.3574218,-0.07422 0.078686,-0.118701 0.046435,-0.278737 -0.072266,-0.357422 z m -0.033203,0.05078 c 0.09126,0.06049 0.1151836,0.182177 0.054687,0.273438 -0.060252,0.09089 -0.1847643,0.115422 -0.2734375,0.05664 -0.09126,-0.0605 -0.1151836,-0.184129 -0.054687,-0.27539 0.060496,-0.09126 0.1821772,-0.115185 0.2734375,-0.05469 z"
+ id="path416" />
+ </g>
+ </g>
+ </g>
+ <g
+ id="rect234"
+ transform="matrix(0.77265229,0.63482945,-0.6944385,0.71955206,0,0)">
+ <path
+ style="color:#000000;fill:#ffffff;stroke-width:0.399005;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="m 10.480519,2.0891316 h 2.354846 c 0.12095,0 0.218321,0.097371 0.218321,0.2183209 v 8.0683205 c 0,0.120949 -0.09737,0.218321 -0.218321,0.218321 h -2.354846 c -0.120949,0 -0.218321,-0.09737 -0.218321,-0.218321 V 2.3074525 c 0,-0.1209498 0.09737,-0.2183209 0.218321,-0.2183209 z"
+ id="path330" />
+ <path
+ style="color:#000000;fill:#000000;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="m 10.480469,1.8886719 c -0.228023,0 -0.417969,0.1899469 -0.417969,0.4179687 V 10.375 c 0,0.228024 0.189947,0.417969 0.417969,0.417969 h 2.355469 c 0.228024,0 0.417968,-0.189947 0.417968,-0.417969 V 2.3066406 c 0,-0.2280229 -0.189946,-0.4179687 -0.417968,-0.4179687 z m 0,0.4003906 h 2.355469 c 0.01388,0 0.01758,0.0037 0.01758,0.017578 V 10.375 c 0,0.01388 -0.0037,0.01953 -0.01758,0.01953 h -2.355469 c -0.01388,0 -0.01953,-0.0057 -0.01953,-0.01953 V 2.3066406 c 0,-0.013877 0.0057,-0.017578 0.01953,-0.017578 z"
+ id="path332" />
+ </g>
+ <g
+ id="path1743-9-3">
+ <path
+ style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none;paint-order:stroke markers fill"
+ d="m 5.9492187,10.972656 -0.3945312,0.439453 0.796875,0.716797 0.3945312,-0.4375 z"
+ id="path404" />
+ <g
+ id="g394">
+ <g
+ id="path396">
+ <path
+ style="color:#000000;fill:#ffffff;fill-rule:evenodd;stroke-width:0.0520108pt;-inkscape-stroke:none"
+ d="m 5.5585792,11.01876 c 0.095985,-0.10674 0.260515,-0.11547 0.3672555,-0.01949 0.1067406,0.09599 0.1154702,0.260515 0.019486,0.367256 -0.095985,0.10674 -0.2628354,0.113383 -0.3672555,0.01949 C 5.4713246,11.290036 5.462595,11.125501 5.5585792,11.01876 Z"
+ id="path400" />
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none"
+ d="m 5.9492187,10.972656 c -0.1206907,-0.108524 -0.3074864,-0.09725 -0.4160156,0.02344 -0.1085286,0.120691 -0.099207,0.307491 0.021484,0.416015 0.1186803,0.106716 0.3071504,0.09958 0.4160156,-0.02148 0.108528,-0.120691 0.099205,-0.309435 -0.021484,-0.417969 z m -0.046875,0.05274 c 0.092792,0.08345 0.1010184,0.225568 0.017578,0.318359 -0.083105,0.09242 -0.2281997,0.0967 -0.3183594,0.01563 -0.092789,-0.08343 -0.1010177,-0.225569 -0.017578,-0.318359 0.083441,-0.09279 0.2255693,-0.09906 0.3183593,-0.01563 z"
+ id="path402" />
+ </g>
+ </g>
+ </g>
+ <g
+ id="path1741-9">
+ <path
+ style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="M 6.4042969,9.6074219 6.0117187,10.052734 7.3417969,11.224609 7.734375,10.779297 Z"
+ id="path390" />
+ <g
+ id="g380">
+ <g
+ id="path382">
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;stroke-width:0.0524193pt;-inkscape-stroke:none"
+ d="m 6.0116332,9.6575594 c 0.095671,-0.1085287 0.261399,-0.1189636 0.3699276,-0.023292 0.1085287,0.095671 0.1189636,0.261399 0.023292,0.3699276 -0.095671,0.108528 -0.2637583,0.116884 -0.3699276,0.02329 C 5.9263967,9.9318156 5.9159618,9.766088 6.0116332,9.6575594 Z"
+ id="path386" />
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none"
+ d="m 6.4042969,9.6074219 c -0.1227123,-0.108175 -0.3097944,-0.095369 -0.4179688,0.027344 -0.1081747,0.1227123 -0.097322,0.3097959 0.025391,0.4179684 0.1206682,0.106376 0.3114126,0.0977 0.4199219,-0.02539 0.108175,-0.1227125 0.095369,-0.3117477 -0.027344,-0.4199221 z m -0.044922,0.052734 c 0.094345,0.083167 0.1026993,0.2259678 0.019531,0.3203125 C 6.2960737,10.074433 6.150264,10.082765 6.0585937,10.001953 5.9642495,9.9187874 5.9539415,9.7759853 6.0371094,9.6816406 6.1202768,9.5872959 6.2650303,9.5769882 6.359375,9.6601563 Z"
+ id="path388" />
+ </g>
+ </g>
+ </g>
+ <g
+ id="path1741-9-7">
+ <path
+ style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="m 4.2851562,11.6875 -0.3925781,0.445313 1.3300781,1.171875 0.3925782,-0.445313 z"
+ id="path376" />
+ <g
+ id="g366">
+ <g
+ id="path368">
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;stroke-width:0.0524193pt;-inkscape-stroke:none"
+ d="m 3.8930299,11.736561 c 0.095671,-0.108528 0.261399,-0.118963 0.3699276,-0.02329 0.1085286,0.09567 0.1189634,0.261399 0.023292,0.369928 -0.095671,0.108528 -0.2637583,0.116883 -0.3699276,0.02329 -0.1085286,-0.09567 -0.1189634,-0.261399 -0.023292,-0.369928 z"
+ id="path372" />
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none"
+ d="m 4.2851562,11.6875 c -0.1227122,-0.108177 -0.3097943,-0.09732 -0.4179687,0.02539 -0.1081748,0.122712 -0.097322,0.311748 0.025391,0.419922 0.1206681,0.106374 0.3114125,0.09575 0.4199219,-0.02734 0.1081748,-0.122713 0.095369,-0.309795 -0.027344,-0.417969 z m -0.044922,0.05273 c 0.094344,0.08317 0.1026991,0.225968 0.019531,0.320313 -0.082832,0.09396 -0.2286422,0.100343 -0.3203125,0.01953 -0.094344,-0.08317 -0.1026991,-0.225967 -0.019531,-0.320312 0.083167,-0.09434 0.2259677,-0.102701 0.3203125,-0.01953 z"
+ id="path374" />
+ </g>
+ </g>
+ </g>
+ <g
+ id="path1741-9-3">
+ <path
+ style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="m 2.4824219,13.669922 -0.3945313,0.445312 1.3300782,1.171875 0.3925781,-0.445312 z"
+ id="path362" />
+ <g
+ id="g352">
+ <g
+ id="path354">
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;stroke-width:0.0524193pt;-inkscape-stroke:none"
+ d="m 2.0886536,13.720045 c 0.095671,-0.108528 0.2613989,-0.118963 0.3699275,-0.02329 0.1085287,0.09567 0.1189637,0.261399 0.023292,0.369928 -0.095671,0.108528 -0.2637582,0.116883 -0.3699275,0.02329 -0.1085287,-0.09567 -0.1189637,-0.261399 -0.023292,-0.369928 z"
+ id="path358" />
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none"
+ d="M 2.4824219,13.669922 C 2.3597097,13.561745 2.1706744,13.574554 2.0625,13.697266 c -0.1081751,0.122712 -0.097322,0.309795 0.025391,0.417968 0.1206681,0.106375 0.3114126,0.0977 0.4199219,-0.02539 0.1081751,-0.122713 0.097322,-0.311749 -0.025391,-0.419922 z m -0.046875,0.05273 c 0.094344,0.08317 0.1026993,0.225968 0.019531,0.320313 -0.082832,0.09396 -0.2286422,0.102296 -0.3203125,0.02148 -0.094344,-0.08317 -0.1026993,-0.22792 -0.019531,-0.322265 0.083167,-0.09435 0.2259677,-0.102701 0.3203125,-0.01953 z"
+ id="path360" />
+ </g>
+ </g>
+ </g>
+ <g
+ id="path1743-9-3-6">
+ <path
+ style="color:#000000;fill:#ffffff;stroke-width:0.589456;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="M 4.4307455,14.144747 3.6331377,13.427511"
+ id="path346" />
+ <path
+ style="color:#000000;fill:#000000;stroke-miterlimit:3.5;-inkscape-stroke:none"
+ d="m 3.8300781,13.208984 -0.3945312,0.4375 0.7988281,0.716797 0.3925781,-0.4375 z"
+ id="path348" />
+ <g
+ id="g336">
+ <g
+ id="path338">
+ <path
+ style="color:#000000;fill:#ffffff;fill-rule:evenodd;stroke-width:0.0520108pt;-inkscape-stroke:none"
+ d="m 3.4397673,13.253626 c 0.095985,-0.106741 0.2605151,-0.11547 0.3672556,-0.01949 0.1067404,0.09598 0.1154698,0.260515 0.019485,0.367255 -0.095985,0.106741 -0.2628356,0.113383 -0.3672556,0.01949 -0.1067404,-0.09598 -0.1154698,-0.260515 -0.019485,-0.367255 z"
+ id="path342" />
+ <path
+ style="color:#000000;fill:#000000;fill-rule:evenodd;-inkscape-stroke:none"
+ d="m 3.8300781,13.208984 c -0.1206906,-0.108523 -0.3074864,-0.09921 -0.4160156,0.02149 -0.1085295,0.12069 -0.099207,0.307491 0.021484,0.416015 0.1186801,0.106716 0.3071504,0.09958 0.4160156,-0.02148 0.1085295,-0.12069 0.099207,-0.307491 -0.021484,-0.416016 z m -0.046875,0.05078 c 0.092789,0.08343 0.1010181,0.22557 0.017578,0.318359 -0.083105,0.09242 -0.2281998,0.09865 -0.3183594,0.01758 -0.092789,-0.08343 -0.1010181,-0.22557 -0.017578,-0.318359 0.083441,-0.09279 0.2255692,-0.101014 0.3183593,-0.01758 z"
+ id="path344" />
+ </g>
+ </g>
+ </g>
+ </g>
+ </g>
+</svg>
diff --git a/src/silx/resources/opencl/codec/bitshuffle_lz4.cl b/src/silx/resources/opencl/codec/bitshuffle_lz4.cl
new file mode 100644
index 0000000..71f617a
--- /dev/null
+++ b/src/silx/resources/opencl/codec/bitshuffle_lz4.cl
@@ -0,0 +1,625 @@
+/*
+ * Project: SILX: Bitshuffle LZ4 decompressor
+ *
+ * Copyright (C) 2022 European Synchrotron Radiation Facility
+ * Grenoble, France
+ *
+ * Principal authors: J. Kieffer (kieffer@esrf.fr)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To decompress bitshuffle-LZ4 data in parallel on GPU one needs to:
+ * - Find the beginning of every block; this is performed by the ... kernel.
+ * - Decompress each block by one workgroup.
+ * - bitshuffle the data from one workgroup
+ */
+
+#ifndef LZ4_BLOCK_SIZE
+# define LZ4_BLOCK_SIZE 8192
+#endif
+#define LZ4_BLOCK_EXTRA 400
+#ifdef __ENDIAN_LITTLE__
+#define SWAP_BE 1
+#define SWAP_LE 0
+#else
+#define SWAP_BE 0
+#define SWAP_LE 1
+#endif
+
+
+#define int8_t char
+#define uint8_t uchar
+#define int16_t short
+#define uint16_t ushort
+#define int32_t int
+#define uint32_t uint
+#define int64_t long
+#define uint64_t ulong
+
+#define position_t uint
+#define token_t uchar2
+
+//Some functions used as part of bitshuffle:
+
+inline token_t decode_token(uint8_t value){
+ return (token_t)(value >> 4, // literals
+ value & 0x0f); // matches
+}
+
+inline bool has_liter_over(token_t token)
+{
+ return token.s0 >= 15;
+}
+
+inline bool has_match_over(token_t token)
+{
+ return token.s1 >= 15;
+}
+
+//parse overflow, return the number of overflow and the new position
+inline uint2 read_overflow(local uint8_t* buffer,
+ position_t buffer_size,
+ position_t idx){
+ position_t num = 0;
+ uint8_t next = 0xff;
+ while (next == 0xff && idx < buffer_size){
+ next = buffer[idx];
+ idx += 1;
+ num += next;
+ }
+ return (uint2)(num, idx);
+}
+
+inline void copy_no_overlap(local uint8_t* dest,
+ const position_t dest_position,
+ local uint8_t* source,
+ const position_t src_position,
+ const position_t length){
+ for (position_t i=get_local_id(0); i<length; i+=get_local_size(0)) {
+ dest[dest_position+i] = source[src_position+i];
+ }
+}
+
+inline void copy_repeat(local uint8_t* dest,
+ const position_t dest_position,
+ local uint8_t* source,
+ const position_t src_position,
+ const position_t dist,
+ const position_t length){
+
+ // if there is overlap, it means we repeat, so we just
+ // need to organize our copy around that
+ for (position_t i=get_local_id(0); i<length; i+=get_local_size(0)) {
+ dest[dest_position+i] = source[src_position + i%dist];
+ }
+}
+
+inline void copy_collab(local uint8_t* dest,
+ const position_t dest_position,
+ local uint8_t* source,
+ const position_t src_position,
+ const position_t dist,
+ const position_t length){
+ //Generic copy function
+ if (dist < length) {
+ copy_repeat(dest, dest_position, source, src_position, dist, length);
+ }
+ else {
+ copy_no_overlap(dest, dest_position, source, src_position, length);
+ }
+}
+
+// Functions to read larger integers at various positions. Endianness is addressed as well with the swap flag
+uint64_t load64_at(global uint8_t *src,
+ const uint64_t position,
+ const bool swap){
+ uchar8 vector;
+ if (swap){
+ vector = (uchar8)(src[position+7],src[position+6],
+ src[position+5],src[position+4],
+ src[position+3],src[position+2],
+ src[position+1],src[position+0]);
+ }
+ else{
+ vector = (uchar8)(src[position+0],src[position+1],
+ src[position+2],src[position+3],
+ src[position+4],src[position+5],
+ src[position+6],src[position+7]);
+ }
+ return as_ulong(vector);
+}
+
+uint32_t load32_at(global uint8_t *src,
+ const uint64_t position,
+ const bool swap){
+ uchar4 vector;
+ if (swap){
+ vector = (uchar4)(
+ src[position+3],src[position+2],
+ src[position+1],src[position+0]);
+ }
+ else{
+ vector = (uchar4)(src[position+0],src[position+1],
+ src[position+2],src[position+3]);
+ }
+ return as_uint(vector);
+}
+
+uint16_t load16_at(local uint8_t *src,
+ const uint64_t position,
+ const bool swap){
+ uchar2 vector;
+ if (swap){
+ vector = (uchar2)(src[position+1],src[position+0]);
+ }
+ else{
+ vector = (uchar2)(src[position+0],src[position+1]);
+ }
+ return as_ushort(vector);
+}
+
+//Calculate the beginning and the end of the block corresponding to the block=gid
+inline void _lz4_unblock(global uint8_t *src,
+ const uint64_t size,
+ local uint64_t *block_position){
+ uint32_t gid = get_group_id(0);
+ uint32_t lid = get_local_id(0);
+ if (lid == 0){
+ uint64_t block_start=16;
+ uint32_t block_size = load32_at(src, 12, SWAP_BE);
+ uint64_t block_end = block_start + block_size;
+
+ for (uint32_t block_idx=0; block_idx<gid; block_idx++){
+ // printf("gid %u idx %u %lu-%lu\n",gid, block_idx,block_start,block_end);
+ block_start = block_end + 4;
+ if (block_start>=size){
+ printf("Read beyond end of source buffer at gid %u %lu>%lu\n",gid, block_start, size);
+ block_start = 0;
+ block_end = 0;
+ break;
+ }
+ block_size = load32_at(src, block_end, SWAP_BE);
+ block_end = block_start + block_size;
+ }
+ block_position[0] = block_start;
+ block_position[1] = block_end;
+// if (gid>get_num_groups(0)-10) printf("Success finish unblock gid %u block: %lu - %lu\n",gid,block_start,block_end);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+
+//Decompress one LZ4 block stored in local (shared) memory
+//
+// Param:
+// - local_cmp: compressed block; cmp_buffer_size bytes are parsed
+// - local_dec: destination of the decompressed bytes
+// - cmp_buffer_size: number of compressed bytes to parse in local_cmp
+// - dec_buffer_size: capacity of local_dec in bytes, never exceeded
+//
+// Return: the number of bytes written in local_dec, or 0 when the block is
+// detected as corrupted (null or out-of-range offset, sequence overflowing
+// one of the two buffers).
+//
+// NOTE(review): the parsing state (cmp_idx, dec_idx) is derived only from the
+// content of local_cmp, so all work-items of the group are expected to take
+// the same branches and to reach the barriers together -- confirm that
+// read_overflow returns the same value for every work-item.
+inline uint32_t lz4_decompress_local_block( local uint8_t* local_cmp,
+                                            local uint8_t* local_dec,
+                                            const uint32_t cmp_buffer_size,
+                                            const uint32_t dec_buffer_size){
+
+    uint32_t gid = get_group_id(0); // One block is decompressed by one workgroup
+    uint32_t lid = get_local_id(0); // This is the thread position in the group...
+
+    position_t dec_idx = 0;
+    position_t cmp_idx = 0;
+    while (cmp_idx < cmp_buffer_size) {
+        // read header byte
+        token_t tok = decode_token(local_cmp[cmp_idx]);
+        cmp_idx+=1;
+
+        // read the length of the literals
+        position_t num_literals = tok.s0;
+        if (has_liter_over(tok)) {
+            uint2 tmp = read_overflow(local_cmp,
+                                      cmp_buffer_size,
+                                      cmp_idx);
+            num_literals += tmp.s0;
+            cmp_idx = tmp.s1;
+        }
+
+        // A literal run may neither read past the compressed data nor write
+        // past the destination buffer (sums done on 64 bits to avoid wrapping)
+        if (((uint64_t)cmp_idx + num_literals > cmp_buffer_size) ||
+            ((uint64_t)dec_idx + num_literals > dec_buffer_size)) {
+            //corrupted block
+            if (lid == 0)
+                printf("Corrupted block #%u\n", gid);
+            return 0;
+        }
+
+        // copy the literals to the dst stream in parallel
+        copy_no_overlap(local_dec, dec_idx, local_cmp, cmp_idx, num_literals);
+        cmp_idx += num_literals;
+        dec_idx += num_literals;
+
+        // Note that the last sequence stops right after literals field.
+        // There are specific parsing rules to respect to be compatible with the
+        // reference decoder : 1) The last 5 bytes are always literals 2) The last
+        // match cannot start within the last 12 bytes Consequently, a file with
+        // less than 13 bytes can only be represented as literals These rules are in
+        // place to benefit speed and ensure buffer limits are never crossed.
+        if (cmp_idx < cmp_buffer_size) {
+
+            // read the offset
+            uint16_t offset = load16_at(local_cmp, cmp_idx, SWAP_LE);
+            if (offset == 0) {
+                //corrupted block
+                if (lid == 0)
+                    printf("Corrupted block #%u\n", gid);
+                return 0;
+            }
+
+            cmp_idx += 2;
+
+            // read the match length
+            position_t match = 4 + tok.s1;
+            if (has_match_over(tok)) {
+                uint2 tmp = read_overflow(local_cmp,
+                                          cmp_buffer_size,
+                                          cmp_idx);
+                match += tmp.s0;
+                cmp_idx = tmp.s1;
+            }
+
+            // A match has to start inside the data already decoded and to end
+            // inside the destination buffer
+            if ((offset > dec_idx) ||
+                ((uint64_t)dec_idx + match > dec_buffer_size)) {
+                //corrupted block
+                if (lid == 0)
+                    printf("Corrupted block #%u\n", gid);
+                return 0;
+            }
+
+            //synchronize threads before reading shared memory
+            barrier(CLK_LOCAL_MEM_FENCE);
+
+            // copy match
+            copy_collab(local_dec, dec_idx, local_dec, dec_idx - offset, offset, match);
+            dec_idx += match;
+        }
+    }
+    //synchronize threads before reading shared memory
+    barrier(CLK_LOCAL_MEM_FENCE);
+    return dec_idx;
+}
+
+//Perform the bit-unshuffling on 8-bits objects
+//
+// Param:
+// - inp: bit-shuffled data, only read
+// - out: destination of the un-shuffled items, 1 byte each
+// - buffer_size: size of the data in bytes, buffer_size items are produced
+//
+// No barrier is issued here: synchronization is left to the caller.
+inline void bitunshuffle8( local uint8_t* inp,
+                           local uint8_t* out,
+                           const uint32_t buffer_size){ //8k ... or less.
+    const uint32_t first = get_local_id(0);
+    const uint32_t stride = get_local_size(0);
+    const uint32_t nb_items = buffer_size; // 1 byte per item
+
+    // Every work-item rebuilds the items first, first+stride, first+2*stride...
+    for (uint32_t item=first; item<nb_items; item+=stride){
+        uint8_t value = 0;
+        // Bit #b of the item is read at bit index b*nb_items+item of the input,
+        // bits being counted from the least significant one of each byte
+        uint32_t src_bit = item;
+        for (uint32_t b=0; b<8; b++){
+            value |= ((inp[src_bit>>3] >> (src_bit&7)) & 1) << b;
+            src_bit += nb_items;
+        }
+        out[item] = value;
+    }
+}
+
+
+//Perform the bit-unshuffling on 16-bits objects
+//
+// Param:
+// - inp: bit-shuffled data, only read
+// - out: destination of the un-shuffled items, 2 bytes each
+// - buffer_size: size of the data in bytes, buffer_size/2 items are produced
+//
+// No barrier is issued here: synchronization is left to the caller.
+inline void bitunshuffle16( local uint8_t* inp,
+                            local uint8_t* out,
+                            const uint32_t buffer_size){ //8k ... or less.
+    const uint32_t first = get_local_id(0);
+    const uint32_t stride = get_local_size(0);
+    const uint32_t nb_items = buffer_size>>1; // 2 bytes per item -> 4k
+
+    // Every work-item rebuilds the items first, first+stride, first+2*stride...
+    for (uint32_t item=first; item<nb_items; item+=stride){
+        uint16_t value = 0;
+        // Bit #b of the item is read at bit index b*nb_items+item of the input,
+        // bits being counted from the least significant one of each byte
+        uint32_t src_bit = item;
+        for (uint32_t b=0; b<16; b++){
+            value |= ((inp[src_bit>>3] >> (src_bit&7)) & 1) << b;
+            src_bit += nb_items;
+        }
+        // Reinterpret the 16 bits as 2 bytes and store them one by one
+        const uchar2 bytes = as_uchar2(value);
+        local uint8_t* dst = out + 2*item;
+        dst[0] = bytes.s0;
+        dst[1] = bytes.s1;
+    }
+}
+
+
+//Perform the bit-unshuffling on 32-bits objects
+//
+// Param:
+// - inp: bit-shuffled data, only read
+// - out: destination of the un-shuffled items, 4 bytes each
+// - buffer_size: size of the data in bytes, buffer_size/4 items are produced
+//
+// No barrier is issued here: synchronization is left to the caller.
+inline void bitunshuffle32( local uint8_t* inp,
+                            local uint8_t* out,
+                            const uint32_t buffer_size){ //8k ... or less.
+    const uint32_t first = get_local_id(0);
+    const uint32_t stride = get_local_size(0);
+    const uint32_t nb_items = buffer_size>>2; // 4 bytes per item -> 2k
+
+    // Every work-item rebuilds the items first, first+stride, first+2*stride...
+    for (uint32_t item=first; item<nb_items; item+=stride){
+        uint32_t value = 0;
+        // Bit #b of the item is read at bit index b*nb_items+item of the input,
+        // bits being counted from the least significant one of each byte
+        uint32_t src_bit = item;
+        for (uint32_t b=0; b<32; b++){
+            value |= ((inp[src_bit>>3] >> (src_bit&7)) & 1) << b;
+            src_bit += nb_items;
+        }
+        // Reinterpret the 32 bits as 4 bytes and store them one by one
+        const uchar4 bytes = as_uchar4(value);
+        local uint8_t* dst = out + 4*item;
+        dst[0] = bytes.s0;
+        dst[1] = bytes.s1;
+        dst[2] = bytes.s2;
+        dst[3] = bytes.s3;
+    }
+}
+
+//Perform the bit-unshuffling on 64-bits objects
+//
+// Param:
+// - inp: bit-shuffled data, only read
+// - out: destination of the un-shuffled items, 8 bytes each
+// - buffer_size: size of the data in bytes, buffer_size/8 items are produced
+//
+// No barrier is issued here: synchronization is left to the caller.
+inline void bitunshuffle64( local uint8_t* inp,
+                            local uint8_t* out,
+                            const uint32_t buffer_size){ //8k ... or less.
+//    uint32_t gid = get_group_id(0);
+    uint32_t lid = get_local_id(0);
+    uint32_t wg = get_local_size(0);
+    uint32_t u64_buffer_size = buffer_size>>3; // /8 -> 1k
+
+    // One thread deals with one or several output data
+    for (uint32_t dpos=lid; dpos<u64_buffer_size; dpos+=wg){
+        uint64_t res = 0;
+        // read bits at several places...
+        for (uint32_t bit=0; bit<64; bit++){
+            uint32_t read_bit = bit*u64_buffer_size + dpos;
+            uint32_t u8_word_pos = read_bit>>3; // /8
+            uint32_t u8_bit_pos = read_bit&7;   // %8
+            // The extracted bit must be widened to 64 bits *before* the shift:
+            // shifting the promoted 32-bit int would use the shift count
+            // modulo 32 (bits 32..63 misplaced) and bit 31 would be
+            // sign-extended when merged into the 64-bit result.
+            uint64_t one_bit = (inp[u8_word_pos]>>u8_bit_pos) & 1;
+            res |= one_bit<<bit;
+        }
+        // Reinterpret the 64 bits as 8 bytes and store them one by one
+        uchar8 tmp = as_uchar8(res);
+        out[8*dpos] = tmp.s0;
+        out[8*dpos+1] = tmp.s1;
+        out[8*dpos+2] = tmp.s2;
+        out[8*dpos+3] = tmp.s3;
+        out[8*dpos+4] = tmp.s4;
+        out[8*dpos+5] = tmp.s5;
+        out[8*dpos+6] = tmp.s6;
+        out[8*dpos+7] = tmp.s7;
+    }
+}
+
+
+/* Preprocessing kernel which locates the compressed blocks of a frame:
+- skip the 12 first bytes of src (8 bytes total size + 4 bytes block size)
+- follow the chain of 4-bytes block sizes, read with load32_at(..., SWAP_BE)
+- store the position of the first byte following each block size in block_start
+
+Param:
+- src: input buffer in global memory
+- size: input buffer size
+- block_start: output buffer in global memory containing the index of the beginning of each block (output)
+- max_blocks: allocated memory for block_start array, in number of elements
+- nb_blocks: output buffer with the number of blocks found in src, at most max_blocks (output).
+
+Return: Nothing, this is a kernel
+
+Hint on workgroup size: little kernel ... wg=1, 1 wg is enough.
+Every work-item runs the complete (sequential) search.
+*/
+
+kernel void lz4_unblock(global uint8_t *src,
+                        const uint64_t size,
+                        global uint64_t *block_start,
+                        const uint32_t max_blocks,
+                        global uint32_t *nb_blocks){
+
+    // The frame header is not needed to walk through the blocks: src is only
+    // read inside the loop, i.e. never for a buffer too short to hold a block.
+    uint32_t block_idx = 0;
+    uint64_t pos = 12;
+
+    while ((pos+4<size) && (block_idx<max_blocks)){
+        uint32_t block_size = load32_at(src, pos, SWAP_BE);
+        block_start[block_idx] = pos + 4;
+        block_idx +=1;
+        // jump over the block size and the compressed payload
+        pos += 4 + block_size;
+    }
+    nb_blocks[0] = block_idx;
+}
+
+// decompress a frame blockwise.
+// Needs the block position to be known in advance (block_start) calculated from lz4_unblock.
+// one workgroup treats one block.
+//
+// Param:
+// - comp_src: compressed frame in global memory
+// - dec_dest: destination buffer in global memory, block #gid is written at LZ4_BLOCK_SIZE*gid
+// - block_start: position in comp_src of the compressed data of each block
+// - nb_blocks: nb_blocks[0] is the number of valid entries in block_start
+// - item_size: size in bytes of one item (1, 2, 4 or 8), any other value skips the bit-unshuffling
+//
+// Return: Nothing, this is a kernel
+
+kernel void bslz4_decompress_block( global uint8_t* comp_src,
+                                    global uint8_t* dec_dest,
+                                    global uint64_t* block_start,
+                                    global uint32_t *nb_blocks,
+                                    const uint8_t item_size){
+
+    uint32_t gid = get_group_id(0); // One block is decompressed by one workgroup
+    uint32_t lid = get_local_id(0); // This is the thread position in the group...
+    uint32_t wg = get_local_size(0); // workgroup size
+
+    //guard if the number of wg scheduled is too large
+    if (gid >=nb_blocks[0]) return;
+
+    // A block cannot start inside the 12 bytes of the frame header
+    uint64_t start_read = block_start[gid];
+    if (start_read<12) return;
+
+    local uint8_t local_cmp[LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA];
+    local uint8_t local_dec[LZ4_BLOCK_SIZE];
+
+    // The size of the compressed block is stored just before its data
+    uint32_t cmp_buffer_size = load32_at(comp_src, start_read-4, SWAP_BE);
+    uint64_t end_read = start_read + cmp_buffer_size;
+
+    // A block larger than the local buffer cannot be valid and would overflow
+    // local_cmp. The test gives the same result for the whole workgroup, so
+    // no work-item is left waiting on a barrier.
+    if (cmp_buffer_size > LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA){
+        if (lid == 0) printf("gid=%u: compressed block too large: %u\n", gid, cmp_buffer_size);
+        return;
+    }
+
+    // Copy locally the compressed buffer and memset the destination buffer
+    for (uint32_t i=lid; i<cmp_buffer_size; i+=wg){
+        local_cmp[i] = comp_src[start_read + i];
+    }
+    for (uint32_t i=lid+cmp_buffer_size; i<LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA; i+=wg){
+        local_cmp[i] = 0;
+    }
+    for (uint32_t i=lid; i<LZ4_BLOCK_SIZE; i+=wg){
+        local_dec[i] = 0;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    //All the work is performed here:
+    uint32_t dec_size = lz4_decompress_local_block( local_cmp, local_dec, cmp_buffer_size, LZ4_BLOCK_SIZE);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    local uint8_t* local_buffer;
+
+    //Perform bit-unshuffle: local_dec -> local_cmp, which is not needed anymore
+    if (item_size == 1){
+        bitunshuffle8(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else if (item_size == 2){
+        bitunshuffle16(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else if (item_size == 4){
+        bitunshuffle32(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else if (item_size == 8){
+        bitunshuffle64(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else {
+        // unsupported item size: export the decompressed data as they are
+        local_buffer = local_dec;
+    }
+
+
+    //Finally copy the destination data from local to global memory:
+    // (offset calculated on 64 bits, it can exceed 4GB)
+    uint64_t start_write = (uint64_t)LZ4_BLOCK_SIZE*gid;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for (uint32_t i=lid; i<dec_size; i+=wg){
+        dec_dest[start_write + i] = local_buffer[i];
+    }
+
+    // The last workgroup copies the few bytes following its block, if any,
+    // without any processing.
+    if (gid+1==get_num_groups(0)){
+        uint64_t total_nbytes = load64_at(comp_src,0,SWAP_BE);
+        uint64_t end_write = dec_size + start_write;
+        int32_t remaining = total_nbytes - end_write;
+        if ((remaining>0) && (remaining<item_size*8)){
+            // strided loop: each byte is copied by exactly one work-item
+            for (uint32_t i=lid; i<(uint32_t)remaining; i+=wg){
+                dec_dest[end_write + i] = comp_src[end_read+i];
+            }
+        }
+    }
+
+}
+
+// decompress a frame blockwise.
+// block-start are searched by one thread from each workgroup ... not very efficient
+// one workgroup treats one block.
+//
+// Param:
+// - comp_src: compressed frame in global memory
+// - src_size: size of comp_src in bytes
+// - dec_dest: destination buffer in global memory, block #gid is written at LZ4_BLOCK_SIZE*gid
+// - item_size: size in bytes of one item (1, 2, 4 or 8), any other value skips the bit-unshuffling
+//
+// Return: Nothing, this is a kernel
+
+kernel void bslz4_decompress_frame(
+                    global uint8_t* comp_src,
+                    const uint64_t src_size,
+                    global uint8_t* dec_dest,
+                    const uint8_t item_size){
+
+    uint32_t gid = get_group_id(0); // One block is decompressed by one workgroup
+    uint32_t lid = get_local_id(0); // This is the thread position in the group...
+    uint32_t wg = get_local_size(0); // workgroup size
+
+    local uint8_t local_cmp[LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA];
+    local uint8_t local_dec[LZ4_BLOCK_SIZE];
+    local uint64_t block[2]; // will contain beginning and end of the current block
+
+    uint64_t start_read, end_read;
+    uint32_t cmp_buffer_size;
+    _lz4_unblock(comp_src, src_size, block);
+    start_read = block[0];
+    end_read = block[1];
+    cmp_buffer_size = end_read - start_read;
+    if (cmp_buffer_size == 0){
+        if (lid == 0) printf("gid=%u: Empty buffer\n", gid);
+        return;
+    }
+
+    // A block larger than the local buffer cannot be valid and would overflow
+    // local_cmp. The test gives the same result for the whole workgroup, so
+    // no work-item is left waiting on a barrier.
+    if (cmp_buffer_size > LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA){
+        if (lid == 0) printf("gid=%u: compressed block too large: %u\n", gid, cmp_buffer_size);
+        return;
+    }
+
+    // Copy locally the compressed buffer and memset the destination buffer
+    for (uint32_t i=lid; i<cmp_buffer_size; i+=wg){
+        local_cmp[i] = comp_src[start_read + i];
+    }
+    for (uint32_t i=lid+cmp_buffer_size; i<LZ4_BLOCK_SIZE+LZ4_BLOCK_EXTRA; i+=wg){
+        local_cmp[i] = 0;
+    }
+    for (uint32_t i=lid; i<LZ4_BLOCK_SIZE; i+=wg){
+        local_dec[i] = 0;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    //All the work is performed here:
+    uint32_t dec_size;
+    dec_size = lz4_decompress_local_block( local_cmp, local_dec, cmp_buffer_size, LZ4_BLOCK_SIZE);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    local uint8_t* local_buffer;
+
+    //Perform bit-unshuffle: local_dec -> local_cmp, which is not needed anymore
+    if (item_size == 1){
+        bitunshuffle8(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else if (item_size == 2){
+        bitunshuffle16(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else if (item_size == 4){
+        bitunshuffle32(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else if (item_size == 8){
+        bitunshuffle64(local_dec, local_cmp, dec_size);
+        local_buffer=local_cmp;
+    }
+    else {
+        // unsupported item size: export the decompressed data as they are
+        local_buffer = local_dec;
+    }
+
+    //Finally copy the destination data from local to global memory:
+    // (offset calculated on 64 bits, it can exceed 4GB)
+    uint64_t start_write = (uint64_t)LZ4_BLOCK_SIZE*gid;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for (uint32_t i=lid; i<dec_size; i+=wg){
+        dec_dest[start_write + i] = local_buffer[i];
+    }
+
+}
diff --git a/src/silx/resources/opencl/doubleword.cl b/src/silx/resources/opencl/doubleword.cl
index a0ebfda..02a8aba 100644
--- a/src/silx/resources/opencl/doubleword.cl
+++ b/src/silx/resources/opencl/doubleword.cl
@@ -29,6 +29,7 @@
*
* We use the trick to declare some variable "volatile" to enforce the actual
* precision reduction of those variables.
+ * This has to be used in combination with #pragma clang fp contract(on)
*/
#ifndef X87_VOLATILE
@@ -37,6 +38,7 @@
//Algorithm 1, p23, theorem 1.1.12. Requires e_x > e_y, valid if |x| > |y|
inline fp2 fast_fp_plus_fp(fp x, fp y){
+ #pragma clang fp contract(on)
X87_VOLATILE fp s = x + y;
X87_VOLATILE fp z = s - x;
fp e = y - z;
@@ -45,6 +47,7 @@ inline fp2 fast_fp_plus_fp(fp x, fp y){
//Algorithm 2, p24, same as fast_fp_plus_fp without the condition on e_x and e_y
inline fp2 fp_plus_fp(fp x, fp y){
+ #pragma clang fp contract(on)
X87_VOLATILE fp s = x + y;
X87_VOLATILE fp xp = s - y;
X87_VOLATILE fp yp = s - xp;
@@ -62,6 +65,7 @@ inline fp2 fp_times_fp(fp x, fp y){
//Algorithm 7, p38: Addition of a FP to a DW. 10flop bounds:2u²+5u³
inline fp2 dw_plus_fp(fp2 x, fp y){
+ #pragma clang fp contract(on)
fp2 s = fp_plus_fp(x.s0, y);
X87_VOLATILE fp v = x.s1 + s.s1;
return fast_fp_plus_fp(s.s0, v);
@@ -83,6 +87,7 @@ inline fp2 dw_times_fp(fp2 x, fp y){
//Algorithm 14, p52: Multiplication DW*DW, 8 flops bounds:6u²
inline fp2 dw_times_dw(fp2 x, fp2 y){
+ #pragma clang fp contract(on)
fp2 c = fp_times_fp(x.s0, y.s0);
X87_VOLATILE fp l = fma(x.s1, y.s0, x.s0 * y.s1);
return fast_fp_plus_fp(c.s0, c.s1 + l);
@@ -90,6 +95,7 @@ inline fp2 dw_times_dw(fp2 x, fp2 y){
//Algorithm 17, p55: Division DW / FP, 10flops bounds: 3.5u²
inline fp2 dw_div_fp(fp2 x, fp y){
+ #pragma clang fp contract(on)
X87_VOLATILE fp th = x.s0 / y;
fp2 pi = fp_times_fp(th, y);
fp2 d = x - pi;
@@ -100,6 +106,7 @@ inline fp2 dw_div_fp(fp2 x, fp y){
//Derived from algorithm 20, p64: Inversion 1/ DW, 22 flops
inline fp2 inv_dw(fp2 y){
+ #pragma clang fp contract(on)
X87_VOLATILE fp th = one/y.s0;
X87_VOLATILE fp rh = fma(-y.s0, th, one);
X87_VOLATILE fp rl = -y.s1 * th;