summaryrefslogtreecommitdiff
path: root/src/silx/io/test/test_h5link_utils.py
blob: 41400036d636086a53ee35d541302c492af68002 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import pytest
import h5py
import numpy
from silx.io import open
from silx.io import h5link_utils


@pytest.fixture(scope="module")
def hdf5_with_external_data(tmpdir_factory):
    """Build an HDF5 master file referencing the same data in several ways.

    Three files are written to a new temporary directory:

    * ``master.h5``: the plain dataset ``data``, the soft link ``int``,
      the virtual datasets ``vds0`` to ``vds4``, the external links
      ``ext1`` to ``ext3`` and the external raw datasets ``raw1`` to
      ``raw3``.
    * ``external.h5``: a copy of ``data``, used as source of ``vds1`` to
      ``vds4`` and as target of the external links.
    * ``external.raw``: a raw binary dump of the same values, used as
      storage of ``raw1`` to ``raw3``.

    External files are referenced by absolute path, by bare file name and
    by ``./``-prefixed file name.

    Before returning, every entry of the master file is read back and
    compared with the original values.

    :param tmpdir_factory: pytest's ``tmpdir_factory`` fixture
    :returns: The temporary directory containing the three files
    :raises AssertionError: If an entry of the master file cannot be read
        or does not hold the expected values
    """
    tmpdir = tmpdir_factory.mktemp("hdf5_with_external_data")
    master = str(tmpdir / "master.h5")
    external_h5 = str(tmpdir / "external.h5")
    external_raw = str(tmpdir / "external.raw")

    data = numpy.array([100, 1000, 10000], numpy.uint16)
    # Virtual and raw datasets hold the data as the single row of a 2D array
    tshape = (1,) + data.shape

    with h5py.File(master, "w") as fmaster:

        def add_virtual_dataset(name, source):
            """Create virtual dataset `name` whose only row maps `source`."""
            layout = h5py.VirtualLayout(shape=tshape, dtype=data.dtype)
            layout[0] = source
            fmaster.create_virtual_dataset(name, layout)

        fmaster.create_dataset("data", data=data)
        fmaster["int"] = h5py.SoftLink("data")

        # Virtual dataset whose source is in the master file itself
        add_virtual_dataset(
            "vds0", h5py.VirtualSource(".", "data", shape=data.shape)
        )

        with h5py.File(external_h5, "w") as f:
            dset = f.create_dataset("data", data=data)
            # Source described by the dataset object
            add_virtual_dataset("vds1", h5py.VirtualSource(dset))

            # Source/target described by file name: absolute, bare, "./"
            filenames = (external_h5, "external.h5", "./external.h5")
            for index, filename in enumerate(filenames):
                add_virtual_dataset(
                    f"vds{index + 2}",
                    h5py.VirtualSource(filename, "data", shape=data.shape),
                )
                fmaster[f"ext{index + 1}"] = h5py.ExternalLink(
                    filename, "data"
                )

        data.tofile(external_raw)

        # External segments are (file name, offset, size) with the size
        # expressed in bytes: use the actual byte size of the data.
        raw_filenames = (external_raw, "external.raw", "./external.raw")
        for index, filename in enumerate(raw_filenames):
            fmaster.create_dataset(
                f"raw{index + 1}",
                external=[(filename, 0, data.nbytes)],
                shape=tshape,
                dtype=data.dtype,
            )

    # Validate links
    expected = data.tolist()
    cwd = os.getcwd()
    with h5py.File(master, "r") as fcheck:
        for name in fcheck:
            # raw2 and raw3 use relative raw file names: read them from the
            # directory of the files (presumably those names are resolved
            # from the current working directory - TODO confirm).
            from_tmpdir = name in ("raw2", "raw3")
            if from_tmpdir:
                os.chdir(str(tmpdir))
            try:
                values = fcheck[name][()].flatten().tolist()
            except Exception as exc:
                # Report which entry failed while keeping the original cause
                raise AssertionError(name) from exc
            finally:
                if from_tmpdir:
                    os.chdir(cwd)
            assert values == expected, name

    return tmpdir


@pytest.mark.skipif("VirtualLayout" not in dir(h5py), reason="h5py is too old")
def test_external_dataset_info(hdf5_with_external_data):
    """Check `external_dataset_info` on each entry of the master file.

    Plain datasets, soft links and external links must yield ``None``.
    Virtual datasets and external raw datasets must report the expected
    first source URL.
    """
    tmpdir = hdf5_with_external_data
    master = str(tmpdir / "master.h5")
    external_h5 = str(tmpdir / "external.h5")
    external_raw = str(tmpdir / "external.raw")

    # Entries for which no external dataset information is expected
    without_info = ("data", "int", "ext1", "ext2", "ext3")

    # Expected first source URL of every other entry
    expected_urls = {"vds0": f"{master}::/data"}
    for vds_name in ("vds1", "vds2", "vds3", "vds4"):
        expected_urls[vds_name] = f"{external_h5}::/data"
    for raw_name in ("raw1", "raw2", "raw3"):
        expected_urls[raw_name] = external_raw

    # `open` is the one imported from silx.io, not the builtin
    with open(master) as f:
        for name in f:
            info = h5link_utils.external_dataset_info(f[name])
            if name in without_info:
                assert info is None, name
                continue
            if name not in expected_urls:
                # Entry not covered by this test
                assert False, name
            assert info.first_source_url == expected_urls[name]