1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
import os
import pytest
import h5py
import numpy
from silx.io import open
from silx.io import h5link_utils
@pytest.fixture(scope="module")
def hdf5_with_external_data(tmpdir_factory):
    """Build an HDF5 "master" file whose items reach the same data in many ways.

    The master file contains:

    * ``data``: a plain dataset, and ``int``: a soft link to it;
    * ``vds0``: a virtual dataset whose source is ``data`` in the master file
      itself (file name ``"."``);
    * ``vds1`` .. ``vds4``: virtual datasets whose source is ``data`` in
      ``external.h5`` (from an open dataset object, an absolute path, a bare
      relative name and a ``./`` relative name);
    * ``ext1`` .. ``ext3``: external links to ``data`` in ``external.h5``
      (absolute path, bare relative name, ``./`` relative name);
    * ``raw1`` .. ``raw3``: datasets stored in the raw binary file
      ``external.raw`` (same three path spellings).

    Every item is read back and compared with the reference values before the
    fixture is handed to the tests.

    :param tmpdir_factory: pytest's ``tmpdir_factory`` fixture.
    :returns: The temporary directory containing ``master.h5``,
        ``external.h5`` and ``external.raw``.
    :raises AssertionError: If an item cannot be read or holds unexpected
        values.
    """
    tmpdir = tmpdir_factory.mktemp("hdf5_with_external_data")
    master = str(tmpdir / "master.h5")
    external_h5 = str(tmpdir / "external.h5")
    external_raw = str(tmpdir / "external.raw")

    data = numpy.array([100, 1000, 10000], numpy.uint16)
    # Virtual and raw datasets expose the data with one extra leading axis.
    tshape = (1,) + data.shape

    def add_virtual_dataset(h5file, name, source):
        """Create virtual dataset *name* in *h5file* mapping row 0 to *source*."""
        layout = h5py.VirtualLayout(shape=tshape, dtype=data.dtype)
        layout[0] = source
        h5file.create_virtual_dataset(name, layout)

    with h5py.File(master, "w") as fmaster:
        fmaster.create_dataset("data", data=data)
        fmaster["int"] = h5py.SoftLink("data")

        add_virtual_dataset(
            fmaster, "vds0", h5py.VirtualSource(".", "data", shape=data.shape)
        )

        # vds1 takes its source from the dataset object, so it is created
        # while the external file is still open.
        with h5py.File(external_h5, "w") as f:
            dset = f.create_dataset("data", data=data)
            add_virtual_dataset(fmaster, "vds1", h5py.VirtualSource(dset))

        # Same external HDF5 file spelled three ways:
        # vds2/ext1 (absolute), vds3/ext2 (bare name), vds4/ext3 ("./" name).
        h5_spellings = (external_h5, "external.h5", "./external.h5")
        for index, filename in enumerate(h5_spellings, start=1):
            add_virtual_dataset(
                fmaster,
                f"vds{index + 1}",
                h5py.VirtualSource(filename, "data", shape=data.shape),
            )
            fmaster[f"ext{index}"] = h5py.ExternalLink(filename, "data")

        data.tofile(external_raw)

        # Same raw file spelled three ways: raw1, raw2, raw3.
        # The segment size is expressed in bytes, hence ``data.nbytes``.
        raw_spellings = (external_raw, "external.raw", "./external.raw")
        for index, filename in enumerate(raw_spellings, start=1):
            fmaster.create_dataset(
                f"raw{index}",
                external=[(filename, 0, data.nbytes)],
                shape=tshape,
                dtype=data.dtype,
            )

    # Validate links
    expected = data.tolist()
    cwd = os.getcwd()
    with h5py.File(master, "r") as fmaster:
        for name in fmaster:
            # raw2/raw3 name the raw file with a relative path, so they are
            # read with the temporary directory as working directory.
            needs_chdir = name in ("raw2", "raw3")
            if needs_chdir:
                os.chdir(str(tmpdir))
            try:
                values = fmaster[name][()].flatten().tolist()
            except Exception as exc:
                # Keep the underlying error visible instead of a bare failure.
                raise AssertionError(f"cannot read {name!r}") from exc
            finally:
                if needs_chdir:
                    os.chdir(cwd)
            assert values == expected, name

    return tmpdir
@pytest.mark.skipif("VirtualLayout" not in dir(h5py), reason="h5py is too old")
def test_external_dataset_info(hdf5_with_external_data):
    """Check ``external_dataset_info`` on every item of the master file.

    Plain datasets, soft links and external links must yield ``None``.
    Virtual datasets and raw external datasets must report the URL of their
    first source. Any item name not covered by the expectations fails the test.

    :param hdf5_with_external_data: Directory holding ``master.h5``,
        ``external.h5`` and ``external.raw``.
    """
    root = hdf5_with_external_data
    master = str(root / "master.h5")
    external_h5 = str(root / "external.h5")
    external_raw = str(root / "external.raw")

    # Items for which no external dataset information is expected.
    without_info = ("data", "int", "ext1", "ext2", "ext3")

    # Expected URL of the first source, per item name.
    first_source_urls = {"vds0": f"{master}::/data"}
    first_source_urls.update(
        dict.fromkeys(("vds1", "vds2", "vds3", "vds4"), f"{external_h5}::/data")
    )
    first_source_urls.update(dict.fromkeys(("raw1", "raw2", "raw3"), external_raw))

    with open(master) as h5file:
        for name in h5file:
            info = h5link_utils.external_dataset_info(h5file[name])

            if name in without_info:
                assert info is None, name
                continue

            if name not in first_source_urls:
                assert False, name

            assert info.first_source_url == first_source_urls[name]
|