summaryrefslogtreecommitdiff
path: root/src/silx/io/h5link_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/silx/io/h5link_utils.py')
-rw-r--r--src/silx/io/h5link_utils.py77
1 files changed, 77 insertions, 0 deletions
diff --git a/src/silx/io/h5link_utils.py b/src/silx/io/h5link_utils.py
new file mode 100644
index 0000000..39f9ae4
--- /dev/null
+++ b/src/silx/io/h5link_utils.py
@@ -0,0 +1,77 @@
+import os
+from typing import NamedTuple, Optional
+from .utils import is_dataset
+
+
class ExternalDatasetInfo(NamedTuple):
    """Description of the external storage backing a dataset.

    The record only describes the *first* source in detail; `nfiles`
    tells how many sources were reported in total.
    """

    # Kind of external storage; the producer in this module uses
    # "Virtual" or "Raw".
    type: str
    # Number of source entries reported for the dataset.
    nfiles: int
    # File path of the first source (may be an empty string when there
    # is no source at all).
    first_file_path: str
    # Path of the data inside the first source file, when there is one.
    first_data_path: Optional[str] = None

    @property
    def first_source_url(self) -> str:
        """URL-like string ``<file path>::/<data path>`` of the first source.

        When `first_data_path` is missing or empty, only the file path is
        returned. A leading ``/`` is added to the data path when absent.
        """
        data_path = self.first_data_path
        if not data_path:
            return self.first_file_path

        # Avoid doubling the slash when the data path is already rooted.
        separator = "::" if data_path.startswith("/") else "::/"
        return self.first_file_path + separator + data_path
+
+
def _resolve_against_file_dir(hdf5obj, file_path):
    """Return `file_path` unchanged when absolute, otherwise the normalized
    path obtained by joining it to the directory of `hdf5obj.file.filename`."""
    if os.path.isabs(file_path):
        return file_path
    dirname = os.path.dirname(hdf5obj.file.filename)
    return os.path.normpath(os.path.join(dirname, file_path))


def external_dataset_info(hdf5obj) -> Optional[ExternalDatasetInfo]:
    """When the object is a virtual dataset or an external dataset,
    return information on the external files. Return `None` otherwise.

    Note that this has nothing to do with external HDF5 links.

    :param hdf5obj: Object to inspect. Anything for which `is_dataset`
        is false yields `None`.
    :returns: An `ExternalDatasetInfo` with type ``"Virtual"`` when
        `hdf5obj.is_virtual` is true (even when it reports no sources, in
        which case `nfiles` is 0 and the file path is empty), with type
        ``"Raw"`` when `hdf5obj.external` is non-empty, `None` otherwise.
        Relative source file paths are resolved against the directory of
        the file that contains `hdf5obj`.
    """
    if not is_dataset(hdf5obj):
        return None

    # Not every dataset-like object exposes `is_virtual`; a missing
    # attribute is treated like a non-virtual dataset.
    if getattr(hdf5obj, "is_virtual", False):
        sources = hdf5obj.virtual_sources()
        if not sources:
            return ExternalDatasetInfo(
                type="Virtual",
                nfiles=0,
                first_file_path="",
            )

        first_source = sources[0]
        first_file_path = first_source.file_name
        if first_file_path == ".":
            # "." is replaced by the path of the file holding the dataset.
            first_file_path = hdf5obj.file.filename
        else:
            first_file_path = _resolve_against_file_dir(hdf5obj, first_file_path)

        return ExternalDatasetInfo(
            type="Virtual",
            nfiles=len(sources),
            first_file_path=first_file_path,
            first_data_path=first_source.dset_name,
        )

    # A missing or empty `external` attribute means there is no raw
    # external storage to report.
    sources = getattr(hdf5obj, "external", None)
    if not sources:
        return None

    # Only the first item of the first entry (the file name) is used.
    first_file_path = _resolve_against_file_dir(hdf5obj, sources[0][0])
    return ExternalDatasetInfo(
        type="Raw",
        nfiles=len(sources),
        first_file_path=first_file_path,
    )