Volumes#
Context#
The aim of the classes dedicated to volumes is to provide a common way of saving and loading volumes across the tomotools suite. With them we intend to simplify the creation and handling of volumes, to make post-processing operations such as stitching easier, and to ease handling from a GUI.
overview#
A volume contains two main parts:
data: the volume itself. It is expected to be a 3D numpy array.
metadata: metadata associated with the volume, such as information regarding the reconstruction parameters…
For now the following volume classes exist:
HDF5Volume: save data and metadata to an hdf5 file.
EDFVolume: save data to single frame EDF files and metadata to a text file
JP2KVolume: save data to single frame jp2k files and metadata to a text file
TIFFVolume: save data to single frame tiff files and metadata to a text file
MultiTIFFVolume: save data to a single 3D tiff file and metadata to a text file
Volume API#
[1]:
from tomoscan.esrf import HDF5Volume
from tomoscan.esrf import EDFVolume
from tomoscan.esrf import TIFFVolume, MultiTIFFVolume, has_tifffile
from tomoscan.esrf import JP2KVolume, has_glymur
import numpy
from silx.io.url import DataUrl
from tempfile import TemporaryDirectory
import os
from h5glance import H5Glance
create some dataset to test#
[2]:
from tomoscan.esrf.volume.mock import create_volume
data = create_volume(frame_dims=(100, 100), z_size=5)
metadata = {
"reconstruction_params": {
"dataset": {
"location": "toto.hdf5",
"entry": "entry0000",
},
"phase": {
"method": "None",
"padding_type": "edge",
},
},
"processing_option": {
"build_sino": {
"axis_correction": "None",
"enable_halftomo": True,
},
"flatfield": {
"binning": "11",
"do_flat_distortion": False,
},
},
}
[3]:
# %pylab
[4]:
# imshow(data[0])
HDF5Volume#
constructor#
Here we will only focus on providing a file_path and a data_path to the constructor. From these it will deduce the default data and metadata data_path.
This approach gives us an identifier that can be reused. Users can also directly provide one url for the data and one for the metadata, but this is for advanced usage and will not be detailed here.
[5]:
# users can define the volume with all the information
volume = HDF5Volume(
file_path="test_volume.hdf5",
data_path="entry0000/reconstruction",
data=data,
metadata=metadata,
)
# or define them from properties
volume = HDF5Volume()
volume.file_path = "test_volume.hdf5"
volume.data_path = "entry0000/reconstruction"
volume.data = data # data must be None or 3D numpy array
volume.metadata = metadata # metadata must be None or an instance of dict
[6]:
# data can be accessed from the data property
# imshow(volume.data[0])
# metadata from the metadata property.
volume.metadata
[6]:
{'reconstruction_params': {'dataset': {'location': 'toto.hdf5',
'entry': 'entry0000'},
'phase': {'method': 'None', 'padding_type': 'edge'}},
'processing_option': {'build_sino': {'axis_correction': 'None',
'enable_halftomo': True},
'flatfield': {'binning': '11', 'do_flat_distortion': False}}}
saving#
[7]:
# save the volume
volume.save()
# display the contents of the volume file
H5Glance("test_volume.hdf5")
WARNING:tomoscan.DEPRECATION:parameter url is deprecated since tomoscan version 2.3. Reason: about to be removed.
File "/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tomoscan/utils/io.py", line 123, in _deprecate_url_in_signature
deprecated_warning(
[7]:
- data [📋]: 5 × 100 × 100 entries, dtype: float64
loading#
once the data is saved on disk then we can retrieve it from the same class
[8]:
volume_loaded = HDF5Volume(
file_path="test_volume.hdf5", data_path="entry0000/reconstruction"
)
volume_loaded.load()
# imshow(volume_loaded.data[0])
print(volume_loaded.metadata)
{'processing_option': {'build_sino': {'axis_correction': 'None', 'enable_halftomo': np.True_}, 'flatfield': {'binning': '11', 'do_flat_distortion': np.False_}}, 'reconstruction_params': {'dataset': {'entry': 'entry0000', 'location': 'toto.hdf5'}, 'phase': {'method': 'None', 'padding_type': 'edge'}}}
To avoid heavy memory consumption the data and metadata are not loaded automatically. To purge the cache you can call volume.clear_cache() or set data and metadata to None, like:
volume.data = None
volume.metadata = None
Note: if data or metadata is None, calling a saving function can raise a ValueError exception.
identifier#
Each subclass of VolumeBase defines what we call an identifier. The goal is that a user can retrieve a Volume from this identifier. An identifier can be saved as an instance of Identifier or as its string representation. An identifier looks like:
scheme:tomo_type:data_path@data_file
For now:
scheme can be hdf5, edf, jp2k, tiff and tiff_3d
tomo_type can be scan or volume
Note: This is pretty convenient for a GUI, for example, or to “copy/paste” a reference to a volume from one application to another.
get identifier#
[9]:
identifier = volume_loaded.get_identifier()
print(identifier, type(identifier))
print(identifier.to_str(), type(identifier.to_str()))
hdf5:volume:/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/test_volume.hdf5?path=entry0000/reconstruction <class 'tomoscan.esrf.identifier.hdf5Identifier.HDF5VolumeIdentifier'>
hdf5:volume:/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/test_volume.hdf5?path=entry0000/reconstruction <class 'str'>
use identifier to retrieve a volume#
[10]:
from tomoscan.factory import Factory
retrieve_volume = Factory.create_tomo_object_from_identifier(identifier=identifier)
assert isinstance(retrieve_volume, HDF5Volume)
retrieve_volume = Factory.create_tomo_object_from_identifier(
identifier=identifier.to_str()
)
assert isinstance(retrieve_volume, HDF5Volume)
[11]:
# clean workspace
if os.path.exists("test_volume.hdf5"):
os.remove("test_volume.hdf5")
EDFVolume#
constructor#
EDFVolume will store each frame as folder_prefix_index.edf and metadata to folder_prefix_infos.txt.
So the default constructor only requires a path to the folder where the data will be saved.
This folder path will also be the identifier
[12]:
volume = EDFVolume(folder="edf_volume", data=data, metadata=metadata)
or as for the HDF5Volume you can provide data and metadata once the Volume has been defined
[13]:
volume = EDFVolume(folder="edf_volume")
volume.data = data
volume.metadata = metadata
access to data and metadata is the same for any instance of VolumeBase, from data and metadata properties
[14]:
# imshow(volume.data[0])
print(volume.metadata)
{'reconstruction_params': {'dataset': {'location': 'toto.hdf5', 'entry': 'entry0000'}, 'phase': {'method': 'None', 'padding_type': 'edge'}}, 'processing_option': {'build_sino': {'axis_correction': 'None', 'enable_halftomo': True}, 'flatfield': {'binning': '11', 'do_flat_distortion': False}}}
saving#
[15]:
# save the volume
volume.save()
# if you want to overwrite existing data you can set the overwrite property
volume.overwrite = True
# display the contents of the folder
print(os.listdir("edf_volume"))
['edf_volume_000001.edf', 'edf_volume_infos.txt', 'edf_volume_000004.edf', 'edf_volume_000000.edf', 'edf_volume_000002.edf', 'edf_volume_000003.edf']
[16]:
# open one file containing a frame
import fabio
file_frame_0 = os.path.join("edf_volume", "edf_volume_000000.edf")
# imshow(fabio.open(file_frame_0).data)
[17]:
# display contents of the infos.txt file
metadata_file = os.path.join("edf_volume", "edf_volume_infos.txt")
with open(metadata_file, "r") as of:
print("".join(of.readlines()))
[reconstruction_params]
[reconstruction_params.dataset]
location = toto.hdf5
entry = entry0000
[reconstruction_params.phase]
method = \None
padding_type = edge
[processing_option]
[processing_option.build_sino]
axis_correction = \None
enable_halftomo = True
[processing_option.flatfield]
binning = \11
do_flat_distortion = False
loading#
[18]:
volume_loaded = EDFVolume(folder="edf_volume")
volume_loaded.load()
# imshow(volume_loaded.data[1])
print(volume_loaded.metadata)
{'reconstruction_params': {'dataset': {'location': 'toto.hdf5', 'entry': 'entry0000'}, 'phase': {'method': 'None', 'padding_type': 'edge'}}, 'processing_option': {'build_sino': {'axis_correction': 'None', 'enable_halftomo': True}, 'flatfield': {'binning': '11', 'do_flat_distortion': False}}}
identifier#
get_identifier#
[19]:
identifier = volume_loaded.get_identifier()
print(identifier, type(identifier))
print(identifier.to_str(), type(identifier.to_str()))
edf:volume:/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/edf_volume <class 'tomoscan.esrf.identifier.edfidentifier.EDFVolumeIdentifier'>
edf:volume:/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/edf_volume <class 'str'>
use identifier to retrieve a volume#
[20]:
from tomoscan.factory import Factory
retrieve_volume = Factory.create_tomo_object_from_identifier(identifier=identifier)
assert isinstance(retrieve_volume, EDFVolume)
retrieve_volume = Factory.create_tomo_object_from_identifier(
identifier=identifier.to_str()
)
assert isinstance(retrieve_volume, EDFVolume)
[21]:
# clean
import shutil
if os.path.exists("edf_volume"):
shutil.rmtree("edf_volume")
JP2KVolume#
The API of JP2KVolume is the same as that of EDFVolume, so for a tutorial please have a look at the EDFVolume section.
It requires glymur to be installed.
TIFFVolume#
The API of TIFFVolume is the same as that of EDFVolume, so for a tutorial please have a look at the EDFVolume section.
It requires tifffile to be installed.
MultiTIFFVolume#
MultiTIFFVolume requires tifffile to be installed.
constructor#
[22]:
volume = MultiTIFFVolume(file_path="multitiff_file.tiff", data=data, metadata=metadata)
[23]:
# access data and metadata
# imshow(volume.data[0])
print(volume.metadata)
{'reconstruction_params': {'dataset': {'location': 'toto.hdf5', 'entry': 'entry0000'}, 'phase': {'method': 'None', 'padding_type': 'edge'}}, 'processing_option': {'build_sino': {'axis_correction': 'None', 'enable_halftomo': True}, 'flatfield': {'binning': '11', 'do_flat_distortion': False}}}
saving#
[24]:
from tomoscan.esrf.volume.tiffvolume import has_tifffile
if has_tifffile:
volume.save()
else:
print("tifffile must be installed to use MultiTIFFVolume")
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[24], line 4
1 from tomoscan.esrf.volume.tiffvolume import has_tifffile
3 if has_tifffile:
----> 4 volume.save()
5 else:
6 print("tifffile must be installed to use MultiTIFFVolume")
File ~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tomoscan/volumebase.py:464, in VolumeBase.save(self, url, **kwargs)
462 # catch warning from using data_url until 'url' removed from the signature. See https://gitlab.esrf.fr/tomotools/tomwer/-/issues/1488
463 with catch_log_messages():
--> 464 self.save_data(data_url, **kwargs)
465 if self.metadata is not None:
466 # a volume is not force to have metadata to save. But calling save_metadata direclty might raise an error
467 # if no metadata found
468 self.save_metadata(metadata_url)
File ~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tomoscan/esrf/volume/tiffvolume.py:277, in MultiTIFFVolume.save_data(self, url)
273 if url is None:
274 raise ValueError(
275 "Cannot get data_url. An url should be provided. Don't know where to save this."
276 )
--> 277 os.makedirs(os.path.dirname(url.file_path()), exist_ok=True)
279 if url.scheme() == "tifffile":
280 if url.data_path() is not None:
File <frozen os>:225, in makedirs(name, mode, exist_ok)
FileNotFoundError: [Errno 2] No such file or directory: ''
loading#
[25]:
if has_tifffile:
volume_loaded = MultiTIFFVolume(file_path="multitiff_file.tiff")
assert volume_loaded.data is None
assert volume_loaded.metadata is None
volume_loaded.load()
# imshow(volume_loaded.data[0])
print(volume_loaded.metadata)
else:
print("tifffile must be installed to use MultiTIFFVolume")
WARNING:tomoscan.esrf.volume.tiffvolume:unable to load metadata from multitiff_file_infos.txt
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
/tmp/ipykernel_1189/614007523.py in ?()
1 if has_tifffile:
2 volume_loaded = MultiTIFFVolume(file_path="multitiff_file.tiff")
3 assert volume_loaded.data is None
4 assert volume_loaded.metadata is None
----> 5 volume_loaded.load()
6 # imshow(volume_loaded.data[0])
7 print(volume_loaded.metadata)
8 else:
~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tomoscan/volumebase.py in ?(self)
441 def load(self):
442 self.load_metadata(store=True)
443 # always load metadata first because we might expect to get some information from
444 # it in order to load data next
--> 445 self.load_data(store=True)
~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tomoscan/esrf/volume/tiffvolume.py in ?(self, url, store)
382 raise ValueError(
383 "Cannot get data_url. An url should be provided. Don't know where to save this."
384 )
385
--> 386 data = numpy.asarray([slice for slice in self.browse_slices(url=url)])
387
388 if store:
389 self.data = data
~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tomoscan/esrf/volume/tiffvolume.py in ?(self, url)
482 if url.data_path() is not None:
483 raise ValueError("data_path is not handle by multiframe tiff")
484
485 url = url or self.data_url
--> 486 reader = TiffFile(url.file_path())
487 for series in reader.series:
488 data = series.asarray()
489 if data.ndim == 3:
~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tifffile/tifffile.py in ?(self, file, mode, name, offset, size, omexml, superres, _multifile, _useframes, _parent, **is_flags)
4292 raise ValueError(msg)
4293 self._omexml = omexml
4294 self.is_ome = True
4295
-> 4296 fh = FileHandle(file, mode=mode, name=name, offset=offset, size=size)
4297 self._fh = fh
4298 self._multifile = True if _multifile is None else bool(_multifile)
4299 self._files = {fh.name: self}
~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tifffile/tifffile.py in ?(self, file, mode, name, offset, size)
13595 self._offset = -1 if offset is None else offset
13596 self._size = -1 if size is None else size
13597 self._close = True
13598 self._lock = NullContext()
> 13599 self.open()
13600 assert self._fh is not None
~/checkouts/readthedocs.org/user_builds/tomoscan-esrf/envs/v2.3.5/lib/python3.12/site-packages/tifffile/tifffile.py in ?(self)
13615 msg = f'invalid mode {self._mode}'
13616 raise ValueError(msg)
13617 self._file = os.path.realpath(self._file)
13618 self._dir, self._name = os.path.split(self._file)
> 13619 self._fh = open( # noqa: SIM115
13620 self._file, self._mode, encoding=None
13621 )
13622 self._close = True
FileNotFoundError: [Errno 2] No such file or directory: '/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/multitiff_file.tiff'
identifier#
[26]:
if has_tifffile:
identifier = volume_loaded.get_identifier()
print(identifier, type(identifier))
print(identifier.to_str(), type(identifier.to_str()))
tiff3d:volume:/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/multitiff_file.tiff <class 'tomoscan.esrf.identifier.tiffidentifier.MultiTiffVolumeIdentifier'>
tiff3d:volume:/home/docs/checkouts/readthedocs.org/user_builds/tomoscan-esrf/checkouts/v2.3.5/doc/tutorials/multitiff_file.tiff <class 'str'>
[27]:
from tomoscan.factory import Factory
if has_tifffile:
retrieve_volume = Factory.create_tomo_object_from_identifier(identifier=identifier)
assert isinstance(retrieve_volume, MultiTIFFVolume)
retrieve_volume = Factory.create_tomo_object_from_identifier(
identifier=identifier.to_str()
)
assert isinstance(retrieve_volume, MultiTIFFVolume)
[28]:
# clean
if os.path.exists("multitiff_file.tiff"):
os.remove("multitiff_file.tiff")
if os.path.exists(retrieve_volume.metadata_url.file_path()):
os.remove(retrieve_volume.metadata_url.file_path())