# Source code for algotom.io.converter

# ===========================================================================
# ===========================================================================
# Copyright (c) 2021 Nghia T. Vo. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
# Author: Nghia T. Vo
# E-mail:  
# Description: Python module for converting data format.
# Contributors:
# ===========================================================================

"""
Module for converting data formats:

    -   Convert a list of tif files to a hdf/nxs file.
    -   Extract tif images from a hdf/nxs file.
    -   Emulate an HDF5-like interface for TIF files in a folder.
"""

import os
import glob
import numpy as np
from PIL import Image
from joblib import Parallel, delayed
import algotom.io.loadersaver as losa


def convert_tif_to_hdf(input_path, output_path, key_path="entry/data",
                       crop=(0, 0, 0, 0), pattern=None, **options):
    """
    Convert a folder of tif files to a hdf/nxs file.

    Parameters
    ----------
    input_path : str
        Folder path to the tif files.
    output_path : str
        Path to the hdf/nxs file. If its extension is not one of ".hdf",
        ".h5", or ".nxs", the extension is replaced by ".hdf".
    key_path : str, optional
        Key path to the dataset.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right). All values
        must be non-negative.
    pattern : str, optional
        Used to find tif files with names matching the pattern.
    options : dict, optional
        Add metadata. E.g. options={"entry/angles": angles,
        "entry/energy": 53}.

    Returns
    -------
    str
        Path to the hdf/nxs file (with the corrected extension, if any).

    Raises
    ------
    ValueError
        If `crop` does not hold exactly four non-negative values, if no tif
        file is found, or if cropping leaves no pixels.
    """
    cr_top, cr_bottom, cr_left, cr_right = crop
    # Checked before any file access: a negative margin enlarges the computed
    # output size, so it would pass the size check below while the slice no
    # longer selects the documented region.
    if min(cr_top, cr_bottom, cr_left, cr_right) < 0:
        raise ValueError("Can't crop images with the given parameters !!!")

    if pattern is None:
        list_file = losa.find_file(input_path + "/*.tif*")
    else:
        list_file = losa.find_file(input_path + "/*" + pattern + "*.tif*")
    depth = len(list_file)
    if depth == 0:
        # Without this guard the lookup of the first file fails with an
        # uninformative IndexError.
        raise ValueError(f"!!! No tif files found in: {input_path}")

    # The first image defines the size of every slice in the output dataset.
    first_image = losa.load_image(list_file[0])
    (height, width) = np.shape(first_image)

    file_base, file_ext = os.path.splitext(output_path)
    if file_ext not in (".hdf", ".h5", ".nxs"):
        output_path = file_base + ".hdf"

    cr_height = height - cr_top - cr_bottom
    cr_width = width - cr_left - cr_right
    if cr_height < 1 or cr_width < 1:
        raise ValueError("Can't crop images with the given parameters !!!")

    data_out = losa.open_hdf_stream(output_path, (depth, cr_height, cr_width),
                                    key_path=key_path, overwrite=True,
                                    **options)
    rows = slice(cr_top, cr_top + cr_height)
    cols = slice(cr_left, cr_left + cr_width)
    for i, fname in enumerate(list_file):
        # Reuse the image already read for the size probe instead of
        # loading the first file twice.
        image = first_image if i == 0 else losa.load_image(fname)
        data_out[i] = image[rows, cols]
    return output_path
def extract_tif_from_hdf(input_path, output_path, key_path, index=(0, -1, 1),
                         axis=0, crop=(0, 0, 0, 0), prefix="img"):
    """
    Extract tif images from a hdf/nxs file.

    Parameters
    ----------
    input_path : str
        Path to the hdf/nxs file.
    output_path : str
        Output folder.
    key_path : str
        Key path to the dataset in the hdf/nxs file.
    index : tuple of int or int.
        Indices of extracted images. A tuple (or list) corresponds to
        (start, stop, step); a `stop` smaller than 1 or larger than the
        length of `axis` means "up to the end". A single integer extracts
        one image; a negative integer counts from the end, and out-of-range
        values are clamped to the nearest valid image.
    axis : int
        Axis along which the images are extracted. Values other than 1 or 2
        are treated as axis 0.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right). All values
        must be non-negative.
    prefix : str, optional
        Prefix of names of tif files.

    Returns
    -------
    str
        Folder path to the tif files.

    Raises
    ------
    ValueError
        If `crop` does not hold exactly four non-negative values, or if
        cropping leaves no pixels.
    """
    cr_top, cr_bottom, cr_left, cr_right = crop
    # Checked before the file is opened: a negative top/left margin would be
    # read as "count from the end" by the slices below and select the wrong
    # region.
    if min(cr_top, cr_bottom, cr_left, cr_right) < 0:
        raise ValueError("Can't crop images with the given parameters !!!")

    data = losa.load_hdf(input_path, key_path)
    (depth, height, width) = data.shape

    # For the chosen axis: how many images there are, and the size of the
    # two remaining axes that form each image (rows, columns).
    if axis == 1:
        num_image, num_row, num_col = height, depth, width
    elif axis == 2:
        num_image, num_row, num_col = width, depth, height
    else:
        num_image, num_row, num_col = depth, height, width

    if (num_row - cr_top - cr_bottom < 1
            or num_col - cr_left - cr_right < 1):
        raise ValueError("Can't crop images with the given parameters !!!")
    rows = slice(cr_top, num_row - cr_bottom)
    cols = slice(cr_left, num_col - cr_right)

    if isinstance(index, (tuple, list)):
        start, stop, step = index
        if stop < 1 or stop > num_image:
            stop = num_image
        start = np.clip(start, 0, stop - 1)
    else:
        # A scalar selects exactly one image. Negative values are resolved
        # relative to the end first; otherwise the "stop < 1" rule used for
        # ranges would turn e.g. -1 into "extract everything".
        if index < 0:
            index = index + num_image
        start = np.clip(index, 0, num_image - 1)
        stop, step = start + 1, 1

    for i in range(start, stop, step):
        if axis == 1:
            mat = data[rows, i, cols]
        elif axis == 2:
            mat = data[rows, cols, i]
        else:
            mat = data[i, rows, cols]
        # Zero-pad to at least five digits without ever dropping digits, so
        # indices >= 100000 do not collide with earlier file names.
        losa.save_image(f"{output_path}/{prefix}_{i:05d}.tif", mat)
    return output_path
class HdfEmulatorFromTif:
    """
    Emulate an HDF5-like interface for TIF files in a folder, allowing
    indexed and sliced data access.

    Parameters
    ----------
    folder_path : str
        Path to the folder containing TIFF files.
    ncore : int, optional
        Number of cores to use for parallel processing. The default is 1
        (sequential processing).

    Notes
    -----
    Supported indices are an integer (Python or NumPy), a slice over the
    image axis, or a tuple of up to three entries (image, row, column).
    Tuples with fewer than three entries are padded with full slices.
    Images are read from disk on every access; nothing is cached.

    Examples
    --------
    >>> hdf_emulator = HdfEmulatorFromTif('/path/to/tif/files', ncore=4)
    >>> print(hdf_emulator.shape)
    >>> last_image = hdf_emulator[-1]
    >>> image_stack = hdf_emulator[:, 0:4, :]
    """

    def __init__(self, folder_path, ncore=1):
        # The pattern matches any file name containing "tif"; the upper-case
        # pattern is only tried when the lower-case one finds nothing.
        files = glob.glob(folder_path + "/*tif*")
        if len(files) == 0:
            files = glob.glob(folder_path + "/*TIF*")
        if len(files) == 0:
            raise ValueError(f"!!! No tif files found in: {folder_path}")
        # Normalise Windows separators, then sort so that the image index
        # follows the lexicographic order of the file paths.
        self.files = sorted(name.replace("\\", "/") for name in files)
        self.n_jobs = ncore
        self._shape, self._dtype = self._get_shape_and_dtype()

    def _get_shape_and_dtype(self):
        """Return (shape, dtype) of the stack, probed from the first file."""
        img = self._load_image(self.files[0])
        return (len(self.files), *img.shape), img.dtype

    def __getitem__(self, index):
        """
        Load the requested image(s) from disk.

        Raises
        ------
        TypeError
            If `index` is not an integer, a slice, or a tuple.
        ValueError
            If a tuple index has more than three entries.
        """
        # NumPy integers (e.g. from np.arange) are not instances of int.
        if isinstance(index, (int, np.integer)):
            return self._load_image(self.files[index])
        if isinstance(index, slice):
            return self._load_stack(index)
        if isinstance(index, tuple):
            if len(index) > 3:
                raise ValueError("Too many indices: expected at most 3, "
                                 f"got {len(index)}")
            # Missing trailing entries select the full axis.
            z, y, x = index + (slice(None),) * (3 - len(index))
            if isinstance(z, slice):
                return self._load_stack(z, (y, x))
            return self._load_image(self.files[z])[y, x]
        raise TypeError("Invalid index type")

    def _load_stack(self, z_slice, yx=None):
        """
        Load the images selected by `z_slice` and stack them along axis 0,
        optionally applying the (row, column) index `yx` to each image.
        """
        indices = range(*z_slice.indices(len(self.files)))
        if len(indices) == 0:
            # np.stack rejects an empty sequence; build the empty result
            # from the known image shape and dtype instead.
            empty = np.empty((0, *self._shape[1:]), dtype=self._dtype)
            return empty if yx is None else empty[(slice(None), *yx)]
        images = Parallel(n_jobs=self.n_jobs)(
            delayed(self._load_image)(self.files[i]) for i in indices)
        if yx is not None:
            images = [img[yx] for img in images]
        return np.stack(images)

    def _load_image(self, file_path):
        """Read one image file into a NumPy array."""
        # The context manager closes the file handle once the pixel data
        # has been copied into the array.
        with Image.open(file_path) as img:
            return np.array(img)

    @property
    def shape(self):
        """Shape of the emulated dataset: (number of files, *image shape)."""
        return self._shape

    @property
    def dtype(self):
        """Data type of the first image in the folder."""
        return self._dtype

    def __len__(self):
        return len(self.files)