Source code for algotom.io.loadersaver

# ============================================================================
# ============================================================================
# Copyright (c) 2021 Nghia T. Vo. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Author: Nghia T. Vo
# E-mail:  
# Description: Python module for loading and saving data.
# Contributors:
# ============================================================================

"""
Module for I/O tasks:

    -   Load data from an image file (tif, png, jpeg) or a hdf/nxs file.
    -   Get information from a hdf/nxs file.
    -   Search for datasets in a hdf/nxs file.
    -   Save a 2D array as a tif image or 2D, 3D array to a hdf/nxs file.
    -   Get file names, make file/folder name.
    -   Load distortion coefficients from a txt file.
    -   Get the tree view of a hdf/nxs file.
    -   Functions for loading stacks of images from multiple datasets, e.g. to
        be used by speckle-based phase contrast tomography.
"""

import os
import warnings
import platform
from pathlib import Path
from fnmatch import fnmatchcase
import multiprocessing as mp
from joblib import Parallel, delayed
from collections import OrderedDict, deque
import h5py
import numpy as np
from PIL import Image


PIPE = "│"
ELBOW = "└──"
TEE = "├──"
PIPE_PREFIX = "│   "
SPACE_PREFIX = "    "


def __correct_path(file_path):
    """
    Correct escaped sequences in WinOS file path.
    """
    if isinstance(file_path, Path):
        file_path = str(file_path)
    escape_sequences = {
        '\a': r'\a',
        '\b': r'\b',
        '\f': r'\f',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
        '\v': r'\v',
    }
    for char, escaped in escape_sequences.items():
        if char in file_path:
            file_path = file_path.replace(char, escaped)
    file_path = file_path.replace('\\', '/')
    return Path(file_path)


def __get_path(file_path, check_exist=True):
    """
    Get/check a file path
    """
    if platform.system() == 'Windows':
        file_path = __correct_path(file_path)
    else:
        file_path = Path(file_path)
    if check_exist:
        if not file_path.exists():
            raise ValueError(f"No such file: {file_path}")
    return file_path


[docs]def load_image(file_path):
    """
    Load an image and convert it to a 2D array

    Parameters
    ----------
    file_path : str
        Path to the file.

    Returns
    -------
    array_like
        2D array.
    """
    try:
        mat = np.array(Image.open(__get_path(file_path)), dtype=np.float32)
    except Exception as error:
        raise ValueError(error)
    if len(mat.shape) > 2:
        axis_m = np.argmin(mat.shape)
        mat = np.mean(mat, axis=axis_m)
    return mat


[docs]def get_hdf_information(file_path, display=False):
    """
    Get information of datasets in a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    display : bool
        Print the results onto the screen if True.

    Returns
    -------
    list_key : str
        Keys to the datasets.
    list_shape : tuple of int
        Shapes of the datasets.
    list_type : str
        Types of the datasets.
    """
    hdf_object = h5py.File(__get_path(file_path), 'r')
    keys = []
    hdf_object.visit(keys.append)
    list_key, list_shape, list_type = [], [], []
    for key in keys:
        try:
            data = hdf_object[key]
            if isinstance(data, h5py.Group):
                list_tmp = list(data.items())
                if list_tmp:
                    for key2, _ in list_tmp:
                        list_key.append(key + "/" + key2)
                else:
                    list_key.append(key)
            else:
                list_key.append(data.name)
        except KeyError:
            list_key.append(key)
            pass
    for i, key in enumerate(list_key):
        shape, dtype = None, None
        try:
            data = hdf_object[list_key[i]]
            if isinstance(data, h5py.Dataset):
                shape, dtype = data.shape, data.dtype
            list_shape.append(shape)
            list_type.append(dtype)
        except KeyError:
            list_shape.append(shape)
            list_type.append(dtype)
            pass
    hdf_object.close()
    if display:
        if list_key:
            for i, key in enumerate(list_key):
                print(key + " : " + str(list_shape[i]) + " : " + str(
                    list_type[i]))
        else:
            print("Empty file !!!")
    return list_key, list_shape, list_type


[docs]def find_hdf_key(file_path, pattern, display=False):
    """
    Find datasets matching the name-pattern in a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    pattern : str
        Pattern to find the full names of the datasets.
    display : bool
        Print the results onto the screen if True.

    Returns
    -------
    list_key : str
        Keys to the datasets.
    list_shape : tuple of int
        Shapes of the datasets.
    list_type : str
        Types of the datasets.
    """
    hdf_object = h5py.File(__get_path(file_path), 'r')
    list_key, keys = [], []
    hdf_object.visit(keys.append)
    for key in keys:
        try:
            data = hdf_object[key]
            if isinstance(data, h5py.Group):
                list_tmp = list(data.items())
                if list_tmp:
                    for key2, _ in list_tmp:
                        list_key.append(key + "/" + key2)
                else:
                    list_key.append(key)
            else:
                list_key.append(data.name)
        except KeyError:
            pass
    list_dkey, list_dshape, list_dtype = [], [], []
    for _, key in enumerate(list_key):
        if pattern in key:
            list_dkey.append(key)
            shape, dtype = None, None
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Dataset):
                    shape, dtype = data.shape, data.dtype
                list_dtype.append(dtype)
                list_dshape.append(shape)
            except KeyError:
                list_dtype.append(dtype)
                list_dshape.append(shape)
                pass
    hdf_object.close()
    if display:
        if list_dkey:
            for i, key in enumerate(list_dkey):
                print(key + " : " + str(list_dshape[i]) + " : " + str(
                    list_dtype[i]))
        else:
            print("Can't find datasets with keys matching the "
                  "pattern: {}".format(pattern))
    return list_dkey, list_dshape, list_dtype


[docs]def load_hdf(file_path, key_path, return_file_obj=False):
    """
    Load a hdf/nexus dataset as an object.

    Parameters
    ----------
    file_path : str
        Path to the file.
    key_path : str
        Key path to the dataset.
    return_file_obj : bool, optional

    Returns
    -------
    objects
        hdf-dataset object, and file-object if return_file_obj is True.
    """
    try:
        hdf_object = h5py.File(__get_path(file_path), 'r')
    except Exception as error:
        raise ValueError(f"{error}")
    check = key_path in hdf_object
    if not check:
        raise ValueError(
            "Couldn't open object with the given key: {}".format(key_path))
    if return_file_obj:
        return hdf_object[key_path], hdf_object
    else:
        return hdf_object[key_path]


[docs]def make_folder(file_path):
    """
    Create a folder for saving file if the folder does not exist. This is a
    supplementary function for savers.

    Parameters
    ----------
    file_path : str
        Path to a file.
    """
    path = Path(file_path).resolve()
    if path.suffix:
        folder_path = path.parent
    else:
        folder_path = path
    if not folder_path.exists():
        try:
            folder_path.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            raise ValueError(f"Can't create : {folder_path}. Error: {e}")


[docs]def make_folder_name(folder_path, name_prefix="Output", zero_prefix=5):
    """
    Create a new folder name to avoid overwriting.
    E.g: Output_00001, Output_00002...

    Parameters
    ----------
    folder_path : str
        Path to the parent folder.
    name_prefix : str
        Name prefix
    zero_prefix : int
        Number of zeros to be added to file names.
    Returns
    -------
    str
        Name of the folder.
    """
    folder_path = Path(folder_path)
    scan_name_prefix = name_prefix + "_"
    existing_folders = list(folder_path.glob(f"{scan_name_prefix}*"))
    num_folder_exist = len(existing_folders)
    num_folder_new = num_folder_exist + 1
    name_tmp = f"{num_folder_new:0{zero_prefix}}"
    scan_name = scan_name_prefix + name_tmp
    while (folder_path / scan_name).is_dir():
        num_folder_new += 1
        name_tmp = f"{num_folder_new:0{zero_prefix}}"
        scan_name = scan_name_prefix + name_tmp
    return scan_name


[docs]def make_file_name(file_path):
    """
    Create a new file name to avoid overwriting.

    Parameters
    ----------
    file_path : str or Path object
        Path to the file.

    Returns
    -------
    str
        Updated file path.
    """
    file_path = Path(file_path)
    file_base = file_path.stem
    file_ext = file_path.suffix
    parent_dir = file_path.parent
    if file_path.exists():
        nfile = 0
        while True:
            name_add = f"_{nfile:04d}"
            new_file_name = f"{file_base}{name_add}{file_ext}"
            new_file_path = parent_dir / new_file_name
            if new_file_path.exists():
                nfile += 1
            else:
                file_path = new_file_path
                break
    return str(file_path)


[docs]def find_file(path, case_sensitive=False):
    """
    Search file

    Parameters
    ----------
    path : str
        Path and pattern to find files. e.g. 'C:/data/*.tif*'
    case_sensitive : bool, optional
        If True, pattern matching is case-sensitive. Defaults to False.

    Returns
    -------
    str or list of str
        List of files.
    """
    path = __correct_path(path)
    directory = path.parent
    pattern = path.name
    if case_sensitive:
        file_paths = list(directory.glob(path.name))
    else:
        pattern_lower = pattern.casefold()
        file_paths = [p for p in directory.iterdir()
                      if fnmatchcase(p.name.casefold(), pattern_lower)]
    if not file_paths:
        raise FileNotFoundError(f"No files found matching: {path}")
    return sorted([file.as_posix() for file in file_paths])


[docs]def save_image(file_path, mat, overwrite=True):
    """
    Save a 2D array to an image.

    Parameters
    ----------
    file_path : str
        Path to the file.
    mat : int or float
        2D array.
    overwrite : bool
        Overwrite an existing file if True.

    Returns
    -------
    str
        Updated file path.
    """
    file_path = __get_path(file_path, check_exist=False)
    file_path = file_path.resolve()
    file_ext = file_path.suffix
    if not ((file_ext == ".tif") or (file_ext == ".tiff")):
        mat = np.uint8(
            255.0 * (mat - np.min(mat)) / (np.max(mat) - np.min(mat)))
    else:
        data_type = str(mat.dtype)
        if "complex" in data_type:
            raise ValueError(f"Can't save to tiff with format: {data_type}")
    image = Image.fromarray(mat)
    if not overwrite:
        file_path = make_file_name(file_path)
    make_folder(file_path)
    try:
        image.save(file_path)
    except Exception as error:
        raise ValueError(f"Couldn't write to file: {file_path}. Error {error}")
    return file_path


[docs]def open_hdf_stream(file_path, data_shape, key_path='entry/data',
                    data_type='float32', overwrite=True, **options):
    """
    Write an array to a hdf/nxs file with options to add metadata.

    Parameters
    ----------
    file_path : str
        Path to the file.
    data_shape : tuple of int
        Shape of the data.
    key_path : str
        Key path to the dataset.
    data_type: str
        Type of data.
    overwrite : bool
        Overwrite the existing file if True.
    options : dict, optional
        Add metadata. E.g options={"entry/angles": angles, "entry/energy": 53}.

    Returns
    -------
    object
        hdf object.
    """
    file_path = __get_path(file_path, check_exist=False)
    file_path = file_path.resolve()
    if file_path.suffix.lower() not in {'.hdf', '.h5', '.nxs', '.hdf5'}:
        file_path = file_path.with_suffix('.hdf')
    file_path.parent.mkdir(parents=True, exist_ok=True)
    if not overwrite:
        file_path = Path(make_file_name(file_path))
    try:
        ofile = h5py.File(file_path, 'w')
    except Exception as error:
        raise ValueError(f"Couldn't write to file: {file_path}. Error {error}")
    if len(options) != 0:
        for opt_name in options:
            opts = options[opt_name]
            for key in opts:
                if key_path in key:
                    msg = "!!!Selected key-path, '{0}', can not be a child " \
                          "key-path of '{1}'!!!\n!!!Change to make sure " \
                          "they are at the same level!!!".format(key, key_path)
                    raise ValueError(msg)
                ofile.create_dataset(key, data=opts[key])
    data_out = ofile.create_dataset(key_path, data_shape, dtype=data_type)
    return data_out


[docs]def load_distortion_coefficient(file_path):
    """
    Load distortion coefficients from a text file. The file must use the
    following format:
    x_center : float
    y_center : float
    factor0 : float
    factor1 : float
    ...

    Parameters
    ----------
    file_path : str
        Path to the file

    Returns
    -------
    tuple of float and list
        Tuple of (xcenter, ycenter, list_fact).
    """
    file_path = __get_path(file_path, check_exist=True)
    with open(file_path, 'r') as f:
        x = f.read().splitlines()
        list_data = []
        for i in x:
            list_data.append(float(i.split()[-1]))
    xcenter = list_data[0]
    ycenter = list_data[1]
    list_fact = list_data[2:]
    return xcenter, ycenter, list_fact


[docs]def save_distortion_coefficient(file_path, xcenter, ycenter, list_fact,
                                overwrite=True):
    """
    Write distortion coefficients to a text file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    xcenter : float
        Center of distortion in x-direction.
    ycenter : float
        Center of distortion in y-direction.
    list_fact : float
        1D array. Coefficients of the polynomial fit.
    overwrite : bool
        Overwrite an existing file if True.

    Returns
    -------
    str
        Updated file path.
    """
    file_path = __get_path(file_path, check_exist=False)
    file_path = file_path.resolve()
    if file_path.suffix.lower() not in {'.txt', '.dat'}:
        file_path = file_path.with_suffix('.txt')
    file_path.parent.mkdir(parents=True, exist_ok=True)
    if not overwrite:
        file_path = Path(make_file_name(file_path))
    metadata = OrderedDict()
    metadata['xcenter'] = xcenter
    metadata['ycenter'] = ycenter
    for i, fact in enumerate(list_fact):
        kname = 'factor' + str(i)
        metadata[kname] = fact
    with open(file_path, "w") as f:
        for line in metadata:
            f.write(str(line) + " = " + str(metadata[line]))
            f.write('\n')
    return file_path


def _get_subgroups(hdf_object, key=None):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Return the name of subgroups.
    """
    list_group = []
    if key is None:
        for group in hdf_object.keys():
            list_group.append(group)
        if len(list_group) == 1:
            key = list_group[0]
        else:
            key = ""
    else:
        if key in hdf_object:
            try:
                obj = hdf_object[key]
                if isinstance(obj, h5py.Group):
                    for group in hdf_object[key].keys():
                        list_group.append(group)
            except KeyError:
                pass
    if len(list_group) > 0:
        list_group = sorted(list_group)
    return list_group, key


def _add_branches(tree, hdf_object, key, key1, index, last_index, prefix,
                  connector, level, add_shape):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Add branches to the tree.
    """
    shape = None
    key_comb = key + "/" + key1
    if add_shape is True:
        if key_comb in hdf_object:
            try:
                obj = hdf_object[key_comb]
                if isinstance(obj, h5py.Dataset):
                    shape = str(obj.shape)
            except KeyError:
                shape = str("-> ???External-link???")
    if shape is not None:
        tree.append(f"{prefix}{connector} {key1} {shape}")
    else:
        tree.append(f"{prefix}{connector} {key1}")
    if index != last_index:
        prefix += PIPE_PREFIX
    else:
        prefix += SPACE_PREFIX
    _make_tree_body(tree, hdf_object, prefix=prefix, key=key_comb,
                    level=level, add_shape=add_shape)


def _make_tree_body(tree, hdf_object, prefix="", key=None, level=0,
                    add_shape=True):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Create the tree body.
    """
    entries, key = _get_subgroups(hdf_object, key)
    num_ent = len(entries)
    last_index = num_ent - 1
    level = level + 1
    if num_ent > 0:
        if last_index == 0:
            key = "" if level == 1 else key
            if num_ent > 1:
                connector = PIPE
            else:
                connector = ELBOW if level > 1 else ""
            _add_branches(tree, hdf_object, key, entries[0], 0, 0, prefix,
                          connector, level, add_shape)
        else:
            for index, key1 in enumerate(entries):
                connector = ELBOW if index == last_index else TEE
                if index == 0:
                    tree.append(prefix + PIPE)
                _add_branches(tree, hdf_object, key, key1, index, last_index,
                              prefix, connector, level, add_shape)


[docs]def get_hdf_tree(file_path, output=None, add_shape=True, display=True):
    """
    Get the tree view of a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    output : str or None
        Path to the output file in a text-format file (.txt, .md,...).
    add_shape : bool
        Including the shape of a dataset to the tree if True.
    display : bool
        Print the tree onto the screen if True.

    Returns
    -------
    list of string
    """
    hdf_object = h5py.File(__get_path(file_path), 'r')
    tree = deque()
    _make_tree_body(tree, hdf_object, add_shape=add_shape)
    if output is not None:
        make_folder(output)
        output_file = open(output, mode="w", encoding="UTF-8")
        with output_file as stream:
            for entry in tree:
                print(entry, file=stream)
    else:
        if display:
            for entry in tree:
                print(entry)
    return tree


def __get_ref_sam_stacks_dls(proj_idx, list_data_obj, list_sam_idx,
                             list_ref_idx, list_dark_idx, top, bot, left,
                             right, height, width, flat_field, dark_field,
                             fix_zero_div):
    """
    Supplementary method for the method of "get_reference_sample_stacks_dls"
    """
    ref_stack = []
    sam_stack = []
    num_img = len(list_data_obj)
    height1 = bot - top
    width1 = right - left
    if flat_field is not None:
        if flat_field.shape != (height, width):
            raise ValueError("Shape of flat-field image is not "
                             "the same as projection image "
                             "({0}, {1})".format(height, width))
        else:
            flat_ave = flat_field[top: bot, left:right]
    else:
        flat_ave = np.ones((height1, width1), dtype=np.float32)
    if dark_field is not None:
        if dark_field.shape != (height, width):
            raise ValueError("Shape of dark-field image is not "
                             "the same as projection image "
                             "({0}, {1})".format(height, width))
        else:
            dark_ave = dark_field[top: bot, left:right]
    for i in range(num_img):
        if dark_field is None:
            if len(list_dark_idx) != 0:
                idx1 = list_dark_idx[i][0]
                idx2 = list_dark_idx[i][-1] + 1
                dark_ave = np.mean(
                    list_data_obj[i][idx1:idx2, top:bot, left:right], axis=0)
            else:
                dark_ave = np.zeros((height1, width1), dtype=np.float32)
        if flat_field is not None:
            flat_dark = flat_ave - dark_ave
            nmean = np.mean(flat_dark)
            flat_dark[flat_dark == 0.0] = nmean
        if len(list_ref_idx) != 0:
            idx1 = list_ref_idx[i][0]
            idx2 = list_ref_idx[i][-1] + 1
            ref_ave = np.mean(list_data_obj[i][idx1:idx2, top:bot, left:right],
                              axis=0)
            if flat_field is not None:
                ref_ave = (ref_ave - dark_ave) / flat_dark
            else:
                ref_ave = ref_ave - dark_ave
            ref_stack.append(np.float32(ref_ave))
        idx = list_sam_idx[i][proj_idx]
        proj = list_data_obj[i][idx, top:bot, left:right]
        if flat_field is not None:
            proj = (proj - dark_ave) / flat_dark
        else:
            proj = (proj - dark_ave)
        sam_stack.append(np.float32(proj))
    sam_stack = np.asarray(sam_stack, dtype=np.float32)
    if fix_zero_div:
        nmean = np.mean(sam_stack)
        sam_stack[sam_stack == 0.0] = nmean
    if ref_stack:
        ref_stack = np.asarray(ref_stack, dtype=np.float32)
        if fix_zero_div:
            nmean = np.mean(ref_stack)
            ref_stack[ref_stack == 0.0] = nmean
    return ref_stack, sam_stack


[docs]def get_reference_sample_stacks_dls(proj_idx, list_path, data_key=None,
                                    image_key=None, crop=(0, 0, 0, 0),
                                    flat_field=None, dark_field=None,
                                    num_use=None, fix_zero_div=True):
    """
    A method for multi-position speckle-based phase-contrast tomography to get
    two stacks of reference images (speckle images) and sample images (at the
    same rotation angle from each tomographic dataset).

    The method is specific to tomographic datasets acquired at Diamond Light
    Source (DLS) where projection-images, flat-field images, and dark-field
    images are in the same 3d array. There is a dataset named "image_key"
    inside a hdf/nxs file used to distinguish image types.

    Parameters
    ----------
    proj_idx : int
        Index of a projection-image in a tomographic dataset.
    list_path : list of str
        List of file paths (hdf/nxs format) to tomographic datasets.
    data_key : str, optional
        Key to images. Automatically find the key if None.
    image_key : str, list, tuple, ndarray, optional
        Key to 1d-array dataset for specifying image types. Automatically
        find the key if None. Can be used to pass the 1d-array manually.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right).
    flat_field : ndarray, optional
        2D array or None. Used for flat-field correction if not None.
    dark_field : ndarray, optional
        2D array or None. Used for dark-field correction if not None.
    num_use : int, optional
        Number of datasets used for stacking.
    fix_zero_div : bool, optional
        Correct zeros to avoid zero-division problem down the processing line.

    Returns
    -------
    ref_stack : ndarray
        Return if reference-images found. 3D array.
    sam_stack : ndarray
        3D array. A stack of sample-images.
    """
    if not isinstance(list_path, list):
        raise ValueError("Input must be a list of strings!!!")
    num_file = len(list_path)
    if num_use is None:
        num_use = num_file
    else:
        num_use = np.clip(num_use, 1, num_file)
    if data_key is None:
        data_key = find_hdf_key(list_path[0], "data/data")[0]
        if len(data_key) != 0:
            data_key = data_key[0]
        else:
            raise ValueError("Please provide the key to dataset!!!")
    if image_key is None:
        image_key = find_hdf_key(list_path[0], "image_key")[0]
        if len(image_key) != 0:
            image_key = image_key[0]
        else:
            image_key = None
            warnings.warn("No image-key found!!!. Output will be a single "
                          "stack")
    (height, width) = load_hdf(list_path[0], data_key).shape[-2:]
    cr_top, cr_bot, cr_left, cr_right = crop
    top = cr_top
    bot = height - cr_bot
    left = cr_left
    right = width - cr_right
    height1 = bot - top
    width1 = right - left
    if height1 < 1 or width1 < 1:
        raise ValueError("Can't crop data with the given input!!!")
    list_data_obj = []
    list_sam_idx = []
    list_ref_idx = []
    list_dark_idx = []
    list_num_proj = []
    list_start_idx = []
    list_stop_idx = []
    for path in list_path[:num_use]:
        data_obj = load_hdf(path, data_key)
        num_img = len(data_obj)
        list_data_obj.append(data_obj)
        if image_key is not None:
            if isinstance(image_key, str):
                int_keys = load_hdf(path, image_key)[:]
            else:
                if not (isinstance(image_key, list) or
                        isinstance(image_key, tuple) or
                        isinstance(image_key, np.ndarray)):
                    raise ValueError("Input must be a string, list, tuple, or "
                                     "1D numpy array!!!")
                else:
                    int_keys = np.asarray(image_key, dtype=np.float32)
                    if len(int_keys) != num_img:
                        raise ValueError("Number of image-keys is not the same"
                                         " as the number of images {0}!!!"
                                         "".format(num_img))
            list_tmp = np.where(int_keys == 0.0)[0]
            if len(list_tmp) != 0:
                list_idx = np.sort(np.int32(np.squeeze(np.asarray(list_tmp))))
                list_sam_idx.append(list_idx)
                list_start_idx.append(list_idx[0])
                list_stop_idx.append(list_idx[-1])
            list_num_proj.append(len(list_tmp))
            list_tmp = np.where(int_keys == 1.0)[0]
            if len(list_tmp) != 0:
                list_ref_idx.append(
                    np.sort(np.int32(np.squeeze(np.asarray(list_tmp)))))
            list_tmp = np.where(int_keys == 2.0)[0]
            if len(list_tmp) != 0:
                list_dark_idx.append(
                    np.sort(np.int32(np.squeeze(np.asarray(list_tmp)))))
        else:
            num_proj = num_img
            list_sam_idx.append(np.arange(num_proj))
            list_start_idx.append(0)
            list_stop_idx.append(num_proj - 1)
            list_num_proj.append(num_proj)
    num_proj = np.min(np.asarray(list_num_proj))
    start_idx = np.max(np.asarray(list_start_idx))
    stop_idx = np.min(np.asarray(list_stop_idx))
    if (stop_idx - start_idx + 1) > num_proj:
        stop_idx = start_idx + num_proj - 1
    idx_off = proj_idx + start_idx
    if idx_off > stop_idx or idx_off < start_idx:
        raise ValueError("Requested projection-index is out of the range"
                         " [{0}, {1}] given the offset of "
                         "{2}".format(start_idx, stop_idx, start_idx))
    else:
        f_alias = __get_ref_sam_stacks_dls
        ref_stack, sam_stack = f_alias(proj_idx, list_data_obj, list_sam_idx,
                                       list_ref_idx, list_dark_idx, top, bot,
                                       left, right, height, width, flat_field,
                                       dark_field, fix_zero_div)
        if len(ref_stack) != 0:
            return ref_stack, sam_stack
        else:
            return sam_stack


def __check_dark_flat_field(flat_field, dark_field, height, width):
    """
    Supplementary method for checking dark-field image, flat-field image.
    """
    if flat_field is not None:
        if len(flat_field) == 3:
            flat_field = np.mean(flat_field, axis=0)
        (height2, width2) = flat_field.shape
        if height2 != height or width2 != width:
            raise ValueError("Shape of flat-field image is not "
                             "the same as projection image")
    else:
        flat_field = np.ones((height, width))
    if dark_field is not None:
        if len(dark_field) == 3:
            dark_field = np.mean(dark_field, axis=0)
        (height2, width2) = dark_field.shape
        if height2 != height or width2 != width:
            raise ValueError("Shape of dark-field image is not "
                             "the same as projection image")
    else:
        dark_field = np.zeros((height, width))
    return flat_field, dark_field


[docs]def get_reference_sample_stacks(proj_idx, ref_path, sam_path, ref_key, sam_key,
                                crop=(0, 0, 0, 0), flat_field=None,
                                dark_field=None, num_use=None,
                                fix_zero_div=True):
    """
    Get two stacks of reference images (speckle images) and sample images (at
    the same rotation angle from each tomographic dataset). A method for
    multi-position speckle-based phase-contrast tomography.

    Parameters
    ----------
    proj_idx : int
        Index of a projection-image in a tomographic dataset.
    ref_path : list of str
        List of file paths (hdf/nxs format) to reference-image datasets.
    sam_path : list of str
        List of file paths (hdf/nxs format) to tomographic datasets.
    ref_key : str
        Key to a reference-image dataset.
    sam_key : str
        Key to a projection-image dataset.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right).
    flat_field : ndarray, optional
        2D array or None. Used for flat-field correction if not None.
    dark_field : ndarray, optional
        2D array or None. Used for dark-field correction if not None.
    num_use : int, optional
        Number of datasets used for stacking.
    fix_zero_div : bool, optional
        Correct zeros to avoid zero-division problem down the processing line.

    Returns
    -------
    ref_stack : ndarray
        3D array. A stack of reference-images.
    sam_stack : ndarray
        3D array. A stack of sample-images.
    """
    if not isinstance(ref_path, list):
        raise ValueError("Input-path must be a list of strings!!!")
    if len(ref_path) != len(sam_path):
        raise ValueError("Number of inputs must be the same!!!")
    num_file = len(ref_path)
    if num_use is None:
        num_use = num_file
    else:
        num_use = np.clip(num_use, 1, num_file)
    (height, width) = load_hdf(ref_path[0], ref_key).shape[-2:]
    cr_top, cr_bot, cr_left, cr_right = crop
    top = cr_top
    bot = height - cr_bot
    left = cr_left
    right = width - cr_right
    height1 = bot - top
    width1 = right - left
    if height1 < 1 or width1 < 1:
        raise ValueError("Can't crop data with the given input!!!")
    fix_zeros = False if flat_field is None else True
    flat_field, dark_field = __check_dark_flat_field(flat_field, dark_field,
                                                     height, width)
    flat_field = flat_field[top:bot, left:right]
    dark_field = dark_field[top:bot, left:right]
    flat_dark = flat_field - dark_field
    if fix_zeros:
        nmean = np.mean(flat_dark)
        flat_dark[flat_dark == 0.0] = nmean
    ref_objs = []
    sam_objs = []
    for i in range(num_use):
        ref_objs.append(load_hdf(ref_path[i], ref_key))
        sam_objs.append(load_hdf(sam_path[i], sam_key))
    ref_stack = []
    sam_stack = []
    for i in range(num_use):
        if len(ref_objs[i].shape) == 3:
            ref_ave = np.mean(ref_objs[i][:, top:bot, left:right], axis=0)
        else:
            ref_ave = ref_objs[i][top:bot, left:right]
        proj = sam_objs[i][proj_idx, top:bot, left:right]
        if fix_zeros:
            ref_ave = (ref_ave - dark_field) / flat_dark
            proj = (proj - dark_field) / flat_dark
        else:
            ref_ave = ref_ave - dark_field
            proj = (proj - dark_field)
        ref_stack.append(np.float32(ref_ave))
        sam_stack.append(np.float32(proj))
    ref_stack = np.asarray(ref_stack, dtype=np.float32)
    sam_stack = np.asarray(sam_stack, dtype=np.float32)
    if fix_zero_div:
        nmean = np.mean(ref_stack)
        ref_stack[ref_stack == 0.0] = nmean
        nmean = np.mean(sam_stack)
        sam_stack[sam_stack == 0.0] = nmean
    return ref_stack, sam_stack


[docs]def get_tif_stack(file_base, idx=None, crop=(0, 0, 0, 0), flat_field=None,
                  dark_field=None, num_use=None, fix_zero_div=True):
    """
    Load tif images to a stack. Supplementary method for 'get_image_stack'.

    Parameters
    ----------
    file_base : str
        Folder path to tif images.
    idx : int or None
        Load single or multiple images.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right).
    flat_field : ndarray, optional
        2D array or None. Used for flat-field correction if not None.
    dark_field : ndarray, optional
        2D array or None. Used for dark-field correction if not None.
    num_use : int, optional
        Number of images used for stacking.
    fix_zero_div : bool, optional
        Correct zeros to avoid zero-division problem down the processing line.

    Returns
    -------
    img_stack : ndarray
        3D array. A stack of images.
    """
    list_file = find_file(file_base + "/*tif*")
    num_file = len(list_file)
    if num_file != 0:
        (height, width) = np.shape(load_image(list_file[0]))
    else:
        raise ValueError("No tif-images in: {}".format(file_base))
    if idx is not None:
        if idx < 0:
            idx = num_file + idx
        if idx > (num_file - 1):
            raise ValueError("Requested index: {0} is out of "
                             "the range: {1}".format(idx, num_file - 1))
    if num_use is None:
        num_use = num_file
    else:
        num_use = np.clip(num_use, 1, num_file)
    cr_top, cr_bot, cr_left, cr_right = crop
    top = cr_top
    bot = height - cr_bot
    left = cr_left
    right = width - cr_right
    height1 = bot - top
    width1 = right - left
    if height1 < 1 or width1 < 1:
        raise ValueError("Can't crop data with the given input!!!")
    fix_zeros = False if flat_field is None else True
    flat_field, dark_field = __check_dark_flat_field(flat_field, dark_field,
                                                     height, width)
    flat_field = flat_field[top:bot, left:right]
    dark_field = dark_field[top:bot, left:right]
    flat_dark = flat_field - dark_field
    if fix_zeros:
        nmean = np.mean(flat_dark)
        flat_dark[flat_dark == 0.0] = nmean
    if idx is not None:
        img_stack = load_image(list_file[idx])[top:bot, left:right]
        if fix_zeros:
            img_stack = (img_stack - dark_field) / flat_dark
        else:
            img_stack = img_stack - dark_field
        img_stack = [img_stack]
    else:
        img_stack = []
        for file in list_file[:num_use]:
            img = load_image(file)[top:bot, left:right]
            if fix_zeros:
                img = (img - dark_field) / flat_dark
            else:
                img = img - dark_field
            img_stack.append(img)
    img_stack = np.asarray(img_stack)
    if fix_zero_div:
        nmean = np.mean(img_stack)
        img_stack[img_stack == 0.0] = nmean
    return img_stack


[docs]def get_image_stack(idx, paths, data_key=None, average=False,
                    crop=(0, 0, 0, 0), flat_field=None, dark_field=None,
                    num_use=None, fix_zero_div=True):
    """
    To get multiple images with the same index from multiple datasets
    (tif format or hdf format). For tif images, if "paths" is a string
    (not a list) use idx=None to load all images. For getting a stack of images
    from a single hdf file, use the "load_hdf" method instead.

    Parameters
    ----------
    idx : int or None
        Index of an image in a dataset. Use None to load all images if only
        one dataset provided.
    paths : list of str or str
        List of hdf/nxs file-paths, list of folders of tif-images, or a folder
        of tif-images.
    data_key : str
        Requested if input is a hdf/nxs files.
    average : bool, optional
        Average images in a dataset if True.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right).
    flat_field : ndarray, optional
        2D array or None. Used for flat-field correction if not None.
    dark_field : ndarray, optional
        2D array or None. Used for dark-field correction if not None.
    num_use : int, optional
        Number of datasets used for stacking.
    fix_zero_div : bool, optional
        Correct zeros to avoid zero-division problem down the processing line.

    Returns
    -------
    img_stack : ndarray
        3D array. A stack of images.
    """
    if isinstance(paths, str):
        if os.path.isdir(paths):
            img_stack = get_tif_stack(paths, idx=idx, crop=crop,
                                      flat_field=flat_field,
                                      dark_field=dark_field, num_use=num_use,
                                      fix_zero_div=fix_zero_div)
        else:
            raise ValueError("The folder: {} does not exist.".format(paths))
    elif isinstance(paths, list):
        num_file = len(paths)
        if num_use is None:
            num_use = num_file
        else:
            num_use = np.clip(num_use, 1, num_file)
        tif_format = False
        if os.path.isdir(paths[0]):
            tif_format = True
        else:
            if data_key is None:
                raise ValueError(
                    "Please provide the key to a dataset in the hdf/nxs file")
        if tif_format:
            list_file = find_file(paths[0] + "/*tif*")
            if len(list_file) != 0:
                (height, width) = np.shape(load_image(list_file[0]))
            else:
                raise ValueError("No tif-images in: {}".format(paths[0]))
        else:
            (height, width) = load_hdf(paths[0], data_key).shape[-2:]
        cr_top, cr_bot, cr_left, cr_right = crop
        top = cr_top
        bot = height - cr_bot
        left = cr_left
        right = width - cr_right
        height1 = bot - top
        width1 = right - left
        if height1 < 1 or width1 < 1:
            raise ValueError("Can't crop data with the given input!!!")
        fix_zeros = False if flat_field is None else True
        flat_field, dark_field = __check_dark_flat_field(flat_field,
                                                         dark_field,
                                                         height, width)
        flat_field = flat_field[top:bot, left:right]
        dark_field = dark_field[top:bot, left:right]
        flat_dark = flat_field - dark_field
        if fix_zeros:
            nmean = np.mean(flat_dark)
            flat_dark[flat_dark == 0.0] = nmean
        img_stack = []
        if not tif_format:
            for i in range(num_use):
                data_obj = load_hdf(paths[i], data_key)
                if len(data_obj.shape) == 3:
                    if average:
                        img = np.mean(data_obj[:, top:bot, left:right], axis=0)
                    else:
                        img = data_obj[idx, top:bot, left:right]
                else:
                    img = data_obj[top:bot, left:right]
                if fix_zeros:
                    img = (img - dark_field) / flat_dark
                else:
                    img = img - dark_field
                img_stack.append(img)
        else:
            for i in range(num_use):
                list_file = find_file(paths[i] + "/*tif*")
                if average:
                    img = np.mean(
                        np.asarray([load_image(file)[top:bot, left:right]
                                    for file in list_file]), axis=0)
                else:
                    img = load_image(list_file[idx])[top:bot, left:right]
                if fix_zeros:
                    img = (img - dark_field) / flat_dark
                else:
                    img = img - dark_field
                img_stack.append(img)
        img_stack = np.asarray(img_stack)
        if fix_zero_div:
            nmean = np.mean(img_stack)
            img_stack[img_stack == 0.0] = nmean
    else:
        raise ValueError("Input must be a list of strings or a folder path!!!")
    return img_stack


[docs]def load_image_multiple(list_path, ncore=None, prefer="threads"):
    """
    Load list of images in parallel.

    Parameters
    ----------
    list_path : str
        List of file paths.
    ncore : int or None
        Number of cpu-cores. Automatically selected if None.
    prefer : {"threads", "processes"}
        Prefer backend for parallel processing.

    Returns
    -------
    array_like
        3D array.
    """
    if isinstance(list_path, list):
        if ncore is None:
            ncore = mp.cpu_count() - 1
        num_file = len(list_path)
        ncore = np.clip(ncore, 1, num_file)
        if ncore > 1:
            imgs = Parallel(n_jobs=ncore, prefer=prefer)(
                delayed(load_image)(list_path[i]) for i in range(num_file))
        else:
            imgs = [load_image(list_path[i]) for i in range(num_file)]
    else:
        raise ValueError("Input must be a list of file paths!!!")
    return np.asarray(imgs)


[docs]def save_image_multiple(list_path, image_stack, axis=0, overwrite=True,
                        ncore=None, prefer="threads", start_idx=0):
    """
    Save an 3D-array to a list of tif images in parallel.

    Parameters
    ----------
    list_path : str
        List of output paths or a folder path
    image_stack : array_like
        3D array.
    axis : int
        Axis to slice data.
    overwrite : bool
        Overwrite an existing file if True.
    ncore : int or None
        Number of cpu-cores. Automatically selected if None.
    prefer : {"threads", "processes"}
        Prefer backend for parallel processing.
    start_idx : int
        Starting index of the output files if input is a folder.
    """
    if isinstance(list_path, list):
        num_path = len(list_path)
        num_file = image_stack.shape[axis]
        if num_path != num_file:
            raise ValueError("Number of file paths: {0} is different to the "
                             "number of images: {1} given the axis of {2}!!!"
                             "".format(num_path, num_file, axis))
    elif isinstance(list_path, str):
        num_file = image_stack.shape[axis]
        start_idx = int(start_idx)
        list_path = [(list_path + "/img_" + ("00000" + str(i))[-5:] + ".tif")
                     for i in range(start_idx, start_idx + num_file)]
    else:
        raise ValueError("Input must be a list of file paths or a folder path")
    if ncore is None:
        ncore = mp.cpu_count() - 1
    ncore = np.clip(ncore, 1, num_file)
    if axis == 2:
        if ncore > 1:
            Parallel(n_jobs=ncore, prefer=prefer)(
                delayed(save_image)(list_path[i], image_stack[:, :, i],
                                    overwrite) for i in range(num_file))
        else:
            for i in range(num_file):
                save_image(list_path[i], image_stack[:, :, i], overwrite)
    elif axis == 1:
        if ncore > 1:
            Parallel(n_jobs=ncore, prefer=prefer)(
                delayed(save_image)(list_path[i], image_stack[:, i, :],
                                    overwrite) for i in range(num_file))
        else:
            for i in range(num_file):
                save_image(list_path[i], image_stack[:, i, :], overwrite)
    else:
        if ncore > 1:
            Parallel(n_jobs=ncore, prefer=prefer)(
                delayed(save_image)(list_path[i], image_stack[i],
                                    overwrite) for i in range(num_file))
        else:
            for i in range(num_file):
                save_image(list_path[i], image_stack[i], overwrite)