Source code for algotom.post.postprocessing

# ============================================================================
# ============================================================================
# Copyright (c) 2021 Nghia T. Vo. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Author: Nghia T. Vo
# E-mail:  
# Description: Python module for postprocessing techniques.
# Contributors:
# ============================================================================

"""
Module of methods in the postprocessing stage:

    -   Get statistical information of reconstructed images or a dataset.
    -   Downsample a 2D or 3D array, or a dataset.
    -   Rescale a 2D or 3D array, or a dataset, to 8-bit or 16-bit data-type.
    -   Reslice a 3D array or a dataset (hdf/nxs file or tif images).
    -   Remove ring artifacts in a reconstructed image by transforming back
        and forth between polar and Cartesian coordinates.
"""

import os
import sys
import shutil
import timeit
import glob
import h5py
import numpy as np
from PIL import Image
import scipy.ndimage as ndi
import algotom.util.utility as util
import algotom.io.loadersaver as losa
import algotom.prep.removal as remo


def __get_input_type(input_):
    """
    Supplementary method: to get input type
    """
    in_type = None
    if isinstance(input_, np.ndarray):
        in_type = "numpy_array"
    elif isinstance(input_, str):
        file_ext = os.path.splitext(input_)[-1]
        if file_ext == "":
            list_file = losa.find_file(input_ + "/*.tif*")
            if list_file:
                in_type = "tif"
            else:
                raise ValueError(
                    "No tif files in the folder: {}".format(input_))
        elif file_ext in ('.hdf', '.h5', '.nxs'):
            in_type = "hdf"
    return in_type


def __get_output_type(output):
    """
    Supplementary method: to get output type
    """
    out_type = None
    if isinstance(output, str):
        file_ext = os.path.splitext(output)[-1]
        if file_ext == "":
            out_type = "tif"
        elif file_ext in ('.hdf', '.h5', '.nxs'):
            out_type = "hdf"
        else:
            raise ValueError("File format must be hdf/h5/nxs !!!")
    return out_type


def __check_output(output):
    """
    Supplementary method: to check if output folder/file exists
    """
    msg = "File/folder exists!!! Choose another path or set `overwrite=True`"
    if isinstance(output, str):
        file_base, file_ext = os.path.splitext(output)
        if file_ext == "":
            if os.path.exists(file_base):
                raise ValueError(msg)
        else:
            if os.path.isfile(output):
                raise ValueError(msg)


def __get_shape(input_, key_path=None):
    """
    Supplementary method: to get the shape of 3D data which can be given as
    a folder of tif files, a hdf file-path, or a 3D array.
    """
    in_type = __get_input_type(input_)
    if in_type == "numpy_array":
        (depth, height, width) = input_.shape
    elif in_type == "tif":
        list_file = losa.find_file(input_ + "/*.tif*")
        depth = len(list_file)
        (height, width) = np.shape(losa.load_image(list_file[0]))
    elif in_type == "hdf":
        if key_path is None:
            raise ValueError(
                "Please provide the key path to the dataset!!!")
        with h5py.File(input_, 'r') as hdf_object:
            if key_path not in hdf_object:
                raise ValueError("!!! Wrong key !!!")
            (depth, height, width) = hdf_object[key_path].shape
    else:
        raise ValueError("Input must be a folder-path to tif files, a hdf "
                         "file-path, or a numpy array!!!")
    return depth, height, width


def __get_cropped_shape(input_, crop, key_path=None):
    """
    Supplementary method: to get the cropped-shape information of 3D data
    which can be given as a folder of tif files, a hdf file-path, or a 3D
    array.
    """
    if len(crop) != 6:
        raise ValueError("Crop must be a tuple/list with the length of 6")
    (cr_d1, cr_d2, cr_h1, cr_h2, cr_w1, cr_w2) = crop
    (depth, height, width) = __get_shape(input_, key_path=key_path)
    d1, d2 = cr_d1, depth - cr_d2
    depth1 = d2 - d1
    h1, h2 = cr_h1, height - cr_h2
    w1, w2 = cr_w1, width - cr_w2
    height1, width1 = h2 - h1, w2 - w1
    if (depth1 <= 0) or (height1 <= 0) or (width1 <= 0):
        raise ValueError("Check crop parameters!!! Can't crop the data having"
                         " shape: {}".format((depth, height, width)))
    return (d1, d2, h1, h2, w1, w2), (depth1, height1, width1)


def __get_dataset_size(input_):
    """
    Supplementary method: to get the size of a 3D array, a folder of tif
    files, or a hdf file in MB.
    """
    in_type = __get_input_type(input_)
    b_unit = 1024.0 * 1024.0
    if in_type == "numpy_array":
        size_in_MB = input_.nbytes / b_unit
    elif in_type == "hdf":
        size_in_MB = os.path.getsize(input_) / b_unit
    else:
        list_file = losa.find_file(input_ + "/*.tif*")
        if list_file:
            size_1_file = np.asarray(Image.open(list_file[0])).nbytes / b_unit
        else:
            size_1_file = 0.0
        size_in_MB = len(list_file) * size_1_file
    return size_in_MB


def get_statistical_information(mat, percentile=(0, 100), denoise=False):
    """
    Get statistical information of an image.

    Parameters
    ----------
    mat : array_like
        2D array. Projection image, sinogram image, or reconstructed image.
    percentile : tuple of floats
        Tuple of (min_percentile, max_percentile) to compute.
        Must be between 0 and 100 inclusive.
    denoise : bool, optional
        Enable/disable denoising before extracting statistical information.

    Returns
    -------
    gmin : float
        The minimum value of the data array.
    gmax : float
        The maximum value of the data array.
    min_percent : float
        The first computed percentile of the data array.
    max_percent : float
        The last computed percentile of the data array.
    mean : float
        The mean of the data array.
    median : float
        The median of the data array.
    variance : float
        The variance of the data array.
    """
    if denoise is True:
        mat = ndi.gaussian_filter(mat, 2)
    gmin = np.min(mat)
    gmax = np.max(mat)
    min_percent = np.percentile(mat, percentile[0])
    max_percent = np.percentile(mat, percentile[-1])
    median = np.median(mat)
    mean = np.mean(mat)
    variance = np.var(mat)
    return gmin, gmax, min_percent, max_percent, mean, median, variance

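# A minimal usage sketch (kept as comments so nothing runs on import; the
# synthetic image below is an assumption for illustration only):
#
#     import numpy as np
#     import algotom.post.postprocessing as post
#
#     mat = np.random.rand(512, 512)  # stand-in for a reconstructed slice
#     (gmin, gmax, min_p, max_p, mean,
#      median, variance) = post.get_statistical_information(
#         mat, percentile=(1, 99))
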
def get_statistical_information_dataset(input_, percentile=(0, 100), skip=5,
                                        denoise=False, key_path=None,
                                        crop=(0, 0, 0, 0, 0, 0)):
    """
    Get statistical information of a dataset. This can be a folder of tif
    files, a hdf file, or a 3D array.

    Parameters
    ----------
    input_ : str, hdf file, or array_like
        It can be a folder path to tif files, a hdf file, or a 3D array.
    percentile : tuple of floats
        Tuple of (min_percentile, max_percentile) to compute.
        Must be between 0 and 100 inclusive.
    skip : int
        Skipping step of reading input.
    denoise : bool, optional
        Enable/disable denoising before extracting statistical information.
    key_path : str, optional
        Key path to the dataset if the input is a hdf file.
    crop : tuple of int, optional
        Crop 3D data from the edges, i.e.
        crop = (crop_depth1, crop_depth2, crop_height1, crop_height2,
        crop_width1, crop_width2).

    Returns
    -------
    gmin : float
        The global minimum value of the data array.
    gmax : float
        The global maximum value of the data array.
    min_percent : float
        The global minimum of the first computed percentile of the data
        array.
    max_percent : float
        The global maximum of the last computed percentile of the data
        array.
    mean : float
        The mean of the data array.
    median : float
        The median of the data array.
    variance : float
        The mean of the variances of the sampled slices.
    """
    results = __get_cropped_shape(input_, crop=crop, key_path=key_path)
    (d1, d2, h1, h2, w1, w2) = results[0]
    depth1 = results[1][0]
    skip = int(np.clip(skip, 1, depth1 - 1))
    in_type = __get_input_type(input_)
    # Denoising, if requested, is applied inside get_statistical_information.
    f_alias = get_statistical_information
    list_stat = []
    if in_type == "tif":
        list_file = losa.find_file(input_ + "/*.tif*")
        for i in range(d1, d2, skip):
            mat = losa.load_image(list_file[i])[h1:h2, w1:w2]
            list_stat.append(f_alias(mat, percentile, denoise))
    elif in_type == "hdf":
        data = losa.load_hdf(input_, key_path)
        for i in range(d1, d2, skip):
            mat = data[i, h1:h2, w1:w2]
            list_stat.append(f_alias(mat, percentile, denoise))
    else:
        for i in range(d1, d2, skip):
            mat = input_[i, h1:h2, w1:w2]
            list_stat.append(f_alias(mat, percentile, denoise))
    list_stat = np.asarray(list_stat)
    gmin, gmax = np.min(list_stat[:, 0]), np.max(list_stat[:, 1])
    min_percent, max_percent = np.min(list_stat[:, 2]), np.max(list_stat[:, 3])
    # Columns follow the return order of get_statistical_information:
    # column 4 holds the mean and column 5 the median of each slice.
    mean = np.mean(list_stat[:, 4])
    median = np.median(list_stat[:, 5])
    variance = np.mean(list_stat[:, 6])
    return gmin, gmax, min_percent, max_percent, mean, median, variance

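# A usage sketch for the dataset variant (commented out; the hdf path and
# key below are placeholders, not part of the module):
#
#     import algotom.post.postprocessing as post
#
#     # 1st/99th-percentile range of every 10th slice, cropped by 10 pixels
#     # on each side of the height and width axes.
#     results = post.get_statistical_information_dataset(
#         "recon_data.hdf", percentile=(1, 99), skip=10,
#         key_path="entry/data", crop=(0, 0, 10, 10, 10, 10))
#     (gmin, gmax) = results[0:2]
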
def downsample(mat, cell_size, method="mean"):
    """
    Downsample an image.

    Parameters
    ----------
    mat : array_like
        2D array.
    cell_size : int or tuple of int
        Window size along axes used for grouping pixels.
    method : {"mean", "median", "max", "min"}
        Downsampling method.

    Returns
    -------
    array_like
        Downsampled image.
    """
    if method == "median":
        dsp_method = np.median
    elif method == "max":
        dsp_method = np.max
    elif method == "min":
        dsp_method = np.amin
    else:
        dsp_method = np.mean
    (height, width) = mat.shape
    if isinstance(cell_size, int):
        cell_size = (cell_size, cell_size)
    height_dsp = height // cell_size[0]
    width_dsp = width // cell_size[1]
    # Trim the image so each dimension is divisible by the cell size, then
    # fold each cell into its own axis and reduce.
    mat = mat[:height_dsp * cell_size[0], :width_dsp * cell_size[1]]
    mat_dsp = mat.reshape(height_dsp, cell_size[0], width_dsp, cell_size[1])
    mat_dsp = dsp_method(dsp_method(mat_dsp, axis=-1), axis=1)
    return mat_dsp

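# Usage sketch: 2x2 mean-pooling halves each dimension (commented out;
# the synthetic input is an assumption):
#
#     import numpy as np
#     import algotom.post.postprocessing as post
#
#     mat = np.arange(16.0).reshape(4, 4)
#     mat_dsp = post.downsample(mat, cell_size=2, method="mean")
#     print(mat_dsp.shape)  # (2, 2)
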
def rescale(mat, nbit=16, minmax=None):
    """
    Rescale a 32-bit array to 16-bit/8-bit data.

    Parameters
    ----------
    mat : array_like
    nbit : {8, 16}
        Rescaled data-type: 8-bit or 16-bit.
    minmax : tuple of float, or None
        Minimum and maximum values used for rescaling.

    Returns
    -------
    array_like
        Rescaled array.
    """
    if nbit != 8 and nbit != 16:
        raise ValueError("Only two options for nbit: 8 or 16 !!!")
    if minmax is None:
        gmin, gmax = np.min(mat), np.max(mat)
    else:
        (gmin, gmax) = minmax
    # Normalize to [0, 1] within the given grayscale window, then map to
    # the full range of the target integer type.
    mat = np.clip(mat, gmin, gmax)
    mat = (mat - gmin) / (gmax - gmin)
    if nbit == 8:
        mat = np.uint8(np.clip(mat * 255, 0, 255))
    else:
        mat = np.uint16(np.clip(mat * 65535, 0, 65535))
    return mat

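# Usage sketch: map a float image to 8-bit using a fixed grayscale window
# (commented out; the input and window values are assumptions):
#
#     import numpy as np
#     import algotom.post.postprocessing as post
#
#     mat = np.random.rand(256, 256).astype(np.float32)
#     mat8 = post.rescale(mat, nbit=8, minmax=(0.1, 0.9))
#     print(mat8.dtype)  # uint8
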
def downsample_dataset(input_, output, cell_size, method="mean",
                       key_path=None, rescaling=False, nbit=16, minmax=None,
                       skip=None, crop=(0, 0, 0, 0, 0, 0), overwrite=False):
    """
    Downsample a dataset. The input can be a folder of tif files, a hdf
    file, or a 3D array.

    Parameters
    ----------
    input_ : str, array_like
        It can be a folder path to tif files, a hdf file, or a 3D array.
    output : str, None
        It can be a folder path, a hdf file path, or None (memory consuming).
    cell_size : int or tuple of int
        Window size along axes used for grouping pixels.
    method : {"mean", "median", "max", "min"}
        Downsampling method.
    key_path : str, optional
        Key path to the dataset if the input is a hdf file.
    rescaling : bool
        Rescale dataset if True.
    nbit : {8, 16}
        If rescaling is True, select data-type: 8-bit or 16-bit.
    minmax : tuple of float, or None
        Minimum and maximum values used for rescaling if True.
    skip : int or None
        Skipping step of images used for getting statistical information
        if rescaling is True and input is 32-bit data.
    crop : tuple of int, optional
        Crop 3D data from the edges, i.e.
        crop = (crop_depth1, crop_depth2, crop_height1, crop_height2,
        crop_width1, crop_width2).
    overwrite : bool
        Overwrite an existing file/folder if True.

    Returns
    -------
    array_like or None
        If output is None, returning a 3D array.
    """
    if not overwrite:
        __check_output(output)
    results = __get_cropped_shape(input_, crop=crop, key_path=key_path)
    (d1, d2, h1, h2, w1, w2) = results[0]
    (depth1, height1, width1) = results[1]
    if method == "median":
        dsp_method = np.median
    elif method == "max":
        dsp_method = np.amax
    elif method == "min":
        dsp_method = np.amin
    else:
        dsp_method = np.mean
    if isinstance(cell_size, int):
        cell_size = (cell_size, cell_size, cell_size)
    depth_dsp = depth1 // cell_size[0]
    height_dsp = height1 // cell_size[1]
    width_dsp = width1 // cell_size[2]
    if (depth_dsp == 0) or (height_dsp == 0) or (width_dsp == 0):
        raise ValueError("Incorrect cell size {}".format(cell_size))
    in_type = __get_input_type(input_)
    if in_type == "tif":
        data = losa.find_file(input_ + "/*.tif*")
        data_type = np.asarray(Image.open(data[0])).dtype
    elif in_type == "hdf":
        data = losa.load_hdf(input_, key_path)
        data_type = data.dtype
    else:
        data = input_
        data_type = data.dtype
    res_type = str(data_type)
    if rescaling is True:
        if nbit == 16:
            res_type = "uint16"
        elif nbit == 8:
            res_type = "uint8"
        else:
            raise ValueError("Only two options for nbit: 8 or 16 !!!")
        if str(data_type) != res_type:
            if data_type == np.uint8:
                minmax = (0, 255)
            elif data_type == np.uint16:
                minmax = (0, 65535)
            else:
                if skip is None:
                    skip = int(np.clip(np.ceil(0.15 * depth1), 1,
                                       depth1 - 1))
                if minmax is None:
                    f_alias = get_statistical_information_dataset
                    minmax = f_alias(input_, percentile=(0, 100), skip=skip,
                                     crop=crop, key_path=key_path)[0:2]
        else:
            rescaling = False
    out_type = __get_output_type(output)
    if out_type == "hdf":
        data_dsp = losa.open_hdf_stream(
            output, (depth_dsp, height_dsp, width_dsp), data_type=res_type,
            key_path="entry/data", overwrite=True)
    elif out_type is None:
        data_dsp = []
    else:
        data_dsp = None
    num = 0
    for i in range(d1, d2, cell_size[0]):
        if (i + cell_size[0]) > (d1 + depth1):
            break
        if in_type == "tif":
            mat = np.asarray([losa.load_image(data[j])[h1:h2, w1:w2]
                              for j in range(i, i + cell_size[0])])
            mat = mat[:, :height_dsp * cell_size[1],
                      :width_dsp * cell_size[2]]
        else:
            mat = data[i:i + cell_size[0],
                       h1:h1 + height_dsp * cell_size[1],
                       w1:w1 + width_dsp * cell_size[2]]
        # Fold each 3D cell into its own axes and reduce along them.
        mat = mat.reshape(1, cell_size[0], height_dsp, cell_size[1],
                          width_dsp, cell_size[2])
        mat_dsp = dsp_method(
            dsp_method(dsp_method(mat, axis=-1), axis=1), axis=2)[0]
        if rescaling:
            mat_dsp = rescale(mat_dsp, nbit, minmax)
        if out_type is None:
            data_dsp.append(mat_dsp)
        elif out_type == "hdf":
            data_dsp[num] = mat_dsp.astype(res_type)
        else:
            out_name = "0000" + str(num)
            losa.save_image(output + "/img_" + out_name[-5:] + ".tif",
                            mat_dsp.astype(res_type))
        num += 1
    if out_type is None:
        data_dsp = np.asarray(data_dsp).astype(res_type)
    return data_dsp

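# Usage sketch (commented out; the input folder and output file are
# placeholders):
#
#     import algotom.post.postprocessing as post
#
#     # Downsample a folder of reconstructed tif slices by 2 in all three
#     # directions and save the result as 16-bit data in a hdf file.
#     post.downsample_dataset("recon_slices/", "recon_dsp.hdf", cell_size=2,
#                             method="mean", rescaling=True, nbit=16)
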
def rescale_dataset(input_, output, nbit=16, minmax=None, skip=None,
                    key_path=None, crop=(0, 0, 0, 0, 0, 0), overwrite=False):
    """
    Rescale a dataset to 8-bit or 16-bit data-type. The dataset can be a
    folder of tif files, a hdf file, or a 3D array.

    Parameters
    ----------
    input_ : str, array_like
        It can be a folder path to tif files, a hdf file, or a 3D array.
    output : str, None
        It can be a folder path, a hdf file path, or None (memory consuming).
    nbit : {8, 16, 32}
        Select rescaled data-type: 8-bit/16-bit. 32 is for cropping data
        only.
    minmax : tuple of float, or None
        Minimum and maximum values used for rescaling. They are calculated
        if None is given.
    skip : int or None
        Skipping step of images used for getting statistical information.
    key_path : str, optional
        Key path to the dataset if the input is a hdf file.
    crop : tuple of int, optional
        Crop 3D data from the edges, i.e.
        crop = (crop_depth1, crop_depth2, crop_height1, crop_height2,
        crop_width1, crop_width2).
    overwrite : bool
        Overwrite an existing file/folder if True.

    Returns
    -------
    array_like or None
        If output is None, returning a 3D array.
    """
    if not overwrite:
        __check_output(output)
    results = __get_cropped_shape(input_, crop=crop, key_path=key_path)
    (d1, d2, h1, h2, w1, w2) = results[0]
    (depth1, height1, width1) = results[1]
    if minmax is None:
        if skip is None:
            skip = int(np.clip(np.ceil(0.15 * depth1), 1, None))
        minmax = get_statistical_information_dataset(
            input_, skip=skip, crop=crop, key_path=key_path)[0:2]
    if nbit == 8:
        data_type = "uint8"
    elif nbit == 16:
        data_type = "uint16"
    else:
        data_type = "float32"
    out_type = __get_output_type(output)
    if out_type == "hdf":
        data_res = losa.open_hdf_stream(output, (depth1, height1, width1),
                                        key_path="entry/data",
                                        data_type=data_type, overwrite=False)
    elif out_type is None:
        data_res = []
    else:
        data_res = None
    in_type = __get_input_type(input_)
    if in_type == "tif":
        data = losa.find_file(input_ + "/*.tif*")
    elif in_type == "hdf":
        data = losa.load_hdf(input_, key_path)
    else:
        data = input_
    for i in range(d1, d2):
        if in_type == "tif":
            mat = losa.load_image(data[i])[h1:h2, w1:w2]
        else:
            mat = data[i, h1:h2, w1:w2]
        if nbit != 32:
            mat = rescale(mat, nbit=nbit, minmax=minmax)
        if out_type is None:
            data_res.append(mat)
        elif out_type == "hdf":
            data_res[i - d1] = mat
        else:
            out_name = "0000" + str(i)
            losa.save_image(output + "/img_" + out_name[-5:] + ".tif", mat)
    if out_type is None:
        data_res = np.asarray(data_res)
    return data_res

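# Usage sketch (commented out; paths are placeholders):
#
#     import algotom.post.postprocessing as post
#
#     # Convert 32-bit tif slices to 16-bit tifs. The global min/max are
#     # computed automatically from a subset of slices.
#     post.rescale_dataset("recon_32bit/", "recon_16bit/", nbit=16)
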
def __save_intermediate_data(input_, output, axis, crop, key_path=None,
                             rotate=0.0, chunk=16, mode="constant",
                             ncore=None, show_progress=True):
    """
    Supplementary method: save data to an intermediate hdf-file for fast
    reslicing and reducing the RAM requirements.
    """
    in_type = __get_input_type(input_)
    results = __get_cropped_shape(input_, crop=crop, key_path=key_path)
    (d1, d2, h1, h2, w1, w2) = results[0]
    (depth1, height1, width1) = results[1]
    chunk = np.clip(chunk, 1, depth1 - 1)
    last_chunk = depth1 - chunk * (depth1 // chunk)
    folder_tmp = os.path.splitext(output)[0] + "/tmp_/"
    file_tmp = folder_tmp + "/file_tmp.hdf"
    losa.make_folder(folder_tmp)
    out_key = "entry/data"
    b_unit = 1024.0 * 1024.0
    with h5py.File(file_tmp, 'w') as ofile:
        t0 = timeit.default_timer()
        if in_type == "tif":
            data = losa.find_file(input_ + "/*.tif*")
            data_type = np.asarray(Image.open(data[0])).dtype

            def load_chunk(i1, i2):
                # Load a chunk of tif images in parallel, offset by the
                # cropped depth.
                return losa.load_image_multiple(data[i1 + d1: i2 + d1],
                                                ncore=ncore,
                                                prefer="threads")
        else:
            data = losa.load_hdf(input_, key_path)
            data_type = data.dtype

            def load_chunk(i1, i2):
                # Load a chunk of slices from the hdf dataset, offset by
                # the cropped depth.
                return data[i1 + d1: i2 + d1]
        if axis == 2:
            # Each cropped slice is transposed so that reslicing along
            # axis 2 of the input becomes slicing along axis 1 of the
            # intermediate data.
            out_shape = (depth1, width1, height1)
        else:
            out_shape = (depth1, height1, width1)
        hdf_chunk = (chunk, min(100, out_shape[1]), min(100, out_shape[2]))
        data_tmp = ofile.create_dataset(out_key, out_shape, dtype=data_type,
                                        chunks=hdf_chunk)
        list_chunk = [(i, i + chunk) for i in
                      range(0, depth1 - last_chunk, chunk)]
        if last_chunk != 0:
            list_chunk.append((depth1 - last_chunk, depth1))
        for (i1, i2) in list_chunk:
            len_msg = 0
            if show_progress:
                t1 = timeit.default_timer()
                f_size = os.path.getsize(file_tmp) / b_unit
                msg = "Writing to an intermediate hdf-file: {0:0.2f} MB. " \
                      "Time: {1:0.2f}s".format(f_size, t1 - t0)
                len_msg = len(msg)
                sys.stdout.write(msg)
                sys.stdout.flush()
            mat_chunk = load_chunk(i1, i2)
            mat_tmp = []
            for j in range(i2 - i1):
                mat = mat_chunk[j]
                if rotate != 0.0:
                    mat = ndi.rotate(mat, rotate, mode=mode, reshape=False,
                                     order=1)
                mat = mat[h1:h2, w1:w2]
                if axis == 2:
                    mat = np.transpose(mat)
                mat_tmp.append(mat)
            data_tmp[i1:i2] = np.asarray(mat_tmp)
            if show_progress:
                sys.stdout.write("\r" + " " * len_msg + "\r")
    if show_progress:
        t1 = timeit.default_timer()
        f_size = os.path.getsize(file_tmp) / b_unit
        print("Finish saving intermediate file! File size: {0:0.2f} MB. "
              "Time: {1:0.2f}s. The file will be deleted at the end!"
              "".format(f_size, t1 - t0))
    return file_tmp, out_key, folder_tmp

def reslice_dataset(input_, output, axis=1, key_path=None, rescaling=False,
                    nbit=16, minmax=None, skip=None, rotate=0.0, chunk=16,
                    mode="constant", crop=(0, 0, 0, 0, 0, 0), ncore=None,
                    show_progress=True, overwrite=False):
    """
    Reslice a 3D dataset. Input can be a folder of tif files or a hdf file.

    Parameters
    ----------
    input_ : str
        It can be a folder path to tif files or a hdf file.
    output : str
        It can be a folder path (for generated tif files) or a hdf
        file-path.
    axis : {1, 2}
        Slicing axis. This axis becomes the 0-axis of the output.
    key_path : str, optional
        Key path to the dataset if the input is a hdf file.
    rescaling : bool
        Rescale dataset if True.
    nbit : {8, 16}
        If rescaling is True, select data-type: 8-bit or 16-bit.
    minmax : tuple of float, or None
        Minimum and maximum values used for rescaling if True.
    skip : int or None
        Skipping step of images used for getting statistical information
        if rescaling is True and input is 32-bit data.
    rotate : float
        Rotate image (degree). Positive direction is counterclockwise.
    chunk : int
        Number of images to be loaded/saved in one go to reduce IO overhead.
    mode : {'reflect', 'grid-mirror', 'constant', 'grid-constant',
            'nearest', 'mirror', 'grid-wrap', 'wrap'}
        Select how the input array is extended beyond its boundaries.
    crop : tuple of int, optional
        Crop 3D data from the edges, i.e.
        crop = (crop_depth1, crop_depth2, crop_height1, crop_height2,
        crop_width1, crop_width2). Cropping is done before reslicing.
    ncore : int or None
        Number of cpu-cores. Automatically selected if None.
    show_progress : bool
        Show the progress of reslicing data if True.
    overwrite : bool
        Overwrite an existing file/folder if True.

    Returns
    -------
    None
        Resliced data are saved to a folder of tif files or to a hdf file.
    """
    if output is None:
        raise ValueError("Wrong output type !!!")
    if axis != 1 and axis != 2:
        raise ValueError("Only two options for axis: 1 or 2")
    else:
        axis = int(axis)
    if not overwrite:
        __check_output(output)
    in_type = __get_input_type(input_)
    if in_type != "tif" and in_type != "hdf":
        raise ValueError("Wrong input type !!!")
    results = __save_intermediate_data(input_, output, axis, crop, key_path,
                                       rotate, chunk, mode, ncore,
                                       show_progress)
    file_tmp, key_tmp, folder_tmp = results
    with h5py.File(file_tmp, 'r') as hdf_object:
        data = hdf_object[key_tmp]
        (depth1, height1, width1) = data.shape
        chunk = np.clip(chunk, 1, height1 - 1)
        last_chunk = height1 - chunk * (height1 // chunk)
        data_type = data.dtype
        res_type = str(data_type)
        if rescaling is True:
            if nbit == 16:
                res_type = "uint16"
            elif nbit == 8:
                res_type = "uint8"
            else:
                raise ValueError("Only two options for nbit: 8 or 16 !!!")
            if str(data_type) != res_type:
                if data_type == np.uint8:
                    minmax = (0, 255)
                elif data_type == np.uint16:
                    minmax = (0, 65535)
                else:
                    if skip is None:
                        skip = min(20, int(0.02 * depth1))
                    skip = int(np.clip(skip, 1, depth1 - 1))
                    if minmax is None:
                        f_alias = get_statistical_information_dataset
                        minmax = f_alias(input_, percentile=(0, 100),
                                         skip=skip, crop=crop,
                                         key_path=key_path)[0:2]
            else:
                rescaling = False
        out_type = __get_output_type(output)
        t0 = timeit.default_timer()
        if out_type == "hdf":
            key_path = "entry/data" if key_path is None else key_path
            data_slice = losa.open_hdf_stream(output,
                                              (height1, depth1, width1),
                                              data_type=res_type,
                                              key_path=key_path,
                                              overwrite=True)
            for i in range(0, height1 - last_chunk, chunk):
                if show_progress:
                    t1 = timeit.default_timer()
                    f_size = __get_dataset_size(output)
                    msg = "Save resliced data to file: {0:0.2f} MB. " \
                          "Time: {1:0.2f}s".format(f_size, t1 - t0)
                    len_msg = len(msg)
                    sys.stdout.write(msg)
                    sys.stdout.flush()
                mat_chunk = data[:, i: i + chunk, :]
                if rescaling:
                    mat_tmp = []
                    for j in range(chunk):
                        mat = rescale(mat_chunk[:, j, :], nbit, minmax)
                        mat_tmp.append(mat)
                    mat_tmp = np.asarray(mat_tmp)
                else:
                    mat_tmp = np.moveaxis(mat_chunk, 1, 0)
                data_slice[i:i + chunk] = mat_tmp
                if show_progress:
                    sys.stdout.write("\r" + " " * len_msg + "\r")
            if last_chunk != 0:
                mat_chunk = data[:, height1 - last_chunk: height1, :]
                if rescaling:
                    mat_tmp = []
                    for j in range(last_chunk):
                        mat = rescale(mat_chunk[:, j, :], nbit, minmax)
                        mat_tmp.append(mat)
                    mat_tmp = np.asarray(mat_tmp)
                else:
                    mat_tmp = np.moveaxis(mat_chunk, 1, 0)
                data_slice[height1 - last_chunk: height1] = mat_tmp
        else:
            list_file, len_msg = None, None
            for i in range(0, height1 - last_chunk, chunk):
                if show_progress:
                    t1 = timeit.default_timer()
                    list_file = glob.glob(output + "/*tif*")
                    if list_file:
                        f_size = __get_dataset_size(output)
                        msg = "Save resliced data to file: {0:0.2f} MB. " \
                              "Time: {1:0.2f}s".format(f_size, t1 - t0)
                        len_msg = len(msg)
                        sys.stdout.write(msg)
                        sys.stdout.flush()
                mat_chunk = data[:, i: i + chunk, :]
                out_files = [output + "/img_" + ("0000" + str(i + j))[-5:]
                             + ".tif" for j in range(chunk)]
                if rescaling:
                    mat_chunk = rescale(mat_chunk, nbit, minmax)
                losa.save_image_multiple(out_files,
                                         mat_chunk.astype(res_type), axis=1,
                                         ncore=ncore, prefer="threads")
                if show_progress:
                    if list_file:
                        sys.stdout.write("\r" + " " * len_msg + "\r")
            if last_chunk != 0:
                idx = height1 - last_chunk
                mat_chunk = data[:, idx: height1, :]
                out_files = [output + "/img_" + ("0000" + str(idx + j))[-5:]
                             + ".tif" for j in range(last_chunk)]
                if rescaling:
                    mat_chunk = rescale(mat_chunk, nbit, minmax)
                losa.save_image_multiple(out_files,
                                         mat_chunk.astype(res_type), axis=1,
                                         ncore=ncore, prefer="threads")
    if os.path.isdir(folder_tmp):
        shutil.rmtree(folder_tmp)
    if out_type == "hdf":
        shutil.rmtree(os.path.splitext(output)[0])
    if show_progress:
        t1 = timeit.default_timer()
        f_size = __get_dataset_size(output)
        print("Finish reslicing data! File size: {0:0.2f} MB. "
              "Time: {1:0.2f}s".format(f_size, t1 - t0))

def remove_ring_based_fft(mat, u=20, n=8, v=1, sort=False):
    """
    Remove ring artifacts in a reconstructed image by combining the polar
    transform and the fft-based method (Ref. [1]).

    Parameters
    ----------
    mat : array_like
        Square array. Reconstructed image.
    u : int
        Cutoff frequency.
    n : int
        Filter order.
    v : int
        Number of rows (times 2) to which the filter is applied.
    sort : bool, optional
        Apply sorting (Ref. [2]) if True.

    Returns
    -------
    array_like
        Ring-removed image.

    References
    ----------
    [1] : https://doi.org/10.1063/1.1149043

    [2] : https://doi.org/10.1364/OE.26.028396
    """
    (nrow, ncol) = mat.shape
    if nrow != ncol:
        raise ValueError(
            "Width and height of the reconstructed image are not the same")
    mask = util.make_circle_mask(ncol, 1.0)
    (x_mat, y_mat) = util.rectangular_from_polar(ncol, ncol, ncol, ncol)
    (r_mat, theta_mat) = util.polar_from_rectangular(ncol, ncol, ncol, ncol)
    # Unwarp the image into polar coordinates, remove stripes (rings become
    # stripes in polar space), then map back and apply a circular mask.
    polar_mat = util.mapping(mat, x_mat, y_mat)
    polar_mat = remo.remove_stripe_based_fft(polar_mat, u, n, v, sort=sort)
    mat_rec = util.mapping(polar_mat, r_mat, theta_mat)
    return mat_rec * mask

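# Usage sketch (commented out; img_rec is assumed to be a square 2D
# reconstructed slice loaded elsewhere):
#
#     import algotom.post.postprocessing as post
#
#     img_corr = post.remove_ring_based_fft(img_rec, u=20, n=8, v=1,
#                                           sort=False)
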
def remove_ring_based_wavelet_fft(mat, level=5, size=1, wavelet_name="db9",
                                  sort=False):
    """
    Remove ring artifacts in a reconstructed image by combining the polar
    transform and the wavelet-fft-based method (Ref. [1]).

    Parameters
    ----------
    mat : array_like
        Square array. Reconstructed image.
    level : int
        Wavelet decomposition level.
    size : int
        Damping parameter. Larger is stronger.
    wavelet_name : str
        Name of a wavelet. Search pywavelets API for a full list.
    sort : bool, optional
        Apply sorting (Ref. [2]) if True.

    Returns
    -------
    array_like
        Ring-removed image.

    References
    ----------
    [1] : https://doi.org/10.1364/OE.17.008567

    [2] : https://doi.org/10.1364/OE.26.028396
    """
    (nrow, ncol) = mat.shape
    if nrow != ncol:
        raise ValueError(
            "Width and height of the reconstructed image are not the same")
    mask = util.make_circle_mask(ncol, 1.0)
    (x_mat, y_mat) = util.rectangular_from_polar(ncol, ncol, ncol, ncol)
    (r_mat, theta_mat) = util.polar_from_rectangular(ncol, ncol, ncol, ncol)
    # Same polar-transform scheme as remove_ring_based_fft, with the
    # wavelet-fft stripe remover applied in polar space.
    polar_mat = util.mapping(mat, x_mat, y_mat)
    polar_mat = remo.remove_stripe_based_wavelet_fft(polar_mat, level, size,
                                                     wavelet_name, sort=sort)
    mat_rec = util.mapping(polar_mat, r_mat, theta_mat)
    return mat_rec * mask

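# Usage sketch (commented out; img_rec is assumed to be a square 2D
# reconstructed slice, and the parameters shown are the defaults):
#
#     import algotom.post.postprocessing as post
#
#     img_corr = post.remove_ring_based_wavelet_fft(img_rec, level=5,
#                                                   size=1,
#                                                   wavelet_name="db9")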