Source code for data.clean_dataset

import os
import numpy as np

from import imread
from skimage.color import rgb2gray
from skimage.util import img_as_float32
from import module_logger
from import corrupt
from import gaussian_noise
from import AbstractDatasetGenerator

class CleanDatasetGenerator(AbstractDatasetGenerator):
    """Dataset generator based on Keras library.

    This class is used for non-blind denoising problems where only clean images are
    available. To use such a dataset to train denoising networks, you need to specify a
    type of artificial noise that will be added to each clean image.

    Attributes
    ----------
    path : str
        String containing the path to the dataset directory. Clean images are read from
        its "ref" sub-directory.
    batch_size : int
        Size of image batch.
    n_channels : int
        1 for grayscale, 3 for RGB.
    shuffle : bool
        Whether to shuffle the dataset at each epoch or not.
    name : str
        String containing the dataset's name.
    preprocessing : list
        List of preprocessing functions, which will be applied to each image.
    noise_functions : list
        Functions implementing the noise process, in the order they are applied.
    noise_args : list
        For each entry of `noise_functions`, the list of parameters passed to it.
    filenames : :class:`numpy.ndarray`
        Names of the files found in the "ref" sub-directory.
    image_shape : tuple
        Shape of the first array of the first batch (i.e. ``self[0][0].shape``).

    Examples
    --------
    The following example corresponds to a Dataset Generator which reads images from
    "./images/ref", yields batches of length 32, and applies Gaussian noise with intensity
    drawn uniformly from the range [0, 55] followed by "Super Resolution noise" of
    intensity 4. Moreover, the dataset shuffles the data, yields grayscale images in NHWC
    format, and does not apply any preprocessing function.

    **NOTE**: your list should be in the same order as your arguments.

    >>> from OpenDenoising import data
    >>> noise_config = {data.utils.gaussian_blind_noise: [0, 55],
    ...                 data.utils.super_resolution_noise: [4]}
    >>> datagen = data.CleanDatasetGenerator("./images", 32, noise_config, True, "MyData", 1, None)
    """

    def __init__(self, path, batch_size=32, noise_config=None, shuffle=True, name="CleanDataset",
                 n_channels=1, preprocessing=None):
        """Builds the generator and reads one batch to discover the image shape.

        Parameters
        ----------
        path : str
            Dataset directory; it must contain a "ref" sub-directory with the clean images.
        batch_size : int
            Number of files read per batch.
        noise_config : dict, optional
            Dictionary whose keys are functions implementing the noise process, and whose
            values are lists with the extra positional parameters of each function (use an
            empty list when there are none). Defaults to ``{gaussian_noise: [25]}``.
        shuffle : bool
            Forwarded to the parent class.
        name : str
            Dataset name, forwarded to the parent class.
        n_channels : int
            1 for grayscale, 3 for RGB.
        preprocessing : list, optional
            Functions called as ``inp, ref = func(inp, ref)`` on every image pair.
        """
        super().__init__(path, batch_size, shuffle, name, n_channels)
        if noise_config is None:
            noise_config = {gaussian_noise: [25]}
        # Snapshot both the functions and their arguments so that later mutation of the
        # caller's dictionary cannot desynchronize the two sequences zipped together in
        # __data_generation.
        self.noise_functions = list(noise_config)
        self.noise_args = [noise_config[noise_function] for noise_function in self.noise_functions]
        self.preprocessing = [] if preprocessing is None else preprocessing
        self.n_channels = n_channels
        ref_dir = os.path.join(self.path, "ref")
        self.filenames = np.array(os.listdir(ref_dir))
        # Index of the batch that the next call to __next__ will return.
        self._next_index = 0
        self.on_epoch_end()
        module_logger.info("Generating data from {}".format(ref_dir))
        # Reads the first batch; this fails early if the "ref" directory is empty or unreadable.
        self.image_shape = self[0][0].shape
        module_logger.debug("[{}] Image shape: {}".format(self.name, self.image_shape))

    def __getitem__(self, i):
        """Generates batches of data.

        Parameters
        ----------
        i : int
            Batch index. The batch covers ``filenames[i * batch_size: (i + 1) * batch_size]``.

        Returns
        -------
        inp : :class:`numpy.ndarray`
            Batch of noisy images.
        ref : :class:`numpy.ndarray`
            Batch of reference images.
        """
        # Get batch_filenames
        batch_filenames = self.filenames[i * self.batch_size: (i + 1) * self.batch_size]
        module_logger.debug("[{}] Got following batch names: {}".format(self, batch_filenames))
        # Get data batches
        inp, ref = self.__data_generation(batch_filenames)
        return inp, ref

    def __data_generation(self, batch_filenames):
        """Data generation method

        Parameters
        ----------
        batch_filenames : list
            List of strings containing filenames to read from the "ref" sub-directory.

        Returns
        -------
        noisy_batch : :class:`numpy.ndarray`
            Batch of noisy images.
        clean_batch : :class:`numpy.ndarray`
            Batch of reference images.

        Raises
        ------
        ValueError
            If a 2D (grayscale) image is read while `n_channels` is 3.
        """
        # Noised image and ground truth initialization
        inp_batch = []
        ref_batch = []
        for filename in batch_filenames:
            filepath = os.path.join(self.path, 'ref', filename)
            ref = img_as_float32(imread(filepath))
            if ref.ndim == 3 and ref.shape[-1] == 3 and self.n_channels == 1:
                # Converts RGB to Gray
                ref = rgb2gray(ref)
            if ref.ndim == 2:
                if self.n_channels == 1:
                    # Expand last dim if image is grayscale
                    ref = np.expand_dims(ref, axis=-1)
                elif self.n_channels == 3:
                    raise ValueError("Expected RGB image but got Grayscale (image shape: {})".format(ref.shape))
            # Work on a copy so that the reference stays clean.
            inp = ref.copy()
            for noise_function, noise_arg in zip(self.noise_functions, self.noise_args):
                # Adds noise to the reference.
                inp = noise_function(inp, *noise_arg)
            # Applies preprocessing functions in order
            for func in self.preprocessing:
                inp, ref = func(inp, ref)
            ref_batch.append(ref)
            inp_batch.append(inp)
        inp_batch = np.stack(inp_batch)
        ref_batch = np.stack(ref_batch)
        if inp_batch.ndim > 4:
            # A preprocessing step returned an extra leading axis per image: merge it with
            # the batch axis so the result is 4-dimensional again.
            inp_batch = inp_batch.reshape([-1, *inp_batch.shape[2:]])
            ref_batch = ref_batch.reshape([-1, *ref_batch.shape[2:]])
        return inp_batch, ref_batch

    def __next__(self):
        """Returns the next batch, starting over from the first one after the last.

        Returns
        -------
        input_batch : :class:`numpy.ndarray`
            Batch of noisy images.
        output_batch : :class:`numpy.ndarray`
            Batch of reference images.
        """
        # Wrap around once the cursor points past the last file; a trailing batch with
        # fewer than `batch_size` files is still returned.
        if self._next_index * self.batch_size >= len(self.filenames):
            self._next_index = 0
        input_batch, output_batch = self[self._next_index]
        self._next_index += 1
        return input_batch, output_batch

    def __str__(self):
        """Returns the dataset's name."""
        return str(self.name)

    def __repr__(self):
        """Returns a one-line summary of the dataset configuration."""
        return "Dataset name: {}, Dataset type: {}, Path: {}, " \
               "Batch Size: {}, preprocessing: {}, shape: {}".format(self, "Clean", self.path, self.batch_size,
                                                                     self.preprocessing, self.image_shape)