from typing import Tuple
from collections.abc import Callable
import cv2
import numpy as np
from ..deferred import deferred_execution
[docs]
@deferred_execution
def padding_2d(data: np.ndarray, target_shape: Tuple[int, int], fill_value: float = 1.0) -> np.ndarray:
    """
    Pads a 2D (or 3D) array to the target shape with the specified fill value.

    The input is centered in the output. When the amount of padding along an axis is odd,
    the extra row/column ends up on the bottom/right side. The output keeps the dtype of
    ``data`` (``fill_value`` is cast to it). For a 3D input the third (channel) axis is
    kept as is and only height and width are padded.

    Args:
        data (np.ndarray): The input 2D (or 3D) array representing an image.
        target_shape (tuple): The desired shape of the output array (height, width).
        fill_value (float, optional): The value used for padding. Defaults to 1.0.

    Returns:
        np.ndarray: The padded array, of shape ``target_shape`` for 2D input or
        ``(*target_shape, channels)`` for 3D input.

    Raises:
        ValueError: If the input data is not a 2D or 3D array.
            If ``target_shape`` does not contain exactly two values.
            If the input data is larger than the target shape along height or width.

    Examples:
        >>> data = np.array([[1, 2], [3, 4]])
        >>> padding_2d(data, (4, 4), fill_value=0)
        array([[0, 0, 0, 0],
               [0, 1, 2, 0],
               [0, 3, 4, 0],
               [0, 0, 0, 0]])
    """
    shape = data.shape

    # Only image-like arrays are supported
    if len(shape) not in (2, 3):
        raise ValueError("Input data must be 2D or 3D array")

    # The target only describes the two spatial axes
    if len(target_shape) != 2:
        raise ValueError(f"target_shape must be (height, width), got {tuple(target_shape)}")

    # Final shape: spatial target plus the untouched channel axis (if any)
    full_shape = tuple(target_shape) + tuple(shape[2:])

    # Padding can only grow the array
    if any(s > t for s, t in zip(shape, full_shape)):
        raise ValueError("Data shape must be smaller than target shape to add padding; "
                         f"target : {full_shape} ; data shape : {shape}")

    # Start from a canvas filled with the padding value, in the input dtype
    padded_data = np.full(full_shape, fill_value, dtype=data.dtype)

    # Offsets of the original image inside the canvas (for centering)
    top_pad = (full_shape[0] - shape[0]) // 2
    left_pad = (full_shape[1] - shape[1]) // 2

    # Slicing the first two axes works for both the 2D and the 3D case
    padded_data[top_pad:top_pad + shape[0], left_pad:left_pad + shape[1]] = data
    return padded_data
[docs]
@deferred_execution
def resize_with_max_distortion(data: np.ndarray,
                               target_shape: Tuple[int, int],
                               max_ratio_distortion: float) -> np.ndarray:
    """
    Resizes the input 2D or 3D array (image) to fit a target shape with a bound on aspect-ratio distortion.

    The aspect ratio (width / height) of the output is the target aspect ratio, clamped to the range
    ``original_ratio * (1 - max_ratio_distortion)`` .. ``original_ratio * (1 + max_ratio_distortion)``.
    The output is then given the largest size with that aspect ratio that still fits inside
    ``target_shape``. No padding is added, so the result can be smaller than ``target_shape`` along
    one axis.

    Args:
        data (np.ndarray): The input 2D or 3D array to be resized. Typically, this represents an image.
        target_shape (Tuple[int, int]): The desired target shape (height, width) for the output array.
        max_ratio_distortion (float): The maximum allowed relative change of the aspect ratio
                                      (width / height) during resizing. 0 as max distortion ensures
                                      aspect ratio is kept (up to integer rounding of the output size).

    Returns:
        np.ndarray: The resized array that fits within the specified target shape.

    Raises:
        ValueError: If the input data is not a 2D or 3D array.
    """
    # Validate input dimensions
    if len(data.shape) not in [2, 3]:
        raise ValueError("Input data is not a 2D or 3D array")

    # Get original and target dimensions
    height, width = data.shape[:2]
    target_height, target_width = target_shape

    # Aspect ratios are expressed as width / height
    original_aspect_ratio = width / height
    target_aspect_ratio = target_width / target_height

    # Range of aspect ratios reachable without exceeding the allowed distortion
    allowed_aspect_ratio_min = original_aspect_ratio * (1 - max_ratio_distortion)
    allowed_aspect_ratio_max = original_aspect_ratio * (1 + max_ratio_distortion)

    # Get as close to the target aspect ratio as the allowed range permits
    adjusted_aspect_ratio = min(max(target_aspect_ratio, allowed_aspect_ratio_min), allowed_aspect_ratio_max)

    if adjusted_aspect_ratio > target_aspect_ratio:
        # Try the full target height first; fall back to the full width if that overflows
        new_height = target_height
        new_width = int(new_height * adjusted_aspect_ratio)
        if new_width > target_width:
            new_width = target_width
            new_height = int(new_width / adjusted_aspect_ratio)
    else:
        # Try the full target width first; fall back to the full height if that overflows
        new_width = target_width
        new_height = int(new_width / adjusted_aspect_ratio)
        if new_height > target_height:
            new_height = target_height
            new_width = int(new_height * adjusted_aspect_ratio)

    # Keep the size inside the target and never let int() truncation produce a
    # 0-pixel dimension, which cv2.resize rejects
    new_height = min(max(new_height, 1), target_height)
    new_width = min(max(new_width, 1), target_width)

    # cv2.resize expects the size as (width, height)
    resized_image = cv2.resize(data, (new_width, new_height))
    return resized_image
[docs]
@deferred_execution
def open_rgb_image(path: str) -> np.ndarray:
    """Open an image using cv2 and convert it from OpenCV's BGR order to RGB.

    Args:
        path (str): path of the image

    Returns:
        np.ndarray: array representing the image, with channels in RGB order

    Raises:
        FileNotFoundError: If no file exists at ``path``.
        ValueError: If the file exists but could not be read as an image.

    Examples:
        >>> img = open_rgb_image('path/to/image.jpg')
        >>> img.shape
        (height, width, 3)
    """
    import os  # local import: only needed to build a precise error message

    img = cv2.imread(path)

    # cv2.imread does not raise on failure, it returns None; fail here with a
    # clear message instead of letting cvtColor crash on the None input
    if img is None:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image file not found: {path!r}")
        raise ValueError(f"File could not be read as an image: {path!r}")

    # cv2 loads colour images as BGR; callers expect RGB
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
[docs]
@deferred_execution
def image_to_channel_num(image: np.ndarray,
channel_number_target: int = 3,
fill_value: float | int = 0.0) -> np.ndarray:
"""
Convert an image to the specified number of channels.
Args:
image (np.ndarray): Input image array, which can be grayscale (2D),
single-channel (3D), or multi-channel (3D).
channel_number_target (int, optional): Target number of channels. Defaults to 3.
fill_value (float | int, optional): Value used to fill new channels if needed. Defaults to 1.0.
Returns:
np.ndarray: Image array with the specified number of channels.
Examples:
>>> image = np.array([[1, 2], [3, 4]])
>>> image_to_channel_num(image, channel_number_target=3)
array([[[1, 1, 1],
[2, 2, 2]],
[[3, 3, 3],
[4, 4, 4]]])
"""
# if image is BW
if len(image.shape) == 2:
image_3d = np.repeat(image[:,:, np.newaxis], channel_number_target, -1)
# if image is a BW 3d with single channel
elif len(image.shape) == 3 and image.shape[2] == 1:
image_3d = np.repeat(image, channel_number_target, -1)
# If the image has fewer channels than the target
elif len(image.shape) == 3 and image.shape[2] < channel_number_target:
image_3d = np.full((*image.shape[:2], channel_number_target), fill_value, dtype=image.dtype)
image_3d[:,:,:image.shape[2]] = image
# image has more channels than the target => truncate
else:
image_3d = image[:,:,:channel_number_target]
return image_3d
[docs]
@deferred_execution
def image_hwc_to_chw(data: np.ndarray) -> np.ndarray:
    """
    Reorders the axes of an image from HWC (Height-Width-Channel) to CHW (Channel-Height-Width).

    Args:
        data (np.ndarray): Image array laid out as (height, width, channels).

    Returns:
        np.ndarray: The same image laid out as (channels, height, width).

    Raises:
        ValueError: If the input does not have exactly 3 dimensions.
    """
    n_dims = len(data.shape)
    if n_dims == 3:
        # Channel axis (2) moves to the front, height (0) and width (1) follow
        return np.transpose(data, [2, 0, 1])
    raise ValueError("input data must be dim 3")
[docs]
@deferred_execution
def image_chw_to_hwc(data: np.ndarray) -> np.ndarray:
    """
    Reorders the axes of an image from CHW (Channel-Height-Width) to HWC (Height-Width-Channel).

    Args:
        data (np.ndarray): Image array laid out as (channels, height, width).

    Returns:
        np.ndarray: The same image laid out as (height, width, channels).

    Raises:
        ValueError: If the input does not have exactly 3 dimensions.
    """
    n_dims = len(data.shape)
    if n_dims == 3:
        # Height (1) and width (2) come first, the channel axis (0) goes last
        return np.transpose(data, [1, 2, 0])
    raise ValueError("input data must be dim 3")
[docs]
def _reshape_array_for_pooling(data: np.ndarray, strides: int) -> np.ndarray:
    """Split the two leading axes of ``data`` into square blocks ready for pooling.

    Rows and columns that do not fill a complete block are dropped. All axes after
    the first two are flattened into a single trailing axis (of length 1 when
    ``data`` is 2D).

    Args:
        data (np.ndarray): The input array representing the image or data to be pooled.
        strides (int): Side length of the pooling blocks.

    Returns:
        np.ndarray: Array of shape (rows // strides, strides, cols // strides, strides, -1).
                    Reducing over axes 1 and 3 pools each block.
    """
    rows, cols = data.shape[:2]

    # Number of complete blocks along each spatial axis
    n_row_blocks, n_col_blocks = rows // strides, cols // strides

    # Crop the leftover border so the spatial axes divide evenly into blocks
    cropped = data[:n_row_blocks * strides, :n_col_blocks * strides]
    return cropped.reshape(n_row_blocks, strides, n_col_blocks, strides, -1)
[docs]
@deferred_execution
def max_pooling_2d(data: np.ndarray, strides: int = 2) -> np.ndarray:
    """Apply 2D max pooling to the input data.

    The two leading axes of ``data`` are cut into non-overlapping ``strides`` x ``strides``
    blocks (incomplete border blocks are dropped) and each block is replaced by its maximum.

    Args:
        data (np.ndarray): The input array representing the image or data to be pooled.
        strides (int, optional): Side length of the pooling blocks. Defaults to 2.

    Returns:
        np.ndarray: Pooled array of shape (height // strides, width // strides, C), where C
                    is the flattened size of the remaining axes (1 for a 2D input).
    """
    blocks = _reshape_array_for_pooling(data, strides)

    # Axes 1 and 3 index the positions inside each block
    return blocks.max(axis=(1, 3))
[docs]
@deferred_execution
def avg_pooling_2d(data: np.ndarray, strides: int = 2) -> np.ndarray:
    """Apply 2D average pooling to the input data.

    The two leading axes of ``data`` are cut into non-overlapping ``strides`` x ``strides``
    blocks (incomplete border blocks are dropped) and each block is replaced by its mean.

    Args:
        data (np.ndarray): The input array representing the image or data to be pooled.
        strides (int, optional): Side length of the pooling blocks. Defaults to 2.

    Returns:
        np.ndarray: Pooled array of shape (height // strides, width // strides, C), where C
                    is the flattened size of the remaining axes (1 for a 2D input).
    """
    blocks = _reshape_array_for_pooling(data, strides)

    # Axes 1 and 3 index the positions inside each block
    return blocks.mean(axis=(1, 3))
[docs]
@deferred_execution
def any_pooling_2d(
        data: np.ndarray,
        strides: int = 2, *,
        pooling_function: Callable,
        axis_kw: str = "axis") -> np.ndarray:
    """Apply a custom pooling operation to the input data.

    The two leading axes of ``data`` are cut into non-overlapping ``strides`` x ``strides``
    blocks (incomplete border blocks are dropped). ``pooling_function`` is then called once
    on the whole block array and asked to reduce the two in-block axes.

    Args:
        data (np.ndarray): The input array representing the image or data to be pooled.
        strides (int, optional): Side length of the pooling blocks. Defaults to 2.
        pooling_function (Callable): Reduction to apply. It receives an array of shape
                                     (height // strides, strides, width // strides, strides, C)
                                     as its first positional argument, plus the axes to reduce,
                                     ``(1, 3)``, under the keyword named by ``axis_kw``.
        axis_kw (str, optional): Name of the keyword argument through which
                                 ``pooling_function`` receives the axes to reduce.
                                 Defaults to "axis".

    Returns:
        np.ndarray: Whatever ``pooling_function`` returns for the block array.
    """
    blocks = _reshape_array_for_pooling(data, strides)

    # Axes 1 and 3 index the positions inside each block
    return pooling_function(blocks, **{axis_kw: (1, 3)})