Source code for vr180_convert.transformer
from __future__ import annotations
import warnings
from abc import ABCMeta, abstractmethod
from typing import Any, Generic, Literal, Sequence, TypeVar
import attrs
import numpy as np
from numpy.typing import NDArray
from quaternion import quaternion, rotate_vectors
from sklearn.base import BaseEstimator, TransformerMixin
[docs]
class TransformerBase(
BaseEstimator,
TransformerMixin,
metaclass=ABCMeta,
):
"""Base class for transformers."""
# def fit(self, image: NDArray, **kwargs: Any) -> None:
# pass
[docs]
@abstractmethod
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
"""
Transform the input coordinates.
Parameters
----------
x : NDArray
x (left-right) coordinates.
y : NDArray
y (up-down) coordinates.
**kwargs : Any
Any additional keyword arguments.
Returns
-------
tuple[NDArray, NDArray]
x and y coordinates after transformation.
"""
pass
[docs]
@abstractmethod
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
"""
Inverse transform the input coordinates.
Parameters
----------
x : NDArray
x (left-right) coordinates.
y : NDArray
y (up-down) coordinates.
**kwargs : Any
Any additional keyword arguments.
Returns
-------
tuple[NDArray, NDArray]
x and y coordinates after transformation.
"""
def __mul__(self, other: TransformerBase) -> MultiTransformer:
"""Multiply two transformers together."""
if isinstance(self, MultiTransformer) and isinstance(other, MultiTransformer):
return MultiTransformer(
transformers=[*self.transformers, *other.transformers]
)
if isinstance(self, MultiTransformer):
return MultiTransformer(transformers=[*self.transformers, other])
if isinstance(other, MultiTransformer):
return MultiTransformer(transformers=[self, *other.transformers])
return MultiTransformer(transformers=[self, other])
T = TypeVar("T", bound=TransformerBase)
[docs]
@attrs.define()
class MultiTransformer(TransformerBase):
"""A transformer that applies multiple transformers in sequence."""
transformers: list[TransformerBase]
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
for transformer in self.transformers:
x, y = transformer.transform(x, y, **kwargs)
return x, y
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
for transformer in reversed(self.transformers):
x, y = transformer.inverse_transform(x, y, **kwargs)
return x, y
[docs]
def get_radius(input: NDArray, *, threshold: int = 10) -> float:
"""
Estimate the radius of the circle in the image.
Parameters
----------
input : NDArray
The input image.
threshold : int, optional
The threshold to determine if a pixel is black, by default 10
Returns
-------
float
The estimated radius.
"""
height, width = input.shape[:2]
if width > height:
center_row = input[height // 2, :, :]
else:
center_row = input[:, width // 2, :]
del height, width
# determine if a pixel is black
center_row_is_black = np.mean(center_row, axis=-1) < threshold
center_row_is_black_deriv = np.diff(center_row_is_black.astype(int))
# first and last 1 in the derivative
center_row_black_start = np.where(center_row_is_black_deriv == 1)[0][0]
center_row_black_end = np.where(center_row_is_black_deriv == -1)[0][-1]
radius = (center_row_black_end - center_row_black_start) / 2
return radius
[docs]
@attrs.define()
class NormalizeTransformer(TransformerBase):
"""Normalize the coordinates to [-1, 1]."""
center: tuple[float, float] | None = None
"""The center of the image. If None, the center is the center of the image."""
scale: tuple[float, float] | Literal["min", "max"] | None = None
"""The scale of the image. If "min" or None, the scale is the minimum of the width and height.
If "max", the scale is the maximum of the width and height."""
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
center = self.center or (x.shape[1] / 2, x.shape[0] / 2)
scale = (
min(x.shape[1], x.shape[0])
if self.scale in ["min", None]
else max(x.shape[1], x.shape[0]) if self.scale == "max" else self.scale
)
x = (x - center[0]) / scale * 2
y = (y - center[1]) / scale * 2
return x, y
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
center = self.center or (x.shape[1] / 2, x.shape[0] / 2)
scale = (
min(x.shape[1], x.shape[0])
if self.scale in ["min", None]
else max(x.shape[1], x.shape[0]) if self.scale == "max" else self.scale
)
x = x * scale[0] + center[0]
y = y * scale[1] + center[1]
return x, y
# @attrs.define()
# class AutoDetectRadiusNormalizeTransformer(NormalizeTransformer):
# def fit(self, image: NDArray, **kwargs: Any) -> None:
# radius = get_radius(image)
# self.center = (image.shape[1] // 2, image.shape[0] // 2)
# self.scale = (radius, radius)
[docs]
@attrs.define()
class DenormalizeTransformer(TransformerBase):
"""Denormalize the coordinates from [-1, 1] to the original image size."""
scale: tuple[float, float]
"""The scale of the image. Recommended to be the half of the width and height of the result image."""
center: tuple[float, float]
"""The center of the image. Recommended to be the center of the result image."""
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
scale = self.scale
center = self.center
x = x * scale[0] + center[0]
y = y * scale[1] + center[1]
return x, y
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
scale = self.scale
center = self.center
x = (x - center[0]) / scale[0]
y = (y - center[1]) / scale[1]
return x, y
[docs]
@attrs.define()
class PolarRollTransformer(TransformerBase):
"""Transform using polar coordinates."""
[docs]
@abstractmethod
def transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
"""
Transform using polar coordinates.
Parameters
----------
theta : NDArray
The distance or angle from the center (front-facing direction)
roll : NDArray
The angle around the center (front-facing direction)
**kwargs : Any
Any additional keyword arguments.
Returns
-------
tuple[NDArray, NDArray]
theta and roll after transformation.
"""
pass
[docs]
@abstractmethod
def inverse_transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
"""
Inverse transform using polar coordinates.
Parameters
----------
theta : NDArray
The distance or angle from the center (front-facing direction)
roll : NDArray
The angle around the center (front-facing direction)
**kwargs : Any
Any additional keyword arguments.
Returns
-------
tuple[NDArray, NDArray]
theta and roll after transformation.
"""
pass
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
theta = np.sqrt(x**2 + y**2)
roll = np.arctan2(y, x)
theta, roll = self.transform_polar(theta, roll, **kwargs)
x = theta * np.cos(roll)
y = theta * np.sin(roll)
return x, y
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
theta = np.sqrt(x**2 + y**2)
roll = np.arctan2(y, x)
theta, roll = self.inverse_transform_polar(theta, roll, **kwargs)
x = theta * np.cos(roll)
y = theta * np.sin(roll)
return x, y
[docs]
@attrs.define()
class RectilinearDecoder(PolarRollTransformer):
"""Encodes rectilinear image."""
# https://en.wikipedia.org/wiki/Image_sensor_format
focal_length: float
"""The focal length of the lens in mm."""
sensor_width: (
Literal["35mm", "APS-H", "APS-C", "APS-C-Canon", "Foveon", "MFT"] | str | float
) = "35mm"
"""The sensor width of the camera in mm if float,
or in inches if str, or a standard sensor width if str."""
@property
def sensor_width_mm(self) -> float:
"""Sensor width in mm."""
if self.sensor_width in ["35mm", "APS-C", "1/2.3"]:
warnings.warn(
"Sensor size may vary by about 0.2 mm depending on the camera model. "
"To get very accurate results, consider setting the sensor width in mm manually.",
UserWarning,
stacklevel=2,
)
# https://en.wikipedia.org/wiki/Image_sensor_format#Table_of_sensor_formats_and_sizes
known_widths = {
"35mm": 36.0, # ~ 35.8mm
"APS-H": 27.90,
"APS-C": 23.6, # ~ 23.7mm
"APS-C-Canon": 22.30,
"MFT": 17.30,
"1": 13.20,
"1/1.12": 11.43,
"1/1.2": 10.67,
"1/1.33": 9.6,
"1/1.6": 8.08,
"1/1.7": 7.60,
"1/1.8": 7.18,
"1/2": 6.40,
"1/2.3": 6.17,
}
if isinstance(self.sensor_width, str):
return known_widths[self.sensor_width]
return self.sensor_width
@property
def factor(self) -> float:
"""Zoom factor applied after tan."""
return 2 * self.focal_length / self.sensor_width_mm
[docs]
def transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
return np.tan(theta) * self.factor, roll
[docs]
def inverse_transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
# fov = 2 arctan sensor_width / (2 * focal_length)
return np.arctan(theta / self.factor), roll
[docs]
@attrs.define()
class FisheyeEncoder(PolarRollTransformer):
"""Encodes fisheye image."""
mapping_type: Literal[
"rectilinear", "stereographic", "equidistant", "equisolid", "orthographic"
]
"""The mapping type of the fisheye image."""
[docs]
def transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
"""[-1, 1] -> [-pi/2, pi/2]."""
if self.mapping_type == "rectilinear":
return np.arctan(theta), roll
elif self.mapping_type == "stereographic":
return 2 * np.arctan(theta), roll
elif self.mapping_type == "equidistant":
return theta * (np.pi / 2), roll
elif self.mapping_type == "equisolid":
return 2 * np.arcsin(theta / np.sqrt(2)), roll
elif self.mapping_type == "orthographic":
return np.arcsin(theta), roll
else:
raise ValueError(
f"Unknown mapping type: {self.mapping_type}, "
"should be one of 'rectilinear', 'stereographic', 'equidistant', 'equisolid', 'orthographic'."
)
[docs]
def inverse_transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
"""[-pi/2, pi/2] -> [-1, 1]."""
if self.mapping_type == "rectilinear":
return np.tan(theta), roll
elif self.mapping_type == "stereographic":
return 2 * np.tan(theta / 2), roll
elif self.mapping_type == "equidistant":
return theta / (np.pi / 2), roll
elif self.mapping_type == "equisolid":
return np.sqrt(2) * np.sin(theta / 2), roll
elif self.mapping_type == "orthographic":
return np.sin(theta), roll
else:
raise ValueError(
f"Unknown mapping type: {self.mapping_type}, "
"should be one of 'rectilinear', 'stereographic', 'equidistant', 'equisolid', 'orthographic'."
)
[docs]
@attrs.define()
class InverseTransformer(TransformerBase, Generic[T]):
"""Transformer that calls inverse_transform() in transform() and vice versa."""
transformer: T
"""The transformer to be inverted."""
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
return self.transformer.inverse_transform(x, y, **kwargs)
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
return self.transformer.transform(x, y, **kwargs)
[docs]
def FisheyeDecoder(
mapping_type: Literal[
"rectilinear", "stereographic", "equidistant", "equisolid", "orthographic"
]
) -> InverseTransformer[FisheyeEncoder]:
"""
Decodes fisheye image.
Parameters
----------
mapping_type : Literal['rectilinear', 'stereographic', 'equidistant', 'equisolid', 'orthographic']
The mapping type of the fisheye image.
Returns
-------
InverseTransformer
The fisheye decoder.
"""
return InverseTransformer(FisheyeEncoder(mapping_type))
[docs]
@attrs.define()
class PolynomialScaler(PolarRollTransformer):
"""Scale the polar coordinates using polynomial."""
coefs_reverse: Sequence[float] = [0, 1]
"""The coefficients of the polynomial in reverse order.
[0, 1] means y = 0 + 1 * x."""
[docs]
def transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
return np.polyval(np.flip(self.coefs_reverse), theta), roll
[docs]
def inverse_transform_polar(
self, theta: NDArray, roll: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
raise NotImplementedError(
"PolynomialScaler does not support inverse transform."
)
[docs]
@attrs.define()
class ZoomTransformer(TransformerBase):
"""Zoom the image."""
scale: float
"""The zoom scale."""
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
x = x / self.scale
y = y / self.scale
return x, y
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
x = x * self.scale
y = y * self.scale
return x, y
[docs]
def equidistant_to_3d(x: NDArray, y: NDArray) -> NDArray:
"""
Convert 2D coordinates to 3D unit vector.
z axis is forward, x axis is right, y axis is up.
Parameters
----------
x : NDArray
The x coordinate in equidistant fisheye format.
y : NDArray
The y coordinate in equidistant fisheye format.
Returns
-------
NDArray
The 3D unit vector.
"""
phi = np.arctan2(x, y)
theta = np.sqrt(x**2 + y**2)
v = np.stack(
[np.sin(theta) * np.sin(phi), np.sin(theta) * np.cos(phi), np.cos(theta)],
axis=-1,
)
return v
[docs]
def equidistant_from_3d(v: NDArray) -> tuple[NDArray, NDArray]:
"""
Convert 3D unit vector to 2D coordinates.
Parameters
----------
v : NDArray
The 3D unit vector.
Returns
-------
tuple[NDArray, NDArray]
The x and y coordinates in equidistant fisheye format.
"""
theta = np.arccos(v[..., 2])
phi = np.arctan2(v[..., 0], v[..., 1])
x = theta * np.sin(phi)
y = theta * np.cos(phi)
return x, y
[docs]
@attrs.define()
class EquirectangularEncoder(TransformerBase):
"""Encodes equirectangular image."""
is_latitude_y: bool = True
"""Whether latitude is encoded in y axis."""
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
# latitude: 日本語で緯度, phi
# longitude: 日本語で経度, theta
if self.is_latitude_y:
theta_lat = y * (np.pi / 2)
phi_lon = x * (np.pi / 2)
v = np.stack(
[
np.cos(theta_lat) * np.sin(phi_lon),
np.sin(theta_lat),
np.cos(theta_lat) * np.cos(phi_lon),
],
axis=-1,
)
else:
theta_lat = x * (np.pi / 2)
phi_lon = y * (np.pi / 2)
v = np.stack(
[
np.sin(theta_lat),
np.cos(theta_lat) * np.sin(phi_lon),
np.cos(theta_lat) * np.cos(phi_lon),
],
axis=-1,
)
return equidistant_from_3d(v)
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
v = equidistant_to_3d(x, y)
if self.is_latitude_y:
theta_lat = np.arcsin(v[..., 1])
phi_lon = np.arctan2(v[..., 0], v[..., 2])
x = phi_lon / (np.pi / 2)
y = theta_lat / (np.pi / 2)
else:
theta_lat = np.arcsin(v[..., 0])
phi_lon = np.arctan2(v[..., 1], v[..., 2])
x = theta_lat / (np.pi / 2)
y = phi_lon / (np.pi / 2)
return x, y
[docs]
def EquirectangularDecoder(
is_latitude_y: bool = True,
) -> InverseTransformer[EquirectangularEncoder]:
"""
Decodes equirectangular image.
Parameters
----------
is_latitude_y : bool, optional
Whether latitude is encoded in y axis, by default True
Returns
-------
InverseTransformer[EquirectangularEncoder]
The equirectangular decoder.
"""
return InverseTransformer(EquirectangularEncoder(is_latitude_y))
[docs]
@attrs.define()
class Euclidean3DTransformer(TransformerBase):
"""Transform as 3D unit vector."""
[docs]
@abstractmethod
def transform_v(self, v: NDArray) -> NDArray:
"""
Transform 3D unit vector.
Parameters
----------
v : NDArray
The 3D unit vector.
z axis is forward, x axis is right, y axis is up.
Returns
-------
NDArray
The transformed 3D unit vector.
z axis is forward, x axis is right, y axis is up.
"""
pass
[docs]
@abstractmethod
def inverse_transform_v(self, v: NDArray) -> NDArray:
"""
Inverse transform 3D unit vector.
Parameters
----------
v : NDArray
The 3D unit vector.
z axis is forward, x axis is right, y axis is up.
Returns
-------
NDArray
The inverse transformed 3D unit vector.
z axis is forward, x axis is right, y axis is up.
"""
pass
[docs]
def transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
v = equidistant_to_3d(x, y)
v = self.transform_v(v)
x, y = equidistant_from_3d(v)
return x, y
[docs]
def inverse_transform(
self, x: NDArray, y: NDArray, **kwargs: Any
) -> tuple[NDArray, NDArray]:
v = equidistant_to_3d(x, y)
v = self.transform_v(v)
x, y = equidistant_from_3d(v)
return x, y