#!/usr/bin/env python3
#
# Copyright 2022 Graviti. Licensed under MIT License.
#
# pylint: disable=c-extension-no-member
"""The Portex builtin types."""
from sys import version_info
from typing import Iterable, Mapping, Optional, Tuple, Type, TypeVar, Union
import pyarrow as pa
from graviti.portex import ptype as PTYPE
from graviti.portex.base import PortexRecordBase, PortexType
from graviti.portex.enum import EnumValues
from graviti.portex.factory import ConnectedFieldsFactory
from graviti.portex.field import Fields
from graviti.portex.package import packages
from graviti.portex.param import Param, Params, param
from graviti.portex.register import PyArrowConversionRegister
from graviti.utility import ModuleMocker
if version_info >= (3, 9):
# pylint: disable=import-error
from zoneinfo import ZoneInfo as tz_checker
else:
try:
# pylint: disable=import-error
from pytz import timezone as tz_checker # type: ignore[import]
except ModuleNotFoundError:
[docs] tz_checker = ModuleMocker(
"'pytz' package or 'zoneinfo' module (builtin module after python 3.9) is needed "
"to support timezone"
)
[docs]builtins = packages.builtins
_E = Union[int, float, str, bool, None]
[docs]class PortexBuiltinType(PortexType): # pylint: disable=abstract-method
"""The base class of Portex builtin type."""
_T = TypeVar("_T", bound="PortexBuiltinType")
params = Params()
packages = builtins
def __init_subclass__(cls) -> None:
params = Params(cls.params)
for name in getattr(cls, "__annotations__", {}):
parameter = getattr(cls, name, None)
if isinstance(parameter, tuple):
params.add(Param(name, *parameter))
delattr(cls, name)
cls.params = params
builtins[cls.__name__] = cls
def __init__(self, nullable: bool = False) -> None:
self.nullable = PTYPE.Boolean.check(nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: pa.Array) -> _T:
return cls()
[docs] def to_builtin(self: _T) -> _T:
"""Expand the top level type to Portex builtin type.
Returns:
The expanded Portex builtin type.
"""
return self
@PyArrowConversionRegister(pa.lib.Type_STRING)
[docs]class string(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``string``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = string()
>>> t
string()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.string()
@PyArrowConversionRegister(pa.lib.Type_BINARY)
[docs]class binary(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``binary``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = binary()
>>> t
binary()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.binary()
@PyArrowConversionRegister(pa.lib.Type_BOOL)
[docs]class boolean(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``boolean``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = boolean()
>>> t
boolean()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.bool_()
@PyArrowConversionRegister(pa.lib.Type_INT32)
[docs]class int32(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``int32``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = int32()
>>> t
int32()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.int32()
@PyArrowConversionRegister(pa.lib.Type_INT64)
[docs]class int64(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``int64``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = int64()
>>> t
int64()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.int64()
@PyArrowConversionRegister(pa.lib.Type_FLOAT)
[docs]class float32(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``float32``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = float32()
>>> t
float32()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.float32()
@PyArrowConversionRegister(pa.lib.Type_DOUBLE)
[docs]class float64(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex primitive type ``float64``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = float64()
>>> t
float64()
"""
nullable: bool = param(False, ptype=PTYPE.Boolean)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.float64()
@PyArrowConversionRegister(pa.lib.Type_STRUCT)
[docs]class record(PortexBuiltinType, PortexRecordBase): # pylint: disable=invalid-name
"""Portex complex type ``record``.
Arguments:
fields: The fields of the record.
nullable: Whether it is a nullable type.
Examples:
Create a record by dict:
>>> t = record({"f0": int32(), "f1": float32()})
>>> t
record(
fields={
'f0': int32(),
'f1': float32(),
},
)
Create a record by tuple list:
>>> t = record([("f0", string()), ("f1", enum(["v0", "v1"]))])
>>> t
record(
fields={
'f0': string(),
'f1': enum(
values=['v0', 'v1'],
),
},
)
"""
_T = TypeVar("_T", bound="record")
_fields_factory = ConnectedFieldsFactory.from_parameter_name("fields")
fields: Fields = param(ptype=PTYPE.Fields)
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(
self,
fields: Union[Iterable[Tuple[str, PortexType]], Mapping[str, PortexType]],
nullable: bool = False,
) -> None:
self.fields = PTYPE.Fields.check(fields)
super().__init__(nullable=nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: pa.StructArray) -> _T:
return cls(
(field.name, cls.from_pyarrow(paarray.field(field.name))) for field in paarray.type
)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow struct DataType.
"""
return pa.struct(self.fields.to_pyarrow(_to_backend=_to_backend))
@PyArrowConversionRegister(pa.lib.Type_DICTIONARY)
[docs]class enum(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex complex type ``enum``.
Arguments:
values: The values of the enum members.
nullable: Whether it is a nullable type.
Examples:
>>> t = enum(["v0", "v1"])
>>> t
enum(
values=['v0', 'v1'],
)
"""
_T = TypeVar("_T", bound="enum")
values: EnumValues = param(ptype=PTYPE.Enum)
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(self, values: Iterable[_E], nullable: bool = False) -> None:
self.values = PTYPE.Enum.check(values)
super().__init__(nullable=nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: pa.DictionaryArray) -> _T:
return cls(paarray.dictionary.to_pylist())
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
min_index, max_index = self.values.index_scope
if min_index >= -128 and max_index <= 127:
index_type = pa.int8()
elif min_index >= -32768 and max_index <= 32767:
index_type = pa.int16()
elif min_index >= -2147483648 and max_index <= 2147483647:
index_type = pa.int32()
else:
index_type = pa.int64()
if _to_backend:
return index_type
return pa.dictionary(index_type, self.values.to_pyarrow().type)
@PyArrowConversionRegister(pa.lib.Type_LIST, pa.lib.Type_FIXED_SIZE_LIST)
[docs]class array(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex complex type ``array``.
Arguments:
items: The item type of the array.
length: The length of the array.
nullable: Whether it is a nullable type.
Examples:
>>> t = array(int32(0), 100)
>>> t
array(
items=int32(
minimum=0,
),
length=100,
)
"""
_T = TypeVar("_T", bound="array")
items: PortexType = param(ptype=PTYPE.PortexType)
length: Optional[int] = param(None, ptype=PTYPE.Integer)
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(
self, items: PortexType, length: Optional[int] = None, nullable: bool = False
) -> None:
self.items = PTYPE.PortexType.check(items)
self.length = PTYPE.Integer.check(length) if length is not None else None
super().__init__(nullable=nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: Union[pa.ListArray, pa.FixedSizeListArray]) -> _T:
patype = paarray.type
return cls(
cls.from_pyarrow(paarray.values),
getattr(patype, "list_size", None),
)
def _get_column_count(self) -> int:
"""Get the total column count of the portex type.
Returns:
The total column count.
"""
return self.items._get_column_count() # pylint: disable=protected-access
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
list_size = self.length if self.length else -1
return pa.list_(self.items.to_pyarrow(_to_backend=_to_backend), list_size)
@PyArrowConversionRegister(pa.lib.Type_DATE32)
[docs]class date(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex temporal type ``date``.
Arguments:
nullable: Whether it is a nullable type.
Examples:
>>> t = date()
>>> t
date()
"""
_T = TypeVar("_T", bound="date")
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(self, nullable: bool = False) -> None:
super().__init__(nullable=nullable)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.date32()
@PyArrowConversionRegister(pa.lib.Type_TIME32, pa.lib.Type_TIME64)
[docs]class time(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex temporal type ``time``.
Arguments:
unit: The unit of the time, supports 's', 'ms', 'us' and 'ns'.
nullable: Whether it is a nullable type.
Examples:
>>> t = time("ms")
>>> t
times(
unit='ms',
)
"""
_T = TypeVar("_T", bound="time")
_UNIT_TO_TYPE = {
"s": pa.time32("s"),
"ms": pa.time32("ms"),
"us": pa.time64("us"),
"ns": pa.time64("ns"),
}
_TYPE_TO_UNIT = {value: key for key, value in _UNIT_TO_TYPE.items()}
unit: str = param(ptype=PTYPE.String, options=["s", "ms", "us", "ns"])
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(self, unit: str, nullable: bool = False) -> None:
self.unit = self.params["unit"].check(unit)
super().__init__(nullable=nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: Union[pa.Time32Array, pa.Time64Array]) -> _T:
return cls(cls._TYPE_TO_UNIT[paarray.type])
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return self._UNIT_TO_TYPE[self.unit]
@PyArrowConversionRegister(pa.lib.Type_TIMESTAMP)
[docs]class timestamp(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex temporal type ``timestamp``.
Arguments:
unit: The unit of the timestamp, supports 's', 'ms', 'us' and 'ns'.
tz: The name of the timezone, ``None`` indicates the timestamp is naive.
nullable: Whether it is a nullable type.
Examples:
>>> t = timestamp("ms")
>>> t
timestamp(
unit='ms',
)
>>>
>>> t = timestamp("us", tz="UTC")
>>> t
timestamp(
unit='ms',
tz='UTC',
)
"""
_T = TypeVar("_T", bound="timestamp")
unit: str = param(ptype=PTYPE.String, options=["s", "ms", "us", "ns"])
tz: Optional[str] = param(None, ptype=PTYPE.String)
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(self, unit: str, tz: Optional[str] = None, nullable: bool = False) -> None:
self.unit = self.params["unit"].check(unit)
_tz = PTYPE.String.check(tz) if tz is not None else None
if _tz is not None:
tz_checker(_tz)
self.tz = _tz
super().__init__(nullable=nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: pa.TimestampArray) -> _T:
patype = paarray.type
return cls(patype.unit, patype.tz)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.timestamp(self.unit, self.tz)
@PyArrowConversionRegister(pa.lib.Type_DURATION)
[docs]class timedelta(PortexBuiltinType): # pylint: disable=invalid-name
"""Portex temporal type ``timedelta``.
Arguments:
unit: The unit of the timedelta, supports 's', 'ms', 'us' and 'ns'.
nullable: Whether it is a nullable type.
Examples:
>>> t = timedelta("ms")
>>> t
timedelta(
unit='ms',
)
"""
_T = TypeVar("_T", bound="timedelta")
unit: str = param(ptype=PTYPE.String, options=["s", "ms", "us", "ns"])
nullable: bool = param(False, ptype=PTYPE.Boolean)
def __init__(self, unit: str, nullable: bool = False) -> None:
self.unit = self.params["unit"].check(unit)
super().__init__(nullable=nullable)
@classmethod
def _from_pyarrow(cls: Type[_T], paarray: pa.DurationArray) -> _T:
return cls(paarray.type.unit)
[docs] def to_pyarrow(self, *, _to_backend: bool = False) -> pa.DataType:
"""Convert the Portex type to the corresponding builtin PyArrow DataType.
Returns:
The corresponding builtin PyArrow DataType.
"""
return pa.duration(self.unit)