Source code for graviti.file.base

#!/usr/bin/env python3
#
# Copyright 2022 Graviti. Licensed under MIT License.
#

"""Graviti basic file class."""

import mimetypes
from hashlib import sha1
from pathlib import Path
from typing import TYPE_CHECKING, Dict, Optional, Type, TypeVar, Union

import pyarrow as pa
from _io import BufferedReader

from graviti.portex import STANDARD_URL, ExternalElementResgister
from graviti.utility import PathLike, ReprMixin, UserResponse, shorten

if TYPE_CHECKING:
    from graviti.manager import ObjectPermissionManager

# Type variables bound to each file class, so that the ``_from_pyarrow``
# alternate constructors are typed as returning an instance of the calling class.
_FB = TypeVar("_FB", bound="FileBase")
_F = TypeVar("_F", bound="File")
_RF = TypeVar("_RF", bound="RemoteFile")

# Stdlib mapping of filename suffixes to encoding names. ``File.extension`` checks
# suffixes against it to decide whether to keep the preceding suffix as well.
_ENCODINGS = mimetypes.encodings_map


class FileBase(ReprMixin):
    """Base class of the file objects stored in a DataFrame.

    It holds the key, extension and size of a file and leaves :meth:`open`
    to be implemented by the subclasses.
    """

    __slots__ = ("_key", "_extension", "_size", "_post_key")

    _key: str
    _extension: str
    _size: int
    # Key reported by ``_to_post_data``; it is not assigned by ``_from_pyarrow``.
    _post_key: str

    def _to_post_data(self) -> Dict[str, Union[int, str]]:
        """Build the dict describing this file for posting.

        Returns:
            A dict with the "key", "extension" and "size" of the file.
        """
        post_data: Dict[str, Union[int, str]] = {}
        # Go through the public properties so subclass overrides are honoured.
        post_data["key"] = self._post_key
        post_data["extension"] = self.extension
        post_data["size"] = self.size
        return post_data

    @classmethod
    def _from_pyarrow(
        cls: Type[_FB],
        scalar: pa.StructScalar,
        _: Optional["ObjectPermissionManager"] = None,
    ) -> _FB:
        """Create a file object from a pyarrow struct scalar.

        Arguments:
            scalar: The struct scalar providing the "key", "extension" and "size" fields.
            _: Unused here; accepted so all subclasses share a compatible signature.

        Returns:
            The file object built from the scalar, created without calling ``__init__``.
        """
        instance: _FB = object.__new__(cls)
        fields = scalar.as_py()

        for name in ("key", "extension", "size"):
            setattr(instance, f"_{name}", fields[name])

        return instance

    @property
    def key(self) -> str:
        """Get the key of the file.

        Returns:
            The key of the file.
        """
        return self._key

    @property
    def extension(self) -> str:
        """Get the extension of the file.

        Returns:
            The extension of the file.
        """
        return self._extension

    @property
    def size(self) -> int:
        """Get the size of the file.

        Returns:
            The size of the file.
        """
        return self._size

    def open(self) -> Union[UserResponse, BufferedReader]:
        """Return the binary file pointer of this file.

        Raises:
            NotImplementedError: The method of the base class should not be called.
        """
        raise NotImplementedError
class File(FileBase):
    """A file located on the local filesystem.

    The key, extension, size and sha1 checksum are computed on first access
    and cached on the instance afterwards.

    Arguments:
        path: The local path of the file.
    """

    __slots__ = ("_path", "_checksum")

    # Number of bytes read per chunk while computing the checksum.
    _BUFFER_SIZE = 65536

    _checksum: str

    def __init__(self, path: PathLike) -> None:
        local_path = Path(path)
        self._path = local_path.absolute()

    def _repr_head(self) -> str:
        class_name = self.__class__.__name__
        file_name = self._path.name
        return f'{class_name}("{file_name}")'

    @classmethod
    def _from_pyarrow(
        cls: Type[_F],
        scalar: pa.StructScalar,
        _: Optional["ObjectPermissionManager"] = None,
    ) -> _F:
        """Create a local file object from a pyarrow struct scalar.

        Arguments:
            scalar: The struct scalar providing the "key", "extension" and "size" fields.
            _: Unused here; accepted so all subclasses share a compatible signature.

        Returns:
            The file object built from the scalar, whose path is built from the "key" field.
        """
        instance: _F = object.__new__(cls)
        fields = scalar.as_py()

        stored_key = fields["key"]
        instance._key = stored_key
        # The stored key is also used as the path of the file.
        instance._path = Path(stored_key)
        instance._extension = fields["extension"]
        instance._size = fields["size"]

        return instance

    @property
    def path(self) -> Path:
        """Get the path of the file.

        Returns:
            The path of the file.
        """
        return self._path

    @property
    def key(self) -> str:
        """Get the key of the file.

        Returns:
            The key of the file.
        """
        if hasattr(self, "_key"):
            return self._key

        # No key assigned yet: fall back to the string form of the path and cache it.
        self._key = str(self._path)
        return self._key

    @property
    def extension(self) -> str:
        """Get the extension of the file.

        Returns:
            The extension of the file.
        """
        if hasattr(self, "_extension"):
            return self._extension

        file_path = self._path
        ext = file_path.suffix
        if ext in _ENCODINGS:
            # The last suffix is listed as an encoding in ``mimetypes.encodings_map``,
            # so keep the suffix in front of it too (for example ".tar" + ".gz").
            ext = Path(file_path.stem).suffix + ext

        self._extension = ext
        return self._extension

    @property
    def size(self) -> int:
        """Get the size of the file.

        Returns:
            The size of the file.
        """
        if hasattr(self, "_size"):
            return self._size

        # Ask the filesystem once and cache the answer.
        self._size = self._path.stat().st_size
        return self._size

    def get_checksum(self) -> str:
        """Get the sha1 checksum of the local file.

        The file is read in chunks of ``_BUFFER_SIZE`` bytes, and the resulting
        hex digest is cached on the instance.

        Returns:
            The sha1 checksum of the local file.
        """
        if hasattr(self, "_checksum"):
            return self._checksum

        digest = sha1()
        with self._path.open("rb") as stream:
            chunk = stream.read(self._BUFFER_SIZE)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(self._BUFFER_SIZE)

        self._checksum = digest.hexdigest()
        return self._checksum

    def open(self) -> BufferedReader:
        """Return the binary file pointer of this file.

        Returns:
            The local file pointer.
        """
        return self._path.open("rb")
@ExternalElementResgister(STANDARD_URL, "main", "file.File")
class RemoteFile(FileBase):
    """This class represents the file on Graviti platform.

    Arguments:
        key: The key of the file.
        extension: The extension of the file.
        size: The size of the file.
        object_permission_manager: The permission to access the file.
    """

    __slots__ = ("_object_permission",)

    def __init__(
        self,
        key: str,
        extension: str,
        size: int,
        object_permission_manager: "ObjectPermissionManager",
    ) -> None:
        self._key = key
        # A file constructed directly is posted under the same key it is read with.
        self._post_key = key
        self._extension = extension
        self._size = size
        self._object_permission = object_permission_manager

    def _repr_head(self) -> str:
        # Show only the part of the key after the last "/". Index with [-1] rather
        # than [1] so that a key containing no "/" falls back to the whole key
        # instead of raising IndexError while building the repr.
        short_checksum = shorten(self.key.rsplit("/", 1)[-1])
        return f'{self.__class__.__name__}("{short_checksum}")'

    @classmethod
    def _from_pyarrow(  # type: ignore[override]  # pylint: disable=signature-differs
        cls: Type[_RF],
        scalar: pa.StructScalar,
        object_permission_manager: "ObjectPermissionManager",
    ) -> _RF:
        """Create a remote file object from a pyarrow struct scalar.

        Arguments:
            scalar: The struct scalar providing the "key", "extension" and "size" fields.
            object_permission_manager: The permission to access the file.

        Returns:
            The remote file object built from the scalar, created without calling ``__init__``.
        """
        obj: _RF = object.__new__(cls)
        pyobj = scalar.as_py()

        obj._key = pyobj["key"]
        obj._extension = pyobj["extension"]
        obj._size = pyobj["size"]
        # NOTE(review): unlike ``__init__``, ``_post_key`` is not assigned here;
        # presumably it is set elsewhere before ``_to_post_data`` is called -- confirm.
        obj._object_permission = object_permission_manager

        return obj

    def open(self) -> UserResponse:
        """Return the binary file pointer of this file.

        Returns:
            The remote file pointer.
        """
        return self._object_permission.get_object(self._key)