# Source code for graviti.openapi.sheet

#!/usr/bin/env python3
#
# Copyright 2022 Graviti. Licensed under MIT License.
#

"""Interfaces about the sheet."""

from typing import Any, Dict, Optional

from graviti.openapi.requests import open_api_do


def _list_sheet(
    access_key: str,
    url: str,
    with_record_count: Optional[bool],
    offset: Optional[int],
    limit: Optional[int],
) -> Dict[str, Any]:
    """Send a ``GET`` request to a sheet listing endpoint and return the decoded JSON body.

    Arguments:
        access_key: User's access key.
        url: The full URL of the sheet listing endpoint.
        with_record_count: Value of the ``with_record_count`` query parameter.
        offset: Value of the ``offset`` query parameter.
        limit: Value of the ``limit`` query parameter.

    Returns:
        The JSON-decoded response of OpenAPI.

    """
    query = dict(with_record_count=with_record_count, offset=offset, limit=limit)
    response = open_api_do("GET", access_key, url, params=query)

    return response.json()  # type: ignore[no-any-return]


def _get_sheet(
    access_key: str,
    url: str,
    with_record_count: Optional[bool],
    schema_format: Optional[str],
) -> Dict[str, Any]:
    """Send a ``GET`` request to a single-sheet endpoint and return the decoded JSON body.

    Arguments:
        access_key: User's access key.
        url: The full URL of the sheet endpoint.
        with_record_count: Value of the ``with_record_count`` query parameter.
        schema_format: Value of the ``schema_format`` query parameter.

    Returns:
        The JSON-decoded response of OpenAPI.

    """
    query = dict(with_record_count=with_record_count, schema_format=schema_format)
    response = open_api_do("GET", access_key, url, params=query)

    return response.json()  # type: ignore[no-any-return]


def create_sheet(
    access_key: str,
    url: str,
    workspace: str,
    dataset: str,
    *,
    draft_number: int,
    name: str,
    schema: str,
    _avro_schema: str,
    _arrow_schema: Optional[str] = None,
    record_key_strategy: Optional[str] = None,
) -> None:
    """Execute the OpenAPI `POST /v2/datasets/{workspace}/{dataset}/drafts/{draft_number}/sheets`.

    Arguments:
        access_key: User's access key.
        url: The URL of the graviti website.
        workspace: The workspace of the dataset.
        dataset: Name of the dataset, unique for a user.
        draft_number: The draft number.
        name: The sheet name.
        schema: The portex schema of the sheet.
        _avro_schema: The schema string sent as the ``_avro_schema`` field of the request body
            (presumably the Avro form of ``schema``).
        _arrow_schema: The schema string sent as the ``_arrow_schema`` field of the request body.
            If None, the field is not sent.
        record_key_strategy: The ``__record_key`` generation strategy.
            If None, the field is not sent and the server default applies (documented as
            batch auto-increment sorting record key).

    Examples:
        >>> create_sheet(
        ...     "ACCESSKEY-********",
        ...     "https://api.graviti.com",
        ...     "graviti-example",
        ...     "MNIST",
        ...     draft_number=1,
        ...     name="val",
        ...     schema='{"imports": [{"repo": "https://github.com/Project-OpenBytes/portex-\
standard@main", "types": [{"name": "file.Image"}]}], "type": "record", "fields": [{"name": \
"filename", "type": "string"}, {"name": "image", "type": "file.Image"}]}',
        ...     _avro_schema='{"type": "record", "name": "root", "namespace": "cn.graviti.portex"\
, "aliases": [], "fields": [{"name": "filename", "type": "string"}, {"name": "image", "type": \
{"type": "record", "name": "image", "namespace": "cn.graviti.portex.root", "aliases": [], \
"fields": [{"name": "checksum", "type": [null, "string"]}]}}]}',
        ... )

    """
    url = f"{url}/v2/datasets/{workspace}/{dataset}/drafts/{draft_number}/sheets"
    post_data = {"name": name, "schema": schema, "_avro_schema": _avro_schema}

    # Optional fields are only included when given, so the server can apply its own defaults.
    if _arrow_schema is not None:
        post_data["_arrow_schema"] = _arrow_schema
    if record_key_strategy is not None:
        post_data["record_key_strategy"] = record_key_strategy

    open_api_do("POST", access_key, url, json=post_data)


def list_draft_sheets(
    access_key: str,
    url: str,
    workspace: str,
    dataset: str,
    *,
    draft_number: int,
    with_record_count: Optional[bool] = None,
    offset: Optional[int] = None,
    limit: Optional[int] = None,
) -> Dict[str, Any]:
    """Execute the OpenAPI `GET /v2/datasets/{workspace}/{dataset}/drafts/{draft_number}/sheets`.

    Arguments:
        access_key: User's access key.
        url: The URL of the graviti website.
        workspace: The workspace of the dataset.
        dataset: Name of the dataset, unique for a user.
        draft_number: The draft number.
        with_record_count: Whether to return the record count of each sheet. The default value
            of this param in OpenAPI is False.
        offset: The offset of the page. The default value of this param in OpenAPIv2 is 0.
        limit: The limit of the page. The default value of this param in OpenAPIv2 is 128.

    Returns:
        The response of OpenAPI.

    Examples:
        >>> list_draft_sheets(
        ...     "ACCESSKEY-********",
        ...     "https://api.graviti.com",
        ...     "graviti-example",
        ...     "MNIST",
        ...     draft_number=1,
        ... )
        {
            "sheets": [
                {
                    "name": "test",
                    "created_at": "2021-03-03T18:58:10Z",
                    "updated_at": "2021-03-04T18:58:10Z",
                },
                {
                    "name": "trainval",
                    "created_at": "2021-03-05T18:58:10Z",
                    "updated_at": "2021-03-06T18:58:10Z",
                }
            ],
            "offset": 0,
            "record_size": 2,
            "total_count": 2
        }

    """
    url = f"{url}/v2/datasets/{workspace}/{dataset}/drafts/{draft_number}/sheets"

    return _list_sheet(
        access_key, url, with_record_count=with_record_count, offset=offset, limit=limit
    )


def list_commit_sheets(
    access_key: str,
    url: str,
    workspace: str,
    dataset: str,
    *,
    commit_id: str,
    with_record_count: Optional[bool] = None,
    offset: Optional[int] = None,
    limit: Optional[int] = None,
) -> Dict[str, Any]:
    """Execute the OpenAPI `GET /v2/datasets/{workspace}/{dataset}/commits/{commit_id}/sheets`.

    Arguments:
        access_key: User's access key.
        url: The URL of the graviti website.
        workspace: The workspace of the dataset.
        dataset: Name of the dataset, unique for a user.
        commit_id: The commit id.
        with_record_count: Whether to return the record count of each sheet. The default value
            of this param in OpenAPI is False.
        offset: The offset of the page. The default value of this param in OpenAPIv2 is 0.
        limit: The limit of the page. The default value of this param in OpenAPIv2 is 128.

    Returns:
        The response of OpenAPI.

    Examples:
        >>> list_commit_sheets(
        ...     "ACCESSKEY-********",
        ...     "https://api.graviti.com",
        ...     "graviti-example",
        ...     "MNIST",
        ...     commit_id="fde63f357daf46088639e9f57fd81cad",
        ... )
        {
            "sheets": [
                {
                    "name": "test",
                    "created_at": "2021-03-03T18:58:10Z",
                    "updated_at": "2021-03-04T18:58:10Z",
                },
                {
                    "name": "trainval",
                    "created_at": "2021-03-05T18:58:10Z",
                    "updated_at": "2021-03-06T18:58:10Z",
                }
            ],
            "offset": 0,
            "record_size": 2,
            "total_count": 2
        }

    """
    url = f"{url}/v2/datasets/{workspace}/{dataset}/commits/{commit_id}/sheets"

    return _list_sheet(
        access_key, url, with_record_count=with_record_count, offset=offset, limit=limit
    )


def get_draft_sheet(
    access_key: str,
    url: str,
    workspace: str,
    dataset: str,
    *,
    draft_number: int,
    sheet: str,
    with_record_count: Optional[bool] = None,
    schema_format: Optional[str] = None,
) -> Dict[str, Any]:
    """Execute the OpenAPI `GET /v2/datasets/{workspace}/{dataset}/drafts/\
{draft_number}/sheets/{sheet}`.

    Arguments:
        access_key: User's access key.
        url: The URL of the graviti website.
        workspace: The workspace of the dataset.
        dataset: Name of the dataset, unique for a user.
        draft_number: The draft number.
        sheet: The sheet name.
        with_record_count: Whether to return the record count of the sheet. The default value
            of this param in OpenAPI is False.
        schema_format: Fill "JSON"/"YAML" to determine whether the schema_format of the returned
            schema is json or yaml. None means "JSON" format.

    Returns:
        The response of OpenAPI.

    Examples:
        >>> get_draft_sheet(
        ...     "ACCESSKEY-********",
        ...     "https://api.graviti.com",
        ...     "graviti-example",
        ...     "MNIST",
        ...     draft_number=1,
        ...     sheet="sheet-2",
        ...     with_record_count=True,
        ... )
        {
            "name": "trainval",
            "created_at": "2021-03-05T18:58:10Z",
            "updated_at": "2021-03-06T18:58:10Z",
            "record_count": 10000,
            "schema": '{"imports": [{"repo": "https://github.com/Project-OpenBytes/...'
        }

    """
    url = f"{url}/v2/datasets/{workspace}/{dataset}/drafts/{draft_number}/sheets/{sheet}"

    return _get_sheet(
        access_key, url, with_record_count=with_record_count, schema_format=schema_format
    )


def get_commit_sheet(
    access_key: str,
    url: str,
    workspace: str,
    dataset: str,
    *,
    commit_id: str,
    sheet: str,
    with_record_count: Optional[bool] = None,
    schema_format: Optional[str] = None,
) -> Dict[str, Any]:
    """Execute the OpenAPI `GET /v2/datasets/{workspace}/{dataset}/commits/\
{commit_id}/sheets/{sheet}`.

    Arguments:
        access_key: User's access key.
        url: The URL of the graviti website.
        workspace: The workspace of the dataset.
        dataset: Name of the dataset, unique for a user.
        commit_id: The commit id.
        sheet: The sheet name.
        with_record_count: Whether to return the record count of the sheet. The default value
            of this param in OpenAPI is False.
        schema_format: Fill "JSON"/"YAML" to determine whether the schema_format of the returned
            schema is json or yaml. None means "JSON" format.

    Returns:
        The response of OpenAPI.

    Examples:
        >>> get_commit_sheet(
        ...     "ACCESSKEY-********",
        ...     "https://api.graviti.com",
        ...     "graviti-example",
        ...     "MNIST",
        ...     commit_id="fde63f357daf46088639e9f57fd81cad",
        ...     sheet="sheet-2",
        ...     with_record_count=True,
        ... )
        {
            "name": "trainval",
            "created_at": "2021-03-05T18:58:10Z",
            "updated_at": "2021-03-06T18:58:10Z",
            "record_count": 10000,
            "schema": '{"imports": [{"repo": "https://github.com/Project-OpenBytes/...'
        }

    """
    url = f"{url}/v2/datasets/{workspace}/{dataset}/commits/{commit_id}/sheets/{sheet}"

    return _get_sheet(
        access_key, url, with_record_count=with_record_count, schema_format=schema_format
    )


def delete_sheet(
    access_key: str,
    url: str,
    workspace: str,
    dataset: str,
    *,
    draft_number: int,
    sheet: str,
) -> None:
    """Execute the OpenAPI `DELETE /v2/datasets/{workspace}/{dataset}/drafts/\
{draft_number}/sheets/{sheet}`.

    Arguments:
        access_key: User's access key.
        url: The URL of the graviti website.
        workspace: The workspace of the dataset.
        dataset: Name of the dataset, unique for a user.
        draft_number: The draft number.
        sheet: The name of the sheet to be deleted.

    Examples:
        >>> delete_sheet(
        ...     "ACCESSKEY-********",
        ...     "https://api.graviti.com",
        ...     "graviti-example",
        ...     "MNIST",
        ...     draft_number=1,
        ...     sheet="sheet-2",
        ... )

    """
    url = f"{url}/v2/datasets/{workspace}/{dataset}/drafts/{draft_number}/sheets/{sheet}"
    open_api_do("DELETE", access_key, url)