graviti.portex.extractors
#
Methods related to schema-to-column extractors.
Module Contents#
Functions#
| get_extractors(schema) | Get the extractors and dtypes for columns. |
Attributes#
- graviti.portex.extractors.get_extractors(schema)[source]#
Get the extractors and dtypes for columns.
- Parameters
schema (Dict[str, Any]) – The schema of a DataFrame.
- Returns
A dict containing the extractors and dtypes for all columns.
- Return type
Extractors
Examples
>>> import yaml
>>>
>>> from graviti.client import list_data_details
>>> from graviti.utility.lazy import LazyFactory, LazyList
>>> from graviti.portex import catalog_to_schema, get_extractors
>>>
>>> from tensorbay import GAS
>>> from tensorbay.dataset import Dataset
>>> ACCESSKEY = "ACCESSKEY-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
>>> URL = "https://gas.graviti.com/"
>>> DATASET_NAME = "MNIST"
>>> TOTAL_COUNT = 1000
>>>
>>> gas = GAS(ACCESSKEY)
>>> dataset = Dataset(DATASET_NAME, gas)
>>> dataset_client = gas.get_dataset(DATASET_NAME)
>>>
>>> getter = lambda offset, limit: list_data_details(
...     url=URL,
...     access_key=ACCESSKEY,
...     dataset_id=dataset_client.dataset_id,
...     segment_name="train",
...     commit=dataset_client.status.commit_id,
...     offset=offset,
...     limit=limit,
... )
>>> factory = LazyFactory(TOTAL_COUNT, 128, getter)
>>> schema = yaml.load(
...     catalog_to_schema(dataset.catalog, dataset["train"][0], dataset.notes), yaml.Loader
... )
>>> extractors = get_extractors(schema)
>>> lazy_lists = {}
>>> for key, arguments in extractors.items():
...     lazy_lists[key] = factory.create_list(*arguments)
>>> lazy_lists
{'filename': LazyList [
   'train_image_00000.png',
   'train_image_00001.png',
   'train_image_00002.png',
   'train_image_00003.png',
   'train_image_00004.png',
   'train_image_00005.png',
   'train_image_00006.png',
   'train_image_00007.png',
   'train_image_00008.png',
   'train_image_00009.png',
   'train_image_00010.png',
   'train_image_00011.png',
   'train_image_00012.png',
   'train_image_00013.png',
   ... (985 items are folded),
   'train_image_00999.png'
 ],
 'image': LazyList [
   RemoteFileMixin("train_image_00000.png"),
   RemoteFileMixin("train_image_00001.png"),
   RemoteFileMixin("train_image_00002.png"),
   RemoteFileMixin("train_image_00003.png"),
   RemoteFileMixin("train_image_00004.png"),
   RemoteFileMixin("train_image_00005.png"),
   RemoteFileMixin("train_image_00006.png"),
   RemoteFileMixin("train_image_00007.png"),
   RemoteFileMixin("train_image_00008.png"),
   RemoteFileMixin("train_image_00009.png"),
   RemoteFileMixin("train_image_00010.png"),
   RemoteFileMixin("train_image_00011.png"),
   RemoteFileMixin("train_image_00012.png"),
   RemoteFileMixin("train_image_00013.png"),
   ... (985 items are folded),
   RemoteFileMixin("train_image_00999.png")
 ],
 'category': LazyList [
   '5',
   '0',
   '4',
   '1',
   '9',
   '2',
   '1',
   '3',
   '1',
   '4',
   '3',
   '5',
   '3',
   '6',
   ... (985 items are folded),
   '6'
 ]}