Skip to main content
Version: 1.15.2

PandasS3Datasource

Signature

class great_expectations.datasource.fluent.PandasS3Datasource(
*,
type: Literal['pandas_s3'] = 'pandas_s3',
name: str,
id: Optional[uuid.UUID] = None,
assets: List[great_expectations.datasource.fluent.data_asset.path.file_asset.FileDataAsset] = [],
bucket: str,
boto3_options: Dict[str,
Union[great_expectations.datasource.fluent.config_str.ConfigStr,
Any]] = {}
)

PandasS3Datasource is a PandasDatasource that uses Amazon S3 as a data store.

Methods

add_csv_asset

Signature

add_csv_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5fad41a0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5fad4b60> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5fad4d70> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5fad4560> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5fad5040> = None,
sep: typing.Optional[str] = None,
delimiter: typing.Optional[str] = None,
header: Union[int,
Sequence[int],
None,
Literal['infer']] = 'infer',
names: Union[Sequence[str],
None] = None,
index_col: Union[IndexLabel,
Literal[False],
None] = None,
usecols: typing.Optional[typing.Union[int,
str,
typing.Sequence[int]]] = None,
dtype: typing.Optional[dict] = None,
engine: Union[CSVEngine,
None] = None,
true_values: typing.Optional[typing.List] = None,
false_values: typing.Optional[typing.List] = None,
skipinitialspace: bool = False,
skiprows: typing.Optional[typing.Union[typing.Sequence[int],
int]] = None,
skipfooter: int = 0,
nrows: typing.Optional[int] = None,
na_values: Union[str,
Iterable[str],
None] = None,
keep_default_na: bool = True,
na_filter: bool = True,
skip_blank_lines: bool = True,
parse_dates: Union[bool,
Sequence[str],
None] = None,
date_format: typing.Optional[str] = None,
dayfirst: bool = False,
cache_dates: bool = True,
iterator: bool = False,
chunksize: typing.Optional[int] = None,
compression: CompressionOptions = 'infer',
thousands: typing.Optional[str] = None,
decimal: str = '.',
lineterminator: typing.Optional[str] = None,
quotechar: str = '"',
quoting: int = 0,
doublequote: bool = True,
escapechar: typing.Optional[str] = None,
comment: typing.Optional[str] = None,
encoding: typing.Optional[str] = None,
encoding_errors: typing.Optional[str] = 'strict',
dialect: typing.Optional[str] = None,
on_bad_lines: str = 'error',
low_memory: bool = True,
memory_map: bool = False,
storage_options: Union[StorageOptions,
None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add a csv asset to the datasource.

add_excel_asset

Signature

add_excel_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5fa41670> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5fa40f50> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5fa41220> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5fa41a90> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5fa41b50> = None,
sheet_name: typing.Optional[typing.Union[str,
int,
typing.List[typing.Union[int,
str]]]] = 0,
header: Union[int,
Sequence[int],
None] = 0,
index_col: Union[int,
str,
Sequence[int],
None] = None,
usecols: typing.Optional[typing.Union[int,
str,
typing.Sequence[int]]] = None,
dtype: typing.Optional[dict] = None,
true_values: Union[Iterable[str],
None] = None,
false_values: Union[Iterable[str],
None] = None,
skiprows: typing.Optional[typing.Union[typing.Sequence[int],
int]] = None,
nrows: typing.Optional[int] = None,
na_values: typing.Any = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
parse_dates: typing.Union[typing.List,
typing.Dict,
bool] = False,
date_format: typing.Optional[str] = None,
thousands: typing.Optional[str] = None,
decimal: str = '.',
comment: typing.Optional[str] = None,
skipfooter: int = 0,
storage_options: Union[StorageOptions,
None] = None,
dtype_backend: DtypeBackend = None,
engine_kwargs: typing.Optional[typing.Dict] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an excel asset to the datasource.

add_feather_asset

Signature

add_feather_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5fa42f30> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43050> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5fa431a0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43350> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43410> = None,
columns: Union[Sequence[str],
None] = None,
use_threads: bool = True,
storage_options: Union[StorageOptions,
None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add a feather asset to the datasource.

add_fwf_asset

Signature

add_fwf_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43b90> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43c50> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43da0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5fa43f50> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f96c050> = None,
colspecs: Union[Sequence[Tuple[int,
int]],
str,
None] = 'infer',
widths: Union[Sequence[int],
None] = None,
infer_nrows: int = 100,
iterator: bool = False,
chunksize: typing.Optional[int] = None,
kwargs: typing.Optional[dict] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an fwf asset to the datasource.

add_hdf_asset

Signature

add_hdf_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f96c8c0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f96c980> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f96cad0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f96cc80> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f96cd40> = None,
key: typing.Any = None,
mode: str = 'r',
errors: str = 'strict',
where: typing.Optional[typing.Union[str,
typing.List]] = None,
start: typing.Optional[int] = None,
stop: typing.Optional[int] = None,
columns: typing.Optional[typing.List[str]] = None,
iterator: bool = False,
chunksize: typing.Optional[int] = None,
kwargs: typing.Optional[dict] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an hdf asset to the datasource.

add_html_asset

Signature

add_html_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f96d520> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f96d5e0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f96d730> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f96d8e0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f96d9a0> = None,
match: Union[str,
Pattern] = '.+',
header: Union[int,
Sequence[int],
None] = None,
index_col: Union[int,
Sequence[int],
None] = None,
skiprows: typing.Optional[typing.Union[typing.Sequence[int],
int]] = None,
attrs: typing.Optional[typing.Dict[str,
str]] = None,
parse_dates: bool = False,
thousands: typing.Optional[str] = ',',
encoding: typing.Optional[str] = None,
decimal: str = '.',
converters: typing.Optional[typing.Dict] = None,
na_values: Union[Iterable[object],
None] = None,
keep_default_na: bool = True,
displayed_only: bool = True,
dtype_backend: DtypeBackend = None,
storage_options: StorageOptions = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an html asset to the datasource.

add_iceberg_asset

Signature

add_iceberg_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f96e5d0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f96e690> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f96e7e0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f96e990> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f96ea50> = None,
catalog_name: str | None = None,
catalog_properties: dict[str,
typing.Any] | None = None,
columns: list[str] | None = None,
row_filter: str | None = None,
case_sensitive: bool = True,
snapshot_id: int | None = None,
limit: int | None = None,
scan_properties: dict[str,
typing.Any] | None = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an iceberg asset to the datasource.

add_json_asset

Signature

add_json_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f96f230> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f96f2f0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f96f440> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f96f5f0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f96f6b0> = None,
orient: typing.Optional[str] = None,
typ: Literal['frame',
'series'] = 'frame',
dtype: typing.Optional[dict] = None,
convert_axes: typing.Optional[bool] = None,
convert_dates: typing.Union[bool,
typing.List[str]] = True,
keep_default_dates: bool = True,
precise_float: bool = False,
date_unit: typing.Optional[str] = None,
encoding: typing.Optional[str] = None,
encoding_errors: typing.Optional[str] = 'strict',
lines: bool = False,
chunksize: typing.Optional[int] = None,
compression: CompressionOptions = 'infer',
nrows: typing.Optional[int] = None,
storage_options: Union[StorageOptions,
None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add a json asset to the datasource.

add_orc_asset

Signature

add_orc_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a0170> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a02c0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a0410> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a05c0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a0680> = None,
columns: typing.Optional[typing.List[str]] = None,
dtype_backend: DtypeBackend = None,
kwargs: typing.Optional[dict] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an orc asset to the datasource.

add_parquet_asset

Signature

add_parquet_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a0dd0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a0e90> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a0fe0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1190> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1250> = None,
engine: str = 'auto',
columns: typing.Optional[typing.List[str]] = None,
storage_options: Union[StorageOptions,
None] = None,
dtype_backend: DtypeBackend = None,
to_pandas_kwargs: typing.Optional[typing.Dict] = None,
kwargs: typing.Optional[dict] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add a parquet asset to the datasource.

add_pickle_asset

Signature

add_pickle_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1a00> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1ac0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1c10> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1dc0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a1e80> = None,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions,
None] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add a pickle asset to the datasource.

add_sas_asset

Signature

add_sas_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a2540> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a2600> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a2750> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a2900> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a29c0> = None,
format: typing.Optional[str] = None,
index: typing.Optional[str] = None,
encoding: typing.Optional[str] = None,
chunksize: typing.Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
**extra_data: typing.Any
) → pydantic.BaseModel

Add a sas asset to the datasource.

add_spss_asset

Signature

add_spss_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a3170> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a3230> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a3380> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a3530> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a35f0> = None,
usecols: typing.Optional[typing.Union[int,
str,
typing.Sequence[int]]] = None,
convert_categoricals: bool = True,
dtype_backend: DtypeBackend = None,
kwargs: typing.Optional[dict] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an spss asset to the datasource.

add_stata_asset

Signature

add_stata_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a3e00> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7a3ec0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c8050> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c8200> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c82c0> = None,
convert_dates: bool = True,
convert_categoricals: bool = True,
index_col: typing.Optional[str] = None,
convert_missing: bool = False,
preserve_dtypes: bool = True,
columns: Union[Sequence[str],
None] = None,
order_categoricals: bool = True,
chunksize: typing.Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions,
None] = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add a stata asset to the datasource.

add_xml_asset

Signature

add_xml_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c8bc0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c8c80> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c8dd0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c8f80> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fef5f7c9040> = None,
xpath: str = './*',
namespaces: typing.Optional[typing.Dict[str,
str]] = None,
elems_only: bool = False,
attrs_only: bool = False,
names: Union[Sequence[str],
None] = None,
dtype: typing.Optional[dict] = None,
encoding: typing.Optional[str] = 'utf-8',
stylesheet: Union[FilePath,
None] = None,
iterparse: typing.Optional[typing.Dict[str,
typing.List[str]]] = None,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions,
None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: typing.Any
) → pydantic.BaseModel

Add an xml asset to the datasource.

delete_asset

Signature

delete_asset(
name: str
) → None

Removes the DataAsset referred to by `name` from the internal list of available DataAsset objects.

Parameters

Name | Description

name

name of DataAsset to be deleted.

get_asset

Signature

get_asset(
name: str
) → great_expectations.datasource.fluent.interfaces._DataAssetT

Returns the DataAsset referred to by `name`.

Parameters

Name | Description

name

name of DataAsset sought.

Returns

Type | Description

great_expectations.datasource.fluent.interfaces._DataAssetT

The named DataAsset object, if it exists; otherwise, an exception is raised.