import { PreparedDataset } from './prepared-dataset';
import { Construct } from 'constructs';
import { Location } from 'aws-cdk-lib/aws-s3';
import { Duration } from 'aws-cdk-lib';
/**
 * The properties for the Bring Your Own Data generator (the `CustomDataset` construct).
 */
export interface CustomDatasetProps {
    /**
     * The S3 location of the input data, expressed as a bucket name and an object key
     * (for example `{ bucketName: 'my-bucket', objectKey: 'datasets/custom' }`).
     */
    readonly s3Location: Location;
    /**
     * The format of the input data. Must be one of the `CustomDatasetInputFormat` values.
     */
    readonly inputFormat: CustomDatasetInputFormat;
    /**
     * The name of the datetime column to use for data generation as the time reference
     */
    readonly datetimeColumn: string;
    /**
     * The names of the datetime columns to use for data generation.
     * NOTE(review): presumably these are the columns whose values are adjusted when the data is generated,
     * and the list may include `datetimeColumn` itself - confirm against the implementation.
     */
    readonly datetimeColumnsToAdjust: string[];
    /**
     * The interval used to partition the data and optimize the data generation.
     * It is provided as a `Duration`, for example `Duration.minutes(5)`.
     * NOTE(review): the interval was originally documented as being "in Minutes" - confirm whether
     * durations that are not a whole number of minutes are supported.
     */
    readonly partitionRange: Duration;
    /**
     * Approximate data size (in GB) of the custom dataset.
     * @default - The Glue job responsible for preparing the data uses autoscaling with a maximum of 100 workers
     */
    readonly approximateDataSize?: number;
}
/**
 * The input data formats that can be selected through `CustomDatasetProps.inputFormat`.
 */
export declare enum CustomDatasetInputFormat {
    /** Input data in CSV format */
    CSV = "csv",
    /** Input data in Parquet format */
    PARQUET = "parquet",
    /** Input data in JSON format */
    JSON = "json"
}
/**
 * A CustomDataset is a dataset that you need to prepare for the [BatchReplayer]{@link BatchReplayer} to generate data.
 * The dataset is transformed into a [PreparedDataset]{@link PreparedDataset} by a Glue Job that runs synchronously during the CDK deploy.
 * The Glue job is sized based on the approximate size of the input data or uses autoscaling (max 100) if no data size is provided.
 *
 * The Glue job applies the following transformations to the input dataset:
 * 1. Read the input dataset based on its format. Currently, it supports data in CSV, JSON and Parquet
 * 2. Group rows into tumbling windows based on the partition range parameter provided.
 * The partition range should be adapted to the data volume and the total dataset time range
 * 3. Convert dates from MM-dd-yyyy HH:mm:ss.SSS to MM-dd-yyyyTHH:mm:ss.SSSZ format and remove null values
 * 4. Write data into the output bucket partitioned by the tumbling window time.
 * For example, one partition for every 5 minutes.
 * 5. Generate a manifest file based on the previous output to be used by the BatchReplayer for generating data
 *
 * The CloudWatch log group of the Glue job is exposed through the `glueJobLogGroup` property to help check any error with the Glue job.
 *
 * Usage example:
 * ```typescript
 * import { App, CfnOutput, Duration, Stack } from 'aws-cdk-lib';
 * import { CustomDataset, CustomDatasetInputFormat } from './data-generator/custom-dataset';
 *
 * const app = new App();
 * const stack = new Stack(app, 'CustomDatasetStack');
 *
 * const custom = new CustomDataset(stack, 'CustomDataset', {
 *   s3Location: {
 *     bucketName: 'aws-analytics-reference-architecture',
 *     objectKey: 'datasets/custom',
 *   },
 *   inputFormat: CustomDatasetInputFormat.CSV,
 *   datetimeColumn: 'tpep_pickup_datetime',
 *   datetimeColumnsToAdjust: ['tpep_pickup_datetime'],
 *   partitionRange: Duration.minutes(5),
 *   approximateDataSize: 1,
 * });
 *
 * new CfnOutput(stack, 'LogGroupName', {
 *   exportName: 'logGroupName',
 *   value: custom.glueJobLogGroup,
 * });
 * ```
 *
 * An example of a custom dataset that can be processed by this construct is available in s3://aws-analytics-reference-architecture/datasets/custom
 */
export declare class CustomDataset extends Construct {
    /**
     * The prepared dataset generated from the custom dataset
     */
    readonly preparedDataset: PreparedDataset;
    /**
     * The CloudWatch log group of the Glue job (as a string), where the logs can be analyzed for potential errors in the Glue job
     */
    readonly glueJobLogGroup: string;
    /**
     * Constructs a new instance of a CustomDataset construct, which exposes the resulting PreparedDataset through the `preparedDataset` property
     * @param {Construct} scope the Scope of the CDK Construct
     * @param {string} id the ID of the CDK Construct
     * @param {CustomDatasetProps} props the CustomDataset [properties]{@link CustomDatasetProps}
     */
    constructor(scope: Construct, id: string, props: CustomDatasetProps);
}
