Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Job-Executor" |
appType | Yes | - | No | The value must be "job-summarize-executor". | "job-summarize-executor" |
config | Yes | - | No | The value must be "com.accenture.aspire:app-jobsummarize-executor". | "com.accenture.aspire:app-jobsummarize-executor" |
description | Yes | - | No | The description | "Job-Executor" |
properties | Yes | - | No | Configuration object | |
dataPath | Yes | - | No | The path of the job that contains the tables data | "/doc" |
containerPath | Yes | - | No | The sub path of the data that contains each table | "container" |
tableIdPath | Yes | - | No | The sub path of table data that contains the table id | "container/url" |
seedIdPath | Yes | - | No | The sub path of table data that contains the seed id | "container/seed/id" |
columnsPath | Yes | - | No | The sub path of table objects that contains the columns information | "dataProfile/columns" |
columnNamePath | Yes | - | No | The sub path of column objects that contains the column name | "columnName" |
columnTypePath | Yes | - | No | The sub path of column objects that contains the column type | "column_type" |
columnsPatterns | Yes | [] | Yes | The column patterns used to detect each column type | [{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}] |
type | Yes | "TEXT" | No | The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN" | "TEXT" |
pattern | Yes | - | No | The pattern to match | "STRING" |
logFrequency | Yes | 1000 | No | The frequency for reporting the processed rows. | 1000 |
filterRows | Yes | false | No | Enable to filter the rows to process. | true |
useFilterFile | Yes | true | No | Enable to use a groovy file to filter the rows | true |
groovyPath | No | - | No | The path of the Groovy script that contains the filter logic. It must return a boolean value; if true, the row will be filtered | "C:\\Aspire\\config\\rowsGroovyFilter.txt" |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value; if true, the row will be filtered | "row.getBoolean(\"sensitive\") == true" |
url | Yes | - | No | Server URL | "http://localhost:9200/" |
authType | Yes | "none" | No | The authentication type. Accepted values: "none", "basic", "aws" | "none" |
username | No | - | No | User with the permissions to read from the Elastic index specified. Used only if the authType is "basic" | "admin" |
password | No | - | No | The password for the specified user. Used only if the authType is "basic" | "password" |
region | No | - | No | AWS region. Used only if the authType is "aws" | "us-east-2" |
useCredentialsProviderChain | No | false | No | Use AWS Credentials Provider Chain. Used only if the authType is "aws" | true |
accessKey | No | - | No | Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false | "AKIAIOSFODNN7EXAMPLE" |
secretKey | No | - | No | Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" |
assumeRole | No | false | No | Enable to assume the specified role to get the credentials. Used only if the authType is "aws" | true |
roleArn | No | - | No | The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true | "arn:aws:iam::123456789012:user/group/role" |
index | Yes | - | No | The elastic index to use | "values-index" |
query | Yes | - | No | The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query. | "{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}" |
uniqueValues | Yes | true | No | If enabled, the expected row format will be the one used for unique values; otherwise, the _source content will be used as the row body | true |
scrollTime | Yes | "5m" | No | The time to keep each scroll request active | "5m" |
idleConnectionTimeout | Yes | 3600000 | No | Maximum time (in milliseconds) to keep an idle connection open | 3600000 |
maxConnections | Yes | 100 | No | Maximum number of connections to be opened | 100 |
maxConnectionsPerRoute | Yes | 10 | No | Maximum number of connections opened for the same target | 10 |
connectionTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for the connection | 15000 |
socketTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for a socket response | 15000 |
useThrottling | Yes | false | No | Flag to enable connection throttling | true |
throttlingRate | No | 5000 | No | Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true | 5000 |
throttlingConnectionRate | No | 500 | No | Maximum number of connections used during the throttling period. Used only if useThrottling is true | 500 |
maxRetries | Yes | 3 | No | Maximum number of retries for a failed document | 3 |
retryWaitTime | Yes | 5000 | No | Time (in milliseconds) to wait before a retry | 5000 |
{ "type": "application", "_type": "application", "appName": "Job_Summarize_Executor", "appType": "job-summarize-executor", "config": "com.accenture.aspire:app-jobsummarize-executor", "description": "job-summarizer", "properties": { "dataPath": "/doc", "containerPath": "container", "tableIdPath": "container/url", "seedIdPath": "container/seed/id", "columnsPath": "dataProfile/columns", "columnNamePath": "columnName", "columnTypePath": "column_type", "columnsPatterns": [{ "type": "TEXT", "pattern": "STRING" }, { "type": "INT", "pattern": "INT32" } ], "logFrequency": 1000, "filterRows": false, "debug": false, "url": "http://localhost:9200/", "authType": "none", "index": "parquet-data", "query": "{\n \"query\": {\n \"bool\": {\n \"must\": [{\n \"term\": {\n \"name.keyword\": {\n \"value\": \"column-value\"\n }\n }\n }, {\n \"term\": {\n \"value.seedId\": {\n \"value\": \"${seedId}\"\n }\n }\n }, {\n \"term\": {\n \"value.tableId\": {\n \"value\": \"${tableId}\"\n }\n }\n }\n ]\n }\n }\n}\n", "uniqueValues": true, "scrollTime": "5m", "idleConnectionTimeout": 3600000, "maxConnections": 100, "maxConnectionsPerRoute": 10, "connectionTimeout": 15000, "socketTimeout": 15000, "useThrottling": false, "maxRetries": 3, "retryWaitTime": 5000 } }
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | Id of the application to update | "61014782-442a-4587-ab85-ba1439a7f7b5" |
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Job-Executor" |
appType | Yes | - | No | The value must be "job-summarize-executor". | "job-summarize-executor" |
config | Yes | - | No | The value must be "com.accenture.aspire:app-jobsummarize-executor". | "com.accenture.aspire:app-jobsummarize-executor" |
description | Yes | - | No | The description | "Job-Executor" |
properties | Yes | - | No | Configuration object | |
dataPath | Yes | - | No | The path of the job that contains the tables data | "/doc" |
containerPath | Yes | - | No | The sub path of the data that contains each table | "container" |
tableIdPath | Yes | - | No | The sub path of table data that contains the table id | "container/url" |
seedIdPath | Yes | - | No | The sub path of table data that contains the seed id | "container/seed/id" |
columnsPath | Yes | - | No | The sub path of table objects that contains the columns information | "dataProfile/columns" |
columnNamePath | Yes | - | No | The sub path of column objects that contains the column name | "columnName" |
columnTypePath | Yes | - | No | The sub path of column objects that contains the column type | "column_type" |
columnsPatterns | Yes | [] | Yes | The column patterns used to detect each column type | [{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}] |
type | Yes | "TEXT" | No | The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN" | "TEXT" |
pattern | Yes | - | No | The pattern to match | "STRING" |
logFrequency | Yes | 1000 | No | The frequency for reporting the processed rows. | 1000 |
filterRows | Yes | false | No | Enable to filter the rows to process. | true |
useFilterFile | Yes | true | No | Enable to use a groovy file to filter the rows | true |
groovyPath | No | - | No | The path of the Groovy script that contains the filter logic. It must return a boolean value; if true, the row will be filtered | "C:\\Aspire\\config\\rowsGroovyFilter.txt" |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value; if true, the row will be filtered | "row.getBoolean(\"sensitive\") == true" |
url | Yes | - | No | Server URL | "http://localhost:9200/" |
authType | Yes | "none" | No | The authentication type. Accepted values: "none", "basic", "aws" | "none" |
username | No | - | No | User with the permissions to read from the Elastic index specified. Used only if the authType is "basic" | "admin" |
password | No | - | No | The password for the specified user. Used only if the authType is "basic" | "password" |
region | No | - | No | AWS region. Used only if the authType is "aws" | "us-east-2" |
useCredentialsProviderChain | No | false | No | Use AWS Credentials Provider Chain. Used only if the authType is "aws" | true |
accessKey | No | - | No | Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false | "AKIAIOSFODNN7EXAMPLE" |
secretKey | No | - | No | Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" |
assumeRole | No | false | No | Enable to assume the specified role to get the credentials. Used only if the authType is "aws" | true |
roleArn | No | - | No | The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true | "arn:aws:iam::123456789012:user/group/role" |
index | Yes | - | No | The elastic index to use | "values-index" |
query | Yes | - | No | The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query. | "{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}" |
uniqueValues | Yes | true | No | If enabled, the expected row format will be the one used for unique values; otherwise, the _source content will be used as the row body | true |
scrollTime | Yes | "5m" | No | The time to keep each scroll request active | "5m" |
idleConnectionTimeout | Yes | 3600000 | No | Maximum time (in milliseconds) to keep an idle connection open | 3600000 |
maxConnections | Yes | 100 | No | Maximum number of connections to be opened | 100 |
maxConnectionsPerRoute | Yes | 10 | No | Maximum number of connections opened for the same target | 10 |
connectionTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for the connection | 15000 |
socketTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for a socket response | 15000 |
useThrottling | Yes | false | No | Flag to enable connection throttling | true |
throttlingRate | No | 5000 | No | Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true | 5000 |
throttlingConnectionRate | No | 500 | No | Maximum number of connections used during the throttling period. Used only if useThrottling is true | 500 |
maxRetries | Yes | 3 | No | Maximum number of retries for a failed document | 3 |
retryWaitTime | Yes | 5000 | No | Time (in milliseconds) to wait before a retry | 5000 |
{ "id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5", "type": "application", "_type": "application", "appName": "Job_Summarize_Executor", "appType": "job-summarize-executor", "config": "com.accenture.aspire:app-jobsummarize-executor", "description": "job-summarizer", "properties": { "dataPath": "/doc", "containerPath": "container", "tableIdPath": "container/url", "seedIdPath": "container/seed/id", "columnsPath": "dataProfile/columns", "columnNamePath": "columnName", "columnTypePath": "column_type", "columnsPatterns": [{ "type": "TEXT", "pattern": "STRING" }, { "type": "INT", "pattern": "INT32" } ], "logFrequency": 1000, "filterRows": false, "debug": false, "url": "http://localhost:9200/", "authType": "none", "index": "parquet-data", "query": "{\n \"query\": {\n \"bool\": {\n \"must\": [{\n \"term\": {\n \"name.keyword\": {\n \"value\": \"column-value\"\n }\n }\n }, {\n \"term\": {\n \"value.seedId\": {\n \"value\": \"${seedId}\"\n }\n }\n }, {\n \"term\": {\n \"value.tableId\": {\n \"value\": \"${tableId}\"\n }\n }\n }\n ]\n }\n }\n}\n", "uniqueValues": true, "scrollTime": "5m", "idleConnectionTimeout": 3600000, "maxConnections": 100, "maxConnectionsPerRoute": 10, "connectionTimeout": 15000, "socketTimeout": 15000, "useThrottling": false, "maxRetries": 3, "retryWaitTime": 5000 } }