Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Job-Executor" |
appType | Yes | - | No | The value must be "job-summarize-executor". | "job-summarize-executor" |
config | Yes | - | No | The value must be "com.accenture.aspire:app-jobsummarize-executor". | "com.accenture.aspire:app-jobsummarize-executor" |
description | Yes | - | No | The description | "Job-Executor" |
properties | Yes | - | No | Configuration object |
---|
dataPath | Yes | - | No | The path of the job that contains the tables data | "/doc" |
containerPath | Yes | - | No | The sub path of the data that contains each table | "container" |
tableIdPath | Yes | - | No | The sub path of table data that contains the table ID | "container/url" |
seedIdPath | Yes | - | No | The sub path of table data that contains the seed ID | "container/seed/id" |
columnsPath | Yes | - | No | The sub path of table objects that contains the columns information | "dataProfile/columns" |
columnNamePath | Yes | - | No | The sub path of column objects that contains the column name | "columnName" |
columnTypePath | Yes | - | No | The sub path of column objects that contains the column type | "column_type" |
columnsPatterns | Yes | [] | Yes | The column patterns to detect each column type | [{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}] |
---|---|---|---|---|---|
type | Yes | "TEXT" | No | The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN" | "TEXT" |
pattern | Yes | - | No | The pattern to match | "STRING" |
logFrequency | Yes | 1000 | No | The frequency for reporting the processed rows. | 1000 |
filterRows | Yes | false | No | Enable to filter the rows to process. | true |
useFilterFile | Yes | true | No | Enable to use a groovy file to filter the rows | true |
groovyPath | No | - | No | The path of the groovy script that contains the filter logic. It must return a boolean value, if true, the row will be filtered. | "C:\\Aspire\\config\\rowsGroovyFilter.txt" |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value, if true, the row will be filtered. | "row.getBoolean(\"sensitive\") == true" |
url | Yes | - | No | Server URL | "http://localhost:9200/" |
authType | Yes | "none" | No | The authentication type. Accepted values: "none", "basic", "aws". | "none" |
username | No | - | No | User with the permissions to read from the Elastic index specified. Used only if the authType is "basic". | "admin" |
password | No | - | No | The password for the specified user. Used only if the authType is "basic". | "password" |
region | No | - | No | AWS region. Used only if the authType is "aws". | "us-east-2" |
useCredentialsProviderChain | No | false | No | Use AWS Credentials Provider Chain. Used only if the authType is "aws". | true |
accessKey | No | - | No | Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false. | "AKIAIOSFODNN7EXAMPLE" |
secretKey | No | - | No | Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false. | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" |
assumeRole | No | false | No | Enable to assume the specified role to get the credentials. Used only if the authType is "aws". | true |
roleArn | No | - | No | The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true. | "arn:aws:iam::123456789012:user/group/role" |
index | Yes | - | No | The elastic index to use. | "values-index" |
query | Yes | - | No | The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query. | "{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}" |
uniqueValues | Yes | true | No | If enabled, the expected row format will be the one used for unique values, if not, it will use the _source content as the row body. | true |
scrollTime | Yes | "5m" | No | The time to keep each scroll request active. | "5m" |
idleConnectionTimeout | Yes | 3600000 | No | Maximum time (in milliseconds) to keep an idle connection open. | 3600000 |
maxConnections | Yes | 100 | No | Maximum number of connections to be opened. | 100 |
maxConnectionsPerRoute | Yes | 10 | No | Maximum number of connections opened for the same target. | 10 |
connectionTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for the connection. | 15000 |
socketTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for a socket response. | 15000 |
useThrottling | Yes | false | No | Flag to enable connection throttling. | true |
throttlingRate | No | 5000 | No | Time (in milliseconds) to throttle the connection. Used only if useThrottling is true. | 5000 |
throttlingConnectionRate | No | 500 | No | Maximum number of connections used during the throttling period. Used only if useThrottling is true. | 500 |
maxRetries | Yes | 3 | No | Maximum number of retries for each request. | 3 |
retryWaitTime | Yes | 5000 | No | Time (in milliseconds) to wait before a retry. | 5000 |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "type": "application", "_type": "application", "appName": "Job_Summarize_Executor", "appType": "job-summarize-executor", "config": "com.accenture.aspire:app-jobsummarize-executor", "description": "job-summarizer", "properties": { "dataPath": "/doc", "containerPath": "container", "tableIdPath": "container/url", "seedIdPath": "container/seed/id", "columnsPath": "dataProfile/columns", "columnNamePath": "columnName", "columnTypePath": "column_type", "columnsPatterns": [{ "type": "TEXT", "pattern": "STRING" }, { "type": "INT", "pattern": "INT32" } ], "logFrequency": 1000, "filterRows": false, "debug": false, "url": "http://localhost:9200/", "authType": "none", "index": "parquet-data", "query": "{\n \"query\": {\n \"bool\": {\n \"must\": [{\n \"term\": {\n \"name.keyword\": {\n \"value\": \"column-value\"\n }\n }\n }, {\n \"term\": {\n \"value.seedId\": {\n \"value\": \"${seedId}\"\n }\n }\n }, {\n \"term\": {\n \"value.tableId\": {\n \"value\": \"${tableId}\"\n }\n }\n }\n ]\n }\n }\n}\n", "uniqueValues": true, "scrollTime": "5m", "idleConnectionTimeout": 3600000, "maxConnections": 100, "maxConnectionsPerRoute": 10, "connectionTimeout": 15000, "socketTimeout": 15000, "useThrottling": false, "maxRetries": 3, "retryWaitTime": 5000 } } |
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | ID of the application to update | "61014782-442a-4587-ab85-ba1439a7f7b5" |
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Job-Executor" |
appType | Yes | - | No | The value must be "job-summarize-executor". | "job-summarize-executor" |
config | Yes | - | No | The value must be "com.accenture.aspire:app-jobsummarize-executor". | "com.accenture.aspire:app-jobsummarize-executor" |
description | Yes | - | No | The description | "Job-Executor" |
properties | Yes | - | No | Configuration object |
---|
dataPath | Yes | - | No | The path of the job that contains the tables data | "/doc" |
containerPath | Yes | - | No | The sub path of the data that contains each table | "container" |
tableIdPath | Yes | - | No | The sub path of table data that contains the table ID | "container/url" |
seedIdPath | Yes | - | No | The sub path of table data that contains the seed ID | "container/seed/id" |
columnsPath | Yes | - | No | The sub path of table objects that contains the columns information | "dataProfile/columns" |
columnNamePath | Yes | - | No | The sub path of column objects that contains the column name | "columnName" |
columnTypePath | Yes | - | No | The sub path of column objects that contains the column type | "column_type" |
columnsPatterns | Yes | [] | Yes | The column patterns to detect each column type | [{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}] |
---|---|---|---|---|---|
type | Yes | "TEXT" | No | The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN" | "TEXT" |
pattern | Yes | - | No | The pattern to match. | "STRING" |
logFrequency | Yes | 1000 | No | The frequency for reporting the processed rows. | 1000 |
filterRows | Yes | false | No | Enable to filter the rows to process. | true |
useFilterFile | Yes | true | No | Enable to use a groovy file to filter the rows | true |
groovyPath | No | - | No | The path of the groovy script that contains the filter logic. It must return a boolean value. If true, the row will be filtered. | "C:\\Aspire\\config\\rowsGroovyFilter.txt" |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value. If true, the row will be filtered. | "row.getBoolean(\"sensitive\") == true" |
url | Yes | - | No | Server URL | "http://localhost:9200/" |
authType | Yes | "none" | No | The authentication type. Accepted values: "none", "basic", "aws" | "none" |
username | No | - | No | User with the permissions to read from the Elastic index specified. Used only if the authType is "basic". | "admin" |
password | No | - | No | The password for the specified user. Used only if the authType is "basic". | "password" |
region | No | - | No | AWS region. Used only if the authType is "aws". | "us-east-2" |
useCredentialsProviderChain | No | false | No | Use AWS Credentials Provider Chain. Used only if the authType is "aws". | true |
accessKey | No | - | No | Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false. | "AKIAIOSFODNN7EXAMPLE" |
secretKey | No | - | No | Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false. | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" |
assumeRole | No | false | No | Enable to assume the specified role to get the credentials. Used only if the authType is "aws". | true |
roleArn | No | - | No | The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true. | "arn:aws:iam::123456789012:user/group/role" |
index | Yes | - | No | The elastic index to use. | "values-index" |
query | Yes | - | No | The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query. | "{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}" |
uniqueValues | Yes | true | No | If enabled, the expected row format will be the one used for unique values, if not, it will use the _source content as the row body. | true |
scrollTime | Yes | "5m" | No | The time to keep each scroll request active. | "5m" |
idleConnectionTimeout | Yes | 3600000 | No | Maximum time (in milliseconds) to keep an idle connection open. | 3600000 |
maxConnections | Yes | 100 | No | Maximum number of connections to be opened. | 100 |
maxConnectionsPerRoute | Yes | 10 | No | Maximum number of connections opened for the same target. | 10 |
connectionTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for the connection. | 15000 |
socketTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for a socket response. | 15000 |
useThrottling | Yes | false | No | Flag to enable connection throttling. | true |
throttlingRate | No | 5000 | No | Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true. | 5000 |
throttlingConnectionRate | No | 500 | No | Maximum number of connections used during the throttling period. Used only if useThrottling is true. | 500 |
maxRetries | Yes | 3 | No | Maximum number of retries for a failed document. | 3 |
retryWaitTime | Yes | 5000 | No | Time (in milliseconds) to wait before a retry. | 5000 |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5", "type": "application", "_type": "application", "appName": "Job_Summarize_Executor", "appType": "job-summarize-executor", "config": "com.accenture.aspire:app-jobsummarize-executor", "description": "job-summarizer", "properties": { "dataPath": "/doc", "containerPath": "container", "tableIdPath": "container/url", "seedIdPath": "container/seed/id", "columnsPath": "dataProfile/columns", "columnNamePath": "columnName", "columnTypePath": "column_type", "columnsPatterns": [{ "type": "TEXT", "pattern": "STRING" }, { "type": "INT", "pattern": "INT32" } ], "logFrequency": 1000, "filterRows": false, "debug": false, "url": "http://localhost:9200/", "authType": "none", "index": "parquet-data", "query": "{\n \"query\": {\n \"bool\": {\n \"must\": [{\n \"term\": {\n \"name.keyword\": {\n \"value\": \"column-value\"\n }\n }\n }, {\n \"term\": {\n \"value.seedId\": {\n \"value\": \"${seedId}\"\n }\n }\n }, {\n \"term\": {\n \"value.tableId\": {\n \"value\": \"${tableId}\"\n }\n }\n }\n ]\n }\n }\n}\n", "uniqueValues": true, "scrollTime": "5m", "idleConnectionTimeout": 3600000, "maxConnections": 100, "maxConnectionsPerRoute": 10, "connectionTimeout": 15000, "socketTimeout": 15000, "useThrottling": false, "maxRetries": 3, "retryWaitTime": 5000 } } |