Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Job-Executor" |
appType | Yes | - | No | The value must be "job-summarize-executor". | "job-summarize-executor" |
config | Yes | - | No | The value must be "com.accenture.aspire:app-jobsummarize-executor". | "com.accenture.aspire:app-jobsummarize-executor" |
description | Yes | - | No | The description | "Job-Executor" |
properties | Yes | - | No | Configuration object | |
dataPath | Yes | - | No | The path of the job that contains the tables data | "/doc" |
containerPath | Yes | - | No | The sub path of the data that contains each table | "container" |
tableIdPath | Yes | - | No | The sub path of table data that contains the table id | "container/url" |
seedIdPath | Yes | - | No | The sub path of table data that contains the seed id | "container/seed/id" |
columnsPath | Yes | - | No | The sub path of table objects that contains the columns information | "dataProfile/columns" |
columnNamePath | Yes | - | No | The sub path of column objects that contains the column name | "columnName" |
columnTypePath | Yes | - | No | The sub path of column objects that contains the column type | "column_type" |
columnsPatterns | Yes | [] | Yes | The columns patterns to detect each column type | [{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}] |
type | Yes | "TEXT" | No | The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN" | "TEXT" |
pattern | Yes | - | No | The pattern to match | "STRING" |
logFrequency | Yes | 1000 | No | The frequency for reporting the processed rows. | 1000 |
filterRows | Yes | false | No | Enable to filter the rows to process. | true |
useFilterFile | Yes | true | No | Enable to use a groovy file to filter the rows | true |
groovyPath | No | - | No | The path of the groovy script that contains the filter logic. It must return a boolean value, if true the row will be filtered | "C:\\Aspire\\config\\rowsGroovyFilter.txt" |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value, if true the row will be filtered | "row.getBoolean(\"sensitive\") == true" |
url | Yes | - | No | Server URL | "http://localhost:9200/" |
authType | Yes | "none" | No | The authentication type. Accepted values: "none", "basic", "aws" | "none" |
username | No | - | No | User with the permissions to read from the Elastic index specified. Used only if the authType is "basic" | "admin" |
password | No | - | No | The password for the specified user. Used only if the authType is "basic" | "password" |
region | No | - | No | AWS region. Used only if the authType is "aws" | "us-east-2" |
useCredentialsProviderChain | No | false | No | Use AWS Credentials Provider Chain. Used only if the authType is "aws" | true |
accessKey | No | - | No | Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false | "AKIAIOSFODNN7EXAMPLE" |
secretKey | No | - | No | Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" |
assumeRole | No | false | No | Enable to assume the specified role to get the credentials. Used only if the authType is "aws" | true |
roleArn | No | - | No | The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true | "arn:aws:iam::123456789012:user/group/role" |
index | Yes | - | No | The elastic index to use | "values-index" |
query | Yes | - | No | The query for fetching the unique values. It may contain the placeholders ${seedId} and ${tableId}. | "{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}" |
uniqueValues | Yes | true | No | If enabled, the expected row format will be the one used for unique values; otherwise the _source content will be used as the row body | true |
scrollTime | Yes | "5m" | No | The time to keep each scroll request active | "5m" |
idleConnectionTimeout | Yes | 3600000 | No | Maximum time (in milliseconds) to keep an idle connection open | 3600000 |
maxConnections | Yes | 100 | No | Maximum number of connections to be opened | 100 |
maxConnectionsPerRoute | Yes | 10 | No | Maximum number of connections opened for the same target | 10 |
connectionTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for the connection | 15000 |
socketTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for a socket response | 15000 |
useThrottling | Yes | false | No | Flag to enable connection throttling | true |
throttlingRate | No | 5000 | No | Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true | 5000 |
throttlingConnectionRate | No | 500 | No | Maximum number of connections used during the throttling period. Used only if useThrottling is true | 500 |
maxRetries | Yes | 3 | No | Maximum number of retries for a failed document | 3 |
retryWaitTime | Yes | 5000 | No | Time (in milliseconds) to wait before a retry | 5000 |
Code Block | ||||
---|---|---|---|---|
| ||||
{
"type": "application",
"_type": "application",
"appName": "Job_Summarize_Executor",
"appType": "job-summarize-executor",
"config": "com.accenture.aspire:app-jobsummarize-executor",
"description": "job-summarizer",
"properties": {
"dataPath": "/doc",
"containerPath": "container",
"tableIdPath": "container/url",
"seedIdPath": "container/seed/id",
"columnsPath": "dataProfile/columns",
"columnNamePath": "columnName",
"columnTypePath": "column_type",
"columnsPatterns": [{
"type": "TEXT",
"pattern": "STRING"
}, {
"type": "INT",
"pattern": "INT32"
}
],
"logFrequency": 1000,
"filterRows": false,
"debug": false,
"url": "http://localhost:9200/",
"authType": "none",
"index": "parquet-data",
"query": "{\n \"query\": {\n \"bool\": {\n \"must\": [{\n \"term\": {\n \"name.keyword\": {\n \"value\": \"column-value\"\n }\n }\n }, {\n \"term\": {\n \"value.seedId\": {\n \"value\": \"${seedId}\"\n }\n }\n }, {\n \"term\": {\n \"value.tableId\": {\n \"value\": \"${tableId}\"\n }\n }\n }\n ]\n }\n }\n}\n",
"uniqueValues": true,
"scrollTime": "5m",
"idleConnectionTimeout": 3600000,
"maxConnections": 100,
"maxConnectionsPerRoute": 10,
"connectionTimeout": 15000,
"socketTimeout": 15000,
"useThrottling": false,
"maxRetries": 3,
"retryWaitTime": 5000
}
} |
Code Block | ||||
---|---|---|---|---|
| ||||
{
"type": "application",
"_type": "application",
"description": "Job-Executor",
"config": "com.accenture.aspire:app-jobsummarize-executor",
"appType": "job-summarize-executor",
"appName": "Job Summarize Executor",
"properties": {
"addSchema": true,
"useTempFile": true,
"debug": false,
"threadPool": 5,
"logFrequency": 1000,
"filterRows": true,
"useFilterFile": false,
"groovyScript": "// This script must return a boolean.\n// The references of the job, doc, component, row and table objects are available.\n// Javadoc references \n// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html\n// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html\nrow.getBoolean(\"sensitive\") == true"
}
} |
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | Id of the application to update | "61014782-442a-4587-ab85-ba1439a7f7b5" |
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Job-Executor" |
appType | Yes | - | No | The value must be "job-summarize-executor". | "job-summarize-executor" |
config | Yes | - | No | The value must be "com.accenture.aspire:app-jobsummarize-executor". | "com.accenture.aspire:app-jobsummarize-executor" |
description | Yes | - | No | The description | "Job-Executor" |
properties | Yes | - | No | Configuration object | |
dataPath | Yes | - | No | The path of the job that contains the tables data | "/doc" |
containerPath | Yes | - | No | The sub path of the data that contains each table | "container" |
tableIdPath | Yes | - | No | The sub path of table data that contains the table id | "container/url" |
seedIdPath | Yes | - | No | The sub path of table data that contains the seed id | "container/seed/id" |
columnsPath | Yes | - | No | The sub path of table objects that contains the columns information | "dataProfile/columns" |
columnNamePath | Yes | - | No | The sub path of column objects that contains the column name | "columnName" |
columnTypePath | Yes | - | No | The sub path of column objects that contains the column type | "column_type" |
columnsPatterns | Yes | [] | Yes | The columns patterns to detect each column type | [{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}] |
type | Yes | "TEXT" | No | The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN" | "TEXT" |
pattern | Yes | - | No | The pattern to match | "STRING" |
logFrequency | Yes | 1000 | No | The frequency for reporting the processed rows. | 1000 |
filterRows | Yes | false | No | Enable to filter the rows to process. | true |
useFilterFile | Yes | true | No | Enable to use a groovy file to filter the rows | true |
groovyPath | No | - | No | The path of the groovy script that contains the filter logic. It must return a boolean value, if true the row will be filtered | "C:\\Aspire\\config\\rowsGroovyFilter.txt" |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value, if true the row will be filtered | "row.getBoolean(\"sensitive\") == true" |
url | Yes | - | No | Server URL | "http://localhost:9200/" |
authType | Yes | "none" | No | The authentication type. Accepted values: "none", "basic", "aws" | "none" |
username | No | - | No | User with the permissions to read from the Elastic index specified. Used only if the authType is "basic" | "admin" |
password | No | - | No | The password for the specified user. Used only if the authType is "basic" | "password" |
region | No | - | No | AWS region. Used only if the authType is "aws" | "us-east-2" |
useCredentialsProviderChain | No | false | No | Use AWS Credentials Provider Chain. Used only if the authType is "aws" | true |
accessKey | No | - | No | Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false | "AKIAIOSFODNN7EXAMPLE" |
secretKey | No | - | No | Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" |
assumeRole | No | false | No | Enable to assume the specified role to get the credentials. Used only if the authType is "aws" | true |
roleArn | No | - | No | The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true | "arn:aws:iam::123456789012:user/group/role" |
index | Yes | - | No | The elastic index to use | "values-index" |
query | Yes | - | No | The query for fetching the unique values. It may contain the placeholders ${seedId} and ${tableId}. | "{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}" |
uniqueValues | Yes | true | No | If enabled, the expected row format will be the one used for unique values; otherwise the _source content will be used as the row body | true |
scrollTime | Yes | "5m" | No | The time to keep each scroll request active | "5m" |
idleConnectionTimeout | Yes | 3600000 | No | Maximum time (in milliseconds) to keep an idle connection open | 3600000 |
maxConnections | Yes | 100 | No | Maximum number of connections to be opened | 100 |
maxConnectionsPerRoute | Yes | 10 | No | Maximum number of connections opened for the same target | 10 |
connectionTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for the connection | 15000 |
socketTimeout | Yes | 15000 | No | Maximum time (in milliseconds) to wait for a socket response | 15000 |
useThrottling | Yes | false | No | Flag to enable connection throttling | true |
throttlingRate | No | 5000 | No | Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true | 5000 |
throttlingConnectionRate | No | 500 | No | Maximum number of connections used during the throttling period. Used only if useThrottling is true | 500 |
maxRetries | Yes | 3 | No | Maximum number of retries for a failed document | 3 |
retryWaitTime | Yes | 5000 | No | Time (in milliseconds) to wait before a retry | 5000 |
Code Block | ||||
---|---|---|---|---|
| ||||
{
"id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5",
"type": "application",
"_type": "application",
"appName": "Job_Summarize_Executor",
"appType": "job-summarize-executor",
"config": "com.accenture.aspire:app-jobsummarize-executor",
"description": "job-summarizer",
"properties": {
"dataPath": "/doc",
"containerPath": "container",
"tableIdPath": "container/url",
"seedIdPath": "container/seed/id",
"columnsPath": "dataProfile/columns",
"columnNamePath": "columnName",
"columnTypePath": "column_type",
"columnsPatterns": [{
"type": "TEXT",
"pattern": "STRING"
}, {
"type": "INT",
"pattern": "INT32"
}
],
"logFrequency": 1000,
"filterRows": false,
"debug": false,
"url": "http://localhost:9200/",
"authType": "none",
"index": "parquet-data",
"query": "{\n \"query\": {\n \"bool\": {\n \"must\": [{\n \"term\": {\n \"name.keyword\": {\n \"value\": \"column-value\"\n }\n }\n }, {\n \"term\": {\n \"value.seedId\": {\n \"value\": \"${seedId}\"\n }\n }\n }, {\n \"term\": {\n \"value.tableId\": {\n \"value\": \"${tableId}\"\n }\n }\n }\n ]\n }\n }\n}\n",
"uniqueValues": true,
"scrollTime": "5m",
"idleConnectionTimeout": 3600000,
"maxConnections": 100,
"maxConnectionsPerRoute": 10,
"connectionTimeout": 15000,
"socketTimeout": 15000,
"useThrottling": false,
"maxRetries": 3,
"retryWaitTime": 5000
}
} |
Code Block | ||||
---|---|---|---|---|
| ||||
{
"id": "61014782-442a-4587-ab85-ba1439a7f7b5",
"type": "application",
"_type": "application",
"description": "Job-Executor",
"config": "com.accenture.aspire:app-jobsummarize-executor",
"appType": "job-summarize-executor",
"appName": "Job Summarize Executor",
"properties": {
"addSchema": true,
"useTempFile": true,
"debug": false,
"threadPool": 5,
"logFrequency": 1000,
"filterRows": true,
"useFilterFile": false,
"groovyScript": "// This script must return a boolean.\n// The references of the job, doc, component, row and table objects are available.\n// Javadoc references \n// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html\n// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html\nrow.getBoolean(\"sensitive\") == true"
}
} |