Create Job Summarizer Executor

Field	Required	Default	Multiple	Notes	Example
type	Yes	-	No	The value must be "application".	"application"
_type	Yes	-	No	The value must be "application".	"application"
appName	Yes	-	No	The name of the application	"Job-Executor"
appType	Yes	-	No	The value must be "job-summarize-executor".	"job-summarize-executor"
config	Yes	-	No	The value must be "com.accenture.aspire:app-jobsummarize-executor".	"com.accenture.aspire:app-jobsummarize-executor"
description	Yes	-	No	The description	"Job-Executor"
properties	Yes	-	No	Configuration object

addSchema	Yes	true	No	If enabled the table schema will be added to the processed columns.	true
dataPath	Yes	-	No	The path of the job that contains the tables data	"/doc"
containerPath	Yes	-	No	The sub path of the data that contains each table	"container"
tableIdPath	Yes	-	No	The sub path of table data that contains the table ID	"container/url"
seedIdPath	Yes	-	No	The sub path of table data that contains the seed ID	"container/seed/id"
columnsPatterns	Yes	[]	Yes	The column patterns to detect each column type	[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
columnsPath	Yes	-	No	The sub path of table objects that contains the columns information	"dataProfile/columns"
columnNamePath	Yes	-	No	The sub path of column objects that contains the column name	"columnName"
columnTypePath	Yes	-	No	The sub path of column objects that contains the column type	"column_type"
type	Yes	"TEXT"	No	The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN"	"TEXT"
pattern	Yes	-	No	The pattern to match	"STRING"
threadPool	Yes	5	No	The number of threads to use for parallel processing.	5



logFrequency	Yes	1000	No	The frequency for reporting the processed rows.	1000
filterRows	Yes	false	No	Enable to filter the rows to process.	true
useFilterFile	Yes	true	No	Enable to use a groovy file to filter the rows	true
groovyPath	No	-	No	The path of the groovy script that contains the filter logic. It must return a boolean value, if true, the row will be filtered.	"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScript	No	-	No	Script used to filter the rows. It must return a boolean value, if true, the row will be filtered.	"row.getBoolean(\"sensitive\") == true"
url	Yes	-	No	Server URL	"http://localhost:9200/"
authType	Yes	"none"	No	The authentication type. Accepted values: "none", "basic", "aws".	"none"
username	No	-	No	User with the permissions to read from the Elastic index specified. Used only if the authType is "basic".	"admin"
password	No	-	No	The password for the specified user. Used only if the authType is "basic".	"password"
region	No	-	No	AWS region. Used only if the authType is "aws".	"us-east-2"
useCredentialsProviderChain	No	false	No	Use AWS Credentials Provider Chain. Used only if the authType is "aws".	true
accessKey	No	-	No	Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false.	"AKIAIOSFODNN7EXAMPLE"
secretKey	No	-	No	Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false.	"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRole	No	false	No	Enable to assume the specified role to get the credentials. Used only if the authType is "aws".	true
roleArn	No	-	No	The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true.	"arn:aws:iam::123456789012:user/group/role"
index	Yes	-	No	The elastic index to use.	"values-index"
query	Yes	-	No	The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query.	"{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValues	Yes	true	No	If enabled, the expected row format will be the one used for unique values, if not, it will use the _source content as the row body.	true
scrollTime	Yes	"5m"	No	The time to keep each scroll request active.	"5m"
idleConnectionTimeout	Yes	3600000	No	Maximum time (in milliseconds) to keep an idle connection open.	3600000
maxConnections	Yes	100	No	Maximum number of connections to be opened.	100
maxConnectionsPerRoute	Yes	10	No	Maximum number of connections opened for the same target.	10
connectionTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for the connection.	15000
socketTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for a socket response.	15000
useThrottling	Yes	false	No	Flag to enable connection throttling.	true
throttlingRate	No	5000	No	Time (in milliseconds) to throttle the connection. Used only if useThrottling is true.	5000
throttlingConnectionRate	No	500	No	Maximum number of connections used during the throttling period. Used only if useThrottling is true.	500
maxRetries	Yes	3	No	Maximum number of retries for each request.	3
retryWaitTime	Yes	5000	No	Time (in milliseconds) to wait before a retry.	5000

Example

Code Block

theme	RDark
title	POST /aspire/_api/workflows/{workflow}/rules

{
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}

Update Job Summarizer Executor

Field	Required	Default	Multiple	Notes	Example
id	Yes	-	No	ID of the application to update	"61014782-442a-4587-ab85-ba1439a7f7b5"
properties	Yes	-	No	Configuration object
type	Yes	-	No	The value must be "application".	"application"
_type	Yes	-	No	The value must be "application".	"application"
appName	Yes	-	No	The name of the application	"Job-Executor"
appType	Yes	-	No	The value must be "job-summarize-executor".	"job-summarize-executor"
config	Yes	-	No	The value must be "com.accenture.aspire:app-jobsummarize-executor".	"com.accenture.aspire:app-jobsummarize-executor"
description	Yes	-	No	The description	"Job-Executor"

addSchema	Yes	true	No	If enabled the table schema will be added to the processed columns.	true
dataPath	Yes	-	No	The path of the job that contains the tables data	"/doc"
containerPath	Yes	-	No	The sub path of the data that contains each table	"container"
tableIdPath	Yes	-	No	The sub path of table data that contains the table ID	"container/url"
seedIdPath	Yes	-	No	The sub path of table data that contains the seed ID	"container/seed/id"
columnsPatterns	Yes	[]	Yes	The column patterns to detect each column type	[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
columnsPath	Yes	-	No	The sub path of table objects that contains the columns information	"dataProfile/columns"
columnNamePath	Yes	-	No	The sub path of column objects that contains the column name	"columnName"
columnTypePath	Yes	-	No	The sub path of column objects that contains the column type	"column_type"
type	Yes	"TEXT"	No	The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN"	"TEXT"
pattern	Yes	-	No	The pattern to match.	"STRING"
threadPool	Yes	5	No	The number of threads to use for parallel processing.	5



logFrequency	Yes	1000	No	The frequency for reporting the processed rows.	1000
filterRows	Yes	false	No	Enable to filter the rows to process.	true
useFilterFile	Yes	true	No	Enable to use a groovy file to filter the rows	true
groovyPath	No	-	No	The path of the groovy script that contains the filter logic. It must return a boolean value. If true, the row will be filtered.	"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScript	No	-	No	Script used to filter the rows. It must return a boolean value. If true, the row will be filtered.	"row.getBoolean(\"sensitive\") == true"
url	Yes	-	No	Server URL	"http://localhost:9200/"
authType	Yes	"none"	No	The authentication type. Accepted values: "none", "basic", "aws"	"none"
username	No	-	No	User with the permissions to read from the Elastic index specified. Used only if the authType is "basic".	"admin"
password	No	-	No	The password for the specified user. Used only if the authType is "basic".	"password"
region	No	-	No	AWS region. Used only if the authType is "aws".	"us-east-2"
useCredentialsProviderChain	No	false	No	Use AWS Credentials Provider Chain. Used only if the authType is "aws".	true
accessKey	No	-	No	Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false.	"AKIAIOSFODNN7EXAMPLE"
secretKey	No	-	No	Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false.	"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRole	No	false	No	Enable to assume the specified role to get the credentials. Used only if the authType is "aws".	true
roleArn	No	-	No	The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true.	"arn:aws:iam::123456789012:user/group/role"
index	Yes	-	No	The elastic index to use.	"values-index"
query	Yes	-	No	The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query.	"{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValues	Yes	true	No	If enabled, the expected row format will be the one used for unique values, if not, it will use the _source content as the row body.	true
scrollTime	Yes	"5m"	No	The time to keep each scroll request active.	"5m"
idleConnectionTimeout	Yes	3600000	No	Maximum time (in milliseconds) to keep an idle connection open.	3600000
maxConnections	Yes	100	No	Maximum number of connections to be opened.	100
maxConnectionsPerRoute	Yes	10	No	Maximum number of connections opened for the same target.	10
connectionTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for the connection.	15000
socketTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for a socket response.	15000
useThrottling	Yes	false	No	Flag to enable connection throttling.	true
throttlingRate	No	5000	No	Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true.	5000
throttlingConnectionRate	No	500	No	Maximum number of connections used during the throttling period. Used only if useThrottling is true.	500
maxRetries	Yes	3	No	Maximum number of retries for a failed document.	3
retryWaitTime	Yes	5000	No	Time (in milliseconds) to wait before a retry.	5000

Example

Code Block

theme	RDark
title	PUT /aspire/_api/workflows/{workflow}/rules/{id}

{
  "id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5",
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}

Page tree

Versions Compared

Old Version 2

New Version Current

Key

Create Job Summarizer Executor

Example

Update Job Summarizer Executor

Example

Page tree

Page History

Versions Compared

Old Version 2

New Version Current

Key

Create Job Summarizer Executor

Example

Update Job Summarizer Executor

Example