Create Job Summarizer Executor

Field	Required	Default	Multiple	Notes	Example
type	Yes	-	No	The value must be "application".	"application"
_type	Yes	-	No	The value must be "application".	"application"
appName	Yes	-	No	The name of the application	"Job-Executor"
appType	Yes	-	No	The value must be "job-summarize-executor".	"job-summarize-executor"
config	Yes	-	No	The value must be "com.accenture.aspire:app-jobsummarize-executor".	"com.accenture.aspire:app-jobsummarize-executor"
description	Yes	-	No	The description	"Job-Executor"
properties	Yes	-	No	Configuration object
dataPath	Yes	-	No	The path of the job that contains the tables data	"/doc"
containerPath	Yes	-	No	The sub path of the data that contains each table	"container"
tableIdPath	Yes	-	No	The sub path of table data that contains the table id	"container/url"
seedIdPath	Yes	-	No	The sub path of table data that contains the seed id	"container/seed/id"
columnsPath	Yes	-	No	The sub path of table objects that contains the columns information	"dataProfile/columns"
columnNamePath	Yes	-	No	The sub path of column objects that contains the column name	"columnName"
columnTypePath	Yes	-	No	The sub path of column objects that contains the column type	"column_type"
columnsPatterns	Yes	[]	Yes	The columns patterns to detect each column type	[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
type	Yes	"TEXT"	No	The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN"	"TEXT"
pattern	Yes	-	No	The pattern to match	"STRING"

logFrequency	Yes	1000	No	The frequency for reporting the processed rows.	1000
filterRows	Yes	false	No	Enable to filter the rows to process.	true
useFilterFile	Yes	true	No	Enable to use a groovy file to filter the rows	true
groovyPath	No	-	No	The path of the groovy script that contains the filter logic. The script must return a boolean value; if true, the row will be filtered	"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScript	No	-	No	Script used to filter the rows. It must return a boolean value; if true, the row will be filtered	"row.getBoolean(\"sensitive\") == true"
url	Yes	-	No	Server URL	"http://localhost:9200/"
authType	Yes	"none"	No	The authentication type. Accepted values: "none", "basic", "aws"	"none"
username	No	-	No	User with the permissions to read from the Elastic index specified. Used only if the authType is "basic"	"admin"
password	No	-	No	The password for the specified user. Used only if the authType is "basic"	"password"
region	No	-	No	AWS region. Used only if the authType is "aws"	"us-east-2"
useCredentialsProviderChain	No	false	No	Use AWS Credentials Provider Chain. Used only if the authType is "aws"	true
accessKey	No	-	No	Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false	"AKIAIOSFODNN7EXAMPLE"
secretKey	No	-	No	Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false	"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRole	No	false	No	Enable to assume the specified role to get the credentials. Used only if the authType is "aws"	true
roleArn	No	-	No	The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true	"arn:aws:iam::123456789012:role/group/role"
index	Yes	-	No	The elastic index to use	"values-index"
query	Yes	-	No	The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query.	"{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValues	Yes	true	No	If enabled, the expected row format will be the one used for unique values; if not, the _source content will be used as the row body	true
scrollTime	Yes	"5m"	No	The time to keep each scroll request active	"5m"
idleConnectionTimeout	Yes	3600000	No	Maximum time (in milliseconds) to keep an idle connection open	3600000
maxConnections	Yes	100	No	Maximum number of connections to be opened	100
maxConnectionsPerRoute	Yes	10	No	Maximum number of connections opened for the same target	10
connectionTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for the connection	15000
socketTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for a socket response	15000
useThrottling	Yes	false	No	Flag to enable connection throttling	true
throttlingRate	No	5000	No	Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true	5000
throttlingConnectionRate	No	500	No	Maximum number of connections used during the throttling period. Used only if useThrottling is true	500
maxRetries	Yes	3	No	Maximum number of retries for a failed document	3
retryWaitTime	Yes	5000	No	Time (in milliseconds) to wait before a retry	5000

Example

POST /aspire/_api/workflows/{workflow}/rules

{
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}

Update Job Summarizer Executor

Field	Required	Default	Multiple	Notes	Example
id	Yes	-	No	Id of the application to update	"61014782-442a-4587-ab85-ba1439a7f7b5"
type	Yes	-	No	The value must be "application".	"application"
_type	Yes	-	No	The value must be "application".	"application"
appName	Yes	-	No	The name of the application	"Job-Executor"
appType	Yes	-	No	The value must be "job-summarize-executor".	"job-summarize-executor"
config	Yes	-	No	The value must be "com.accenture.aspire:app-jobsummarize-executor".	"com.accenture.aspire:app-jobsummarize-executor"
description	Yes	-	No	The description	"Job-Executor"
properties	Yes	-	No	Configuration object
dataPath	Yes	-	No	The path of the job that contains the tables data	"/doc"
containerPath	Yes	-	No	The sub path of the data that contains each table	"container"
tableIdPath	Yes	-	No	The sub path of table data that contains the table id	"container/url"
seedIdPath	Yes	-	No	The sub path of table data that contains the seed id	"container/seed/id"
columnsPath	Yes	-	No	The sub path of table objects that contains the columns information	"dataProfile/columns"
columnNamePath	Yes	-	No	The sub path of column objects that contains the column name	"columnName"
columnTypePath	Yes	-	No	The sub path of column objects that contains the column type	"column_type"
columnsPatterns	Yes	[]	Yes	The columns patterns to detect each column type	[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
type	Yes	"TEXT"	No	The data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN"	"TEXT"
pattern	Yes	-	No	The pattern to match	"STRING"

logFrequency	Yes	1000	No	The frequency for reporting the processed rows.	1000
filterRows	Yes	false	No	Enable to filter the rows to process.	true
useFilterFile	Yes	true	No	Enable to use a groovy file to filter the rows	true
groovyPath	No	-	No	The path of the groovy script that contains the filter logic. The script must return a boolean value; if true, the row will be filtered	"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScript	No	-	No	Script used to filter the rows. It must return a boolean value; if true, the row will be filtered	"row.getBoolean(\"sensitive\") == true"
url	Yes	-	No	Server URL	"http://localhost:9200/"
authType	Yes	"none"	No	The authentication type. Accepted values: "none", "basic", "aws"	"none"
username	No	-	No	User with the permissions to read from the Elastic index specified. Used only if the authType is "basic"	"admin"
password	No	-	No	The password for the specified user. Used only if the authType is "basic"	"password"
region	No	-	No	AWS region. Used only if the authType is "aws"	"us-east-2"
useCredentialsProviderChain	No	false	No	Use AWS Credentials Provider Chain. Used only if the authType is "aws"	true
accessKey	No	-	No	Key utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false	"AKIAIOSFODNN7EXAMPLE"
secretKey	No	-	No	Secret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false	"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRole	No	false	No	Enable to assume the specified role to get the credentials. Used only if the authType is "aws"	true
roleArn	No	-	No	The Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true	"arn:aws:iam::123456789012:role/group/role"
index	Yes	-	No	The elastic index to use	"values-index"
query	Yes	-	No	The query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query.	"{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValues	Yes	true	No	If enabled, the expected row format will be the one used for unique values; if not, the _source content will be used as the row body	true
scrollTime	Yes	"5m"	No	The time to keep each scroll request active	"5m"
idleConnectionTimeout	Yes	3600000	No	Maximum time (in milliseconds) to keep an idle connection open	3600000
maxConnections	Yes	100	No	Maximum number of connections to be opened	100
maxConnectionsPerRoute	Yes	10	No	Maximum number of connections opened for the same target	10
connectionTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for the connection	15000
socketTimeout	Yes	15000	No	Maximum time (in milliseconds) to wait for a socket response	15000
useThrottling	Yes	false	No	Flag to enable connection throttling	true
throttlingRate	No	5000	No	Time period (in milliseconds) to throttle the connection. Used only if useThrottling is true	5000
throttlingConnectionRate	No	500	No	Maximum number of connections used during the throttling period. Used only if useThrottling is true	500
maxRetries	Yes	3	No	Maximum number of retries for a failed document	3
retryWaitTime	Yes	5000	No	Time (in milliseconds) to wait before a retry	5000

Example

PUT /aspire/_api/workflows/{workflow}/rules/{id}

{
  "id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5",
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}

Page tree

Create Job Summarizer Executor

Example

Update Job Summarizer Executor

Example

Contact Us: [email protected]

Page tree

Rest API - Job Summarizer Executor

Create Job Summarizer Executor

Example

Update Job Summarizer Executor

Example

Contact Us: [email protected]