Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

The Job Summarizer Executor can be configured using the REST API.

Easy Heading Free
navigationTitleOn this Page
wrapNavigationTexttrue
navigationExpandOptionexpand-all-by-default

Create Job Summarizer Executor


Field

Required

Default

Multiple

NotesExample
typeYes-No

The value must be "application".

"application"

_typeYes-No

The value must be "application".

"application"

appNameYes-NoThe name of the application"Job-Executor"
appTypeYes-NoThe value must be "job-summarize-executor"."job-summarize-executor"
configYes-NoThe value must be "com.accenture.aspire:app-jobsummarize-executor"."com.accenture.aspire:app-jobsummarize-executor"
descriptionYes-NoThe description

"Job-Executor"

propertiesYes-NoConfiguration object
dataPathYes-NoThe path of the job that contains the tables data"/doc"
containerPathYes-NoThe sub path of the data that contains each table"container"
tableIdPathYes-NoThe sub path of table data that contains the table ID"container/url"
seedIdPathYes-NoThe sub path of table data that contains the seed ID"container/seed/id"
columnsPathYes-NoThe sub path of table objects that contains the columns information"dataProfile/columns"
columnNamePathYes-NoThe sub path of column objects that contains the column name"columnName"
columnTypePathYes-NoThe sub path of column objects that contains the column type"column_type"
columnsPatternsYes[]YesThe column patterns to detect each column type[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
typeYes"TEXT"NoThe data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN""TEXT"
patternYes-NoThe pattern to match"STRING"






logFrequencyYes1000NoThe frequency for reporting the processed rows.1000
filterRowsYesfalseNoEnable to filter the rows to process.true
useFilterFileYestrueNoEnable to use a groovy file to filter the rowstrue
groovyPathNo-NoThe path of the Groovy script that contains the filter logic. It must return a boolean value; if true, the row will be filtered"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScriptNo-NoScript used to filter the rows. It must return a boolean value; if true, the row will be filtered"row.getBoolean(\"sensitive\") == true"
urlYes-NoServer URL"http://localhost:9200/"
authTypeYes"none"NoThe authentication type. Accepted values: "none", "basic", "aws""none"
usernameNo-NoUser with the permissions to read from the Elastic index specified. Used only if the authType is "basic""admin"
passwordNo-NoThe password for the specified user. Used only if the authType is "basic""password"
regionNo-NoAWS region. Used only if the authType is "aws""us-east-2"
useCredentialsProviderChainNofalseNoUse AWS Credentials Provider Chain. Used only if the authType is "aws"true
accessKeyNo-NoKey utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false"AKIAIOSFODNN7EXAMPLE"
secretKeyNo-NoSecret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRoleNofalseNoEnable to assume the specified role to get the credentials. Used only if the authType is "aws"true
roleArnNo-NoThe Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true"arn:aws:iam::123456789012:user/group/role"
indexYes-NoThe elastic index to use"values-index"
queryYes-NoThe query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query."{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValuesYestrueNoIf enabled, the expected row format will be the one used for unique values; if not, it will use the _source content as the row bodytrue
scrollTimeYes"5m"NoThe time to keep each scroll request active"5m"
idleConnectionTimeoutYes3600000NoMaximum time (in milliseconds) to keep an idle connection open3600000
maxConnectionsYes100NoMaximum number of connections to be opened100
maxConnectionsPerRouteYes10NoMaximum number of connections opened for the same target10
connectionTimeoutYes15000NoMaximum time (in milliseconds) to wait for the connection15000
socketTimeoutYes15000NoMaximum time (in milliseconds) to wait for a socket response15000
useThrottlingYesfalseNoFlag to enable connection throttlingtrue
throttlingRateNo5000NoTime period (in milliseconds) to throttle the connection. Used only if useThrottling is true5000
throttlingConnectionRateNo500NoMaximum number of connections used during the throttling period. Used only if useThrottling is true500
maxRetriesYes3NoMaximum number of retries for each request3
retryWaitTimeYes5000NoTime (in milliseconds) to wait before a retry5000

Example

Code Block
themeRDark
titlePOST /aspire/_api/workflows/{workflow}/rules
{
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}

Update Job Summarizer Executor


Field

Required

Default

Multiple

NotesExample
idYes-NoID of the application to update"61014782-442a-4587-ab85-ba1439a7f7b5"
typeYes-No

The value must be "application".

"application"

_typeYes-No

The value must be "application".

"application"

appNameYes-NoThe name of the application"Job-Executor"
appTypeYes-NoThe value must be "job-summarize-executor"."job-summarize-executor"
configYes-NoThe value must be "com.accenture.aspire:app-jobsummarize-executor"."com.accenture.aspire:app-jobsummarize-executor"
descriptionYes-NoThe description

"Job-Executor"

propertiesYes-NoConfiguration object
dataPathYes-NoThe path of the job that contains the tables data"/doc"
containerPathYes-NoThe sub path of the data that contains each table"container"
tableIdPathYes-NoThe sub path of table data that contains the table ID"container/url"
seedIdPathYes-NoThe sub path of table data that contains the seed ID"container/seed/id"
columnsPathYes-NoThe sub path of table objects that contains the columns information"dataProfile/columns"
columnNamePathYes-NoThe sub path of column objects that contains the column name"columnName"
columnTypePathYes-NoThe sub path of column objects that contains the column type"column_type"
columnsPatternsYes[]YesThe column patterns to detect each column type[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
typeYes"TEXT"NoThe data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN""TEXT"
patternYes-NoThe pattern to match"STRING"






logFrequencyYes1000NoThe frequency for reporting the processed rows.1000
filterRowsYesfalseNoEnable to filter the rows to process.true
useFilterFileYestrueNoEnable to use a groovy file to filter the rowstrue
groovyPathNo-NoThe path of the Groovy script that contains the filter logic. It must return a boolean value; if true, the row will be filtered"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScriptNo-NoScript used to filter the rows. It must return a boolean value; if true, the row will be filtered"row.getBoolean(\"sensitive\") == true"
urlYes-NoServer URL"http://localhost:9200/"
authTypeYes"none"NoThe authentication type. Accepted values: "none", "basic", "aws""none"
usernameNo-NoUser with the permissions to read from the Elastic index specified. Used only if the authType is "basic""admin"
passwordNo-NoThe password for the specified user. Used only if the authType is "basic""password"
regionNo-NoAWS region. Used only if the authType is "aws""us-east-2"
useCredentialsProviderChainNofalseNoUse AWS Credentials Provider Chain. Used only if the authType is "aws"true
accessKeyNo-NoKey utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false"AKIAIOSFODNN7EXAMPLE"
secretKeyNo-NoSecret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRoleNofalseNoEnable to assume the specified role to get the credentials. Used only if the authType is "aws"true
roleArnNo-NoThe Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true"arn:aws:iam::123456789012:user/group/role"
indexYes-NoThe elastic index to use"values-index"
queryYes-NoThe query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query."{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValuesYestrueNoIf enabled, the expected row format will be the one used for unique values; if not, it will use the _source content as the row bodytrue
scrollTimeYes"5m"NoThe time to keep each scroll request active"5m"
idleConnectionTimeoutYes3600000NoMaximum time (in milliseconds) to keep an idle connection open3600000
maxConnectionsYes100NoMaximum number of connections to be opened100
maxConnectionsPerRouteYes10NoMaximum number of connections opened for the same target10
connectionTimeoutYes15000NoMaximum time (in milliseconds) to wait for the connection15000
socketTimeoutYes15000NoMaximum time (in milliseconds) to wait for a socket response15000
useThrottlingYesfalseNoFlag to enable connection throttlingtrue
throttlingRateNo5000NoTime period (in milliseconds) to throttle the connection. Used only if useThrottling is true5000
throttlingConnectionRateNo500NoMaximum number of connections used during the throttling period. Used only if useThrottling is true500
maxRetriesYes3NoMaximum number of retries for a failed document3
retryWaitTimeYes5000NoTime (in milliseconds) to wait before a retry5000

Example

Code Block
themeRDark
titlePUT /aspire/_api/workflows/{workflow}/rules/{id}
{
  "id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5",
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}